diff --git a/Cargo.lock b/Cargo.lock index efe2bfe067..47af8f9e20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -271,6 +271,15 @@ dependencies = [ "serde", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.6.2" @@ -581,6 +590,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -654,6 +672,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctrlc" version = "3.5.1" @@ -759,6 +787,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2ea6706d74fca54e15f1d40b5cf7fe7f764aaec61352a9fcec58fe27e042fc8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "directories" version = "6.0.0" @@ -1093,6 +1131,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "gethostname" version = "0.4.3" @@ -1291,6 +1339,7 @@ dependencies = [ "idb-rs", "serde", "serde_json", + "sha2", "tracing", ] @@ -2585,6 +2634,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2972,6 +3032,12 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/plugins/idb_import/Cargo.toml b/plugins/idb_import/Cargo.toml index ce94b6f67e..191f123e53 100644 --- a/plugins/idb_import/Cargo.toml +++ b/plugins/idb_import/Cargo.toml @@ -16,4 +16,5 @@ binaryninjacore-sys.workspace = true idb-rs = { git = "https://github.com/Vector35/idb-rs", tag = "0.1.13" } tracing = "0.1" serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" \ No newline at end of file +serde_json = "1.0" +sha2 = "0.10" \ No newline at end of file diff --git a/plugins/idb_import/src/commands.rs b/plugins/idb_import/src/commands.rs index bc17aa9b4a..f617746386 100644 --- a/plugins/idb_import/src/commands.rs +++ b/plugins/idb_import/src/commands.rs @@ -27,7 +27,8 @@ impl LoadFileField { pub fn field(&self) -> FormInputField { FormInputField::OpenFileName { prompt: "File Path".to_string(), - // TODO: This is called extension but is really a filter. + // NOTE: Binary Ninja's `OpenFileName` field names this `extension`, but it accepts a + // full file filter expression (e.g. "*.idb;*.i64"), which is what we pass here. extension: Some(self.filter.clone()), default: self.default.clone(), value: None, diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 530fef1f06..51344a2f30 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -1,20 +1,24 @@ //! Map the IDB data we parsed into the [`BinaryView`]. use crate::parse::{ - BaseAddressInfo, CommentInfo, ExportInfo, FunctionInfo, IDBInfo, LabelInfo, NameInfo, - SegmentInfo, + BaseAddressInfo, CommentInfo, ExportInfo, FunctionFolderEntry, FunctionInfo, IDBInfo, + LabelInfo, NameInfo, SegmentInfo, }; use crate::translate::TILTranslator; -use binaryninja::architecture::Architecture; +use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; +use binaryninja::component::{Component, ComponentBuilder}; +use binaryninja::function::Function; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; use binaryninja::symbol::{Binding, Symbol, SymbolType}; use binaryninja::types::Type; +use binaryninja::variable::Variable; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; -use std::collections::HashSet; +use sha2::{Digest, Sha256}; +use std::collections::{HashMap, HashSet}; /// Maps IDB data into a [`BinaryView`]. /// @@ -34,9 +38,27 @@ impl IDBMapper { return; }; - // TODO: Actually the below comment belongs in an IDBVerifier that tries to determine if the idb - // TODO: Will process correctly for the given view. - // TODO: Have a shasum check of the file to make sure we are not mapping to bad data? + // Verify the IDB was built from the binary we are about to map into. The IDB records the + // SHA256 of its original input file; the root of the view's parent chain is the raw view, + // whose bytes are that same on-disk file, so we can hash it and compare. + if let Some(expected_sha256) = &self.info.sha256 { + match sha256_of_raw_view(view) { + Some(actual_sha256) if actual_sha256 == *expected_sha256 => { + tracing::debug!("Verified IDB matches loaded binary (SHA256 {expected_sha256})"); + } + Some(actual_sha256) => { + tracing::warn!( + "IDB input file SHA256 ({expected_sha256}) does not match the loaded \ + binary ({actual_sha256}); imported data may not correspond to this binary." + ); + } + None => { + tracing::debug!( + "Could not hash the loaded binary to verify against IDB SHA256 {expected_sha256}" + ); + } + } + } // Rebase the address from ida -> binja without this rebased views will fail to map. let bn_base_address = view.start(); @@ -45,13 +67,21 @@ impl IDBMapper { BaseAddressInfo::None => bn_base_address, BaseAddressInfo::BaseSegment(start_addr) => bn_base_address.wrapping_sub(start_addr), BaseAddressInfo::BaseSection(section_addr) => { - let bn_section_addr = view - .sections() + // Align against the lowest mapped *segment*, not the lowest section. + // IDA's `min_ea` is the image base, and it maps the file header region too. + // Across executable formats the first *section* generally begins after the + // format's headers/metadata (e.g. the Mach-O header + load commands, the PE + // headers, the ELF header + program headers), so aligning to the lowest section + // over-shifts every imported address by the header size. Segments include the + // header region, so the lowest segment matches IDA's image base regardless of + // format. + let bn_segment_addr = view + .segments() .iter() - .min_by_key(|s| s.start()) - .map(|s| s.start()); - match bn_section_addr { - Some(bn_section) => bn_section.wrapping_sub(section_addr), + .map(|s| s.address_range().start) + .min(); + match bn_segment_addr { + Some(bn_segment) => bn_segment.wrapping_sub(section_addr), None => bn_base_address, } } @@ -73,14 +103,19 @@ impl IDBMapper { // Create the type translator, adding all referencable types from both the TIL and the binary view. let platform = view.default_platform().unwrap(); - // TODO: Need to remove this, but for now keeping this since it gets referenced a lot. - view.define_auto_type( - "size_t", - "IDA", - &Type::int(platform.arch().default_integer_size(), false), - ); - let til_translator = - TILTranslator::new_from_platform(&platform).with_type_container(&view.type_container()); + // Provide a fallback `size_t` only when the view does not already define one. Many IDA + // types reference `size_t`, so we keep this safety net, but defining it conditionally + // avoids clobbering a real platform/view definition with our generic integer. + if view.type_by_name("size_t").is_none() { + view.define_auto_type( + "size_t", + "IDA", + &Type::int(platform.arch().default_integer_size(), false), + ); + } + let til_translator = TILTranslator::new_from_platform(&platform) + .with_type_container(&view.type_container()) + .with_register_names(id0.register_names.clone()); let til_translator = match &self.info.til { Some(til) => til_translator.with_til_info(&til), None => til_translator, @@ -88,20 +123,31 @@ impl IDBMapper { self.map_types_to_view(view, &til_translator, &self.info.merged_types()); + // Keep the created functions keyed by their (rebased) address so we can place them into + // folders later without depending on the analysis having indexed them yet. + let mut functions_by_address: HashMap> = HashMap::new(); for func in &self.info.merged_functions() { let mut rebased_func = func.clone(); rebased_func.address = rebase(func.address); - self.map_func_to_view(view, &til_translator, &rebased_func); + if let Some(bn_func) = self.map_func_to_view(view, &til_translator, &rebased_func) { + functions_by_address.insert(rebased_func.address, bn_func); + } } for export in &id0.exports { let mut rebased_export = export.clone(); rebased_export.address = rebase(export.address); - self.map_export_to_view(view, &til_translator, &rebased_export); + if let Some(bn_func) = + self.map_export_to_view(view, &til_translator, &rebased_export) + { + functions_by_address.insert(rebased_export.address, bn_func); + } } - // TODO: The below undo and ignore is not thread safe, this means that the mapper itself - // TODO: should be the only thing running at the time of the mapping process. + // NOTE: The undo bracketing below is not thread safe, so the mapper must be the only + // writer mutating the view while it runs. This invariant holds because mapping is + // registered as a single run-once analysis activity (see `plugin_init`) rather than being + // invoked concurrently. let undo = view.file().begin_undo_actions(true); for comment in &self.info.merged_comments() { let mut rebased_comment = comment.clone(); @@ -116,6 +162,36 @@ impl IDBMapper { self.map_name_to_view(view, &til_translator, &rebased_name); } + // IDA's local labels are named locations inside functions (e.g. loop targets). They live + // in code, so `map_name_to_view` skips them; apply them as local-label symbols instead. + for label in &id0.labels { + let mut rebased_label = label.clone(); + rebased_label.address = rebase(label.address); + self.map_label_to_view(view, &rebased_label); + } + + // Recreate IDA's "Functions" window folder hierarchy. Binary Ninja's component API backs + // what the UI shows as function folders. + if let Some(dir_tree) = &self.info.dir_tree { + if !dir_tree.function_folders.is_empty() { + let mut stats = FolderStats::default(); + self.map_function_folders_to_view( + view, + &dir_tree.function_folders, + None, + &rebase, + &functions_by_address, + &mut stats, + ); + tracing::info!( + "Mapped function folders: {} folders created, {} functions placed, {} not found", + stats.folders_created, + stats.functions_placed, + stats.functions_missing + ); + } + } + // self.map_used_types_to_view(view, &til_translator); } @@ -170,8 +246,10 @@ impl IDBMapper { if let Ok(_used_types) = til_translator.used_types.lock() { used_types = _used_types.clone(); } - // TODO: Adding types to view after the types have been applied to the functions is not a - // TODO: great idea, I imagine the NTR's will have stale references until the analysis runs again. + // NOTE: Types are resolved here after they have already been applied to functions, so any + // named type references that get defined in this pass will read as stale until analysis + // re-runs and re-resolves them. Acceptable for this best-effort backfill, but a reason to + // resolve referenced types before applying function types if this path is wired in. 'found: for used_ty in &used_types { // 0. Make sure the type doesn't already exist in the view if view.type_by_name(&used_ty.name).is_some() { @@ -205,8 +283,9 @@ impl IDBMapper { } } + // NOTE: A further lookup source would be the TILs attached to the IDB (e.g. imported + // library TILs) before giving up; not yet wired in. tracing::warn!("Failed to find type: {:?}", used_ty); - // 4. TODO: Look through the idb attached tils? } } @@ -238,7 +317,11 @@ impl IDBMapper { SegmentType::Imem => Semantics::DefaultSection, }; - // TODO: Is this section already mapped using address range not name. + // Skip a segment we have already mapped. We check both the name and the exact address + // range: an overlap-based check would be wrong because the loader normally maps the whole + // address space (so every IDA segment overlaps something), but an IDA segment that covers + // the exact same range as an existing section — even under a different name — is the same + // region and should not be added twice. if view.section_by_name(&segment.name).is_some() { tracing::debug!( "Section with name '{}' already exists, skipping...", @@ -246,6 +329,19 @@ impl IDBMapper { ); return; } + if view + .sections() + .iter() + .any(|existing| existing.address_range() == segment.region) + { + tracing::debug!( + "Section covering {:0x}-{:0x} already exists, skipping '{}'...", + segment.region.start, + segment.region.end, + segment.name + ); + return; + } tracing::info!( "Mapping segment '{}': {:0x} - {:0x} ({:?})", @@ -266,7 +362,7 @@ impl IDBMapper { view: &BinaryView, til_translator: &TILTranslator, export: &ExportInfo, - ) { + ) -> Option> { let within_code_section = view .sections_at(export.address) .iter() @@ -285,8 +381,10 @@ impl IDBMapper { address: export.address, is_library: false, is_no_return: false, + register_vars: Vec::new(), + stack_frame: None, }; - self.map_func_to_view(view, til_translator, &func_info); + return self.map_func_to_view(view, til_translator, &func_info); } else { tracing::debug!("Mapping data export: {:0x}", export.address); let name_info = NameInfo { @@ -297,6 +395,7 @@ impl IDBMapper { }; self.map_name_to_view(view, til_translator, &name_info); } + None } pub fn map_func_to_view( @@ -304,7 +403,7 @@ impl IDBMapper { view: &BinaryView, til_translator: &TILTranslator, func: &FunctionInfo, - ) { + ) -> Option> { // We need to skip things that hit the extern section, since they do not have a bearing in the // actual context of the binary, and can be derived differently between IDA and Binja. let within_extern_section = view @@ -314,14 +413,21 @@ impl IDBMapper { .is_some(); if within_extern_section { tracing::debug!("Skipping function in extern section: {:0x}", func.address); - return; + return None; } let Some(bn_func) = view.add_auto_function(func.address) else { tracing::warn!("Failed to add function for {:0x}", func.address); - return; + return None; }; + // IDA marks functions that never return (e.g. `abort`, `exit`); carry that over so BN's + // analysis does not fall through past calls to them. + if func.is_no_return { + tracing::debug!("Marking function as no-return: {:0x}", func.address); + bn_func.set_auto_can_return(false); + } + if let Some(func_ty) = &func.ty { match til_translator.translate_type_info(&func_ty) { Ok(bn_func_ty) => { @@ -339,7 +445,9 @@ impl IDBMapper { } } - // TODO: Attach a platform tuple to the FunctionInfo? + // NOTE: We let the function inherit the view's default platform. Carrying an explicit + // platform on FunctionInfo would let mixed-architecture databases (e.g. ARM/Thumb + // interworking) map each function with its own platform; that is a future enhancement. if let Some(func_sym) = symbol_from_func(func) { tracing::debug!( "Mapping function symbol: {:0x} => {}", @@ -348,6 +456,182 @@ impl IDBMapper { ); view.define_auto_symbol(&func_sym); } + + self.map_register_vars_to_func(&bn_func, func); + self.map_stack_frame_to_func(&bn_func, til_translator, func); + + Some(bn_func) + } + + /// Apply a function's IDA stack frame as Binary Ninja stack variables. + /// + /// IDA stores the frame as a structure whose members run from the bottom of the local + /// variable area upward through the saved registers, return address and stack arguments. + /// Binary Ninja measures stack offsets from the return address (locals negative, arguments + /// positive), so an IDA frame offset is shifted down by `local_size + saved_regs_size`. The + /// frame members do not carry explicit offsets, so each member's offset is the running sum of + /// the preceding member widths; the synthetic saved-register and return-address members are + /// skipped (but still advance the offset) since they are not user variables. + fn map_stack_frame_to_func( + &self, + bn_func: &binaryninja::function::Function, + til_translator: &TILTranslator, + func: &FunctionInfo, + ) { + let Some(stack_frame) = &func.stack_frame else { + return; + }; + let base = (stack_frame.local_size + stack_frame.saved_regs_size) as i64; + let mut frame_offset: u64 = 0; + for (i, member) in stack_frame.frame.members.iter().enumerate() { + let member_width = til_translator + .width_of_type(&member.member_type) + .unwrap_or(0); + + // Skip the saved-register / return-address regions, but keep advancing the offset so + // the members after them still land at the right place. + if member.is_frame_r || member.is_frame_s { + frame_offset += member_width as u64; + continue; + } + + let name = member + .name + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_else(|| format!("var_{i}")); + let bn_offset = frame_offset as i64 - base; + match til_translator.translate_type_info(&member.member_type) { + Ok(ty) => { + tracing::debug!( + "Mapping stack variable '{}' at frame offset {} (bn {}) for {:0x}", + name, + frame_offset, + bn_offset, + func.address + ); + bn_func.create_auto_stack_var(bn_offset, &ty, &name); + } + Err(err) => { + tracing::warn!( + "Failed to translate stack variable '{}' type for {:0x}: {}", + name, + func.address, + err + ); + } + } + + frame_offset += member_width as u64; + } + } + + /// Apply IDA register variables ("regvars") to a function. + /// + /// IDA records that a register holds a user-named value over an address range. Binary Ninja + /// variables are function-scoped, so the range is not modeled; we name the architecture + /// register over the whole function, typed as an unsigned int of the register's width. + fn map_register_vars_to_func( + &self, + bn_func: &binaryninja::function::Function, + func: &FunctionInfo, + ) { + if func.register_vars.is_empty() { + return; + } + let arch = bn_func.arch(); + for reg_var in &func.register_vars { + let Some(register) = arch.register_by_name(®_var.register) else { + tracing::warn!( + "Skipping register variable '{}': unknown register '{}' at {:0x}", + reg_var.name, + reg_var.register, + func.address + ); + continue; + }; + let reg_width = register.info().size().max(1); + let var = Variable::from_register(register); + tracing::debug!( + "Mapping register variable {} => {} at {:0x}", + reg_var.register, + reg_var.name, + func.address + ); + bn_func.create_user_var(&var, &Type::int(reg_width, false), ®_var.name, false); + } + } + + /// Recreate an IDA function folder tree in the view. + /// + /// Binary Ninja models function folders with the component API (`Component`), which the UI + /// presents as folders. Each IDA folder becomes a component nested under its parent (the root + /// view component for top-level folders), and each function leaf is added to the folder it + /// belongs to. Functions sitting directly at the dirtree root are left in no folder, matching + /// their state in IDA. + fn map_function_folders_to_view( + &self, + view: &BinaryView, + entries: &[FunctionFolderEntry], + parent: Option<&Component>, + rebase: &impl Fn(u64) -> u64, + functions_by_address: &HashMap>, + stats: &mut FolderStats, + ) { + for entry in entries { + match entry { + FunctionFolderEntry::Function(address) => { + let Some(parent) = parent else { + continue; + }; + let rebased = rebase(*address); + // Prefer the function we created during mapping; fall back to a view lookup in + // case it came from another source. + let func = functions_by_address.get(&rebased).cloned().or_else(|| { + view.functions_at(rebased) + .iter() + .find(|func| func.start() == rebased) + .map(|func| func.to_owned()) + }); + match func { + Some(func) if parent.add_function(&func) => { + stats.functions_placed += 1; + } + Some(_) => { + tracing::debug!( + "Component rejected function at {:0x}", + rebased + ); + stats.functions_missing += 1; + } + None => { + tracing::debug!( + "No function at {:0x} to place in folder, skipping", + rebased + ); + stats.functions_missing += 1; + } + } + } + FunctionFolderEntry::Folder { name, entries } => { + let mut builder = ComponentBuilder::new(view.to_owned()).name(name.clone()); + if let Some(parent) = parent { + builder = builder.parent(parent.guid()); + } + let component = builder.finalize(); + stats.folders_created += 1; + tracing::debug!("Created folder '{}'", name); + self.map_function_folders_to_view( + view, + entries, + Some(&component), + rebase, + functions_by_address, + stats, + ); + } + } + } } pub fn map_label_to_view(&self, view: &BinaryView, label: &LabelInfo) { @@ -436,6 +720,49 @@ impl IDBMapper { } } +/// Running totals for the folder mapping, used for a single summary log line. +#[derive(Default)] +struct FolderStats { + folders_created: usize, + functions_placed: usize, + functions_missing: usize, +} + +/// Compute the SHA256 of the raw (on-disk) bytes backing `view`. +/// +/// Walks to the root of the parent-view chain, which is the raw view whose contents are the +/// original input file, then hashes its full contents. Returns `None` if the view is empty. +fn sha256_of_raw_view(view: &BinaryView) -> Option { + let mut raw_view = view.to_owned(); + while let Some(parent) = raw_view.parent_view() { + raw_view = parent; + } + + let length = raw_view.len(); + if length == 0 { + return None; + } + + // Hash in chunks so we never hold the whole binary in memory at once. + const CHUNK_SIZE: usize = 1 << 20; + let mut hasher = Sha256::new(); + let mut offset = raw_view.start(); + let end = offset + length; + while offset < end { + let want = CHUNK_SIZE.min((end - offset) as usize); + let chunk = raw_view.read_vec(offset, want); + if chunk.is_empty() { + // The view reported a length we cannot fully read; treat as unverifiable. + return None; + } + hasher.update(&chunk); + offset += chunk.len() as u64; + } + + let digest = hasher.finalize(); + Some(digest.iter().map(|b| format!("{:02x}", b)).collect()) +} + fn symbol_from_func(func: &FunctionInfo) -> Option> { let Some(func_name) = &func.name else { return None; diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 95a2fdd0eb..fb527d1503 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -2,7 +2,7 @@ use idb_rs::addr_info::{all_address_info, AddressInfo}; use idb_rs::id0::function::{FuncIdx, FuncordsIdx, IDBFunctionType}; -use idb_rs::id0::{ID0Section, Netdelta, SegmentType}; +use idb_rs::id0::{DirTreeEntry, ID0Section, Netdelta, SegmentType}; use idb_rs::id1::ID1Section; use idb_rs::id2::ID2Section; use idb_rs::til::section::TILSection; @@ -27,6 +27,35 @@ pub struct FunctionInfo { pub address: u64, pub is_library: bool, pub is_no_return: bool, + pub register_vars: Vec, + pub stack_frame: Option, +} + +/// A register renamed by the user within a function (IDA "regvar"), e.g. `eax` -> `count`. +#[derive(Debug, Clone, Serialize)] +pub struct RegisterVarInfo { + /// Architecture register the variable lives in (e.g. `eax`). + pub register: String, + /// User-assigned name for the register over its range. + pub name: String, + pub start: u64, + pub end: u64, + pub comment: String, +} + +/// A function's stack frame, as recorded by IDA. +/// +/// The `frame` UDT describes every member of the frame (locals, saved registers, return +/// address and arguments). `local_size` (IDA's `frsize`) and `saved_regs_size` (`frregs`) give +/// the geometry needed to translate IDA frame offsets into Binary Ninja's frame convention. +#[derive(Debug, Clone, Serialize)] +pub struct StackFrameInfo { + /// Size in bytes of the local variables area (IDA `frsize`). + pub local_size: u64, + /// Size in bytes of the saved registers area (IDA `frregs`). + pub saved_regs_size: u64, + /// The frame structure describing each stack member. + pub frame: idb_rs::til::udt::UDT, } #[derive(Debug, Clone, Serialize)] @@ -85,6 +114,9 @@ pub struct ID0Info { pub comments: Vec, pub labels: Vec, pub exports: Vec, + /// Processor register names indexed by IDA register number, used to resolve the registers + /// referenced by argument/return value locations into Binary Ninja registers. + pub register_names: Vec, } #[derive(Debug, Clone, Serialize)] @@ -94,6 +126,20 @@ pub struct DirTreeInfo { /// Contains both function and data names (along with their types). pub names: Vec, pub comments: Vec, + /// The IDA "Functions" window folder hierarchy (root-level entries). + pub function_folders: Vec, +} + +/// An entry in IDA's function folder tree: either a function (by address) or a named folder +/// containing further entries. Mirrors IDA's dirtree so it can be recreated as Binary Ninja +/// components. +#[derive(Debug, Clone, Serialize)] +pub enum FunctionFolderEntry { + Function(u64), + Folder { + name: String, + entries: Vec, + }, } #[derive(Debug, Clone, Serialize, Default)] @@ -134,6 +180,12 @@ impl IDBInfo { if a.ty.is_some() { b.ty = a.ty.clone(); } + if !a.register_vars.is_empty() { + b.register_vars = a.register_vars.clone(); + } + if a.stack_frame.is_some() { + b.stack_frame = a.stack_frame.clone(); + } true }); id0_functions @@ -188,11 +240,17 @@ impl IDBInfo { types.extend(til.types.clone()); } types.sort_by_key(|t| t.name.to_string()); + // `a` is the later (dir_tree-then-til) duplicate being removed and `b` is the one we + // keep. In practice the dir_tree types are clones pulled from the same TIL (see + // `parse_dir_tree`), so the definitions are identical; we still carry over an ordinal if + // the kept entry happens to be missing one, so name/ordinal lookups stay resolvable. types.dedup_by(|a, b| { if a.name.to_string() != b.name.to_string() { return false; } - // TODO: Merge types instead of just picking b and not transferring. + if b.ordinal == 0 && a.ordinal != 0 { + b.ordinal = a.ordinal; + } true }); types @@ -261,7 +319,8 @@ impl IDBFileParser { id2 = Some(format.read_id2(&mut *data, id2_loc)?); } - // TODO: Decompress til + // NOTE: `read_til` reads the section header and transparently inflates Zlib/Zstd + // compressed TIL sections, so no explicit decompression step is needed here. let mut til = None; if let Some(til_loc) = format.til_location() { til = Some(format.read_til(&mut *data, til_loc)?); @@ -272,10 +331,18 @@ impl IDBFileParser { _ => None, }; + // The IDB records the SHA256 of the original input file; surface it so consumers (and a + // future verifier) can confirm the IDB matches the binary being mapped. + let sha256 = id0.as_ref().and_then(|id0| { + let root_idx = id0.root_node().ok()?; + let hash = id0.input_file_sha256(root_idx).ok()??; + Some(hash.iter().map(|b| format!("{:02x}", b)).collect::()) + }); + let id0_info = id0.as_ref().map(|id0| self.parse_id0(id0)).transpose()?; Ok(IDBInfo { - sha256: None, + sha256, id0: id0_info, til, dir_tree: dir_tree_info, @@ -326,30 +393,56 @@ impl IDBFileParser { IDBFunctionType::Tail(_) => { tracing::debug!("Skipping tail function... {:0x}", func_start); } - IDBFunctionType::NonTail(_func_ext) => { + IDBFunctionType::NonTail(func_ext) => { if func.flags.is_outline() { tracing::debug!("Skipping outlined function... {:0x}", func_start); continue; } - // TODO: Parse function registers and params - // for def_reg in id0.function_defined_registers(netdelta, &func, &func_ext) { - // tracing::info!("{:0x} : Function register: {:?}", func_start, def_reg); - // let Ok(_def_reg) = def_reg else { - // tracing::warn!("Failed to read function register entry"); - // continue; - // }; - // } - - // if let Ok(stack_names) = - // id0.function_defined_variables(&root_info, &func, &func_ext) - // { - // tracing::info!( - // "{:0x} : Function stack variables: {:#?}", - // func_start, - // stack_names - // ); - // } + // Collect register variables (IDA "regvars"): registers the user renamed + // over a range within the function. The register is given by name, so it + // maps cleanly onto a Binary Ninja register in the mapper. + let mut register_vars = Vec::new(); + for reg in id0.function_defined_registers(netdelta, &func, func_ext) { + let reg = match reg { + Ok(reg) => reg, + Err(err) => { + tracing::warn!( + "Failed to read register variable for {:0x}: {}", + func_start, + err + ); + continue; + } + }; + register_vars.push(RegisterVarInfo { + register: reg.register_name.to_string(), + name: reg.variable_name.to_string(), + start: reg.range.start.into_raw().into_u64(), + end: reg.range.end.into_raw().into_u64(), + comment: reg.cmt.to_string(), + }); + } + + // Collect the function's stack frame (named locals, saved registers and + // stack arguments) along with the frame geometry needed to place them. + let stack_frame = match id0 + .function_defined_variables(&root_info, &func, func_ext) + { + Ok(stack_names) => stack_names.ty.map(|frame| StackFrameInfo { + local_size: func_ext.frsize.into_u64(), + saved_regs_size: func_ext.frregs as u64, + frame, + }), + Err(err) => { + tracing::warn!( + "Failed to read stack frame for {:0x}: {}", + func_start, + err + ); + None + } + }; functions.push(FunctionInfo { name: None, @@ -357,6 +450,8 @@ impl IDBFileParser { address: func_start, is_library: func.flags.is_lib(), is_no_return: func.flags.is_no_return(), + register_vars, + stack_frame, }); } } @@ -385,6 +480,14 @@ impl IDBFileParser { (loading_base, _) => BaseAddressInfo::BaseSegment(loading_base.into_u64()), }; + // The processor module defines the register names by index; this lets us resolve the + // registers referenced by argument/return value locations into real registers. + let register_names = id0 + .processor(&root_info) + .and_then(|processor| processor.registers_info()) + .map(|info| info.names.iter().map(|name| name.to_string()).collect()) + .unwrap_or_default(); + Ok(ID0Info { base_address, segments, @@ -392,6 +495,7 @@ impl IDBFileParser { comments, labels, exports, + register_names, }) } @@ -452,8 +556,6 @@ impl IDBFileParser { let root_info = id0.ida_info(root_info_idx)?; let netdelta = root_info.netdelta(); - // sha256 - let func_info_from_addr = |addr_info: &AddressInfo| -> anyhow::Result> { let func_name = addr_info.label()?.map(|s| s.to_string()); @@ -465,6 +567,8 @@ impl IDBFileParser { address: func_addr, is_library: false, is_no_return: false, + register_vars: Vec::new(), + stack_frame: None, })) }; @@ -510,8 +614,9 @@ impl IDBFileParser { comments.extend(comment_info_from_addr(&addr_info)); } + let func_dir_tree = id0.dirtree_function_address()?; let mut functions = Vec::new(); - if let Some(func_dir_tree) = id0.dirtree_function_address()? { + if let Some(func_dir_tree) = &func_dir_tree { func_dir_tree.visit_leafs(|addr_raw| { let addr = Address::from_raw(*addr_raw); if let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, addr) { @@ -522,6 +627,13 @@ impl IDBFileParser { }); } + // Preserve the folder hierarchy (not just the leaf functions) so it can be recreated as + // Binary Ninja components. + let function_folders = func_dir_tree + .as_ref() + .map(|tree| build_function_folders::(&tree.entries)) + .unwrap_or_default(); + let mut names = Vec::new(); if let Some(names_dir_tree) = id0.dirtree_names()? { names_dir_tree.visit_leafs(|name_raw| { @@ -553,6 +665,23 @@ impl IDBFileParser { types, names, comments, + function_folders, }) } } + +/// Recursively convert an IDA function dirtree into our [`FunctionFolderEntry`] tree. +fn build_function_folders( + entries: &[DirTreeEntry], +) -> Vec { + entries + .iter() + .map(|entry| match entry { + DirTreeEntry::Leaf(address) => FunctionFolderEntry::Function((*address).into_u64()), + DirTreeEntry::Directory { name, entries } => FunctionFolderEntry::Folder { + name: String::from_utf8_lossy(name).to_string(), + entries: build_function_folders::(entries), + }, + }) + .collect() +} diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index c3af6fa697..88f1471a92 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -3,15 +3,17 @@ use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; use binaryninja::calling_convention::CoreCallingConvention; -use binaryninja::confidence::Conf; +use binaryninja::confidence::{Conf, MAX_CONFIDENCE}; use binaryninja::platform::Platform; use binaryninja::rc::Ref; use binaryninja::types::{ EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, NamedTypeReference, - NamedTypeReferenceClass, StructureBuilder, StructureMember, StructureType, TypeBuilder, - TypeContainer, + NamedTypeReferenceClass, ReturnValue, StructureBuilder, StructureMember, StructureType, Type, + TypeBuilder, TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, }; -use idb_rs::til::function::CallingConvention; +use binaryninja::variable::Variable; +use idb_rs::til::function::{ArgLoc, CallingConvention}; +use idb_rs::til::pointer::PointerModifier; use idb_rs::til::r#enum::EnumMembers; use idb_rs::til::{Basic, TILTypeInfo, TypeVariant, TyperefType, TyperefValue}; use std::collections::{HashMap, HashSet}; @@ -64,6 +66,18 @@ pub struct TILTranslator { pub address_size: usize, /// Default size of enumerations. pub enum_size: usize, + /// Default size of a `bool` in bytes. + pub bool_size: usize, + /// Default size of a `short` in bytes. + pub short_size: usize, + /// Default size of an `int` in bytes. + pub int_size: usize, + /// Default size of a `long` in bytes. + pub long_size: usize, + /// Default size of a `long long` in bytes. + pub long_long_size: usize, + /// Default size of a `long double` in bytes. + pub long_double_size: usize, /// Reference types, for use with typedefs. /// /// This is necessary because ordinals do not have names and can't be made into a [`NamedTypeReference`]. @@ -81,6 +95,10 @@ pub struct TILTranslator { pub cdecl_calling_convention: Option>, pub stdcall_calling_convention: Option>, pub fastcall_calling_convention: Option>, + /// Architecture used to resolve register names into register ids for value locations. + pub arch: Option, + /// Processor register names indexed by IDA register number (see [`crate::parse::ID0Info`]). + pub register_names: Vec, } impl TILTranslator { @@ -88,6 +106,12 @@ impl TILTranslator { Self { address_size, enum_size: address_size / 2, + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -95,6 +119,8 @@ impl TILTranslator { cdecl_calling_convention: None, stdcall_calling_convention: None, fastcall_calling_convention: None, + arch: None, + register_names: Vec::new(), } } @@ -102,6 +128,12 @@ impl TILTranslator { Self { address_size: platform.address_size(), enum_size: platform.arch().default_integer_size(), + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -109,6 +141,8 @@ impl TILTranslator { cdecl_calling_convention: platform.get_cdecl_calling_convention(), stdcall_calling_convention: platform.get_stdcall_calling_convention(), fastcall_calling_convention: platform.get_fastcall_calling_convention(), + arch: Some(platform.arch()), + register_names: Vec::new(), } } @@ -116,6 +150,12 @@ impl TILTranslator { Self { address_size: arch.address_size(), enum_size: arch.default_integer_size(), + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -123,21 +163,45 @@ impl TILTranslator { cdecl_calling_convention: arch.get_cdecl_calling_convention(), stdcall_calling_convention: arch.get_stdcall_calling_convention(), fastcall_calling_convention: arch.get_fastcall_calling_convention(), + arch: Some(*arch), + register_names: Vec::new(), } } + /// Provide the processor register names (indexed by IDA register number) used to resolve + /// register-based argument and return value locations. + pub fn with_register_names(mut self, register_names: Vec) -> Self { + self.register_names = register_names; + self + } + pub fn with_til_info(mut self, til: &idb_rs::til::section::TILSection) -> Self { if let Some(size_enum) = til.header.size_enum { self.enum_size = size_enum.get() as usize; } + // The TIL header carries the C basic-type sizing for the compiler the IDB was built + // for; prefer it over our architecture-derived defaults so types translate with the + // same widths IDA used. + self.bool_size = til.header.size_bool.get() as usize; + self.int_size = til.header.size_int.get() as usize; + self.short_size = til.sizeof_short().get() as usize; + self.long_size = til.sizeof_long().get() as usize; + self.long_long_size = til.sizeof_long_long().get() as usize; + if let Some(size_long_double) = til.header.size_long_double { + self.long_double_size = size_long_double.get() as usize; + } + // Add referencable types so that type def lookups can occur. self.reference_types_by_ord.reserve(til.types.len()); for (_idx, ty) in til.types.iter().enumerate() { self.add_referenced_type_info(ty); } - // TODO: Handle address (pointer) size information? + // NOTE: The TIL exposes `addr_size()`, but it is derived from the compiler model and + // defaults to 4 when that is absent, which would silently truncate pointers on a + // 64-bit binary. The platform/architecture `address_size` we were constructed with is + // authoritative for pointer widths, so we intentionally keep it. self } @@ -192,25 +256,37 @@ impl TILTranslator { pub fn build_basic_ty(&self, basic_ty: &idb_rs::til::Basic) -> anyhow::Result { use idb_rs::til::Basic; - // TODO: Grab the sizing information of these types from the TIL instead of hardcoding. + // The variable-width C types (short/int/long/...) are sized from the TIL header when + // one is available (see `with_til_info`), falling back to the standard C ABI defaults. match basic_ty { Basic::Void => Ok(TypeBuilder::void()), + // An unknown type of unspecified size has no integer representation; treat it as void. + Basic::Unknown { bytes: 0 } => Ok(TypeBuilder::void()), Basic::Unknown { bytes } => { // In the samples provided it appears that unknown can be used to represent a byte, // so we are going to be liberal and allow unknown basic types to be treated as a sized int. Ok(TypeBuilder::int(*bytes as usize, false)) } - Basic::Bool => Ok(TypeBuilder::bool()), - Basic::BoolSized { .. } => { - // TODO: This needs to be resized, if that cannot be done, make a NTR to an int named BOOL? - Ok(TypeBuilder::bool()) - } + Basic::Bool if self.bool_size == 1 => Ok(TypeBuilder::bool()), + // Binary Ninja's `bool` is always a single byte; a wider `bool` is represented as an + // unsigned integer of the requested size. + Basic::Bool => Ok(TypeBuilder::int(self.bool_size, false)), + Basic::BoolSized { bytes } if bytes.get() == 1 => Ok(TypeBuilder::bool()), + Basic::BoolSized { bytes } => Ok(TypeBuilder::int(bytes.get() as usize, false)), Basic::Char => Ok(TypeBuilder::char()), Basic::SegReg => Err(anyhow::anyhow!("SegReg is not supported")), - Basic::Short { is_signed } => Ok(TypeBuilder::int(2, is_signed.unwrap_or(true))), - Basic::Long { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), - Basic::LongLong { is_signed } => Ok(TypeBuilder::int(8, is_signed.unwrap_or(true))), - Basic::Int { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), + Basic::Short { is_signed } => { + Ok(TypeBuilder::int(self.short_size, is_signed.unwrap_or(true))) + } + Basic::Long { is_signed } => { + Ok(TypeBuilder::int(self.long_size, is_signed.unwrap_or(true))) + } + Basic::LongLong { is_signed } => { + Ok(TypeBuilder::int(self.long_long_size, is_signed.unwrap_or(true))) + } + Basic::Int { is_signed } => { + Ok(TypeBuilder::int(self.int_size, is_signed.unwrap_or(true))) + } Basic::IntSized { bytes, is_signed } => { let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); Ok(TypeBuilder::int(bytes as usize, is_signed.unwrap_or(true))) @@ -219,7 +295,7 @@ impl TILTranslator { let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); Ok(TypeBuilder::float(bytes as usize)) } - Basic::LongDouble => Ok(TypeBuilder::float(8)), + Basic::LongDouble => Ok(TypeBuilder::float(self.long_double_size)), } } @@ -227,11 +303,21 @@ impl TILTranslator { &self, pointer_ty: &idb_rs::til::pointer::Pointer, ) -> anyhow::Result { - // TODO: Consult pointer_ty.closure (is this how we can get based pointers?) + // A `__ptr32` / `__ptr64` modifier overrides the platform address size for this pointer + // (e.g. a 32-bit pointer embedded in an otherwise 64-bit binary). + // + // NOTE: `closure` (based pointers) and `shifted` pointers have no direct Binary Ninja + // representation, so the pointee type and width are preserved but those attributes are + // intentionally dropped. + let pointer_width = match pointer_ty.modifier { + Some(PointerModifier::Ptr32) => 4, + Some(PointerModifier::Ptr64) => 8, + Some(PointerModifier::Restricted) | None => self.address_size, + }; let inner_ty = self.translate_type_info(&pointer_ty.typ)?; Ok(TypeBuilder::pointer_of_width( &inner_ty, - self.address_size, + pointer_width, // NOTE: Set later in `translate_type_info`. false, // NOTE: Set later in `translate_type_info`. @@ -244,10 +330,16 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // TODO: Once branch `test_call_layout` lands use function_ty.retloc to recover return location. let return_ty = self.translate_type_info(&function_ty.ret)?; + // Recover the explicit return-value location (e.g. a non-default return register) when the + // database records one; otherwise the calling convention derives it. + let return_value = self.build_return_value(&return_ty, function_ty.retloc.as_ref()); let params: Vec = self.build_function_params(&function_ty.args)?; - let has_variable_args = false; + // An ellipsis calling convention is IDA's marker for a variadic function. + let has_variable_args = matches!( + function_ty.calling_convention, + Some(CallingConvention::Ellipsis) + ); let stack_adjust = Conf::new(0, 0); let builder = match function_ty.calling_convention { @@ -256,7 +348,7 @@ impl TILTranslator { { let cc = self.cdecl_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -266,7 +358,7 @@ impl TILTranslator { Some(CallingConvention::Stdcall) if self.stdcall_calling_convention.is_some() => { let cc = self.stdcall_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -276,7 +368,7 @@ impl TILTranslator { Some(CallingConvention::Fastcall) if self.fastcall_calling_convention.is_some() => { let cc = self.fastcall_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -289,6 +381,27 @@ impl TILTranslator { Ok(builder) } + /// Build a [`ReturnValue`] for a function, attaching an explicit storage location when the + /// database records a return location that we can resolve to registers/stack. + fn build_return_value(&self, return_ty: &Ref, retloc: Option<&ArgLoc>) -> ReturnValue { + let location = self + .value_location_components(retloc) + .map(|(components, indirect)| { + Conf::new( + ValueLocation { + components, + indirect, + returned_pointer: None, + }, + MAX_CONFIDENCE, + ) + }); + ReturnValue { + ty: Conf::new(return_ty.clone(), MAX_CONFIDENCE), + location, + } + } + pub fn build_function_params( &self, args: &[idb_rs::til::function::FunctionArg], @@ -301,8 +414,9 @@ impl TILTranslator { .clone() .map(|s| s.to_string()) .unwrap_or_else(|| format!("arg{}", idx)); + let location = self.value_location_from_arg_loc(arg.loc.as_ref()); self.translate_type_info(&arg.ty) - .map(|ty| FunctionParameter::new(ty, arg_name, None)) + .map(|ty| FunctionParameter::new(ty, arg_name, location)) }) .collect() } @@ -403,8 +517,10 @@ impl TILTranslator { builder.insert_member(member, false); } + // `extra_padding` records trailing padding bytes IDA stores for fixed-size UDTs; add it + // to the member-derived width so the structure occupies its true storage size. + let width = width + udt_ty.extra_padding.unwrap_or(0); builder.width(width); - // TODO: Handle udt_ty.extra_padding (is that tail padding?) Ok(TypeBuilder::structure(&builder.finalize())) } @@ -478,7 +594,10 @@ impl TILTranslator { } EnumMembers::Groups(groups) => { for (idx, group) in groups.iter().enumerate() { - // TODO: How does this grouping actually impact the enum besides the name? + // IDA's grouped (bitmask) enums partition the members into named bitmask + // groups. Binary Ninja enumerations are flat and have no equivalent grouping + // concept, so we flatten the groups, qualifying each member with its group + // name to keep the names unique and preserve the original grouping intent. let group_name = group .field .name @@ -526,26 +645,31 @@ impl TILTranslator { /// Computes the width of a type, in bytes. pub fn width_of_type(&self, ty: &idb_rs::til::Type) -> anyhow::Result { match &ty.type_variant { + // Keep these widths in lockstep with `build_basic_ty` so that NTR placeholder widths + // match the types they stand in for. TypeVariant::Basic(basic) => match basic { Basic::Void => Ok(0), Basic::Unknown { bytes } => Ok(*bytes as usize), - Basic::Bool => Ok(1), + Basic::Bool => Ok(self.bool_size), Basic::BoolSized { bytes } => Ok(bytes.get() as usize), Basic::Char => Ok(1), Basic::SegReg => Ok(8), - Basic::Short { .. } => Ok(2), - Basic::Long { .. } => Ok(4), - Basic::LongLong { .. } => Ok(8), - Basic::Int { .. } => Ok(4), + Basic::Short { .. } => Ok(self.short_size), + Basic::Long { .. } => Ok(self.long_size), + Basic::LongLong { .. } => Ok(self.long_long_size), + Basic::Int { .. } => Ok(self.int_size), Basic::IntSized { bytes, .. } => Ok(bytes.get() as usize), Basic::Float { bytes } => Ok(bytes.get() as usize), - Basic::LongDouble => Ok(8), + Basic::LongDouble => Ok(self.long_double_size), }, TypeVariant::Pointer(_) => Ok(self.address_size), TypeVariant::Function(_) => Err(anyhow::anyhow!("Function types do not have a width")), TypeVariant::Array(arr) => { let elem_width = self.width_of_type(&arr.elem_type)?; - // TODO: A DST array is unsized or what? I think we should error IMO. + // A flexible array member (no element count) contributes no storage of its own; + // it only names the tail of the containing structure. Treat it as zero-width to + // mirror `build_array_ty`, rather than erroring, so structures ending in one can + // still be sized. let count = arr.nelem.map(|n| n.get()).unwrap_or(0); Ok(elem_width * count as usize) } @@ -560,24 +684,13 @@ impl TILTranslator { anyhow::anyhow!("Type reference has no width: {:?}", resolved_ty) }) } - TypeVariant::Struct(s) => { - let mut total_width = 0; - for member in &s.members { - total_width += self.width_of_type(&member.member_type)?; - } - // TODO: Handle alignment and bitfields. - Ok(total_width) - } - TypeVariant::Union(u) => { - // Size of the largest member + alignment - let mut max_width = 0; - for member in &u.members { - let member_width = self.width_of_type(&member.member_type)?; - max_width = max_width.max(member_width); - } - // TODO: Handle alignment - Ok(max_width) - } + // Build the structure/union through the same path used for real translation and read + // back its finalized width, so alignment padding, bitfield storage sharing and tail + // padding are all accounted for instead of approximated. This recurses, but C types + // can only nest by value finitely (self-reference is only possible through a pointer, + // which is sized by `address_size`), so it always terminates. + TypeVariant::Struct(s) => Ok(self.build_udt_ty(s, false)?.finalize().width() as usize), + TypeVariant::Union(u) => Ok(self.build_udt_ty(u, true)?.finalize().width() as usize), TypeVariant::Enum(e) => Ok(e .storage_size .map(|s| s.get() as usize) @@ -603,4 +716,67 @@ impl TILTranslator { _ => None, } } + + /// Translate an IDA argument/return storage location into a Binary Ninja value location. + /// + /// Returns [`ValueLocationSource::Default`] for locations we cannot represent (or whose + /// registers cannot be resolved), letting the calling convention derive the location. + fn value_location_from_arg_loc(&self, loc: Option<&ArgLoc>) -> ValueLocationSource { + match self.value_location_components(loc) { + Some((components, indirect)) => ValueLocationSource::Custom(ValueLocation { + components, + indirect, + returned_pointer: None, + }), + None => ValueLocationSource::Default, + } + } + + /// Resolve an IDA [`ArgLoc`] into Binary Ninja value-location components and whether the + /// location is indirect (the value lives in memory at the component). Returns `None` for + /// forms with no representation, or when a referenced register cannot be resolved. + fn value_location_components( + &self, + loc: Option<&ArgLoc>, + ) -> Option<(Vec, bool)> { + match loc? { + ArgLoc::Stack(offset) => Some((vec![stack_component(*offset as i64)], false)), + ArgLoc::Reg1(reg) => Some((vec![self.register_component(*reg, 0)?], false)), + ArgLoc::Reg2(regs) => { + // The low and high 16 bits each hold a register index; the value is split across + // the two registers. + let low = self.register_component(regs & 0xFFFF, 0)?; + let high = self.register_component((regs >> 16) & 0xFFFF, 0)?; + Some((vec![low, high], false)) + } + ArgLoc::RRel { reg, off } => { + // Register-relative: the value lives in memory at register + offset. + Some((vec![self.register_component(u32::from(*reg), *off as i64)?], true)) + } + // Distributed, static (global address) and none/custom forms have no direct mapping. + ArgLoc::Dist(_) | ArgLoc::Static(_) | ArgLoc::None => None, + } + } + + /// Build a value-location component for an IDA register index, resolving it through the + /// processor register names and the architecture. + fn register_component(&self, reg_index: u32, offset: i64) -> Option { + let arch = self.arch.as_ref()?; + let name = self.register_names.get(reg_index as usize)?; + let register = arch.register_by_name(name)?; + Some(ValueLocationComponent { + variable: Variable::from_register(register), + offset, + size: None, + }) + } +} + +/// Build a value-location component for a stack offset. +fn stack_component(offset: i64) -> ValueLocationComponent { + ValueLocationComponent { + variable: Variable::from_stack_offset(offset), + offset: 0, + size: None, + } } diff --git a/plugins/idb_import/src/types.rs b/plugins/idb_import/src/types.rs deleted file mode 100644 index cbd37f66c8..0000000000 --- a/plugins/idb_import/src/types.rs +++ /dev/null @@ -1,770 +0,0 @@ -use std::collections::HashMap; -use std::num::{NonZeroU16, NonZeroU8}; - -use anyhow::{anyhow, Result}; -use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; -use binaryninja::binary_view::{BinaryView}; -use binaryninja::calling_convention::CoreCallingConvention; -use binaryninja::confidence::Conf; -use binaryninja::rc::Ref; -use binaryninja::types::{ - EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, StructureBuilder, - StructureType, Type, -}; -use idb_rs::til::function::CallingConvention as TILCallingConvention; -use idb_rs::til::pointer::Pointer as TILPointer; -use idb_rs::til::r#enum::EnumMembers; -use idb_rs::til::{ - array::Array as TILArray, function::Function as TILFunction, r#enum::Enum as TILEnum, - r#struct::Struct as TILStruct, r#struct::StructMember as TILStructMember, - r#union::Union as TILUnion, section::TILSection, TILTypeInfo, Type as TILType, - TypeVariant as TILTypeVariant, -}; -use idb_rs::IDBString; - -#[derive(Debug, Clone)] -pub enum BnTypeError { - // TODO delete this and make this verification during the TIL/IDB parsing, translating the ordinal - // into a kind of type_idx - OrdinalNotFound(u32), - NameNotFound(String), - - Typedef(Box), - Function(BnTypeFunctionError), - Array(Box), - Pointer(Box), - /// Error for members - Struct(Vec<(usize, BnTypeError)>), - Union(Vec<(usize, BnTypeError)>), - - // can't create function due to missing CallingConvention - MissingArchCC, -} - -#[derive(Default, Debug, Clone)] -pub struct BnTypeFunctionError { - pub ret: Option>, - pub args: Vec<(usize, BnTypeError)>, -} - -impl std::fmt::Display for BnTypeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - BnTypeError::OrdinalNotFound(i) => write!(f, "Reference to non existing Ordinal {i}"), - BnTypeError::NameNotFound(name) => write!(f, "Reference to non existing name {name}"), - BnTypeError::Typedef(error) => write!(f, "Typedef: {error}"), - BnTypeError::Function(BnTypeFunctionError { ret, args }) => { - if let Some(error) = ret { - write!(f, "Function return: {error} ")?; - } - for (i, error) in args { - write!(f, "Function argument {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Array(error) => write!(f, "Array: {error}"), - BnTypeError::Struct(errors) => { - for (i, error) in errors { - write!(f, "Struct Member {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Union(errors) => { - for (i, error) in errors { - write!(f, "Union Member {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Pointer(error) => write!(f, "Pointer: {error}"), - BnTypeError::MissingArchCC => write!(f, "Arch is missing a default CallingConvention"), - } - } -} - -#[derive(Default)] -pub enum TranslateTypeResult { - #[default] - NotYet, - /// Unable to solve type, there is no point in trying again - Error(BnTypeError), - /// a type that is not final, but equivalent to the final type, if error, there is no - /// point in trying again - PartiallyTranslated(Ref, Option), - Translated(Ref), -} - -impl From, BnTypeError>> for TranslateTypeResult { - fn from(value: Result, BnTypeError>) -> Self { - match value { - Ok(ty) => Self::Translated(ty), - Err(error) => Self::Error(error), - } - } -} - -pub struct TranslatesIDBType<'a> { - // sanitized name from IDB - pub name: IDBString, - // the result, if converted - pub ty: TranslateTypeResult, - pub og_ty: &'a TILTypeInfo, - pub is_symbol: bool, -} - -pub struct TranslateIDBTypes<'a, F: Fn(usize, usize) -> Result<(), ()>> { - pub arch: CoreArchitecture, - pub progress: F, - pub til: &'a TILSection, - // note it's mapped 1:1 with the same index from til types.chain(symbols) - pub types: Vec>, -} - -impl Result<(), ()>> TranslateIDBTypes<'_, F> { - fn find_typedef_by_name(&self, name: &[u8]) -> Option { - if name.is_empty() { - // TODO this is my assumption, maybe an empty names Typedef means something else. - return Some(TranslateTypeResult::Translated(Type::void())); - } - - // check for types that ar usually not defined directly - match name { - b"Unkown" | b"uint8_t" => Some(TranslateTypeResult::Translated(Type::int(1, false))), - b"IUnkown" | b"int8_t" => Some(TranslateTypeResult::Translated(Type::int(1, true))), - b"SHORT" | b"USHORT" => Some(TranslateTypeResult::Translated(Type::int( - self.til.sizeof_short().get().into(), - name == b"SHORT", - ))), - b"int16_t" => Some(TranslateTypeResult::Translated(Type::int(2, true))), - b"uint16_t" => Some(TranslateTypeResult::Translated(Type::int(2, false))), - b"int32_t" => Some(TranslateTypeResult::Translated(Type::int(4, true))), - b"uint32_t" => Some(TranslateTypeResult::Translated(Type::int(4, false))), - b"int64_t" => Some(TranslateTypeResult::Translated(Type::int(8, true))), - b"uint64_t" => Some(TranslateTypeResult::Translated(Type::int(8, false))), - b"int128_t" => Some(TranslateTypeResult::Translated(Type::int(16, true))), - b"uint128_t" => Some(TranslateTypeResult::Translated(Type::int(16, false))), - _ => None, - } - } - - fn find_typedef(&self, ty: &TranslatesIDBType) -> TranslateTypeResult { - // only return a typedef, if it's solved, at least partially - match &ty.ty { - TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, - TranslateTypeResult::Error(error) => { - TranslateTypeResult::Error(BnTypeError::Typedef(Box::new(error.to_owned()))) - } - TranslateTypeResult::PartiallyTranslated(og_ty, error) => { - TranslateTypeResult::PartiallyTranslated( - Type::named_type_from_type(ty.name.as_utf8_lossy(), og_ty), - error - .as_ref() - .map(|x| BnTypeError::Typedef(Box::new(x.clone()))) - .clone(), - ) - } - TranslateTypeResult::Translated(og_ty) => TranslateTypeResult::Translated( - Type::named_type_from_type(ty.name.as_utf8_lossy(), og_ty), - ), - } - } - - fn translate_pointer(&self, ty: &TILPointer) -> TranslateTypeResult { - match self.translate_type(&ty.typ) { - TranslateTypeResult::Translated(trans) => { - TranslateTypeResult::Translated(self.inner_translate_pointer(ty, &trans)) - } - TranslateTypeResult::PartiallyTranslated(trans, error) => { - TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &trans), - error.map(|e| BnTypeError::Pointer(Box::new(e))), - ) - } - TranslateTypeResult::Error(error) => TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &Type::void()), - Some(error), - ), - TranslateTypeResult::NotYet => TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &Type::void()), - None, - ), - } - } - - fn inner_translate_pointer(&self, ty: &TILPointer, trans: &Type) -> Ref { - // TODO handle ty.shifted - // TODO handle ty.modifier - Type::pointer_with_options(&self.arch, trans, ty.typ.is_const, ty.typ.is_volatile, None) - } - - fn translate_function(&self, fun: &TILFunction) -> TranslateTypeResult { - let mut is_partial = false; - let mut errors: BnTypeFunctionError = Default::default(); - // funtions are always 0 len, so it's translated or partial(void) - let return_ty = match self.translate_type(&fun.ret) { - TranslateTypeResult::Translated(trans) => trans, - TranslateTypeResult::PartiallyTranslated(trans, error) => { - is_partial |= true; - errors.ret = error.map(Box::new); - trans - } - TranslateTypeResult::Error(error) => { - errors.ret = Some(Box::new(error)); - return TranslateTypeResult::PartiallyTranslated( - Type::void(), - Some(BnTypeError::Function(errors)), - ); - } - TranslateTypeResult::NotYet => { - return TranslateTypeResult::PartiallyTranslated(Type::void(), None) - } - }; - let mut partial_error_args = vec![]; - let mut bn_args = Vec::with_capacity(fun.args.len()); - for (i, fun_arg) in fun.args.iter().enumerate() { - let arg = match self.translate_type(&fun_arg.ty) { - TranslateTypeResult::Translated(trans) => trans, - TranslateTypeResult::PartiallyTranslated(trans, error) => { - is_partial = true; - if let Some(error) = error { - errors.args.push((i, error)); - } - trans - } - TranslateTypeResult::NotYet => { - return TranslateTypeResult::PartiallyTranslated(Type::void(), None) - } - TranslateTypeResult::Error(error) => { - partial_error_args.push((i, error)); - return TranslateTypeResult::PartiallyTranslated( - Type::void(), - Some(BnTypeError::Function(errors)), - ); - } - }; - // TODO create location from `arg_loc`? - let loc = None; - let name = fun_arg - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("arg_{i}")); - bn_args.push(FunctionParameter::new(arg, name, loc)); - } - - let var_args = matches!(fun.calling_convention, Some(TILCallingConvention::Ellipsis)); - let cc = fun - .calling_convention - .and_then(|cc| convert_cc(&self.arch, cc)) - .or_else(|| self.arch.get_default_calling_convention()) - .or_else(|| { - self.arch - .calling_conventions() - .iter() - .next() - .map(|x| x.clone()) - }); - let Some(cc) = cc else { - return TranslateTypeResult::Error(BnTypeError::MissingArchCC); - }; - let ty = Type::function_with_opts( - &return_ty, - &bn_args, - var_args, - cc, - Conf::new(0, binaryninja::confidence::MIN_CONFIDENCE), - ); - if is_partial { - let error = (errors.ret.is_some() || !errors.args.is_empty()) - .then_some(BnTypeError::Function(errors)); - TranslateTypeResult::PartiallyTranslated(ty, error) - } else { - assert!(errors.ret.is_none() && errors.args.is_empty()); - TranslateTypeResult::Translated(ty) - } - } - - // TODO can binja handle 0 sized array? There is a better translation? - fn translate_array(&self, array: &TILArray) -> TranslateTypeResult { - match self.translate_type(&array.elem_type) { - TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, - TranslateTypeResult::Translated(ty) => TranslateTypeResult::Translated(Type::array( - &ty, - array.nelem.map(NonZeroU16::get).unwrap_or(0).into(), - )), - TranslateTypeResult::PartiallyTranslated(ty, error) => { - TranslateTypeResult::PartiallyTranslated( - Type::array(&ty, array.nelem.map(NonZeroU16::get).unwrap_or(0).into()), - error.map(Box::new).map(BnTypeError::Array), - ) - } - TranslateTypeResult::Error(error) => { - TranslateTypeResult::Error(BnTypeError::Array(Box::new(error))) - } - } - } - - fn condensate_bitfields_from_struct( - &self, - offset: usize, - members_slice: &[TILStructMember], - struct_builder: &mut StructureBuilder, - ) { - if members_slice.is_empty() { - unreachable!() - } - let mut members = members_slice - .iter() - .map(|ty| match &ty.member_type.type_variant { - TILTypeVariant::Bitfield(b) => b, - _ => unreachable!(), - }) - .enumerate(); - let (_, first_field) = members.next().unwrap(); - let mut current_field_bytes = first_field.nbytes; - let mut current_field_bits: u32 = first_field.width.into(); - let mut start_idx = 0; - - let mut create_field = |start_idx, i, bytes| { - let name = if start_idx == i - 1 { - let member: &TILStructMember = &members_slice[i - 1]; - member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("bitfield_{}", offset + start_idx)) - } else { - format!("bitfield_{}_{}", offset + start_idx, offset + (i - 1)) - }; - let field = field_from_bytes(bytes); - struct_builder.append(&field, &name, MemberAccess::NoAccess, MemberScope::NoScope); - }; - - for (i, member) in members { - // starting a new field - let max_bits = u32::from(current_field_bytes.get()) * 8; - // this bitfield start a a new field, or can't contain other bitfields - // finish the previous and start a new - if current_field_bytes != member.nbytes - || max_bits < current_field_bits + u32::from(member.width) - { - create_field(start_idx, i, current_field_bytes.get().into()); - current_field_bytes = member.nbytes; - current_field_bits = 0; - start_idx = i; - } - - // just add the current bitfield into the field - current_field_bits += u32::from(member.width); - } - - if current_field_bits != 0 { - create_field( - start_idx, - members_slice.len(), - current_field_bytes.get().into(), - ); - } - } - - fn translate_struct(&self, ty_struct: &TILStruct) -> TranslateTypeResult { - if ty_struct.members.is_empty() { - // binary ninja crashes if you create an empty struct, because it divide by 0 - return TranslateTypeResult::Translated(Type::void()); - } - let mut is_partial = false; - let mut structure = StructureBuilder::new(); - if let Some(align) = ty_struct.alignment { - structure.alignment(align.get().into()); - } - structure.packed(ty_struct.is_unaligned && ty_struct.is_uknown_8); - - let mut errors = vec![]; - let mut first_bitfield_seq = None; - for (i, member) in ty_struct.members.iter().enumerate() { - match (&member.member_type.type_variant, first_bitfield_seq) { - // accumulate the bitfield to be condensated - (TILTypeVariant::Bitfield(_bit), None) => { - first_bitfield_seq = Some(i); - continue; - } - (TILTypeVariant::Bitfield(_bit), Some(_)) => continue, - - // condensate the bitfields into byte-wide fields - (_, Some(start_idx)) => { - first_bitfield_seq = None; - let members_bitrange = &ty_struct.members[start_idx..i]; - self.condensate_bitfields_from_struct( - start_idx, - members_bitrange, - &mut structure, - ); - } - - (_, None) => {} - } - - let mem = match self.translate_type(&member.member_type) { - TranslateTypeResult::Translated(ty) => ty, - TranslateTypeResult::PartiallyTranslated(partial_ty, error) => { - is_partial |= true; - if let Some(error) = error { - errors.push((i, error)); - } - partial_ty - } - TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, - TranslateTypeResult::Error(error) => { - errors.push((i, error)); - return TranslateTypeResult::Error(BnTypeError::Struct(errors)); - } - }; - //TODO handle member.alignment - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - structure.append(&mem, &name, MemberAccess::NoAccess, MemberScope::NoScope); - } - if let Some(start_idx) = first_bitfield_seq { - let members_bitrange = &ty_struct.members[start_idx..]; - self.condensate_bitfields_from_struct(start_idx, members_bitrange, &mut structure); - } - let bn_ty = Type::structure(&structure.finalize()); - if is_partial { - let partial_error = (!errors.is_empty()).then_some(BnTypeError::Struct(errors)); - TranslateTypeResult::PartiallyTranslated(bn_ty, partial_error) - } else { - assert!(errors.is_empty()); - TranslateTypeResult::Translated(bn_ty) - } - } - - fn translate_union(&self, ty_union: &TILUnion) -> TranslateTypeResult { - let mut is_partial = false; - let mut structure = StructureBuilder::new(); - structure.structure_type(StructureType::UnionStructureType); - let mut errors = vec![]; - for (i, member) in ty_union.members.iter().enumerate() { - // bitfields can be translated into complete fields - let mem = match &member.ty.type_variant { - TILTypeVariant::Bitfield(field) => field_from_bytes(field.nbytes.get().into()), - _ => match self.translate_type(&member.ty) { - TranslateTypeResult::Translated(ty) => ty, - TranslateTypeResult::Error(error) => { - errors.push((i, error)); - return TranslateTypeResult::Error(BnTypeError::Union(errors)); - } - TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, - TranslateTypeResult::PartiallyTranslated(partial, error) => { - is_partial |= true; - if let Some(error) = error { - errors.push((i, error)); - } - partial - } - }, - }; - - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - structure.append(&mem, &name, MemberAccess::NoAccess, MemberScope::NoScope); - } - let str_ref = structure.finalize(); - - let bn_ty = Type::structure(&str_ref); - if is_partial { - let partial_error = (!errors.is_empty()).then_some(BnTypeError::Struct(errors)); - TranslateTypeResult::PartiallyTranslated(bn_ty, partial_error) - } else { - assert!(errors.is_empty()); - TranslateTypeResult::Translated(bn_ty) - } - } - - fn translate_enum(&self, ty_enum: &TILEnum) -> Ref { - let mut eb = EnumerationBuilder::new(); - match &ty_enum.members { - EnumMembers::Regular(enum_members) => { - for (i, member) in enum_members.iter().enumerate() { - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - eb.insert(&name, member.value); - } - } - EnumMembers::Groups(enum_groups) => { - for group in enum_groups { - // TODO implement groups to the importer - for (i, member) in group.sub_fields.iter().enumerate() { - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - eb.insert(&name, member.value); - } - } - } - } - - Type::enumeration( - &eb.finalize(), - // TODO: This looks bad, look at the comment in [`Type::width`]. - // TODO check the default size of enum - ty_enum - .storage_size - .map(|x| x.into()) - .or(self.til.header.size_enum.map(|x| x.into())) - .unwrap_or(4.try_into().unwrap()), - ty_enum.is_signed, - ) - } - - fn translate_basic(&self, mdata: &idb_rs::til::Basic) -> Ref { - match *mdata { - idb_rs::til::Basic::Void => Type::void(), - idb_rs::til::Basic::Unknown { bytes: 0 } => Type::void(), - idb_rs::til::Basic::Unknown { bytes } => Type::array(&Type::char(), bytes.into()), - idb_rs::til::Basic::Bool if self.til.header.size_bool.get() == 1 => Type::bool(), - idb_rs::til::Basic::BoolSized { bytes } if bytes.get() == 1 => Type::bool(), - // NOTE Binja don't have any representation for bool other then the default - idb_rs::til::Basic::BoolSized { bytes } => Type::int(bytes.get().into(), false), - idb_rs::til::Basic::Bool /*if self.til.header.size_bool.get() != 1*/ => Type::int(self.til.header.size_bool.get().into(), false), - idb_rs::til::Basic::Char => Type::char(), - // TODO what exacly is Segment Register? - idb_rs::til::Basic::SegReg => Type::char(), - idb_rs::til::Basic::IntSized { bytes, is_signed } => { - // default into signed - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(bytes.get().into(), is_signed) - } - idb_rs::til::Basic::Int { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.header.size_int.get().into(), is_signed) - }, - idb_rs::til::Basic::Short { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_short().get().into(), is_signed) - }, - idb_rs::til::Basic::Long { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_long().get().into(), is_signed) - }, - idb_rs::til::Basic::LongLong { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_long_long().get().into(), is_signed) - }, - idb_rs::til::Basic::LongDouble => { - // TODO is size_long_double architecture dependent? - Type::float(self.til.header.size_long_double.map(NonZeroU8::get).unwrap_or(8).into()) - }, - idb_rs::til::Basic::Float { bytes } => Type::float(bytes.get().into()), - } - } - - pub fn translate_type(&self, ty: &TILType) -> TranslateTypeResult { - match &ty.type_variant { - // types that are always translatable - TILTypeVariant::Basic(meta) => { - TranslateTypeResult::Translated(self.translate_basic(meta)) - } - TILTypeVariant::Bitfield(bit) => { - TranslateTypeResult::Translated(field_from_bytes(bit.nbytes.get().into())) - } - TILTypeVariant::Enum(ty_enum) => { - TranslateTypeResult::Translated(self.translate_enum(ty_enum)) - } - TILTypeVariant::Typeref(typeref) => match &typeref.typeref_value { - idb_rs::til::TyperefValue::Ref(idx) => self.find_typedef(&self.types[*idx]), - idb_rs::til::TyperefValue::UnsolvedName(name) => self - .find_typedef_by_name(name.as_ref().map(|x| x.as_bytes()).unwrap_or(&[])) - .unwrap_or_else(|| { - TranslateTypeResult::Error(BnTypeError::NameNotFound( - name.as_ref() - .map(|x| x.as_utf8_lossy().to_string()) - .unwrap_or(String::new()), - )) - }), - idb_rs::til::TyperefValue::UnsolvedOrd(ord) => { - TranslateTypeResult::Error(BnTypeError::OrdinalNotFound(*ord)) - } - }, - - TILTypeVariant::Pointer(ty) => self.translate_pointer(ty), - TILTypeVariant::Function(fun) => self.translate_function(fun), - - // can only be partially solved if all fields are solved or partially solved - TILTypeVariant::Array(array) => self.translate_array(array), - TILTypeVariant::Struct(ty_struct) => self.translate_struct(ty_struct), - TILTypeVariant::Union(ty_union) => self.translate_union(ty_union), - } - } -} - -pub fn translate_ephemeral_type(debug_file: &BinaryView, ty: &TILType) -> TranslateTypeResult { - // in case we need to translate types - let header = idb_rs::til::ephemeral_til_header(); - let translator = TranslateIDBTypes { - arch: debug_file.default_arch().unwrap(/* TODO */), - progress: |_, _| Ok(()), - // TODO it's unclear what to do here - til: &TILSection { - header, - symbols: vec![], - types: vec![], - macros: None, - }, - types: vec![], - }; - - translator.translate_type(ty) -} - -pub fn translate_til_types( - arch: CoreArchitecture, - til: &TILSection, - progress: impl Fn(usize, usize) -> Result<(), ()>, -) -> Result>> { - let total = til.symbols.len() + til.types.len(); - let mut types = Vec::with_capacity(total); - let mut types_by_ord = HashMap::with_capacity(total); - let mut types_by_name = HashMap::with_capacity(total); - let all_types = til.types.iter().zip(core::iter::repeat(false)); - // TODO: it's unclear how the demangle symbols and types names/ord, for now only parse types - //let all_types = all_types.chain(til.symbols.iter().zip(core::iter::repeat(true))); - for (i, (ty, is_symbol)) in all_types.enumerate() { - // TODO sanitized the input - // TODO find out how the namespaces used by TIL works - types.push(TranslatesIDBType { - name: ty.name.clone(), - is_symbol, - og_ty: ty, - ty: TranslateTypeResult::NotYet, - }); - if ty.ordinal != 0 && !is_symbol { - let dup1 = types_by_ord.insert(ty.ordinal, i); - if let Some(old) = dup1 { - let old_type = &types[old]; - let new_type = types.last().unwrap(); - // TODO error? - panic!( - "dup ord {}:{} {}:\n{:?}\n{:?}", - old_type.is_symbol, - new_type.is_symbol, - ty.ordinal, - &old_type.og_ty, - &new_type.og_ty, - ) - } - } - if !ty.name.as_bytes().is_empty() { - let dup2 = types_by_name.insert(ty.name.as_bytes().to_owned(), i); - if let Some(old) = dup2 { - let old_type = &types[old]; - let new_type = types.last().unwrap(); - // TODO error? - panic!( - "dup name {}:{}: {}:\n{:?}\n{:?}", - old_type.is_symbol, - new_type.is_symbol, - &ty.name.as_utf8_lossy(), - &old_type.og_ty, - &new_type.og_ty, - ) - } - } - } - - let mut translator = TranslateIDBTypes { - arch, - progress, - til, - types, - }; - if (translator.progress)(0, total).is_err() { - return Err(anyhow!("IDB import aborted")); - } - - // solve types until there is nothing else that can be solved - loop { - // if something was solved, mark this variable as true - let mut did_something = false; - let mut num_translated = 0usize; - for i in 0..translator.types.len() { - match &translator.types[i].ty { - TranslateTypeResult::NotYet => { - let result = translator.translate_type(&translator.types[i].og_ty.tinfo); - did_something |= !matches!(&result, TranslateTypeResult::NotYet); - translator.types[i].ty = result; - } - TranslateTypeResult::PartiallyTranslated(_, None) => { - let result = translator.translate_type(&translator.types[i].og_ty.tinfo); - // don't allow regress, it can goes from PartiallyTranslated to any state other then NotYet - assert!(!matches!(&result, TranslateTypeResult::NotYet)); - did_something |= - !matches!(&result, TranslateTypeResult::PartiallyTranslated(_, None)); - translator.types[i].ty = result; - // don't need to add again they will be fixed on the loop below - } - // if an error was produced, there is no point in try again - TranslateTypeResult::PartiallyTranslated(_, Some(_)) => {} - // NOTE for now we are just accumulating errors, just try to translate the max number - // of types as possible - TranslateTypeResult::Error(_) => {} - // already translated, nothing do to here - TranslateTypeResult::Translated(_) => {} - } - - // count the number of finished types - if let TranslateTypeResult::Translated(_) = &translator.types[i].ty { - num_translated += 1 - } - } - - if !did_something { - // means we acomplilshed nothing during this loop, there is no point in trying again - break; - } - if (translator.progress)(num_translated, total).is_err() { - // error means the user aborted the progress - break; - } - } - - Ok(translator.types) -} - -fn field_from_bytes(bytes: i32) -> Ref { - match bytes { - 0 => unreachable!(), - num @ (1 | 2 | 4 | 8 | 16) => Type::int(num.try_into().unwrap(), false), - nelem => Type::array(&Type::char(), nelem.try_into().unwrap()), - } -} - -fn convert_cc( - arch: &A, - in_cc: TILCallingConvention, -) -> Option> { - match in_cc { - TILCallingConvention::Cdecl => arch.get_cdecl_calling_convention(), - TILCallingConvention::Ellipsis => arch.get_cdecl_calling_convention(), - TILCallingConvention::Stdcall => arch.get_stdcall_calling_convention(), - TILCallingConvention::Fastcall => arch.get_fastcall_calling_convention(), - TILCallingConvention::Voidarg - | TILCallingConvention::Pascal - | TILCallingConvention::Thiscall - | TILCallingConvention::Swift - | TILCallingConvention::Golang - | TILCallingConvention::Reserved3 - | TILCallingConvention::Uservars - | TILCallingConvention::Userpurge - | TILCallingConvention::Usercall => None, - } -}