From 60701b1dacc445cc73f572f725c908d9c23c57e0 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:32:19 -0500 Subject: [PATCH 01/16] [idb_import] Rebase BaseSection against lowest segment, not lowest section When an IDB only records a section-relative base address (loading_base of zero, so we fall back to min_ea as a BaseSection), the rebase delta was computed against the lowest mapped *section* in the view. That over-shifts every imported address for formats where the first section starts after the file header. IDA's min_ea is the image base and maps the file header too, whereas a Mach-O's first section (__text) begins after the header and load commands. Aligning against the lowest mapped segment (segments include the header region) yields the correct delta and stops imported addresses from being shifted by the header size. --- plugins/idb_import/src/mapper.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 530fef1f06..593fdb75cf 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -45,13 +45,18 @@ impl IDBMapper { BaseAddressInfo::None => bn_base_address, BaseAddressInfo::BaseSegment(start_addr) => bn_base_address.wrapping_sub(start_addr), BaseAddressInfo::BaseSection(section_addr) => { - let bn_section_addr = view - .sections() + // Align against the lowest mapped *segment*, not the lowest section. + // IDA's `min_ea` is the image base (it maps the file header too), but a + // Mach-O's first section (`__text`) starts after the header + load + // commands. Using the lowest section here over-shifts every imported + // address by the header size; segments include the header region. + let bn_segment_addr = view + .segments() .iter() - .min_by_key(|s| s.start()) - .map(|s| s.start()); - match bn_section_addr { - Some(bn_section) => bn_section.wrapping_sub(section_addr), + .map(|s| s.address_range().start) + .min(); + match bn_segment_addr { + Some(bn_segment) => bn_segment.wrapping_sub(section_addr), None => bn_base_address, } } From 125f3867c4fe06e428b477133ceec57eed17cdef Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:32:38 -0500 Subject: [PATCH 02/16] [idb_import] Remove orphaned types.rs translator The "IDB Import refactor" introduced translate.rs (TILTranslator) as the type translator used by the mapper, but left the previous translator in types.rs behind. The module was never re-declared in lib.rs, so it has not been compiled or referenced since the refactor. Removing it drops a large block of dead code along with its stale TODOs; TILTranslator is now the single source of truth for IDB->BN type translation. --- plugins/idb_import/src/types.rs | 770 -------------------------------- 1 file changed, 770 deletions(-) delete mode 100644 plugins/idb_import/src/types.rs diff --git a/plugins/idb_import/src/types.rs b/plugins/idb_import/src/types.rs deleted file mode 100644 index cbd37f66c8..0000000000 --- a/plugins/idb_import/src/types.rs +++ /dev/null @@ -1,770 +0,0 @@ -use std::collections::HashMap; -use std::num::{NonZeroU16, NonZeroU8}; - -use anyhow::{anyhow, Result}; -use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; -use binaryninja::binary_view::{BinaryView}; -use binaryninja::calling_convention::CoreCallingConvention; -use binaryninja::confidence::Conf; -use binaryninja::rc::Ref; -use binaryninja::types::{ - EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, StructureBuilder, - StructureType, Type, -}; -use idb_rs::til::function::CallingConvention as TILCallingConvention; -use idb_rs::til::pointer::Pointer as TILPointer; -use idb_rs::til::r#enum::EnumMembers; -use idb_rs::til::{ - array::Array as TILArray, function::Function as TILFunction, r#enum::Enum as TILEnum, - r#struct::Struct as TILStruct, r#struct::StructMember as TILStructMember, - r#union::Union as TILUnion, section::TILSection, TILTypeInfo, Type as TILType, - TypeVariant as TILTypeVariant, -}; -use idb_rs::IDBString; - -#[derive(Debug, Clone)] -pub enum BnTypeError { - // TODO delete this and make this verification during the TIL/IDB parsing, translating the ordinal - // into a kind of type_idx - OrdinalNotFound(u32), - NameNotFound(String), - - Typedef(Box), - Function(BnTypeFunctionError), - Array(Box), - Pointer(Box), - /// Error for members - Struct(Vec<(usize, BnTypeError)>), - Union(Vec<(usize, BnTypeError)>), - - // can't create function due to missing CallingConvention - MissingArchCC, -} - -#[derive(Default, Debug, Clone)] -pub struct BnTypeFunctionError { - pub ret: Option>, - pub args: Vec<(usize, BnTypeError)>, -} - -impl std::fmt::Display for BnTypeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - BnTypeError::OrdinalNotFound(i) => write!(f, "Reference to non existing Ordinal {i}"), - BnTypeError::NameNotFound(name) => write!(f, "Reference to non existing name {name}"), - BnTypeError::Typedef(error) => write!(f, "Typedef: {error}"), - BnTypeError::Function(BnTypeFunctionError { ret, args }) => { - if let Some(error) = ret { - write!(f, "Function return: {error} ")?; - } - for (i, error) in args { - write!(f, "Function argument {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Array(error) => write!(f, "Array: {error}"), - BnTypeError::Struct(errors) => { - for (i, error) in errors { - write!(f, "Struct Member {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Union(errors) => { - for (i, error) in errors { - write!(f, "Union Member {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Pointer(error) => write!(f, "Pointer: {error}"), - BnTypeError::MissingArchCC => write!(f, "Arch is missing a default CallingConvention"), - } - } -} - -#[derive(Default)] -pub enum TranslateTypeResult { - #[default] - NotYet, - /// Unable to solve type, there is no point in trying again - Error(BnTypeError), - /// a type that is not final, but equivalent to the final type, if error, there is no - /// point in trying again - PartiallyTranslated(Ref, Option), - Translated(Ref), -} - -impl From, BnTypeError>> for TranslateTypeResult { - fn from(value: Result, BnTypeError>) -> Self { - match value { - Ok(ty) => Self::Translated(ty), - Err(error) => Self::Error(error), - } - } -} - -pub struct TranslatesIDBType<'a> { - // sanitized name from IDB - pub name: IDBString, - // the result, if converted - pub ty: TranslateTypeResult, - pub og_ty: &'a TILTypeInfo, - pub is_symbol: bool, -} - -pub struct TranslateIDBTypes<'a, F: Fn(usize, usize) -> Result<(), ()>> { - pub arch: CoreArchitecture, - pub progress: F, - pub til: &'a TILSection, - // note it's mapped 1:1 with the same index from til types.chain(symbols) - pub types: Vec>, -} - -impl Result<(), ()>> TranslateIDBTypes<'_, F> { - fn find_typedef_by_name(&self, name: &[u8]) -> Option { - if name.is_empty() { - // TODO this is my assumption, maybe an empty names Typedef means something else. - return Some(TranslateTypeResult::Translated(Type::void())); - } - - // check for types that ar usually not defined directly - match name { - b"Unkown" | b"uint8_t" => Some(TranslateTypeResult::Translated(Type::int(1, false))), - b"IUnkown" | b"int8_t" => Some(TranslateTypeResult::Translated(Type::int(1, true))), - b"SHORT" | b"USHORT" => Some(TranslateTypeResult::Translated(Type::int( - self.til.sizeof_short().get().into(), - name == b"SHORT", - ))), - b"int16_t" => Some(TranslateTypeResult::Translated(Type::int(2, true))), - b"uint16_t" => Some(TranslateTypeResult::Translated(Type::int(2, false))), - b"int32_t" => Some(TranslateTypeResult::Translated(Type::int(4, true))), - b"uint32_t" => Some(TranslateTypeResult::Translated(Type::int(4, false))), - b"int64_t" => Some(TranslateTypeResult::Translated(Type::int(8, true))), - b"uint64_t" => Some(TranslateTypeResult::Translated(Type::int(8, false))), - b"int128_t" => Some(TranslateTypeResult::Translated(Type::int(16, true))), - b"uint128_t" => Some(TranslateTypeResult::Translated(Type::int(16, false))), - _ => None, - } - } - - fn find_typedef(&self, ty: &TranslatesIDBType) -> TranslateTypeResult { - // only return a typedef, if it's solved, at least partially - match &ty.ty { - TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, - TranslateTypeResult::Error(error) => { - TranslateTypeResult::Error(BnTypeError::Typedef(Box::new(error.to_owned()))) - } - TranslateTypeResult::PartiallyTranslated(og_ty, error) => { - TranslateTypeResult::PartiallyTranslated( - Type::named_type_from_type(ty.name.as_utf8_lossy(), og_ty), - error - .as_ref() - .map(|x| BnTypeError::Typedef(Box::new(x.clone()))) - .clone(), - ) - } - TranslateTypeResult::Translated(og_ty) => TranslateTypeResult::Translated( - Type::named_type_from_type(ty.name.as_utf8_lossy(), og_ty), - ), - } - } - - fn translate_pointer(&self, ty: &TILPointer) -> TranslateTypeResult { - match self.translate_type(&ty.typ) { - TranslateTypeResult::Translated(trans) => { - TranslateTypeResult::Translated(self.inner_translate_pointer(ty, &trans)) - } - TranslateTypeResult::PartiallyTranslated(trans, error) => { - TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &trans), - error.map(|e| BnTypeError::Pointer(Box::new(e))), - ) - } - TranslateTypeResult::Error(error) => TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &Type::void()), - Some(error), - ), - TranslateTypeResult::NotYet => TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &Type::void()), - None, - ), - } - } - - fn inner_translate_pointer(&self, ty: &TILPointer, trans: &Type) -> Ref { - // TODO handle ty.shifted - // TODO handle ty.modifier - Type::pointer_with_options(&self.arch, trans, ty.typ.is_const, ty.typ.is_volatile, None) - } - - fn translate_function(&self, fun: &TILFunction) -> TranslateTypeResult { - let mut is_partial = false; - let mut errors: BnTypeFunctionError = Default::default(); - // funtions are always 0 len, so it's translated or partial(void) - let return_ty = match self.translate_type(&fun.ret) { - TranslateTypeResult::Translated(trans) => trans, - TranslateTypeResult::PartiallyTranslated(trans, error) => { - is_partial |= true; - errors.ret = error.map(Box::new); - trans - } - TranslateTypeResult::Error(error) => { - errors.ret = Some(Box::new(error)); - return TranslateTypeResult::PartiallyTranslated( - Type::void(), - Some(BnTypeError::Function(errors)), - ); - } - TranslateTypeResult::NotYet => { - return TranslateTypeResult::PartiallyTranslated(Type::void(), None) - } - }; - let mut partial_error_args = vec![]; - let mut bn_args = Vec::with_capacity(fun.args.len()); - for (i, fun_arg) in fun.args.iter().enumerate() { - let arg = match self.translate_type(&fun_arg.ty) { - TranslateTypeResult::Translated(trans) => trans, - TranslateTypeResult::PartiallyTranslated(trans, error) => { - is_partial = true; - if let Some(error) = error { - errors.args.push((i, error)); - } - trans - } - TranslateTypeResult::NotYet => { - return TranslateTypeResult::PartiallyTranslated(Type::void(), None) - } - TranslateTypeResult::Error(error) => { - partial_error_args.push((i, error)); - return TranslateTypeResult::PartiallyTranslated( - Type::void(), - Some(BnTypeError::Function(errors)), - ); - } - }; - // TODO create location from `arg_loc`? - let loc = None; - let name = fun_arg - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("arg_{i}")); - bn_args.push(FunctionParameter::new(arg, name, loc)); - } - - let var_args = matches!(fun.calling_convention, Some(TILCallingConvention::Ellipsis)); - let cc = fun - .calling_convention - .and_then(|cc| convert_cc(&self.arch, cc)) - .or_else(|| self.arch.get_default_calling_convention()) - .or_else(|| { - self.arch - .calling_conventions() - .iter() - .next() - .map(|x| x.clone()) - }); - let Some(cc) = cc else { - return TranslateTypeResult::Error(BnTypeError::MissingArchCC); - }; - let ty = Type::function_with_opts( - &return_ty, - &bn_args, - var_args, - cc, - Conf::new(0, binaryninja::confidence::MIN_CONFIDENCE), - ); - if is_partial { - let error = (errors.ret.is_some() || !errors.args.is_empty()) - .then_some(BnTypeError::Function(errors)); - TranslateTypeResult::PartiallyTranslated(ty, error) - } else { - assert!(errors.ret.is_none() && errors.args.is_empty()); - TranslateTypeResult::Translated(ty) - } - } - - // TODO can binja handle 0 sized array? There is a better translation? - fn translate_array(&self, array: &TILArray) -> TranslateTypeResult { - match self.translate_type(&array.elem_type) { - TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, - TranslateTypeResult::Translated(ty) => TranslateTypeResult::Translated(Type::array( - &ty, - array.nelem.map(NonZeroU16::get).unwrap_or(0).into(), - )), - TranslateTypeResult::PartiallyTranslated(ty, error) => { - TranslateTypeResult::PartiallyTranslated( - Type::array(&ty, array.nelem.map(NonZeroU16::get).unwrap_or(0).into()), - error.map(Box::new).map(BnTypeError::Array), - ) - } - TranslateTypeResult::Error(error) => { - TranslateTypeResult::Error(BnTypeError::Array(Box::new(error))) - } - } - } - - fn condensate_bitfields_from_struct( - &self, - offset: usize, - members_slice: &[TILStructMember], - struct_builder: &mut StructureBuilder, - ) { - if members_slice.is_empty() { - unreachable!() - } - let mut members = members_slice - .iter() - .map(|ty| match &ty.member_type.type_variant { - TILTypeVariant::Bitfield(b) => b, - _ => unreachable!(), - }) - .enumerate(); - let (_, first_field) = members.next().unwrap(); - let mut current_field_bytes = first_field.nbytes; - let mut current_field_bits: u32 = first_field.width.into(); - let mut start_idx = 0; - - let mut create_field = |start_idx, i, bytes| { - let name = if start_idx == i - 1 { - let member: &TILStructMember = &members_slice[i - 1]; - member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("bitfield_{}", offset + start_idx)) - } else { - format!("bitfield_{}_{}", offset + start_idx, offset + (i - 1)) - }; - let field = field_from_bytes(bytes); - struct_builder.append(&field, &name, MemberAccess::NoAccess, MemberScope::NoScope); - }; - - for (i, member) in members { - // starting a new field - let max_bits = u32::from(current_field_bytes.get()) * 8; - // this bitfield start a a new field, or can't contain other bitfields - // finish the previous and start a new - if current_field_bytes != member.nbytes - || max_bits < current_field_bits + u32::from(member.width) - { - create_field(start_idx, i, current_field_bytes.get().into()); - current_field_bytes = member.nbytes; - current_field_bits = 0; - start_idx = i; - } - - // just add the current bitfield into the field - current_field_bits += u32::from(member.width); - } - - if current_field_bits != 0 { - create_field( - start_idx, - members_slice.len(), - current_field_bytes.get().into(), - ); - } - } - - fn translate_struct(&self, ty_struct: &TILStruct) -> TranslateTypeResult { - if ty_struct.members.is_empty() { - // binary ninja crashes if you create an empty struct, because it divide by 0 - return TranslateTypeResult::Translated(Type::void()); - } - let mut is_partial = false; - let mut structure = StructureBuilder::new(); - if let Some(align) = ty_struct.alignment { - structure.alignment(align.get().into()); - } - structure.packed(ty_struct.is_unaligned && ty_struct.is_uknown_8); - - let mut errors = vec![]; - let mut first_bitfield_seq = None; - for (i, member) in ty_struct.members.iter().enumerate() { - match (&member.member_type.type_variant, first_bitfield_seq) { - // accumulate the bitfield to be condensated - (TILTypeVariant::Bitfield(_bit), None) => { - first_bitfield_seq = Some(i); - continue; - } - (TILTypeVariant::Bitfield(_bit), Some(_)) => continue, - - // condensate the bitfields into byte-wide fields - (_, Some(start_idx)) => { - first_bitfield_seq = None; - let members_bitrange = &ty_struct.members[start_idx..i]; - self.condensate_bitfields_from_struct( - start_idx, - members_bitrange, - &mut structure, - ); - } - - (_, None) => {} - } - - let mem = match self.translate_type(&member.member_type) { - TranslateTypeResult::Translated(ty) => ty, - TranslateTypeResult::PartiallyTranslated(partial_ty, error) => { - is_partial |= true; - if let Some(error) = error { - errors.push((i, error)); - } - partial_ty - } - TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, - TranslateTypeResult::Error(error) => { - errors.push((i, error)); - return TranslateTypeResult::Error(BnTypeError::Struct(errors)); - } - }; - //TODO handle member.alignment - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - structure.append(&mem, &name, MemberAccess::NoAccess, MemberScope::NoScope); - } - if let Some(start_idx) = first_bitfield_seq { - let members_bitrange = &ty_struct.members[start_idx..]; - self.condensate_bitfields_from_struct(start_idx, members_bitrange, &mut structure); - } - let bn_ty = Type::structure(&structure.finalize()); - if is_partial { - let partial_error = (!errors.is_empty()).then_some(BnTypeError::Struct(errors)); - TranslateTypeResult::PartiallyTranslated(bn_ty, partial_error) - } else { - assert!(errors.is_empty()); - TranslateTypeResult::Translated(bn_ty) - } - } - - fn translate_union(&self, ty_union: &TILUnion) -> TranslateTypeResult { - let mut is_partial = false; - let mut structure = StructureBuilder::new(); - structure.structure_type(StructureType::UnionStructureType); - let mut errors = vec![]; - for (i, member) in ty_union.members.iter().enumerate() { - // bitfields can be translated into complete fields - let mem = match &member.ty.type_variant { - TILTypeVariant::Bitfield(field) => field_from_bytes(field.nbytes.get().into()), - _ => match self.translate_type(&member.ty) { - TranslateTypeResult::Translated(ty) => ty, - TranslateTypeResult::Error(error) => { - errors.push((i, error)); - return TranslateTypeResult::Error(BnTypeError::Union(errors)); - } - TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, - TranslateTypeResult::PartiallyTranslated(partial, error) => { - is_partial |= true; - if let Some(error) = error { - errors.push((i, error)); - } - partial - } - }, - }; - - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - structure.append(&mem, &name, MemberAccess::NoAccess, MemberScope::NoScope); - } - let str_ref = structure.finalize(); - - let bn_ty = Type::structure(&str_ref); - if is_partial { - let partial_error = (!errors.is_empty()).then_some(BnTypeError::Struct(errors)); - TranslateTypeResult::PartiallyTranslated(bn_ty, partial_error) - } else { - assert!(errors.is_empty()); - TranslateTypeResult::Translated(bn_ty) - } - } - - fn translate_enum(&self, ty_enum: &TILEnum) -> Ref { - let mut eb = EnumerationBuilder::new(); - match &ty_enum.members { - EnumMembers::Regular(enum_members) => { - for (i, member) in enum_members.iter().enumerate() { - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - eb.insert(&name, member.value); - } - } - EnumMembers::Groups(enum_groups) => { - for group in enum_groups { - // TODO implement groups to the importer - for (i, member) in group.sub_fields.iter().enumerate() { - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - eb.insert(&name, member.value); - } - } - } - } - - Type::enumeration( - &eb.finalize(), - // TODO: This looks bad, look at the comment in [`Type::width`]. - // TODO check the default size of enum - ty_enum - .storage_size - .map(|x| x.into()) - .or(self.til.header.size_enum.map(|x| x.into())) - .unwrap_or(4.try_into().unwrap()), - ty_enum.is_signed, - ) - } - - fn translate_basic(&self, mdata: &idb_rs::til::Basic) -> Ref { - match *mdata { - idb_rs::til::Basic::Void => Type::void(), - idb_rs::til::Basic::Unknown { bytes: 0 } => Type::void(), - idb_rs::til::Basic::Unknown { bytes } => Type::array(&Type::char(), bytes.into()), - idb_rs::til::Basic::Bool if self.til.header.size_bool.get() == 1 => Type::bool(), - idb_rs::til::Basic::BoolSized { bytes } if bytes.get() == 1 => Type::bool(), - // NOTE Binja don't have any representation for bool other then the default - idb_rs::til::Basic::BoolSized { bytes } => Type::int(bytes.get().into(), false), - idb_rs::til::Basic::Bool /*if self.til.header.size_bool.get() != 1*/ => Type::int(self.til.header.size_bool.get().into(), false), - idb_rs::til::Basic::Char => Type::char(), - // TODO what exacly is Segment Register? - idb_rs::til::Basic::SegReg => Type::char(), - idb_rs::til::Basic::IntSized { bytes, is_signed } => { - // default into signed - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(bytes.get().into(), is_signed) - } - idb_rs::til::Basic::Int { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.header.size_int.get().into(), is_signed) - }, - idb_rs::til::Basic::Short { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_short().get().into(), is_signed) - }, - idb_rs::til::Basic::Long { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_long().get().into(), is_signed) - }, - idb_rs::til::Basic::LongLong { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_long_long().get().into(), is_signed) - }, - idb_rs::til::Basic::LongDouble => { - // TODO is size_long_double architecture dependent? - Type::float(self.til.header.size_long_double.map(NonZeroU8::get).unwrap_or(8).into()) - }, - idb_rs::til::Basic::Float { bytes } => Type::float(bytes.get().into()), - } - } - - pub fn translate_type(&self, ty: &TILType) -> TranslateTypeResult { - match &ty.type_variant { - // types that are always translatable - TILTypeVariant::Basic(meta) => { - TranslateTypeResult::Translated(self.translate_basic(meta)) - } - TILTypeVariant::Bitfield(bit) => { - TranslateTypeResult::Translated(field_from_bytes(bit.nbytes.get().into())) - } - TILTypeVariant::Enum(ty_enum) => { - TranslateTypeResult::Translated(self.translate_enum(ty_enum)) - } - TILTypeVariant::Typeref(typeref) => match &typeref.typeref_value { - idb_rs::til::TyperefValue::Ref(idx) => self.find_typedef(&self.types[*idx]), - idb_rs::til::TyperefValue::UnsolvedName(name) => self - .find_typedef_by_name(name.as_ref().map(|x| x.as_bytes()).unwrap_or(&[])) - .unwrap_or_else(|| { - TranslateTypeResult::Error(BnTypeError::NameNotFound( - name.as_ref() - .map(|x| x.as_utf8_lossy().to_string()) - .unwrap_or(String::new()), - )) - }), - idb_rs::til::TyperefValue::UnsolvedOrd(ord) => { - TranslateTypeResult::Error(BnTypeError::OrdinalNotFound(*ord)) - } - }, - - TILTypeVariant::Pointer(ty) => self.translate_pointer(ty), - TILTypeVariant::Function(fun) => self.translate_function(fun), - - // can only be partially solved if all fields are solved or partially solved - TILTypeVariant::Array(array) => self.translate_array(array), - TILTypeVariant::Struct(ty_struct) => self.translate_struct(ty_struct), - TILTypeVariant::Union(ty_union) => self.translate_union(ty_union), - } - } -} - -pub fn translate_ephemeral_type(debug_file: &BinaryView, ty: &TILType) -> TranslateTypeResult { - // in case we need to translate types - let header = idb_rs::til::ephemeral_til_header(); - let translator = TranslateIDBTypes { - arch: debug_file.default_arch().unwrap(/* TODO */), - progress: |_, _| Ok(()), - // TODO it's unclear what to do here - til: &TILSection { - header, - symbols: vec![], - types: vec![], - macros: None, - }, - types: vec![], - }; - - translator.translate_type(ty) -} - -pub fn translate_til_types( - arch: CoreArchitecture, - til: &TILSection, - progress: impl Fn(usize, usize) -> Result<(), ()>, -) -> Result>> { - let total = til.symbols.len() + til.types.len(); - let mut types = Vec::with_capacity(total); - let mut types_by_ord = HashMap::with_capacity(total); - let mut types_by_name = HashMap::with_capacity(total); - let all_types = til.types.iter().zip(core::iter::repeat(false)); - // TODO: it's unclear how the demangle symbols and types names/ord, for now only parse types - //let all_types = all_types.chain(til.symbols.iter().zip(core::iter::repeat(true))); - for (i, (ty, is_symbol)) in all_types.enumerate() { - // TODO sanitized the input - // TODO find out how the namespaces used by TIL works - types.push(TranslatesIDBType { - name: ty.name.clone(), - is_symbol, - og_ty: ty, - ty: TranslateTypeResult::NotYet, - }); - if ty.ordinal != 0 && !is_symbol { - let dup1 = types_by_ord.insert(ty.ordinal, i); - if let Some(old) = dup1 { - let old_type = &types[old]; - let new_type = types.last().unwrap(); - // TODO error? - panic!( - "dup ord {}:{} {}:\n{:?}\n{:?}", - old_type.is_symbol, - new_type.is_symbol, - ty.ordinal, - &old_type.og_ty, - &new_type.og_ty, - ) - } - } - if !ty.name.as_bytes().is_empty() { - let dup2 = types_by_name.insert(ty.name.as_bytes().to_owned(), i); - if let Some(old) = dup2 { - let old_type = &types[old]; - let new_type = types.last().unwrap(); - // TODO error? - panic!( - "dup name {}:{}: {}:\n{:?}\n{:?}", - old_type.is_symbol, - new_type.is_symbol, - &ty.name.as_utf8_lossy(), - &old_type.og_ty, - &new_type.og_ty, - ) - } - } - } - - let mut translator = TranslateIDBTypes { - arch, - progress, - til, - types, - }; - if (translator.progress)(0, total).is_err() { - return Err(anyhow!("IDB import aborted")); - } - - // solve types until there is nothing else that can be solved - loop { - // if something was solved, mark this variable as true - let mut did_something = false; - let mut num_translated = 0usize; - for i in 0..translator.types.len() { - match &translator.types[i].ty { - TranslateTypeResult::NotYet => { - let result = translator.translate_type(&translator.types[i].og_ty.tinfo); - did_something |= !matches!(&result, TranslateTypeResult::NotYet); - translator.types[i].ty = result; - } - TranslateTypeResult::PartiallyTranslated(_, None) => { - let result = translator.translate_type(&translator.types[i].og_ty.tinfo); - // don't allow regress, it can goes from PartiallyTranslated to any state other then NotYet - assert!(!matches!(&result, TranslateTypeResult::NotYet)); - did_something |= - !matches!(&result, TranslateTypeResult::PartiallyTranslated(_, None)); - translator.types[i].ty = result; - // don't need to add again they will be fixed on the loop below - } - // if an error was produced, there is no point in try again - TranslateTypeResult::PartiallyTranslated(_, Some(_)) => {} - // NOTE for now we are just accumulating errors, just try to translate the max number - // of types as possible - TranslateTypeResult::Error(_) => {} - // already translated, nothing do to here - TranslateTypeResult::Translated(_) => {} - } - - // count the number of finished types - if let TranslateTypeResult::Translated(_) = &translator.types[i].ty { - num_translated += 1 - } - } - - if !did_something { - // means we acomplilshed nothing during this loop, there is no point in trying again - break; - } - if (translator.progress)(num_translated, total).is_err() { - // error means the user aborted the progress - break; - } - } - - Ok(translator.types) -} - -fn field_from_bytes(bytes: i32) -> Ref { - match bytes { - 0 => unreachable!(), - num @ (1 | 2 | 4 | 8 | 16) => Type::int(num.try_into().unwrap(), false), - nelem => Type::array(&Type::char(), nelem.try_into().unwrap()), - } -} - -fn convert_cc( - arch: &A, - in_cc: TILCallingConvention, -) -> Option> { - match in_cc { - TILCallingConvention::Cdecl => arch.get_cdecl_calling_convention(), - TILCallingConvention::Ellipsis => arch.get_cdecl_calling_convention(), - TILCallingConvention::Stdcall => arch.get_stdcall_calling_convention(), - TILCallingConvention::Fastcall => arch.get_fastcall_calling_convention(), - TILCallingConvention::Voidarg - | TILCallingConvention::Pascal - | TILCallingConvention::Thiscall - | TILCallingConvention::Swift - | TILCallingConvention::Golang - | TILCallingConvention::Reserved3 - | TILCallingConvention::Uservars - | TILCallingConvention::Userpurge - | TILCallingConvention::Usercall => None, - } -} From d69d3c791d8de61d85e4cd3bb22042e1224c87fa Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:38:30 -0500 Subject: [PATCH 03/16] [idb_import] Improve TIL type translation fidelity Resolve the outstanding translation TODOs in the TIL translator: - Size the variable-width C basic types (bool/short/int/long/long long/ long double) from the TIL header's compiler sizing info when a TIL is attached, falling back to the standard C ABI defaults. Both build_basic_ty and width_of_type now share these sizes so referenced-type placeholder widths stay in step with the types they stand in for. - Translate BoolSized to a real width: a 1-byte bool stays bool, any other width becomes an unsigned int of that size (BN bool is always one byte). - Honor pointer __ptr32 / __ptr64 modifiers to override the platform address size, and document that based/shifted pointers have no BN representation. - Detect variadic functions via the ellipsis calling convention instead of hardcoding has_variable_args to false. - Add udt extra_padding to the computed structure width so fixed-size UDTs occupy their true storage size. - Document the resolved design decisions for grouped (bitmask) enums, flexible array members, struct/union placeholder widths, the function return location, and the authoritative pointer address size. --- plugins/idb_import/src/translate.rs | 144 ++++++++++++++++++++++------ 1 file changed, 116 insertions(+), 28 deletions(-) diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index c3af6fa697..4eb4b81d64 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -12,6 +12,7 @@ use binaryninja::types::{ TypeContainer, }; use idb_rs::til::function::CallingConvention; +use idb_rs::til::pointer::PointerModifier; use idb_rs::til::r#enum::EnumMembers; use idb_rs::til::{Basic, TILTypeInfo, TypeVariant, TyperefType, TyperefValue}; use std::collections::{HashMap, HashSet}; @@ -64,6 +65,18 @@ pub struct TILTranslator { pub address_size: usize, /// Default size of enumerations. pub enum_size: usize, + /// Default size of a `bool` in bytes. + pub bool_size: usize, + /// Default size of a `short` in bytes. + pub short_size: usize, + /// Default size of an `int` in bytes. + pub int_size: usize, + /// Default size of a `long` in bytes. + pub long_size: usize, + /// Default size of a `long long` in bytes. + pub long_long_size: usize, + /// Default size of a `long double` in bytes. + pub long_double_size: usize, /// Reference types, for use with typedefs. /// /// This is necessary because ordinals do not have names and can't be made into a [`NamedTypeReference`]. @@ -88,6 +101,12 @@ impl TILTranslator { Self { address_size, enum_size: address_size / 2, + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -102,6 +121,12 @@ impl TILTranslator { Self { address_size: platform.address_size(), enum_size: platform.arch().default_integer_size(), + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -116,6 +141,12 @@ impl TILTranslator { Self { address_size: arch.address_size(), enum_size: arch.default_integer_size(), + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -131,13 +162,28 @@ impl TILTranslator { self.enum_size = size_enum.get() as usize; } + // The TIL header carries the C basic-type sizing for the compiler the IDB was built + // for; prefer it over our architecture-derived defaults so types translate with the + // same widths IDA used. + self.bool_size = til.header.size_bool.get() as usize; + self.int_size = til.header.size_int.get() as usize; + self.short_size = til.sizeof_short().get() as usize; + self.long_size = til.sizeof_long().get() as usize; + self.long_long_size = til.sizeof_long_long().get() as usize; + if let Some(size_long_double) = til.header.size_long_double { + self.long_double_size = size_long_double.get() as usize; + } + // Add referencable types so that type def lookups can occur. self.reference_types_by_ord.reserve(til.types.len()); for (_idx, ty) in til.types.iter().enumerate() { self.add_referenced_type_info(ty); } - // TODO: Handle address (pointer) size information? + // NOTE: The TIL exposes `addr_size()`, but it is derived from the compiler model and + // defaults to 4 when that is absent, which would silently truncate pointers on a + // 64-bit binary. The platform/architecture `address_size` we were constructed with is + // authoritative for pointer widths, so we intentionally keep it. self } @@ -192,7 +238,8 @@ impl TILTranslator { pub fn build_basic_ty(&self, basic_ty: &idb_rs::til::Basic) -> anyhow::Result { use idb_rs::til::Basic; - // TODO: Grab the sizing information of these types from the TIL instead of hardcoding. + // The variable-width C types (short/int/long/...) are sized from the TIL header when + // one is available (see `with_til_info`), falling back to the standard C ABI defaults. match basic_ty { Basic::Void => Ok(TypeBuilder::void()), Basic::Unknown { bytes } => { @@ -200,17 +247,26 @@ impl TILTranslator { // so we are going to be liberal and allow unknown basic types to be treated as a sized int. Ok(TypeBuilder::int(*bytes as usize, false)) } - Basic::Bool => Ok(TypeBuilder::bool()), - Basic::BoolSized { .. } => { - // TODO: This needs to be resized, if that cannot be done, make a NTR to an int named BOOL? - Ok(TypeBuilder::bool()) - } + Basic::Bool if self.bool_size == 1 => Ok(TypeBuilder::bool()), + // Binary Ninja's `bool` is always a single byte; a wider `bool` is represented as an + // unsigned integer of the requested size. + Basic::Bool => Ok(TypeBuilder::int(self.bool_size, false)), + Basic::BoolSized { bytes } if bytes.get() == 1 => Ok(TypeBuilder::bool()), + Basic::BoolSized { bytes } => Ok(TypeBuilder::int(bytes.get() as usize, false)), Basic::Char => Ok(TypeBuilder::char()), Basic::SegReg => Err(anyhow::anyhow!("SegReg is not supported")), - Basic::Short { is_signed } => Ok(TypeBuilder::int(2, is_signed.unwrap_or(true))), - Basic::Long { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), - Basic::LongLong { is_signed } => Ok(TypeBuilder::int(8, is_signed.unwrap_or(true))), - Basic::Int { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), + Basic::Short { is_signed } => { + Ok(TypeBuilder::int(self.short_size, is_signed.unwrap_or(true))) + } + Basic::Long { is_signed } => { + Ok(TypeBuilder::int(self.long_size, is_signed.unwrap_or(true))) + } + Basic::LongLong { is_signed } => { + Ok(TypeBuilder::int(self.long_long_size, is_signed.unwrap_or(true))) + } + Basic::Int { is_signed } => { + Ok(TypeBuilder::int(self.int_size, is_signed.unwrap_or(true))) + } Basic::IntSized { bytes, is_signed } => { let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); Ok(TypeBuilder::int(bytes as usize, is_signed.unwrap_or(true))) @@ -219,7 +275,7 @@ impl TILTranslator { let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); Ok(TypeBuilder::float(bytes as usize)) } - Basic::LongDouble => Ok(TypeBuilder::float(8)), + Basic::LongDouble => Ok(TypeBuilder::float(self.long_double_size)), } } @@ -227,11 +283,21 @@ impl TILTranslator { &self, pointer_ty: &idb_rs::til::pointer::Pointer, ) -> anyhow::Result { - // TODO: Consult pointer_ty.closure (is this how we can get based pointers?) + // A `__ptr32` / `__ptr64` modifier overrides the platform address size for this pointer + // (e.g. a 32-bit pointer embedded in an otherwise 64-bit binary). + // + // NOTE: `closure` (based pointers) and `shifted` pointers have no direct Binary Ninja + // representation, so the pointee type and width are preserved but those attributes are + // intentionally dropped. + let pointer_width = match pointer_ty.modifier { + Some(PointerModifier::Ptr32) => 4, + Some(PointerModifier::Ptr64) => 8, + Some(PointerModifier::Restricted) | None => self.address_size, + }; let inner_ty = self.translate_type_info(&pointer_ty.typ)?; Ok(TypeBuilder::pointer_of_width( &inner_ty, - self.address_size, + pointer_width, // NOTE: Set later in `translate_type_info`. false, // NOTE: Set later in `translate_type_info`. @@ -244,10 +310,16 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // TODO: Once branch `test_call_layout` lands use function_ty.retloc to recover return location. + // NOTE: `function_ty.retloc` carries the explicit return-value location, but mapping an + // IDA `ArgLoc` onto a Binary Ninja variable storage location is not yet implemented, so + // the return location is left to be derived by analysis. let return_ty = self.translate_type_info(&function_ty.ret)?; let params: Vec = self.build_function_params(&function_ty.args)?; - let has_variable_args = false; + // An ellipsis calling convention is IDA's marker for a variadic function. + let has_variable_args = matches!( + function_ty.calling_convention, + Some(CallingConvention::Ellipsis) + ); let stack_adjust = Conf::new(0, 0); let builder = match function_ty.calling_convention { @@ -403,8 +475,10 @@ impl TILTranslator { builder.insert_member(member, false); } + // `extra_padding` records trailing padding bytes IDA stores for fixed-size UDTs; add it + // to the member-derived width so the structure occupies its true storage size. + let width = width + udt_ty.extra_padding.unwrap_or(0); builder.width(width); - // TODO: Handle udt_ty.extra_padding (is that tail padding?) Ok(TypeBuilder::structure(&builder.finalize())) } @@ -478,7 +552,10 @@ impl TILTranslator { } EnumMembers::Groups(groups) => { for (idx, group) in groups.iter().enumerate() { - // TODO: How does this grouping actually impact the enum besides the name? + // IDA's grouped (bitmask) enums partition the members into named bitmask + // groups. Binary Ninja enumerations are flat and have no equivalent grouping + // concept, so we flatten the groups, qualifying each member with its group + // name to keep the names unique and preserve the original grouping intent. let group_name = group .field .name @@ -526,26 +603,31 @@ impl TILTranslator { /// Computes the width of a type, in bytes. pub fn width_of_type(&self, ty: &idb_rs::til::Type) -> anyhow::Result { match &ty.type_variant { + // Keep these widths in lockstep with `build_basic_ty` so that NTR placeholder widths + // match the types they stand in for. TypeVariant::Basic(basic) => match basic { Basic::Void => Ok(0), Basic::Unknown { bytes } => Ok(*bytes as usize), - Basic::Bool => Ok(1), + Basic::Bool => Ok(self.bool_size), Basic::BoolSized { bytes } => Ok(bytes.get() as usize), Basic::Char => Ok(1), Basic::SegReg => Ok(8), - Basic::Short { .. } => Ok(2), - Basic::Long { .. } => Ok(4), - Basic::LongLong { .. } => Ok(8), - Basic::Int { .. } => Ok(4), + Basic::Short { .. } => Ok(self.short_size), + Basic::Long { .. } => Ok(self.long_size), + Basic::LongLong { .. } => Ok(self.long_long_size), + Basic::Int { .. } => Ok(self.int_size), Basic::IntSized { bytes, .. } => Ok(bytes.get() as usize), Basic::Float { bytes } => Ok(bytes.get() as usize), - Basic::LongDouble => Ok(8), + Basic::LongDouble => Ok(self.long_double_size), }, TypeVariant::Pointer(_) => Ok(self.address_size), TypeVariant::Function(_) => Err(anyhow::anyhow!("Function types do not have a width")), TypeVariant::Array(arr) => { let elem_width = self.width_of_type(&arr.elem_type)?; - // TODO: A DST array is unsized or what? I think we should error IMO. + // A flexible array member (no element count) contributes no storage of its own; + // it only names the tail of the containing structure. Treat it as zero-width to + // mirror `build_array_ty`, rather than erroring, so structures ending in one can + // still be sized. let count = arr.nelem.map(|n| n.get()).unwrap_or(0); Ok(elem_width * count as usize) } @@ -565,17 +647,23 @@ impl TILTranslator { for member in &s.members { total_width += self.width_of_type(&member.member_type)?; } - // TODO: Handle alignment and bitfields. + // NOTE: This is the tightly-packed lower bound on the size. It does not + // reintroduce the inter-member alignment padding (or bitfield storage sharing) + // that `build_udt_members` accounts for, because that requires per-type alignment + // which an unresolved type reference does not expose here. It is only used to give + // a referenced struct a placeholder width, so an underestimate is acceptable; the + // authoritative layout comes from `build_udt_ty` once the type is fully resolved. Ok(total_width) } TypeVariant::Union(u) => { - // Size of the largest member + alignment + // Size of the largest member. As with the struct case above, the union's own + // alignment padding is not applied here; this is the placeholder lower bound used + // for type-reference widths, not the final laid-out size. let mut max_width = 0; for member in &u.members { let member_width = self.width_of_type(&member.member_type)?; max_width = max_width.max(member_width); } - // TODO: Handle alignment Ok(max_width) } TypeVariant::Enum(e) => Ok(e From e9dbe80b856b41da7ca37eef013662f6c12f5c1c Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:40:20 -0500 Subject: [PATCH 04/16] [idb_import] Resolve parse.rs TODOs - merged_types: carry an ordinal across the dedup when the kept entry lacks one, keeping name/ordinal lookups resolvable, and document that dir_tree types are clones of the same TIL definitions so no body merge is needed. - TIL decompression: read_til already inflates Zlib/Zstd sections via its section header, so document that and drop the stale "decompress til" TODO. - Function registers/stack variables: replace the dead exploratory block with a note scoping it as a follow-up feature (needs FunctionInfo and mapper support to apply named stack variables and register names). --- plugins/idb_import/src/parse.rs | 37 ++++++++++++++------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 95a2fdd0eb..0e68ff5f3d 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -188,11 +188,17 @@ impl IDBInfo { types.extend(til.types.clone()); } types.sort_by_key(|t| t.name.to_string()); + // `a` is the later (dir_tree-then-til) duplicate being removed and `b` is the one we + // keep. In practice the dir_tree types are clones pulled from the same TIL (see + // `parse_dir_tree`), so the definitions are identical; we still carry over an ordinal if + // the kept entry happens to be missing one, so name/ordinal lookups stay resolvable. types.dedup_by(|a, b| { if a.name.to_string() != b.name.to_string() { return false; } - // TODO: Merge types instead of just picking b and not transferring. + if b.ordinal == 0 && a.ordinal != 0 { + b.ordinal = a.ordinal; + } true }); types @@ -261,7 +267,8 @@ impl IDBFileParser { id2 = Some(format.read_id2(&mut *data, id2_loc)?); } - // TODO: Decompress til + // NOTE: `read_til` reads the section header and transparently inflates Zlib/Zstd + // compressed TIL sections, so no explicit decompression step is needed here. let mut til = None; if let Some(til_loc) = format.til_location() { til = Some(format.read_til(&mut *data, til_loc)?); @@ -332,25 +339,13 @@ impl IDBFileParser { continue; } - // TODO: Parse function registers and params - // for def_reg in id0.function_defined_registers(netdelta, &func, &func_ext) { - // tracing::info!("{:0x} : Function register: {:?}", func_start, def_reg); - // let Ok(_def_reg) = def_reg else { - // tracing::warn!("Failed to read function register entry"); - // continue; - // }; - // } - - // if let Ok(stack_names) = - // id0.function_defined_variables(&root_info, &func, &func_ext) - // { - // tracing::info!( - // "{:0x} : Function stack variables: {:#?}", - // func_start, - // stack_names - // ); - // } - + // NOTE: `id0.function_defined_registers(netdelta, &func, &func_ext)` and + // `id0.function_defined_variables(&root_info, &func, &func_ext)` expose the + // function's register definitions and named stack variables. Surfacing + // those requires extending `FunctionInfo` to carry them and teaching the + // mapper how to apply named stack variables / register names to a BN + // function, which depends on the analysed stack frame. That is tracked as a + // follow-up feature rather than parsed here. functions.push(FunctionInfo { name: None, ty: None, From ed2d9323e1052bf6c56dd9ef3eeb2cfd120335b5 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:44:01 -0500 Subject: [PATCH 05/16] [idb_import] Resolve mapper/commands TODOs and surface IDB SHA256 - Populate IDBInfo.sha256 from the input file SHA256 recorded in the IDB so it is no longer always None, and drop the stale placeholder comment. - Mapper logs the recorded SHA256 and documents a future IDB verifier that would compare it against the mapped view before applying data. - Define the fallback `size_t` only when the view lacks one, so a real platform/view definition is never clobbered. - Document that the undo bracketing requires the mapper to be the sole writer, an invariant the run-once loader activity already guarantees. - Document the name-based (not range-based) section dedup rationale: the BN loader already maps the address space, so a range check would suppress every IDA segment. - Replace the remaining design-question TODOs (used-type ordering, attached TIL lookup, per-function platform tuple, OpenFileName filter naming) with decisions/notes explaining the current behavior and future direction. --- plugins/idb_import/src/commands.rs | 3 +- plugins/idb_import/src/mapper.rs | 50 ++++++++++++++++++++---------- plugins/idb_import/src/parse.rs | 12 +++++-- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/plugins/idb_import/src/commands.rs b/plugins/idb_import/src/commands.rs index bc17aa9b4a..f617746386 100644 --- a/plugins/idb_import/src/commands.rs +++ b/plugins/idb_import/src/commands.rs @@ -27,7 +27,8 @@ impl LoadFileField { pub fn field(&self) -> FormInputField { FormInputField::OpenFileName { prompt: "File Path".to_string(), - // TODO: This is called extension but is really a filter. + // NOTE: Binary Ninja's `OpenFileName` field names this `extension`, but it accepts a + // full file filter expression (e.g. "*.idb;*.i64"), which is what we pass here. extension: Some(self.filter.clone()), default: self.default.clone(), value: None, diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 593fdb75cf..0c7428f81f 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -34,9 +34,13 @@ impl IDBMapper { return; }; - // TODO: Actually the below comment belongs in an IDBVerifier that tries to determine if the idb - // TODO: Will process correctly for the given view. - // TODO: Have a shasum check of the file to make sure we are not mapping to bad data? + // NOTE: `self.info.sha256` carries the SHA256 of the original input file as recorded in + // the IDB. A dedicated IDB verifier that compares this against the file backing `view` + // and refuses to map on mismatch (to avoid applying stale data to the wrong binary) is + // left as future work; for now we log it for traceability and proceed with mapping. + if let Some(sha256) = &self.info.sha256 { + tracing::debug!("IDB recorded input file SHA256: {}", sha256); + } // Rebase the address from ida -> binja without this rebased views will fail to map. let bn_base_address = view.start(); @@ -78,12 +82,16 @@ impl IDBMapper { // Create the type translator, adding all referencable types from both the TIL and the binary view. let platform = view.default_platform().unwrap(); - // TODO: Need to remove this, but for now keeping this since it gets referenced a lot. - view.define_auto_type( - "size_t", - "IDA", - &Type::int(platform.arch().default_integer_size(), false), - ); + // Provide a fallback `size_t` only when the view does not already define one. Many IDA + // types reference `size_t`, so we keep this safety net, but defining it conditionally + // avoids clobbering a real platform/view definition with our generic integer. + if view.type_by_name("size_t").is_none() { + view.define_auto_type( + "size_t", + "IDA", + &Type::int(platform.arch().default_integer_size(), false), + ); + } let til_translator = TILTranslator::new_from_platform(&platform).with_type_container(&view.type_container()); let til_translator = match &self.info.til { @@ -105,8 +113,10 @@ impl IDBMapper { self.map_export_to_view(view, &til_translator, &rebased_export); } - // TODO: The below undo and ignore is not thread safe, this means that the mapper itself - // TODO: should be the only thing running at the time of the mapping process. + // NOTE: The undo bracketing below is not thread safe, so the mapper must be the only + // writer mutating the view while it runs. This invariant holds because mapping is + // registered as a single run-once analysis activity (see `plugin_init`) rather than being + // invoked concurrently. let undo = view.file().begin_undo_actions(true); for comment in &self.info.merged_comments() { let mut rebased_comment = comment.clone(); @@ -175,8 +185,10 @@ impl IDBMapper { if let Ok(_used_types) = til_translator.used_types.lock() { used_types = _used_types.clone(); } - // TODO: Adding types to view after the types have been applied to the functions is not a - // TODO: great idea, I imagine the NTR's will have stale references until the analysis runs again. + // NOTE: Types are resolved here after they have already been applied to functions, so any + // named type references that get defined in this pass will read as stale until analysis + // re-runs and re-resolves them. Acceptable for this best-effort backfill, but a reason to + // resolve referenced types before applying function types if this path is wired in. 'found: for used_ty in &used_types { // 0. Make sure the type doesn't already exist in the view if view.type_by_name(&used_ty.name).is_some() { @@ -210,8 +222,9 @@ impl IDBMapper { } } + // NOTE: A further lookup source would be the TILs attached to the IDB (e.g. imported + // library TILs) before giving up; not yet wired in. tracing::warn!("Failed to find type: {:?}", used_ty); - // 4. TODO: Look through the idb attached tils? } } @@ -243,7 +256,10 @@ impl IDBMapper { SegmentType::Imem => Semantics::DefaultSection, }; - // TODO: Is this section already mapped using address range not name. + // NOTE: We dedup on section name rather than address range on purpose. The BN loader has + // usually already mapped the whole address space, so an IDA segment would always overlap + // an existing section and a range-based check would suppress every segment. Name-based + // dedup only skips re-adding a section we (or the loader) already named identically. if view.section_by_name(&segment.name).is_some() { tracing::debug!( "Section with name '{}' already exists, skipping...", @@ -344,7 +360,9 @@ impl IDBMapper { } } - // TODO: Attach a platform tuple to the FunctionInfo? + // NOTE: We let the function inherit the view's default platform. Carrying an explicit + // platform on FunctionInfo would let mixed-architecture databases (e.g. ARM/Thumb + // interworking) map each function with its own platform; that is a future enhancement. if let Some(func_sym) = symbol_from_func(func) { tracing::debug!( "Mapping function symbol: {:0x} => {}", diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 0e68ff5f3d..20d17ef6da 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -279,10 +279,18 @@ impl IDBFileParser { _ => None, }; + // The IDB records the SHA256 of the original input file; surface it so consumers (and a + // future verifier) can confirm the IDB matches the binary being mapped. + let sha256 = id0.as_ref().and_then(|id0| { + let root_idx = id0.root_node().ok()?; + let hash = id0.input_file_sha256(root_idx).ok()??; + Some(hash.iter().map(|b| format!("{:02x}", b)).collect::()) + }); + let id0_info = id0.as_ref().map(|id0| self.parse_id0(id0)).transpose()?; Ok(IDBInfo { - sha256: None, + sha256, id0: id0_info, til, dir_tree: dir_tree_info, @@ -447,8 +455,6 @@ impl IDBFileParser { let root_info = id0.ida_info(root_info_idx)?; let netdelta = root_info.netdelta(); - // sha256 - let func_info_from_addr = |addr_info: &AddressInfo| -> anyhow::Result> { let func_name = addr_info.label()?.map(|s| s.to_string()); From 5782c439c332976ff60b1d8bb0b5bd5ff1fb830c Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:51:10 -0500 Subject: [PATCH 06/16] [idb_import] Verify IDB matches the loaded binary by SHA256 The IDB records the SHA256 of its original input file. Walk to the root of the view's parent chain (the raw view, whose bytes are that on-disk file), hash it in 1 MiB chunks, and compare against the recorded hash. On mismatch we warn that the imported data may not correspond to the binary; on match we log the verification at debug level. --- Cargo.lock | 66 ++++++++++++++++++++++++++++++++ plugins/idb_import/Cargo.toml | 3 +- plugins/idb_import/src/mapper.rs | 62 +++++++++++++++++++++++++++--- 3 files changed, 124 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index efe2bfe067..47af8f9e20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -271,6 +271,15 @@ dependencies = [ "serde", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.6.2" @@ -581,6 +590,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -654,6 +672,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctrlc" version = "3.5.1" @@ -759,6 +787,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2ea6706d74fca54e15f1d40b5cf7fe7f764aaec61352a9fcec58fe27e042fc8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "directories" version = "6.0.0" @@ -1093,6 +1131,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "gethostname" version = "0.4.3" @@ -1291,6 +1339,7 @@ dependencies = [ "idb-rs", "serde", "serde_json", + "sha2", "tracing", ] @@ -2585,6 +2634,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2972,6 +3032,12 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/plugins/idb_import/Cargo.toml b/plugins/idb_import/Cargo.toml index ce94b6f67e..191f123e53 100644 --- a/plugins/idb_import/Cargo.toml +++ b/plugins/idb_import/Cargo.toml @@ -16,4 +16,5 @@ binaryninjacore-sys.workspace = true idb-rs = { git = "https://github.com/Vector35/idb-rs", tag = "0.1.13" } tracing = "0.1" serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" \ No newline at end of file +serde_json = "1.0" +sha2 = "0.10" \ No newline at end of file diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 0c7428f81f..b6be5d6997 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -14,6 +14,7 @@ use binaryninja::symbol::{Binding, Symbol, SymbolType}; use binaryninja::types::Type; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; +use sha2::{Digest, Sha256}; use std::collections::HashSet; /// Maps IDB data into a [`BinaryView`]. @@ -34,12 +35,26 @@ impl IDBMapper { return; }; - // NOTE: `self.info.sha256` carries the SHA256 of the original input file as recorded in - // the IDB. A dedicated IDB verifier that compares this against the file backing `view` - // and refuses to map on mismatch (to avoid applying stale data to the wrong binary) is - // left as future work; for now we log it for traceability and proceed with mapping. - if let Some(sha256) = &self.info.sha256 { - tracing::debug!("IDB recorded input file SHA256: {}", sha256); + // Verify the IDB was built from the binary we are about to map into. The IDB records the + // SHA256 of its original input file; the root of the view's parent chain is the raw view, + // whose bytes are that same on-disk file, so we can hash it and compare. + if let Some(expected_sha256) = &self.info.sha256 { + match sha256_of_raw_view(view) { + Some(actual_sha256) if actual_sha256 == *expected_sha256 => { + tracing::debug!("Verified IDB matches loaded binary (SHA256 {expected_sha256})"); + } + Some(actual_sha256) => { + tracing::warn!( + "IDB input file SHA256 ({expected_sha256}) does not match the loaded \ + binary ({actual_sha256}); imported data may not correspond to this binary." + ); + } + None => { + tracing::debug!( + "Could not hash the loaded binary to verify against IDB SHA256 {expected_sha256}" + ); + } + } } // Rebase the address from ida -> binja without this rebased views will fail to map. @@ -459,6 +474,41 @@ impl IDBMapper { } } +/// Compute the SHA256 of the raw (on-disk) bytes backing `view`. +/// +/// Walks to the root of the parent-view chain, which is the raw view whose contents are the +/// original input file, then hashes its full contents. Returns `None` if the view is empty. +fn sha256_of_raw_view(view: &BinaryView) -> Option { + let mut raw_view = view.to_owned(); + while let Some(parent) = raw_view.parent_view() { + raw_view = parent; + } + + let length = raw_view.len(); + if length == 0 { + return None; + } + + // Hash in chunks so we never hold the whole binary in memory at once. + const CHUNK_SIZE: usize = 1 << 20; + let mut hasher = Sha256::new(); + let mut offset = raw_view.start(); + let end = offset + length; + while offset < end { + let want = CHUNK_SIZE.min((end - offset) as usize); + let chunk = raw_view.read_vec(offset, want); + if chunk.is_empty() { + // The view reported a length we cannot fully read; treat as unverifiable. + return None; + } + hasher.update(&chunk); + offset += chunk.len() as u64; + } + + let digest = hasher.finalize(); + Some(digest.iter().map(|b| format!("{:02x}", b)).collect()) +} + fn symbol_from_func(func: &FunctionInfo) -> Option> { let Some(func_name) = &func.name else { return None; From cd7160362772139a689ea15db15e606e22655f90 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:58:37 -0500 Subject: [PATCH 07/16] [idb_import] Recover argument locations and register variables - Argument locations: translate IDA stack-passed argument locations (ArgLoc::Stack) into Binary Ninja parameter stack locations so explicit stack parameter placement is preserved. Register-encoded locations carry raw IDA register indices with no portable BN mapping and are left for analysis to derive. - Register variables: parse IDA "regvars" (a register renamed by the user within a function) into FunctionInfo, carrying them through the function merge, and apply them in the mapper by resolving the register by name and creating a user variable typed to the register width. --- plugins/idb_import/src/mapper.rs | 42 +++++++++++++++++- plugins/idb_import/src/parse.rs | 52 ++++++++++++++++++---- plugins/idb_import/src/translate.rs | 69 ++++++++++++++++------------- 3 files changed, 124 insertions(+), 39 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index b6be5d6997..dee9b9b562 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -5,13 +5,14 @@ use crate::parse::{ SegmentInfo, }; use crate::translate::TILTranslator; -use binaryninja::architecture::Architecture; +use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; use binaryninja::symbol::{Binding, Symbol, SymbolType}; use binaryninja::types::Type; +use binaryninja::variable::Variable; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; use sha2::{Digest, Sha256}; @@ -321,6 +322,7 @@ impl IDBMapper { address: export.address, is_library: false, is_no_return: false, + register_vars: Vec::new(), }; self.map_func_to_view(view, til_translator, &func_info); } else { @@ -386,6 +388,44 @@ impl IDBMapper { ); view.define_auto_symbol(&func_sym); } + + self.map_register_vars_to_func(&bn_func, func); + } + + /// Apply IDA register variables ("regvars") to a function. + /// + /// IDA records that a register holds a user-named value over an address range. Binary Ninja + /// variables are function-scoped, so the range is not modeled; we name the architecture + /// register over the whole function, typed as an unsigned int of the register's width. + fn map_register_vars_to_func( + &self, + bn_func: &binaryninja::function::Function, + func: &FunctionInfo, + ) { + if func.register_vars.is_empty() { + return; + } + let arch = bn_func.arch(); + for reg_var in &func.register_vars { + let Some(register) = arch.register_by_name(®_var.register) else { + tracing::warn!( + "Skipping register variable '{}': unknown register '{}' at {:0x}", + reg_var.name, + reg_var.register, + func.address + ); + continue; + }; + let reg_width = register.info().size().max(1); + let var = Variable::from_register(register); + tracing::debug!( + "Mapping register variable {} => {} at {:0x}", + reg_var.register, + reg_var.name, + func.address + ); + bn_func.create_user_var(&var, &Type::int(reg_width, false), ®_var.name, false); + } } pub fn map_label_to_view(&self, view: &BinaryView, label: &LabelInfo) { diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 20d17ef6da..157ec5f230 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -27,6 +27,19 @@ pub struct FunctionInfo { pub address: u64, pub is_library: bool, pub is_no_return: bool, + pub register_vars: Vec, +} + +/// A register renamed by the user within a function (IDA "regvar"), e.g. `eax` -> `count`. +#[derive(Debug, Clone, Serialize)] +pub struct RegisterVarInfo { + /// Architecture register the variable lives in (e.g. `eax`). + pub register: String, + /// User-assigned name for the register over its range. + pub name: String, + pub start: u64, + pub end: u64, + pub comment: String, } #[derive(Debug, Clone, Serialize)] @@ -134,6 +147,9 @@ impl IDBInfo { if a.ty.is_some() { b.ty = a.ty.clone(); } + if !a.register_vars.is_empty() { + b.register_vars = a.register_vars.clone(); + } true }); id0_functions @@ -341,25 +357,44 @@ impl IDBFileParser { IDBFunctionType::Tail(_) => { tracing::debug!("Skipping tail function... {:0x}", func_start); } - IDBFunctionType::NonTail(_func_ext) => { + IDBFunctionType::NonTail(func_ext) => { if func.flags.is_outline() { tracing::debug!("Skipping outlined function... {:0x}", func_start); continue; } - // NOTE: `id0.function_defined_registers(netdelta, &func, &func_ext)` and - // `id0.function_defined_variables(&root_info, &func, &func_ext)` expose the - // function's register definitions and named stack variables. Surfacing - // those requires extending `FunctionInfo` to carry them and teaching the - // mapper how to apply named stack variables / register names to a BN - // function, which depends on the analysed stack frame. That is tracked as a - // follow-up feature rather than parsed here. + // Collect register variables (IDA "regvars"): registers the user renamed + // over a range within the function. The register is given by name, so it + // maps cleanly onto a Binary Ninja register in the mapper. + let mut register_vars = Vec::new(); + for reg in id0.function_defined_registers(netdelta, &func, func_ext) { + let reg = match reg { + Ok(reg) => reg, + Err(err) => { + tracing::warn!( + "Failed to read register variable for {:0x}: {}", + func_start, + err + ); + continue; + } + }; + register_vars.push(RegisterVarInfo { + register: reg.register_name.to_string(), + name: reg.variable_name.to_string(), + start: reg.range.start.into_raw().into_u64(), + end: reg.range.end.into_raw().into_u64(), + comment: reg.cmt.to_string(), + }); + } + functions.push(FunctionInfo { name: None, ty: None, address: func_start, is_library: func.flags.is_lib(), is_no_return: func.flags.is_no_return(), + register_vars, }); } } @@ -466,6 +501,7 @@ impl IDBFileParser { address: func_addr, is_library: false, is_no_return: false, + register_vars: Vec::new(), })) }; diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index 4eb4b81d64..ee57bc084c 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -9,9 +9,10 @@ use binaryninja::rc::Ref; use binaryninja::types::{ EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, NamedTypeReference, NamedTypeReferenceClass, StructureBuilder, StructureMember, StructureType, TypeBuilder, - TypeContainer, + TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, }; -use idb_rs::til::function::CallingConvention; +use binaryninja::variable::Variable; +use idb_rs::til::function::{ArgLoc, CallingConvention}; use idb_rs::til::pointer::PointerModifier; use idb_rs::til::r#enum::EnumMembers; use idb_rs::til::{Basic, TILTypeInfo, TypeVariant, TyperefType, TyperefValue}; @@ -310,9 +311,12 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // NOTE: `function_ty.retloc` carries the explicit return-value location, but mapping an - // IDA `ArgLoc` onto a Binary Ninja variable storage location is not yet implemented, so - // the return location is left to be derived by analysis. + // NOTE: `function_ty.retloc` (and the per-argument `arg.loc`) carry explicit storage + // locations as IDA `ArgLoc`s. Honoring them requires translating IDA's register-relative + // encodings (`Reg1`/`Reg2`/`RRel` hold raw IDA register indices) into BN register ids, + // which is processor-specific and has no general mapping available here. Until that + // per-architecture register map exists we let the calling convention derive locations, + // which is correct for the standard conventions we already map. let return_ty = self.translate_type_info(&function_ty.ret)?; let params: Vec = self.build_function_params(&function_ty.args)?; // An ellipsis calling convention is IDA's marker for a variadic function. @@ -373,8 +377,9 @@ impl TILTranslator { .clone() .map(|s| s.to_string()) .unwrap_or_else(|| format!("arg{}", idx)); + let location = value_location_from_arg_loc(arg.loc.as_ref()); self.translate_type_info(&arg.ty) - .map(|ty| FunctionParameter::new(ty, arg_name, None)) + .map(|ty| FunctionParameter::new(ty, arg_name, location)) }) .collect() } @@ -642,30 +647,13 @@ impl TILTranslator { anyhow::anyhow!("Type reference has no width: {:?}", resolved_ty) }) } - TypeVariant::Struct(s) => { - let mut total_width = 0; - for member in &s.members { - total_width += self.width_of_type(&member.member_type)?; - } - // NOTE: This is the tightly-packed lower bound on the size. It does not - // reintroduce the inter-member alignment padding (or bitfield storage sharing) - // that `build_udt_members` accounts for, because that requires per-type alignment - // which an unresolved type reference does not expose here. It is only used to give - // a referenced struct a placeholder width, so an underestimate is acceptable; the - // authoritative layout comes from `build_udt_ty` once the type is fully resolved. - Ok(total_width) - } - TypeVariant::Union(u) => { - // Size of the largest member. As with the struct case above, the union's own - // alignment padding is not applied here; this is the placeholder lower bound used - // for type-reference widths, not the final laid-out size. - let mut max_width = 0; - for member in &u.members { - let member_width = self.width_of_type(&member.member_type)?; - max_width = max_width.max(member_width); - } - Ok(max_width) - } + // Build the structure/union through the same path used for real translation and read + // back its finalized width, so alignment padding, bitfield storage sharing and tail + // padding are all accounted for instead of approximated. This recurses, but C types + // can only nest by value finitely (self-reference is only possible through a pointer, + // which is sized by `address_size`), so it always terminates. + TypeVariant::Struct(s) => Ok(self.build_udt_ty(s, false)?.finalize().width() as usize), + TypeVariant::Union(u) => Ok(self.build_udt_ty(u, true)?.finalize().width() as usize), TypeVariant::Enum(e) => Ok(e .storage_size .map(|s| s.get() as usize) @@ -692,3 +680,24 @@ impl TILTranslator { } } } + +/// Translate an IDA argument storage location into a Binary Ninja value location. +/// +/// Stack-passed arguments translate directly (the offset is architecture independent). The +/// register-based encodings (`Reg1`/`Reg2`/`RRel`) hold raw IDA register indices, and `Dist`/ +/// `Static`/custom forms have no straightforward equivalent, so those are left for analysis to +/// derive rather than guessing at a register mapping. +fn value_location_from_arg_loc(loc: Option<&ArgLoc>) -> ValueLocationSource { + match loc { + Some(ArgLoc::Stack(offset)) => ValueLocationSource::Custom(ValueLocation { + components: vec![ValueLocationComponent { + variable: Variable::from_stack_offset(*offset as i64), + offset: 0, + size: None, + }], + indirect: false, + returned_pointer: None, + }), + _ => ValueLocationSource::Default, + } +} From dfe2670f2458387fabdc7255713e7636c9d1e83d Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:03:01 -0500 Subject: [PATCH 08/16] [idb_import] Import IDA stack frames as stack variables Parse each function's stack frame (named locals, saved registers and stack arguments) from the IDB along with its geometry (frsize/frregs), carry it on FunctionInfo through the function merge, and apply it in the mapper. IDA records the frame as a structure running from the bottom of the locals upward; Binary Ninja measures stack offsets from the return address, so an IDA frame offset is shifted down by local_size + saved_regs_size. Member offsets are the running sum of preceding member widths (the frame members carry no explicit offset), and the synthetic saved-register/return-address members are skipped while still advancing the offset. Variables are created as auto stack variables typed from their translated IDB types. --- plugins/idb_import/src/mapper.rs | 65 ++++++++++++++++++++++++++++++++ plugins/idb_import/src/parse.rs | 41 ++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index dee9b9b562..da53b1d1c0 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -323,6 +323,7 @@ impl IDBMapper { is_library: false, is_no_return: false, register_vars: Vec::new(), + stack_frame: None, }; self.map_func_to_view(view, til_translator, &func_info); } else { @@ -390,6 +391,70 @@ impl IDBMapper { } self.map_register_vars_to_func(&bn_func, func); + self.map_stack_frame_to_func(&bn_func, til_translator, func); + } + + /// Apply a function's IDA stack frame as Binary Ninja stack variables. + /// + /// IDA stores the frame as a structure whose members run from the bottom of the local + /// variable area upward through the saved registers, return address and stack arguments. + /// Binary Ninja measures stack offsets from the return address (locals negative, arguments + /// positive), so an IDA frame offset is shifted down by `local_size + saved_regs_size`. The + /// frame members do not carry explicit offsets, so each member's offset is the running sum of + /// the preceding member widths; the synthetic saved-register and return-address members are + /// skipped (but still advance the offset) since they are not user variables. + fn map_stack_frame_to_func( + &self, + bn_func: &binaryninja::function::Function, + til_translator: &TILTranslator, + func: &FunctionInfo, + ) { + let Some(stack_frame) = &func.stack_frame else { + return; + }; + let base = (stack_frame.local_size + stack_frame.saved_regs_size) as i64; + let mut frame_offset: u64 = 0; + for (i, member) in stack_frame.frame.members.iter().enumerate() { + let member_width = til_translator + .width_of_type(&member.member_type) + .unwrap_or(0); + + // Skip the saved-register / return-address regions, but keep advancing the offset so + // the members after them still land at the right place. + if member.is_frame_r || member.is_frame_s { + frame_offset += member_width as u64; + continue; + } + + let name = member + .name + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_else(|| format!("var_{i}")); + let bn_offset = frame_offset as i64 - base; + match til_translator.translate_type_info(&member.member_type) { + Ok(ty) => { + tracing::debug!( + "Mapping stack variable '{}' at frame offset {} (bn {}) for {:0x}", + name, + frame_offset, + bn_offset, + func.address + ); + bn_func.create_auto_stack_var(bn_offset, &ty, &name); + } + Err(err) => { + tracing::warn!( + "Failed to translate stack variable '{}' type for {:0x}: {}", + name, + func.address, + err + ); + } + } + + frame_offset += member_width as u64; + } } /// Apply IDA register variables ("regvars") to a function. diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 157ec5f230..a225530964 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -28,6 +28,7 @@ pub struct FunctionInfo { pub is_library: bool, pub is_no_return: bool, pub register_vars: Vec, + pub stack_frame: Option, } /// A register renamed by the user within a function (IDA "regvar"), e.g. `eax` -> `count`. @@ -42,6 +43,21 @@ pub struct RegisterVarInfo { pub comment: String, } +/// A function's stack frame, as recorded by IDA. +/// +/// The `frame` UDT describes every member of the frame (locals, saved registers, return +/// address and arguments). `local_size` (IDA's `frsize`) and `saved_regs_size` (`frregs`) give +/// the geometry needed to translate IDA frame offsets into Binary Ninja's frame convention. +#[derive(Debug, Clone, Serialize)] +pub struct StackFrameInfo { + /// Size in bytes of the local variables area (IDA `frsize`). + pub local_size: u64, + /// Size in bytes of the saved registers area (IDA `frregs`). + pub saved_regs_size: u64, + /// The frame structure describing each stack member. + pub frame: idb_rs::til::udt::UDT, +} + #[derive(Debug, Clone, Serialize)] pub struct ExportInfo { pub name: String, @@ -150,6 +166,9 @@ impl IDBInfo { if !a.register_vars.is_empty() { b.register_vars = a.register_vars.clone(); } + if a.stack_frame.is_some() { + b.stack_frame = a.stack_frame.clone(); + } true }); id0_functions @@ -388,6 +407,26 @@ impl IDBFileParser { }); } + // Collect the function's stack frame (named locals, saved registers and + // stack arguments) along with the frame geometry needed to place them. + let stack_frame = match id0 + .function_defined_variables(&root_info, &func, func_ext) + { + Ok(stack_names) => stack_names.ty.map(|frame| StackFrameInfo { + local_size: func_ext.frsize.into_u64(), + saved_regs_size: func_ext.frregs as u64, + frame, + }), + Err(err) => { + tracing::warn!( + "Failed to read stack frame for {:0x}: {}", + func_start, + err + ); + None + } + }; + functions.push(FunctionInfo { name: None, ty: None, @@ -395,6 +434,7 @@ impl IDBFileParser { is_library: func.flags.is_lib(), is_no_return: func.flags.is_no_return(), register_vars, + stack_frame, }); } } @@ -502,6 +542,7 @@ impl IDBFileParser { is_library: false, is_no_return: false, register_vars: Vec::new(), + stack_frame: None, })) }; From 51a30f02deb873736c63da0495fb01b8897fcb90 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:04:29 -0500 Subject: [PATCH 09/16] [idb_import] Apply no-return functions and local labels Two pieces of IDB data were parsed but never applied to the view: - is_no_return: mark functions IDA flags as non-returning (abort/exit/etc.) with set_auto_can_return(false) so analysis does not fall through calls to them. - Local labels: IDA's in-function named locations were folded into the name list, where map_name_to_view skips anything inside code. Route them through the dedicated map_label_to_view so they land as local-label symbols. --- plugins/idb_import/src/mapper.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index da53b1d1c0..5b9c395965 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -147,6 +147,14 @@ impl IDBMapper { self.map_name_to_view(view, &til_translator, &rebased_name); } + // IDA's local labels are named locations inside functions (e.g. loop targets). They live + // in code, so `map_name_to_view` skips them; apply them as local-label symbols instead. + for label in &id0.labels { + let mut rebased_label = label.clone(); + rebased_label.address = rebase(label.address); + self.map_label_to_view(view, &rebased_label); + } + // self.map_used_types_to_view(view, &til_translator); } @@ -361,6 +369,13 @@ impl IDBMapper { return; }; + // IDA marks functions that never return (e.g. `abort`, `exit`); carry that over so BN's + // analysis does not fall through past calls to them. + if func.is_no_return { + tracing::debug!("Marking function as no-return: {:0x}", func.address); + bn_func.set_auto_can_return(false); + } + if let Some(func_ty) = &func.ty { match til_translator.translate_type_info(&func_ty) { Ok(bn_func_ty) => { From 8cffc1c2c83750d8f3a26303a9bf92ece12c5d7c Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:34:45 -0500 Subject: [PATCH 10/16] [idb_import] Import IDA function folders as Binary Ninja components IDA lets users organize functions into folders in the Functions window, stored as a dirtree. Parse that hierarchy (preserving nested folders, not just the leaf functions) into FunctionFolderEntry, and recreate it in the view as Binary Ninja components: each folder becomes a component nested under its parent, and every function leaf is added to its folder's component. Functions sitting at the dirtree root are left uncomponented, matching their "no folder" state in IDA. --- plugins/idb_import/src/mapper.rs | 61 ++++++++++++++++++++++++++++++-- plugins/idb_import/src/parse.rs | 43 ++++++++++++++++++++-- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 5b9c395965..3bf27a998e 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -1,12 +1,13 @@ //! Map the IDB data we parsed into the [`BinaryView`]. use crate::parse::{ - BaseAddressInfo, CommentInfo, ExportInfo, FunctionInfo, IDBInfo, LabelInfo, NameInfo, - SegmentInfo, + BaseAddressInfo, CommentInfo, ExportInfo, FunctionFolderEntry, FunctionInfo, IDBInfo, + LabelInfo, NameInfo, SegmentInfo, }; use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; +use binaryninja::component::{Component, ComponentBuilder}; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; @@ -155,6 +156,18 @@ impl IDBMapper { self.map_label_to_view(view, &rebased_label); } + // Recreate IDA's "Functions" window folder hierarchy as Binary Ninja components. + if let Some(dir_tree) = &self.info.dir_tree { + if !dir_tree.function_folders.is_empty() { + self.map_function_folders_to_view( + view, + &dir_tree.function_folders, + None, + &rebase, + ); + } + } + // self.map_used_types_to_view(view, &til_translator); } @@ -508,6 +521,50 @@ impl IDBMapper { } } + /// Recreate an IDA function folder tree as Binary Ninja components. + /// + /// Each IDA folder becomes a component nested under its parent (the root view component for + /// top-level folders), and each function leaf is added to the component for its folder. + /// Functions sitting directly at the dirtree root are left uncomponented, matching their + /// "no folder" status in IDA. + fn map_function_folders_to_view( + &self, + view: &BinaryView, + entries: &[FunctionFolderEntry], + parent: Option<&Component>, + rebase: &impl Fn(u64) -> u64, + ) { + for entry in entries { + match entry { + FunctionFolderEntry::Function(address) => { + let Some(parent) = parent else { + continue; + }; + let rebased = rebase(*address); + let functions_at = view.functions_at(rebased); + let func = functions_at.iter().find(|func| func.start() == rebased); + if let Some(func) = func { + parent.add_function(&func); + } else { + tracing::debug!( + "No function at {:0x} to place in folder, skipping", + rebased + ); + } + } + FunctionFolderEntry::Folder { name, entries } => { + let mut builder = ComponentBuilder::new(view.to_owned()).name(name.clone()); + if let Some(parent) = parent { + builder = builder.parent(parent.guid()); + } + let component = builder.finalize(); + tracing::debug!("Created component for folder '{}'", name); + self.map_function_folders_to_view(view, entries, Some(&component), rebase); + } + } + } + } + pub fn map_label_to_view(&self, view: &BinaryView, label: &LabelInfo) { let symbol = Symbol::builder(SymbolType::LocalLabel, &label.label, label.address).create(); tracing::debug!("Mapping label: {:0x} => {}", label.address, symbol); diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index a225530964..b05f13b6aa 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -2,7 +2,7 @@ use idb_rs::addr_info::{all_address_info, AddressInfo}; use idb_rs::id0::function::{FuncIdx, FuncordsIdx, IDBFunctionType}; -use idb_rs::id0::{ID0Section, Netdelta, SegmentType}; +use idb_rs::id0::{DirTreeEntry, ID0Section, Netdelta, SegmentType}; use idb_rs::id1::ID1Section; use idb_rs::id2::ID2Section; use idb_rs::til::section::TILSection; @@ -123,6 +123,20 @@ pub struct DirTreeInfo { /// Contains both function and data names (along with their types). pub names: Vec, pub comments: Vec, + /// The IDA "Functions" window folder hierarchy (root-level entries). + pub function_folders: Vec, +} + +/// An entry in IDA's function folder tree: either a function (by address) or a named folder +/// containing further entries. Mirrors IDA's dirtree so it can be recreated as Binary Ninja +/// components. +#[derive(Debug, Clone, Serialize)] +pub enum FunctionFolderEntry { + Function(u64), + Folder { + name: String, + entries: Vec, + }, } #[derive(Debug, Clone, Serialize, Default)] @@ -588,8 +602,9 @@ impl IDBFileParser { comments.extend(comment_info_from_addr(&addr_info)); } + let func_dir_tree = id0.dirtree_function_address()?; let mut functions = Vec::new(); - if let Some(func_dir_tree) = id0.dirtree_function_address()? { + if let Some(func_dir_tree) = &func_dir_tree { func_dir_tree.visit_leafs(|addr_raw| { let addr = Address::from_raw(*addr_raw); if let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, addr) { @@ -600,6 +615,13 @@ impl IDBFileParser { }); } + // Preserve the folder hierarchy (not just the leaf functions) so it can be recreated as + // Binary Ninja components. + let function_folders = func_dir_tree + .as_ref() + .map(|tree| build_function_folders::(&tree.entries)) + .unwrap_or_default(); + let mut names = Vec::new(); if let Some(names_dir_tree) = id0.dirtree_names()? { names_dir_tree.visit_leafs(|name_raw| { @@ -631,6 +653,23 @@ impl IDBFileParser { types, names, comments, + function_folders, }) } } + +/// Recursively convert an IDA function dirtree into our [`FunctionFolderEntry`] tree. +fn build_function_folders( + entries: &[DirTreeEntry], +) -> Vec { + entries + .iter() + .map(|entry| match entry { + DirTreeEntry::Leaf(address) => FunctionFolderEntry::Function((*address).into_u64()), + DirTreeEntry::Directory { name, entries } => FunctionFolderEntry::Folder { + name: String::from_utf8_lossy(name).to_string(), + entries: build_function_folders::(entries), + }, + }) + .collect() +} From 541cd9fc7a5a91121d531185b6c13d5145fd6ba0 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:39:56 -0500 Subject: [PATCH 11/16] [idb_import] Resolve register-based argument locations Expose the processor's register names (indexed by IDA register number) from the database and hand them to the type translator along with the architecture. Argument locations encoded as registers (Reg1, the Reg2 register pair, and register-relative RRel) are now resolved through those names into Binary Ninja registers and emitted as value locations, in addition to the stack locations already handled. Forms with no equivalent (distributed, static, custom) still fall back to the calling convention. --- plugins/idb_import/src/mapper.rs | 5 +- plugins/idb_import/src/parse.rs | 12 ++++ plugins/idb_import/src/translate.rs | 97 +++++++++++++++++++++++------ 3 files changed, 93 insertions(+), 21 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 3bf27a998e..d197e3f5df 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -109,8 +109,9 @@ impl IDBMapper { &Type::int(platform.arch().default_integer_size(), false), ); } - let til_translator = - TILTranslator::new_from_platform(&platform).with_type_container(&view.type_container()); + let til_translator = TILTranslator::new_from_platform(&platform) + .with_type_container(&view.type_container()) + .with_register_names(id0.register_names.clone()); let til_translator = match &self.info.til { Some(til) => til_translator.with_til_info(&til), None => til_translator, diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index b05f13b6aa..fb527d1503 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -114,6 +114,9 @@ pub struct ID0Info { pub comments: Vec, pub labels: Vec, pub exports: Vec, + /// Processor register names indexed by IDA register number, used to resolve the registers + /// referenced by argument/return value locations into Binary Ninja registers. + pub register_names: Vec, } #[derive(Debug, Clone, Serialize)] @@ -477,6 +480,14 @@ impl IDBFileParser { (loading_base, _) => BaseAddressInfo::BaseSegment(loading_base.into_u64()), }; + // The processor module defines the register names by index; this lets us resolve the + // registers referenced by argument/return value locations into real registers. + let register_names = id0 + .processor(&root_info) + .and_then(|processor| processor.registers_info()) + .map(|info| info.names.iter().map(|name| name.to_string()).collect()) + .unwrap_or_default(); + Ok(ID0Info { base_address, segments, @@ -484,6 +495,7 @@ impl IDBFileParser { comments, labels, exports, + register_names, }) } diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index ee57bc084c..d41380be6e 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -95,6 +95,10 @@ pub struct TILTranslator { pub cdecl_calling_convention: Option>, pub stdcall_calling_convention: Option>, pub fastcall_calling_convention: Option>, + /// Architecture used to resolve register names into register ids for value locations. + pub arch: Option, + /// Processor register names indexed by IDA register number (see [`crate::parse::ID0Info`]). + pub register_names: Vec, } impl TILTranslator { @@ -115,6 +119,8 @@ impl TILTranslator { cdecl_calling_convention: None, stdcall_calling_convention: None, fastcall_calling_convention: None, + arch: None, + register_names: Vec::new(), } } @@ -135,6 +141,8 @@ impl TILTranslator { cdecl_calling_convention: platform.get_cdecl_calling_convention(), stdcall_calling_convention: platform.get_stdcall_calling_convention(), fastcall_calling_convention: platform.get_fastcall_calling_convention(), + arch: Some(platform.arch()), + register_names: Vec::new(), } } @@ -155,9 +163,18 @@ impl TILTranslator { cdecl_calling_convention: arch.get_cdecl_calling_convention(), stdcall_calling_convention: arch.get_stdcall_calling_convention(), fastcall_calling_convention: arch.get_fastcall_calling_convention(), + arch: Some(*arch), + register_names: Vec::new(), } } + /// Provide the processor register names (indexed by IDA register number) used to resolve + /// register-based argument and return value locations. + pub fn with_register_names(mut self, register_names: Vec) -> Self { + self.register_names = register_names; + self + } + pub fn with_til_info(mut self, til: &idb_rs::til::section::TILSection) -> Self { if let Some(size_enum) = til.header.size_enum { self.enum_size = size_enum.get() as usize; @@ -377,7 +394,7 @@ impl TILTranslator { .clone() .map(|s| s.to_string()) .unwrap_or_else(|| format!("arg{}", idx)); - let location = value_location_from_arg_loc(arg.loc.as_ref()); + let location = self.value_location_from_arg_loc(arg.loc.as_ref()); self.translate_type_info(&arg.ty) .map(|ty| FunctionParameter::new(ty, arg_name, location)) }) @@ -679,25 +696,67 @@ impl TILTranslator { _ => None, } } + + /// Translate an IDA argument/return storage location into a Binary Ninja value location. + /// + /// Returns [`ValueLocationSource::Default`] for locations we cannot represent (or whose + /// registers cannot be resolved), letting the calling convention derive the location. + fn value_location_from_arg_loc(&self, loc: Option<&ArgLoc>) -> ValueLocationSource { + match self.value_location_components(loc) { + Some((components, indirect)) => ValueLocationSource::Custom(ValueLocation { + components, + indirect, + returned_pointer: None, + }), + None => ValueLocationSource::Default, + } + } + + /// Resolve an IDA [`ArgLoc`] into Binary Ninja value-location components and whether the + /// location is indirect (the value lives in memory at the component). Returns `None` for + /// forms with no representation, or when a referenced register cannot be resolved. + fn value_location_components( + &self, + loc: Option<&ArgLoc>, + ) -> Option<(Vec, bool)> { + match loc? { + ArgLoc::Stack(offset) => Some((vec![stack_component(*offset as i64)], false)), + ArgLoc::Reg1(reg) => Some((vec![self.register_component(*reg, 0)?], false)), + ArgLoc::Reg2(regs) => { + // The low and high 16 bits each hold a register index; the value is split across + // the two registers. + let low = self.register_component(regs & 0xFFFF, 0)?; + let high = self.register_component((regs >> 16) & 0xFFFF, 0)?; + Some((vec![low, high], false)) + } + ArgLoc::RRel { reg, off } => { + // Register-relative: the value lives in memory at register + offset. + Some((vec![self.register_component(u32::from(*reg), *off as i64)?], true)) + } + // Distributed, static (global address) and none/custom forms have no direct mapping. + ArgLoc::Dist(_) | ArgLoc::Static(_) | ArgLoc::None => None, + } + } + + /// Build a value-location component for an IDA register index, resolving it through the + /// processor register names and the architecture. + fn register_component(&self, reg_index: u32, offset: i64) -> Option { + let arch = self.arch.as_ref()?; + let name = self.register_names.get(reg_index as usize)?; + let register = arch.register_by_name(name)?; + Some(ValueLocationComponent { + variable: Variable::from_register(register), + offset, + size: None, + }) + } } -/// Translate an IDA argument storage location into a Binary Ninja value location. -/// -/// Stack-passed arguments translate directly (the offset is architecture independent). The -/// register-based encodings (`Reg1`/`Reg2`/`RRel`) hold raw IDA register indices, and `Dist`/ -/// `Static`/custom forms have no straightforward equivalent, so those are left for analysis to -/// derive rather than guessing at a register mapping. -fn value_location_from_arg_loc(loc: Option<&ArgLoc>) -> ValueLocationSource { - match loc { - Some(ArgLoc::Stack(offset)) => ValueLocationSource::Custom(ValueLocation { - components: vec![ValueLocationComponent { - variable: Variable::from_stack_offset(*offset as i64), - offset: 0, - size: None, - }], - indirect: false, - returned_pointer: None, - }), - _ => ValueLocationSource::Default, +/// Build a value-location component for a stack offset. +fn stack_component(offset: i64) -> ValueLocationComponent { + ValueLocationComponent { + variable: Variable::from_stack_offset(offset), + offset: 0, + size: None, } } From ee7c94f03c1246676e6e83119369324a90724cde Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:54:29 -0500 Subject: [PATCH 12/16] [idb_import] Make folder mapping robust and observable Function folders in Binary Ninja's symbol list are backed by the component API (the docs describe creating them "automatically via the API", linking to binaryninja.component.Component), so the component approach is correct. Improve the mapping so it does not depend on analysis having indexed the functions yet: capture the Ref returned when each function is created and key it by rebased address, then place those into folders directly (falling back to a view lookup only when needed). Add a summary log line reporting how many folders were created, how many functions were placed, and how many could not be found, and align terminology to "folder" to match the UI while the underlying type stays a component. --- plugins/idb_import/src/mapper.rs | 109 ++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 25 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index d197e3f5df..8e04eb0ee6 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -8,6 +8,7 @@ use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; use binaryninja::component::{Component, ComponentBuilder}; +use binaryninja::function::Function; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; @@ -17,7 +18,7 @@ use binaryninja::variable::Variable; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; use sha2::{Digest, Sha256}; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; /// Maps IDB data into a [`BinaryView`]. /// @@ -119,16 +120,25 @@ impl IDBMapper { self.map_types_to_view(view, &til_translator, &self.info.merged_types()); + // Keep the created functions keyed by their (rebased) address so we can place them into + // folders later without depending on the analysis having indexed them yet. + let mut functions_by_address: HashMap> = HashMap::new(); for func in &self.info.merged_functions() { let mut rebased_func = func.clone(); rebased_func.address = rebase(func.address); - self.map_func_to_view(view, &til_translator, &rebased_func); + if let Some(bn_func) = self.map_func_to_view(view, &til_translator, &rebased_func) { + functions_by_address.insert(rebased_func.address, bn_func); + } } for export in &id0.exports { let mut rebased_export = export.clone(); rebased_export.address = rebase(export.address); - self.map_export_to_view(view, &til_translator, &rebased_export); + if let Some(bn_func) = + self.map_export_to_view(view, &til_translator, &rebased_export) + { + functions_by_address.insert(rebased_export.address, bn_func); + } } // NOTE: The undo bracketing below is not thread safe, so the mapper must be the only @@ -157,14 +167,24 @@ impl IDBMapper { self.map_label_to_view(view, &rebased_label); } - // Recreate IDA's "Functions" window folder hierarchy as Binary Ninja components. + // Recreate IDA's "Functions" window folder hierarchy. Binary Ninja's component API backs + // what the UI shows as function folders. if let Some(dir_tree) = &self.info.dir_tree { if !dir_tree.function_folders.is_empty() { + let mut stats = FolderStats::default(); self.map_function_folders_to_view( view, &dir_tree.function_folders, None, &rebase, + &functions_by_address, + &mut stats, + ); + tracing::info!( + "Mapped function folders: {} folders created, {} functions placed, {} not found", + stats.folders_created, + stats.functions_placed, + stats.functions_missing ); } } @@ -325,7 +345,7 @@ impl IDBMapper { view: &BinaryView, til_translator: &TILTranslator, export: &ExportInfo, - ) { + ) -> Option> { let within_code_section = view .sections_at(export.address) .iter() @@ -347,7 +367,7 @@ impl IDBMapper { register_vars: Vec::new(), stack_frame: None, }; - self.map_func_to_view(view, til_translator, &func_info); + return self.map_func_to_view(view, til_translator, &func_info); } else { tracing::debug!("Mapping data export: {:0x}", export.address); let name_info = NameInfo { @@ -358,6 +378,7 @@ impl IDBMapper { }; self.map_name_to_view(view, til_translator, &name_info); } + None } pub fn map_func_to_view( @@ -365,7 +386,7 @@ impl IDBMapper { view: &BinaryView, til_translator: &TILTranslator, func: &FunctionInfo, - ) { + ) -> Option> { // We need to skip things that hit the extern section, since they do not have a bearing in the // actual context of the binary, and can be derived differently between IDA and Binja. let within_extern_section = view @@ -375,12 +396,12 @@ impl IDBMapper { .is_some(); if within_extern_section { tracing::debug!("Skipping function in extern section: {:0x}", func.address); - return; + return None; } let Some(bn_func) = view.add_auto_function(func.address) else { tracing::warn!("Failed to add function for {:0x}", func.address); - return; + return None; }; // IDA marks functions that never return (e.g. `abort`, `exit`); carry that over so BN's @@ -421,6 +442,8 @@ impl IDBMapper { self.map_register_vars_to_func(&bn_func, func); self.map_stack_frame_to_func(&bn_func, til_translator, func); + + Some(bn_func) } /// Apply a function's IDA stack frame as Binary Ninja stack variables. @@ -522,18 +545,21 @@ impl IDBMapper { } } - /// Recreate an IDA function folder tree as Binary Ninja components. + /// Recreate an IDA function folder tree in the view. /// - /// Each IDA folder becomes a component nested under its parent (the root view component for - /// top-level folders), and each function leaf is added to the component for its folder. - /// Functions sitting directly at the dirtree root are left uncomponented, matching their - /// "no folder" status in IDA. + /// Binary Ninja models function folders with the component API (`Component`), which the UI + /// presents as folders. Each IDA folder becomes a component nested under its parent (the root + /// view component for top-level folders), and each function leaf is added to the folder it + /// belongs to. Functions sitting directly at the dirtree root are left in no folder, matching + /// their state in IDA. fn map_function_folders_to_view( &self, view: &BinaryView, entries: &[FunctionFolderEntry], parent: Option<&Component>, rebase: &impl Fn(u64) -> u64, + functions_by_address: &HashMap>, + stats: &mut FolderStats, ) { for entry in entries { match entry { @@ -542,15 +568,32 @@ impl IDBMapper { continue; }; let rebased = rebase(*address); - let functions_at = view.functions_at(rebased); - let func = functions_at.iter().find(|func| func.start() == rebased); - if let Some(func) = func { - parent.add_function(&func); - } else { - tracing::debug!( - "No function at {:0x} to place in folder, skipping", - rebased - ); + // Prefer the function we created during mapping; fall back to a view lookup in + // case it came from another source. + let func = functions_by_address.get(&rebased).cloned().or_else(|| { + view.functions_at(rebased) + .iter() + .find(|func| func.start() == rebased) + .map(|func| func.to_owned()) + }); + match func { + Some(func) if parent.add_function(&func) => { + stats.functions_placed += 1; + } + Some(_) => { + tracing::debug!( + "Component rejected function at {:0x}", + rebased + ); + stats.functions_missing += 1; + } + None => { + tracing::debug!( + "No function at {:0x} to place in folder, skipping", + rebased + ); + stats.functions_missing += 1; + } } } FunctionFolderEntry::Folder { name, entries } => { @@ -559,8 +602,16 @@ impl IDBMapper { builder = builder.parent(parent.guid()); } let component = builder.finalize(); - tracing::debug!("Created component for folder '{}'", name); - self.map_function_folders_to_view(view, entries, Some(&component), rebase); + stats.folders_created += 1; + tracing::debug!("Created folder '{}'", name); + self.map_function_folders_to_view( + view, + entries, + Some(&component), + rebase, + functions_by_address, + stats, + ); } } } @@ -652,6 +703,14 @@ impl IDBMapper { } } +/// Running totals for the folder mapping, used for a single summary log line. +#[derive(Default)] +struct FolderStats { + folders_created: usize, + functions_placed: usize, + functions_missing: usize, +} + /// Compute the SHA256 of the raw (on-disk) bytes backing `view`. /// /// Walks to the root of the parent-view chain, which is the raw view whose contents are the From d61c0d896dfbd9623a30b40f5aba6ad1805fb7dc Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:16:01 -0500 Subject: [PATCH 13/16] [idb_import] Resolve function return-value locations Reuse the register/stack location resolver to honor a function's explicit return location (function retloc) when the database records one, attaching it to the BN return value at full confidence. Functions without an explicit return location, or whose location cannot be resolved, keep the calling-convention-derived return as before. --- plugins/idb_import/src/translate.rs | 42 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index d41380be6e..37d795bf4e 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -3,13 +3,13 @@ use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; use binaryninja::calling_convention::CoreCallingConvention; -use binaryninja::confidence::Conf; +use binaryninja::confidence::{Conf, MAX_CONFIDENCE}; use binaryninja::platform::Platform; use binaryninja::rc::Ref; use binaryninja::types::{ EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, NamedTypeReference, - NamedTypeReferenceClass, StructureBuilder, StructureMember, StructureType, TypeBuilder, - TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, + NamedTypeReferenceClass, ReturnValue, StructureBuilder, StructureMember, StructureType, Type, + TypeBuilder, TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, }; use binaryninja::variable::Variable; use idb_rs::til::function::{ArgLoc, CallingConvention}; @@ -328,13 +328,10 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // NOTE: `function_ty.retloc` (and the per-argument `arg.loc`) carry explicit storage - // locations as IDA `ArgLoc`s. Honoring them requires translating IDA's register-relative - // encodings (`Reg1`/`Reg2`/`RRel` hold raw IDA register indices) into BN register ids, - // which is processor-specific and has no general mapping available here. Until that - // per-architecture register map exists we let the calling convention derive locations, - // which is correct for the standard conventions we already map. let return_ty = self.translate_type_info(&function_ty.ret)?; + // Recover the explicit return-value location (e.g. a non-default return register) when the + // database records one; otherwise the calling convention derives it. + let return_value = self.build_return_value(&return_ty, function_ty.retloc.as_ref()); let params: Vec = self.build_function_params(&function_ty.args)?; // An ellipsis calling convention is IDA's marker for a variadic function. let has_variable_args = matches!( @@ -349,7 +346,7 @@ impl TILTranslator { { let cc = self.cdecl_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -359,7 +356,7 @@ impl TILTranslator { Some(CallingConvention::Stdcall) if self.stdcall_calling_convention.is_some() => { let cc = self.stdcall_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -369,7 +366,7 @@ impl TILTranslator { Some(CallingConvention::Fastcall) if self.fastcall_calling_convention.is_some() => { let cc = self.fastcall_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -382,6 +379,27 @@ impl TILTranslator { Ok(builder) } + /// Build a [`ReturnValue`] for a function, attaching an explicit storage location when the + /// database records a return location that we can resolve to registers/stack. + fn build_return_value(&self, return_ty: &Ref, retloc: Option<&ArgLoc>) -> ReturnValue { + let location = self + .value_location_components(retloc) + .map(|(components, indirect)| { + Conf::new( + ValueLocation { + components, + indirect, + returned_pointer: None, + }, + MAX_CONFIDENCE, + ) + }); + ReturnValue { + ty: Conf::new(return_ty.clone(), MAX_CONFIDENCE), + location, + } + } + pub fn build_function_params( &self, args: &[idb_rs::til::function::FunctionArg], From a0ad3f0b3229c8bc81981f8a610b1fac6e6d542b Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:16:56 -0500 Subject: [PATCH 14/16] [idb_import] Dedup segments by exact range as well as name A segment that covers the exact same address range as an existing section is the same region under a possibly different name, so skip it rather than add a duplicate. We still avoid an overlap-based check, which would wrongly suppress every segment because the loader maps the whole address space. --- plugins/idb_import/src/mapper.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 8e04eb0ee6..2c7a9391d9 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -314,10 +314,11 @@ impl IDBMapper { SegmentType::Imem => Semantics::DefaultSection, }; - // NOTE: We dedup on section name rather than address range on purpose. The BN loader has - // usually already mapped the whole address space, so an IDA segment would always overlap - // an existing section and a range-based check would suppress every segment. Name-based - // dedup only skips re-adding a section we (or the loader) already named identically. + // Skip a segment we have already mapped. We check both the name and the exact address + // range: an overlap-based check would be wrong because the loader normally maps the whole + // address space (so every IDA segment overlaps something), but an IDA segment that covers + // the exact same range as an existing section — even under a different name — is the same + // region and should not be added twice. if view.section_by_name(&segment.name).is_some() { tracing::debug!( "Section with name '{}' already exists, skipping...", @@ -325,6 +326,19 @@ impl IDBMapper { ); return; } + if view + .sections() + .iter() + .any(|existing| existing.address_range() == segment.region) + { + tracing::debug!( + "Section covering {:0x}-{:0x} already exists, skipping '{}'...", + segment.region.start, + segment.region.end, + segment.name + ); + return; + } tracing::info!( "Mapping segment '{}': {:0x} - {:0x} ({:?})", From 06fb9b02d9e91eb452918d950952da083dfe72cc Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:18:47 -0500 Subject: [PATCH 15/16] [idb_import] Generalize base-address comment to all formats The lowest-segment rebasing fix is format agnostic; reword its comment so it no longer reads as Mach-O specific. The first section starting after the format headers (Mach-O load commands, PE headers, ELF program headers) is a general property, and aligning to the lowest segment matches IDA's image base regardless of format. --- plugins/idb_import/src/mapper.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 2c7a9391d9..51344a2f30 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -68,10 +68,13 @@ impl IDBMapper { BaseAddressInfo::BaseSegment(start_addr) => bn_base_address.wrapping_sub(start_addr), BaseAddressInfo::BaseSection(section_addr) => { // Align against the lowest mapped *segment*, not the lowest section. - // IDA's `min_ea` is the image base (it maps the file header too), but a - // Mach-O's first section (`__text`) starts after the header + load - // commands. Using the lowest section here over-shifts every imported - // address by the header size; segments include the header region. + // IDA's `min_ea` is the image base, and it maps the file header region too. + // Across executable formats the first *section* generally begins after the + // format's headers/metadata (e.g. the Mach-O header + load commands, the PE + // headers, the ELF header + program headers), so aligning to the lowest section + // over-shifts every imported address by the header size. Segments include the + // header region, so the lowest segment matches IDA's image base regardless of + // format. let bn_segment_addr = view .segments() .iter() From dd3e8e0c978400314f976c2d58198707717173f0 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:23:23 -0500 Subject: [PATCH 16/16] [idb_import] Translate unknown-unsized basic type as void idb-rs now parses a bare unknown type (unspecified size) instead of erroring, so handle it here: a zero-width unknown has no integer representation, so map it to void rather than constructing a zero-width int. --- plugins/idb_import/src/translate.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index 37d795bf4e..88f1471a92 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -260,6 +260,8 @@ impl TILTranslator { // one is available (see `with_til_info`), falling back to the standard C ABI defaults. match basic_ty { Basic::Void => Ok(TypeBuilder::void()), + // An unknown type of unspecified size has no integer representation; treat it as void. + Basic::Unknown { bytes: 0 } => Ok(TypeBuilder::void()), Basic::Unknown { bytes } => { // In the samples provided it appears that unknown can be used to represent a byte, // so we are going to be liberal and allow unknown basic types to be treated as a sized int.