From 6a86314f7d51545f89a602c105b04bb9f0449166 Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 5 Jun 2026 21:42:35 -0400 Subject: [PATCH 1/2] Refactor MSVC demangler parse and finalize flow Split MSVC symbol parsing from finalization by carrying the parsed qualified name in DemangleContext instead of using mutable demangler-wide name state. Function parsing now returns a DemangledFunction with the parsed DemangledTypeNode plus optional decoded thunk adjustor metadata, and the symbol-context callers finalize name-dependent work such as thunk suffixes and implicit this parameter synthesis. Expand DemangledTypeNode into the shared representation used by the MSVC path for delayed type construction, platform-aware widths, calling convention resolution, member pointers, postfix forms, and explicit implicit-this parameters. This lets MSVC parsing produce structured type/name data first and defer Binary Ninja Type construction until finalization has the platform and view context. Also route MSVC string literals, raw type-info names, vtables, RTTI, dynamic init/fini stubs, local guards, and referenced-symbol template values through explicit DemangledTypeNode/name flow. Extensively tested with the MSVC demangler unit suite and a 199,500-symbol regression corpus with zero output changes or failures. 
--- demangle.cpp | 15 +- demangler/gnu3/demangled_type_node.cpp | 1309 +++++++++-- demangler/gnu3/demangled_type_node.h | 278 ++- demangler/msvc/CMakeLists.txt | 4 +- demangler/msvc/demangle_msvc.cpp | 2934 +++++++++++++++--------- demangler/msvc/demangle_msvc.h | 353 ++- plugins/pdb-ng/src/symbol_parser.rs | 4 +- plugins/rtti/rtti.cpp | 27 +- rust/src/demangle.rs | 40 + view/pe/coffview.cpp | 2 +- view/pe/peview.cpp | 2 +- 11 files changed, 3567 insertions(+), 1401 deletions(-) diff --git a/demangle.cpp b/demangle.cpp index 8f13c2132e..bb7eb1326b 100644 --- a/demangle.cpp +++ b/demangle.cpp @@ -47,8 +47,19 @@ namespace BinaryNinja { bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, BinaryView* view) { - const bool simplify = Settings::Instance()->Get("analysis.types.templateSimplifier", view); - return DemangleMS(arch, mangledName, outType, outVarName, simplify); + BNType* localType = nullptr; + char** localVarName = nullptr; + size_t localSize = 0; + if (!BNDemangleMSWithOptions(arch->GetObject(), mangledName.c_str(), &localType, &localVarName, &localSize, + view ? view->GetObject() : nullptr)) + return false; + outType = localType ? new Type(localType) : nullptr; + for (size_t i = 0; i < localSize; i++) + { + outVarName.push_back(localVarName[i]); + } + BNFreeDemangledName(&localVarName, localSize); + return true; } bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index a52f9ef8fc..afbebf457a 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -13,7 +13,13 @@ // limitations under the License. 
#include "demangled_type_node.h" +#ifdef BINARYNINJACORE_LIBRARY +#include "binaryview.h" +#endif +#include +#include #include +#include #ifdef BINARYNINJACORE_LIBRARY using namespace BinaryNinjaCore; @@ -23,22 +29,316 @@ using namespace BinaryNinja; using namespace std; #endif +namespace +{ + static constexpr uint8_t DemangledPtr64Bit = 1u << 0; + static constexpr uint8_t DemangledUnalignedBit = 1u << 1; + static constexpr uint8_t DemangledRestrictBit = 1u << 2; + static constexpr uint8_t DemangledReferenceBit = 1u << 3; + static constexpr uint8_t DemangledLvalueBit = 1u << 4; + + static void AppendPointerSuffixToken(string& out, const char* token) + { + if (!out.empty() && out.back() != ' ') + out += ' '; + out += token; + } + + static string JoinNameList(const StringList& name) + { + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + size_t total = (name.size() - 1) * 2; + for (const auto& segment : name) + total += segment.size(); + + string out; + out.reserve(total); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) + { + out += "::"; + out += name[i]; + } + return out; + } + + static void AppendTemplateArgumentList(string& out, const vector& args, + bool spaceAfterComma, Platform* platform) + { + if (args.empty()) + return; + + out += '<'; + for (size_t i = 0; i < args.size(); i++) + { + if (i > 0) + out += spaceAfterComma ? 
", " : ","; + if (args[i].type) + { + if (spaceAfterComma) + { + string arg; + args[i].type->AppendString(arg, platform); + out += arg; + } + else + args[i].type->AppendString(out, platform); + } + } + if (out.back() == '>') + out += ' '; + out += '>'; + } + + static DemangledQualifiedName ConvertNameSegments(StringList nameSegments) + { + DemangledQualifiedName result; + result.reserve(nameSegments.size()); + for (auto& segment: nameSegments) + result.emplace_back(std::move(segment)); + return result; + } + + static const DemangledQualifiedName& EmptyDemangledQualifiedName() + { + static const DemangledQualifiedName empty; + return empty; + } + + static size_t ResolveAddressWidth(Platform* platform) + { + if (platform) + return platform->GetAddressSize(); + return 8; + } + + static size_t ResolveDefaultIntegerWidth(Platform* platform) + { + if (platform) + { +#ifdef BINARYNINJACORE_LIBRARY + Architecture* platformArch = platform->GetArchitecture(); +#else + Ref platformArch = platform->GetArchitecture(); +#endif + if (platformArch) + return platformArch->GetDefaultIntegerSize(); + } + return 4; + } + + static Ref ResolveCallingConvention(BNCallingConventionName cc, Platform* platform) + { +#ifndef BINARYNINJACORE_LIBRARY + Ref platformArch; +#endif + Architecture* arch = nullptr; + if (platform) + { +#ifdef BINARYNINJACORE_LIBRARY + arch = platform->GetArchitecture(); +#else + platformArch = platform->GetArchitecture(); + arch = platformArch.GetPtr(); +#endif + } + + switch (cc) + { + case CdeclCallingConvention: + if (platform) + { + auto platformCC = platform->GetCdeclCallingConvention(); + if (platformCC) + return platformCC; + } + if (arch) + { + auto archCC = arch->GetCdeclCallingConvention(); + if (archCC) + return archCC; + } + return arch ? 
arch->GetCallingConventionByName("cdecl") : nullptr; + case STDCallCallingConvention: + if (platform) + { + auto platformCC = platform->GetStdcallCallingConvention(); + if (platformCC) + return platformCC; + } + if (arch) + { + auto archCC = arch->GetStdcallCallingConvention(); + if (archCC) + return archCC; + } + return arch ? arch->GetCallingConventionByName("stdcall") : nullptr; + case FastcallCallingConvention: + if (platform) + { + auto platformCC = platform->GetFastcallCallingConvention(); + if (platformCC) + return platformCC; + } + if (arch) + { + auto archCC = arch->GetFastcallCallingConvention(); + if (archCC) + return archCC; + } + return arch ? arch->GetCallingConventionByName("fastcall") : nullptr; + case ThisCallCallingConvention: + if (arch) + return arch->GetCallingConventionByName("thiscall"); + return nullptr; + default: + return nullptr; + } + } + +} + +#define HAS_POINTER_SUFFIX(bit) ((m_pointerSuffixBits & (bit)) != 0) + +static const char* CallingConventionString[] = +{ + "", + "__cdecl", + "__pascal", + "__thiscall", + "__stdcall", + "__fastcall", + "__clrcall", + "__eabi", + "__vectorcall", + "__swiftcall", + "__swiftasync" +}; + + +DemangledNamePart::DemangledNamePart(): m_hasTemplateArgs(false), m_spaceAfterTemplateComma(false) +{ +} + + +DemangledNamePart::DemangledNamePart(string base): + m_base(std::move(base)), m_hasTemplateArgs(false), m_spaceAfterTemplateComma(false) +{ +} + + +DemangledNamePart::DemangledNamePart(string base, std::shared_ptr baseTypeSuffix): + m_base(std::move(base)), m_baseTypeSuffix(std::move(baseTypeSuffix)), m_hasTemplateArgs(false), + m_spaceAfterTemplateComma(false) +{ +} + + +DemangledNamePart::DemangledNamePart( + string base, vector templateArgs, bool spaceAfterComma): + m_base(std::move(base)), m_templateArgs(std::move(templateArgs)), m_hasTemplateArgs(true), + m_spaceAfterTemplateComma(spaceAfterComma) +{ +} + + +void DemangledNamePart::SetTemplateArguments(vector args, bool spaceAfterComma) +{ + 
m_templateArgs = std::move(args); + m_hasTemplateArgs = true; + m_spaceAfterTemplateComma = spaceAfterComma; +} + + +void DemangledNamePart::AppendString(string& out, Platform* platform) const +{ + out += m_base; + if (m_baseTypeSuffix) + m_baseTypeSuffix->AppendString(out, platform); + if (m_templateArgs.empty() && m_hasTemplateArgs) + { + out += "<>"; + return; + } + AppendTemplateArgumentList(out, m_templateArgs, m_spaceAfterTemplateComma, platform); +} + + +string DemangledNamePart::GetString(Platform* platform) const +{ + string out; + AppendString(out, platform); + return out; +} + + +bool DemangledNamePart::IsStructurallyEqual(const DemangledNamePart& other) const +{ + if (m_base != other.m_base || m_hasTemplateArgs != other.m_hasTemplateArgs || + m_spaceAfterTemplateComma != other.m_spaceAfterTemplateComma) + return false; + if (m_baseTypeSuffix != other.m_baseTypeSuffix) + { + if (!m_baseTypeSuffix || !other.m_baseTypeSuffix || + !m_baseTypeSuffix->IsStructurallyEqual(*other.m_baseTypeSuffix)) + return false; + } + if (m_templateArgs.size() != other.m_templateArgs.size()) + return false; + for (size_t i = 0; i < m_templateArgs.size(); i++) + { + const auto& a = m_templateArgs[i]; + const auto& b = other.m_templateArgs[i]; + if (a.name != b.name) + return false; + if (a.type == b.type) + continue; + if (!a.type || !b.type || !a.type->IsStructurallyEqual(*b.type)) + return false; + } + return true; +} + + +DemangledNamePart::Ref DemangledNamePart::CreateShared(DemangledNamePart part) +{ + return std::make_shared(std::move(part)); +} + + +DemangledNamePart::Ref DemangledNamePart::CreateSharedCopy(const DemangledNamePart& part) +{ + return std::make_shared(part); +} + DemangledTypeNode::DemangledTypeNode() - : m_typeClass(VoidTypeClass), m_width(0), m_alignment(0), - m_const(false), m_volatile(false), m_signed(false), - m_hasTemplateArgs(false), m_nameType(NoNameType), - m_ntrClass(UnknownNamedTypeClass), - m_pointerReference(PointerReferenceType), 
m_elements(0), - m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE) + : m_nameType(NoNameType), m_pointerSuffixBits(0), m_returnTypeConfidence(BN_FULL_CONFIDENCE), + m_const(false), m_volatile(false), m_payload(VoidPayload{}) +{ +} + + +DemangledTypeNode::NodeRef DemangledTypeNode::CreateShared(DemangledTypeNode node) +{ + return std::make_shared(std::move(node)); +} + + +DemangledTypeNode::NodeRef DemangledTypeNode::CreateSharedCopy(const DemangledTypeNode& node) { + return std::make_shared(node); } DemangledTypeNode DemangledTypeNode::VoidType() { DemangledTypeNode n; - n.m_typeClass = VoidTypeClass; + n.m_payload = VoidPayload{}; return n; } @@ -46,8 +346,7 @@ DemangledTypeNode DemangledTypeNode::VoidType() DemangledTypeNode DemangledTypeNode::BoolType() { DemangledTypeNode n; - n.m_typeClass = BoolTypeClass; - n.m_width = 1; + n.m_payload = BoolPayload{}; return n; } @@ -55,10 +354,26 @@ DemangledTypeNode DemangledTypeNode::BoolType() DemangledTypeNode DemangledTypeNode::IntegerType(size_t width, bool isSigned, const string& altName) { DemangledTypeNode n; - n.m_typeClass = IntegerTypeClass; - n.m_width = width; - n.m_signed = isSigned; - n.m_altName = altName; + if (altName == "char16_t" || altName == "char32_t" || altName == "wchar_t") + { + n.m_payload = WideCharPayload{width, altName}; + return n; + } + IntegerPayload payload; + payload.width = width; + payload.isSigned = isSigned; + if (!(width == 1 && isSigned && altName == "char")) + payload.altName = altName; + n.m_payload = std::move(payload); + return n; +} + + +DemangledTypeNode DemangledTypeNode::AddressSizedIntegerType(bool isSigned, const string& altName) +{ + DemangledTypeNode n = IntegerType(0, isSigned, altName); + if (auto payload = std::get_if(&n.m_payload)) + payload->widthKind = AddressWidth; return n; } @@ -66,9 +381,15 @@ DemangledTypeNode DemangledTypeNode::IntegerType(size_t width, bool isSigned, co DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altName) { 
DemangledTypeNode n; - n.m_typeClass = FloatTypeClass; - n.m_width = width; - n.m_altName = altName; + n.m_payload = FloatPayload{width, altName}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::WideCharType(size_t width, const string& altName) +{ + DemangledTypeNode n; + n.m_payload = WideCharPayload{width, altName}; return n; } @@ -76,21 +397,49 @@ DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altNa DemangledTypeNode DemangledTypeNode::VarArgsType() { DemangledTypeNode n; - n.m_typeClass = VarArgsTypeClass; + n.m_payload = VarArgsPayload{}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PointerType(DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType) +{ + DemangledTypeNode n; + n.m_const = cnst; + n.m_volatile = vltl; + n.m_payload = PointerPayload{CreateShared(std::move(child)), refType}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PointerType(NodeRef child, bool cnst, bool vltl, BNReferenceType refType) +{ + DemangledTypeNode n; + n.m_const = cnst; + n.m_volatile = vltl; + n.m_payload = PointerPayload{std::move(child), refType}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::MemberPointerType( + DemangledTypeNode child, DemangledQualifiedName ownerName, bool cnst, bool vltl) +{ + DemangledTypeNode n; + n.m_const = cnst; + n.m_volatile = vltl; + n.m_payload = MemberPointerPayload{CreateShared(std::move(child)), std::move(ownerName), false}; return n; } -DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTypeNode child, - bool cnst, bool vltl, BNReferenceType refType) +DemangledTypeNode DemangledTypeNode::MemberPointerType( + NodeRef child, DemangledQualifiedName ownerName, bool cnst, bool vltl) { DemangledTypeNode n; - n.m_typeClass = PointerTypeClass; - n.m_width = arch->GetAddressSize(); - n.m_childType = std::make_shared(std::move(child)); n.m_const = cnst; n.m_volatile = vltl; - n.m_pointerReference = refType; + n.m_payload = 
MemberPointerPayload{std::move(child), std::move(ownerName), false}; return n; } @@ -98,9 +447,15 @@ DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTy DemangledTypeNode DemangledTypeNode::ArrayType(DemangledTypeNode child, uint64_t count) { DemangledTypeNode n; - n.m_typeClass = ArrayTypeClass; - n.m_childType = std::make_shared(std::move(child)); - n.m_elements = count; + n.m_payload = ArrayPayload{CreateShared(std::move(child)), count}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::ArrayType(NodeRef child, uint64_t count) +{ + DemangledTypeNode n; + n.m_payload = ArrayPayload{std::move(child), count}; return n; } @@ -109,196 +464,571 @@ DemangledTypeNode DemangledTypeNode::FunctionType(DemangledTypeNode retType, std::nullptr_t, vector params) { DemangledTypeNode n; - n.m_typeClass = FunctionTypeClass; - n.m_childType = std::make_shared(std::move(retType)); - n.m_params = std::move(params); + FunctionPayload payload; + payload.returnType = CreateShared(std::move(retType)); + payload.params = std::move(params); + n.m_payload = std::move(payload); return n; } -DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - vector nameSegments, size_t width, size_t align) +DemangledTypeNode DemangledTypeNode::FunctionType(NodeRef retType, + std::nullptr_t, vector params) { DemangledTypeNode n; - n.m_typeClass = NamedTypeReferenceClass; - n.m_ntrClass = cls; - n.m_nameSegments = std::make_shared>(std::move(nameSegments)); - n.m_width = width; - n.m_alignment = align; + FunctionPayload payload; + payload.returnType = std::move(retType); + payload.params = std::move(params); + n.m_payload = std::move(payload); return n; } +void DemangledTypeNode::SetImplicitThisParameter(DemangledTypeNode type) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->implicitThisParameterType = CreateShared(std::move(type)); + return; + } + assert(false && "SetImplicitThisParameter called for non-function demangled type"); 
+} + + +DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, + StringList nameSegments, size_t width, bool isSigned) +{ + DemangledTypeNode n; + n.m_payload = NamedTypePayload{cls, ConvertNameSegments(std::move(nameSegments)), width, FixedWidth, isSigned}; + return n; +} + DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - const QualifiedName& name, size_t width, size_t align) + DemangledQualifiedName nameSegments, size_t width, bool isSigned) +{ + DemangledTypeNode n; + n.m_payload = NamedTypePayload{cls, std::move(nameSegments), width, FixedWidth, isSigned}; + return n; +} + +DemangledTypeNode DemangledTypeNode::NamedTypeWithDefaultIntegerWidth(BNNamedTypeReferenceClass cls, + StringList nameSegments, bool isSigned) +{ + DemangledTypeNode n = NamedType(cls, std::move(nameSegments), 0, isSigned); + if (auto payload = std::get_if(&n.m_payload)) + payload->widthKind = DefaultIntegerWidth; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PostfixType(NodeRef child, string suffix) { - return NamedType(cls, vector(name.begin(), name.end()), width, align); + DemangledTypeNode n; + n.m_payload = PostfixPayload{std::move(child), std::move(suffix), nullptr}; + return n; } -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, vector nameSegments) +DemangledTypeNode DemangledTypeNode::PostfixType(NodeRef child, string separator, NodeRef suffixType) { - m_ntrClass = cls; - m_nameSegments = std::make_shared>(std::move(nameSegments)); + DemangledTypeNode n = PostfixType(child, std::move(separator)); + if (auto payload = std::get_if(&n.m_payload)) + payload->suffixType = std::move(suffixType); + return n; } -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, const QualifiedName& name) +uint8_t DemangledTypeNode::PointerSuffixBit(BNPointerSuffix ps) { - SetNTR(cls, vector(name.begin(), name.end())); + switch (ps) + { + case Ptr64Suffix: + return DemangledPtr64Bit; + case UnalignedSuffix: + return 
DemangledUnalignedBit; + case RestrictSuffix: + return DemangledRestrictBit; + case ReferenceSuffix: + return DemangledReferenceBit; + case LvalueSuffix: + return DemangledLvalueBit; + default: + return 0; + } } -string DemangledTypeNode::GetTypeNameString() const +size_t DemangledTypeNode::ResolveWidth(size_t width, WidthKind widthKind, Platform* platform) { - if (!m_nameSegments) - return {}; - const auto& segs = *m_nameSegments; - size_t n = segs.size(); - if (n == 0) - return {}; - if (n == 1) - return segs[0]; + switch (widthKind) + { + case AddressWidth: + return ResolveAddressWidth(platform); + case DefaultIntegerWidth: + return ResolveDefaultIntegerWidth(platform); + case FixedWidth: + default: + return width; + } +} - // Pre-reserve: sum of segments + (n-1) * 2 for "::" separators - size_t total = (n - 1) * 2; - for (const auto& s : segs) - total += s.size(); - string result; - result.reserve(total); - result += segs[0]; - for (size_t i = 1; i < n; i++) +BNTypeClass DemangledTypeNode::GetPayloadClass() const +{ + switch (m_payload.index()) { - result += "::"; - result += segs[i]; + case 0: return VoidTypeClass; + case 1: return BoolTypeClass; + case 2: return IntegerTypeClass; + case 3: return FloatTypeClass; + case 4: return WideCharTypeClass; + case 5: return VarArgsTypeClass; + case 6: + case 7: + // PointerPayload and MemberPointerPayload both preserve the public pointer type class. + return PointerTypeClass; + case 8: return ArrayTypeClass; + case 9: return FunctionTypeClass; + case 10: + case 11: + // PostfixPayload is an internal named-type rendering form, so it reports as a named type. 
+ return NamedTypeReferenceClass; + default: + return VoidTypeClass; } +} + + +DemangledTypeNode::NodeRef DemangledTypeNode::GetPrimaryChild() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + if (auto payload = std::get_if(&m_payload)) + return payload->returnType; + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + return nullptr; +} + + +bool DemangledTypeNode::AddQualifiersToPointerChild(bool cnst, bool vltl) +{ + NodeRef* childType = nullptr; + if (auto payload = std::get_if(&m_payload)) + childType = &payload->childType; + else if (auto payload = std::get_if(&m_payload)) + childType = &payload->childType; + else + return false; + + if (!*childType) + return true; + if ((*childType).use_count() > 1) + *childType = CreateSharedCopy(**childType); + if (cnst) + (*childType)->SetConst(true); + if (vltl) + (*childType)->SetVolatile(true); + return true; +} + + +const DemangledQualifiedName& DemangledTypeNode::GetName() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->name; + return EmptyDemangledQualifiedName(); +} + + +DemangledQualifiedName& DemangledTypeNode::GetMutableName() +{ + if (auto payload = std::get_if(&m_payload)) + return payload->name; + assert(false && "GetMutableName called for non-named demangled type"); + static thread_local DemangledQualifiedName empty; + empty.clear(); + return empty; +} + + +void DemangledTypeNode::SetName(DemangledQualifiedName name) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->name = std::move(name); + return; + } + assert(false && "SetName called for non-named demangled type"); +} + + +BNNamedTypeReferenceClass DemangledTypeNode::GetNTRClass() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->ntrClass; + return UnknownNamedTypeClass; +} + + +void 
DemangledTypeNode::SetNTRType(BNNamedTypeReferenceClass cls) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->ntrClass = cls; + return; + } + assert(false && "SetNTRType called for non-named demangled type"); +} + + +void DemangledTypeNode::SetParenthesizedMemberPointer(bool parenthesized) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->parenthesized = parenthesized; + return; + } + assert(false && "SetParenthesizedMemberPointer called for non-member-pointer demangled type"); +} + + +void DemangledTypeNode::SetCallingConventionName(BNCallingConventionName cc) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->callingConventionName = cc; + return; + } + assert(false && "SetCallingConventionName called for non-function demangled type"); +} + + +bool DemangledTypeNode::HasTemplateArguments() const +{ + const auto* payload = std::get_if(&m_payload); + if (!payload) + return false; + for (const auto& segment: payload->name) + if (segment.HasTemplateArguments()) + return true; + return false; +} + + +bool DemangledTypeNode::IsStructurallyEqual(const DemangledTypeNode& other) const +{ + if (m_nameType != other.m_nameType || m_pointerSuffixBits != other.m_pointerSuffixBits || + m_returnTypeConfidence != other.m_returnTypeConfidence || + m_const != other.m_const || m_volatile != other.m_volatile || + m_payload.index() != other.m_payload.index()) + return false; + + auto typePtrsEqual = [](const NodeRef& a, const NodeRef& b) { + if (a == b) + return true; + if (!a || !b) + return false; + return a->IsStructurallyEqual(*b); + }; + + auto namePartsEqual = [](const DemangledQualifiedName& a, const DemangledQualifiedName& b) { + if (a.size() != b.size()) + return false; + for (size_t i = 0; i < a.size(); i++) + { + if (!a[i].IsStructurallyEqual(b[i])) + return false; + } + return true; + }; + + auto paramsEqual = [&typePtrsEqual](const vector& a, const vector& b) { + if (a.size() != b.size()) + return false; + for (size_t i = 0; i < 
a.size(); i++) + { + if (a[i].name != b[i].name || !typePtrsEqual(a[i].type, b[i].type)) + return false; + } + return true; + }; + + if (auto payload = std::get_if(&m_payload)) + return payload && std::get_if(&other.m_payload); + if (auto payload = std::get_if(&m_payload)) + return payload && std::get_if(&other.m_payload); + if (auto payload = std::get_if(&m_payload)) + return payload && std::get_if(&other.m_payload); + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->width == otherPayload->width && + payload->widthKind == otherPayload->widthKind && + payload->isSigned == otherPayload->isSigned && payload->altName == otherPayload->altName; + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->width == otherPayload->width && payload->altName == otherPayload->altName; + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->width == otherPayload->width && payload->altName == otherPayload->altName; + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->referenceType == otherPayload->referenceType && + typePtrsEqual(payload->childType, otherPayload->childType); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->parenthesized == otherPayload->parenthesized && + typePtrsEqual(payload->childType, otherPayload->childType) && + namePartsEqual(payload->ownerName, otherPayload->ownerName); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->elements == otherPayload->elements && + typePtrsEqual(payload->childType, otherPayload->childType); + } + if (auto payload = 
std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->callingConventionName == otherPayload->callingConventionName && + typePtrsEqual(payload->returnType, otherPayload->returnType) && + typePtrsEqual(payload->implicitThisParameterType, otherPayload->implicitThisParameterType) && + paramsEqual(payload->params, otherPayload->params); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->ntrClass == otherPayload->ntrClass && + payload->width == otherPayload->width && payload->widthKind == otherPayload->widthKind && + payload->isSigned == otherPayload->isSigned && + namePartsEqual(payload->name, otherPayload->name); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->suffix == otherPayload->suffix && + typePtrsEqual(payload->childType, otherPayload->childType) && + typePtrsEqual(payload->suffixType, otherPayload->suffixType); + } + + return false; +} + + +StringList DemangledTypeNode::RenderTypeNameSegments(Platform* platform) const +{ + StringList result; + if (auto payload = std::get_if(&m_payload)) + { + result.push_back(GetString(platform)); + return result; + } + auto payload = std::get_if(&m_payload); + if (!payload) + return result; + result.reserve(payload->name.size()); + for (const auto& segment: payload->name) + result.push_back(segment.GetString(platform)); return result; } -size_t DemangledTypeNode::NameStringSize() const +void DemangledTypeNode::AddPointerSuffixes(TypeBuilder& tb, bool omitPtr64) const { - if (!m_nameSegments) - return 0; - size_t total = 0; - for (const auto& s : *m_nameSegments) - total += s.size(); - return total; + if (HAS_POINTER_SUFFIX(DemangledPtr64Bit) && !omitPtr64) + tb.AddPointerSuffix(Ptr64Suffix); + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + tb.AddPointerSuffix(UnalignedSuffix); + if 
(HAS_POINTER_SUFFIX(DemangledRestrictBit)) + tb.AddPointerSuffix(RestrictSuffix); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + tb.AddPointerSuffix(ReferenceSuffix); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + tb.AddPointerSuffix(LvalueSuffix); } -string DemangledTypeNode::GetModifierString() const +bool DemangledTypeNode::HasPostfixType() const +{ + return std::holds_alternative(m_payload); +} + + +void DemangledTypeNode::AppendPostfixType(string& out, Platform* platform) const +{ + const auto* payload = std::get_if(&m_payload); + if (!payload) + return; + if (payload->childType) + payload->childType->AppendString(out, platform); + out += payload->suffix; + if (payload->suffixType) + payload->suffixType->AppendString(out, platform); +} + + +void DemangledTypeNode::AppendModifiers(string& out) const { if (m_const && m_volatile) - return "const volatile"; - if (m_const) - return "const"; - if (m_volatile) - return "volatile"; - return ""; + out += " const volatile"; + else if (m_const) + out += " const"; + else if (m_volatile) + out += " volatile"; } -string DemangledTypeNode::GetPointerSuffixString() const +void DemangledTypeNode::AppendPointerSuffix(string& out) const { - static const char* suffixStrings[] = { - "__ptr64", - "__unaligned", - "__restrict", - "&", - "&&" - }; + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + AppendPointerSuffixToken(out, "__unaligned"); + if (HAS_POINTER_SUFFIX(DemangledRestrictBit)) + AppendPointerSuffixToken(out, "__restrict"); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + AppendPointerSuffixToken(out, "&"); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + AppendPointerSuffixToken(out, "&&"); +} - string out; - for (auto& s : m_pointerSuffix) + +void DemangledTypeNode::AppendNamePartList( + string& out, const DemangledQualifiedName& name, Platform* platform) +{ + if (name.empty()) + return; + name[0].AppendString(out, platform); + for (size_t i = 1; i < name.size(); i++) { - if (!out.empty() && out.back() != ' ') - 
out += ' '; - out += suffixStrings[s]; + out += "::"; + name[i].AppendString(out, platform); } - return out; } -string DemangledTypeNode::GetStringBeforeName() const +void DemangledTypeNode::AppendTypeName(string& out, Platform* platform) const +{ + if (auto payload = std::get_if(&m_payload)) + AppendNamePartList(out, payload->name, platform); +} + + +string DemangledTypeNode::GetStringBeforeName(Platform* platform) const { string out; - AppendBeforeName(out); + AppendBeforeName(out, nullptr, platform); return out; } -string DemangledTypeNode::GetStringAfterName() const +string DemangledTypeNode::GetStringAfterName(Platform* platform) const { string out; - AppendAfterName(out); + AppendAfterName(out, nullptr, platform); return out; } -void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType) const +void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType, Platform* platform) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - - switch (m_typeClass) + switch (GetPayloadClass()) { case FunctionTypeClass: + { + const auto& payload = std::get(m_payload); // Return type before name - if (m_childType) + if (payload.returnType) { - if (!out.empty() && out.back() != ' ') - out += " "; - m_childType->AppendBeforeName(out, this); + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + payload.returnType->AppendBeforeName(out, this, platform); } // If parent is a pointer, add "(" for function pointer syntax - if (parentType && parentType->m_typeClass == PointerTypeClass) + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) + { + const auto* parentMemberPointer = std::get_if(&parentType->m_payload); + if (!out.empty() && out.back() != ' ' && + !(parentMemberPointer && parentMemberPointer->parenthesized)) + out += ' '; + out += '('; + } + if (static_cast(payload.callingConventionName) < (sizeof(CallingConventionString) / 
sizeof(CallingConventionString[0]))) { - if (!out.empty() && out.back() != ' ') - out += " "; - out += "("; + const char* callingConvention = CallingConventionString[static_cast(payload.callingConventionName)]; + if (callingConvention[0] != 0) + { + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + out += callingConvention; + } } break; + } case IntegerTypeClass: - if (!m_altName.empty()) - out += m_altName; - else if (m_signed && m_width == 1) + { + const auto& payload = std::get(m_payload); + const size_t width = ResolveWidth(payload.width, payload.widthKind, platform); + if (!payload.altName.empty()) + out += payload.altName; + else if (payload.isSigned && width == 1) out += "char"; - else if (m_signed) - out += "int" + to_string(m_width * 8) + "_t"; + else if (payload.isSigned) + { + out += "int"; + out += to_string(width * 8); + out += "_t"; + } else - out += "uint" + to_string(m_width * 8) + "_t"; - if (!modifiers.empty()) - out += " " + modifiers; + { + out += "uint"; + out += to_string(width * 8); + out += "_t"; + } + AppendModifiers(out); break; + } case FloatTypeClass: - if (!m_altName.empty()) - out += m_altName; - else switch (m_width) + { + const auto& payload = std::get(m_payload); + if (!payload.altName.empty()) + out += payload.altName; + else switch (payload.width) { case 2: out += "float16"; break; case 4: out += "float"; break; case 8: out += "double"; break; case 10: out += "long double"; break; - default: out += "float" + to_string(m_width * 8); break; + default: + out += "float"; + out += to_string(payload.width * 8); + break; } - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; + } case BoolTypeClass: out += "bool"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VoidTypeClass: out += "void"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VarArgsTypeClass: @@ -306,32 +1036,65 @@ void 
DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p break; case PointerTypeClass: - if (m_childType) - m_childType->AppendBeforeName(out, this); - switch (m_pointerReference) + if (auto payload = std::get_if(&m_payload)) { - case ReferenceReferenceType: out += "&"; break; - case PointerReferenceType: out += "*"; break; - case RValueReferenceType: out += "&&"; break; - default: break; + if (payload->childType) + payload->childType->AppendBeforeName(out, this, platform); + if (payload->parenthesized) + { + if (out.empty() || out.back() != '(') + out += '('; + } + else if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + if (!payload->ownerName.empty()) + AppendNamePartList(out, payload->ownerName, platform); + out += "::*"; + } + else if (auto payload = std::get_if(&m_payload)) + { + if (payload->childType) + payload->childType->AppendBeforeName(out, this, platform); + switch (payload->referenceType) + { + case ReferenceReferenceType: out += '&'; break; + case PointerReferenceType: out += '*'; break; + case RValueReferenceType: out += "&&"; break; + default: break; + } } - if (!ptrSuffix.empty()) - out += " " + ptrSuffix; - if (!modifiers.empty()) - out += " " + modifiers; + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) + { + out += ' '; + AppendPointerSuffix(out); + } + AppendModifiers(out); break; case ArrayTypeClass: - if (m_childType) - m_childType->AppendBeforeName(out, this); - if (parentType && parentType->m_typeClass == PointerTypeClass) + { + const auto& payload = std::get(m_payload); + if (payload.childType) + payload.childType->AppendBeforeName(out, this, platform); + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) { - out += " ("; + const auto* parentMemberPointer = std::get_if(&parentType->m_payload); + out += (parentMemberPointer && parentMemberPointer->parenthesized) ? 
"(" : " ("; } break; + } case NamedTypeReferenceClass: - switch (m_ntrClass) + if (HasPostfixType()) + { + AppendPostfixType(out, platform); + AppendModifiers(out); + break; + } + { + const auto& payload = std::get(m_payload); + switch (payload.ntrClass) { case ClassNamedTypeClass: out += "class "; break; case StructNamedTypeClass: out += "struct "; break; @@ -339,10 +1102,21 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case EnumNamedTypeClass: out += "enum "; break; default: break; } - out += GetTypeNameString(); - if (!modifiers.empty()) - out += " " + modifiers; + AppendTypeName(out, platform); + AppendModifiers(out); + break; + } + + case WideCharTypeClass: + { + const auto& payload = std::get(m_payload); + if (!payload.altName.empty()) + out += payload.altName; + else + out += "wchar_t"; + AppendModifiers(out); break; + } default: break; @@ -356,73 +1130,116 @@ static string FormatArrayCount(uint64_t elements) } -void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType) const +void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType, Platform* platform) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - - switch (m_typeClass) + switch (GetPayloadClass()) { case FunctionTypeClass: { + const auto& payload = std::get(m_payload); // Close the "(" from before-name if parent is pointer - if (parentType && parentType->m_typeClass == PointerTypeClass) - out += ")"; + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) + out += ')'; - out += "("; - for (size_t i = 0; i < m_params.size(); i++) + out += '('; + for (size_t i = 0; i < payload.params.size(); i++) { if (i != 0) out += ", "; - if (m_params[i].type) - out += m_params[i].type->GetString(); + if (payload.params[i].type) + payload.params[i].type->AppendString(out, platform); } - out += ")"; - if (!modifiers.empty()) - out += " " + modifiers; - if 
(!ptrSuffix.empty()) - out += ptrSuffix; + out += ')'; + AppendModifiers(out); + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) + AppendPointerSuffix(out); // Return type's after-name tokens - if (m_childType) - m_childType->AppendAfterName(out, this); + if (payload.returnType) + payload.returnType->AppendAfterName(out, this, platform); break; } case PointerTypeClass: - if (m_childType) - m_childType->AppendAfterName(out, this); + if (auto payload = std::get_if(&m_payload)) + { + if (payload->childType) + payload->childType->AppendAfterName(out, this, platform); + const BNTypeClass childClass = payload->childType ? payload->childType->GetPayloadClass() : VoidTypeClass; + if (payload->parenthesized && (!payload->childType || + (childClass != FunctionTypeClass && childClass != ArrayTypeClass))) + out += ')'; + } + else if (auto payload = std::get_if(&m_payload)) + { + if (payload->childType) + payload->childType->AppendAfterName(out, this, platform); + } break; case ArrayTypeClass: - if (parentType && parentType->m_typeClass == PointerTypeClass) + { + const auto& payload = std::get(m_payload); + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) out += ")"; - out += "[" + FormatArrayCount(m_elements) + "]"; - if (m_childType) - m_childType->AppendAfterName(out, this); + out += "[" + FormatArrayCount(payload.elements) + "]"; + if (payload.childType) + payload.childType->AppendAfterName(out, this, platform); break; + } default: break; } } -string DemangledTypeNode::GetString() const +void DemangledTypeNode::AppendString(string& out, Platform* platform) const { - const string before = GetStringBeforeName(); - const string after = GetStringAfterName(); - if (!before.empty() && !after.empty() && before.back() != ' ' && before.back() != '*' - && before.back() != '&' && after.front() != ' ' && after.front() != '[' - && m_childType && m_childType->m_typeClass != 
FunctionTypeClass) + size_t beforeEnd = out.size(); + AppendBeforeName(out, nullptr, platform); + beforeEnd = out.size(); // track where "before" ends + + string after; + AppendAfterName(after, nullptr, platform); + + if (!after.empty() && beforeEnd > 0) { - return before + " " + after; + char lastBefore = out[beforeEnd - 1]; + NodeRef child = GetPrimaryChild(); + if (lastBefore != ' ' && lastBefore != '*' && lastBefore != '&' + && after.front() != ' ' && after.front() != '[' + && child && child->GetPayloadClass() != FunctionTypeClass) + { + out += ' '; + } } - return before + after; + out += after; +} + + +string DemangledTypeNode::GetString() const +{ + return GetString(nullptr); +} + + +string DemangledTypeNode::GetString(Platform* platform) const +{ + string out; + AppendString(out, platform); + return out; +} + + +string DemangledTypeNode::GetTypeAndName(const StringList& name) const +{ + return GetTypeAndName(name, nullptr); } -string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const +string DemangledTypeNode::GetTypeAndName(const StringList& name, Platform* platform) const { - const string before = GetStringBeforeName(); - const string qName = name.GetString(); - const string after = GetStringAfterName(); + const string before = GetStringBeforeName(platform); + const string qName = JoinNameList(name); + const string after = GetStringAfterName(platform); if ((!before.empty() && !qName.empty() && before.back() != ' ' && qName.front() != ' ') || (!before.empty() && !after.empty() && before.back() != ' ' && after.front() != ' ')) return before + " " + qName + after; @@ -430,9 +1247,27 @@ string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const } -Ref DemangledTypeNode::Finalize() const +bool DemangledTypeNode::HasUndeterminedTopLevelSize() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->widthKind == FixedWidth && payload->width == 0; + if (std::holds_alternative(m_payload)) + return true; + if (auto 
payload = std::get_if(&m_payload)) + return payload->childType && payload->childType->HasUndeterminedTopLevelSize(); + return false; +} + + +uint8_t DemangledTypeNode::GetValueConfidence() const +{ + return HasUndeterminedTopLevelSize() ? BN_DEFAULT_CONFIDENCE : BN_FULL_CONFIDENCE; +} + + +Ref DemangledTypeNode::Finalize(Platform* platform) const { - switch (m_typeClass) + switch (GetPayloadClass()) { case VoidTypeClass: { @@ -456,9 +1291,11 @@ Ref DemangledTypeNode::Finalize() const case IntegerTypeClass: { + const auto& payload = std::get(m_payload); + const size_t width = ResolveWidth(payload.width, payload.widthKind, platform); if (!m_const && !m_volatile) - return Type::IntegerType(m_width, m_signed, m_altName); - TypeBuilder tb = TypeBuilder::IntegerType(m_width, m_signed, m_altName); + return Type::IntegerType(width, payload.isSigned, payload.altName); + TypeBuilder tb = TypeBuilder::IntegerType(width, payload.isSigned, payload.altName); tb.SetConst(m_const); tb.SetVolatile(m_volatile); return tb.Finalize(); @@ -466,9 +1303,10 @@ Ref DemangledTypeNode::Finalize() const case FloatTypeClass: { + const auto& payload = std::get(m_payload); if (!m_const && !m_volatile) - return Type::FloatType(m_width, m_altName); - TypeBuilder tb = TypeBuilder::FloatType(m_width, m_altName); + return Type::FloatType(payload.width, payload.altName); + TypeBuilder tb = TypeBuilder::FloatType(payload.width, payload.altName); tb.SetConst(m_const); tb.SetVolatile(m_volatile); return tb.Finalize(); @@ -477,16 +1315,43 @@ Ref DemangledTypeNode::Finalize() const case VarArgsTypeClass: return TypeBuilder::VarArgsType().Finalize(); + case WideCharTypeClass: + { + const auto& payload = std::get(m_payload); + if (!m_const && !m_volatile) + return Type::WideCharType(payload.width, payload.altName); + TypeBuilder tb = TypeBuilder::WideCharType(payload.width, payload.altName); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + case PointerTypeClass: { - Ref 
child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference).Finalize(); + if (auto payload = std::get_if(&m_payload)) + { + Ref child = payload->childType ? payload->childType->Finalize(platform) : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::PointerType( + ResolveWidth(0, AddressWidth, platform), child, m_const, m_volatile, PointerReferenceType); + AddPointerSuffixes(tb, true); + Ref normalized = tb.Finalize(); + return Type::NamedType(QualifiedName({GetString(platform)}), normalized.GetPtr()); + } + + const auto& payload = std::get(m_payload); + Ref child = payload.childType ? payload.childType->Finalize(platform) : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::PointerType( + ResolveWidth(0, AddressWidth, platform), child, m_const, m_volatile, payload.referenceType); + AddPointerSuffixes(tb, true); + Ref normalized = tb.Finalize(); + return normalized; } case ArrayTypeClass: { - Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - TypeBuilder tb = TypeBuilder::ArrayType(child, m_elements); + const auto& payload = std::get(m_payload); + Ref child = payload.childType ? payload.childType->Finalize(platform) : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::ArrayType(child, payload.elements); if (m_const) tb.SetConst(m_const); if (m_volatile) @@ -496,35 +1361,68 @@ Ref DemangledTypeNode::Finalize() const case FunctionTypeClass: { - Ref retType = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); + const auto& payload = std::get(m_payload); + Ref retType = payload.returnType ? payload.returnType->Finalize(platform) : Ref(Type::VoidType()); + uint8_t retTypeConfidence = payload.returnType ? 
payload.returnType->GetValueConfidence() : BN_FULL_CONFIDENCE; + retTypeConfidence = std::min(retTypeConfidence, m_returnTypeConfidence); + vector finalParams; - finalParams.reserve(m_params.size()); - for (auto& p : m_params) + finalParams.reserve(payload.params.size() + (payload.implicitThisParameterType ? 1 : 0)); + if (payload.implicitThisParameterType) + { + Ref thisType = payload.implicitThisParameterType->Finalize(platform); + finalParams.push_back({"this", thisType->WithConfidence(payload.implicitThisParameterType->GetValueConfidence()), + DefaultLocationSource, Variable()}); + } + for (auto& p : payload.params) + { + Ref pType = p.type ? p.type->Finalize(platform) : Ref(Type::VoidType()); + uint8_t pTypeConfidence = p.type ? p.type->GetValueConfidence() : BN_FULL_CONFIDENCE; + finalParams.push_back({p.name, pType->WithConfidence(pTypeConfidence), DefaultLocationSource, Variable()}); + } + Confidence> callingConvention; + if (payload.callingConventionName != NoCallingConvention) { - Ref pType = p.type ? 
p.type->Finalize() : Ref(Type::VoidType()); - finalParams.push_back({p.name, pType, DefaultLocationSource, Variable()}); + if (auto resolvedCallingConvention = ResolveCallingConvention(payload.callingConventionName, platform)) + callingConvention = Confidence>(resolvedCallingConvention, BN_FULL_CONFIDENCE); } - TypeBuilder tb = TypeBuilder::FunctionType(retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams); + TypeBuilder tb = TypeBuilder::FunctionType( + retType->WithConfidence(retTypeConfidence), callingConvention, finalParams, + Confidence(false, 0)); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); + AddPointerSuffixes(tb); tb.SetNameType(m_nameType); + if (payload.callingConventionName != NoCallingConvention) + tb.SetCallingConventionName(payload.callingConventionName); return tb.Finalize(); } case NamedTypeReferenceClass: { + if (auto payload = std::get_if(&m_payload)) + { + QualifiedName name(RenderTypeNameSegments(platform)); + TypeBuilder tb = TypeBuilder::NamedType( + NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, name), 0, 1); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + AddPointerSuffixes(tb); + tb.SetNameType(m_nameType); + tb.SetHasTemplateArguments(false); + return tb.Finalize(); + } + + const auto& payload = std::get(m_payload); + QualifiedName name(RenderTypeNameSegments(platform)); TypeBuilder tb = TypeBuilder::NamedType( - NamedTypeReference::GenerateAutoDemangledTypeReference( - m_ntrClass, QualifiedName(m_nameSegments ? *m_nameSegments : vector{})), - m_width, m_alignment > 0 ? 
m_alignment : 1); + NamedTypeReference::GenerateAutoDemangledTypeReference(payload.ntrClass, name), + ResolveWidth(payload.width, payload.widthKind, platform), 1); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); + AddPointerSuffixes(tb); tb.SetNameType(m_nameType); - tb.SetHasTemplateArguments(m_hasTemplateArgs); + tb.SetHasTemplateArguments(HasTemplateArguments()); return tb.Finalize(); } @@ -532,3 +1430,6 @@ Ref DemangledTypeNode::Finalize() const return Type::VoidType(); } } + +#undef HAS_POINTER_SUFFIX +#undef GetClass diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index 62ad9004a5..39573d94a0 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -40,26 +40,76 @@ #endif #endif +#include #include +#include + #ifdef BINARYNINJACORE_LIBRARY -#include "binaryninjacore_global.h" -#define _STD_SET BinaryNinjaCore::set +namespace BinaryNinjaCore { class Platform; } #else -#include -#define _STD_SET std::set +namespace BinaryNinja { class Platform; } #endif -// Lightweight type representation for the GNU3 demangler. 
+using StringList = _STD_VECTOR<_STD_STRING>; + +class DemangledTypeNode; + +struct DemangledTypeNodeParam +{ + _STD_STRING name; + std::shared_ptr type = nullptr; +}; + +class DemangledNamePart +{ +public: + using Ref = std::shared_ptr; + + DemangledNamePart(); + explicit DemangledNamePart(_STD_STRING base); + DemangledNamePart(_STD_STRING base, std::shared_ptr baseTypeSuffix); + DemangledNamePart(_STD_STRING base, _STD_VECTOR templateArgs, + bool spaceAfterComma = false); + + const _STD_STRING& GetBase() const { return m_base; } + void SetBase(_STD_STRING base) { m_base = std::move(base); } + void AppendBase(const _STD_STRING& suffix) { m_base += suffix; } + bool HasTemplateArguments() const { return m_hasTemplateArgs || !m_templateArgs.empty(); } + _STD_VECTOR& GetMutableTemplateArguments() { return m_templateArgs; } + void SetTemplateArguments(_STD_VECTOR args, bool spaceAfterComma = false); + + void AppendString(_STD_STRING& out, BN::Platform* platform) const; + _STD_STRING GetString(BN::Platform* platform = nullptr) const; + bool IsStructurallyEqual(const DemangledNamePart& other) const; + + static Ref CreateShared(DemangledNamePart part); + static Ref CreateSharedCopy(const DemangledNamePart& part); + +private: + _STD_STRING m_base; + std::shared_ptr m_baseTypeSuffix; + _STD_VECTOR m_templateArgs; + bool m_hasTemplateArgs; + bool m_spaceAfterTemplateComma; +}; + +using DemangledQualifiedName = _STD_VECTOR; + +// Lightweight type representation for demanglers (GNU3 and MSVC). // This object serves as an abstraction layer between C++'s type system and our own. // It also removes a source of a lot of reallocation of NamedTypeReference BinaryNinja::Type objects // and only creates real Type objects when Finalize() is called. 
class DemangledTypeNode { public: - struct Param + using NodeRef = std::shared_ptr; + using Param = DemangledTypeNodeParam; + + enum WidthKind : uint8_t { - _STD_STRING name; - std::shared_ptr type; + FixedWidth, + AddressWidth, + DefaultIntegerWidth }; DemangledTypeNode(); @@ -72,100 +122,174 @@ class DemangledTypeNode static DemangledTypeNode VoidType(); static DemangledTypeNode BoolType(); static DemangledTypeNode IntegerType(size_t width, bool isSigned, const _STD_STRING& altName = ""); + static DemangledTypeNode AddressSizedIntegerType(bool isSigned, const _STD_STRING& altName = ""); static DemangledTypeNode FloatType(size_t width, const _STD_STRING& altName = ""); + static DemangledTypeNode WideCharType(size_t width, const _STD_STRING& altName = ""); static DemangledTypeNode VarArgsType(); - static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, - bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode PointerType(DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode PointerType(NodeRef child, bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode MemberPointerType(DemangledTypeNode child, DemangledQualifiedName ownerName, + bool cnst, bool vltl); + static DemangledTypeNode MemberPointerType(NodeRef child, DemangledQualifiedName ownerName, + bool cnst, bool vltl); static DemangledTypeNode ArrayType(DemangledTypeNode child, uint64_t count); + static DemangledTypeNode ArrayType(NodeRef child, uint64_t count); static DemangledTypeNode FunctionType(DemangledTypeNode retType, std::nullptr_t, _STD_VECTOR params); + static DemangledTypeNode FunctionType(NodeRef retType, + std::nullptr_t, _STD_VECTOR params); static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, - _STD_VECTOR<_STD_STRING> nameSegments, size_t width = 0, size_t align = 0); + StringList nameSegments, size_t width = 0, bool isSigned = false); static DemangledTypeNode 
NamedType(BNNamedTypeReferenceClass cls, - const BN::QualifiedName& name, size_t width = 0, size_t align = 0); + DemangledQualifiedName nameSegments, size_t width = 0, bool isSigned = false); + static DemangledTypeNode NamedTypeWithDefaultIntegerWidth(BNNamedTypeReferenceClass cls, + StringList nameSegments, bool isSigned = false); + static DemangledTypeNode PostfixType(NodeRef child, _STD_STRING suffix); + static DemangledTypeNode PostfixType(NodeRef child, _STD_STRING separator, NodeRef suffixType); + static NodeRef CreateShared(DemangledTypeNode node); + static NodeRef CreateSharedCopy(const DemangledTypeNode& node); - // Getters - BNTypeClass GetClass() const { return m_typeClass; } -#ifdef BINARYNINJACORE_LIBRARY - BNTypeClass GetTypeClass() const { return m_typeClass; } -#endif - const _STD_VECTOR<_STD_STRING>& GetTypeName() const - { - if (!m_nameSegments) - { - static const _STD_VECTOR<_STD_STRING> empty; - return empty; - } - return *m_nameSegments; - } - _STD_VECTOR<_STD_STRING>& GetMutableTypeName() - { - if (!m_nameSegments) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(); - else if (m_nameSegments.use_count() > 1) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(*m_nameSegments); - return *m_nameSegments; - } - _STD_STRING GetTypeNameString() const; - size_t NameStringSize() const; + BNTypeClass GetClass() const { return GetPayloadClass(); } + const DemangledQualifiedName& GetName() const; + DemangledQualifiedName& GetMutableName(); bool IsConst() const { return m_const; } bool IsVolatile() const { return m_volatile; } BNNameType GetNameType() const { return m_nameType; } - bool HasTemplateArguments() const { return m_hasTemplateArgs; } - const _STD_SET& GetPointerSuffix() const { return m_pointerSuffix; } - BNNamedTypeReferenceClass GetNTRClass() const { return m_ntrClass; } + bool HasTemplateArguments() const; + uint8_t GetPointerSuffixBits() const { return m_pointerSuffixBits; } + BNNamedTypeReferenceClass GetNTRClass() 
const; + void SetParenthesizedMemberPointer(bool parenthesized); + StringList RenderTypeNameSegments(BN::Platform* platform = nullptr) const; + bool IsStructurallyEqual(const DemangledTypeNode& other) const; - // Setters - void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(std::move(name)); } + void SetName(DemangledQualifiedName name); void SetConst(bool c) { m_const = c; } void SetVolatile(bool v) { m_volatile = v; } void SetNameType(BNNameType nt) { m_nameType = nt; } - void SetHasTemplateArguments(bool t) { m_hasTemplateArgs = t; } - void SetPointerSuffix(const _STD_SET& s) { m_pointerSuffix = s; } - void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffix.insert(ps); } + void SetPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits = bits; } + void AddPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits |= bits; } + void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= PointerSuffixBit(ps); } + bool AddQualifiersToPointerChild(bool cnst, bool vltl); void SetReturnTypeConfidence(uint8_t c) { m_returnTypeConfidence = c; } + void SetCallingConventionName(BNCallingConventionName cc); + void SetNTRType(BNNamedTypeReferenceClass cls); + void SetImplicitThisParameter(DemangledTypeNode type); - // Named type reference operations - void SetNTR(BNNamedTypeReferenceClass cls, _STD_VECTOR<_STD_STRING> nameSegments); - void SetNTR(BNNamedTypeReferenceClass cls, const BN::QualifiedName& name); - - // String formatting + void AppendString(_STD_STRING& out, BN::Platform* platform) const; _STD_STRING GetString() const; - _STD_STRING GetStringBeforeName() const; - _STD_STRING GetStringAfterName() const; - _STD_STRING GetTypeAndName(const BN::QualifiedName& name) const; + _STD_STRING GetString(BN::Platform* platform) const; + _STD_STRING GetStringBeforeName(BN::Platform* platform) const; + _STD_STRING GetStringAfterName(BN::Platform* platform) const; + _STD_STRING GetTypeAndName(const StringList& name) 
const; + _STD_STRING GetTypeAndName(const StringList& name, BN::Platform* platform) const; - // Conversion to real Type - BN::Ref Finalize() const; + BN::Ref Finalize(BN::Platform* platform = nullptr) const; private: - BNTypeClass m_typeClass; - size_t m_width; - size_t m_alignment; - bool m_const; - bool m_volatile; - bool m_signed; - bool m_hasTemplateArgs; - BNNameType m_nameType; - _STD_SET m_pointerSuffix; - _STD_STRING m_altName; + struct VoidPayload {}; + struct BoolPayload {}; + struct VarArgsPayload {}; - // Named type ref data - BNNamedTypeReferenceClass m_ntrClass; - std::shared_ptr<_STD_VECTOR<_STD_STRING>> m_nameSegments; + struct IntegerPayload + { + size_t width = 0; + WidthKind widthKind = FixedWidth; + bool isSigned = false; + _STD_STRING altName; + }; + + struct FloatPayload + { + size_t width = 0; + _STD_STRING altName; + }; - // Child type (for pointer/array/function return) - std::shared_ptr m_childType; - BNReferenceType m_pointerReference; - uint64_t m_elements; + struct WideCharPayload + { + size_t width = 0; + _STD_STRING altName; + }; + + struct PointerPayload + { + NodeRef childType; + BNReferenceType referenceType = PointerReferenceType; + }; + + struct MemberPointerPayload + { + NodeRef childType; + DemangledQualifiedName ownerName; + bool parenthesized = false; + }; + + struct ArrayPayload + { + NodeRef childType; + uint64_t elements = 0; + }; - // Function params - _STD_VECTOR m_params; + struct FunctionPayload + { + NodeRef returnType; + _STD_VECTOR params; + NodeRef implicitThisParameterType; + BNCallingConventionName callingConventionName = NoCallingConvention; + }; + + struct NamedTypePayload + { + BNNamedTypeReferenceClass ntrClass = UnknownNamedTypeClass; + DemangledQualifiedName name; + size_t width = 0; + WidthKind widthKind = FixedWidth; + bool isSigned = false; + }; + + struct PostfixPayload + { + NodeRef childType; + _STD_STRING suffix; + NodeRef suffixType; + }; + + using Payload = std::variant< + VoidPayload, + 
BoolPayload, + IntegerPayload, + FloatPayload, + WideCharPayload, + VarArgsPayload, + PointerPayload, + MemberPointerPayload, + ArrayPayload, + FunctionPayload, + NamedTypePayload, + PostfixPayload>; + + bool HasUndeterminedTopLevelSize() const; + uint8_t GetValueConfidence() const; + BNTypeClass GetPayloadClass() const; + NodeRef GetPrimaryChild() const; + static size_t ResolveWidth(size_t width, WidthKind widthKind, BN::Platform* platform = nullptr); + + BNNameType m_nameType; + uint8_t m_pointerSuffixBits; uint8_t m_returnTypeConfidence; + bool m_const; + bool m_volatile; + Payload m_payload; // Helpers for string formatting - _STD_STRING GetModifierString() const; - _STD_STRING GetPointerSuffixString() const; - void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; - void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; + static uint8_t PointerSuffixBit(BNPointerSuffix ps); + void AddPointerSuffixes(BN::TypeBuilder& tb, bool omitPtr64 = true) const; + bool HasPostfixType() const; + void AppendPostfixType(_STD_STRING& out, BN::Platform* platform) const; + void AppendModifiers(_STD_STRING& out) const; + void AppendPointerSuffix(_STD_STRING& out) const; + static void AppendNamePartList(_STD_STRING& out, const DemangledQualifiedName& name, + BN::Platform* platform); + void AppendTypeName(_STD_STRING& out, BN::Platform* platform) const; + void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType, BN::Platform* platform) const; + void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType, BN::Platform* platform) const; }; diff --git a/demangler/msvc/CMakeLists.txt b/demangler/msvc/CMakeLists.txt index b125599168..3536c899ab 100644 --- a/demangler/msvc/CMakeLists.txt +++ b/demangler/msvc/CMakeLists.txt @@ -5,7 +5,9 @@ project(demangle_msvc) file(GLOB SOURCES CONFIGURE_DEPENDS *.cpp *.c - *.h) + *.h + ../gnu3/demangled_type_node.cpp + 
../gnu3/demangled_type_node.h) if(DEMO) add_library(${PROJECT_NAME} STATIC ${SOURCES}) diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index 412ed96080..64956fd6d8 100644 --- a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -16,195 +16,391 @@ // See https://llvm.org/LICENSE.txt for license information. #include "demangle_msvc.h" +#include "unicode.h" +#include #include +#include +#include #ifdef BINARYNINJACORE_LIBRARY using namespace BinaryNinjaCore; -#define GetClass GetTypeClass #else using namespace BinaryNinja; using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 4096 +// The largest observed depth in a real-world corpus of roughly 200k MSVC symbols was 54. +static constexpr size_t MAX_DEMANGLE_NESTING_DEPTH = 256; +static constexpr size_t MAX_ENCODED_NUMBER_HEX_DIGITS = 16; +static constexpr size_t MAX_BACKREFS = 10; -Demangle::Reader::Reader(string data) +static int64_t EncodedNumberToInt64(uint64_t magnitude, bool negative) { - m_data = data; - //Check for non-ascii characters - for (auto a : m_data) + constexpr auto int64Max = static_cast(std::numeric_limits::max()); + constexpr auto int64MinMagnitude = int64Max + 1; + + if (!negative) { - if (a < 0x20 || a > 0x7e) - throw DemangleException(); + if (magnitude > int64Max) + throw DemangleException("Invalid encoded number"); + return static_cast(magnitude); } + + if (magnitude > int64MinMagnitude) + throw DemangleException("Invalid encoded number"); + if (magnitude == int64MinMagnitude) + return std::numeric_limits::min(); + return -static_cast(magnitude); +} + +static _STD_STRING FormatEncodedNumberLiteral(uint64_t magnitude, bool negative) +{ + if (negative) + return "-" + to_string(magnitude); + return to_string(magnitude); } +// Define MSVC_DEMANGLE_DEBUG to enable trace logging +#ifdef MSVC_DEMANGLE_DEBUG +#define MSVC_TRACE(...) LogTraceF(__VA_ARGS__) +#else +#define MSVC_TRACE(...) 
do {} while(0) +#endif -string Demangle::Reader::PeekString(size_t count) +_STD_STRING Demangle::Reader::ReadString(size_t count) { if (count > Length()) throw DemangleException(); - return m_data.substr(0, count); + _STD_STRING out(m_ptr, count); + m_ptr += count; + return out; } -char Demangle::Reader::Peek() +_STD_STRING Demangle::Reader::ReadUntil(char sentinel) { - if (1 > Length()) + const char* found = static_cast(memchr(m_ptr, sentinel, m_end - m_ptr)); + if (!found) throw DemangleException(); - return (char)m_data[0]; + size_t count = found - m_ptr; + _STD_STRING out = ReadString(count); + Consume(); // sentinel + return out; } -const char* Demangle::Reader::GetRaw() +DemangledTypeNode::NodeRef Demangle::BackrefList::GetTypeBackrefRef(size_t reference) { - return m_data.c_str(); + if (reference < typeList.size() && typeList[reference]) + return typeList[reference]; + throw DemangleException(_STD_STRING("Backref too large " + std::to_string(reference))); } -char Demangle::Reader::Read() +DemangledNamePart::Ref Demangle::BackrefList::GetNameBackrefRef(size_t reference) { - if (1 > Length()) - throw DemangleException(); - char out = m_data[0]; - m_data = m_data.substr(1); - return out; + if (reference < nameList.size() && nameList[reference]) + return nameList[reference]; + MSVC_TRACE("type: {} - Backref too large: {}/{}", fmt::ptr(this), nameList.size(), reference); + throw DemangleException(_STD_STRING("Backref too large " + std::to_string(reference))); } -string Demangle::Reader::ReadString(size_t count) +const DemangledTypeNode& Demangle::BackrefList::GetTypeBackref(size_t reference) { - if (count > Length()) - throw DemangleException(); - string out = m_data.substr(0, count); - m_data = m_data.substr(count + 1); - return out; + return *GetTypeBackrefRef(reference); } -string Demangle::Reader::ReadUntil(char sentinal) +const DemangledNamePart& Demangle::BackrefList::GetNameBackref(size_t reference) { - size_t pos = m_data.find_first_of(sentinal); - if 
(pos == string::npos) - throw DemangleException(); - return ReadString(pos); + return *GetNameBackrefRef(reference); } -void Demangle::Reader::Consume(size_t count) +DemangledTypeNode::NodeRef Demangle::BackrefList::PushTypeBackref(DemangledTypeNode::NodeRef t) { - if (count > Length()) - throw DemangleException(); - m_data = m_data.substr(count); + if (!t) + return nullptr; + if (typeList.size() >= MAX_BACKREFS) + return nullptr; + typeList.push_back(t); + return t; } -size_t Demangle::Reader::Length() +DemangledTypeNode::NodeRef Demangle::BackrefList::PushTypeBackref(const DemangledTypeNode& t) { - return m_data.length(); + if (typeList.size() < MAX_BACKREFS) + return PushTypeBackref(DemangledTypeNode::CreateSharedCopy(t)); + return nullptr; } -const TypeBuilder& Demangle::BackrefList::GetTypeBackref(size_t reference) +DemangledTypeNode::NodeRef Demangle::BackrefList::PushTypeBackref(DemangledTypeNode&& t) { - if (reference < typeList.size()) - return typeList[reference]; - // LogDebug("type: %llx - : %d/%d\n", this, typeList.size(), reference); - throw DemangleException(string("Backref too large " + std::to_string(reference))); + if (typeList.size() < MAX_BACKREFS) + return PushTypeBackref(DemangledTypeNode::CreateShared(std::move(t))); + return nullptr; } -string Demangle::BackrefList::GetStringBackref(size_t reference) +DemangledNamePart::Ref Demangle::BackrefList::PushNameBackref(DemangledNamePart::Ref t) { - // LogDebug("type: %llx - ref: %d\n", this, reference); - if (reference < nameList.size()) - return nameList[reference]; - LogDebug("type: %p - Backref too large: %zu/%zu\n", this, nameList.size(), reference); - throw DemangleException(string("Backref too large " + std::to_string(reference))); + if (!t) + return nullptr; + MSVC_TRACE("this: {} - Backref: {}", fmt::ptr(this), nameList.size()); + for (const auto& name : nameList) + if (name && ((name == t) || name->IsStructurallyEqual(*t))) + return name; + if (nameList.size() < MAX_BACKREFS) + { + 
nameList.push_back(t); + return t; + } + return nullptr; } -void Demangle::BackrefList::PushTypeBackref(TypeBuilder t) +DemangledNamePart::Ref Demangle::BackrefList::PushNameBackref(const DemangledNamePart& t) { - // LogDebug("this: %llx - TypeBackref: %lld %s\n", this, nameList.size(), t.GetString().c_str()); - if (typeList.size() <= 9) - typeList.push_back(t); + MSVC_TRACE("this: {} - Backref: {}", fmt::ptr(this), nameList.size()); + for (const auto& name : nameList) + if (name && name->IsStructurallyEqual(t)) + return name; + if (nameList.size() < MAX_BACKREFS) + { + auto ref = DemangledNamePart::CreateSharedCopy(t); + nameList.push_back(ref); + return ref; + } + return nullptr; } -void Demangle::BackrefList::PushStringBackref(string& s) +DemangledNamePart::Ref Demangle::BackrefList::PushNameBackref(DemangledNamePart&& t) { - if (s.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException(); - LogDebug("this: %p - Backref: %zu - %s\n", this, nameList.size(), s.c_str()); + MSVC_TRACE("this: {} - Backref: {}", fmt::ptr(this), nameList.size()); for (const auto& name : nameList) - if (name == s) - return; - nameList.push_back(s); + if (name && name->IsStructurallyEqual(t)) + return name; + if (nameList.size() < MAX_BACKREFS) + { + auto ref = DemangledNamePart::CreateShared(std::move(t)); + nameList.push_back(ref); + return ref; + } + return nullptr; } -void Demangle::BackrefList::PushFrontStringBackref(string& s) +DemangledNamePart::Ref Demangle::BackrefList::PushTemplateSpecialization(DemangledNamePart::Ref t) { - if (s.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException(); - // LogDebug("this: %llx - F-Backref: %lld - %s\n", this, nameList.size(), s.c_str()); - nameList.insert(nameList.begin(), s); + if (!t) + return nullptr; + templateList.push_back(t); + return t; +} + + +DemangledNamePart::Ref Demangle::BackrefList::PushTemplateSpecialization(const DemangledNamePart& t) +{ + return PushTemplateSpecialization(DemangledNamePart::CreateSharedCopy(t)); +} + + 
+DemangledNamePart::Ref Demangle::BackrefList::PushTemplateSpecialization(DemangledNamePart&& t) +{ + return PushTemplateSpecialization(DemangledNamePart::CreateShared(std::move(t))); +} + + +Demangle::BackrefContextSwitch::BackrefContextSwitch(BackrefList& active): active(active) +{ + Swap(active, saved); +} + + +Demangle::BackrefContextSwitch::~BackrefContextSwitch() +{ + Swap(active, saved); +} + + +void Demangle::BackrefContextSwitch::Swap(BackrefList& left, BackrefList& right) +{ + std::swap(left.typeList, right.typeList); + std::swap(left.nameList, right.nameList); + std::swap(left.templateList, right.templateList); } -Demangle::Demangle(Architecture* arch, string mangledName) : - reader(mangledName), + +Demangle::Demangle(Architecture* arch, _STD_STRING mangledName) : + m_mangledName(std::move(mangledName)), + m_reader(m_mangledName), m_arch(arch), m_platform(nullptr), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref platform, string mangledName) : - reader(mangledName), - m_arch(platform->GetArchitecture()), - m_platform(platform), +Demangle::Demangle(Ref platform, _STD_STRING mangledName) : + m_mangledName(std::move(mangledName)), + m_reader(m_mangledName), + m_arch(nullptr), + m_platform(std::move(platform)), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref view, string mangledName) : - reader(mangledName), - m_view(view) +Demangle::Demangle(Ref view, _STD_STRING mangledName) : + m_mangledName(std::move(mangledName)), + m_reader(m_mangledName), + m_arch(nullptr), + m_platform(nullptr), + m_view(std::move(view)) { - m_platform = view->GetDefaultPlatform(); - if (!m_platform) - throw DemangleException(); - m_arch = m_platform->GetArchitecture(); - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool 
isReturn, QualifiedName& name) +Demangle::NestingGuard::NestingGuard(Demangle& demangler) : m_demangler(demangler) +{ + m_demangler.m_nestingDepth++; + if (m_demangler.m_nestingDepth > MAX_DEMANGLE_NESTING_DEPTH) + { + m_demangler.m_nestingDepth--; + throw DemangleException("Detected adversarial mangled string"); + } +} + + +Demangle::NestingGuard::~NestingGuard() +{ + m_demangler.m_nestingDepth--; +} + + +void Demangle::Reset(Architecture* arch, const _STD_STRING& mangledName) +{ + m_mangledName = mangledName; + m_reader.Reset(m_mangledName); + m_backrefList.Clear(); + m_arch = arch; + m_platform = nullptr; + m_view = nullptr; + m_templateParamDepth = 0; + m_nestingDepth = 0; +} + + +void Demangle::RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) +{ + if (typeName.empty()) + return; + + DemangledNamePart& baseName = typeName.back(); + if (baseName.HasTemplateArguments()) + return; + _STD_STRING base = baseName.GetBase(); + + for (const auto & it : std::views::reverse(nameBackrefList.templateList)) + { + if (!it) + continue; + const DemangledNamePart& candidate = *it; + if (!candidate.HasTemplateArguments()) + continue; + if (candidate.GetBase() != base) + continue; + baseName = candidate; + return; + } +} + +_STD_STRING Demangle::FormatTypeAndName(const DemangledTypeNode& type, const NameList& name) const +{ + StringList nameSegments = FinalizeNameList(name); + if (type.GetNameType() == OperatorReturnTypeNameType) + { + Ref finalizedType = type.Finalize(m_platform.GetPtr()); + if (finalizedType) + return finalizedType->GetTypeAndName(QualifiedName(nameSegments)); + } + return type.GetTypeAndName(nameSegments); +} + +DemangledTypeNode Demangle::DemangleReferencedSymbolValue(BackrefList& varList) +{ + // Match LLVM's TemplateParameterReferenceNode parsing: referenced-symbol + // non-type template arguments are parsed in the active backref context, so + // later template arguments may refer to names/types introduced inside the + // 
referenced symbol. + BackrefList symbolBackrefs = varList; + + auto context = DemangleSymbol(symbolBackrefs); + varList = symbolBackrefs; + _STD_STRING value = "&" + FormatTypeAndName(context.type, context.name); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{value}); +} + + +DemangledTypeNode Demangle::DemangleAutoNonTypeTemplateParam(BackrefList& varList) +{ + if (m_reader.ConsumeIf('0')) + { + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{DecodeEncodedNumberLiteral()}); + } + if (m_reader.ConsumeIf('1')) + { + return DemangleReferencedSymbolValue(varList); + } + throw DemangleException(); +} + + +DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, + bool includeImplicitThis, DemangledTypeNode::NodeRef* outTypeBackref, TypeBackrefMode typeBackrefMode) { - m_logger->LogDebug("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); - TypeBuilder newType; - bool _const = false, _volatile = false, isMember = false; //TODO: use this info, _signed = false; - BNReferenceType refType; + NestingGuard nestingGuard(*this); + MSVC_TRACE("{}: '{}' - {}", __FUNCTION__, m_reader.GetRaw(), varList.nameList.size()); + if (outTypeBackref) + *outTypeBackref = nullptr; + auto recordTypeBackref = [&](const DemangledTypeNode& type) -> DemangledTypeNode::NodeRef { + if (isReturn || typeBackrefMode == TypeBackrefMode::SuppressTopLevel) + return nullptr; + auto ref = varList.PushTypeBackref(type); + if (outTypeBackref) + *outTypeBackref = ref; + return ref; + }; + DemangledTypeNode newType; + bool _const = false, _volatile = false; + BNReferenceType refType = PointerReferenceType; BNTypeClass typeClass = IntegerTypeClass; - BNStructureVariant structType; - QualifiedName varName; - QualifiedName typeName; + BNStructureVariant structType = StructStructureType; + NameList typeName; BNNameType classFunctionType; - - size_t width; - char elm = reader.Read(); - switch (elm) + size_t width = 0; + 
bool _enumSigned = false; + auto demangleArrayExtents = [this]() -> _STD_VECTOR { + uint64_t dimensionCount = DecodeEncodedUnsignedNumber(); + if (dimensionCount > static_cast(m_reader.Length())) + throw DemangleException("Array dimension count is too large"); + + _STD_VECTOR elementList; + for (uint64_t i = 0; i < dimensionCount; i++) + { + uint64_t element = DecodeEncodedUnsignedNumber(); + elementList.push_back(element); + } + return elementList; + }; + switch (char elm = m_reader.Read()) { case 'A': typeClass = PointerTypeClass; @@ -218,18 +414,18 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = true; break; - case 'C': return TypeBuilder::IntegerType(1, true); - case 'D': return TypeBuilder::IntegerType(1, true); - case 'E': return TypeBuilder::IntegerType(1, false); - case 'F': return TypeBuilder::IntegerType(2, true); - case 'G': return TypeBuilder::IntegerType(2, false); - case 'H': return TypeBuilder::IntegerType(4, true); - case 'I': return TypeBuilder::IntegerType(4, false); - case 'J': return TypeBuilder::IntegerType(4, true, "long"); - case 'K': return TypeBuilder::IntegerType(4, false, "unsigned long"); - case 'M': return TypeBuilder::FloatType(4); - case 'N': return TypeBuilder::FloatType(8); - case 'O': return TypeBuilder::FloatType(10, "long double"); + case 'C': return DemangledTypeNode::IntegerType(1, true, "signed char"); + case 'D': return DemangledTypeNode::IntegerType(1, true); + case 'E': return DemangledTypeNode::IntegerType(1, false); + case 'F': return DemangledTypeNode::IntegerType(2, true); + case 'G': return DemangledTypeNode::IntegerType(2, false); + case 'H': return DemangledTypeNode::IntegerType(4, true); + case 'I': return DemangledTypeNode::IntegerType(4, false); + case 'J': return DemangledTypeNode::IntegerType(4, true, "long"); + case 'K': return DemangledTypeNode::IntegerType(4, false, "unsigned long"); + case 'M': return DemangledTypeNode::FloatType(4); + case 'N': 
return DemangledTypeNode::FloatType(8); + case 'O': return DemangledTypeNode::FloatType(10, "long double"); case 'P': // * typeClass = PointerTypeClass; refType = PointerReferenceType; @@ -259,111 +455,205 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case 'V': typeClass = StructureTypeClass; structType = ClassStructureType; break; case 'W': typeClass = EnumerationTypeClass; - switch (reader.Read()) + switch (m_reader.Read()) { - case '0': width = 1; /* TODO: use these _signed = true; */ break; - case '1': width = 1; /* TODO: use these _signed = false; */ break; - case '2': width = 2; /* TODO: use these _signed = true; */ break; - case '3': width = 2; /* TODO: use these _signed = false; */ break; - case '4': width = 4; /* TODO: use these _signed = true; */ break; - case '5': width = 4; /* TODO: use these _signed = false; */ break; - case '6': width = 4; /* TODO: use these _signed = true; */ break; - case '7': width = 4; /* TODO: use these _signed = false; */ break; + case '0': width = 1; _enumSigned = true; break; + case '1': width = 1; _enumSigned = false; break; + case '2': width = 2; _enumSigned = true; break; + case '3': width = 2; _enumSigned = false; break; + case '4': width = 4; _enumSigned = true; break; + case '5': width = 4; _enumSigned = false; break; + case '6': width = 4; _enumSigned = true; break; + case '7': width = 4; _enumSigned = false; break; default: throw DemangleException(); } break; - case 'X': return TypeBuilder::VoidType(); break; + case 'X': return DemangledTypeNode::VoidType(); break; case 'Y': - throw DemangleException(); //TODO: handle cointerfaces - case 'Z': return TypeBuilder::VarArgsType(); break; + { + // Multi-dimensional array type: Y...@ + _STD_VECTOR elementList = demangleArrayExtents(); + newType = DemangleVarType(varList, false); + for (uint64_t i : std::views::reverse(elementList)) + { + newType = DemangledTypeNode::ArrayType(std::move(newType), i); + } + recordTypeBackref(newType); + 
return newType; + } + case 'Z': return DemangledTypeNode::VarArgsType(); + case '?': + { + char next = m_reader.PeekOr(); + if (next >= '0' && next <= '9') + { + size_t reference = m_reader.Read() - '0'; + if (reference < varList.typeList.size() && varList.typeList[reference]) + { + auto ref = varList.typeList[reference]; + if (outTypeBackref) + *outTypeBackref = ref; + return *ref; + } + // Legacy fallback: old generated symbols used `?2` here for + // a deduced-auto placeholder before clang/MSVC settled on + // the explicit `?@` spelling handled below. + if (reference == 2) + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); + throw DemangleException(_STD_STRING("Backref too large " + std::to_string(reference))); + } + if (next != '<') + throw DemangleException(); + + _STD_STRING placeholder = m_reader.ReadUntil('@'); + m_reader.ConsumeIf('@'); + if (placeholder == "") + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); + if (placeholder == "") + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"decltype(auto)"}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{placeholder}); + } case '_': - switch (reader.Read()) + switch (m_reader.Read()) { - case 'D': newType = TypeBuilder::IntegerType(1, true); break; - case 'E': newType = TypeBuilder::IntegerType(1, false); break; - case 'F': newType = TypeBuilder::IntegerType(2, true); break; - case 'G': newType = TypeBuilder::IntegerType(2, false); break; - case 'H': newType = TypeBuilder::IntegerType(4, true); break; - case 'I': newType = TypeBuilder::IntegerType(4, false); break; - case 'J': newType = TypeBuilder::IntegerType(8, true); break; - case 'K': newType = TypeBuilder::IntegerType(8, false); break; - case 'L': newType = TypeBuilder::IntegerType(16, true); break; - case 'M': newType = TypeBuilder::IntegerType(16, false); break; - case 'N': newType = TypeBuilder::BoolType(); break; + case 'D': newType = 
DemangledTypeNode::IntegerType(1, true); break; + case 'E': newType = DemangledTypeNode::IntegerType(1, false); break; + case 'F': newType = DemangledTypeNode::IntegerType(2, true); break; + case 'G': newType = DemangledTypeNode::IntegerType(2, false); break; + case 'H': newType = DemangledTypeNode::IntegerType(4, true); break; + case 'I': newType = DemangledTypeNode::IntegerType(4, false); break; + case 'J': newType = DemangledTypeNode::IntegerType(8, true); break; + case 'K': newType = DemangledTypeNode::IntegerType(8, false); break; + case 'L': newType = DemangledTypeNode::IntegerType(16, true); break; + case 'M': newType = DemangledTypeNode::IntegerType(16, false); break; + case 'N': newType = DemangledTypeNode::BoolType(); break; case 'O': { - QualifiedName name; - //m_logger->Indent(); - auto childType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); - newType = TypeBuilder::ArrayType(childType.Finalize(), 0); + auto childType = DemangleVarType(varList, false); + newType = DemangledTypeNode::ArrayType(std::move(childType), 0); break; } - case 'S': newType = TypeBuilder::IntegerType(2, true, "char16_t"); break; - case 'U': newType = TypeBuilder::IntegerType(4, true, "char32_t"); break; - case 'W': newType = TypeBuilder::IntegerType(2, false, "wchar_t"); break; - case 'X': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Coclass - case 'Y': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Cointerface + case 'S': newType = DemangledTypeNode::WideCharType(2, "char16_t"); break; + case 'U': newType = DemangledTypeNode::WideCharType(4, "char32_t"); break; + case 'W': newType = DemangledTypeNode::WideCharType(2, "wchar_t"); break; + // `_P` (auto) and `_T` (decltype(auto)) are placeholder return-type + // encodings. 
For normal source code they are deduced at the function + // definition and mangled as the deduced type — you will not see `_P` + // or `_T` from something like `auto foo() { return 0; }` (that becomes + // `?foo@@YAHXZ`). They do appear in compiler-emitted symbols for + // function templates whose declared return type is literally `auto` + // or `decltype(auto)` and which are mangled before/without deduction + // settling on a concrete type — e.g. `??$seq@HX@llvm@@YA?A_PH@Z` + // (llvm::seq) or `??$_Get_unwrapped@...@std@@YA?A_T...@Z`. Handle + // them as named-type placeholders so downstream type consumers get + // something sensible (rather than a `` demangle) even though + // the underlying type is not expressible as a Binary Ninja Type. + case 'P': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); break; + case 'Q': newType = DemangledTypeNode::IntegerType(1, true, "char8_t"); break; // C++20 char8_t + case 'T': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"decltype(auto)"}); break; + // NOTE: `_X` and `_Y` were previously mapped to coclass/cointerface + // here, but those encodings are not emitted by any real toolchain. + // Neither LLVM's MicrosoftDemangle / MicrosoftMangle nor Wine's + // undname reimplementation recognizes `_X` or `_Y` as type + // codes. Real cointerface is plain `Y@@` (no underscore) at + // the top-level type switch, grouped with T/U/V; coclass has no + // dedicated mangling and is emitted as `V@@` (class). Let + // `_X` / `_Y` fall through to the `default: throw` so malformed + // input is rejected instead of producing a bogus class type. 
default: throw DemangleException(); } break; case '$': - if (reader.PeekString(2) == "$Q") // && + if (m_reader.ConsumeIf("$Q")) // && { - reader.Consume(2); typeClass = PointerTypeClass; refType = RValueReferenceType; _const = false; _volatile = false; } - else if (reader.PeekString(2) == "$R") // && volatile + else if (m_reader.ConsumeIf("$R")) // && volatile { - reader.Consume(2); typeClass = PointerTypeClass; refType = RValueReferenceType; _const = false; _volatile = true; } - else if (reader.PeekString(2) == "$A") + else if (m_reader.ConsumeIf("$A")) { - reader.Consume(2); - char num = reader.Read(); - if (num == 8) - return DemangleFunction(NoNameType, true, varList); - if (num == '6' || num == '7') - return DemangleFunction(NoNameType, false, varList); + char num = m_reader.Read(); + if (num >= '6' && num <= '9') + { + // For member function types (8/9), skip the class scope marker @@ + if (num == '8' || num == '9') + m_reader.ConsumeIf("@@"); + return DemangleFunction(NoNameType, num >= '7', varList).type; + } throw DemangleException(); } - else if (reader.PeekString(2) == "$C") + else if (m_reader.ConsumeIf("$C")) { - reader.Consume(2); + bool isMember = false; DemangleModifiers(_const, _volatile, isMember); - QualifiedName name; - //m_logger->Indent(); - newType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + newType = DemangleVarType(varList, isReturn, includeImplicitThis, nullptr, + TypeBackrefMode::SuppressTopLevel); newType.SetConst(_const); newType.SetVolatile(_volatile); + recordTypeBackref(newType); return newType; } - else if (reader.PeekString(2) == "$T") + else if (m_reader.ConsumeIf("$T")) + { + auto t = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"std::nullptr_t"}); + recordTypeBackref(t); + return t; + } + else if (m_reader.ConsumeIf("$B")) + { + // $$B is a type modifier (managed/const) - strip and parse underlying type + return DemangleVarType(varList, isReturn, includeImplicitThis, outTypeBackref, 
typeBackrefMode); + } + else if (m_reader.ConsumeIf('0')) + { + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{DecodeEncodedNumberLiteral()}); + } + else if (m_reader.ConsumeIf('D')) + { + // $D - template type alias / anonymous type parameter + return DemangleVarType(varList, isReturn, includeImplicitThis, outTypeBackref, typeBackrefMode); + } + else if (m_reader.ConsumeIf('M')) { - reader.Consume(2); - return TypeBuilder::ValueType("std::nullptr"); + // $M - C++17 `auto` non-type template parameter. + // The encoded type is the deduced type for the following bare + // non-type payload and is not itself printed as a template arg. + DemangleVarType(varList, false); + return DemangleAutoNonTypeTemplateParam(varList); } - else if (reader.Peek() == '0') + else if (char next = m_reader.PeekOr(); next == 'H' || next == 'I' || next == 'J') { - reader.Consume(); - int64_t value; - DemangleNumber(value); - return TypeBuilder::ValueType(to_string(value)); + // $H/$I/$J - member function pointer value as a non-type template + // parameter. Format: $H@; + // $I has two adjustment numbers, $J has three. + char kind = m_reader.Read(); + BackrefList symbolBackrefs = varList; + auto context = DemangleSymbol(symbolBackrefs); + varList = symbolBackrefs; + _STD_STRING value = "{" + FormatTypeAndName(context.type, context.name); + + // Read adjustment number(s) — NOT $-prefixed, just raw numbers. + int adjustments = (kind == 'H') ? 1 : (kind == 'I') ? 
2 : 3; + for (int i = 0; i < adjustments; i++) + { + int64_t adj = DecodeEncodedSignedNumber(); + value += "," + to_string(adj); + } + value += "}"; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{value}); } - else if (reader.Peek() == '1') + else if (m_reader.ConsumeIf('1')) { - reader.Consume(); - auto context = DemangleSymbol(); - return TypeBuilder::PointerType(m_arch, context.type.Finalize()); + return DemangleReferencedSymbolValue(varList); } else throw DemangleException(); @@ -378,9 +668,13 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case '7': case '8': case '9': - //Make a copy of the item in the backref list. Exit early since we don't want this added to the backref list. - m_logger->LogDebug("Backref %u %lu", elm - '0', varList.typeList.size()); - return varList.GetTypeBackref(elm - '0'); + { + MSVC_TRACE("Backref {} {}", elm - '0', varList.typeList.size()); + auto ref = varList.GetTypeBackrefRef(elm - '0'); + if (outTypeBackref) + *outTypeBackref = ref; + return *ref; + } default: throw DemangleException(); } @@ -389,7 +683,28 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali { case PointerTypeClass: { - switch (reader.Peek()) + if (m_reader.ConsumeIf('6')) + { + auto childType = DemangleFunction(NoNameType, false, varList).type; + newType = DemangledTypeNode::PointerType(std::move(childType), + _const, + _volatile, + refType); + break; + } + if (m_reader.ConsumeIf('8')) + { + NameList ownerName; + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + auto childType = DemangleFunction(NoNameType, true, varList).type; + newType = DemangledTypeNode::MemberPointerType(std::move(childType), + std::move(ownerName), + _const, + _volatile); + break; + } + switch (m_reader.PeekOr()) { case '0': case '1': @@ -397,209 +712,166 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali 
case '3': case '4': case '5': + case '7': + case '9': throw DemangleException(); - case '6': - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - auto childType = DemangleFunction(NoNameType, false, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } - case '7': //Function pointer - case '9': //Class Function pointer - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } - case '8': //Named class function pointer - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - DemangleName(name, classFunctionType, varList); - name.push_back(""); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } default: // Non-numeric { - m_logger->LogDebug("Demangle pointer subtype: '%s'\n", reader.GetRaw()); - TypeBuilder child; - bool _const2 = false, _volatile2 = false, isMember = false; + MSVC_TRACE("Demangle pointer subtype: '{}'", m_reader.GetRaw()); + DemangledTypeNode child; + bool _const2 = false, _volatile2 = false, localIsMember = false; + NameList ownerName; auto suffix = DemanglePointerSuffix(); - DemangleModifiers(_const2, _volatile2, isMember); - if (reader.Peek() == 'Y') //Multi-dimentional array + ConsumeExtendedModifierPrefix(); + DemangleModifiers(_const2, _volatile2, localIsMember); + if (localIsMember) { - m_logger->LogDebug("Demangle multi-dimentional array"); - int64_t nDimentions; - reader.Consume(); - DemangleNumber(nDimentions); - vector elementList; - while 
(nDimentions--) - { - int64_t element = 0; - DemangleNumber(element); - elementList.push_back(element); - } - QualifiedName name; - //m_logger->Indent(); - child = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + } + if (m_reader.ConsumeIf('Y')) //Multi-dimensions array + { + MSVC_TRACE("Demangle multi-dimensions array"); + _STD_VECTOR elementList = demangleArrayExtents(); + child = DemangleVarType(varList, false); - for (auto i = elementList.rbegin(); i != elementList.rend(); i++) + for (uint64_t i : std::views::reverse(elementList)) { - child = TypeBuilder::ArrayType(child.Finalize(), *i); + child = DemangledTypeNode::ArrayType(std::move(child), i); } } else { - QualifiedName name; - //m_logger->Indent(); - child = DemangleVarType(varList, true, name); - //m_logger->Dedent(); + child = DemangleVarType(varList, true, includeImplicitThis && !localIsMember); } child.SetConst(_const2); child.SetVolatile(_volatile2); - newType = TypeBuilder::PointerType(m_arch, - child.Finalize(), - _const, - _volatile, - refType); - - newType.SetPointerSuffix(suffix); - m_logger->LogDebug("Name: %s\n", newType.GetString().c_str()); + if (localIsMember) + { + newType = DemangledTypeNode::MemberPointerType( + std::move(child), std::move(ownerName), _const, _volatile); + } + else + { + newType = DemangledTypeNode::PointerType(std::move(child), + _const, + _volatile, + refType); + } + + newType.SetPointerSuffixBits(suffix); + MSVC_TRACE("Name: {}", newType.GetString()); break; } } break; } case EnumerationTypeClass: - m_logger->LogDebug("Demangle enumeration\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(EnumNamedTypeClass, typeName), - width, width); + MSVC_TRACE("Demangle enumeration"); + DemangleName(typeName, 
classFunctionType, varList, true); + newType = DemangledTypeNode::NamedType(EnumNamedTypeClass, typeName, width, _enumSigned); break; case StructureTypeClass: - m_logger->LogDebug("Demangle structure\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); + MSVC_TRACE("Demangle structure"); + DemangleName(typeName, classFunctionType, varList, true); + RewriteTemplateBackrefName(typeName, varList); switch (structType) { case ClassStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - ClassNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(ClassNamedTypeClass, typeName); break; case StructStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); break; case UnionStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnionNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(UnionNamedTypeClass, typeName); break; default: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnknownNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, typeName); break; } break; default: break; } - if (!isReturn) - { - varList.PushTypeBackref(newType); - } + recordTypeBackref(newType); return newType; } - -void Demangle::DemangleNumber(int64_t& num) +Demangle::EncodedNumber Demangle::DecodeEncodedNumber() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - num = 0; - int mult = 1; - if (reader.Peek() == '?') - { - mult = -1; - reader.Consume(); - } + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + bool negative = m_reader.ConsumeIf('?'); + if (m_reader.Length() == 0) + throw DemangleException("Invalid encoded number"); - //The number is 
decimal 1-10 - if (reader.Peek() >= '0' && reader.Peek() <= '9') + char next = m_reader.PeekOr(); + if (next >= '0' && next <= '9') { - num = mult * (reader.Read() + 1 - '0'); - return; + uint64_t magnitude = static_cast(m_reader.Read() + 1 - '0'); + return {magnitude, negative}; } - else + + uint64_t magnitude = 0; + size_t digitCount = 0; + while (!m_reader.ConsumeIf('@')) { - //The number is hexidecimal - string strnum = reader.ReadUntil('@'); - for (auto a : strnum) - { - num *= 16; - if (a >= 'A' && a <= 'P') - num += a - 'A'; - else - throw DemangleException(); - } - num *= mult; - return; + char ch = m_reader.Read(); + if (ch < 'A' || ch > 'P') + throw DemangleException("Invalid encoded number"); + if (digitCount >= MAX_ENCODED_NUMBER_HEX_DIGITS) + throw DemangleException("Invalid encoded number"); + magnitude = (magnitude << 4) | static_cast(ch - 'A'); + digitCount++; } + + return {magnitude, negative}; +} + +int64_t Demangle::DecodeEncodedSignedNumber() +{ + EncodedNumber number = DecodeEncodedNumber(); + return EncodedNumberToInt64(number.magnitude, number.negative); +} + +uint64_t Demangle::DecodeEncodedUnsignedNumber() +{ + EncodedNumber number = DecodeEncodedNumber(); + if (number.negative) + throw DemangleException("Invalid encoded number"); + return number.magnitude; +} + +int32_t Demangle::DecodeEncodedSignedInt32() +{ + uint32_t lowBits = static_cast(DecodeEncodedUnsignedNumber()); + if ((lowBits & 0x80000000U) != 0) + return static_cast(static_cast(lowBits) - 0x100000000LL); + return static_cast(lowBits); } +_STD_STRING Demangle::DecodeEncodedNumberLiteral() +{ + EncodedNumber number = DecodeEncodedNumber(); + return FormatEncodedNumberLiteral(number.magnitude, number.negative); +} -void Demangle::DemangleChar(char& ch) + +char Demangle::DemangleChar() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); // Basic char is just the char - if (reader.Peek() != '?') - { - ch = 
reader.Peek(); - reader.Consume(); - return; - } - reader.Consume(); + if (!m_reader.ConsumeIf('?')) + return m_reader.Read(); // Hex char is ?$XX for 2 hex digits XX - if (reader.Peek() == '$') + if (m_reader.ConsumeIf('$')) { - m_logger->LogDebug("%s: Hex digit '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Hex digit '{}'", __FUNCTION__, m_reader.GetRaw()); - reader.Consume(); - char c1 = reader.Peek(); - reader.Consume(); - char c2 = reader.Peek(); - reader.Consume(); + char c1 = m_reader.Read(); + char c2 = m_reader.Read(); if (c1 < 'A' || c1 > 'P') throw DemangleException("Invalid character"); @@ -609,224 +881,256 @@ void Demangle::DemangleChar(char& ch) uint8_t b1 = c1 - 'A'; uint8_t b2 = c2 - 'A'; - ch = (char)((b1 << 4) | b2); - return; + return static_cast((b1 << 4) | b2); } - m_logger->LogDebug("%s: Table lookup '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Table lookup '{}'", __FUNCTION__, m_reader.GetRaw()); // Otherwise it's a lookup based on some big table // Thanks, LLVM! 
- switch (reader.Peek()) - { - case '0': ch = ','; reader.Consume(); return; - case '1': ch = '/'; reader.Consume(); return; - case '2': ch = '\\'; reader.Consume(); return; - case '3': ch = ':'; reader.Consume(); return; - case '4': ch = '.'; reader.Consume(); return; - case '5': ch = ' '; reader.Consume(); return; - case '6': ch = '\n'; reader.Consume(); return; - case '7': ch = '\t'; reader.Consume(); return; - case '8': ch = '\''; reader.Consume(); return; - case '9': ch = '-'; reader.Consume(); return; - case 'a': ch = '\xE1'; reader.Consume(); return; - case 'b': ch = '\xE2'; reader.Consume(); return; - case 'c': ch = '\xE3'; reader.Consume(); return; - case 'd': ch = '\xE4'; reader.Consume(); return; - case 'e': ch = '\xE5'; reader.Consume(); return; - case 'f': ch = '\xE6'; reader.Consume(); return; - case 'g': ch = '\xE7'; reader.Consume(); return; - case 'h': ch = '\xE8'; reader.Consume(); return; - case 'i': ch = '\xE9'; reader.Consume(); return; - case 'j': ch = '\xEA'; reader.Consume(); return; - case 'k': ch = '\xEB'; reader.Consume(); return; - case 'l': ch = '\xEC'; reader.Consume(); return; - case 'm': ch = '\xED'; reader.Consume(); return; - case 'n': ch = '\xEE'; reader.Consume(); return; - case 'o': ch = '\xEF'; reader.Consume(); return; - case 'p': ch = '\xF0'; reader.Consume(); return; - case 'q': ch = '\xF1'; reader.Consume(); return; - case 'r': ch = '\xF2'; reader.Consume(); return; - case 's': ch = '\xF3'; reader.Consume(); return; - case 't': ch = '\xF4'; reader.Consume(); return; - case 'u': ch = '\xF5'; reader.Consume(); return; - case 'v': ch = '\xF6'; reader.Consume(); return; - case 'w': ch = '\xF7'; reader.Consume(); return; - case 'x': ch = '\xF8'; reader.Consume(); return; - case 'y': ch = '\xF9'; reader.Consume(); return; - case 'z': ch = '\xFA'; reader.Consume(); return; - case 'A': ch = '\xC1'; reader.Consume(); return; - case 'B': ch = '\xC2'; reader.Consume(); return; - case 'C': ch = '\xC3'; reader.Consume(); return; - case 
'D': ch = '\xC4'; reader.Consume(); return; - case 'E': ch = '\xC5'; reader.Consume(); return; - case 'F': ch = '\xC6'; reader.Consume(); return; - case 'G': ch = '\xC7'; reader.Consume(); return; - case 'H': ch = '\xC8'; reader.Consume(); return; - case 'I': ch = '\xC9'; reader.Consume(); return; - case 'J': ch = '\xCA'; reader.Consume(); return; - case 'K': ch = '\xCB'; reader.Consume(); return; - case 'L': ch = '\xCC'; reader.Consume(); return; - case 'M': ch = '\xCD'; reader.Consume(); return; - case 'N': ch = '\xCE'; reader.Consume(); return; - case 'O': ch = '\xCF'; reader.Consume(); return; - case 'P': ch = '\xD0'; reader.Consume(); return; - case 'Q': ch = '\xD1'; reader.Consume(); return; - case 'R': ch = '\xD2'; reader.Consume(); return; - case 'S': ch = '\xD3'; reader.Consume(); return; - case 'T': ch = '\xD4'; reader.Consume(); return; - case 'U': ch = '\xD5'; reader.Consume(); return; - case 'V': ch = '\xD6'; reader.Consume(); return; - case 'W': ch = '\xD7'; reader.Consume(); return; - case 'X': ch = '\xD8'; reader.Consume(); return; - case 'Y': ch = '\xD9'; reader.Consume(); return; - case 'Z': ch = '\xDA'; reader.Consume(); return; + switch (m_reader.Read()) + { + case '0': return ','; + case '1': return '/'; + case '2': return '\\'; + case '3': return ':'; + case '4': return '.'; + case '5': return ' '; + case '6': return '\n'; + case '7': return '\t'; + case '8': return '\''; + case '9': return '-'; + case 'a': return '\xE1'; + case 'b': return '\xE2'; + case 'c': return '\xE3'; + case 'd': return '\xE4'; + case 'e': return '\xE5'; + case 'f': return '\xE6'; + case 'g': return '\xE7'; + case 'h': return '\xE8'; + case 'i': return '\xE9'; + case 'j': return '\xEA'; + case 'k': return '\xEB'; + case 'l': return '\xEC'; + case 'm': return '\xED'; + case 'n': return '\xEE'; + case 'o': return '\xEF'; + case 'p': return '\xF0'; + case 'q': return '\xF1'; + case 'r': return '\xF2'; + case 's': return '\xF3'; + case 't': return '\xF4'; + case 'u': return 
'\xF5'; + case 'v': return '\xF6'; + case 'w': return '\xF7'; + case 'x': return '\xF8'; + case 'y': return '\xF9'; + case 'z': return '\xFA'; + case 'A': return '\xC1'; + case 'B': return '\xC2'; + case 'C': return '\xC3'; + case 'D': return '\xC4'; + case 'E': return '\xC5'; + case 'F': return '\xC6'; + case 'G': return '\xC7'; + case 'H': return '\xC8'; + case 'I': return '\xC9'; + case 'J': return '\xCA'; + case 'K': return '\xCB'; + case 'L': return '\xCC'; + case 'M': return '\xCD'; + case 'N': return '\xCE'; + case 'O': return '\xCF'; + case 'P': return '\xD0'; + case 'Q': return '\xD1'; + case 'R': return '\xD2'; + case 'S': return '\xD3'; + case 'T': return '\xD4'; + case 'U': return '\xD5'; + case 'V': return '\xD6'; + case 'W': return '\xD7'; + case 'X': return '\xD8'; + case 'Y': return '\xD9'; + case 'Z': return '\xDA'; default: throw DemangleException("Unknown character"); } } -void Demangle::DemangleWideChar(uint16_t& wch) +void Demangle::DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList, bool typeBackrefs) { - char c1, c2; - DemangleChar(c1); - DemangleChar(c2); - - wch = (uint16_t)(((uint16_t)c1 << 8) | (uint16_t)c2); -} - - -void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) -{ - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - set suffix; - for (size_t i = 0; reader.Peek() != 'Z'; i++) + uint8_t suffix = 0; + for (;;) { bool hasModifiers = false; - if (reader.Peek() == '@') + if (m_reader.PeekOr() == 'Z') + { + if (m_reader.PeekMatch("ZZ", 2)) + { + paramList.push_back({"", DemangledTypeNode::CreateShared(DemangledTypeNode::VarArgsType())}); + m_reader.Consume(); + continue; + } + break; + } + if (m_reader.ConsumeIf('@')) { - reader.Consume(); break; } - else if (reader.Peek() == '?') + else if (m_reader.ConsumeIf("$$$V")) + { + // $$$V = empty expanded type / 
template-template pack (post-MSVC2015 mangling). + // See clang/lib/AST/MicrosoftMangle.cpp: for MSVC2015-compat this emits $$V, + // otherwise $$$V. + continue; + } + else if (m_reader.ConsumeIf("$$V") || m_reader.ConsumeIf("$$Z")) + { + // $$V = empty expanded type / template-template pack (MSVC2015-compat mangling). + // $$Z = separator between two consecutive packs (emitted between non-empty packs, + // not as a lone template argument). LLVM's demangler leniently skips it in + // any position; we follow suit. + // NB: $$S is NOT emitted by any known toolchain - only $S (single $) is a real + // token, handled below. + continue; + } + else if (m_reader.ConsumeIf("$S")) + { + // $S = empty expanded non-type template pack + // (e.g. `template` or `template` instantiated with zero args). + continue; + } + else if (m_reader.ConsumeIf('?')) { - reader.Consume(); suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); hasModifiers = true; } - FunctionParameter vt; - QualifiedName name; - m_logger->LogDebug("Argument %d: %s", i, reader.GetRaw()); - //m_logger->Indent(); - TypeBuilder type = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + MSVC_TRACE("Argument {}: {}", paramList.size(), m_reader.GetRaw()); + DemangledTypeNode::NodeRef parsedType; + DemangledTypeNode type = DemangleVarType(varList, false, true, &parsedType, + typeBackrefs ? 
TypeBackrefMode::RecordTopLevel : TypeBackrefMode::SuppressTopLevel); if (hasModifiers) { type.SetConst(_const); type.SetVolatile(_volatile); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); } - vt.name = name.GetString(); - vt.type = type.Finalize(); - vt.locationSource = DefaultLocationSource; - paramList.push_back(vt); - m_logger->LogDebug("Argument %zu: '%s' - '%s'\n", i, vt.type->GetString().c_str(), reader.GetRaw()); + DemangledTypeNode::Param vt; + if (hasModifiers || !parsedType) + vt.type = DemangledTypeNode::CreateShared(std::move(type)); + else + vt.type = parsedType; + paramList.push_back(std::move(vt)); + MSVC_TRACE("Argument {}: '{}' - '{}'", paramList.size() - 1, paramList.back().type->GetString(), m_reader.GetRaw()); } - if (reader.Peek() == 'Z') - reader.Consume(); - m_logger->LogDebug("%s: done '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: done '{}'", __FUNCTION__, m_reader.GetRaw()); +} + + +void Demangle::DemangleNameTypeString(_STD_STRING& out) +{ + out = m_reader.ReadUntil('@'); +} + + +static bool IsWinRTEscapedScopeNameChar(char ch) +{ + return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') || (ch == '_') || (ch == '$'); } -Demangle::NameType Demangle::GetNameType() +bool Demangle::TryDemangleWinRTEscapedScopeName(NameList& nameList, BackrefList& nameBackrefList) { - if (reader.Peek() == '?') + // LLVM's Microsoft demangler rejects these WinRT interface-scope spellings: + // ?get@?QIXamlType@Markup@Xaml@UI@Windows@@Outer@@... + // We accept them for compatibility with existing BN test cases. At entry, + // DemangleName has consumed the leading '?' and `m_reader` points at the first + // simple scope component. The escaped chain ends at its inner '@@'; the + // normal outer qualified-name '@' is intentionally left for the DemangleName + // loop to consume. 
+ const char* start = m_reader.GetRaw(); + if (m_reader.Length() < 4) + return false; + + char prefix = start[0]; + if (!((prefix >= 'A' && prefix <= 'Z') || (prefix == '_'))) + return false; + if (start[1] == '@' || start[1] == '?') + return false; + + const char* limit = start + m_reader.Length(); + const char* end = nullptr; + for (const char* cur = start + 1; (cur + 1) < limit; cur++) { - reader.Consume(); - if (reader.Peek()== '?') - { - reader.Consume(); - return GetNameType(); - } - else if (reader.Peek() == '$') + if ((cur[0] == '@') && (cur[1] == '@')) { - reader.Consume(); - return NameTemplate; - } - else if (reader.Peek() == '0') - { - reader.Consume(); - return NameConstructor; - } - else if (reader.Peek() == '1') - { - reader.Consume(); - return NameDestructor; - } - else if (reader.Peek() == 'B') - { - reader.Consume(); - return NameReturn; - } - else if (reader.PeekString(2) == "_R") - { - reader.Consume(2); - return NameRtti; + end = cur; + break; } - // else if (reader.PeekString(3) == "__E") - // { - // reader.Consume(2); - // return NameDynamicInitializer; - // } - else + } + if (!end) + return false; + + _STD_VECTOR<_STD_STRING> scopeNames; + const char* componentStart = start; + while (componentStart < end) + { + const char* componentEnd = componentStart; + while ((componentEnd < end) && (*componentEnd != '@')) { - return NameLookup; + if (!IsWinRTEscapedScopeNameChar(*componentEnd)) + return false; + componentEnd++; } + if (componentEnd == componentStart) + return false; + + scopeNames.emplace_back(componentStart, componentEnd - componentStart); + componentStart = componentEnd + 1; } - else if (reader.Peek() >= '0' && reader.Peek() <= '9') + + for (const auto& scopeName: scopeNames) { - return NameBackref; + DemangledNamePart scope = MakeNameSegment(scopeName); + nameList.insert(nameList.begin(), scope); + nameBackrefList.PushNameBackref(std::move(scope)); } - return NameString; -} - - -void Demangle::DemangleNameTypeString(string& out) -{ - 
out = reader.ReadUntil('@'); + m_reader.SetRaw(end + 2); + return true; } void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, - string& out) + _STD_STRING& out) { - TypeBuilder rtti; - switch (reader.Read()) + switch (m_reader.Read()) { case '0': { - if (reader.Peek() != '?') - throw DemangleException(); - reader.Consume(); - - bool _const = false, _volatile = false, isMember = false; - auto suffix = DemanglePointerSuffix(); - DemangleModifiers(_const, _volatile, isMember); + bool _const = false, _volatile = false; + uint8_t suffix = 0; + if (m_reader.ConsumeIf('?')) + { + bool isMember = false; + suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); + DemangleModifiers(_const, _volatile, isMember); + } - QualifiedName name; - //m_logger->Indent(); - rtti = DemangleVarType(nameBackrefList, false, name); - //m_logger->Dedent(); + DemangledTypeNode rtti = DemangleVarType(nameBackrefList, false); rtti.SetConst(_const); rtti.SetVolatile(_volatile); - rtti.SetPointerSuffix(suffix); - out = rtti.GetString() + " `RTTI Type Descriptor' "; + rtti.SetPointerSuffixBits(suffix); + out = rtti.GetString() + " `RTTI Type Descriptor'"; classFunctionType = RttiTypeDescriptor; break; } @@ -834,11 +1138,10 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, out = "`RTTI Base Class Descriptor at ("; for (int i = 0; i < 4; i++) { - int64_t num = 0; - DemangleNumber(num); + int64_t num = DecodeEncodedSignedNumber(); if (i > 0) { - out += ","; + out += ", "; } out += to_string(num); } @@ -862,12 +1165,15 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, } -void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) +void Demangle::DemangleTypeNameLookup(_STD_STRING& out, BNNameType& functionType) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (m_reader.Read()) { 
case '?': functionType = NoNameType; break; + case '0': functionType = ConstructorNameType; break; + case '1': functionType = ConstructorNameType; out = "~"; break; // destructor + case 'B': functionType = OperatorReturnTypeNameType; out = "operator"; break; // conversion operator case '2': functionType = OperatorNewNameType; break; case '3': functionType = OperatorDeleteNameType; break; case '4': functionType = OperatorAssignNameType; break; @@ -903,8 +1209,8 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'Z': functionType = OperatorMinusEqualNameType; break; case '_': { - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + MSVC_TRACE(" {}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (m_reader.Read()) { case '0': functionType = OperatorDivideEqualNameType; break; case '1': functionType = OperatorModulusEqualNameType; break; @@ -942,23 +1248,46 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'W': // Fallthrough case 'Z': functionType = NoNameType; break; case '_': - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + { + MSVC_TRACE(" {}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (const char extendedNameType = m_reader.Read()) { case 'A': functionType = ManagedVectorConstructorIteratorNameType; break; case 'B': functionType = ManagedVectorDestructorIteratorNameType; break; case 'C': functionType = EHVectorCopyConstructorIteratorNameType; break; - case 'D': functionType = EHVectorVBaseConstructorIteratorNameType; break; - case 'E': functionType = DynamicInitializerNameType; break; - case 'F': functionType = DynamicAtExitDestructorNameType; break; + // ??__D is the *copy* variant per LLVM (MicrosoftDemangle.cpp:701). + // Previously routed to EHVectorVBaseConstructorIteratorNameType + // (the non-copy enum used by ??_O), which dropped the "copy" word. 
+ case 'D': functionType = EHVectorVBaseCopyConstructorIteratorNameType; break; + // ??__E and ??__F are not reached here — they're handled at the + // top level in DemangleSymbol, matching LLVM's special-intrinsic + // dispatch. See DemangleDynamicInitFini. + case 'E': // fall through — unreachable in practice + case 'F': functionType = (extendedNameType == 'E') ? DynamicInitializerNameType : DynamicAtExitDestructorNameType; break; case 'G': functionType = VectorCopyConstructorIteratorNameType; break; case 'H': functionType = VectorVBaseCopyConstructorIteratorNameType; break; case 'I': functionType = ManagedVectorCopyConstructorIteratorNameType; break; - case 'J': functionType = LocalStaticGuardNameType; break; - case 'K': functionType = UserDefinedLiteralOperatorNameType; break; + case 'J': functionType = LocalStaticThreadGuardNameType; break; + case 'K': + { + // User-defined literal operator: ??__K@ + // LLVM's demangleLiteralOperatorIdentifier consumes a simple + // string terminated by '@' as the literal suffix and renders it + // as `operator ""`. The outer DemangleName loop then + // picks up any enclosing scope chain as a normal prefix. 
+ functionType = UserDefinedLiteralOperatorNameType; + _STD_STRING suffix = m_reader.ReadUntil('@'); + if (suffix.empty()) + throw DemangleException("??__K requires a non-empty literal suffix"); + out = "operator \"\"" + suffix; + break; + } + case 'L': functionType = NoNameType; out = "operator co_await"; break; + case 'M': functionType = NoNameType; out = "operator<=>"; break; // spaceship operator default: throw DemangleException("Demangle Lookup Failed"); // fall through } break; + } default: throw DemangleException("Demangle Lookup Failed"); } @@ -966,108 +1295,145 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) } default: throw DemangleException("Demangle Lookup Failed"); } - out = Type::GetNameTypeString(functionType); + if (out.empty()) + out = Type::GetNameTypeString(functionType); } -string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) +DemangledNamePart Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) { - string out; - BackrefList templateBackref; - reader.Consume(2); - m_logger->LogDebug("DemangleTemplateInstantiationName: '%s'\n", reader.GetRaw()); - if (reader.Peek() >= '0' && reader.Peek() <= '9') + DemangledNamePart out; + MSVC_TRACE("DemangleTemplateInstantiationName: '{}'", m_reader.GetRaw()); + if (!m_reader.ConsumeIf("?$")) + throw DemangleException(); + char next = m_reader.PeekOr(); + if (next >= '0' && next <= '9') { - out = nameBackrefList.GetStringBackref(reader.Read() - '0'); + out = nameBackrefList.GetNameBackref(m_reader.Read() - '0'); } else { - DemangleNameTypeString(out); + _STD_STRING name; + DemangleNameTypeString(name); + out = MakeNameSegment(name); } - nameBackrefList.PushStringBackref(out); + nameBackrefList.PushNameBackref(out); return out; } -string Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +DemangledNamePart Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& 
nameBackrefList) { - //m_logger->Indent(); - DemangleVariableList(params, nameBackrefList); - //m_logger->Dedent(); - m_logger->LogDebug("VariableList done\n"); - out += "<"; - for (size_t i = 0; i < params.size(); i++) + DemangledNamePart out; + BNNameType dummyFunctionType = NoNameType; + MSVC_TRACE("DemangleTemplateInstantiationNameInLocalContext: '{}'", m_reader.GetRaw()); + { - if (i == 0) - { - out += params[i].type->GetString(); - } - else - { - out += "," + params[i].type->GetString(); - } + _STD_VECTOR params; + bool backrefEligible = true; + BackrefContextSwitch localContext(nameBackrefList); + if (!m_reader.ConsumeIf("?$")) + throw DemangleException(); + out = DemangleUnqualifiedSymbolName(nameBackrefList, dummyFunctionType, backrefEligible); + if (backrefEligible && dummyFunctionType == NoNameType) + nameBackrefList.PushNameBackref(out); + DemangleTemplateParams(params, nameBackrefList, out); } - if (out[out.size()-1] == '>') - out += " "; //Be c++03 compliant where we can - out += ">"; - nameBackrefList.PushStringBackref(out); + // DemangleTemplateParams pushed into the temporary local context above. + // Record the completed specialization again after BackrefContextSwitch + // restores the enclosing context. 
+ nameBackrefList.PushTemplateSpecialization(out); + nameBackrefList.PushNameBackref(out); return out; } -// void Demangle::DemangleInitFiniStub(bool destructor, QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) -// { -// bool isStatic = false; -// if (reader.Peek() == '?') -// { -// reader.Consume(); -// isStatic = true; -// } -// string out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, classFunctionType); -// } - -string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) +void Demangle::DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, DemangledNamePart& out) { - string out; - if (reader.PeekString(2) == "?$") + NestingGuard nestingGuard(*this); + params.clear(); + const bool nestedTemplateContext = (m_templateParamDepth > 0); + struct NameBackrefScopeGuard + { + BackrefList& backrefs; + size_t typeCount; + size_t nameCount; + ~NameBackrefScopeGuard() + { + backrefs.typeList.resize(typeCount); + backrefs.nameList.resize(nameCount); + } + }; + struct TemplateDepthGuard + { + size_t& depth; + TemplateDepthGuard(size_t& depth): depth(depth) { depth++; } + ~TemplateDepthGuard() { depth--; } + }; + { - reader.Consume(2); - out = DemangleTemplateInstantiationName(nameBackrefList); - nameList.insert(nameList.begin(), out); + TemplateDepthGuard depthGuard(m_templateParamDepth); + NameBackrefScopeGuard scopeGuard { + nameBackrefList, + nameBackrefList.typeList.size(), + nameBackrefList.nameList.size() + }; + + DemangleVariableList(params, nameBackrefList, false); } - else if (reader.Peek() == '?') + + out.SetTemplateArguments(params); + nameBackrefList.PushTemplateSpecialization(out); + if (nestedTemplateContext) + nameBackrefList.PushNameBackref(out); +} + + +DemangledNamePart Demangle::DemangleUnqualifiedSymbolName(BackrefList& nameBackrefList, BNNameType& classFunctionType, + bool& backrefEligible) +{ + backrefEligible = 
true; + DemangledNamePart out; + _STD_STRING text; + if (m_reader.ConsumeIf('?')) { - reader.Consume(); - DemangleTypeNameLookup(out, classFunctionType); + text.clear(); + DemangleTypeNameLookup(text, classFunctionType); + out = MakeNameSegment(text); + // Lookup-based operator names are not normal identifier components and + // should not satisfy later scope backrefs such as strong_ordering@0@. + backrefEligible = false; } - else if (reader.Peek() >= '0' && reader.Peek() <= '9') + else if (char next = m_reader.PeekOr(); next >= '0' && next <= '9') { - out = nameBackrefList.GetStringBackref(reader.Read() - '0'); + out = nameBackrefList.GetNameBackref(m_reader.Read() - '0'); } else { - DemangleNameTypeString(out); + DemangleNameTypeString(text); + out = MakeNameSegment(text); } return out; } -TypeBuilder Demangle::DemangleString() +DemangledTypeNode Demangle::DemangleString(NameList& symbolName) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); // ??_C@_@ - if (reader.Peek() != '_') + if (!m_reader.ConsumeIf('_')) { throw DemangleException("Invalid mangled string name"); } - reader.Consume(); // Wide char flag (1 yes / 0 no) bool isWideChar = false; - switch (reader.Peek()) + switch (m_reader.Read()) { case '1': + case '2': // UTF-16/UTF-32 encoding variants + case '3': isWideChar = true; break; case '0': @@ -1075,66 +1441,67 @@ TypeBuilder Demangle::DemangleString() default: throw DemangleException("Invalid mangled string name"); } - reader.Consume(); // Length is just a number - int64_t lengthRaw; - DemangleNumber(lengthRaw); - if (lengthRaw < 0) - { - throw DemangleException("Invalid mangled string name"); - } - uint64_t length = (uint64_t)lengthRaw; + uint64_t length = DecodeEncodedUnsignedNumber(); - m_logger->LogDebug("%s: Before CRC32 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Before CRC32 '{}'", __FUNCTION__, m_reader.GetRaw()); // CRC32 (ignored) - while 
(reader.Peek() != '@') + while (m_reader.Peek() != '@') { // Usually 8 bytes but I've seen it be 7 for some ungodly reason - reader.Consume(); + m_reader.Consume(); } - reader.Consume(); + m_reader.Consume(); bool truncated = false; - string name = ""; - TypeBuilder type; + _STD_STRING name; + _STD_STRING literalPrefix; + DemangledTypeNode type; // String bytes if (isWideChar) { - m_logger->LogDebug("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); - string utf8name; - truncated = (length > 64); - while (reader.Peek() != '@') + MSVC_TRACE("{}: Wide string '{}'", __FUNCTION__, m_reader.GetRaw()); + _STD_STRING utf8name; + literalPrefix = "L"; + // Track the last wide char so we can detect missing null terminator. + bool lastWideCharWasNull = false; + size_t wcharCount = 0; + while (m_reader.Peek() != '@') { - uint16_t wch; - DemangleWideChar(wch); - - uint8_t chs[2]; - chs[0] = wch & 0xFF; - chs[1] = wch >> 8; + char highByte = DemangleChar(); + char lowByte = DemangleChar(); + uint8_t chs[2] = {static_cast(lowByte), static_cast(highByte)}; + lastWideCharWasNull = (chs[0] == 0) && (chs[1] == 0); + wcharCount++; // TODO: This is actually UCS2 but we don't have an easy decoder for that utf8name += Unicode::UTF16ToUTF8(&chs[0], 2); } - reader.Consume(); + m_reader.Consume(); + + // MSVC string literals always mangle their trailing null. A payload + // that doesn't end in a wide null means the original was too long to + // fit in the mangling and was truncated. Matches LLVM's demangler. 
+ if (wcharCount == 0 || !lastWideCharWasNull) + truncated = true; name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } else { - m_logger->LogDebug("%s: Non-wide string '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Non-wide string '{}'", __FUNCTION__, m_reader.GetRaw()); uint64_t numNulls = 0; size_t endNulls = 0; - vector chars; - while (reader.Peek() != '@') + _STD_VECTOR chars; + while (m_reader.Peek() != '@') { - char ch; - DemangleChar(ch); + char ch = DemangleChar(); if (ch == 0) { numNulls++; @@ -1146,233 +1513,435 @@ TypeBuilder Demangle::DemangleString() } chars.push_back(ch); } - reader.Consume(); + m_reader.Consume(); - if (length > (uint64_t)chars.size() + 1) + if (length > static_cast(chars.size()) + 1) { truncated = true; } + // MSVC includes the trailing '\0' in the mangled payload. If the last + // byte isn't a null, the original string was truncated to fit the + // encoding's size limit — LLVM signals this with a `...` suffix. + if (!chars.empty() && chars.back() != 0) + truncated = true; - // Now time to guess encoding - if (chars.size() % 1 != 0) + // Now time to guess encoding. Only take a wide-character guess if both + // the decoded byte payload and declared array length are aligned for it. 
+ const size_t payloadBytes = chars.size() - endNulls; + if ((payloadBytes % 4 == 0) && (length % 4 == 0) && numNulls > length * 2 / 3) { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); - } - else - { - if (chars.size() % 4 == 0 && numNulls > length * 2 / 3) + MSVC_TRACE("{}: Looks like UTF32 '{}'", __FUNCTION__, m_reader.GetRaw()); + _STD_STRING utf8name; + for (size_t i = 0; i < payloadBytes; i += 4) { - m_logger->LogDebug("%s: Looks like UTF32 '%s'\n", __FUNCTION__, reader.GetRaw()); - string utf8name; - for (size_t i = 0; i < chars.size() - endNulls; i += 4) - { - utf8name += Unicode::UTF32ToUTF8(chars.data() + i); - } - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(4), length / 4); + utf8name += Unicode::UTF32ToUTF8(chars.data() + i); } - else if (numNulls > length / 3) + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); + literalPrefix = "U"; + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(4), length / 4); + } + else if ((payloadBytes % 2 == 0) && (length % 2 == 0) && numNulls > length / 3) + { + MSVC_TRACE("{}: Looks like UTF16 '{}'", __FUNCTION__, m_reader.GetRaw()); + _STD_STRING utf8name; + for (size_t i = 0; i < payloadBytes; i += 2) { - m_logger->LogDebug("%s: Looks like UTF16 '%s'\n", __FUNCTION__, reader.GetRaw()); - string utf8name; - for (size_t i = 0; i < chars.size() - endNulls; i += 2) - { - utf8name += Unicode::UTF16ToUTF8(chars.data() + i, 2); - } - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + utf8name += Unicode::UTF16ToUTF8(chars.data() 
+ i, 2); } - else - { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); + literalPrefix = "L"; + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); + } + else + { + MSVC_TRACE("{}: Looks like UTF8 '{}'", __FUNCTION__, m_reader.GetRaw()); - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); - } + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::IntegerType(1, true), length); } } - if (truncated) - { - name += "..."; - } - m_varName.push_back(name); + symbolName.clear(); + symbolName.push_back(MakeNameSegment(fmt::bnformat("{}\"{}\"{}", literalPrefix, name, truncated ? "..." : ""))); return type; } -TypeBuilder Demangle::DemangleTypeInfoName() +DemangledTypeNode Demangle::DemangleTypeInfoName(NameList& symbolName) { - if (reader.Read() != '?') + if (m_reader.Read() != '?') throw DemangleException("Unknown raw name type"); bool _const = false; bool _volatile = false; bool isMember = false; DemangleModifiers(_const, _volatile, isMember); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); - QualifiedName name; - TypeBuilder type = DemangleVarType(m_backrefList, false, name); + DemangledTypeNode type = DemangleVarType(m_backrefList, false); type.SetConst(_const); type.SetVolatile(_volatile); switch (type.GetClass()) { case NamedTypeReferenceClass: - m_varName = type.GetNamedTypeReference()->GetName(); - return type; + { + // Match LLVM's demangler: a raw type-info name (.?A...) renders as + // ` `RTTI Type Descriptor Name''`. 
Bake the type + // keyword + name into the symbol's qualified name, then return a + // fresh NamedType marked RttiTypeDescriptor so BN's core type + // formatter skips its own class/struct prefix - this mirrors the + // treatment of ??_R0 in DemangleNameTypeRtti case '0'. + _STD_STRING rendered = type.GetString() + " `RTTI Type Descriptor Name'"; + symbolName = { MakeNameSegment(rendered) }; + NameList rttiTypeName = type.GetName(); + if (rttiTypeName.empty()) + for (const auto& segment: type.RenderTypeNameSegments()) + rttiTypeName.push_back(MakeNameSegment(segment)); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, std::move(rttiTypeName)); + newType.SetNameType(RttiTypeDescriptor); + return newType; + } default: throw DemangleException("Unexpected type of RTTI Type Name"); } } -void Demangle::DemangleName(QualifiedName& nameList, +void Demangle::PrependNameComponent(NameList& nameList, DemangledNamePart name) +{ + nameList.insert(nameList.begin(), std::move(name)); +} + + +void Demangle::AppendStringName(NameList& nameList, BackrefList& nameBackrefList) +{ + _STD_STRING text; + DemangleNameTypeString(text); + DemangledNamePart name = MakeNameSegment(text); + PrependNameComponent(nameList, name); + nameBackrefList.PushNameBackref(std::move(name)); +} + + +void Demangle::FinalizeConstructorTemplateName(NameList& nameList, size_t nameListSizeAtEntry, bool pending) +{ + if (!pending) + return; + + if (nameList.size() <= nameListSizeAtEntry + 1) + throw DemangleException("Constructor template missing class scope"); + + DemangledNamePart& constructorTemplateName = nameList.back(); + if (!constructorTemplateName.HasTemplateArguments()) + throw DemangleException("Invalid constructor template name"); + + // `??$?0...@Class@@` is a templated constructor. 
LLVM models `?0` as a + // structor identifier and attaches the parsed enclosing class to it after + // the qualified name is complete; Wine's undname does the same as a string + // post-process. Keep the parsed template args and only fill in the + // constructor's base name here: + // `?0` becomes `Class`. + constructorTemplateName.SetBase(nameList[nameList.size() - 2].GetString() + + constructorTemplateName.GetBase()); +} + + +bool Demangle::FunctionTypeHasPointerSuffix(char functionType) +{ + return functionType != 'C' && functionType != 'D' && functionType != 'K' && functionType != 'L' + && functionType != 'S' && functionType != 'T' && functionType != 'Y' && functionType != 'Z'; +} + + +_STD_STRING Demangle::FormatFunctionScopeSignature(const DemangledTypeNode& type, const NameList& scopeName) +{ + _STD_STRING out = type.GetTypeAndName(FinalizeNameList(scopeName)); + while (!out.empty() && out.back() == ' ') + out.pop_back(); + return out; +} + + +void Demangle::AppendLocalScope(NameList& nameList, BackrefList& nameBackrefList, uint64_t scopeOrdinal, + bool typeNameContext) +{ + NameList scopeName; + BNNameType scopeFunctionType = NoNameType; + DemangleName(scopeName, scopeFunctionType, nameBackrefList, typeNameContext); + + if (m_reader.Length() == 0) + throw DemangleException("Missing local scope function encoding"); + + char ft = m_reader.Read(); + if (ft == '9' && m_reader.PeekOr() == '@') + { + PrependNameComponent(nameList, MakeNameSegment("`" + to_string(scopeOrdinal) + "'")); + nameList.insert(nameList.begin(), scopeName.begin(), scopeName.end()); + return; + } + if (ft < 'A' || ft > 'Z') + throw DemangleException("Invalid local scope function encoding"); + + DemangledTypeNode scopeType = DemangleFunction( + scopeFunctionType, FunctionTypeHasPointerSuffix(ft), nameBackrefList).type; + + PrependNameComponent(nameList, MakeNameSegment("`" + to_string(scopeOrdinal) + "'")); + PrependNameComponent(nameList, MakeNameSegment("`" + 
FormatFunctionScopeSignature(scopeType, scopeName) + "'")); +} + + +bool Demangle::TryAppendLocalScopeAt(NameList& nameList, BackrefList& nameBackrefList, + const char* encodedNumberStart, bool typeNameContext) +{ + struct LocalScopeParseCheckpoint + { + Demangle& demangler; + BackrefList& backrefs; + NameList& nameList; + const char* reader; + NameList savedNameList; + size_t typeBackrefs; + size_t nameBackrefs; + size_t templateBackrefs; + + LocalScopeParseCheckpoint(Demangle& demangler, NameList& nameList, BackrefList& backrefs) : + demangler(demangler), + backrefs(backrefs), + nameList(nameList), + reader(demangler.m_reader.GetRaw()), + savedNameList(nameList), + typeBackrefs(backrefs.typeList.size()), + nameBackrefs(backrefs.nameList.size()), + templateBackrefs(backrefs.templateList.size()) + { + } + + void Restore() + { + demangler.m_reader.SetRaw(reader); + nameList = savedNameList; + backrefs.typeList.resize(typeBackrefs); + backrefs.nameList.resize(nameBackrefs); + backrefs.templateList.resize(templateBackrefs); + } + }; + + LocalScopeParseCheckpoint checkpoint(*this, nameList, nameBackrefList); + + m_reader.SetRaw(encodedNumberStart); + uint64_t scopeOrdinal = 0; + try + { + scopeOrdinal = DecodeEncodedUnsignedNumber(); + } + catch (DemangleException&) + { + checkpoint.Restore(); + return false; + } + + if (m_reader.PeekMatch("??", 2)) + { + AppendLocalScope(nameList, nameBackrefList, scopeOrdinal, typeNameContext); + return true; + } + + checkpoint.Restore(); + return false; +} + + +void Demangle::DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList) + BackrefList& nameBackrefList, + bool typeNameContext) { - string out; - BNNameType functionType; - BNNameType dummyFunctionType; - vector params; - while(1) + NestingGuard nestingGuard(*this); + // NameList is stored outermost-first for QualifiedName, but MSVC encodes + // names leaf-first. 
Ordinary parsed components are prepended; constructor + // and destructor branches recurse to parse the class scope, then append the + // synthesized leaf intentionally. + size_t nameListSizeAtEntry = nameList.size(); + bool pendingConstructorTemplateName = false; + + DemangledNamePart out; + _STD_STRING outText; + BNNameType functionType = NoNameType; + BNNameType dummyFunctionType = NoNameType; + _STD_VECTOR params; + + size_t strippedNestedNamePrefixes = 0; + while(true) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (GetNameType()) + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + if (m_reader.ConsumeIf("??@")) { - case NameString: - m_logger->LogDebug("Demangle String\n"); - DemangleNameTypeString(out); - nameList.insert(nameList.begin(), out); - m_logger->LogDebug("Pushing backref NameString %s", out.c_str()); - nameBackrefList.PushStringBackref(out); - m_logger->LogDebug("nameList.front(): %s\n", nameList.front().c_str()); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - break; - case NameLookup: - m_logger->LogDebug("Demangle Lookup\n"); - DemangleTypeNameLookup(out, functionType); - classFunctionType = functionType; - nameList.insert(nameList.begin(), out); - break; - case NameBackref: - m_logger->LogDebug("Demangle Backref"); - out = nameBackrefList.GetStringBackref(reader.Read() - '0'); - m_logger->LogDebug("Demangle Backref: %s", out.c_str()); - nameList.insert(nameList.begin(), out); - break; - case NameTemplate: + AppendStringName(nameList, nameBackrefList); + } + else if (m_reader.ConsumeIf("??")) { - m_logger->LogDebug("Demangle Template: '%s'\n", reader.GetRaw()); - BackrefList templateBackref; - out = DemangleUnqualifiedSymbolName(nameList, templateBackref, functionType); - m_logger->LogDebug("Pushing backref NameTemplate %s", out.c_str()); - templateBackref.PushStringBackref(out); - m_logger->LogDebug("Demangling Template variables %s\n", reader.GetRaw()); - 
DemangleTemplateParams(params, templateBackref, out); - nameList.insert(nameList.begin(), out); - nameBackrefList.PushStringBackref(out); - break; + if (m_nestingDepth + strippedNestedNamePrefixes >= MAX_DEMANGLE_NESTING_DEPTH) + throw DemangleException("Demangle nesting depth exceeded"); + strippedNestedNamePrefixes++; + continue; } - case NameConstructor: - m_logger->LogDebug("NameConstructor\n"); - classFunctionType = ConstructorNameType; - DemangleName(nameList, dummyFunctionType, nameBackrefList); - if (nameList.size() == 0) - throw DemangleException(); - nameList.push_back(nameList[nameList.size()-1]); - return; - case NameDestructor: - classFunctionType = ConstructorNameType; - m_logger->LogDebug("NameDestructor\n"); - DemangleName(nameList, dummyFunctionType, nameBackrefList); - if (nameList.size() == 0) - throw DemangleException(); - nameList.push_back("~" + nameList[nameList.size()-1]); - return; - case NameRtti: - m_logger->LogDebug("NameRtti\n"); - DemangleNameTypeRtti(classFunctionType, nameBackrefList, out); - nameList.insert(nameList.begin(), out); - break; - // case NameDynamicInitializer: - // m_logger->LogDebug("NameDynamicInitializer\n"); - // DemangleInitFiniStub(false); - // break; - // case NameDynamicAtExitDestructor: - // m_logger->LogDebug("NameDynamicAtExitDestructor\n"); - // DemangleInitFiniStub(false); - // break; - case NameReturn: - m_logger->LogDebug("NameReturn\n"); - classFunctionType = OperatorReturnTypeNameType; - if (reader.PeekString(2) == "?$") + else if (m_reader.PeekMatch("?$", 2)) + { + MSVC_TRACE("Demangle Template: '{}'", m_reader.GetRaw()); + if (typeNameContext || (m_templateParamDepth > 0) || (nameList.size() > nameListSizeAtEntry)) { - out = DemangleTemplateInstantiationName(nameBackrefList); + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + } + else + { + if (!m_reader.ConsumeIf("?$")) + throw DemangleException(); + BNNameType localFunctionType = NoNameType; + bool backrefEligible = true; + 
out = DemangleUnqualifiedSymbolName(nameBackrefList, localFunctionType, backrefEligible); + if (backrefEligible && localFunctionType == NoNameType) + { + MSVC_TRACE("Pushing backref NameTemplate {}", out.GetString()); + nameBackrefList.PushNameBackref(out); + } + MSVC_TRACE("Demangling Template variables {}", m_reader.GetRaw()); DemangleTemplateParams(params, nameBackrefList, out); + if (localFunctionType == ConstructorNameType) + { + classFunctionType = ConstructorNameType; + pendingConstructorTemplateName = true; + } + } + PrependNameComponent(nameList, out); + } + else if (char next = m_reader.PeekOr(); next >= '0' && next <= '9') + { + MSVC_TRACE("Demangle Backref"); + out = nameBackrefList.GetNameBackref(m_reader.Read() - '0'); + MSVC_TRACE("Demangle Backref: {}", out.GetString()); + PrependNameComponent(nameList, out); + } + else if (m_reader.ConsumeIf('?')) + { + if (char next = m_reader.PeekOr(); next >= 'a' && next <= 'z') + { + // Lowercase after ? indicates a non-standard extension name + // (e.g., ??null$initializer$ for thread-safe static init guards). 
+ AppendStringName(nameList, nameBackrefList); + } + else if (m_reader.PeekMatch("A0x", 3)) + { + m_reader.Consume(); + DemangleNameTypeString(outText); // discard compiler-generated hash + out = MakeNameSegment("`anonymous namespace'"); + PrependNameComponent(nameList, out); + nameBackrefList.PushNameBackref(std::move(out)); + } + else if (m_reader.ConsumeIf("_R")) + { + MSVC_TRACE("NameRtti"); + DemangleNameTypeRtti(classFunctionType, nameBackrefList, outText); + out = MakeNameSegment(outText); + PrependNameComponent(nameList, out); } else { - DemangleNameTypeString(out); - nameBackrefList.PushStringBackref(out); + bool parsedScopePrefix = false; + if (nameList.size() > nameListSizeAtEntry) + { + parsedScopePrefix = TryAppendLocalScopeAt(nameList, nameBackrefList, m_reader.GetRaw(), typeNameContext) || + TryDemangleWinRTEscapedScopeName(nameList, nameBackrefList); + } + + if (!parsedScopePrefix) + { + if (m_reader.ConsumeIf('0')) + { + MSVC_TRACE("NameConstructor"); + classFunctionType = ConstructorNameType; + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (nameList.empty()) + throw DemangleException(); + nameList.push_back(nameList[nameList.size()-1]); + return; + } + if (m_reader.ConsumeIf('1')) + { + MSVC_TRACE("NameDestructor"); + classFunctionType = ConstructorNameType; + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (nameList.empty()) + throw DemangleException(); + nameList.push_back(MakeNameSegment("~" + nameList[nameList.size()-1].GetString())); + return; + } + if (m_reader.ConsumeIf('B')) + { + MSVC_TRACE("NameReturn"); + classFunctionType = OperatorReturnTypeNameType; + if (m_reader.PeekMatch("?$", 2)) + { + if (m_templateParamDepth > 0) + { + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + } + else + { + out = DemangleTemplateInstantiationName(nameBackrefList); + DemangleTemplateParams(params, nameBackrefList, out); + } + } + else + { + 
DemangleNameTypeString(outText); + out = MakeNameSegment(outText); + nameBackrefList.PushNameBackref(out); + } + PrependNameComponent(nameList, out); + } + else + { + MSVC_TRACE("Demangle Lookup"); + outText.clear(); + DemangleTypeNameLookup(outText, functionType); + out = MakeNameSegment(outText); + classFunctionType = functionType; + PrependNameComponent(nameList, out); + // Check if this is a scope specifier. Scope specifiers are ? + // followed by either @?? or directly ?? (for digit scopes like ?3??func@...) + // When nameList has prior components, the operator name is actually a scope index + // Also handle dynamic init/dtor wrapping ??@ (MD5 hash) + if (m_reader.ConsumeIf("??@")) + { + _STD_STRING hash = m_reader.ReadUntil('@'); + PrependNameComponent(nameList, MakeNameSegment("??@" + hash + "@")); + // Consume the trailing @ (name terminator) — the ??@hash@ pattern + // is followed by @@ (end of scoped name) before the function type + if (m_reader.Length() > 0) + m_reader.ConsumeIf('@'); + } + } + } } - nameList.insert(nameList.begin(), out); - break; - default: - throw DemangleException(); } - if (nameList.StringSize() > MAX_DEMANGLE_LENGTH) - throw DemangleException(); - if (reader.Peek() == '@') + else + { + AppendStringName(nameList, nameBackrefList); + } + if (m_reader.ConsumeIf('@')) { - reader.Consume(); + FinalizeConstructorTemplateName(nameList, nameListSizeAtEntry, pendingConstructorTemplateName); return; } } } -Ref Demangle::GetCallingConventionForType(BNCallingConventionName ccName) -{ - string name; - switch (ccName) - { - case NoCallingConvention: name = ""; break; - case CdeclCallingConvention: name = "cdecl"; break; - case PascalCallingConvention: name = "pascal"; break; - case ThisCallCallingConvention: name = "thiscall"; break; - case STDCallCallingConvention: name = "stdcall"; break; - case FastcallCallingConvention: name = "fastcall"; break; - case CLRCallCallingConvention: name = "clrcall"; break; - case EabiCallCallingConvention: name 
= "eabi"; break; - case VectorCallCallingConvention: name = "vectorcall"; break; - case SwiftCallingConvention: name = "swiftcall"; break; - case SwiftAsyncCallingConvention: name = "swiftasync"; break; - default: break; - } - - if (m_platform) - { - for (const auto& cc : m_platform->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - } - for (const auto& cc : m_arch->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - return nullptr; -} BNCallingConventionName Demangle::DemangleCallingConvention() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (m_reader.Read()) { case 'A': //Exported function case 'B': return CdeclCallingConvention; @@ -1397,298 +1966,323 @@ BNCallingConventionName Demangle::DemangleCallingConvention() } } -set Demangle::DemanglePointerSuffix() + +void Demangle::ConsumeExtendedModifierPrefix() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - set suffix; - if (reader.Peek() == '@') + while (m_reader.ConsumeIf("$A")) + { + } +} + + +uint8_t Demangle::DemanglePointerSuffix() +{ + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + uint8_t suffix = 0; + if (m_reader.PeekOr() == '@') return suffix; - char elm = reader.Peek(); - for (int i = 0; i < 5; i++, elm = reader.Peek()) + char elm = m_reader.PeekOr(); + for (int i = 0; i < 5; i++, elm = m_reader.PeekOr()) { if (elm == 'E') - suffix.insert(suffix.end(), Ptr64Suffix); + suffix |= (1u << Ptr64Suffix); else if (elm == 'F') - suffix.insert(suffix.end(), UnalignedSuffix); + suffix |= (1u << UnalignedSuffix); else if (elm == 'G') - suffix.insert(suffix.end(), ReferenceSuffix); + suffix |= (1u << ReferenceSuffix); else if (elm == 'H') - suffix.insert(suffix.end(), LvalueSuffix); + suffix |= (1u << LvalueSuffix); else if (elm == 'I') - suffix.insert(suffix.end(), RestrictSuffix); + suffix |= (1u << RestrictSuffix); 
else break; - reader.Consume(1); + m_reader.Consume(); } return suffix; } void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - if (reader.Peek() == '@') - return; - + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + // Always write the out params, even when `@` marks the no-modifiers case. _const = false; _volatile = false; isMember = false; - char elm = reader.Read(); - switch (elm) + if (m_reader.PeekOr() == '@') + return; + + switch (m_reader.Read()) { case 'A': break; - case 'B': _const = true; break; + case 'B': //fall through case 'J': _const = true; break; - case 'C': _volatile = true; break; - case 'G': _volatile = true; break; + case 'C': //fall through + case 'G': //fall through case 'K': _volatile = true; break; - case 'D': _const = true; _volatile = true; break; - case 'H': _const = true; _volatile = true; break; + case 'D': //fall through + case 'H': //fall through case 'L': _const = true; _volatile = true; break; - case '6': break; - case '7': break; - case 'M': break; + case '6': //fall through + case '7': //fall through + case 'M': //fall through case 'N': break; case 'O': _volatile = true; break; case 'P': _volatile = true; _const = true; break; case 'Q': isMember = true; break; - case 'U': break; + case 'U': //fall through case 'Y': break; case 'R': _const = true; isMember = true; break; - case 'V': _const = true; break; + case 'V': //fall through case 'Z': _const = true; break; case 'S': _volatile = true; isMember = true; break; - case 'W': _volatile = true; break; + case 'W': //fall through case '0': _volatile = true; break; case 'T': _const = true; _volatile = true; isMember = true; break; - case 'X': _const = true; _volatile = true; break; + case 'X': //fall through case '1': _const = true; _volatile = true; break; - case '8': break; - case '9': break; + case '8': //fall through + case '9': //fall through case '2': break; case '3': 
_const = true; break; case '4': _volatile = true; break; case '5': _const = true; _volatile = true; break; case '_': - elm = reader.Read(); - if (elm == 'A' || elm == 'B') + switch (m_reader.Read()) { - //For unhandled "member" and "based" parameters + case 'A': + case 'B': + case 'C': + case 'D': + // Accepted but not currently modeled. break; - } - else if (elm == 'C' || elm == 'D') - { - //For unhandled "member" and "based" parameters - break; - } - else - { + default: throw DemangleException(); } break; default: throw DemangleException(); } - return; } -TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) +bool Demangle::FunctionClassNeedsImplicitThis(int funcClass) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - bool _const = false, _volatile = false, isMember = false; - set suffix; - TypeBuilder returnType; + return funcClass != NoneFunctionClass + && (funcClass & StaticFunctionClass) != StaticFunctionClass + && (funcClass & GlobalFunctionClass) != GlobalFunctionClass; +} + + +void Demangle::AppendThunkAdjustorToName(NameList& nameList, const ThunkAdjustor& adjustor) +{ + switch (adjustor.kind) + { + case ThunkAdjustorKind::Static: + AppendToLastNameSegment(nameList, "`adjustor{" + to_string(adjustor.adjustor) + "}'"); + return; + case ThunkAdjustorKind::Vtordisp: + AppendToLastNameSegment(nameList, "`vtordisp{" + to_string(adjustor.vtorDispOffset) + ", " + + to_string(adjustor.staticOffset) + "}'"); + return; + case ThunkAdjustorKind::Vtordispex: + AppendToLastNameSegment(nameList, "`vtordispex{" + to_string(adjustor.vbptrOffset) + ", " + + to_string(adjustor.vbOffsetOffset) + ", " + to_string(adjustor.vtorDispOffset) + ", " + + to_string(adjustor.staticOffset) + "}'"); + return; + } +} + + +void Demangle::SetImplicitThisParameter(DemangledTypeNode& type, BNNameType classFunctionType, const NameList& enclosingName) +{ + NameList thisName = enclosingName; + 
if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) + thisName.pop_back(); + auto thisNamedType = DemangledTypeNode::NamedType(TypedefNamedTypeClass, std::move(thisName)); + type.SetImplicitThisParameter(DemangledTypeNode::PointerType( + std::move(thisNamedType), false, false, PointerReferenceType)); +} + + +void Demangle::ApplySymbolFunctionContext(DemangledFunction& function, NameList& symbolName, + BNNameType classFunctionType, int funcClass) +{ + if (function.thunkAdjustor) + AppendThunkAdjustorToName(symbolName, *function.thunkAdjustor); + if (FunctionClassNeedsImplicitThis(funcClass)) + SetImplicitThisParameter(function.type, classFunctionType, symbolName); +} + + +Demangle::DemangledFunction Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, + BackrefList& nameBackrefList, int funcClass) +{ + NestingGuard nestingGuard(*this); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + bool _const = false, _volatile = false; + uint8_t suffix = 0; + DemangledTypeNode returnType; BNCallingConventionName cc; + std::optional thunkAdjustor; - //Demangle adjustor which we don't do anything with for now + // Thunk adjustors are part of the function grammar, but the symbol parser + // owns the name that displays them. 
if ((funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass) { - int64_t adjustor; - DemangleNumber(adjustor); - m_varName.back() += "`adjustor{" + to_string(adjustor) + "}'"; + ThunkAdjustor adjustor {}; + adjustor.kind = ThunkAdjustorKind::Static; + adjustor.adjustor = DecodeEncodedUnsignedNumber(); + thunkAdjustor = adjustor; } else if ((funcClass & VirtualThunkFunctionClass) == VirtualThunkFunctionClass) { if ((funcClass & VirtualThunkExFunctionClass) == VirtualThunkExFunctionClass) { - int64_t vbptrOffset; - int64_t vbOffsetOffset; - int64_t vtorDispOffset; - int64_t staticOffset; - DemangleNumber(vbptrOffset); - DemangleNumber(vbOffsetOffset); - DemangleNumber(vtorDispOffset); - DemangleNumber(staticOffset); - m_varName.back() += "`vtordispex{" + to_string(vbptrOffset) + ", " + to_string(vbOffsetOffset) + ", " + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; + ThunkAdjustor adjustor {}; + adjustor.kind = ThunkAdjustorKind::Vtordispex; + adjustor.vbptrOffset = DecodeEncodedSignedInt32(); + adjustor.vbOffsetOffset = DecodeEncodedSignedInt32(); + adjustor.vtorDispOffset = DecodeEncodedSignedInt32(); + adjustor.staticOffset = DecodeEncodedUnsignedNumber(); + thunkAdjustor = adjustor; } else { - int64_t vtorDispOffset; - int64_t staticOffset; - DemangleNumber(vtorDispOffset); - DemangleNumber(staticOffset); - m_varName.back() += "`vtordisp{" + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; + ThunkAdjustor adjustor {}; + adjustor.kind = ThunkAdjustorKind::Vtordisp; + adjustor.vtorDispOffset = DecodeEncodedSignedInt32(); + adjustor.staticOffset = DecodeEncodedUnsignedNumber(); + thunkAdjustor = adjustor; } } if (pointerSuffix) { + bool isMember = false; suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); } - if (reader.Peek() == '?') - reader.Consume(); + m_reader.ConsumeIf('?'); cc = DemangleCallingConvention(); bool shouldHaveReturnType = true; - if 
(reader.Peek() == '@') + if (m_reader.ConsumeIf('@')) { //No return type shouldHaveReturnType = false; - reader.Consume(); - m_logger->LogDebug("Function has no return type %s", reader.GetRaw()); + MSVC_TRACE("Function has no return type {}", m_reader.GetRaw()); } else { //Demangle function return type - bool return_const = false, return_volatile = false, isMember = false; - set return_suffix; + bool return_const = false, return_volatile = false; + uint8_t return_suffix = 0; bool hasModifiers = false; //Check for modifiers before return type - if (reader.Peek() == '?') + if (m_reader.ConsumeIf('?')) { - reader.Consume(1); + bool localIsMember = false; return_suffix = DemanglePointerSuffix(); - DemangleModifiers(return_const, return_volatile, isMember); + DemangleModifiers(return_const, return_volatile, localIsMember); hasModifiers = true; } - QualifiedName name; - m_logger->LogDebug("Demangle function return type %s", reader.GetRaw()); - //m_logger->Indent(); - returnType = DemangleVarType(nameBackrefList, true, name); - m_logger->LogDebug("Return type: %s", returnType.GetString().c_str()); - //m_logger->Dedent(); + MSVC_TRACE("Demangle function return type {}", m_reader.GetRaw()); + returnType = DemangleVarType(nameBackrefList, true); + MSVC_TRACE("Return type: {}", returnType.GetString()); + // '...' (varargs) is only legal as the trailing parameter marker, + // never as a return type. Reject so we don't build a bogus type. 
+ if (returnType.GetClass() == VarArgsTypeClass) + throw DemangleException("Varargs ('Z') is not a valid function return type"); if (hasModifiers) { returnType.SetConst(return_const); returnType.SetVolatile(return_volatile); - returnType.SetPointerSuffix(return_suffix); + returnType.SetPointerSuffixBits(return_suffix); } } - if (reader.Peek() == '@') - reader.Consume(); + m_reader.ConsumeIf('@'); - m_logger->LogDebug("\tDemangle Function Parameters %s", reader.GetRaw()); - vector params; - bool needsThisPtr = false; - if (cc == ThisCallCallingConvention) - { - needsThisPtr = true; - } - if (funcClass != NoneFunctionClass) - { - if ((funcClass & VirtualFunctionClass) == VirtualFunctionClass - || (funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass - || (funcClass & VirtualThunkFunctionClass) == VirtualThunkFunctionClass) - { - needsThisPtr = true; - } - else if ((funcClass & StaticFunctionClass) != StaticFunctionClass - && (funcClass & GlobalFunctionClass) != GlobalFunctionClass) - { - needsThisPtr = true; - } - } - - if (needsThisPtr) - { - // Insert implicit "this" parameter for thiscall - // TODO: Replace this with calling convention / platform callbacks to insert thisptr (ask rss) - QualifiedName thisName = m_varName; - if (thisName.size() > 0) - thisName.erase(thisName.end() - 1); - params.push_back(FunctionParameter("this", Type::PointerType(m_arch, Type::NamedType(thisName, Type::VoidType())), DefaultLocationSource, {})); - } + MSVC_TRACE("\tDemangle Function Parameters {}", m_reader.GetRaw()); + _STD_VECTOR params; - DemangleVariableList(params, m_backrefList); + DemangleVariableList(params, nameBackrefList); + m_reader.ConsumeIf('Z'); - if (params.size() >= 1 && params.back().type->GetClass() == VoidTypeClass) + if (!params.empty() && params.back().type && params.back().type->GetClass() == VoidTypeClass) params.pop_back(); - // TODO: fix calling convention - Ref returnTypeObj; - if (shouldHaveReturnType) - returnTypeObj = 
returnType.Finalize(); - else - returnTypeObj = Type::VoidType(); - TypeBuilder newType = TypeBuilder::FunctionType(returnTypeObj, nullptr, params); + if (!shouldHaveReturnType) + returnType = DemangledTypeNode::VoidType(); + DemangledTypeNode newType = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); newType.SetConst(_const); newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + newType.SetPointerSuffixBits(suffix); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); - auto convention = GetCallingConventionForType(cc); - if (convention) - newType.SetCallingConvention(convention); - m_logger->LogDebug("Successfully Created Function Type!\n"); - return newType; + MSVC_TRACE("Successfully Created Function Type!"); + return {std::move(newType), std::move(thunkAdjustor)}; } -TypeBuilder Demangle::DemangleData() +DemangledTypeNode Demangle::DemangleData(BackrefList& varList) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - QualifiedName name; - //m_logger->Indent(); - TypeBuilder newType = DemangleVarType(m_backrefList, false, name); - //m_logger->Dedent(); + DemangledTypeNode newType = DemangleVarType(varList, false); auto suffix = DemanglePointerSuffix(); DemangleModifiers(_const, _volatile, isMember); - newType.SetConst(_const); - newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + if (newType.GetClass() == PointerTypeClass) + { + newType.AddPointerSuffixBits(suffix); + newType.AddQualifiersToPointerChild(_const, _volatile); + } + else + { + newType.SetConst(_const); + newType.SetVolatile(_volatile); + newType.SetPointerSuffixBits(suffix); + } return newType; } -TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) +DemangledTypeNode Demangle::DemangleRTTI(BNNameType nameType, const NameList& symbolName) { - m_logger->LogDebug("%s: 
'%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - if (reader.Length() > 0) + if (m_reader.Length() > 0) DemangleModifiers(_const, _volatile, isMember); - QualifiedName typeName = m_varName; - m_logger->LogDebug("new struct type\n"); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + NameList typeName = symbolName; + MSVC_TRACE("new struct type"); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); newType.SetNameType(nameType); newType.SetConst(_const); newType.SetVolatile(_volatile); - m_logger->LogDebug("log: %s\n", newType.GetString().c_str()); + MSVC_TRACE("log: {}", newType.GetString()); return newType; } -TypeBuilder Demangle::DemangleVTable() +DemangledTypeNode Demangle::DemangleVTable(BackrefList& nameBackrefList, NameList& symbolName) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; DemangleModifiers(_const, _volatile, isMember); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, m_varName)); - if (reader.Peek() != '@') + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, symbolName); + if (m_reader.PeekOr() != '@') { - QualifiedName typeName; + NameList typeName; BNNameType classFunctionType = NoNameType; - DemangleName(typeName, classFunctionType, m_backrefList); - string suffix = m_varName.back(); - m_varName.back() += "{for `" + typeName.GetString() + "'}"; + DemangleName(typeName, classFunctionType, nameBackrefList, true); + if (symbolName.empty()) + throw DemangleException("VTable name missing suffix"); + DemangledNamePart suffix = symbolName.back(); + 
AppendToLastNameSegment(symbolName, "{for `" + JoinNameList(typeName) + "'}"); typeName.push_back(suffix); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); } newType.SetConst(_const); newType.SetVolatile(_volatile); @@ -1697,164 +2291,456 @@ TypeBuilder Demangle::DemangleVTable() } +// ??__E (dynamic initializer) / ??__F (dynamic atexit destructor). +// +// LLVM dispatches these at the top level via demangleSpecialIntrinsic --> +// demangleInitFiniStub. The mangling wraps another symbol (either a variable +// or a function) and emits a new function stub that initializes/destroys it: +// +// ??__E<name><function-encoding> function form, e.g. ??__Efoo@@YAXXZ +// ??__E?<name><storage-class><type>@@<function-encoding> variable form, e.g. ??__E?foo@@3HA@@YAXXZ +// +// LLVM's output places the descriptor (`dynamic initializer for '<target>'`) +// at file scope — not as a member of the target's enclosing class — and +// interpolates the target name inside backticks/quotes. For the variable +// form, it additionally renders the variable's type inside the inner +// backtick pair: `dynamic initializer for `int foo''. +Demangle::DemangleContext Demangle::DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList) +{ + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + + // /d2FH4 may replace a long wrapped target with an MD5 name (??@<hash>@). + // Parse it before the optional '?' marker below; otherwise the first '?' + // of the hash spelling is mistaken for IsKnownStaticDataMember. + NameList innerNameList; + BNNameType innerClassFunctionType = NoNameType; + bool isMD5Name = false; + if (m_reader.ConsumeIf("??@")) + { + _STD_STRING hash = m_reader.ReadUntil('@'); + innerNameList.push_back(MakeNameSegment("??@" + hash + "@")); + isMD5Name = true; + } + + // Optional leading '?' flags the "known static data member" form. 
LLVM + // calls this IsKnownStaticDataMember — when present, the mangling is + // required to carry two trailing '@' before the outer function encoding + // rather than one. + bool isKnownStaticDataMember = false; + if (!isMD5Name && m_reader.ConsumeIf('?')) + { + isKnownStaticDataMember = true; + } + + // Parse the inner symbol's qualified name exactly as any other symbol + // would. DemangleName handles locally-scoped pieces, anonymous namespaces, + // templates, etc. so a target like + // instance@?1??Get@Globals@@SAAEAU1@XZ@ + // resolves correctly. + if (!isMD5Name) + DemangleName(innerNameList, innerClassFunctionType, backrefList); + + const char* prefix = isDtor + ? "`dynamic atexit destructor for " + : "`dynamic initializer for "; + BNNameType classFunctionType = isDtor + ? DynamicAtExitDestructorNameType + : DynamicInitializerNameType; + + _STD_STRING descriptor; + + if (m_reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F"); + + char next = m_reader.Peek(); + if (next >= '0' && next <= '4') + { + // Variable form: <storage-class><variable-type><@-terminators> + // <outer-function-encoding>. We don't attach the storage class to + // anything — it exists only to disambiguate variable-vs-function + // inside the wrapper and to match the mangling grammar. + m_reader.Consume(); // storage class + DemangledTypeNode varType = DemangleData(backrefList); + _STD_STRING varTypeStr = varType.GetString(); + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "`" + varTypeStr + " " + innerJoined + "''"; + + // Consume the @-terminators between the inner variable encoding and + // the outer function encoding. LLVM requires two when the optional + // leading '?' was present, one otherwise. + int atCount = isKnownStaticDataMember ? 
2 : 1; + for (int i = 0; i < atCount; i++) + { + if (m_reader.Length() == 0 || m_reader.Read() != '@') + throw DemangleException("Expected '@' terminator in ??__E/??__F variable form"); + } + } + else + { + // Function form: the inner symbol's function encoding follows + // directly. The outer stub reuses that encoding (there's no separate + // outer signature). + if (isKnownStaticDataMember) + throw DemangleException("??__E/??__F with leading '?' but no variable form"); + if (isMD5Name) + { + while (m_reader.ConsumeIf('@')) + { + } + } + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "'" + innerJoined + "''"; + } + + // Replace the symbol's qualified name with just the descriptor — this is + // what puts the output at file scope with no enclosing class prefix. + NameList descriptorName = { MakeNameSegment(descriptor) }; + + auto parseOuterFunction = [&](bool pointerSuffix, int funcClass, BNMemberAccess access, BNMemberScope scope) { + DemangledFunction function = DemangleFunction(classFunctionType, pointerSuffix, backrefList, funcClass); + ApplySymbolFunctionContext(function, descriptorName, classFunctionType, funcClass); + return DemangleContext{std::move(descriptorName), std::move(function.type), access, scope}; + }; + + // Parse the outer function encoding. MSVC emits a global cdecl stub + // ('Y'/'Z') in practice but we dispatch through the full table for + // robustness (private/public/static/etc.). 
+	if (m_reader.Length() == 0)
+		throw DemangleException("Truncated ??__E/??__F outer function encoding");
+	switch (char funcType = m_reader.Read())
+	{
+	case 'A': //fall through
+	case 'B': return parseOuterFunction(true, PrivateFunctionClass, PrivateAccess, NoScope );
+	case 'C': //fall through
+	case 'D': return parseOuterFunction(false, PrivateFunctionClass | StaticFunctionClass, PrivateAccess, StaticScope);
+	case 'I': //fall through
+	case 'J': return parseOuterFunction(true, ProtectedFunctionClass, ProtectedAccess, NoScope );
+	case 'K': //fall through
+	case 'L': return parseOuterFunction(false, ProtectedFunctionClass | StaticFunctionClass, ProtectedAccess, StaticScope);
+	case 'Q': //fall through
+	case 'R': return parseOuterFunction(true, PublicFunctionClass, PublicAccess, NoScope );
+	case 'S': //fall through
+	case 'T': return parseOuterFunction(false, PublicFunctionClass | StaticFunctionClass, PublicAccess, StaticScope);
+	case 'Y': //fall through
+	case 'Z': return parseOuterFunction(false, GlobalFunctionClass, NoAccess, NoScope );
+	default:
+		throw DemangleException(_STD_STRING("Unexpected outer function type '") + funcType + "' in ??__E/??__F");
+	}
+}
+
 Demangle::DemangleContext Demangle::DemangleSymbol()
 {
-	m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw());
-	//m_logger->Indent();
+	return DemangleSymbol(m_backrefList); // parse using the demangler-wide backref list
+}
+
+// Parses one top-level mangled symbol from m_reader into name/type/access/scope; throws DemangleException on malformed input.
+Demangle::DemangleContext Demangle::DemangleSymbol(BackrefList& backrefList)
+{
+	NestingGuard nestingGuard(*this);
+	MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw());
 	BNNameType classFunctionType = NoNameType;
-	QualifiedName varName;
+	NameList varName;
-	if (reader.Peek() == '.')
+	if (m_reader.ConsumeIf('.')) // '.'-prefixed input is a raw type-info name, not a '?' symbol
 	{
-		reader.Consume();
-
-		return { DemangleTypeInfoName(), NoAccess, NoScope };
+		NameList typeInfoName;
+		DemangledTypeNode type = DemangleTypeInfoName(typeInfoName);
+		return { std::move(typeInfoName), std::move(type), NoAccess, NoScope };
 	}
-	if (reader.Read() != '?')
+	if (m_reader.Read() != '?')
 	{
 		throw DemangleException();
 	}
-	DemangleName(varName, classFunctionType, m_backrefList);
-	m_logger->LogDebug("Done demangling Name: '%s' - '%s'", varName.GetString().c_str(), reader.GetRaw());
-	m_varName = varName;
+	// MD5-hashed names: ??@<32hex>@
+	if (m_reader.ConsumeIf("?@"))
+	{
+		_STD_STRING hash = m_reader.ReadUntil('@');
+		NameList md5Name = { MakeNameSegment("??@" + hash + "@") };
+		return { std::move(md5Name), DemangledTypeNode::VoidType(), NoAccess, NoScope };
+	}
+
+	// Special intrinsics dispatched at the top level (matches LLVM's
+	// demangleSpecialIntrinsic). ??__E/??__F have a non-uniform grammar
+	// that the normal DemangleName scope-chain loop can't express — the
+	// bytes after the code are a wrapped inner symbol, not scope prefixes.
+	if (m_reader.ConsumeIf("?__E"))
+		return DemangleDynamicInitFini(false, backrefList);
+	if (m_reader.ConsumeIf("?__F"))
+		return DemangleDynamicInitFini(true, backrefList);
+
+	DemangleName(varName, classFunctionType, backrefList);
+	MSVC_TRACE("Done demangling Name: '{}' - '{}'", JoinNameList(varName), m_reader.GetRaw());
 	DemangleContext context;
+	auto setContext = [&](DemangledTypeNode type, BNMemberAccess access, BNMemberScope scope) {
+		context.type = std::move(type);
+		context.access = access;
+		context.scope = scope;
+	};
+	auto finishContext = [&]() {
+		context.name = std::move(varName); // the parsed name travels in the context rather than demangler-wide state
+		return std::move(context);
+	};
 	if (classFunctionType == StringNameType)
 	{
-		context = { DemangleString(), NoAccess, NoScope };
-		return context;
-	}
-
-	char funcType = reader.Read();
-	switch(funcType)
-	{
-	case '0': context = {DemangleData(), PrivateAccess, StaticScope }; break;
-	case '1': context = {DemangleData(), ProtectedAccess, StaticScope }; break;
-	case '2': context = {DemangleData(), PublicAccess, StaticScope }; break;
-	case '3': context = {DemangleData(), NoAccess, NoScope }; break;
-	case '4': context = {DemangleData(), NoAccess, NoScope }; break;
-	case '5': context = {DemangleVTable(), NoAccess, NoScope }; break;
-	case '6': context = {DemangleVTable(), NoAccess, NoScope }; break;
-	case '7': context = {DemangleVTable(), NoAccess, NoScope }; break;
-	case '8': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break;
-	case '9': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break;
-	case 'A': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break;
-	case 'B': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break;
-	case 'C': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break;
-	case 'D': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break;
-	case 'E': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break;
-	case 'F': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break;
-	case 'G': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break;
-	case 'H': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break;
-	case 'I': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break;
-	case 'J': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break;
-	case 'K': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break;
-	case 'L': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break;
-	case 'M': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break;
-	case 'N': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break;
-	case 'O': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break;
-	case 'P': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break;
-	case 'Q': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break;
-	case 'R': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break;
-	case 'S': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break;
-	case 'T': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break;
-	case 'U': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break;
-	case 'V': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break;
-	case 'W': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break;
-	case 'X': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break;
-	case 'Y': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break;
-	case 'Z': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break;
+		setContext(DemangleString(varName), NoAccess, NoScope);
+		return finishContext();
+	}
+
+	// ??__J (local static thread guard) and local-scope ??_B guards are
+	// variables, not functions. The storage marker is '4' (not visible) or '5'
+	// (visible). Some local guard names then carry a one-digit local ordinal
+	// instead of a type encoding, e.g. `...@51` -> `{2}`.
+	char nextSymbolByte = m_reader.PeekOr();
+	if ((classFunctionType == LocalStaticThreadGuardNameType)
+		|| (classFunctionType == LocalStaticGuardNameType && m_reader.Length() >= 2
+			&& (nextSymbolByte == '4' || nextSymbolByte == '5')
+			&& m_reader.PeekAt(1) >= '0' && m_reader.PeekAt(1) <= '9'))
+	{
+		if (m_reader.Length() == 0)
+			throw DemangleException("Truncated local static guard");
+		char next = m_reader.Read();
+		if (next != '4' && next != '5')
+			throw DemangleException("local static guard requires variable storage class ('4' or '5'), got '" + _STD_STRING(1, next) + "'");
+		if (char ordinalDigit = m_reader.PeekOr(); ordinalDigit >= '0' && ordinalDigit <= '9') // own name: must not shadow the storage-class byte 'next'
+		{
+			int64_t guardOrdinal = m_reader.Read() - '0' + 1;
+			AppendToLastNameSegment(varName, "{" + to_string(guardOrdinal) + "}");
+			setContext(DemangledTypeNode::IntegerType(4, false), NoAccess, NoScope);
+			return finishContext();
+		}
+		setContext(DemangleData(backrefList), NoAccess, NoScope);
+		return finishContext();
+	}
+
+	auto setDataContext = [&](BNMemberAccess access, BNMemberScope scope) {
+		setContext(DemangleData(backrefList), access, scope);
+	};
+	auto setFunctionContext = [&](bool pointerSuffix, int funcClass, BNMemberAccess access, BNMemberScope scope) {
+		DemangledFunction function = DemangleFunction(classFunctionType, pointerSuffix, backrefList, funcClass);
+		ApplySymbolFunctionContext(function, varName, classFunctionType, funcClass);
+		setContext(std::move(function.type), access, scope);
+	};
+
+	switch(char funcType = m_reader.Read())
+	{
+	case '0': setDataContext(PrivateAccess, StaticScope); break;
+	case '1': setDataContext(ProtectedAccess, StaticScope); break;
+	case '2': setDataContext(PublicAccess, StaticScope); break;
+	case '3': //fall through
+	case '4': setDataContext(NoAccess, NoScope ); break;
+	case '5': //fall through
+	case '6': //fall through
+	case '7':
+		setContext(DemangleVTable(backrefList, varName), NoAccess, NoScope);
+		break;
+	case '8': //fall through
+	case '9':
+		setContext(DemangleRTTI(classFunctionType, varName), NoAccess, NoScope);
+		break;
+	case 'A': //fall through
+	case 'B': setFunctionContext(true, PrivateFunctionClass, PrivateAccess, NoScope ); break;
+	case 'C': //fall through
+	case 'D': setFunctionContext(false, PrivateFunctionClass | StaticFunctionClass, PrivateAccess, StaticScope ); break;
+	case 'E': //fall through
+	case 'F': setFunctionContext(true, PrivateFunctionClass | VirtualFunctionClass, PrivateAccess, VirtualScope); break;
+	case 'G': //fall through
+	case 'H': setFunctionContext(true, PrivateFunctionClass | StaticThunkFunctionClass, PrivateAccess, ThunkScope ); break;
+	case 'I': //fall through
+	case 'J': setFunctionContext(true, ProtectedFunctionClass, ProtectedAccess, NoScope ); break;
+	case 'K': //fall through
+	case 'L': setFunctionContext(false, ProtectedFunctionClass | StaticFunctionClass, ProtectedAccess, StaticScope ); break;
+	case 'M': //fall through
+	case 'N': setFunctionContext(true, ProtectedFunctionClass | VirtualFunctionClass, ProtectedAccess, VirtualScope); break;
+	case 'O': //fall through
+	case 'P': setFunctionContext(true, ProtectedFunctionClass | StaticThunkFunctionClass, ProtectedAccess, ThunkScope ); break;
+	case 'Q': //fall through
+	case 'R': setFunctionContext(true, PublicFunctionClass, PublicAccess, NoScope ); break;
+	case 'S': //fall through
+	case 'T': setFunctionContext(false, PublicFunctionClass | StaticFunctionClass, PublicAccess, StaticScope ); break;
+	case 'U': //fall through
+	case 'V': setFunctionContext(true, PublicFunctionClass | VirtualFunctionClass, PublicAccess, VirtualScope); break;
+	case 'W': //fall through
+	case 'X': setFunctionContext(true, PublicFunctionClass | StaticThunkFunctionClass, PublicAccess, ThunkScope ); break;
+	case 'Y': //fall through
+	case 'Z': setFunctionContext(false, GlobalFunctionClass, NoAccess, NoScope ); break;
 	case '$':
 	{
+		if (m_reader.ConsumeIf('B'))
+		{
+			// Vcall thunk: $B
+			uint64_t offset = DecodeEncodedUnsignedNumber();
+			if (varName.empty())
+				throw DemangleException("Vcall thunk missing name");
+			varName.back() = MakeNameSegment("`vcall'{" + to_string(offset) + ", {flat}}'");
+			// Consume calling convention char + this-type flag char
+			if (m_reader.Length() >= 1)
+				m_reader.Consume(); // calling convention (A=cdecl, etc.)
+			char next = m_reader.PeekOr();
+			if (next != '\0' && next != '@')
+				m_reader.Consume(); // this-type flag
+			setContext(DemangledTypeNode::VoidType(), NoAccess, NoScope);
+			break;
+		}
 		int funcClass = VirtualThunkFunctionClass;
-		if (reader.Peek() == 'R')
+		if (m_reader.ConsumeIf('R'))
 		{
-			reader.Consume();
 			funcClass |= VirtualThunkExFunctionClass;
 		}
-		char thunkType = reader.Read();
-		switch (thunkType)
+		switch (char thunkType = m_reader.Read())
 		{
-		case '0': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break;
-		case '1': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break;
-		case '2': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break;
-		case '3': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break;
-		case '4': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break;
-		case '5': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break;
-		default: throw DemangleException("Unknown virtual thunk type " + string(1, thunkType));
+		case '0': //fall through
+		case '1': setFunctionContext(true, funcClass | VirtualFunctionClass | PrivateFunctionClass, PrivateAccess, ThunkScope); break;
+		case '2': //fall through
+		case '3': setFunctionContext(true, funcClass | VirtualFunctionClass | ProtectedFunctionClass, ProtectedAccess, ThunkScope); break;
+		case '4': //fall through
+		case '5': setFunctionContext(true, funcClass | VirtualFunctionClass 
| PublicFunctionClass, PublicAccess, ThunkScope); break;
+		default: throw DemangleException("Unknown virtual thunk type " + _STD_STRING(1, thunkType));
 		}
 		break;
 	}
-	default: throw DemangleException("Unknown function type " + string(1, funcType));
+	default: throw DemangleException("Unknown function type " + _STD_STRING(1, funcType));
 	}
-	return context;
+	return finishContext();
 }
-bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref<Type>& outType,
-	QualifiedName& outVarName, const Ref<BinaryView>& view)
+// Parses the symbol, then builds the final Type and QualifiedName. The platform and architecture fall back
+// from the demangler's own members to those of `view`; throws DemangleException if no architecture is found.
+std::pair<Ref<Type>, QualifiedName> Demangle::Finalize(BinaryView* view)
 {
-	outType = nullptr;
-	if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.'))
-		return false;
-	return DemangleMS(arch, mangledName, outType, outVarName);
+	DemangleContext context = DemangleSymbol();
+	if (m_reader.Length() != 0)
+		LogDebugF("Demangling Succeeded with trailing characters '{}' in '{}'", m_reader.GetRaw(), m_mangledName);
+
+	Ref<Platform> platform = m_platform;
+	if (!platform && view)
+		platform = view->GetDefaultPlatform();
+
+	Architecture* arch = m_arch;
+#ifdef BINARYNINJACORE_LIBRARY
+	if (!arch && platform)
+		arch = platform->GetArchitecture();
+	if (!arch && view)
+		arch = view->GetDefaultArchitecture();
+#else
+	Ref<Architecture> viewArch;
+	Ref<Architecture> platformArch;
+	if (!arch && platform)
+	{
+		platformArch = platform->GetArchitecture();
+		arch = platformArch.GetPtr();
+	}
+	if (!arch && view)
+	{
+		viewArch = view->GetDefaultArchitecture();
+		arch = viewArch.GetPtr();
+	}
+#endif
+	if (!arch)
+		throw DemangleException("No architecture available to finalize demangled type");
+
+	if (!platform)
+		platform = arch->GetStandalonePlatform();
+
+	return {context.type.Finalize(platform.GetPtr()), QualifiedName(FinalizeNameList(context.name))};
 }
-bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref<Type>& outType,
-	QualifiedName& outVarName, BinaryView* view)
+std::pair<Ref<Type>, QualifiedName> Demangle::Finalize()
 {
-	outType = nullptr;
-	if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.'))
-		return false;
-	return DemangleMS(arch, mangledName, outType, outVarName);
+	return Finalize(m_view.GetPtr());
 }
-bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref<Type>& outType,
-	QualifiedName& outVarName)
+// Shared driver for the DemangleMS overloads: rejects names that do not start with '?' or '.', runs
+// demangleBody(), stores its (type, name) result in the out-parameters, and maps any exception to `false`.
+template <typename DemangleBody>
+static bool DemangleMSImpl(const _STD_STRING& mangledName, Ref<Type>& outType, QualifiedName& outVarName,
+	DemangleBody&& demangleBody)
 {
 	outType = nullptr;
 	if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.'))
 		return false;
+
 	try
 	{
-		Demangle demangle(arch, mangledName);
-		// For now we're throwing away MemberScope and MemberAccess
-		outType = demangle.DemangleSymbol().type.Finalize();
-		outVarName = demangle.GetVarName();
-
+		auto result = demangleBody();
+		outType = std::move(result.first);
+		outVarName = std::move(result.second);
+		return true;
 	}
-	catch (DemangleException &e)
+	catch (const std::exception& e)
 	{
-		LogDebugForException(e, "Demangling Failed '%s' '%s;", mangledName.c_str(), e.what());
+		// DemangleException derives from std::exception, so this one handler covers both.
+		LogDebugF("Demangling Failed '{}' '{}'", mangledName, e.what());
 		return false;
 	}
-	return true;
 }
-
-bool Demangle::DemangleMS(const string& mangledName, Ref<Type>& outType,
+bool Demangle::DemangleMS(Architecture* arch, const _STD_STRING& mangledName, Ref<Type>& outType,
 	QualifiedName& outVarName, const Ref<BinaryView>& view)
+{
+	if (view)
+	{
+		return DemangleMSImpl(mangledName, outType, outVarName, [&]() {
+			Demangle demangle(arch, mangledName);
+			return demangle.Finalize(view.GetPtr());
+		});
+	}
+	return DemangleMS(arch, mangledName, outType, outVarName);
+}
+
+bool Demangle::DemangleMS(Architecture* arch, const _STD_STRING& mangledName, Ref<Type>& outType,
+	QualifiedName& outVarName, BinaryView* view)
+{
+	if (view)
+		return DemangleMS(arch, mangledName, outType, outVarName, Ref<BinaryView>(view));
+	return DemangleMS(arch, mangledName, outType, outVarName);
+}
+
+bool Demangle::DemangleMS(Platform* platform, const _STD_STRING& mangledName, Ref<Type>& outType,
+	QualifiedName& outVarName)
 {
 	outType = nullptr;
-	if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.'))
+	if (!platform)
 		return false;
-	try
-	{
+
+	return DemangleMSImpl(mangledName, outType, outVarName, [&]() {
+		Demangle demangle(Ref<Platform>(platform), mangledName);
+		return demangle.Finalize();
+	});
+}
+
+bool Demangle::DemangleMS(Architecture* arch, const _STD_STRING& mangledName, Ref<Type>& outType,
+	QualifiedName& outVarName)
+{
+	return DemangleMSImpl(mangledName, outType, outVarName, [&]() {
+		thread_local Demangle demangle(arch, mangledName); // one instance per thread, re-seeded by Reset() on every call
+		demangle.Reset(arch, mangledName);
+		return demangle.Finalize();
+	});
+}
+
+
+bool Demangle::DemangleMS(const _STD_STRING& mangledName, Ref<Type>& outType,
+	QualifiedName& outVarName, const Ref<BinaryView>& view)
+{
+	return DemangleMSImpl(mangledName, outType, outVarName, [&]() {
+		// Can't use thread_local here — BinaryView overload needs platform/view state
 		Demangle demangle(view, mangledName);
-		// For now we're throwing away MemberScope and MemberAccess
-		outType = demangle.DemangleSymbol().type.Finalize();
-		outVarName = demangle.GetVarName();
+		return demangle.Finalize();
+	});
+}
-	}
-	catch (DemangleException &e)
-	{
-		LogDebugForException(e, "Demangling Failed '%s' '%s;", mangledName.c_str(), e.what());
+bool Demangle::DemangleMS(const _STD_STRING& mangledName, Ref<Type>& outType,
+	QualifiedName& outVarName, BinaryView* view)
+{
+	outType = nullptr;
+	if (!view)
 		return false;
-	}
-	return true;
+	return DemangleMS(mangledName, outType, outVarName, Ref<BinaryView>(view));
 }
@@ -1864,18 +2750,18 @@ class MSDemangler: public Demangler
 	MSDemangler(): Demangler("MS")
 	{
 	}
-	~MSDemangler() override {}
+	~MSDemangler() override = default;
-	virtual bool IsMangledString(const string& name) override
+	bool IsMangledString(const _STD_STRING& name) override
 	{
-		return name[0] == '?';
+		return !name.empty() && (name[0] == '?' 
|| name[0] == '.'); } #ifdef BINARYNINJACORE_LIBRARY - virtual bool Demangle(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName, + bool Demangle(Architecture* arch, const _STD_STRING& name, Ref& outType, QualifiedName& outVarName, BinaryView* view) override #else - virtual bool Demangle(Ref arch, const string& name, Ref& outType, QualifiedName& outVarName, + virtual bool Demangle(Ref arch, const _STD_STRING& name, Ref& outType, QualifiedName& outVarName, Ref view) override #endif { @@ -1899,7 +2785,7 @@ extern "C" BINARYNINJAPLUGIN bool CorePluginInit() #endif { - static MSDemangler* demangler = new MSDemangler(); + static auto demangler = new MSDemangler(); Demangler::Register(demangler); return true; } diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index c2eeb79f61..168aa6de73 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -13,8 +13,9 @@ // limitations under the License. #pragma once -#include #include +#include +#include // XXX: Compiled directly into the core for performance reasons // Will still work fine compiled independently, just at about a @@ -25,47 +26,33 @@ #include "architecture.h" #include "binaryview.h" #include "demangle.h" -#include "unicode.h" #define BN BinaryNinjaCore #define _STD_STRING BinaryNinjaCore::string #define _STD_VECTOR BinaryNinjaCore::vector -#define _STD_SET BinaryNinjaCore::set #else #include "binaryninjaapi.h" #define BN BinaryNinja #define _STD_STRING std::string #define _STD_VECTOR std::vector -#define _STD_SET std::set +#endif + +#ifdef BINARYNINJACORE_LIBRARY +#include "demangler/gnu3/demangled_type_node.h" +#else +#include "../gnu3/demangled_type_node.h" #endif class DemangleException: public std::exception { _STD_STRING m_message; public: - DemangleException(_STD_STRING msg="Attempt to read beyond bounds or missing expected character"): m_message(msg){} - virtual const char* what() const noexcept { return m_message.c_str(); } + 
DemangleException(_STD_STRING msg="Attempt to read beyond bounds or missing expected character"): m_message(std::move(msg)){} + [[nodiscard]] const char* what() const noexcept override { return m_message.c_str(); } }; class Demangle { - enum NameType - { - NameEmpty, - NameString, - NameLookup, - NameBackref, - NameTemplate, - NameConstructor, - NameDestructor, - NameRtti, - NameReturn, - NameDynamicInitializer, - NameDynamicAtExitDestructor, - NameLocalStaticThreadGuard, - NameLocalVftable - }; - enum FunctionClass { NoneFunctionClass = 0, @@ -81,93 +68,302 @@ class Demangle VirtualThunkExFunctionClass = 1 << 9, }; +public: + struct DemangleContext + { + DemangledQualifiedName name; + DemangledTypeNode type; + BNMemberAccess access; + BNMemberScope scope; + }; + +private: class Reader { public: - Reader(_STD_STRING data); - _STD_STRING PeekString(size_t count=1); - char Peek(); - const char* GetRaw(); - char Read(); - _STD_STRING ReadString(size_t count=1); - _STD_STRING ReadUntil(char sentinal); - void Consume(size_t count=1); - size_t Length(); + Reader(const _STD_STRING& data) + { + Reset(data); + } + void Reset(const _STD_STRING& data) + { + m_ptr = data.c_str(); + m_end = data.c_str() + data.size(); + ValidatePrintableAscii(); + } + bool PeekMatch(const char* str, size_t len) const + { + if (len > Length()) + return false; + return memcmp(m_ptr, str, len) == 0; + } + [[nodiscard]] char PeekAt(size_t offset) const + { + if (offset >= Length()) + throw DemangleException(); + return m_ptr[offset]; + } + [[nodiscard]] char Peek() const + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr; + } + [[nodiscard]] char PeekOr(char fallback = '\0') const + { + if (Length() == 0) + return fallback; + return *m_ptr; + } + [[nodiscard]] const char* GetRaw() const { return m_ptr; } + void SetRaw(const char* p) { m_ptr = p; } + [[nodiscard]] char Read() + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr++; + } + bool ConsumeIf(char ch) + 
{ + if (PeekOr() != ch) + return false; + Consume(); + return true; + } + bool ConsumeIf(const char* str, size_t len) + { + if (!PeekMatch(str, len)) + return false; + Consume(len); + return true; + } + template + bool ConsumeIf(const char (&str)[N]) + { + return ConsumeIf(str, N - 1); + } + void Consume(size_t count = 1) + { + if (count > Length()) + throw DemangleException(); + m_ptr += count; + } + [[nodiscard]] size_t Length() const { return static_cast(m_end - m_ptr); } + _STD_STRING ReadString(size_t count); + _STD_STRING ReadUntil(char sentinel); private: - _STD_STRING m_data; + void ValidatePrintableAscii() const + { + for (const char* p = m_ptr; p < m_end; p++) + if (*p < 0x20 || *p > 0x7e) + throw DemangleException(); + } + const char* m_ptr; + const char* m_end; }; class BackrefList { public: - _STD_VECTOR typeList; - _STD_VECTOR<_STD_STRING> nameList; - const BN::TypeBuilder& GetTypeBackref(size_t reference); - _STD_STRING GetStringBackref(size_t reference); - void PushTypeBackref(BN::TypeBuilder t); - void PushStringBackref(_STD_STRING& s); - void PushFrontStringBackref(_STD_STRING& s); + _STD_VECTOR typeList; + _STD_VECTOR nameList; + _STD_VECTOR templateList; + void Clear() { typeList.clear(); nameList.clear(); templateList.clear(); } + DemangledTypeNode::NodeRef GetTypeBackrefRef(size_t reference); + DemangledNamePart::Ref GetNameBackrefRef(size_t reference); + const DemangledTypeNode& GetTypeBackref(size_t reference); + const DemangledNamePart& GetNameBackref(size_t reference); + DemangledTypeNode::NodeRef PushTypeBackref(DemangledTypeNode::NodeRef t); + DemangledTypeNode::NodeRef PushTypeBackref(const DemangledTypeNode& t); + DemangledTypeNode::NodeRef PushTypeBackref(DemangledTypeNode&& t); + DemangledNamePart::Ref PushNameBackref(DemangledNamePart::Ref t); + DemangledNamePart::Ref PushNameBackref(const DemangledNamePart& t); + DemangledNamePart::Ref PushNameBackref(DemangledNamePart&& t); + DemangledNamePart::Ref 
PushTemplateSpecialization(DemangledNamePart::Ref t); + DemangledNamePart::Ref PushTemplateSpecialization(const DemangledNamePart& t); + DemangledNamePart::Ref PushTemplateSpecialization(DemangledNamePart&& t); }; - Reader reader; + struct BackrefContextSwitch + { + BackrefList& active; + BackrefList saved; + + BackrefContextSwitch(BackrefList& active); + BackrefContextSwitch(const BackrefContextSwitch&) = delete; + BackrefContextSwitch& operator=(const BackrefContextSwitch&) = delete; + ~BackrefContextSwitch(); + + static void Swap(BackrefList& left, BackrefList& right); + }; + + // Internal name list type - keeps template names structured during parsing. + using NameList = _STD_VECTOR; + + static DemangledNamePart MakeNameSegment(const _STD_STRING& s) + { + return DemangledNamePart(s); + } + + static void AppendToLastNameSegment(NameList& nl, const _STD_STRING& suffix) + { + if (nl.empty()) + throw DemangleException(); + nl.back() = MakeNameSegment(nl.back().GetString() + suffix); + } + + static _STD_STRING JoinNameList(const NameList& nl) + { + if (nl.empty()) return {}; + if (nl.size() == 1) return nl[0].GetString(); + + size_t size = 2 * (nl.size() - 1); + for (const auto& name : nl) + size += name.GetString().size(); + + _STD_STRING out; + out.reserve(size); + out = nl[0].GetString(); + for (size_t i = 1; i < nl.size(); i++) + { + out += ':'; + out += ':'; + out += nl[i].GetString(); + } + return out; + } + + static StringList FinalizeNameList(const NameList& nl) + { + StringList out; + out.reserve(nl.size()); + for (const auto& n: nl) + out.push_back(n.GetString()); + return out; + } + + _STD_STRING m_mangledName; // Owns the string; Reader points into it + Reader m_reader; BackrefList m_backrefList; BN::Architecture* m_arch; BN::Ref m_platform; BN::Ref m_view; - BN::QualifiedName m_varName; - BN::Ref m_logger; - - NameType GetNameType(); - BN::TypeBuilder DemangleVarType(BackrefList& varList, bool isReturn, BN::QualifiedName& name); - void 
DemangleNumber(int64_t& num); - void DemangleChar(char& ch); - void DemangleWideChar(uint16_t& wch); + size_t m_templateParamDepth = 0; + size_t m_nestingDepth = 0; + class NestingGuard + { + Demangle& m_demangler; + public: + NestingGuard(Demangle& demangler); + ~NestingGuard(); + }; + + static void RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList); + static void PrependNameComponent(NameList& nameList, DemangledNamePart name); + void AppendStringName(NameList& nameList, BackrefList& nameBackrefList); + static void FinalizeConstructorTemplateName(NameList& nameList, size_t nameListSizeAtEntry, bool pending); + static bool FunctionTypeHasPointerSuffix(char functionType); + static _STD_STRING FormatFunctionScopeSignature(const DemangledTypeNode& type, const NameList& scopeName); + void AppendLocalScope(NameList& nameList, BackrefList& nameBackrefList, uint64_t scopeOrdinal, bool typeNameContext); + bool TryAppendLocalScopeAt(NameList& nameList, BackrefList& nameBackrefList, const char* encodedNumberStart, + bool typeNameContext); + _STD_STRING FormatTypeAndName(const DemangledTypeNode& type, const NameList& name) const; + enum class TypeBackrefMode + { + RecordTopLevel, + SuppressTopLevel, + }; + struct EncodedNumber + { + uint64_t magnitude; + bool negative; + }; + enum class ThunkAdjustorKind + { + Static, + Vtordisp, + Vtordispex, + }; + struct ThunkAdjustor + { + ThunkAdjustorKind kind = ThunkAdjustorKind::Static; + uint64_t adjustor = 0; + int32_t vbptrOffset = 0; + int32_t vbOffsetOffset = 0; + int32_t vtorDispOffset = 0; + uint64_t staticOffset = 0; + }; + struct DemangledFunction + { + DemangledTypeNode type; + std::optional thunkAdjustor; + }; + static bool FunctionClassNeedsImplicitThis(int funcClass); + static void AppendThunkAdjustorToName(NameList& nameList, const ThunkAdjustor& adjustor); + static void SetImplicitThisParameter(DemangledTypeNode& type, BNNameType classFunctionType, const NameList& enclosingName); + 
static void ApplySymbolFunctionContext(DemangledFunction& function, NameList& symbolName, + BNNameType classFunctionType, int funcClass); + DemangledTypeNode DemangleReferencedSymbolValue(BackrefList& varList); + DemangledTypeNode DemangleAutoNonTypeTemplateParam(BackrefList& varList); + DemangledTypeNode DemangleVarType(BackrefList& varList, bool isReturn, + bool includeImplicitThis = true, DemangledTypeNode::NodeRef* outTypeBackref = nullptr, + TypeBackrefMode typeBackrefMode = TypeBackrefMode::RecordTopLevel); + EncodedNumber DecodeEncodedNumber(); + int64_t DecodeEncodedSignedNumber(); + uint64_t DecodeEncodedUnsignedNumber(); + int32_t DecodeEncodedSignedInt32(); + _STD_STRING DecodeEncodedNumberLiteral(); + char DemangleChar(); void DemangleModifiers(bool& _const, bool& _volatile, bool& isMember); - _STD_SET DemanglePointerSuffix(); - void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); - void DemangleNameTypeRtti(BNNameType& classFunctionType, - BackrefList& nameBackrefList, - _STD_STRING& out, - _STD_STRING& rttiTypeName); + uint8_t DemanglePointerSuffix(); + void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList, bool typeBackrefs = true); void DemangleTypeNameLookup(_STD_STRING& out, BNNameType& functionType); + bool TryDemangleWinRTEscapedScopeName(NameList& nameList, BackrefList& nameBackrefList); void DemangleNameTypeString(_STD_STRING& out); - void DemangleNameTypeBackref(_STD_STRING& out, const _STD_VECTOR<_STD_STRING>& backrefList); - void DemangleName(BN::QualifiedName& nameList, + void DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList); - BN::Ref GetCallingConventionForType(BNCallingConventionName ccName); + BackrefList& nameBackrefList, + bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); - BN::TypeBuilder DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); - 
BN::TypeBuilder DemangleData(); + void ConsumeExtendedModifierPrefix(); + DemangledFunction DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, + int funcClass = NoneFunctionClass); + DemangledTypeNode DemangleData(BackrefList& varList); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out); - BN::TypeBuilder DemangleVTable(); - BN::TypeBuilder DemanagleRTTI(BNNameType classFunctionType); - _STD_STRING DemangleTemplateInstantiationName(BackrefList& nameBackrefList); - _STD_STRING DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); - _STD_STRING DemangleUnqualifiedSymbolName(BN::QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); - BN::TypeBuilder DemangleString(); - BN::TypeBuilder DemangleTypeInfoName(); + DemangledTypeNode DemangleVTable(BackrefList& nameBackrefList, NameList& symbolName); + DemangledTypeNode DemangleRTTI(BNNameType classFunctionType, const NameList& symbolName); + DemangledNamePart DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList); + DemangledNamePart DemangleTemplateInstantiationName(BackrefList& nameBackrefList); + void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, DemangledNamePart& out); + DemangledNamePart DemangleUnqualifiedSymbolName(BackrefList& nameBackrefList, BNNameType& classFunctionType, + bool& backrefEligible); + DemangledTypeNode DemangleString(NameList& symbolName); + DemangledTypeNode DemangleTypeInfoName(NameList& symbolName); + DemangleContext DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList); + DemangleContext DemangleSymbol(BackrefList& backrefList); + std::pair, BN::QualifiedName> Finalize(BN::BinaryView* view); public: - struct DemangleContext - { - BN::TypeBuilder type; - BNMemberAccess access; - BNMemberScope scope; - }; - Demangle(BN::Architecture* arch, _STD_STRING mangledName); - 
Demangle(BN::Ref view, _STD_STRING mangledName); - Demangle(BN::Ref platform, _STD_STRING mangledName); + Demangle(BN::Architecture* arch, _STD_STRING mangledName); + Demangle(BN::Ref view, _STD_STRING mangledName); + Demangle(BN::Ref platform, _STD_STRING mangledName); + Demangle(const Demangle&) = delete; + Demangle(Demangle&&) = delete; + Demangle& operator=(const Demangle&) = delete; + Demangle& operator=(Demangle&&) = delete; + void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); DemangleContext DemangleSymbol(); - BN::QualifiedName GetVarName() const { return m_varName; } + std::pair, BN::QualifiedName> Finalize(); // Be careful not to accidentally implicitly cast a BinaryView* to a bool static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, const BN::Ref& view); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); + static bool DemangleMS(BN::Platform* platform, const _STD_STRING& mangledName, BN::Ref& outType, + BN::QualifiedName& outVarName); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName); @@ -176,4 +372,3 @@ class Demangle static bool DemangleMS(const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); }; - diff --git a/plugins/pdb-ng/src/symbol_parser.rs b/plugins/pdb-ng/src/symbol_parser.rs index 125d7c88f7..c15566deab 100644 --- a/plugins/pdb-ng/src/symbol_parser.rs +++ b/plugins/pdb-ng/src/symbol_parser.rs @@ -39,7 +39,7 @@ use crate::PDBParserInstance; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterId}; use binaryninja::binary_view::BinaryViewBase; use binaryninja::confidence::{Conf, MAX_CONFIDENCE, MIN_CONFIDENCE}; -use binaryninja::demangle::demangle_ms; +use binaryninja::demangle::demangle_ms_with_view; use 
binaryninja::rc::Ref; use binaryninja::types::{FunctionParameter, QualifiedName, StructureBuilder, Type, TypeClass}; use binaryninja::variable::{Variable, VariableSourceType}; @@ -1813,7 +1813,7 @@ impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> { raw_name: &String, rva: Rva, ) -> Result<(Option>>, Option)> { - let (mut t, mut name) = match demangle_ms(&self.arch, raw_name, true) { + let (mut t, mut name) = match demangle_ms_with_view(&self.arch, raw_name, Some(self.bv)) { Some((name, Some(t))) => (Some(Conf::new(t, DEMANGLE_CONFIDENCE)), name), Some((name, _)) => (None, name), _ => (None, QualifiedName::new(vec![raw_name.clone()])), diff --git a/plugins/rtti/rtti.cpp b/plugins/rtti/rtti.cpp index fde5a6ab43..f713aef77d 100644 --- a/plugins/rtti/rtti.cpp +++ b/plugins/rtti/rtti.cpp @@ -3,6 +3,20 @@ using namespace BinaryNinja; using namespace BinaryNinja::RTTI; +namespace +{ + std::string NormalizeRTTIClassName(std::string name) + { + size_t beginFind = name.find_first_of(' '); + if (beginFind != std::string::npos) + name.erase(0, beginFind + 1); + size_t endFind = name.find(" `RTTI Type Descriptor Name'"); + if (endFind != std::string::npos) + name.erase(endFind, name.length()); + return name; + } +} + Ref RTTI::GetRealSymbol(BinaryView *view, uint64_t relocAddr, uint64_t symAddr) { @@ -24,9 +38,9 @@ std::optional RTTI::DemangleNameMS(BinaryView* view, bool allowMang { QualifiedName demangledName = {}; Ref outType = {}; - if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, true)) + if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, view)) return DemangleNameLLVM(allowMangled, mangledName); - return demangledName.GetString(); + return NormalizeRTTIClassName(demangledName.GetString()); } @@ -90,14 +104,7 @@ std::optional RTTI::DemangleNameLLVM(bool allowMangled, const std:: Ref outType = {}; if (!DemangleLLVM(mangledName, demangledName, true)) return allowMangled ? 
std::optional(mangledName) : std::nullopt; - auto demangledNameStr = demangledName.GetString(); - size_t beginFind = demangledNameStr.find_first_of(' '); - if (beginFind != std::string::npos) - demangledNameStr.erase(0, beginFind + 1); - size_t endFind = demangledNameStr.find(" `RTTI Type Descriptor Name'"); - if (endFind != std::string::npos) - demangledNameStr.erase(endFind, demangledNameStr.length()); - return demangledNameStr; + return NormalizeRTTIClassName(demangledName.GetString()); } diff --git a/rust/src/demangle.rs b/rust/src/demangle.rs index 1f9f8941cc..d6aad25cee 100644 --- a/rust/src/demangle.rs +++ b/rust/src/demangle.rs @@ -165,6 +165,46 @@ pub fn demangle_ms( } } +pub fn demangle_ms_with_view( + arch: &CoreArchitecture, + mangled_name: &str, + view: Option<&BinaryView>, +) -> Option<(QualifiedName, Option>)> { + let mangled_name = mangled_name.to_cstr(); + let mut out_type: *mut BNType = std::ptr::null_mut(); + let mut out_name: *mut *mut std::os::raw::c_char = std::ptr::null_mut(); + let mut out_size: usize = 0; + let res = unsafe { + BNDemangleMSWithOptions( + arch.handle, + mangled_name.as_ptr(), + &mut out_type, + &mut out_name, + &mut out_size, + view.map(|v| v.handle).unwrap_or(std::ptr::null_mut()), + ) + }; + + match res { + true => { + assert!(!out_name.is_null()); + let names: Vec<_> = unsafe { ArrayGuard::::new(out_name, out_size, ()) } + .iter() + .map(str::to_string) + .collect(); + unsafe { BNFreeDemangledName(&mut out_name, out_size) }; + + let out_type = match out_type.is_null() { + true => None, + false => Some(unsafe { Type::ref_from_raw(out_type) }), + }; + + Some((names.into(), out_type)) + } + false => None, + } +} + #[derive(PartialEq, Eq, Hash)] pub struct Demangler { pub(crate) handle: *mut BNDemangler, diff --git a/view/pe/coffview.cpp b/view/pe/coffview.cpp index e009b91db3..a36aff64a3 100644 --- a/view/pe/coffview.cpp +++ b/view/pe/coffview.cpp @@ -1531,7 +1531,7 @@ void COFFView::AddCOFFSymbol(BNSymbolType type, const 
string& dll, const string& { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName; diff --git a/view/pe/peview.cpp b/view/pe/peview.cpp index a793eeb3a3..6af6283eaf 100644 --- a/view/pe/peview.cpp +++ b/view/pe/peview.cpp @@ -3567,7 +3567,7 @@ void PEView::AddPESymbol(BNSymbolType type, const string& dll, const string& nam { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName; From 7862d28bab7da7c9931d29435400d53108dcd68b Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 5 Jun 2026 21:42:54 -0400 Subject: [PATCH 2/2] Fix GNU3 demangler template and backref handling Update the GNU3 parser to use shared DemangledTypeNode references for substitutions, template substitutions, and nested type/name construction so backrefs preserve structure instead of copying stale formatted strings. Fix template argument parsing for non-type template parameter declarations, expression arguments, argument packs, and generic lambda auto parameters while preserving enclosing template substitution state. Carry platform context through GNU3 demangling/finalization so rendered and finalized types use the same delayed DemangledTypeNode representation as the other demangler paths. 
--- demangler/gnu3/demangle_gnu3.cpp | 1104 ++++++++++++++++++------------ demangler/gnu3/demangle_gnu3.h | 71 +- 2 files changed, 733 insertions(+), 442 deletions(-) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index f2130a4e1a..2c0ef8d8ac 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -23,14 +23,23 @@ #ifdef BINARYNINJACORE_LIBRARY using namespace BinaryNinjaCore; -#define GetClass GetTypeClass #else using namespace BinaryNinja; using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 262144 +static constexpr size_t MAX_DEMANGLE_NESTING_DEPTH = 1024; + +static BNTypeClass GetFinalizedTypeClass(const Ref& type) +{ +#ifdef BINARYNINJACORE_LIBRARY + return type->GetTypeClass(); +#else + return type->GetClass(); +#endif +} + #define hash(x,y) (64 * x + y) #undef GNUDEMANGLE_DEBUG @@ -52,13 +61,7 @@ void MyLogDebug(const char* fmt, ...) #define MyLogDebug(...) do {} while(0) #endif -static inline void rtrim(string &s) -{ - s.erase(find_if(s.rbegin(), s.rend(), [](int c) { return !isspace(c); }).base(), s.end()); -} - - -static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) +static size_t TotalStringSize(const StringList& v) { size_t n = 0; for (const auto& s : v) @@ -67,32 +70,66 @@ static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) } -static string GetTemplateString(const vector& args) +static string JoinNameSegments(const StringList& name) { - // Pre-calculate total length to avoid reallocations - size_t total = 2; // "<" + ">" - for (size_t i = 0; i < args.size(); i++) + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + string out; + out.reserve(TotalStringSize(name) + (name.size() - 1) * 2); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) { - if (i != 0) - total += 2; // ", " - total += args[i].size(); + out += "::"; + out += name[i]; } - total += 1; // possible " " before ">" + return out; +} + - string name; - 
name.reserve(total); - name += '<'; - for (size_t i = 0; i < args.size(); i++) +static bool TemplateArgsReferenceTemplateParam(const string& raw) +{ + if (raw.empty() || (raw[0] != 'I' && raw[0] != 'J')) + return false; + + size_t i = 0; + size_t depth = 0; + while (i < raw.size()) { - if (i != 0) - name += ", "; - name += args[i]; + char c = raw[i++]; + if (c == 'I' || c == 'J') + { + depth++; + continue; + } + if (c == 'E') + { + if (depth == 0) + return false; + depth--; + if (depth == 0) + return false; + continue; + } + if (c == 'T') + return true; + if (c >= '0' && c <= '9') + { + size_t len = c - '0'; + while (i < raw.size() && raw[i] >= '0' && raw[i] <= '9') + len = (len * 10) + (raw[i++] - '0'); + i = std::min(raw.size(), i + len); + } } - rtrim(name); - if (name.back() == '>') - name += " "; //Be c++03 compliant where we can - name += '>'; - return name; + return false; +} + + +static DemangledNamePart NameSegmentWithTemplateArgs(const string& name, vector args) +{ + return DemangledNamePart(name, std::move(args), true); } @@ -110,6 +147,7 @@ static string GetOperator(char elm1, char elm2) case hash('s','z'): return "sizeof"; case hash('a','t'): return "alignof"; case hash('a','z'): return "alignof"; + case hash('a','w'): return "co_await"; case hash('n','x'): return "noexcept"; case hash('s','Z'): return "sizeof..."; case hash('s','P'): return "sizeof..."; @@ -316,27 +354,49 @@ string DemangleGNU3Reader::ReadString(size_t count) // ===== DemangleGNU3 implementation ===== -DemangleGNU3::DemangleGNU3(Architecture* arch, const string& mangledName) : +DemangleGNU3::DemangleGNU3(Platform* platform, const string& mangledName) : m_reader(mangledName), - m_arch(arch), + m_platform(platform), + m_lastTypeRef(nullptr), m_isParameter(false), m_shouldDeleteReader(true), m_topLevel(true), m_isOperatorOverload(false), - m_permitForwardTemplateRefs(false) + m_parsingLambdaParams(false), + m_lambdaTemplateParamBase(0), + m_permitForwardTemplateRefs(false), + 
m_inLocalName(false), + m_nestingDepth(0) { MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); } -void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) +DemangleGNU3::NestingGuard::NestingGuard(DemangleGNU3& demangler) : m_demangler(demangler) +{ + m_demangler.m_nestingDepth++; + if (m_demangler.m_nestingDepth > MAX_DEMANGLE_NESTING_DEPTH) + { + m_demangler.m_nestingDepth--; + throw DemangleException("Detected adversarial mangled string"); + } +} + + +DemangleGNU3::NestingGuard::~NestingGuard() +{ + m_demangler.m_nestingDepth--; +} + + +void DemangleGNU3::Reset(Platform* platform, const string& mangledName) { m_reader.Reset(mangledName); - m_arch = arch; - m_varName.clear(); + m_platform = platform; m_substitute.clear(); m_templateSubstitute.clear(); m_functionSubstitute.clear(); + m_lastTypeRef = nullptr; m_lastName.clear(); m_nameType = {}; m_localType = {}; @@ -345,13 +405,16 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) m_shouldDeleteReader = true; m_topLevel = true; m_isOperatorOverload = false; + m_parsingLambdaParams = false; + m_lambdaTemplateParamBase = 0; m_permitForwardTemplateRefs = false; m_pendingForwardRefs.clear(); m_inLocalName = false; + m_nestingDepth = 0; } -DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) +DemangledTypeNode DemangleGNU3::CreateUnknownType(const StringList& s) { return DemangledTypeNode::NamedType(UnknownNamedTypeClass, s); } @@ -359,28 +422,100 @@ DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) DemangledTypeNode DemangleGNU3::CreateUnknownType(const string& s) { - return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{s}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{s}); +} + + +static DemangledQualifiedName CopyQualifiedName(const DemangledTypeNode& type) +{ + return type.GetName(); } void DemangleGNU3::ExtendTypeName(DemangledTypeNode& type, const string& 
extend) { - if (type.NameStringSize() + extend.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); + if (type.GetClass() != NamedTypeReferenceClass) + return; + DemangledQualifiedName name = CopyQualifiedName(type); + if (name.empty()) { - auto& qn = type.GetMutableTypeName(); - if (qn.size() > 0) - qn.back() += extend; - else - qn.push_back(extend); + name.emplace_back(extend); + type.SetName(std::move(name)); + return; } + + name.back().AppendBase(extend); + type.SetName(std::move(name)); } -void DemangleGNU3::PushTemplateType(const DemangledTypeNode& type) +void DemangleGNU3::ApplyTemplateArgs(DemangledTypeNode& type, ParamList args) { - m_templateSubstitute.push_back(type); + if (type.GetClass() != NamedTypeReferenceClass) + return; + + DemangledQualifiedName qn = CopyQualifiedName(type); + if (qn.empty()) + qn.emplace_back(""); + + qn.back().SetTemplateArguments(std::move(args), true); + type.SetName(std::move(qn)); +} + + +void DemangleGNU3::AppendTypeName(DemangledTypeNode& type, const DemangledTypeNode& extend) +{ + if (type.GetClass() != NamedTypeReferenceClass) + return; + + DemangledQualifiedName newName = CopyQualifiedName(type); + DemangledQualifiedName extendName = CopyQualifiedName(extend); + newName.reserve(newName.size() + extendName.size()); + newName.insert(newName.end(), extendName.begin(), extendName.end()); + type.SetName(std::move(newName)); +} + + +string DemangleGNU3::LastTypeNameSegmentBase(const DemangledTypeNode& type) +{ + const auto& qn = type.GetName(); + if (!qn.empty()) + return qn.back().GetBase(); + return {}; +} + + +bool DemangleGNU3::LastTypeNameSegmentHasTemplateArguments(const DemangledTypeNode& type) +{ + const auto& qn = type.GetName(); + if (qn.empty()) + return false; + return qn.back().HasTemplateArguments(); +} + + +DemangleGNU3::NodeRef DemangleGNU3::PushTemplateType(NodeRef type) +{ + if (type) + m_templateSubstitute.push_back(std::move(type)); + return type; +} + + 
+DemangleGNU3::NodeRef DemangleGNU3::PushTemplateType(const DemangledTypeNode& type) +{ + auto ref = DemangledTypeNode::CreateSharedCopy(type); + m_templateSubstitute.push_back(ref); + return ref; +} + + +DemangleGNU3::NodeRef DemangleGNU3::PushTemplateType(DemangledTypeNode&& type) +{ + auto ref = DemangledTypeNode::CreateShared(std::move(type)); + m_templateSubstitute.push_back(ref); + return ref; } @@ -389,25 +524,53 @@ const DemangledTypeNode& DemangleGNU3::GetTemplateType(size_t ref) { if (ref >= m_templateSubstitute.size()) throw DemangleException(); - return m_templateSubstitute[ref]; + if (!m_templateSubstitute[ref]) + throw DemangleException(); + return *m_templateSubstitute[ref]; } #endif -void DemangleGNU3::PushType(const DemangledTypeNode& type) +DemangleGNU3::NodeRef DemangleGNU3::PushType(NodeRef type) { - m_substitute.push_back(type); + if (type) + m_substitute.push_back(std::move(type)); + return type; } -const DemangledTypeNode& DemangleGNU3::GetType(size_t ref) +DemangleGNU3::NodeRef DemangleGNU3::PushType(const DemangledTypeNode& type) +{ + auto ref = DemangledTypeNode::CreateSharedCopy(type); + m_substitute.push_back(ref); + return ref; +} + + +DemangleGNU3::NodeRef DemangleGNU3::PushType(DemangledTypeNode&& type) +{ + auto ref = DemangledTypeNode::CreateShared(std::move(type)); + m_substitute.push_back(ref); + return ref; +} + + +DemangleGNU3::NodeRef DemangleGNU3::GetTypeRef(size_t ref) { if (ref >= m_substitute.size()) throw DemangleException(); + if (!m_substitute[ref]) + throw DemangleException(); return m_substitute[ref]; } +const DemangledTypeNode& DemangleGNU3::GetType(size_t ref) +{ + return *GetTypeRef(ref); +} + + #ifdef GNUDEMANGLE_DEBUG void DemangleGNU3::PrintTables() { @@ -456,6 +619,7 @@ string DemangleGNU3::DemangleSourceName() DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); bool old_isparam; 
@@ -466,6 +630,7 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) } DemangledTypeNode retType = DemangleType(); + NodeRef retTypeRef = m_lastTypeRef; ParamList params; old_isparam = m_isParameter; @@ -475,16 +640,21 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) while (m_reader.Peek() != 'E') { DemangledTypeNode param = DemangleType(); + NodeRef paramRef = m_lastTypeRef; if (param.GetClass() == VoidTypeClass) continue; MyLogDebug("Var_%d - %s\n", i++, param.GetString().c_str()); - m_functionSubstitute.back().push_back(param); - params.push_back({"", std::make_shared(std::move(param))}); + if (!paramRef) + paramRef = DemangledTypeNode::CreateShared(std::move(param)); + m_functionSubstitute.back().push_back(paramRef); + params.push_back({"", paramRef}); } m_reader.Consume(); m_functionSubstitute.pop_back(); m_isParameter = old_isparam; - DemangledTypeNode newType = DemangledTypeNode::FunctionType(std::move(retType), nullptr, std::move(params)); + if (!retTypeRef) + retTypeRef = DemangledTypeNode::CreateShared(std::move(retType)); + DemangledTypeNode newType = DemangledTypeNode::FunctionType(retTypeRef, nullptr, std::move(params)); PushType(newType); newType.SetConst(cnst); @@ -498,46 +668,28 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) } -string DemangleGNU3::ForwardRefPlaceholder(size_t index) -{ - return "\x01FWDREF:" + to_string(index) + "\x01"; -} - - -void DemangleGNU3::ResolveForwardTemplateRefs(DemangledTypeNode& type, const vector& args) +void DemangleGNU3::ResolveForwardTemplateRefs(DemangledTypeNode&, const ParamList& args) { if (m_pendingForwardRefs.empty()) return; - auto& segs = type.GetMutableTypeName(); - bool resolved = false; - for (const auto& fr : m_pendingForwardRefs) + for (const auto& ref : m_pendingForwardRefs) { - string placeholder = ForwardRefPlaceholder(fr.index); - string replacement = (fr.index < args.size()) ? 
args[fr.index] : "auto"; - for (auto& seg : segs) - { - size_t pos; - while ((pos = seg.find(placeholder)) != string::npos) - { - seg.replace(pos, placeholder.size(), replacement); - resolved = true; - } - } + if (!ref.typeRef) + continue; + if (ref.index >= args.size() || !args[ref.index].type) + throw DemangleException(); + *ref.typeRef = *args[ref.index].type; } - // Only clear the pending list when we actually resolved something. Inner - // nested-name 'I' handlers (e.g. template args of types nested inside the - // cv-operator result type) may call here with a type that does not contain - // the placeholder; we must not discard the pending entry in that case so - // that the correct outer 'I' handler can still resolve it. - if (resolved) - m_pendingForwardRefs.clear(); + m_pendingForwardRefs.clear(); } -DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() +DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution(NodeRef* outTypeRef) { indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); + if (outTypeRef) + *outTypeRef = nullptr; size_t number = 0; char elm = m_reader.Peek(); if (elm == '_') @@ -568,15 +720,32 @@ DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() dedent(); if (number < m_templateSubstitute.size()) - return m_templateSubstitute[number]; + { + if (!m_templateSubstitute[number]) + throw DemangleException(); + if (outTypeRef) + *outTypeRef = m_templateSubstitute[number]; + return *m_templateSubstitute[number]; + } // If forward template references are permitted (e.g. inside a cv conversion - // operator type), return a placeholder that will be resolved once the outer - // template args are known. + // operator type), return a shared placeholder node whose contents will be + // replaced once the outer template args are known. 
if (m_permitForwardTemplateRefs) { - m_pendingForwardRefs.push_back({number}); - return CreateUnknownType(ForwardRefPlaceholder(number)); + auto typeRef = DemangledTypeNode::CreateShared(CreateUnknownType("auto")); + m_pendingForwardRefs.push_back({number, typeRef}); + if (outTypeRef) + *outTypeRef = typeRef; + return *typeRef; + } + + if (m_parsingLambdaParams && number >= m_lambdaTemplateParamBase) + { + auto typeRef = DemangledTypeNode::CreateShared(CreateUnknownType("auto")); + if (outTypeRef) + *outTypeRef = typeRef; + return *typeRef; } throw DemangleException(); @@ -585,12 +754,14 @@ DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() DemangledTypeNode DemangleGNU3::DemangleType() { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); + m_lastTypeRef = nullptr; DemangledTypeNode type; + NodeRef typeRef = nullptr; bool cnst = false, vltl = false, rstrct = false; bool substitute = false; - QualifiedName name; DemangleCVQualifiers(cnst, vltl, rstrct); @@ -602,8 +773,9 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (vltl) type.SetVolatile(true); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); - PushType(type); + type.SetPointerSuffixBits(1u << RestrictSuffix); + typeRef = PushType(type); + m_lastTypeRef = typeRef; return type; } @@ -613,14 +785,14 @@ DemangledTypeNode DemangleGNU3::DemangleType() { if (isdigit(m_reader.Peek()) || m_reader.Peek() == '_' || isupper(m_reader.Peek())) { - type = DemangleSubstitution(); + type = DemangleSubstitution(&typeRef); if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); + typeRef = nullptr; substitute = true; } } @@ -630,24 +802,26 @@ DemangledTypeNode DemangleGNU3::DemangleType() { m_reader.Consume(1); type = DemangleUnqualifiedName(); - auto qn = 
type.GetTypeName(); - qn.insert(qn.begin(), "std"); - type.SetTypeName(std::move(qn)); + auto qn = CopyQualifiedName(type); + qn.insert(qn.begin(), DemangledNamePart("std")); + type.SetName(std::move(qn)); substitute = true; } else { - type = DemangleSubstitution(); + type = DemangleSubstitution(&typeRef); } if (m_reader.Peek() == 'I') { m_reader.Consume(); - if (substitute) + bool dependentTemplatePrefix = LastTypeNameSegmentBase(type) == "basic_ostream" && + TemplateArgsReferenceTemplateParam("I" + m_reader.PeekString(m_reader.Length())); + if (substitute && !dependentTemplatePrefix) PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); + typeRef = nullptr; substitute = true; } } @@ -663,25 +837,25 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (m_reader.Peek() == 's') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(StructNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'u') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(UnionNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(UnionNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'e') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(EnumNamedTypeClass, QualifiedName({DemangleSourceName()}), - m_arch->GetDefaultIntegerSize(), m_arch->GetDefaultIntegerSize()); + type = DemangledTypeNode::NamedTypeWithDefaultIntegerWidth( + EnumNamedTypeClass, StringList{DemangleSourceName()}); break; } //Template Substitution - type = DemangleTemplateSubstitution(); + type = DemangleTemplateSubstitution(&typeRef); // In forward-ref mode (cv conversion operator type parsing), do not consume // trailing IE — 
it belongs to the enclosing nested-name and will be // processed by DemangleNestedName's 'I' case, which resolves forward refs. @@ -691,32 +865,41 @@ DemangledTypeNode DemangleGNU3::DemangleType() m_reader.Consume(); if (substitute) PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); + typeRef = nullptr; } break; } case 'P': - { - DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, PointerReferenceType); - substitute = true; + { + NodeRef childRef = nullptr; + DemangledTypeNode child = DemangleType(); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::PointerType(childRef, cnst, vltl, PointerReferenceType) : + DemangledTypeNode::PointerType(std::move(child), cnst, vltl, PointerReferenceType); + substitute = true; break; } case 'R': - { - DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, ReferenceReferenceType); - substitute = true; + { + NodeRef childRef = nullptr; + DemangledTypeNode child = DemangleType(); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::PointerType(childRef, cnst, vltl, ReferenceReferenceType) : + DemangledTypeNode::PointerType(std::move(child), cnst, vltl, ReferenceReferenceType); + substitute = true; break; } case 'O': - { - DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, RValueReferenceType); - substitute = true; + { + NodeRef childRef = nullptr; + DemangledTypeNode child = DemangleType(); + childRef = m_lastTypeRef; + type = childRef ? 
DemangledTypeNode::PointerType(childRef, cnst, vltl, RValueReferenceType) : + DemangledTypeNode::PointerType(std::move(child), cnst, vltl, RValueReferenceType); + substitute = true; break; } case 'C': //TODO:complex @@ -727,17 +910,20 @@ DemangledTypeNode DemangleGNU3::DemangleType() // Vendor-extended type: U [] // Commonly used for Objective-C block pointers: // U13block_pointer -> "void (params...) block_pointer" - string extName = DemangleSourceName(); + DemangledNamePart extName(DemangleSourceName()); if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector targs; + ParamList targs; DemangleTemplateArgs(targs); if (!targs.empty()) - extName += GetTemplateString(targs); + extName.SetTemplateArguments(std::move(targs), true); } DemangledTypeNode inner = DemangleType(); - type = CreateUnknownType(inner.GetString() + " " + extName); + NodeRef innerRef = m_lastTypeRef ? m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(inner)); + auto extType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, DemangledQualifiedName{std::move(extName)}); + NodeRef extNameRef = DemangledTypeNode::CreateShared(std::move(extType)); + type = DemangledTypeNode::PostfixType(innerRef, " ", extNameRef); substitute = true; break; } @@ -745,31 +931,31 @@ DemangledTypeNode DemangleGNU3::DemangleType() { // Vendor extended type: u [] // e.g. 
u14__remove_cvref, u20__remove_reference_t - string extName = DemangleSourceName(); + DemangledNamePart extName(DemangleSourceName()); if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector targs; + ParamList targs; DemangleTemplateArgs(targs); if (!targs.empty()) - extName += GetTemplateString(targs); + extName.SetTemplateArguments(std::move(targs), true); } - type = CreateUnknownType(extName); + type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, DemangledQualifiedName{std::move(extName)}); substitute = true; break; } case 'v': type = DemangledTypeNode::VoidType(); break; - case 'w': type = DemangledTypeNode::IntegerType(4, false, "wchar_t"); break; //TODO: verify + case 'w': type = DemangledTypeNode::WideCharType(4, "wchar_t"); break; //TODO: verify case 'b': type = DemangledTypeNode::BoolType(); break; - case 'c': type = DemangledTypeNode::IntegerType(1, true, "char"); break; + case 'c': type = DemangledTypeNode::IntegerType(1, true); break; case 'a': type = DemangledTypeNode::IntegerType(1, true, "signed char"); break; case 'h': type = DemangledTypeNode::IntegerType(1, false); break; case 's': type = DemangledTypeNode::IntegerType(2, true); break; case 't': type = DemangledTypeNode::IntegerType(2, false); break; case 'i': type = DemangledTypeNode::IntegerType(4, true); break; case 'j': type = DemangledTypeNode::IntegerType(4, false); break; - case 'l': type = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), true); break; //long - case 'm': type = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), false); break; //ulong + case 'l': type = DemangledTypeNode::AddressSizedIntegerType(true); break; //long + case 'm': type = DemangledTypeNode::AddressSizedIntegerType(false); break; //ulong case 'x': type = DemangledTypeNode::IntegerType(8, true); break; case 'y': type = DemangledTypeNode::IntegerType(8, false); break; case 'n': type = DemangledTypeNode::IntegerType(16, true); break; @@ -782,12 +968,11 @@ DemangledTypeNode 
DemangleGNU3::DemangleType() case 'M': // TODO: Make into pointer to function member { DemangledTypeNode memberName = DemangleType(); + NodeRef memberNameRef = m_lastTypeRef ? m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(memberName)); DemangledTypeNode member = DemangleType(); - string fullName = member.GetStringBeforeName() + "(" + memberName.GetString() + "::*)" + member.GetStringAfterName(); - //member.SetScope(NonStaticScope); - //DemangledTypeNode ptr = DemangledTypeNode::PointerType(m_arch, member, cnst, vltl); - //QualifiedName qn({memberName.GetString(), "*"}); - type = CreateUnknownType(fullName); + NodeRef memberRef = m_lastTypeRef ? m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(member)); + type = DemangledTypeNode::MemberPointerType(memberRef, CopyQualifiedName(*memberNameRef), cnst, vltl); + type.SetParenthesizedMemberPointer(true); substitute = true; break; } @@ -799,20 +984,21 @@ DemangledTypeNode DemangleGNU3::DemangleType() case 'e': type = DemangledTypeNode::FloatType(16, "decimal128"); break; case 'f': type = DemangledTypeNode::FloatType(4, "decimal32"); break; case 'h': type = DemangledTypeNode::FloatType(2); break; - case 'i': type = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; - case 's': type = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; + case 'i': type = DemangledTypeNode::WideCharType(4, "char32_t"); break; + case 's': type = DemangledTypeNode::WideCharType(2, "char16_t"); break; case 'a': type = CreateUnknownType("auto"); break; //auto type case 'c': type = CreateUnknownType("decltype(auto)"); break; //decltype(auto) case 'n': { - static const QualifiedName stdNullptrTName(vector{"std", "nullptr_t"}); + static const StringList stdNullptrTName(vector{"std", "nullptr_t"}); type = CreateUnknownType(stdNullptrTName); break; } case 'p': { DemangledTypeNode inner = DemangleType(); - type = CreateUnknownType(inner.GetString() + "..."); + NodeRef innerRef = m_lastTypeRef ? 
m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(inner)); + type = DemangledTypeNode::PostfixType(innerRef, "..."); break; } case 't': @@ -827,8 +1013,11 @@ DemangledTypeNode DemangleGNU3::DemangleType() uint64_t size = DemangleNumber(); if (m_reader.Read() != '_') throw DemangleException(); + NodeRef childRef = nullptr; DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::ArrayType(std::move(child), size); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::ArrayType(childRef, size) : + DemangledTypeNode::ArrayType(std::move(child), size); break; } default: @@ -837,7 +1026,7 @@ DemangledTypeNode DemangleGNU3::DemangleType() } break; case 'N': - type = DemangleNestedName(); + type = DemangleNestedName(nullptr, false); substitute = true; break; case 'A': @@ -849,14 +1038,17 @@ DemangledTypeNode DemangleGNU3::DemangleType() uint64_t size = DemangleNumber(); if (m_reader.Read() != '_') throw DemangleException(); + NodeRef childRef = nullptr; DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::ArrayType(std::move(child), size); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::ArrayType(childRef, size) : + DemangledTypeNode::ArrayType(std::move(child), size); } else { //[] _ //Since our type system doesn't support expressions as dimensions - //we instead demangle this as just a string. + //we preserve the element type node and render a synthetic name at finalization. string dimension = "[]"; if (m_reader.Peek() != '_') { @@ -865,8 +1057,9 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (m_reader.Read() != '_') throw DemangleException(); - const string typeString = DemangleType().GetString() + dimension; - type = CreateUnknownType(typeString); + DemangledTypeNode inner = DemangleType(); + NodeRef innerRef = m_lastTypeRef ? 
m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(inner)); + type = DemangledTypeNode::PostfixType(innerRef, std::move(dimension)); } substitute = true; break; @@ -875,10 +1068,10 @@ DemangledTypeNode DemangleGNU3::DemangleType() m_reader.UnRead(); type = DemangleName(); - auto nameList = type.GetTypeName(); - if (nameList.size() < 1) + string lastName = LastTypeNameSegmentBase(type); + if (lastName.empty()) throw DemangleException(); - m_lastName = nameList.back(); + m_lastName = lastName; substitute = true; if (m_reader.Peek() == 'I') @@ -886,38 +1079,40 @@ DemangledTypeNode DemangleGNU3::DemangleType() substitute = false; m_reader.Consume(); PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); PushType(type); } } } if (substitute) - PushType(type); + typeRef = PushType(type); + m_lastTypeRef = typeRef; dedent(); return type; } -DemangledTypeNode DemangleGNU3::DemangleSubstitution() +DemangledTypeNode DemangleGNU3::DemangleSubstitution(NodeRef* outTypeRef) { - static const QualifiedName stdAllocatorName(vector{"std", "allocator"}); - static const QualifiedName stdBasicStringName(vector{"std", "basic_string"}); - static const QualifiedName stdIostreamName(vector{"std", "iostream"}); - static const QualifiedName stdIstreamName(vector{"std", "istream"}); - static const QualifiedName stdOstreamName(vector{"std", "ostream"}); - static const QualifiedName stdStringName(vector{"std", "string"}); - static const QualifiedName stdName(vector{"std"}); + if (outTypeRef) + *outTypeRef = nullptr; + static const StringList stdAllocatorName(vector{"std", "allocator"}); + static const StringList stdBasicStringName(vector{"std", "basic_string"}); + static const StringList stdIostreamName(vector{"std", "iostream"}); + static const StringList stdIstreamName(vector{"std", "istream"}); + static const StringList 
stdOstreamName(vector{"std", "ostream"}); + static const StringList stdStringName(vector{"std", "string"}); + static const StringList stdName(vector{"std"}); indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm; elm = m_reader.Read(); - QualifiedName name; + StringList name; size_t number = 0; switch (elm) { @@ -965,10 +1160,13 @@ DemangledTypeNode DemangleGNU3::DemangleSubstitution() } dedent(); - const DemangledTypeNode& resolved = GetType(number); - const auto& segs = resolved.GetTypeName(); - if (!segs.empty()) - m_lastName = segs.back(); + auto ref = GetTypeRef(number); + const DemangledTypeNode& resolved = *ref; + string lastName = LastTypeNameSegmentBase(resolved); + if (!lastName.empty()) + m_lastName = lastName; + if (outTypeRef) + *outTypeRef = ref; return resolved; } m_lastName = name.back(); @@ -1023,7 +1221,7 @@ string DemangleGNU3::DemanglePrimaryExpression() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm1 = '\0'; string out; - QualifiedName tmpList; + StringList tmpList; bool oldTopLevel; //expr-primary if (m_reader.PeekString(2) == "_Z") @@ -1040,7 +1238,7 @@ string DemangleGNU3::DemanglePrimaryExpression() DemangledTypeNode t = DemangleSymbol(tmpList); m_topLevel = oldTopLevel; m_templateSubstitute = std::move(savedTemplateSubstitute); - out += t.GetTypeAndName(tmpList); + out += t.GetTypeAndName(tmpList, m_platform.GetPtr()); dedent() return out; } @@ -1055,7 +1253,7 @@ string DemangleGNU3::DemanglePrimaryExpression() DemangledTypeNode t2 = DemangleSymbol(tmpList); m_topLevel = oldTopLevel; m_templateSubstitute = std::move(savedTemplateSubstitute2); - out += t2.GetTypeAndName(tmpList); + out += t2.GetTypeAndName(tmpList, m_platform.GetPtr()); dedent(); return out; } @@ -1157,7 +1355,7 @@ string DemangleGNU3::DemangleExpressionList() expr += ", "; const string e = DemangleExpression(); expr += e; - m_functionSubstitute.back().push_back(CreateUnknownType(e)); + 
m_functionSubstitute.back().push_back(DemangledTypeNode::CreateShared(CreateUnknownType(e))); first = false; } m_functionSubstitute.pop_back(); @@ -1230,6 +1428,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() case hash('s','z'): case hash('a','t'): case hash('a','z'): + case hash('a','w'): case hash('n','x'): case hash('s','Z'): case hash('s','P'): @@ -1294,13 +1493,13 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() string name; name = "'lambda"; vector lambdaParams; - // Generic lambdas encode 'auto' params as T_, T0_, T1_... which reference - // the lambda's own operator() template params, not any outer template scope. - // Save and replace the template substitution table with 'auto' placeholders. - auto savedTemplateSubstitute = m_templateSubstitute; - m_templateSubstitute.clear(); - for (int autoIdx = 0; autoIdx < 16; autoIdx++) - m_templateSubstitute.push_back(CreateUnknownType("auto")); + // Generic lambdas encode 'auto' params as template params. Preserve any + // enclosing template substitutions, and synthesize lambda-local autos + // lazily only when a template-param reference does not resolve. 
+ bool savedParsingLambdaParams = m_parsingLambdaParams; + size_t savedLambdaTemplateParamBase = m_lambdaTemplateParamBase; + m_parsingLambdaParams = true; + m_lambdaTemplateParamBase = m_templateSubstitute.size(); do { DemangledTypeNode param = DemangleType(); @@ -1309,7 +1508,8 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() lambdaParams.push_back(std::move(param)); }while (m_reader.Peek() != 'E'); m_reader.Consume(); - m_templateSubstitute = std::move(savedTemplateSubstitute); + m_parsingLambdaParams = savedParsingLambdaParams; + m_lambdaTemplateParamBase = savedLambdaTemplateParamBase; if (isdigit(m_reader.Peek())) { @@ -1328,6 +1528,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() name += ")"; m_lastName = name; outType = CreateUnknownType(name); + PushType(outType); break; } case hash('U','t'): @@ -1353,16 +1554,18 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() // The conversion operator type may reference template params (T_, T0_, ...) // that aren't yet in m_templateSubstitute (they're defined by a following // IE in the enclosing nested name). Set m_permitForwardTemplateRefs so - // that DemangleTemplateSubstitution() returns a placeholder instead of + // that DemangleTemplateSubstitution() returns a shared placeholder instead of // throwing, and don't consume trailing IE in the T case of DemangleType. // The outer DemangleNestedName case 'I' will parse those args and call - // ResolveForwardTemplateRefs() to patch the placeholders. + // ResolveForwardTemplateRefs() to replace those placeholders with the real args. bool savedPermit = m_permitForwardTemplateRefs; m_pendingForwardRefs.clear(); m_permitForwardTemplateRefs = true; DemangledTypeNode cvType = DemangleType(); + NodeRef cvTypeRef = m_lastTypeRef ? 
m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(cvType)); m_permitForwardTemplateRefs = savedPermit; - outType = CreateUnknownType("operator " + cvType.GetString()); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, + DemangledQualifiedName{DemangledNamePart("operator ", std::move(cvTypeRef))}); break; } default: @@ -1386,18 +1589,16 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() { m_reader.Consume(); string tag = "[abi:" + DemangleSourceName() + "]"; - auto qn = outType.GetTypeName(); - if (!qn.empty()) - qn.back() += tag; - outType.SetTypeName(std::move(qn)); - m_lastName = qn.empty() ? tag : qn.back(); + ExtendTypeName(outType, tag); + string lastName = LastTypeNameSegmentBase(outType); + m_lastName = lastName.empty() ? tag : lastName; } dedent(); return outType; } -QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() +StringList DemangleGNU3::DemangleBaseUnresolvedName() { // ::= # unresolved name // ::= on # unresolved operator-function-id @@ -1407,7 +1608,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - QualifiedName out; + StringList out; if (m_reader.Length() > 1) { const string str = m_reader.PeekString(2); @@ -1420,9 +1621,9 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - out.back() += GetTemplateString(args); + out.back() = NameSegmentWithTemplateArgs(out.back(), std::move(args)).GetString(); PushType(CreateUnknownType(out)); } } @@ -1441,9 +1642,9 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - out.back() += GetTemplateString(args); + out.back() = NameSegmentWithTemplateArgs(out.back(), std::move(args)).GetString(); } } } @@ -1468,10 +1669,9 @@ DemangledTypeNode 
DemangleGNU3::DemangleUnresolvedType() { PushType(type); m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); PushType(type); } else @@ -1607,9 +1807,9 @@ string DemangleGNU3::DemangleExpression() return GetOperator(elm1, elm2) + "(" + DemangleTypeString() + ")"; case hash('s','P'): { - vector args; + ParamList args; DemangleTemplateArgs(args); - return "sizeof...(" + GetTemplateString(args) + ")..."; + return "sizeof...(" + NameSegmentWithTemplateArgs("", std::move(args)).GetString() + ")..."; } case hash('s','p'): return "(" + DemangleExpression() + ")..."; @@ -1695,7 +1895,7 @@ string DemangleGNU3::DemangleExpression() if (!firstArg) args += ", "; const string e = DemangleExpression(); args += e; - m_functionSubstitute.back().push_back(CreateUnknownType(e)); + m_functionSubstitute.back().push_back(DemangledTypeNode::CreateShared(CreateUnknownType(e))); firstArg = false; } m_functionSubstitute.pop_back(); @@ -1761,7 +1961,9 @@ string DemangleGNU3::DemangleExpression() out = (elementNum == 0) ? "fp" : "fp" + std::to_string(elementNum - 1); break; } - type = m_functionSubstitute[listNumber][elementNum]; + if (!m_functionSubstitute[listNumber][elementNum]) + throw DemangleException(); + type = *m_functionSubstitute[listNumber][elementNum]; } else if (isdigit(elm) || isupper(elm)) { @@ -1776,7 +1978,9 @@ string DemangleGNU3::DemangleExpression() out = "fp" + std::to_string(elementNum - 1); break; } - type = m_functionSubstitute[listNumber][elementNum]; + if (!m_functionSubstitute[listNumber][elementNum]) + throw DemangleException(); + type = *m_functionSubstitute[listNumber][elementNum]; } else { @@ -1819,22 +2023,22 @@ string DemangleGNU3::DemangleExpression() { out += DemangleSourceName(); // Push bare name (before template args) to substitution table. 
- PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); // DemangleTemplateArgs(args); - out += GetTemplateString(args); + out = NameSegmentWithTemplateArgs(out, std::move(args)).GetString(); // Also push the template instantiation (name+args). - PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); } out += "::"; }while (m_reader.Peek() != 'E'); m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } if (isdigit(m_reader.Peek())) @@ -1859,10 +2063,10 @@ string DemangleGNU3::DemangleExpression() PushType(CreateUnknownType(out)); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); DemangleTemplateArgs(args); // consumes the trailing 'E' - out += GetTemplateString(args); + out = NameSegmentWithTemplateArgs(out, std::move(args)).GetString(); // Also push the template instantiation. PushType(CreateUnknownType(out)); hadTemplateArgs = true; @@ -1874,7 +2078,7 @@ string DemangleGNU3::DemangleExpression() // so check rather than unconditionally consuming. if (m_reader.Peek() == 'E') m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } else @@ -1890,7 +2094,7 @@ string DemangleGNU3::DemangleExpression() const string segName = DemangleSourceName(); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); DemangleTemplateArgs(args); if (isdigit(m_reader.Peek())) @@ -1898,13 +2102,14 @@ string DemangleGNU3::DemangleExpression() // Another source name follows — intermediate qualifier. 
// Push to the substitution table, mirroring what the // N-prefix sr branch does for each nested qualifier. - PushType(CreateUnknownType(out + segName + GetTemplateString(args))); - out += segName + GetTemplateString(args) + "::"; + string segment = NameSegmentWithTemplateArgs(segName, std::move(args)).GetString(); + PushType(CreateUnknownType(out + segment)); + out += segment + "::"; } else { // No more source names — this template-id is the final name. - out += segName + GetTemplateString(args); + out += NameSegmentWithTemplateArgs(segName, std::move(args)).GetString(); return out; } } @@ -1916,7 +2121,7 @@ string DemangleGNU3::DemangleExpression() } } // peek is not a digit: fall back for operator-names ("on") / destructor-names ("dn"). - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); } return out; default: @@ -1924,11 +2129,11 @@ string DemangleGNU3::DemangleExpression() out = DemangleSourceName(); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); // DemangleTemplateArgs(args); - out += GetTemplateString(args); + out = NameSegmentWithTemplateArgs(out, std::move(args)).GetString(); } break; } @@ -1936,55 +2141,88 @@ string DemangleGNU3::DemangleExpression() } -void DemangleGNU3::DemangleTemplateArgs(vector& args, bool* hadNonTypeArg) +bool DemangleGNU3::DemangleTemplateArg(ParamList& args, bool* hadNonTypeArg) { - indent(); - MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode tmp; + NodeRef tmpRef; bool tmpValid = false; string expr; bool topLevel; - const string lastName = m_lastName; - while (m_reader.Peek() != 'E') + switch (m_reader.Read()) { - switch (m_reader.Read()) - { - case 'L': - expr = DemanglePrimaryExpression(); - args.push_back(expr); - tmp = CreateUnknownType(expr); - tmpValid = true; - if (hadNonTypeArg) *hadNonTypeArg = true; - break; - case 'X': - args.push_back(DemangleExpression()); - if (m_reader.Read() != 'E') - 
throw DemangleException(); - if (hadNonTypeArg) *hadNonTypeArg = true; - break; - case 'I': // GCC sometimes uses I...E for argument packs instead of J...E - case 'J': + case 'L': + expr = DemanglePrimaryExpression(); + tmp = CreateUnknownType(expr); + tmpRef = DemangledTypeNode::CreateShared(std::move(tmp)); + args.push_back({"", tmpRef}); + tmpValid = true; + if (hadNonTypeArg) *hadNonTypeArg = true; + break; + case 'X': + { + DemangledTypeNode exprNode = CreateUnknownType(DemangleExpression()); + args.push_back({"", DemangledTypeNode::CreateShared(std::move(exprNode))}); + if (m_reader.Read() != 'E') + throw DemangleException(); + if (hadNonTypeArg) *hadNonTypeArg = true; + break; + } + case 'I': // GCC sometimes uses I...E for argument packs instead of J...E + case 'J': + { + size_t prevTemplateSize = m_templateSubstitute.size(); + DemangleTemplateArgs(args, hadNonTypeArg); + if (m_topLevel && m_templateSubstitute.size() == prevTemplateSize) + PushTemplateType(CreateUnknownType("auto")); + break; + } + case 'T': + if (m_reader.Peek() == 'n') { - size_t prevTemplateSize = m_templateSubstitute.size(); - DemangleTemplateArgs(args); - if (m_topLevel && m_templateSubstitute.size() == prevTemplateSize) - PushTemplateType(CreateUnknownType("auto")); - break; - } - default: - m_reader.UnRead(); + // ::= + // ::= Tn # non-type parameter + // + // The declaration names a synthetic non-type template parameter + // for the following argument. Binary Ninja does not print those + // synthetic parameter names, so consume the declaration type and + // keep only the actual following template argument. 
+ m_reader.Consume(); topLevel = m_topLevel; m_topLevel = false; - tmp = DemangleType(); + DemangleType(); m_topLevel = topLevel; - args.push_back(tmp.GetString()); - tmpValid = true; - } - if (m_topLevel && tmpValid) - { - MyLogDebug("Adding template ref: %s\n", tmp.GetString().c_str()); - PushTemplateType(tmp); + return DemangleTemplateArg(args, hadNonTypeArg); } + [[fallthrough]]; + default: + m_reader.UnRead(); + topLevel = m_topLevel; + m_topLevel = false; + tmp = DemangleType(); + m_topLevel = topLevel; + tmpRef = DemangledTypeNode::CreateShared(std::move(tmp)); + args.push_back({"", tmpRef}); + tmpValid = true; + } + if (m_topLevel && tmpValid) + { + MyLogDebug("Adding template ref: %s\n", tmpRef ? tmpRef->GetString().c_str() : ""); + PushTemplateType(tmpRef); + } + return true; +} + + +void DemangleGNU3::DemangleTemplateArgs(ParamList& args, bool* hadNonTypeArg) +{ + NestingGuard nestingGuard(*this); + indent(); + MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); + const string lastName = m_lastName; + while (m_reader.Peek() != 'E') + { + if (!DemangleTemplateArg(args, hadNonTypeArg)) + break; } m_reader.Consume(); m_lastName = lastName; @@ -1993,8 +2231,9 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args, bool* hadNonTypeAr } -DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) +DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs, bool pushBareTemplatePrefix) { + NestingGuard nestingGuard(*this); /* This can be either a qualified name like: "foo::bar::bas" or it can be a qualified type like: "foo::bar::bas & const" thus we return either @@ -2027,7 +2266,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, QualifiedName()); + DemangledTypeNode type = 
DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{}); bool cnst = false, vltl = false, rstrct = false; bool ref = false; bool rvalueRef = false; @@ -2073,7 +2312,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) { if (!base) throw DemangleException(); - vector args; + ParamList args; bool hadNonType = false; DemangleTemplateArgs(args, allTypeTemplateArgs ? &hadNonType : nullptr); if (allTypeTemplateArgs) @@ -2084,8 +2323,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) // type parsing itself where m_permitForwardTemplateRefs is true). if (!m_permitForwardTemplateRefs) ResolveForwardTemplateRefs(type, args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); isTemplate = true; break; } @@ -2099,16 +2337,8 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) if (!isTemplate) { type.SetNameType(newType.GetNameType()); - auto aNames = type.GetTypeName(); - auto bNames = newType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; - newName.reserve(aNames.size() + bNames.size()); - newName.insert(newName.end(), aNames.begin(), aNames.end()); - newName.insert(newName.end(), bNames.begin(), bNames.end()); - if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); - type.SetNTR(type.GetNTRClass(), newName); - type.SetHasTemplateArguments(false); + AppendTypeName(type, newType); + type.SetNTRType(newType.GetNTRClass()); } // Consume any ABI tags (B ) following this name component. 
// These appear as suffixes on in the Itanium ABI: @@ -2123,11 +2353,12 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) string savedLastName = m_lastName; string abiTag = DemangleSourceName(); m_lastName = savedLastName; - auto& segs = type.GetMutableTypeName(); - if (!segs.empty()) - segs.back() += "[abi:" + abiTag + "]"; + ExtendTypeName(type, "[abi:" + abiTag + "]"); } - if (substitute && m_reader.Peek() != 'E') + bool dependentTemplatePrefix = !pushBareTemplatePrefix && m_reader.Peek() == 'I' && + LastTypeNameSegmentBase(type) == "basic_ostream" && + TemplateArgsReferenceTemplateParam(m_reader.PeekString(m_reader.Length())); + if (substitute && m_reader.Peek() != 'E' && !dependentTemplatePrefix) { //Those template arguments were not the primary arguments so clear them from the sub listType while (m_templateSubstitute.size() > startSize) @@ -2160,10 +2391,11 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) DemangledTypeNode DemangleGNU3::DemangleLocalName() { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode type; - QualifiedName varName; + StringList varName; // The local function has its own template scope. Save the outer template // substitution table and set m_topLevel = true so that when the local // function's template args are parsed (e.g. 
handleMessageDelayed), @@ -2177,11 +2409,9 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() m_inLocalName = true; type = DemangleSymbol(varName); m_inLocalName = savedInLocalName; - m_topLevel = oldTopLevel; - m_templateSubstitute = std::move(savedTemplateSubstitute); if (varName.size() > 0) - varName.back() += (type.GetStringAfterName()); + varName.back() += type.GetStringAfterName(m_platform.GetPtr()); else varName.push_back(type.GetString()); @@ -2199,23 +2429,20 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() // DemangledTypeNode tmpType = DemangleName(); type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); - auto aNames = type.GetTypeName(); - auto bNames = tmpType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; - newName.reserve(aNames.size() + bNames.size()); - newName.insert(newName.end(), aNames.begin(), aNames.end()); - newName.insert(newName.end(), bNames.begin(), bNames.end()); - if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); - type.SetTypeName(std::move(newName)); + AppendTypeName(type, tmpType); + type.SetNTRType(tmpType.GetNTRClass()); type.SetConst(tmpType.IsConst()); type.SetVolatile(tmpType.IsVolatile()); - type.SetPointerSuffix(tmpType.GetPointerSuffix()); + type.SetPointerSuffixBits(tmpType.GetPointerSuffixBits()); + m_templateSubstitute = std::move(savedTemplateSubstitute); + m_topLevel = oldTopLevel; } else { m_reader.Consume(); type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); + m_templateSubstitute = std::move(savedTemplateSubstitute); + m_topLevel = oldTopLevel; } // [] //TODO: What do we do with discriminators? 
@@ -2241,6 +2468,7 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() DemangledTypeNode DemangleGNU3::DemangleName() { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); /* @@ -2264,9 +2492,9 @@ DemangledTypeNode DemangleGNU3::DemangleName() { m_reader.Consume(1); type = DemangleUnqualifiedName(); - auto qn = type.GetTypeName(); - qn.insert(qn.begin(), "std"); - type.SetTypeName(std::move(qn)); + auto qn = CopyQualifiedName(type); + qn.insert(qn.begin(), DemangledNamePart("std")); + type.SetName(std::move(qn)); substitute = true; } else @@ -2279,10 +2507,9 @@ DemangledTypeNode DemangleGNU3::DemangleName() m_reader.Consume(); if (substitute) PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); // Push the template instantiation (e.g. std::swap) so that the // substitution table matches what the encoder built. 
The encoder adds // both the unscoped-template-name (prefix, already pushed above) and @@ -2316,14 +2543,11 @@ DemangledTypeNode DemangleGNU3::DemangleName() { PushType(type); // - vector args; + ParamList args; m_reader.Consume(); // DemangleTemplateArgs(args); - LogDebug("Typename: %s", type.GetTypeName()[0].c_str()); - ExtendTypeName(type, GetTemplateString(args)); - LogDebug("Typename: %s", type.GetTypeName()[0].c_str()); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); } } dedent(); @@ -2331,17 +2555,19 @@ DemangledTypeNode DemangleGNU3::DemangleName() } -DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) +DemangledTypeNode DemangleGNU3::DemangleSymbol(StringList& varName) { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s: %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode returnType; + NodeRef returnTypeRef = nullptr; bool isReturnTypeUnknown = false; DemangledTypeNode type; ParamList params; bool cnst = false, vltl = false, rstrct = false; bool oldTopLevel; - QualifiedName name; + StringList name; /* ::= @@ -2387,7 +2613,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{name.GetString() + " [transaction clone]" + t.GetStringAfterName()}); + StringList{JoinNameSegments(name) + " [transaction clone]" + t.GetStringAfterName(m_platform.GetPtr())}); } case 'V': { @@ -2437,7 +2663,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { // Guard variable (original behavior) DemangledTypeNode t = DemangleSymbol(name); - varName.push_back("guard_variable_for_" + t.GetTypeAndName(name)); + varName.push_back("guard_variable_for_" + t.GetTypeAndName(name, m_platform.GetPtr())); type = DemangledTypeNode::IntegerType(1, false); if (m_reader.Length() == 0) return type; @@ -2576,7 +2802,7 @@ 
DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) annotation += ']'; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{routineName + annotation}); + StringList{routineName + annotation}); } default: throw DemangleException(); @@ -2628,7 +2854,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"covariant_return_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"covariant_return_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName(m_platform.GetPtr())}); } case 'C': { @@ -2638,7 +2864,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); + StringList{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); } case 'D': LogWarn("Unsupported: 'typeinfo common proxy'\n"); @@ -2656,7 +2882,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"non-virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName(m_platform.GetPtr())}); } case 'H': // TLS init function { @@ -2665,11 +2891,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_init_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_init_function_for_" + 
t.GetTypeAndName(name, m_platform.GetPtr())}); } case 'I': return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"typeinfo_for_" + DemangleTypeString()}); + StringList{"typeinfo_for_" + DemangleTypeString()}); case 'J': LogWarn("Unsupported: 'java class'\n"); throw DemangleException(); @@ -2684,7 +2910,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { DemangledTypeNode t = DemangleType(); return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"VTT_for_" + t.GetString()}); + StringList{"VTT_for_" + t.GetString()}); } case 'v': // virtual thunk { @@ -2699,11 +2925,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName(m_platform.GetPtr())}); } case 'V': //Vtable return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"vtable_for_" + DemangleTypeString()}); + StringList{"vtable_for_" + DemangleTypeString()}); case 'W': // TLS wrapper function { oldTopLevel = m_topLevel; @@ -2711,7 +2937,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_wrapper_function_for_" + t.GetTypeAndName(name, m_platform.GetPtr())}); } default: throw DemangleException(); @@ -2732,10 +2958,9 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) return type; } - varName = type.GetTypeName(); cnst = type.IsConst(); vltl = type.IsVolatile(); - auto suffix = type.GetPointerSuffix(); + auto 
suffix = type.GetPointerSuffixBits(); if (m_reader.Peek() == 'J') { m_reader.Consume(); @@ -2750,24 +2975,25 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) string savedLastName = m_lastName; string abiTag = DemangleSourceName(); m_lastName = savedLastName; - auto& segs = type.GetMutableTypeName(); - if (!segs.empty()) - segs.back() += "[abi:" + abiTag + "]"; + ExtendTypeName(type, "[abi:" + abiTag + "]"); } + const bool nameRequiresReturnType = m_isParameter || LastTypeNameSegmentHasTemplateArguments(type); + varName = type.RenderTypeNameSegments(m_platform.GetPtr()); if (m_isOperatorOverload || type.GetNameType() == ConstructorNameType || type.GetNameType() == DestructorNameType) { returnType = DemangledTypeNode::VoidType(); } - else if (m_isParameter || type.HasTemplateArguments()) + else if (nameRequiresReturnType) { returnType = DemangleType(); + returnTypeRef = m_lastTypeRef; } else { isReturnTypeUnknown = true; - returnType = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), true); + returnType = DemangledTypeNode::AddressSizedIntegerType(true); } m_functionSubstitute.push_back({}); @@ -2799,6 +3025,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) if (m_reader.PeekString(2) == "@@") break; DemangledTypeNode param = DemangleType(); + NodeRef paramRef = m_lastTypeRef; if (param.GetClass() == VoidTypeClass) { if (m_reader.Peek() == 'E') @@ -2808,9 +3035,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) } break; } - m_functionSubstitute.back().push_back(param); bool isVarArgs = param.GetClass() == VarArgsTypeClass; - params.push_back({"", std::make_shared(std::move(param))}); + if (!paramRef) + paramRef = DemangledTypeNode::CreateShared(std::move(param)); + m_functionSubstitute.back().push_back(paramRef); + params.push_back({"", paramRef}); if (isVarArgs) { if (m_reader.Peek() == 'E') @@ -2824,19 +3053,21 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& 
varName) m_functionSubstitute.pop_back(); m_isParameter = false; - type = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); + if (!returnTypeRef) + returnTypeRef = DemangledTypeNode::CreateShared(std::move(returnType)); + type = DemangledTypeNode::FunctionType(returnTypeRef, nullptr, std::move(params)); if (isReturnTypeUnknown) type.SetReturnTypeConfidence(BN_MINIMUM_CONFIDENCE); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); type.SetConst(cnst); type.SetVolatile(vltl); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); + type.SetPointerSuffixBits(1u << RestrictSuffix); // PrintTables(); - MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName().c_str(), varName.GetString().c_str(), - type.GetStringAfterName().c_str()); + MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName(m_platform.GetPtr()).c_str(), JoinNameSegments(varName).c_str(), + type.GetStringAfterName(m_platform.GetPtr()).c_str()); dedent(); return type; @@ -2892,121 +3123,146 @@ bool DemangleGNU3Static::DemangleGlobalHeader(string& name, string& header) } -bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +namespace { - // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). - // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 - // -> "invocation_function_for_block_in_dyld::_main(...)" - static const string blockInvokeSuffix = "_block_invoke"; - size_t blockPos = name.rfind(blockInvokeSuffix); - if (blockPos != string::npos) - { - // Verify the suffix is _block_invoke optionally followed by [._] only - string tail = name.substr(blockPos + blockInvokeSuffix.size()); - bool validSuffix = tail.empty(); - if (!validSuffix && (tail[0] == '.' 
|| tail[0] == '_')) - { - size_t i = 1; - while (i < tail.size() && isdigit((unsigned char)tail[i])) - i++; - validSuffix = (i == tail.size() && i > 1); - } - if (validSuffix) - { - // Extract the base symbol: everything before _block_invoke - string base = name.substr(0, blockPos); - // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it - size_t zPos = base.find_first_not_of('_'); - if (zPos != string::npos && base[zPos] == 'Z') + static bool DemangleStringGNU3Segments( + Platform* platform, const string& name, Ref& outType, StringList& outVarName) + { + // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). + // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 + // -> "invocation_function_for_block_in_dyld::_main(...)" + static const string blockInvokeSuffix = "_block_invoke"; + size_t blockPos = name.rfind(blockInvokeSuffix); + if (blockPos != string::npos) + { + // Verify the suffix is _block_invoke optionally followed by [._] only + string tail = name.substr(blockPos + blockInvokeSuffix.size()); + bool validSuffix = tail.empty(); + if (!validSuffix && (tail[0] == '.' 
|| tail[0] == '_')) + { + size_t i = 1; + while (i < tail.size() && isdigit((unsigned char)tail[i])) + i++; + validSuffix = (i == tail.size() && i > 1); + } + if (validSuffix) { - string normalized = "_" + base.substr(zPos); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, normalized, baseType, baseName)) + // Extract the base symbol: everything before _block_invoke + string base = name.substr(0, blockPos); + // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it + size_t zPos = base.find_first_not_of('_'); + if (zPos != string::npos && base[zPos] == 'Z') { - outVarName.clear(); - outVarName.push_back("invocation_function_for_block_in_" + baseName.GetString()); - outType = baseType; - return true; + string normalized = "_" + base.substr(zPos); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(platform, normalized, baseType, baseName)) + { + outVarName.clear(); + outVarName.push_back("invocation_function_for_block_in_" + JoinNameSegments(baseName)); + outType = baseType; + return true; + } } } } - } - // Handle macOS thread-local variable initializer suffix: $tlv$init - // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. - static const string tlvInitSuffix = "$tlv$init"; - if (name.size() > tlvInitSuffix.size() && - name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) - { - string base = name.substr(0, name.size() - tlvInitSuffix.size()); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, base, baseType, baseName)) + // Handle macOS thread-local variable initializer suffix: $tlv$init + // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. 
+ static const string tlvInitSuffix = "$tlv$init"; + if (name.size() > tlvInitSuffix.size() && + name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) { - outVarName = baseName; - if (outVarName.size() > 0) - outVarName[outVarName.size() - 1] += "$tlv$init"; - else - outVarName.push_back("$tlv$init"); - outType = baseType; - return true; + string base = name.substr(0, name.size() - tlvInitSuffix.size()); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(platform, base, baseType, baseName)) + { + outVarName = std::move(baseName); + if (outVarName.size() > 0) + outVarName[outVarName.size() - 1] += "$tlv$init"; + else + outVarName.push_back("$tlv$init"); + outType = baseType; + return true; + } } - } - string encoding = name; - string header; - bool foundHeader = DemangleGlobalHeader(encoding, header); - - if (!encoding.compare(0, 2, "_Z")) - encoding = encoding.substr(2); - else if (!encoding.compare(0, 3, "__Z")) - encoding = encoding.substr(3); - else if (foundHeader && !header.empty()) - { - outVarName.clear(); - outVarName.push_back(header); - outVarName.push_back(encoding); - outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); - return true; - } - else - return false; + string encoding = name; + string header; + bool foundHeader = DemangleGNU3Static::DemangleGlobalHeader(encoding, header); - thread_local DemangleGNU3 demangle(arch, encoding); - demangle.Reset(arch, encoding); - try - { - outType = demangle.DemangleSymbol(outVarName).Finalize(); + if (!encoding.compare(0, 2, "_Z")) + encoding = encoding.substr(2); + else if (!encoding.compare(0, 3, "__Z")) + encoding = encoding.substr(3); + else if (foundHeader && !header.empty()) + { + outVarName.clear(); + outVarName.push_back(header); + outVarName.push_back(encoding); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(platform); + return true; + } + else + return false; - if 
(outVarName.size() == 0) + thread_local DemangleGNU3 demangle(platform, encoding); + demangle.Reset(platform, encoding); + try { - if (outType->GetClass() == NamedTypeReferenceClass && outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) - { - outVarName = outType->GetTypeName(); - outType = nullptr; - } - else if (outType->GetClass() == NamedTypeReferenceClass) + outType = demangle.DemangleSymbol(outVarName).Finalize(platform); + + if (outVarName.size() == 0) { - auto typeName = outType->GetTypeName(); - if (typeName.size() > 0) - outVarName = "_" + typeName[typeName.size() - 1]; + if (GetFinalizedTypeClass(outType) == NamedTypeReferenceClass && + outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) + { + const auto typeName = outType->GetTypeName(); + outVarName = StringList(typeName.begin(), typeName.end()); + outType = nullptr; + } + else if (GetFinalizedTypeClass(outType) == NamedTypeReferenceClass) + { + auto typeName = outType->GetTypeName(); + if (typeName.size() > 0) + outVarName = StringList{"_" + typeName[typeName.size() - 1]}; + } } - } - if (foundHeader && !header.empty()) + if (foundHeader && !header.empty()) + outVarName.insert(outVarName.begin(), header); + } + catch (const DemangleException&) { - outVarName.insert(outVarName.begin(), header); + return false; } + return true; } - catch (std::exception&) - { +} + + +bool DemangleGNU3Static::DemangleStringGNU3(Platform* platform, const string& name, Ref& outType, + QualifiedName& outVarName) +{ + StringList outVarNameSegments; + if (!DemangleStringGNU3Segments(platform, name, outType, outVarNameSegments)) return false; - } + outVarName = QualifiedName(outVarNameSegments); return true; } +bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, + QualifiedName& outVarName) +{ + Ref platform; + if (arch) + platform = arch->GetStandalonePlatform(); + return DemangleStringGNU3(platform.GetPtr(), name, 
outType, outVarName); +} + + // ===== Explicit template instantiation ===== @@ -3033,6 +3289,16 @@ class GNU3Demangler: public Demangler Ref view) override #endif { + if (view) + { + auto platform = view->GetDefaultPlatform(); + if (platform) +#ifdef BINARYNINJACORE_LIBRARY + return DemangleGNU3Static::DemangleStringGNU3(platform, name, outType, outVarName); +#else + return DemangleGNU3Static::DemangleStringGNU3(platform.GetPtr(), name, outType, outVarName); +#endif + } return DemangleGNU3Static::DemangleStringGNU3(arch, name, outType, outVarName); } }; diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index 8ee0fca624..94e9a36326 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -94,13 +94,15 @@ class DemangleGNU3Reader class DemangleGNU3 { using ParamList = _STD_VECTOR; + using NodeRef = DemangledTypeNode::NodeRef; + using NodeRefList = _STD_VECTOR; - BN::QualifiedName m_varName; DemangleGNU3Reader m_reader; - BN::Architecture* m_arch; - _STD_VECTOR m_substitute; - _STD_VECTOR m_templateSubstitute; - _STD_VECTOR<_STD_VECTOR> m_functionSubstitute; + BN::Ref m_platform; + NodeRefList m_substitute; + NodeRefList m_templateSubstitute; + _STD_VECTOR m_functionSubstitute; + NodeRef m_lastTypeRef; _STD_STRING m_lastName; BNNameType m_nameType; bool m_localType; @@ -109,19 +111,32 @@ class DemangleGNU3 bool m_shouldDeleteReader; bool m_topLevel; bool m_isOperatorOverload; + bool m_parsingLambdaParams; + size_t m_lambdaTemplateParamBase; // Forward template reference support (for cv conversion operator types). // When m_permitForwardTemplateRefs is true, DemangleTemplateSubstitution() - // returns a placeholder instead of throwing for out-of-bounds template params. - // m_pendingForwardRefs records which param indices have placeholders so that - // ResolveForwardTemplateRefs() can patch them once template args are known. 
+ // returns a shared placeholder node instead of throwing for out-of-bounds + // template params. m_pendingForwardRefs records those nodes so that + // ResolveForwardTemplateRefs() can replace their contents once args are known. bool m_permitForwardTemplateRefs; bool m_inLocalName; - struct ForwardRef { size_t index; }; + size_t m_nestingDepth; + struct ForwardRef + { + size_t index; + NodeRef typeRef; + }; _STD_VECTOR m_pendingForwardRefs; - void ResolveForwardTemplateRefs(DemangledTypeNode& type, const _STD_VECTOR<_STD_STRING>& args); - static _STD_STRING ForwardRefPlaceholder(size_t index); + class NestingGuard + { + DemangleGNU3& m_demangler; + public: + NestingGuard(DemangleGNU3& demangler); + ~NestingGuard(); + }; + void ResolveForwardTemplateRefs(DemangledTypeNode& type, const ParamList& args); enum SymbolType { Function, FunctionWithReturn, Data, VTable, Rtti, Name}; - BN::QualifiedName DemangleBaseUnresolvedName(); + StringList DemangleBaseUnresolvedName(); DemangledTypeNode DemangleUnresolvedType(); _STD_STRING DemangleUnarySuffixExpression(const _STD_STRING& op); _STD_STRING DemangleUnaryPrefixExpression(const _STD_STRING& op); @@ -138,20 +153,30 @@ class DemangleGNU3 DemangledTypeNode DemangleLocalName(); void DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct); - DemangledTypeNode DemangleSubstitution(); - DemangledTypeNode DemangleTemplateSubstitution(); - void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args, bool* hadNonTypeArg = nullptr); + DemangledTypeNode DemangleSubstitution(NodeRef* outTypeRef = nullptr); + DemangledTypeNode DemangleTemplateSubstitution(NodeRef* outTypeRef = nullptr); + bool DemangleTemplateArg(ParamList& args, bool* hadNonTypeArg = nullptr); + void DemangleTemplateArgs(ParamList& args, bool* hadNonTypeArg = nullptr); DemangledTypeNode DemangleFunction(bool cnst, bool vltl); DemangledTypeNode DemangleType(); int64_t DemangleNumber(); - DemangledTypeNode DemangleNestedName(bool* allTypeTemplateArgs = nullptr); - void 
PushTemplateType(const DemangledTypeNode& type); - void PushType(const DemangledTypeNode& type); + DemangledTypeNode DemangleNestedName(bool* allTypeTemplateArgs = nullptr, bool pushBareTemplatePrefix = true); + NodeRef PushTemplateType(NodeRef type); + NodeRef PushTemplateType(const DemangledTypeNode& type); + NodeRef PushTemplateType(DemangledTypeNode&& type); + NodeRef PushType(NodeRef type); + NodeRef PushType(const DemangledTypeNode& type); + NodeRef PushType(DemangledTypeNode&& type); + NodeRef GetTypeRef(size_t ref); const DemangledTypeNode& GetType(size_t ref); - DemangledTypeNode CreateUnknownType(const BN::QualifiedName& s); + DemangledTypeNode CreateUnknownType(const StringList& s); DemangledTypeNode CreateUnknownType(const _STD_STRING& s); static void ExtendTypeName(DemangledTypeNode& type, const _STD_STRING& extend); + static void ApplyTemplateArgs(DemangledTypeNode& type, ParamList args); + static void AppendTypeName(DemangledTypeNode& type, const DemangledTypeNode& extend); + static _STD_STRING LastTypeNameSegmentBase(const DemangledTypeNode& type); + static bool LastTypeNameSegmentHasTemplateArguments(const DemangledTypeNode& type); #ifdef GNUDEMANGLE_DEBUG const DemangledTypeNode& GetTemplateType(size_t ref); @@ -159,10 +184,9 @@ class DemangleGNU3 #endif public: - DemangleGNU3(BN::Architecture* arch, const _STD_STRING& mangledName); - void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); - DemangledTypeNode DemangleSymbol(BN::QualifiedName& varName); - BN::QualifiedName GetVarName() const { return m_varName; } + DemangleGNU3(BN::Platform* platform, const _STD_STRING& mangledName); + void Reset(BN::Platform* platform, const _STD_STRING& mangledName); + DemangledTypeNode DemangleSymbol(StringList& varName); }; @@ -172,5 +196,6 @@ class DemangleGNU3Static static bool IsGNU3MangledString(const _STD_STRING& name); static bool DemangleGlobalHeader(_STD_STRING& name, _STD_STRING& header); + static bool DemangleStringGNU3(BN::Platform* 
platform, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName); static bool DemangleStringGNU3(BN::Architecture* arch, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName); };