From cbce88d00547cd2dded64e18eb9208fc1a417911 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 5 Jun 2026 11:12:55 +0200 Subject: [PATCH] Fix memory explosion in construction of TypeAssignmentGraph while computing getPointerIndicesOfType() --- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 31 +++++ .../ControlFlow/VTA/TypeAssignmentGraph.cpp | 120 +----------------- lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 112 ++++++++++++++++ 3 files changed, 148 insertions(+), 115 deletions(-) diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 0377bd5e83..9841805160 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -17,15 +17,20 @@ #ifndef PHASAR_PHASARLLVM_UTILS_LLVMSHORTHANDS_H #define PHASAR_PHASARLLVM_UTILS_LLVMSHORTHANDS_H +#include "phasar/Utils/BitSet.h" #include "phasar/Utils/Utilities.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include +#include #include namespace llvm { @@ -42,6 +47,8 @@ class CallInst; class AllocaInst; class DIType; class DIDerivedType; +class DICompositeType; +class DataLayout; } // namespace llvm namespace psr { @@ -343,6 +350,30 @@ inline const llvm::Function *getFunction(const llvm::Instruction *Inst) { return Inst->getFunction(); } +const llvm::DIType *stripMemberAndTypedef(const llvm::DIType *Ty); + +inline bool isPointerTy(const llvm::DIType *Ty) { + if (const auto *DerivedTy = + llvm::dyn_cast(stripMemberAndTypedef(Ty))) { + return DerivedTy->getTag() == llvm::dwarf::DW_TAG_pointer_type || + DerivedTy->getTag() == llvm::dwarf::DW_TAG_reference_type; + } + return false; +} + +struct PointerIndicesCache { + std::unordered_map> + Cache; +}; + +[[nodiscard]] BitSet 
+getPointerIndicesOfType(const llvm::DIType *Ty, const llvm::DataLayout &DL); + +[[nodiscard]] BitSet & +getPointerIndicesOfType(const llvm::DIType *Ty, const llvm::DataLayout &DL, + PointerIndicesCache &PIC); + /** * Retrieves String annotation value as per * diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index f337da1060..7da9f66019 100644 --- a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -56,118 +56,6 @@ void vta::printNode(llvm::raw_ostream &OS, TAGNode TN) { std::visit([&OS](auto Nod) { printNodeImpl(OS, Nod); }, TN.Label); } -static const llvm::DIType *stripMemberAndTypedef(const llvm::DIType *Ty) { - while (const auto *DerivedTy = llvm::dyn_cast(Ty)) { - if (DerivedTy->getTag() == llvm::dwarf::DW_TAG_typedef || - DerivedTy->getTag() == llvm::dwarf::DW_TAG_member) { - Ty = DerivedTy->getBaseType(); - continue; - } - break; - } - return Ty; -} - -static bool isPointerTy(const llvm::DIType *Ty) { - if (const auto *DerivedTy = - llvm::dyn_cast(stripMemberAndTypedef(Ty))) { - return DerivedTy->getTag() == llvm::dwarf::DW_TAG_pointer_type || - DerivedTy->getTag() == llvm::dwarf::DW_TAG_reference_type; - } - return false; -} - -static const llvm::DICompositeType *isCompositeTy(const llvm::DIType *Ty) { - return llvm::dyn_cast(stripMemberAndTypedef(Ty)); -} - -static llvm::SmallBitVector -getPointerIndicesOfType(llvm::DICompositeType *Ty, const llvm::DataLayout &DL) { - llvm::SmallBitVector Ret; - - auto PointerSize = DL.getPointerSizeInBits(); - - // XXX: Does every type provide a meaningful getSizeInBits? 
- auto MaxNumPointers = Ty->getSizeInBits() / PointerSize; - if (!MaxNumPointers) { - return Ret; - } - Ret.resize(MaxNumPointers); - - llvm::SmallVector> WorkList = {{Ty, 0}}; - - while (!WorkList.empty()) { - auto [CurrTy, CurrBitOffs] = WorkList.pop_back_val(); - - if (isPointerTy(CurrTy)) { - size_t Idx = CurrBitOffs / PointerSize; - if (CurrBitOffs % PointerSize) [[unlikely]] { - PHASAR_LOG_LEVEL(WARNING, "Unaligned pointer.."); - } - assert(Ret.size() > Idx && - "reserved unsufficient space for pointer indices"); - Ret.set(Idx); - continue; - } - - const auto *CompTy = isCompositeTy(CurrTy); - if (!CompTy) { - continue; - } - - auto Tag = CompTy->getTag(); - - if (Tag == llvm::dwarf::DW_TAG_array_type) { - auto *ElemTy = CompTy->getBaseType(); - const auto *ArrayLenRange = - llvm::cast(CompTy->getElements()[0]); - auto ArrayLenBound = ArrayLenRange->getCount(); - if (const auto *ArrayLenCInt = - ArrayLenBound.dyn_cast()) { - auto ArrayLen = ArrayLenCInt->getSExtValue(); - // Count is -1 for flexible array members; - if (ArrayLen < 0) { - continue; - } - - auto ElemSize = int64_t(ElemTy->getSizeInBits()); - for (int64_t I = 0, Offs = CurrBitOffs; I < ArrayLen; - ++I, Offs += ElemSize) { - WorkList.emplace_back(ElemTy, Offs); - } - } - - continue; - } - - if (Tag == llvm::dwarf::DW_TAG_structure_type || - Tag == llvm::dwarf::DW_TAG_class_type) { - - auto Elems = CompTy->getElements(); - uint64_t Offs = CurrBitOffs; - for (auto *Elem : Elems) { - auto *ElemTy = llvm::dyn_cast(Elem); - if (!ElemTy) { - continue; - } - - scope_exit IncOffs = [&] { Offs += ElemTy->getSizeInBits(); }; - - if (Elem->getTag() != llvm::dwarf::DW_TAG_inheritance && - Elem->getTag() != llvm::dwarf::DW_TAG_member) { - continue; - } - - WorkList.emplace_back(ElemTy, Offs); - } - - continue; - } - } - - return Ret; -} - static void addTAGNode(TAGNode TN, TypeAssignmentGraph &TAG) { TAG.Nodes.getOrInsert(TN); } @@ -180,12 +68,14 @@ static void addFields(const LLVMProjectIRDB &IRDB, 
TypeAssignmentGraph &TAG,
   llvm::DebugInfoFinder DIF;
   DIF.processModule(*IRDB.getModule());
 
+  PointerIndicesCache PIC{};
+
   for (auto *DITy : DIF.types()) {
     if (auto *CompTy = llvm::dyn_cast<llvm::DICompositeType>(DITy)) {
-      auto Offsets = getPointerIndicesOfType(CompTy, DL);
-      for (auto Offs : Offsets.set_bits()) {
+      auto &&Offsets = getPointerIndicesOfType(CompTy, DL, PIC);
+      Offsets.foreach ([&](uint32_t Offs) {
         addTAGNode({Field{CompTy, Offs * PointerSize}}, TAG);
-      }
+      });
       addTAGNode({Field{CompTy, SIZE_MAX}}, TAG);
     }
   }
diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp
index 7b13dd2194..5abdf627c3 100644
--- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp
+++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp
@@ -617,6 +617,132 @@ bool psr::isVarAnnotationIntrinsic(const llvm::Function *F) {
   return F->getName() == KVarAnnotationName;
 }
 
+/// Peels DW_TAG_typedef and DW_TAG_member wrappers off Ty and returns the
+/// first type underneath that is neither.
+const llvm::DIType *psr::stripMemberAndTypedef(const llvm::DIType *Ty) {
+  while (const auto *DerivedTy = llvm::dyn_cast<llvm::DIDerivedType>(Ty)) {
+    if (DerivedTy->getTag() == llvm::dwarf::DW_TAG_typedef ||
+        DerivedTy->getTag() == llvm::dwarf::DW_TAG_member) {
+      Ty = DerivedTy->getBaseType();
+      continue;
+    }
+    break;
+  }
+  return Ty;
+}
+
+/// Returns the composite type behind Ty's typedef/member wrappers, or nullptr.
+static const llvm::DICompositeType *isCompositeTy(const llvm::DIType *Ty) {
+  return llvm::dyn_cast<llvm::DICompositeType>(stripMemberAndTypedef(Ty));
+}
+
+/// Collects the pointer-sized slots of Ty that hold a pointer or reference.
+/// Entry I of the result stands for bit offset I * DL.getPointerSizeInBits()
+/// within Ty. Results are memoized in PIC, so the returned reference stays
+/// valid only as long as PIC does.
+BitSet<uint32_t> &
+psr::getPointerIndicesOfType(const llvm::DIType *Ty, const llvm::DataLayout &DL,
+                             PointerIndicesCache &PIC) {
+  auto [It, Inserted] = PIC.Cache.try_emplace(Ty);
+  auto &Ret = It->second;
+
+  // Cache hit: hand out the set stored for Ty by an earlier call
+  if (!Inserted) {
+    return Ret;
+  }
+
+  // XXX: Does every type provide a meaningful getSizeInBits?
+  auto PointerSize = DL.getPointerSizeInBits();
+  auto MaxNumPointers = Ty->getSizeInBits() / PointerSize;
+  if (!MaxNumPointers) {
+    return Ret;
+  }
+
+  if (isPointerTy(Ty)) {
+    Ret.insert(0);
+    return Ret;
+  }
+
+  const auto *CompTy = isCompositeTy(Ty);
+  if (!CompTy) {
+    return Ret;
+  }
+
+  Ret.reserve(MaxNumPointers);
+
+  auto Tag = CompTy->getTag();
+
+  if (Tag == llvm::dwarf::DW_TAG_array_type) {
+    auto *ElemTy = CompTy->getBaseType();
+    const auto *ArrayLenRange =
+        llvm::cast<llvm::DISubrange>(CompTy->getElements()[0]);
+    auto ArrayLenBound = ArrayLenRange->getCount();
+    if (const auto *ArrayLenCInt =
+            ArrayLenBound.dyn_cast<llvm::ConstantInt *>()) {
+      auto ArrayLen = ArrayLenCInt->getSExtValue();
+      // Count is -1 for flexible array members;
+      if (ArrayLen < 0) {
+        return Ret;
+      }
+
+      auto ElemSize = int64_t(ElemTy->getSizeInBits());
+      auto &ElemPtrs = getPointerIndicesOfType(ElemTy, DL, PIC);
+      if (ElemPtrs.empty()) {
+        return Ret;
+      }
+
+      // XXX: Optimize:
+      for (int64_t I = 0, Offs = 0; I < ArrayLen; ++I, Offs += ElemSize) {
+        // Offs counts bits; the result set holds pointer-slot indices
+        const auto SlotBase = Offs / PointerSize;
+        ElemPtrs.foreach ([&](uint32_t Idx) { Ret.insert(SlotBase + Idx); });
+      }
+    }
+
+    return Ret;
+  }
+
+  if (Tag == llvm::dwarf::DW_TAG_structure_type ||
+      Tag == llvm::dwarf::DW_TAG_class_type) {
+
+    auto Elems = CompTy->getElements();
+    uint64_t Offs = 0;
+    for (auto *Elem : Elems) {
+      auto *ElemTy = llvm::dyn_cast<llvm::DIType>(Elem);
+      if (!ElemTy) {
+        continue;
+      }
+
+      scope_exit IncOffs = [&] { Offs += ElemTy->getSizeInBits(); };
+
+      if (Elem->getTag() != llvm::dwarf::DW_TAG_inheritance &&
+          Elem->getTag() != llvm::dwarf::DW_TAG_member) {
+        continue;
+      }
+
+      auto &ElemPtrs = getPointerIndicesOfType(ElemTy, DL, PIC);
+      if (!ElemPtrs.empty()) {
+        // Offs counts bits; the result set holds pointer-slot indices
+        const auto SlotBase = Offs / PointerSize;
+        ElemPtrs.foreach ([&](uint32_t Idx) { Ret.insert(SlotBase + Idx); });
+      }
+    }
+    return Ret;
+  }
+
+  // fallback
+  return Ret;
+}
+
+/// Convenience overload without an external cache; the result is moved out
+/// of a temporary PointerIndicesCache.
+BitSet<uint32_t>
+psr::getPointerIndicesOfType(const llvm::DIType *Ty,
+                             const llvm::DataLayout &DL) {
+  PointerIndicesCache PIC;
+  return std::move(getPointerIndicesOfType(Ty, DL, PIC));
+}
+
+llvm::StringRef
psr::getVarAnnotationIntrinsicName(const llvm::CallInst *CallInst) { const int KPointerGlobalStringIdx = 1;