From 8908155034f8bf654a09e7458a0c189b500dbef5 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 23 Apr 2026 19:40:38 +0200 Subject: [PATCH 01/36] Initial vibe-coded impl of andersen analysis with OTF CG construction --- .../phasar/PhasarLLVM/Pointer/AndersenOTFAA.h | 105 +++ include/phasar/Pointer/AliasAnalysisType.def | 1 + lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 598 ++++++++++++++++++ 3 files changed, 704 insertions(+) create mode 100644 include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h create mode 100644 lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp diff --git a/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h b/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h new file mode 100644 index 0000000000..ef2bde9325 --- /dev/null +++ b/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h @@ -0,0 +1,105 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" +#include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAA.h" +#include "phasar/Pointer/RawAliasSet.h" +#include "phasar/Pointer/UnionFindAA.h" +#include "phasar/Utils/MaybeUniquePtr.h" +#include "phasar/Utils/NonNullPtr.h" +#include "phasar/Utils/TypedVector.h" +#include "phasar/Utils/ValueCompressor.h" + +#include "llvm/ADT/ArrayRef.h" + +namespace llvm { +class Function; +} // namespace llvm + +namespace psr { + +class LLVMProjectIRDB; + +/// Alias-analysis result for the Andersen-style OTF points-to analysis. +/// +/// Two values may-alias iff their points-to sets share at least one abstract +/// object. Satisfies \c UnionFindAAResult so it can be wrapped by +/// \c LLVMUnionFindAliasIterator. 
+struct AndersenOTFResult { + TypedVector> AliasSets; + size_t NumVars{}; + + [[nodiscard]] static constexpr bool isCached() noexcept { return true; } + [[nodiscard]] constexpr size_t size() const noexcept { return NumVars; } + + [[nodiscard]] RawAliasSet + getRawAliasSet(ValueId Var) const noexcept { + if (!AliasSets.inbounds(Var)) { + return {}; + } + return AliasSets[Var]; + } + + [[nodiscard]] bool mayAlias(ValueId Var1, ValueId Var2) const noexcept { + if (Var1 == Var2) { + return true; + } + if (!AliasSets.inbounds(Var1)) { + return false; + } + return AliasSets[Var1].contains(Var2); + } +}; + +static_assert(UnionFindAAResult); + +/// Andersen-style inclusion-based points-to analysis that co-refines the call +/// graph and points-to sets in a single fixpoint. +/// +/// Unlike the staged pipeline (resolver → PA), this solver owns its own +/// function-worklist loop: direct calls add callees immediately; indirect +/// calls are resolved as \c pts(fp) grows. +/// +/// Phase 1: context- and field-insensitive. +class AndersenOTFSolver { +public: + explicit AndersenOTFSolver(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef Entries, + ValueCompressor &VC) noexcept; + + /// Run the full OTF fixpoint and return the alias-analysis result. + [[nodiscard]] AndersenOTFResult solve(); + +private: + struct SolverData; + + NonNullPtr IRDB; + llvm::ArrayRef Entries; + NonNullPtr> VC; +}; + +// ---- Factory functions ------------------------------------------------ + +/// Runs the Andersen OTF fixpoint and returns the raw alias-analysis result +/// (no LLVM-value wrapping). If \p VC is null, a fresh one is allocated. +[[nodiscard]] AndersenOTFResult computeAndersenOTFRaw( + const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints, + MaybeUniquePtr> VC = nullptr); + +/// Runs the Andersen OTF fixpoint and returns an \c LLVMUnionFindAliasIterator +/// that implements \c IsLLVMAliasIterator. 
+[[nodiscard]] LLVMUnionFindAliasIterator +computeAndersenOTF(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints, + MaybeUniquePtr> VC = nullptr); + +} // namespace psr diff --git a/include/phasar/Pointer/AliasAnalysisType.def b/include/phasar/Pointer/AliasAnalysisType.def index 258819b378..c7359ff64c 100644 --- a/include/phasar/Pointer/AliasAnalysisType.def +++ b/include/phasar/Pointer/AliasAnalysisType.def @@ -16,6 +16,7 @@ ALIAS_ANALYSIS_TYPE(CFLSteens, "cflsteens", "Steensgaard-style alias analysis (e ALIAS_ANALYSIS_TYPE(CFLAnders, "cflanders", "Andersen-style alias analysis (subset-based) (default)") ALIAS_ANALYSIS_TYPE(PointsTo, "points-to", "Alias-information based on (external) points-to information") ALIAS_ANALYSIS_TYPE(UnionFind, "union-find", "Steensgaard-style alias analysis based on union-find structures") +ALIAS_ANALYSIS_TYPE(AndersenOTF, "andersen-otf", "Andersen-style inclusion-based on-the-fly points-to analysis") #ifdef PHASAR_USE_SVF ALIAS_ANALYSIS_TYPE(SVFDDA, "svf-dda", "Alias-information based on SVF's ContextDDA analysis. Requires SVF.") diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp new file mode 100644 index 0000000000..b5cacfa29f --- /dev/null +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -0,0 +1,598 @@ +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/UnionFind.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Casting.h" + +#include +#include +#include + +using namespace psr; + +// Sentinel: non-pointer argument slot (no ValueId assigned). +static constexpr ValueId NoArgId = ValueId(UINT32_MAX); + +struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { + // ---- Per-node state ------------------------------------------------- + + struct NodeInfo { + RawAliasSet PtsSet; + // Assignment edges: pts(this) ⊆ pts(dst) for each dst. + llvm::SmallVector AssignDsts; + llvm::SmallDenseSet AssignDstSet; // dedup guard + // Load constraints: dst = *this. + llvm::SmallVector LoadDsts; + // Store constraints: *this = src. + llvm::SmallVector StoreSrcs; + // MemCopy: memcpy(dst_ptr, this=src_ptr). + llvm::SmallVector MemCopyAsSrc; + // MemCopy: memcpy(this=dst_ptr, src_ptr). 
+ llvm::SmallVector MemCopyAsDst; + }; + + struct FPCallRecord { + const llvm::CallBase *CS; + ValueId FPId; + llvm::SmallVector Args; + std::optional CSRetVal; + }; + + // ---- Data fields ---------------------------------------------------- + + const LLVMProjectIRDB &IRDB; // NOLINT + const llvm::DataLayout &DL; // NOLINT + ValueCompressor &VC; // NOLINT + + llvm::SmallVector FunctionWorklist; + llvm::DenseSet Reachable; + llvm::DenseSet Processed; + + UnionFind SCCUf; + TypedVector Nodes; + + llvm::SmallVector UnresolvedFPCalls; + llvm::DenseMap> + ConnectedCallees; + llvm::SmallVector PropWorklist; + + // ---- Constructor ---------------------------------------------------- + + SolverData(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef Entries, + ValueCompressor &VC) + : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), VC(VC) { + for (const auto *F : Entries) { + if (Reachable.insert(F).second) { + FunctionWorklist.push_back(F); + } + } + } + + // ---- Node growth ---------------------------------------------------- + + NodeInfo &grow(ValueId V) { + const auto Idx = size_t(V); + if (Idx >= Nodes.size()) { + Nodes.resize(Idx + 1); + SCCUf.grow(Idx + 1); + } + return Nodes[V]; + } + + ValueId getOrInsert(PAGVariable Var) { + auto [Id, Inserted] = VC.insert(Var); + (void)Inserted; + grow(Id); + return Id; + } + + ValueId getOrInsert(const llvm::Value *V) { + return getOrInsert(PAGVariable(V)); + } + + // ---- Operand traversal ---------------------------------------------- + + void forEachOpId(const llvm::Value *V, std::invocable auto Handler) { + V = V->stripPointerCastsAndAliases(); + if (definitelyContainsNoPointer(V)) { + return; + } + + if (!llvm::isa(V)) { + std::invoke(Handler, getOrInsert(V)); + return; + } + + // Walk ConstantExpr chains to find the underlying GlobalObject(s). 
+ llvm::SmallDenseSet Seen = {V}; + llvm::SmallVector WL = { + llvm::cast(V)}; + do { + const auto *Curr = WL.pop_back_val(); + for (const auto *Op : Curr->operand_values()) { + if (definitelyContainsNoPointer(Op) || !Seen.insert(Op).second) { + continue; + } + if (const auto *GObj = llvm::dyn_cast(Op)) { + std::invoke(Handler, getOrInsert(GObj)); + continue; + } + if (const auto *User = llvm::dyn_cast(Op)) { + WL.push_back(User); + } + } + } while (!WL.empty()); + } + + // ---- Constraint insertion ------------------------------------------- + + void addPointee(ValueId Ptr, ValueId Obj) { + auto &PtrNode = grow(Ptr); + (void)grow(Obj); + if (PtrNode.PtsSet.tryInsert(Obj)) { + PropWorklist.push_back(Ptr); + } + } + + void addAssignEdge(ValueId Src, ValueId Dst) { + if (Src == Dst) { + return; + } + auto &SrcNode = grow(Src); + (void)grow(Dst); + if (SrcNode.AssignDstSet.insert(Dst).second) { + SrcNode.AssignDsts.push_back(Dst); + if (!SrcNode.PtsSet.empty()) { + PropWorklist.push_back(Src); + } + } + } + + void addLoad(ValueId Ptr, ValueId Dst) { + auto &PtrNode = grow(Ptr); + (void)grow(Dst); + PtrNode.PtsSet.foreach ([&](ValueId Obj) { addAssignEdge(Obj, Dst); }); + PtrNode.LoadDsts.push_back(Dst); + } + + void addStore(ValueId Ptr, ValueId Src) { + auto &PtrNode = grow(Ptr); + (void)grow(Src); + PtrNode.PtsSet.foreach ([&](ValueId Obj) { addAssignEdge(Src, Obj); }); + PtrNode.StoreSrcs.push_back(Src); + } + + void addMemCopy(ValueId SrcPtr, ValueId DstPtr) { + auto &SrcNode = grow(SrcPtr); + auto &DstNode = grow(DstPtr); + SrcNode.PtsSet.foreach ([&](ValueId O1) { + DstNode.PtsSet.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); + }); + SrcNode.MemCopyAsSrc.push_back(DstPtr); + DstNode.MemCopyAsDst.push_back(SrcPtr); + } + + // ---- Propagation ---------------------------------------------------- + + void onNewPointee(ValueId PtrRep, ValueId NewObj) { + assert(Nodes.inbounds(PtrRep)); + const auto &Node = Nodes[PtrRep]; + + for (ValueId Dst : Node.LoadDsts) 
{ + addAssignEdge(NewObj, Dst); + } + for (ValueId Src : Node.StoreSrcs) { + addAssignEdge(Src, NewObj); + } + for (ValueId DstPtr : Node.MemCopyAsSrc) { + if (!Nodes.inbounds(DstPtr)) { + continue; + } + Nodes[DstPtr].PtsSet.foreach ( + [&](ValueId O2) { addAssignEdge(NewObj, O2); }); + } + for (ValueId SrcPtr : Node.MemCopyAsDst) { + if (!Nodes.inbounds(SrcPtr)) { + continue; + } + Nodes[SrcPtr].PtsSet.foreach ( + [&](ValueId O1) { addAssignEdge(O1, NewObj); }); + } + } + + void propagate() { + while (!PropWorklist.empty()) { + const ValueId U = PropWorklist.pop_back_val(); + if (!Nodes.inbounds(U)) { + continue; + } + const auto &UNode = Nodes[U]; + + for (ValueId V : UNode.AssignDsts) { + if (!Nodes.inbounds(V) || V == U) { + continue; + } + auto &VNode = Nodes[V]; + RawAliasSet NewPts = UNode.PtsSet; + NewPts -= VNode.PtsSet; + if (NewPts.empty()) { + continue; + } + VNode.PtsSet |= NewPts; + PropWorklist.push_back(V); + NewPts.foreach ([&](ValueId NewObj) { onNewPointee(V, NewObj); }); + } + } + } + + // ---- IR translation ------------------------------------------------- + + void initGlobals() { + for (const auto &G : IRDB.getModule()->globals()) { + if (definitelyContainsNoPointer(G.getValueType())) { + continue; + } + const ValueId GId = getOrInsert(&G); + addPointee(GId, GId); + } + propagate(); + } + + void processFunction(const llvm::Function *F) { + for (const auto &Arg : F->args()) { + if (!definitelyContainsNoPointer(&Arg)) { + (void)getOrInsert(&Arg); + } + } + for (const auto &I : llvm::instructions(F)) { + processInstruction(I); + } + } + + void processInstruction(const llvm::Instruction &I) { + if (const auto *Alloca = llvm::dyn_cast(&I)) { + const ValueId Id = getOrInsert(Alloca); + addPointee(Id, Id); + return; + } + if (const auto *S = llvm::dyn_cast(&I)) { + handleStore(S); + return; + } + if (const auto *L = llvm::dyn_cast(&I)) { + handleLoad(L); + return; + } + if (const auto *M = llvm::dyn_cast(&I)) { + handleMemTransfer(M); + return; + } 
+ if (const auto *C = llvm::dyn_cast(&I)) { + handleCall(C); + return; + } + if (const auto *R = llvm::dyn_cast(&I)) { + handleReturn(R); + return; + } + if (const auto *P = llvm::dyn_cast(&I)) { + handlePhi(P); + return; + } + if (const auto *S = llvm::dyn_cast(&I)) { + handleSelect(S); + return; + } + + // Casts: alias result to stripped operand (field-insensitive). + if (const auto *Cast = llvm::dyn_cast(&I)) { + if (definitelyContainsNoPointer(Cast)) { + return; + } + forEachOpId(Cast->getOperand(0), [&](ValueId OpId) { + VC.addAlias(Cast, OpId); + grow(OpId); + }); + return; + } + + // GEPs: alias result to base pointer (field-insensitive). + if (const auto *GEP = llvm::dyn_cast(&I)) { + forEachOpId(GEP->getPointerOperand(), [&](ValueId OpId) { + VC.addAlias(GEP, OpId); + grow(OpId); + }); + } + } + + void handleStore(const llvm::StoreInst *S) { + if (definitelyContainsNoPointer(S->getValueOperand())) { + return; + } + forEachOpId(S->getPointerOperand(), [&](ValueId PtrId) { + forEachOpId(S->getValueOperand(), + [&](ValueId ValId) { addStore(PtrId, ValId); }); + }); + } + + void handleLoad(const llvm::LoadInst *L) { + if (definitelyContainsNoPointer(L)) { + return; + } + const ValueId DstId = getOrInsert(L); + forEachOpId(L->getPointerOperand(), + [&](ValueId PtrId) { addLoad(PtrId, DstId); }); + } + + void handleMemTransfer(const llvm::MemTransferInst *M) { + forEachOpId(M->getDest(), [&](ValueId DstPtr) { + forEachOpId(M->getSource(), + [&](ValueId SrcPtr) { addMemCopy(SrcPtr, DstPtr); }); + }); + } + + void handlePhi(const llvm::PHINode *P) { + if (definitelyContainsNoPointer(P)) { + return; + } + const ValueId PhiId = getOrInsert(P); + for (const auto &Inc : P->incoming_values()) { + if (definitelyContainsNoPointer(Inc.get())) { + continue; + } + forEachOpId(Inc.get(), + [&](ValueId IncId) { addAssignEdge(IncId, PhiId); }); + } + } + + void handleSelect(const llvm::SelectInst *S) { + if (definitelyContainsNoPointer(S)) { + return; + } + const ValueId SelId 
= getOrInsert(S); + const auto *TV = S->getTrueValue(); + const auto *FV = S->getFalseValue(); + if (!definitelyContainsNoPointer(TV)) { + forEachOpId(TV, [&](ValueId Id) { addAssignEdge(Id, SelId); }); + } + if (!definitelyContainsNoPointer(FV)) { + forEachOpId(FV, [&](ValueId Id) { addAssignEdge(Id, SelId); }); + } + } + + void handleReturn(const llvm::ReturnInst *R) { + const auto *RetVal = R->getReturnValue(); + if (!RetVal || definitelyContainsNoPointer(RetVal)) { + return; + } + const ValueId RetSlotId = + getOrInsert(PAGVariable::Return{R->getFunction()}); + forEachOpId(RetVal, + [&](ValueId ValId) { addAssignEdge(ValId, RetSlotId); }); + } + + // ---- Call-graph co-refinement --------------------------------------- + + void connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, + llvm::ArrayRef Args, + std::optional CSRetVal) { + if (Callee->isDeclaration()) { + return; + } + + const ValueId CalleeId = getOrInsert(Callee); + if (!ConnectedCallees[CS].insert(CalleeId).second) { + return; + } + + if (Reachable.insert(Callee).second) { + FunctionWorklist.push_back(Callee); + } + + if (CSRetVal && !Callee->getReturnType()->isVoidTy()) { + const ValueId RetSlotId = getOrInsert(PAGVariable::Return{Callee}); + addAssignEdge(RetSlotId, *CSRetVal); + } + + for (const auto &[Param, ArgId] : llvm::zip(Callee->args(), Args)) { + if (ArgId == NoArgId || definitelyContainsNoPointer(&Param)) { + continue; + } + addAssignEdge(ArgId, getOrInsert(&Param)); + } + + propagate(); + } + + void handleCall(const llvm::CallBase *C) { + if (C->isInlineAsm()) { + return; + } + + llvm::SmallVector Args; + for (const auto &Arg : C->args()) { + if (definitelyContainsNoPointer(Arg.get())) { + Args.push_back(NoArgId); + continue; + } + ValueId ArgId = NoArgId; + forEachOpId(Arg.get(), [&](ValueId Id) { ArgId = Id; }); + Args.push_back(ArgId); + } + + std::optional CSRetVal; + if (C->getType()->isPointerTy()) { + CSRetVal = getOrInsert(C); + } + + const auto *FnPtr = 
C->getCalledOperand()->stripPointerCastsAndAliases(); + + if (const auto *Callee = llvm::dyn_cast(FnPtr)) { + connectCallee(C, Callee, Args, CSRetVal); + return; + } + + // Indirect call: connect already-known targets, record for fixpoint. + const ValueId FPId = getOrInsert(FnPtr); + + const auto ConnectKnownTargets = [&]() { + if (!Nodes.inbounds(FPId)) { + return; + } + Nodes[FPId].PtsSet.foreach ([&](ValueId ObjId) { + if (!Nodes.inbounds(ObjId)) { + return; + } + for (const auto &Var : VC.id2vars(ObjId)) { + const auto *Fun = + llvm::dyn_cast_or_null(Var.valueOrNull()); + if (Fun) { + connectCallee(C, Fun, Args, CSRetVal); + } + } + }); + }; + + ConnectKnownTargets(); + UnresolvedFPCalls.push_back(FPCallRecord{ + .CS = C, + .FPId = FPId, + .Args = {Args.begin(), Args.end()}, + .CSRetVal = CSRetVal, + }); + } + + void checkUnresolvedFPCalls() { + for (const auto &Rec : UnresolvedFPCalls) { + if (!Nodes.inbounds(Rec.FPId)) { + continue; + } + Nodes[Rec.FPId].PtsSet.foreach ([&](ValueId ObjId) { + if (!Nodes.inbounds(ObjId)) { + return; + } + for (const auto &Var : VC.id2vars(ObjId)) { + const auto *Fun = + llvm::dyn_cast_or_null(Var.valueOrNull()); + if (Fun) { + connectCallee(Rec.CS, Fun, Rec.Args, Rec.CSRetVal); + } + } + }); + } + } + + // ---- Result construction -------------------------------------------- + + AndersenOTFResult buildResult() { + const size_t NumVars = VC.size(); + AndersenOTFResult Result; + Result.NumVars = NumVars; + + // Reverse map: abstract object → set of values pointing to it. 
+ TypedVector> Obj2Ptrs(NumVars); + for (auto VId : iota(NumVars)) { + if (!Nodes.inbounds(VId)) { + continue; + } + Nodes[VId].PtsSet.foreach ([&](ValueId Obj) { + if (size_t(Obj) < NumVars) { + Obj2Ptrs[Obj].insert(VId); + } + }); + } + + Result.AliasSets.resize(NumVars); + for (auto VId : iota(NumVars)) { + if (!Nodes.inbounds(VId)) { + continue; + } + Nodes[VId].PtsSet.foreach ([&](ValueId Obj) { + if (size_t(Obj) < NumVars) { + Result.AliasSets[VId] |= Obj2Ptrs[Obj]; + } + }); + } + + return Result; + } + + // ---- Main loop ------------------------------------------------------ + + AndersenOTFResult run() { + initGlobals(); + + do { + while (!FunctionWorklist.empty()) { + const auto *F = FunctionWorklist.pop_back_val(); + if (!Processed.insert(F).second) { + continue; + } + processFunction(F); + propagate(); + } + checkUnresolvedFPCalls(); + } while (!FunctionWorklist.empty()); + + return buildResult(); + } +}; + +// ---- AndersenOTFSolver -------------------------------------------------- + +AndersenOTFSolver::AndersenOTFSolver( + const LLVMProjectIRDB &IRDB, llvm::ArrayRef Entries, + ValueCompressor &VC) noexcept + : IRDB(IRDB), Entries(Entries), VC(VC) {} + +AndersenOTFResult AndersenOTFSolver::solve() { + SolverData Impl{*IRDB, Entries, *VC}; + return Impl.run(); +} + +// ---- Factory functions -------------------------------------------------- + +AndersenOTFResult +psr::computeAndersenOTFRaw(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints, + MaybeUniquePtr> VC) { + if (!VC) { + VC = std::make_unique>(); + } + AndersenOTFSolver Solver(IRDB, EntryPoints, *VC); + return Solver.solve(); +} + +LLVMUnionFindAliasIterator +psr::computeAndersenOTF(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints, + MaybeUniquePtr> VC) { + if (!VC) { + VC = std::make_unique>(); + } + AndersenOTFSolver Solver(IRDB, EntryPoints, *VC); + auto Res = Solver.solve(); + return LLVMUnionFindAliasIterator{std::move(Res), std::move(VC)}; +} From 
c59d59163183f3a9f99152c8352c1e2f710c2d25 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 23 Apr 2026 19:48:07 +0200 Subject: [PATCH 02/36] Add online cycle detection --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 92 +++++++++++++++++++++--- 1 file changed, 84 insertions(+), 8 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index b5cacfa29f..da2df777f0 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -112,6 +112,61 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return getOrInsert(PAGVariable(V)); } + ValueId rep(ValueId V) const { return SCCUf.find(V); } + + // Merges the SCCs containing A and B. Returns the new representative. + // Folds all pts/edges/constraints from the non-rep into the rep, then + // clears the non-rep's NodeInfo. + ValueId merge(ValueId A, ValueId B) { + A = rep(A); + B = rep(B); + if (A == B) { + return A; + } + const ValueId Rep = SCCUf.join(A, B); + const ValueId NonRep = (Rep == A) ? B : A; + + // Steal assign edges from NonRep and re-register under Rep. + llvm::SmallVector NRDsts = std::move(Nodes[NonRep].AssignDsts); + Nodes[NonRep].AssignDstSet.clear(); + for (ValueId Dst : NRDsts) { + const ValueId DstRep = rep(Dst); + if (DstRep != Rep) { + addAssignEdge(Rep, DstRep); + } + } + + // Merge pts sets. + bool PtsGrew = false; + Nodes[NonRep].PtsSet.foreach ([&](ValueId Obj) { + if (Nodes[Rep].PtsSet.tryInsert(Obj)) { + PtsGrew = true; + } + }); + if (PtsGrew) { + PropWorklist.push_back(Rep); + } + + // Merge complex constraints. 
+ auto &RepNode = Nodes[Rep]; + auto &NRNode = Nodes[NonRep]; + for (ValueId D : NRNode.LoadDsts) { + RepNode.LoadDsts.push_back(D); + } + for (ValueId S : NRNode.StoreSrcs) { + RepNode.StoreSrcs.push_back(S); + } + for (ValueId D : NRNode.MemCopyAsSrc) { + RepNode.MemCopyAsSrc.push_back(D); + } + for (ValueId S : NRNode.MemCopyAsDst) { + RepNode.MemCopyAsDst.push_back(S); + } + + NRNode = NodeInfo{}; + return Rep; + } + // ---- Operand traversal ---------------------------------------------- void forEachOpId(const llvm::Value *V, std::invocable auto Handler) { @@ -149,6 +204,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Constraint insertion ------------------------------------------- void addPointee(ValueId Ptr, ValueId Obj) { + Ptr = rep(Ptr); + Obj = rep(Obj); auto &PtrNode = grow(Ptr); (void)grow(Obj); if (PtrNode.PtsSet.tryInsert(Obj)) { @@ -157,6 +214,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } void addAssignEdge(ValueId Src, ValueId Dst) { + Src = rep(Src); + Dst = rep(Dst); if (Src == Dst) { return; } @@ -171,6 +230,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } void addLoad(ValueId Ptr, ValueId Dst) { + Ptr = rep(Ptr); + Dst = rep(Dst); auto &PtrNode = grow(Ptr); (void)grow(Dst); PtrNode.PtsSet.foreach ([&](ValueId Obj) { addAssignEdge(Obj, Dst); }); @@ -178,6 +239,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } void addStore(ValueId Ptr, ValueId Src) { + Ptr = rep(Ptr); + Src = rep(Src); auto &PtrNode = grow(Ptr); (void)grow(Src); PtrNode.PtsSet.foreach ([&](ValueId Obj) { addAssignEdge(Src, Obj); }); @@ -185,6 +248,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } void addMemCopy(ValueId SrcPtr, ValueId DstPtr) { + SrcPtr = rep(SrcPtr); + DstPtr = rep(DstPtr); auto &SrcNode = grow(SrcPtr); auto &DstNode = grow(DstPtr); SrcNode.PtsSet.foreach ([&](ValueId O1) { @@ -224,23 +289,34 @@ struct [[clang::internal_linkage]] 
AndersenOTFSolver::SolverData { void propagate() { while (!PropWorklist.empty()) { - const ValueId U = PropWorklist.pop_back_val(); + ValueId U = rep(PropWorklist.pop_back_val()); if (!Nodes.inbounds(U)) { continue; } - const auto &UNode = Nodes[U]; - for (ValueId V : UNode.AssignDsts) { - if (!Nodes.inbounds(V) || V == U) { + // Snapshot resolved successors: merge() can modify Nodes[U].AssignDsts. + llvm::SmallVector Dsts; + for (ValueId V : Nodes[U].AssignDsts) { + Dsts.push_back(rep(V)); + } + + for (ValueId VSnap : Dsts) { + const ValueId V = + rep(VSnap); // re-resolve: prior merge may have changed rep + if (V == U || !Nodes.inbounds(V)) { continue; } - auto &VNode = Nodes[V]; - RawAliasSet NewPts = UNode.PtsSet; - NewPts -= VNode.PtsSet; + + RawAliasSet NewPts = Nodes[U].PtsSet; + NewPts -= Nodes[V].PtsSet; if (NewPts.empty()) { + // LCD: direct back-edge V→U with pts(U) ⊆ pts(V) → cycle, collapse. + if (Nodes[V].AssignDstSet.contains(U)) { + U = merge(U, V); + } continue; } - VNode.PtsSet |= NewPts; + Nodes[V].PtsSet |= NewPts; PropWorklist.push_back(V); NewPts.foreach ([&](ValueId NewObj) { onNewPointee(V, NewObj); }); } From ace582cf81d3401fc6bf4838e038c3ea8c4f2f5e Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 29 Apr 2026 18:47:42 +0200 Subject: [PATCH 03/36] Fix reference invalidation, missing retroactive firing, and arg aliasing in AndersenOTFSolver - grow() may reallocate Nodes; all constraint methods now call every grow() before indexing Nodes[X], and snapshot pts sets before any addAssignEdge call that fires inside a foreach callback - onNewPointee snapshots all four constraint lists upfront for the same reason - merge() snapshots NonRep vectors before any addAssignEdge call, and retroactively fires load/store/memcopy constraints for Rep's merged pts set (previously those constraints were silently dropped for already-existing pointees) - ConnectKnownTargets and checkUnresolvedFPCalls snapshot pts(FPId) before iterating: 
connectCallee->propagate() can grow that set - handleCall now collects all resolved IDs per argument (not just the last one) via SmallVector per slot; FPCallRecord::Args and connectCallee updated accordingly - Add dedup guards (LoadDstSet, StoreSrcSet, MemCopyAs{Src,Dst}Set) to NodeInfo to avoid redundant constraint firing - Remove unused NoArgId sentinel and include - Mark rep() [[nodiscard]] Co-Authored-By: Claude Sonnet 4.6 --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 233 +++++++++++++++-------- 1 file changed, 157 insertions(+), 76 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index da2df777f0..8b6457d650 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -26,14 +26,10 @@ #include "llvm/Support/Casting.h" #include -#include #include using namespace psr; -// Sentinel: non-pointer argument slot (no ValueId assigned). -static constexpr ValueId NoArgId = ValueId(UINT32_MAX); - struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Per-node state ------------------------------------------------- @@ -44,18 +40,25 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { llvm::SmallDenseSet AssignDstSet; // dedup guard // Load constraints: dst = *this. llvm::SmallVector LoadDsts; + llvm::SmallDenseSet LoadDstSet; // dedup guard // Store constraints: *this = src. llvm::SmallVector StoreSrcs; + llvm::SmallDenseSet StoreSrcSet; // dedup guard // MemCopy: memcpy(dst_ptr, this=src_ptr). llvm::SmallVector MemCopyAsSrc; + llvm::SmallDenseSet MemCopyAsSrcSet; // dedup guard // MemCopy: memcpy(this=dst_ptr, src_ptr). llvm::SmallVector MemCopyAsDst; + llvm::SmallDenseSet MemCopyAsDstSet; // dedup guard }; + // One set of ValueIds per call argument; empty means non-pointer. 
+ using ArgList = llvm::SmallVector>; + struct FPCallRecord { const llvm::CallBase *CS; ValueId FPId; - llvm::SmallVector Args; + ArgList Args; std::optional CSRetVal; }; @@ -112,11 +115,12 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return getOrInsert(PAGVariable(V)); } - ValueId rep(ValueId V) const { return SCCUf.find(V); } + [[nodiscard]] ValueId rep(ValueId V) const { return SCCUf.find(V); } // Merges the SCCs containing A and B. Returns the new representative. // Folds all pts/edges/constraints from the non-rep into the rep, then - // clears the non-rep's NodeInfo. + // clears the non-rep's NodeInfo. All NonRep data is snapshotted before any + // addAssignEdge call to avoid reference invalidation via grow(). ValueId merge(ValueId A, ValueId B) { A = rep(A); B = rep(B); @@ -126,10 +130,23 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const ValueId Rep = SCCUf.join(A, B); const ValueId NonRep = (Rep == A) ? B : A; - // Steal assign edges from NonRep and re-register under Rep. - llvm::SmallVector NRDsts = std::move(Nodes[NonRep].AssignDsts); + // Snapshot all NonRep data before any addAssignEdge / grow calls that + // may reallocate Nodes and invalidate references. + llvm::SmallVector NRAssignDsts = + std::move(Nodes[NonRep].AssignDsts); Nodes[NonRep].AssignDstSet.clear(); - for (ValueId Dst : NRDsts) { + const RawAliasSet NRPts = Nodes[NonRep].PtsSet; + llvm::SmallVector NRLoadDsts = + std::move(Nodes[NonRep].LoadDsts); + llvm::SmallVector NRStoreSrcs = + std::move(Nodes[NonRep].StoreSrcs); + llvm::SmallVector NRMemCopyAsSrc = + std::move(Nodes[NonRep].MemCopyAsSrc); + llvm::SmallVector NRMemCopyAsDst = + std::move(Nodes[NonRep].MemCopyAsDst); + + // Re-register NonRep's assign edges under Rep. 
+ for (ValueId Dst : NRAssignDsts) { const ValueId DstRep = rep(Dst); if (DstRep != Rep) { addAssignEdge(Rep, DstRep); @@ -137,33 +154,60 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } // Merge pts sets. - bool PtsGrew = false; - Nodes[NonRep].PtsSet.foreach ([&](ValueId Obj) { - if (Nodes[Rep].PtsSet.tryInsert(Obj)) { - PtsGrew = true; - } - }); + const bool PtsGrew = Nodes[Rep].PtsSet.tryMergeWith(NRPts); if (PtsGrew) { PropWorklist.push_back(Rep); } - // Merge complex constraints. - auto &RepNode = Nodes[Rep]; - auto &NRNode = Nodes[NonRep]; - for (ValueId D : NRNode.LoadDsts) { - RepNode.LoadDsts.push_back(D); + // Snapshot Rep's pts (after merge) for retroactive constraint firing. + const RawAliasSet RepPts = Nodes[Rep].PtsSet; + + // Transfer NonRep's load constraints and retroactively fire them for + // Rep's existing pts members. + for (ValueId D : NRLoadDsts) { + if (Nodes[Rep].LoadDstSet.insert(D).second) { + Nodes[Rep].LoadDsts.push_back(D); + RepPts.foreach ([&](ValueId Obj) { addAssignEdge(Obj, D); }); + } } - for (ValueId S : NRNode.StoreSrcs) { - RepNode.StoreSrcs.push_back(S); + + // Transfer NonRep's store constraints with retroactive firing. + for (ValueId S : NRStoreSrcs) { + if (Nodes[Rep].StoreSrcSet.insert(S).second) { + Nodes[Rep].StoreSrcs.push_back(S); + RepPts.foreach ([&](ValueId Obj) { addAssignEdge(S, Obj); }); + } } - for (ValueId D : NRNode.MemCopyAsSrc) { - RepNode.MemCopyAsSrc.push_back(D); + + // Transfer NonRep's memcpy-as-src constraints with retroactive firing. + for (ValueId D : NRMemCopyAsSrc) { + if (Nodes[Rep].MemCopyAsSrcSet.insert(D).second) { + Nodes[Rep].MemCopyAsSrc.push_back(D); + if (Nodes.inbounds(D)) { + // Snapshot DstPtr's pts: addAssignEdge may resize Nodes. 
+ const RawAliasSet DstPts = Nodes[D].PtsSet; + RepPts.foreach ([&](ValueId O1) { + DstPts.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); + }); + } + } } - for (ValueId S : NRNode.MemCopyAsDst) { - RepNode.MemCopyAsDst.push_back(S); + + // Transfer NonRep's memcpy-as-dst constraints with retroactive firing. + for (ValueId S : NRMemCopyAsDst) { + if (Nodes[Rep].MemCopyAsDstSet.insert(S).second) { + Nodes[Rep].MemCopyAsDst.push_back(S); + if (Nodes.inbounds(S)) { + // Snapshot SrcPtr's pts: addAssignEdge may resize Nodes. + const RawAliasSet SrcPts = Nodes[S].PtsSet; + SrcPts.foreach ([&](ValueId O1) { + RepPts.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); + }); + } + } } - NRNode = NodeInfo{}; + Nodes[NonRep] = NodeInfo{}; return Rep; } @@ -202,13 +246,21 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } // ---- Constraint insertion ------------------------------------------- + // + // INVARIANT: every method resolves all ids through rep() first, then calls + // grow() for all ids before accessing Nodes by reference. Any grow() call + // may reallocate the Nodes backing array, so no NodeInfo& must be held + // across a grow() call or across any call that may invoke grow() (i.e. + // addAssignEdge, addPointee, etc.). Where the existing pts set must be + // iterated while addAssignEdge is called inside, the pts set is first + // copied into a local snapshot. 
void addPointee(ValueId Ptr, ValueId Obj) { Ptr = rep(Ptr); Obj = rep(Obj); - auto &PtrNode = grow(Ptr); - (void)grow(Obj); - if (PtrNode.PtsSet.tryInsert(Obj)) { + grow(Ptr); + grow(Obj); // grow before indexing Nodes[Ptr] + if (Nodes[Ptr].PtsSet.tryInsert(Obj)) { PropWorklist.push_back(Ptr); } } @@ -219,11 +271,11 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (Src == Dst) { return; } - auto &SrcNode = grow(Src); - (void)grow(Dst); - if (SrcNode.AssignDstSet.insert(Dst).second) { - SrcNode.AssignDsts.push_back(Dst); - if (!SrcNode.PtsSet.empty()) { + grow(Src); + grow(Dst); // grow before indexing Nodes[Src] + if (Nodes[Src].AssignDstSet.insert(Dst).second) { + Nodes[Src].AssignDsts.push_back(Dst); + if (!Nodes[Src].PtsSet.empty()) { PropWorklist.push_back(Src); } } @@ -232,58 +284,81 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void addLoad(ValueId Ptr, ValueId Dst) { Ptr = rep(Ptr); Dst = rep(Dst); - auto &PtrNode = grow(Ptr); - (void)grow(Dst); - PtrNode.PtsSet.foreach ([&](ValueId Obj) { addAssignEdge(Obj, Dst); }); - PtrNode.LoadDsts.push_back(Dst); + grow(Ptr); + grow(Dst); // grow before accessing Nodes[Ptr] + // Snapshot pts: addAssignEdge inside the lambda may resize Nodes. + const RawAliasSet ExistingPts = Nodes[Ptr].PtsSet; + ExistingPts.foreach ([&](ValueId Obj) { addAssignEdge(Obj, Dst); }); + if (Nodes[Ptr].LoadDstSet.insert(Dst).second) { + Nodes[Ptr].LoadDsts.push_back(Dst); + } } void addStore(ValueId Ptr, ValueId Src) { Ptr = rep(Ptr); Src = rep(Src); - auto &PtrNode = grow(Ptr); - (void)grow(Src); - PtrNode.PtsSet.foreach ([&](ValueId Obj) { addAssignEdge(Src, Obj); }); - PtrNode.StoreSrcs.push_back(Src); + grow(Ptr); + grow(Src); // grow before accessing Nodes[Ptr] + // Snapshot pts: addAssignEdge inside the lambda may resize Nodes. 
+ const RawAliasSet ExistingPts = Nodes[Ptr].PtsSet; + ExistingPts.foreach ([&](ValueId Obj) { addAssignEdge(Src, Obj); }); + if (Nodes[Ptr].StoreSrcSet.insert(Src).second) { + Nodes[Ptr].StoreSrcs.push_back(Src); + } } void addMemCopy(ValueId SrcPtr, ValueId DstPtr) { SrcPtr = rep(SrcPtr); DstPtr = rep(DstPtr); - auto &SrcNode = grow(SrcPtr); - auto &DstNode = grow(DstPtr); - SrcNode.PtsSet.foreach ([&](ValueId O1) { - DstNode.PtsSet.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); + grow(SrcPtr); + grow(DstPtr); // grow before accessing Nodes[SrcPtr/DstPtr] + // Snapshot both pts sets: addAssignEdge inside the lambdas may resize + // Nodes, invalidating any reference into it. + const RawAliasSet SrcPts = Nodes[SrcPtr].PtsSet; + const RawAliasSet DstPts = Nodes[DstPtr].PtsSet; + SrcPts.foreach ([&](ValueId O1) { + DstPts.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); }); - SrcNode.MemCopyAsSrc.push_back(DstPtr); - DstNode.MemCopyAsDst.push_back(SrcPtr); + if (Nodes[SrcPtr].MemCopyAsSrcSet.insert(DstPtr).second) { + Nodes[SrcPtr].MemCopyAsSrc.push_back(DstPtr); + } + if (Nodes[DstPtr].MemCopyAsDstSet.insert(SrcPtr).second) { + Nodes[DstPtr].MemCopyAsDst.push_back(SrcPtr); + } } // ---- Propagation ---------------------------------------------------- void onNewPointee(ValueId PtrRep, ValueId NewObj) { assert(Nodes.inbounds(PtrRep)); - const auto &Node = Nodes[PtrRep]; - - for (ValueId Dst : Node.LoadDsts) { + // Snapshot all constraint lists before any addAssignEdge call: grow() + // inside addAssignEdge may reallocate Nodes, invalidating references. 
+ const auto LoadDsts = Nodes[PtrRep].LoadDsts; + const auto StoreSrcs = Nodes[PtrRep].StoreSrcs; + const auto MemSrcs = Nodes[PtrRep].MemCopyAsSrc; + const auto MemDsts = Nodes[PtrRep].MemCopyAsDst; + + for (ValueId Dst : LoadDsts) { addAssignEdge(NewObj, Dst); } - for (ValueId Src : Node.StoreSrcs) { + for (ValueId Src : StoreSrcs) { addAssignEdge(Src, NewObj); } - for (ValueId DstPtr : Node.MemCopyAsSrc) { + for (ValueId DstPtr : MemSrcs) { if (!Nodes.inbounds(DstPtr)) { continue; } - Nodes[DstPtr].PtsSet.foreach ( - [&](ValueId O2) { addAssignEdge(NewObj, O2); }); + // Snapshot DstPtr's pts: addAssignEdge may resize Nodes. + const RawAliasSet DstPts = Nodes[DstPtr].PtsSet; + DstPts.foreach ([&](ValueId O2) { addAssignEdge(NewObj, O2); }); } - for (ValueId SrcPtr : Node.MemCopyAsDst) { + for (ValueId SrcPtr : MemDsts) { if (!Nodes.inbounds(SrcPtr)) { continue; } - Nodes[SrcPtr].PtsSet.foreach ( - [&](ValueId O1) { addAssignEdge(O1, NewObj); }); + // Snapshot SrcPtr's pts: addAssignEdge may resize Nodes. + const RawAliasSet SrcPts = Nodes[SrcPtr].PtsSet; + SrcPts.foreach ([&](ValueId O1) { addAssignEdge(O1, NewObj); }); } } @@ -301,8 +376,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } for (ValueId VSnap : Dsts) { - const ValueId V = - rep(VSnap); // re-resolve: prior merge may have changed rep + // Re-resolve: a prior iteration's merge() may have changed the rep. + const ValueId V = rep(VSnap); if (V == U || !Nodes.inbounds(V)) { continue; } @@ -310,7 +385,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { RawAliasSet NewPts = Nodes[U].PtsSet; NewPts -= Nodes[V].PtsSet; if (NewPts.empty()) { - // LCD: direct back-edge V→U with pts(U) ⊆ pts(V) → cycle, collapse. + // LCD: direct back-edge V→U with pts(U)⊆pts(V) → 2-cycle, collapse. 
if (Nodes[V].AssignDstSet.contains(U)) { U = merge(U, V); } @@ -472,7 +547,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Call-graph co-refinement --------------------------------------- void connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, - llvm::ArrayRef Args, + llvm::ArrayRef> Args, std::optional CSRetVal) { if (Callee->isDeclaration()) { return; @@ -492,11 +567,14 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { addAssignEdge(RetSlotId, *CSRetVal); } - for (const auto &[Param, ArgId] : llvm::zip(Callee->args(), Args)) { - if (ArgId == NoArgId || definitelyContainsNoPointer(&Param)) { + for (const auto &[Param, ArgIds] : llvm::zip(Callee->args(), Args)) { + if (ArgIds.empty() || definitelyContainsNoPointer(&Param)) { continue; } - addAssignEdge(ArgId, getOrInsert(&Param)); + const ValueId ParamId = getOrInsert(&Param); + for (ValueId ArgId : ArgIds) { + addAssignEdge(ArgId, ParamId); + } } propagate(); @@ -507,15 +585,14 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } - llvm::SmallVector Args; + // Build one entry per call argument: empty inner vector = non-pointer. + ArgList Args; for (const auto &Arg : C->args()) { - if (definitelyContainsNoPointer(Arg.get())) { - Args.push_back(NoArgId); - continue; + llvm::SmallVector ArgIds; + if (!definitelyContainsNoPointer(Arg.get())) { + forEachOpId(Arg.get(), [&](ValueId Id) { ArgIds.push_back(Id); }); } - ValueId ArgId = NoArgId; - forEachOpId(Arg.get(), [&](ValueId Id) { ArgId = Id; }); - Args.push_back(ArgId); + Args.push_back(std::move(ArgIds)); } std::optional CSRetVal; @@ -537,7 +614,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!Nodes.inbounds(FPId)) { return; } - Nodes[FPId].PtsSet.foreach ([&](ValueId ObjId) { + // Snapshot pts(FPId): connectCallee→propagate() may grow pts(FPId). 
+ const RawAliasSet FPPts = Nodes[FPId].PtsSet; + FPPts.foreach ([&](ValueId ObjId) { if (!Nodes.inbounds(ObjId)) { return; } @@ -565,7 +644,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!Nodes.inbounds(Rec.FPId)) { continue; } - Nodes[Rec.FPId].PtsSet.foreach ([&](ValueId ObjId) { + // Snapshot pts(FPId): connectCallee→propagate() may grow it. + const RawAliasSet FPPts = Nodes[Rec.FPId].PtsSet; + FPPts.foreach ([&](ValueId ObjId) { if (!Nodes.inbounds(ObjId)) { return; } From 8bba47de8d5a7728ae4cfb6069d39a0ac0fb05b4 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 3 May 2026 19:53:08 +0200 Subject: [PATCH 04/36] Vibe code some tests + identify bug that converts many alias sets into Steensgaard sets --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 183 ++++++++--- test/llvm_test_code/pointers/CMakeLists.txt | 4 + .../llvm_test_code/pointers/andersen_otf_fp.c | 14 + .../pointers/andersen_otf_interproc.c | 12 + .../Problems/IFDSConstAnalysisTest.cpp | 5 +- .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 300 ++++++++++++++++++ unittests/PhasarLLVM/Pointer/CMakeLists.txt | 1 + 7 files changed, 471 insertions(+), 48 deletions(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_fp.c create mode 100644 test/llvm_test_code/pointers/andersen_otf_interproc.c create mode 100644 unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 8b6457d650..2ab93deae3 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -12,6 +12,7 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/LibrarySummary.h" #include "phasar/Utils/UnionFind.h" #include "llvm/ADT/DenseMap.h" @@ -30,6 +31,37 @@ using namespace psr; +namespace { +/// File-local wrapper: extends PAGVariable with a
variable/object flag. +/// Variable nodes (IsObject=false) represent SSA pointer values. +/// Object nodes (IsObject=true) represent abstract memory cells. +struct AndersenVar { + PAGVariable Base{}; + bool IsObject = false; + + friend bool operator==(AndersenVar A, AndersenVar B) noexcept { + return A.Base == B.Base && A.IsObject == B.IsObject; + } +}; +} // namespace + +namespace llvm { +template <> struct DenseMapInfo { + static AndersenVar getEmptyKey() noexcept { + return {DenseMapInfo::getEmptyKey(), false}; + } + static AndersenVar getTombstoneKey() noexcept { + return {DenseMapInfo::getTombstoneKey(), false}; + } + static unsigned getHashValue(AndersenVar V) noexcept { + return llvm::hash_combine( + DenseMapInfo::getHashValue(V.Base), + unsigned(V.IsObject)); + } + static bool isEqual(AndersenVar A, AndersenVar B) noexcept { return A == B; } +}; +} // namespace llvm + struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Per-node state ------------------------------------------------- @@ -64,9 +96,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Data fields ---------------------------------------------------- - const LLVMProjectIRDB &IRDB; // NOLINT - const llvm::DataLayout &DL; // NOLINT - ValueCompressor &VC; // NOLINT + const LLVMProjectIRDB &IRDB; // NOLINT + const llvm::DataLayout &DL; // NOLINT + ValueCompressor &ExternalVC; // NOLINT – caller-visible output + ValueCompressor LocalVC{}; // internal variable+object nodes llvm::SmallVector FunctionWorklist; llvm::DenseSet Reachable; @@ -85,7 +118,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { SolverData(const LLVMProjectIRDB &IRDB, llvm::ArrayRef Entries, ValueCompressor &VC) - : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), VC(VC) { + : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), ExternalVC(VC) { for (const auto *F : Entries) { if (Reachable.insert(F).second) { FunctionWorklist.push_back(F); @@ -104,15 +137,16 @@ 
struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return Nodes[V]; } - ValueId getOrInsert(PAGVariable Var) { - auto [Id, Inserted] = VC.insert(Var); - (void)Inserted; + ValueId getOrInsertVar(PAGVariable Var) { + auto [Id, _] = LocalVC.insert(AndersenVar{Var, false}); grow(Id); return Id; } - ValueId getOrInsert(const llvm::Value *V) { - return getOrInsert(PAGVariable(V)); + ValueId getOrInsertObj(PAGVariable Var) { + auto [Id, _] = LocalVC.insert(AndersenVar{Var, true}); + grow(Id); + return Id; } [[nodiscard]] ValueId rep(ValueId V) const { return SCCUf.find(V); } @@ -220,7 +254,15 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } if (!llvm::isa(V)) { - std::invoke(Handler, getOrInsert(V)); + const ValueId VId = getOrInsertVar(PAGVariable(V)); + // A function used as a value (e.g. stored into a function-pointer + // variable) is an addressable abstract object: pts(F) = {F}. + // Without this, pts(fp_alloca) never gains F and OTF call resolution + // silently produces no callees. 
+ if (llvm::isa(V)) { + addPointee(VId, VId); + } + std::invoke(Handler, VId); return; } @@ -235,7 +277,11 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } if (const auto *GObj = llvm::dyn_cast(Op)) { - std::invoke(Handler, getOrInsert(GObj)); + const ValueId GId = getOrInsertVar(PAGVariable(GObj)); + if (llvm::isa(GObj)) { + addPointee(GId, GId); + } + std::invoke(Handler, GId); continue; } if (const auto *User = llvm::dyn_cast(Op)) { @@ -405,8 +451,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (definitelyContainsNoPointer(G.getValueType())) { continue; } - const ValueId GId = getOrInsert(&G); - addPointee(GId, GId); + const ValueId VarId = getOrInsertVar(PAGVariable(&G)); + const ValueId ObjId = getOrInsertObj(PAGVariable(&G)); + addPointee(VarId, ObjId); } propagate(); } @@ -414,7 +461,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void processFunction(const llvm::Function *F) { for (const auto &Arg : F->args()) { if (!definitelyContainsNoPointer(&Arg)) { - (void)getOrInsert(&Arg); + (void)getOrInsertVar(PAGVariable(&Arg)); } } for (const auto &I : llvm::instructions(F)) { @@ -424,8 +471,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void processInstruction(const llvm::Instruction &I) { if (const auto *Alloca = llvm::dyn_cast(&I)) { - const ValueId Id = getOrInsert(Alloca); - addPointee(Id, Id); + const ValueId VarId = getOrInsertVar(PAGVariable(Alloca)); + const ValueId ObjId = getOrInsertObj(PAGVariable(Alloca)); + addPointee(VarId, ObjId); return; } if (const auto *S = llvm::dyn_cast(&I)) { @@ -463,7 +511,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } forEachOpId(Cast->getOperand(0), [&](ValueId OpId) { - VC.addAlias(Cast, OpId); + LocalVC.addAlias(AndersenVar{PAGVariable(Cast), false}, OpId); grow(OpId); }); return; @@ -472,7 +520,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // GEPs: 
alias result to base pointer (field-insensitive). if (const auto *GEP = llvm::dyn_cast(&I)) { forEachOpId(GEP->getPointerOperand(), [&](ValueId OpId) { - VC.addAlias(GEP, OpId); + LocalVC.addAlias(AndersenVar{PAGVariable(GEP), false}, OpId); grow(OpId); }); } @@ -492,7 +540,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (definitelyContainsNoPointer(L)) { return; } - const ValueId DstId = getOrInsert(L); + const ValueId DstId = getOrInsertVar(PAGVariable(L)); forEachOpId(L->getPointerOperand(), [&](ValueId PtrId) { addLoad(PtrId, DstId); }); } @@ -508,7 +556,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (definitelyContainsNoPointer(P)) { return; } - const ValueId PhiId = getOrInsert(P); + const ValueId PhiId = getOrInsertVar(PAGVariable(P)); for (const auto &Inc : P->incoming_values()) { if (definitelyContainsNoPointer(Inc.get())) { continue; @@ -522,7 +570,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (definitelyContainsNoPointer(S)) { return; } - const ValueId SelId = getOrInsert(S); + const ValueId SelId = getOrInsertVar(PAGVariable(S)); const auto *TV = S->getTrueValue(); const auto *FV = S->getFalseValue(); if (!definitelyContainsNoPointer(TV)) { @@ -539,7 +587,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } const ValueId RetSlotId = - getOrInsert(PAGVariable::Return{R->getFunction()}); + getOrInsertVar(PAGVariable::Return{R->getFunction()}); forEachOpId(RetVal, [&](ValueId ValId) { addAssignEdge(ValId, RetSlotId); }); } @@ -553,7 +601,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } - const ValueId CalleeId = getOrInsert(Callee); + const ValueId CalleeId = getOrInsertVar(PAGVariable(Callee)); if (!ConnectedCallees[CS].insert(CalleeId).second) { return; } @@ -563,7 +611,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } if (CSRetVal && !Callee->getReturnType()->isVoidTy()) { - const 
ValueId RetSlotId = getOrInsert(PAGVariable::Return{Callee}); + const ValueId RetSlotId = getOrInsertVar(PAGVariable::Return{Callee}); addAssignEdge(RetSlotId, *CSRetVal); } @@ -571,7 +619,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (ArgIds.empty() || definitelyContainsNoPointer(&Param)) { continue; } - const ValueId ParamId = getOrInsert(&Param); + const ValueId ParamId = getOrInsertVar(PAGVariable(&Param)); for (ValueId ArgId : ArgIds) { addAssignEdge(ArgId, ParamId); } @@ -581,7 +629,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } void handleCall(const llvm::CallBase *C) { - if (C->isInlineAsm()) { + if (C->isInlineAsm() || C->isDebugOrPseudoInst()) { return; } @@ -597,7 +645,15 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { std::optional CSRetVal; if (C->getType()->isPointerTy()) { - CSRetVal = getOrInsert(C); + const ValueId VarId = getOrInsertVar(PAGVariable(C)); + CSRetVal = VarId; + const auto *DirectCallee = llvm::dyn_cast( + C->getCalledOperand()->stripPointerCastsAndAliases()); + if (DirectCallee && + psr::isHeapAllocatingFunction(DirectCallee->getName())) { + const ValueId ObjId = getOrInsertObj(PAGVariable(C)); + addPointee(VarId, ObjId); + } } const auto *FnPtr = C->getCalledOperand()->stripPointerCastsAndAliases(); @@ -608,7 +664,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } // Indirect call: connect already-known targets, record for fixpoint. 
- const ValueId FPId = getOrInsert(FnPtr); + const ValueId FPId = getOrInsertVar(PAGVariable(FnPtr)); const auto ConnectKnownTargets = [&]() { if (!Nodes.inbounds(FPId)) { @@ -620,9 +676,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!Nodes.inbounds(ObjId)) { return; } - for (const auto &Var : VC.id2vars(ObjId)) { + for (const auto &Var : LocalVC.id2vars(ObjId)) { const auto *Fun = - llvm::dyn_cast_or_null(Var.valueOrNull()); + llvm::dyn_cast_or_null(Var.Base.valueOrNull()); if (Fun) { connectCallee(C, Fun, Args, CSRetVal); } @@ -650,9 +706,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!Nodes.inbounds(ObjId)) { return; } - for (const auto &Var : VC.id2vars(ObjId)) { + for (const auto &Var : LocalVC.id2vars(ObjId)) { const auto *Fun = - llvm::dyn_cast_or_null(Var.valueOrNull()); + llvm::dyn_cast_or_null(Var.Base.valueOrNull()); if (Fun) { connectCallee(Rec.CS, Fun, Rec.Args, Rec.CSRetVal); } @@ -664,32 +720,65 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Result construction -------------------------------------------- AndersenOTFResult buildResult() { - const size_t NumVars = VC.size(); - AndersenOTFResult Result; - Result.NumVars = NumVars; + const size_t NumLocal = LocalVC.size(); - // Reverse map: abstract object → set of values pointing to it. - TypedVector> Obj2Ptrs(NumVars); - for (auto VId : iota(NumVars)) { - if (!Nodes.inbounds(VId)) { + // Reverse map: abstract object → all local IDs that point to it. 
+ TypedVector> Obj2Ptrs(NumLocal); + for (auto VId : iota(NumLocal)) { + const ValueId RepId = rep(VId); + if (!Nodes.inbounds(RepId)) { continue; } - Nodes[VId].PtsSet.foreach ([&](ValueId Obj) { - if (size_t(Obj) < NumVars) { + Nodes[RepId].PtsSet.foreach ([&](ValueId Obj) { + if (size_t(Obj) < NumLocal) { Obj2Ptrs[Obj].insert(VId); } }); } - Result.AliasSets.resize(NumVars); - for (auto VId : iota(NumVars)) { - if (!Nodes.inbounds(VId)) { + // Map variable local IDs → external VC IDs. + // Object nodes are internal only and do not appear in the external result. + TypedVector> LocalToExt(NumLocal); + for (auto VId : iota(NumLocal)) { + ValueId FirstExtId{}; + bool HasFirst = false; + for (const auto &V : LocalVC.id2vars(VId)) { + if (V.IsObject) { + continue; + } + if (!HasFirst) { + FirstExtId = ExternalVC.insert(V.Base).first; + HasFirst = true; + LocalToExt[VId] = FirstExtId; + } else { + ExternalVC.addAlias(V.Base, FirstExtId); + } + } + } + + AndersenOTFResult Result; + Result.NumVars = ExternalVC.size(); + Result.AliasSets.resize(Result.NumVars); + + for (auto VId : iota(NumLocal)) { + if (!LocalToExt[VId]) { + continue; + } + const ValueId ExtVId = *LocalToExt[VId]; + const ValueId RepId = rep(VId); + if (!Nodes.inbounds(RepId)) { continue; } - Nodes[VId].PtsSet.foreach ([&](ValueId Obj) { - if (size_t(Obj) < NumVars) { - Result.AliasSets[VId] |= Obj2Ptrs[Obj]; + + Nodes[RepId].PtsSet.foreach ([&](ValueId Obj) { + if (size_t(Obj) >= NumLocal) { + return; } + Obj2Ptrs[Obj].foreach ([&](ValueId AliasLocalId) { + if (const auto &AliasExt = LocalToExt[AliasLocalId]) { + Result.AliasSets[ExtVId].insert(*AliasExt); + } + }); }); } diff --git a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index 0802f9e736..fb255af43f 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -1,4 +1,6 @@ set(lca_files + andersen_otf_interproc.c + andersen_otf_fp.c basic_01.c basic_02.c 
basic_03.c @@ -45,6 +47,8 @@ set(lca_files ) set(lca_files_mem2reg + andersen_otf_interproc.c + andersen_otf_fp.c basic_01.c basic_02.c basic_03.c diff --git a/test/llvm_test_code/pointers/andersen_otf_fp.c b/test/llvm_test_code/pointers/andersen_otf_fp.c new file mode 100644 index 0000000000..cb0155a614 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_fp.c @@ -0,0 +1,14 @@ +// On-the-fly function-pointer resolution: +// id is stored into fp and then called indirectly. The OTF fixpoint must +// discover id as a callee and propagate the alias between its formal +// parameter and its return value. +static int *id(int *x) { return x; } + +int main() { + int a; + int *p = &a; + int *(*fp)(int *) = id; + int *q = fp(p); + (void)q; + return 0; +} diff --git a/test/llvm_test_code/pointers/andersen_otf_interproc.c b/test/llvm_test_code/pointers/andersen_otf_interproc.c new file mode 100644 index 0000000000..b8643bc22a --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_interproc.c @@ -0,0 +1,12 @@ +// Direct interprocedural alias propagation: +// retptr returns its argument, so the formal param and the return value +// share the same points-to set. 
+static int *retptr(int *x) { return x; } + +int main() { + int a; + int *p = &a; + int *q = retptr(p); + (void)q; + return 0; +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp index 68402e4275..6badb8e24d 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp @@ -8,7 +8,9 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/DebugOutput.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/Casting.h" @@ -71,7 +73,8 @@ class IFDSConstAnalysisTest : public ::testing::Test { } } - EXPECT_EQ(GroundTruth, AllMutableAllocas); + EXPECT_EQ(GroundTruth, AllMutableAllocas) + << " Expected " << PrettyPrinter{GroundTruth}; } void compareResults(const std::set &GroundTruth, diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp new file mode 100644 index 0000000000..d5dba4e11c --- /dev/null +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -0,0 +1,300 @@ +#include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" +#include "phasar/Pointer/RawAliasSet.h" +#include "phasar/Pointer/UnionFindAA.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/ValueCompressor.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/raw_ostream.h" + +#include "SrcCodeLocationEntry.h" +#include 
"TestConfig.h" +#include "gtest/gtest.h" + +#include +#include +#include +#include + +namespace { +using namespace psr; +using namespace psr::unittest; + +static_assert(UnionFindAAResult); + +constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER("pointers/"); + +using TSL = TestingSrcLocation; +using GTMap = std::map>; + +[[nodiscard]] ValueId asId(const ValueCompressor &Compressor, + const LLVMProjectIRDB &IRDB, TSL Var) { + const auto *LLVMVar = testingLocInIR(Var, IRDB); + auto MaybeId = Compressor.getOrNull(LLVMVar); + if (!MaybeId) { + ADD_FAILURE() << "Value not in VC: " << Var; + return ValueId{}; + } + return *MaybeId; +} + +[[nodiscard]] std::string +stringifyVal(const ValueCompressor &Compressor, ValueId VId) { + std::string Ret; + llvm::raw_string_ostream ROS(Ret); + ROS << "{ "; + llvm::interleaveComma(Compressor.id2vars(VId), ROS, + [&](PAGVariable Var) { ROS << to_string(Var); }); + ROS << " }"; + return Ret; +} + +void dumpAnalysisState(const ValueCompressor &Compressor, + const AndersenOTFResult &Results) { + llvm::errs() << "ValueCompressor: {\n"; + for (const auto &[VId, Values] : Compressor.id2vars().enumerate()) { + llvm::errs() << " #" << uint32_t(VId) << ":\n"; + for (const auto Val : Values) { + llvm::errs() << " " << to_string(Val) << '\n'; + } + } + llvm::errs() << "}\n"; + llvm::errs() << "AliasSets: {\n"; + for (auto VId : iota(Results.NumVars)) { + if (!Results.AliasSets.inbounds(VId)) { + continue; + } + + bool First = true; + for (const auto &Var : Compressor.id2vars(VId)) { + llvm::errs() << " " << to_string(Var); + + if (First) { + First = false; + } else { + llvm::errs() << " MUST ALIAS with " + << to_string(*Compressor.id2vars(VId).begin()) << '\n'; + continue; + } + + if (Results.AliasSets[VId].empty()) { + llvm::errs() << " aliases: EMPTY\n"; + continue; + } + + llvm::errs() << " aliases: {\n"; + Results.AliasSets[VId].foreach ([&](ValueId AId) { + llvm::errs() << " " << stringifyVal(Compressor, AId) << '\n'; + }); + llvm::errs() 
<< " }\n"; + } + } + llvm::errs() << "}\n"; +} + +constexpr llvm::StringRef EntryNames[] = {"main"}; + +/// Exact bidirectional GT check. +/// +/// Soundness: every alias listed in the GT must appear in the computed set. +/// Precision: no computed alias that is named in the GT (the "domain") may +/// be absent from the expected set. Values not named in the GT are outside +/// the domain and are not subject to the precision check. +void doAnalysisAndCheckExact( + const llvm::Twine &IRFile, const GTMap &ExpectedResults, + bool DumpResults = false, + std::source_location Loc = std::source_location::current()) { + + auto IRDB = LLVMProjectIRDB::loadOrExit(PathToLLFiles + IRFile); + + llvm::SmallVector Entries; + for (llvm::StringRef Name : EntryNames) { + const auto *Func = IRDB.getFunctionDefinition(Name); + if (!Func) { + ADD_FAILURE_AT(Loc.file_name(), Loc.line()) + << "Entry function not found: " << Name.str(); + return; + } + Entries.push_back(Func); + } + + auto Compressor = std::make_unique>(); + AndersenOTFResult Results = + computeAndersenOTFRaw(IRDB, Entries, Compressor.get()); + + // Build domain from all values explicitly named in the GT. + llvm::SmallDenseSet Domain; + for (const auto &[PtrVar, ExpectedAliasVars] : ExpectedResults) { + Domain.insert(asId(*Compressor, IRDB, PtrVar)); + for (const auto &AliasVar : ExpectedAliasVars) { + Domain.insert(asId(*Compressor, IRDB, AliasVar)); + } + } + + for (const auto &[PtrVar, ExpectedAliasVars] : ExpectedResults) { + const auto PtrId = asId(*Compressor, IRDB, PtrVar); + const RawAliasSet &Computed = Results.getRawAliasSet(PtrId); + + RawAliasSet Expected; + for (const auto &AliasVar : ExpectedAliasVars) { + Expected.insert(asId(*Compressor, IRDB, AliasVar)); + } + + // Soundness. 
+ Expected.foreach ([&](ValueId AliasId) { + if (!Computed.contains(AliasId)) { + ADD_FAILURE_AT(Loc.file_name(), Loc.line()) + << "Missing expected alias of " << PtrVar << ": " + << stringifyVal(*Compressor, AliasId); + } + }); + + // Precision (domain-restricted). + Computed.foreach ([&](ValueId VId) { + if (!Domain.contains(VId) || Expected.contains(VId)) { + return; + } + ADD_FAILURE_AT(Loc.file_name(), Loc.line()) + << "Unexpected alias of " << PtrVar << ": " + << stringifyVal(*Compressor, VId); + }); + } + + if (DumpResults || ::testing::Test::HasFailure()) { + dumpAnalysisState(*Compressor, Results); + } +} + +// ---- Tests ---------------------------------------------------------------- + +TEST(AndersenOTFAATest, InterProcArgRetAlias) { + // retptr(x) returns x — formal parameter and return value must alias. + const GTMap ExpectedResults = { + {TSL(ArgInFun{.Idx = 0, .InFunction = "retptr"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "retptr"}), + TSL(RetVal{.InFunction = "retptr"})}}, + {TSL(RetVal{.InFunction = "retptr"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "retptr"}), + TSL(RetVal{.InFunction = "retptr"})}}, + }; + doAnalysisAndCheckExact("andersen_otf_interproc_c_m2r_dbg.ll", + ExpectedResults); +} + +TEST(AndersenOTFAATest, FuncPtrArgRetAlias) { + // id(x) returns x, called only via function pointer. + // OTF must discover id as a callee and propagate arg/ret alias. + const GTMap ExpectedResults = { + {TSL(ArgInFun{.Idx = 0, .InFunction = "id"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "id"}), + TSL(RetVal{.InFunction = "id"})}}, + {TSL(RetVal{.InFunction = "id"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "id"}), + TSL(RetVal{.InFunction = "id"})}}, + }; + doAnalysisAndCheckExact("andersen_otf_fp_c_m2r_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, FuncByNameInVC) { + // The function 'id' has its address stored into fp; it must appear in VC. 
+ auto IRDB = LLVMProjectIRDB::loadOrExit( + PathToLLFiles + llvm::Twine("andersen_otf_fp_c_m2r_dbg.ll")); + + const auto *MainFn = IRDB.getFunctionDefinition("main"); + ASSERT_NE(MainFn, nullptr); + + auto Compressor = std::make_unique>(); + [[maybe_unused]] auto Results = + computeAndersenOTFRaw(IRDB, {MainFn}, Compressor.get()); + + const auto *IdFn = IRDB.getFunctionDefinition("id"); + ASSERT_NE(IdFn, nullptr); + auto MaybeId = Compressor->getOrNull(IdFn); + EXPECT_TRUE(MaybeId.has_value()) + << "Function 'id' not in VC — address-taken functions must be inserted"; +} + +TEST(AndersenOTFAATest, ContextInsensitiveCallsMerge) { + // context_01: id(&x) and id(&y) called from two call sites. + // Context-insensitive: both call-site return values alias the same node + // (pts merges both args). A context-sensitive analysis would keep them + // separate; this test verifies the expected context-insensitive behaviour. + const TSL Arg = TSL(ArgInFun{.Idx = 0, .InFunction = "id"}); + const TSL Ret = TSL(RetVal{.InFunction = "id"}); + // Call instructions for id(&x) and id(&y) in main (lines 8 and 9). + const TSL Call1 = TSL(LineColFunOp{.Line = 8, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL Call2 = TSL(LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const GTMap ExpectedResults = { + {Arg, {Arg, Ret, Call1, Call2}}, + {Ret, {Arg, Ret, Call1, Call2}}, + {Call1, {Arg, Ret, Call1, Call2}}, + {Call2, {Arg, Ret, Call1, Call2}}, + }; + doAnalysisAndCheckExact("context_01_c_dbg.ll", ExpectedResults, true); +} + +TEST(AndersenOTFAATest, SeparateFunctionsDontAlias) { + // context_02: id1 and id2 are independent identity functions called with + // different arguments. Their parameter and return-value nodes must not + // alias each other (precision check for context-insensitive analysis). 
+ const TSL Id1Arg = TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}); + const TSL Id1Ret = TSL(RetVal{.InFunction = "id1"}); + const TSL Id2Arg = TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}); + const TSL Id2Ret = TSL(RetVal{.InFunction = "id2"}); + const GTMap ExpectedResults = { + {Id1Arg, {Id1Arg, Id1Ret}}, + {Id1Ret, {Id1Arg, Id1Ret}}, + {Id2Arg, {Id2Arg, Id2Ret}}, + {Id2Ret, {Id2Arg, Id2Ret}}, + }; + doAnalysisAndCheckExact("context_02_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, TransitiveCallChain) { + // context_03: id2(q) = id1(q). Alias must propagate through the chain: + // id2_arg → id1_arg → id1_ret → id2_ret. All four must alias. + const GTMap ExpectedResults = { + {TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}), + TSL(RetVal{.InFunction = "id1"}), + TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}), + TSL(RetVal{.InFunction = "id2"})}}, + {TSL(RetVal{.InFunction = "id1"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}), + TSL(RetVal{.InFunction = "id1"}), + TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}), + TSL(RetVal{.InFunction = "id2"})}}, + {TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}), + TSL(RetVal{.InFunction = "id1"}), + TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}), + TSL(RetVal{.InFunction = "id2"})}}, + {TSL(RetVal{.InFunction = "id2"}), + {TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}), + TSL(RetVal{.InFunction = "id1"}), + TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}), + TSL(RetVal{.InFunction = "id2"})}}, + }; + doAnalysisAndCheckExact("context_03_c_dbg.ll", ExpectedResults); +} + +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} diff --git a/unittests/PhasarLLVM/Pointer/CMakeLists.txt b/unittests/PhasarLLVM/Pointer/CMakeLists.txt index 7a8857df82..7085c599ae 100644 --- a/unittests/PhasarLLVM/Pointer/CMakeLists.txt +++ b/unittests/PhasarLLVM/Pointer/CMakeLists.txt @@ -1,4 
+1,5 @@ set(PointerFlowSources + AndersenOTFAATest.cpp LLVMAliasSetTest.cpp LLVMAliasSetSerializationTest.cpp FilteredLLVMAliasSetTest.cpp From 87641c6ce33e7963a4288d9a4219e1c8c52541b9 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 3 May 2026 20:01:12 +0200 Subject: [PATCH 05/36] Reduce the size of AndersenVar by half (sth the AI apparently could not do...) --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 43 +++++++++++++++--------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 2ab93deae3..777103237a 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -10,6 +10,7 @@ #include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/LibrarySummary.h" @@ -17,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -35,13 +37,26 @@ namespace { /// File-local wrapper: extends PAGVariable with a variable/object flag. /// Variable nodes (IsObject=false) represent SSA pointer values. /// Object nodes (IsObject=true) represent abstract memory cells. 
-struct AndersenVar { - PAGVariable Base{}; - bool IsObject = false; +class AndersenVar { +public: + AndersenVar() noexcept = default; + AndersenVar(PAGVariable Base, bool IsObject) : Base(Base, IsObject) {} + + [[nodiscard]] PAGVariable getBase() const noexcept { + return Base.getPointer(); + } + [[nodiscard]] bool isObject() const noexcept { return Base.getInt(); } friend bool operator==(AndersenVar A, AndersenVar B) noexcept { - return A.Base == B.Base && A.IsObject == B.IsObject; + return A.Base == B.Base; + } + + friend auto hash_value(AndersenVar V) noexcept { + return llvm::hash_value(V.Base.getOpaqueValue()); } + +private: + llvm::PointerIntPair Base{}; }; } // namespace @@ -53,11 +68,7 @@ template <> struct DenseMapInfo { static AndersenVar getTombstoneKey() noexcept { return {DenseMapInfo::getTombstoneKey(), false}; } - static unsigned getHashValue(AndersenVar V) noexcept { - return llvm::hash_combine( - DenseMapInfo::getHashValue(V.Base), - unsigned(V.IsObject)); - } + static unsigned getHashValue(AndersenVar V) noexcept { return hash_value(V); } static bool isEqual(AndersenVar A, AndersenVar B) noexcept { return A == B; } }; } // namespace llvm @@ -677,8 +688,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } for (const auto &Var : LocalVC.id2vars(ObjId)) { - const auto *Fun = - llvm::dyn_cast_or_null(Var.Base.valueOrNull()); + const auto *Fun = llvm::dyn_cast_or_null( + Var.getBase().valueOrNull()); if (Fun) { connectCallee(C, Fun, Args, CSRetVal); } @@ -707,8 +718,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } for (const auto &Var : LocalVC.id2vars(ObjId)) { - const auto *Fun = - llvm::dyn_cast_or_null(Var.Base.valueOrNull()); + const auto *Fun = llvm::dyn_cast_or_null( + Var.getBase().valueOrNull()); if (Fun) { connectCallee(Rec.CS, Fun, Rec.Args, Rec.CSRetVal); } @@ -743,15 +754,15 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { ValueId FirstExtId{}; bool 
HasFirst = false; for (const auto &V : LocalVC.id2vars(VId)) { - if (V.IsObject) { + if (V.isObject()) { continue; } if (!HasFirst) { - FirstExtId = ExternalVC.insert(V.Base).first; + FirstExtId = ExternalVC.insert(V.getBase()).first; HasFirst = true; LocalToExt[VId] = FirstExtId; } else { - ExternalVC.addAlias(V.Base, FirstExtId); + ExternalVC.addAlias(V.getBase(), FirstExtId); } } } From a289f5fb135a19236e9b3ad7c6fb66e42b127b88 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 14 May 2026 20:45:18 +0200 Subject: [PATCH 06/36] Add AndersenOTF tests for deep chains, recursion, and function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix OperandOf::operator< (was comparing R2.Inst instead of R1.Inst) - DeepChainTwoObjectsMerge (context_04_1): three-level id chain with x/y - RecursiveSelfAlias (context_08): SCC collapsing under self-recursion - MutualRecursionAlias (context_10_0): Forth↔Back two-way recursion - ReturnSecondArgContextInsensitive (context_12_1): argretq precision - FuncPtrCallbackIdentity (context_14_1): OTF resolves indirect call - RecursionTwoObjectsMerge (context_09_0): recursive with two objects - MutualRecursionTwoObjects (context_10_1): mutual recursion, two objects - ThreeWayMutualRecursion (context_11_0): Forth↔Back↔Stop recursion - ThreeArgReturnQContextInsensitive (context_13_1): three-param function - FuncPtrCallbackThreeWayMerge (context_14_2): three function pointers Co-Authored-By: Claude Sonnet 4.6 --- .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 349 +++++++++++++++++- unittests/TestUtils/SrcCodeLocationEntry.h | 2 +- 2 files changed, 347 insertions(+), 4 deletions(-) diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index d5dba4e11c..851ed41cc1 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -4,6 +4,7 @@ #include 
"phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" #include "phasar/Pointer/RawAliasSet.h" #include "phasar/Pointer/UnionFindAA.h" +#include "phasar/Utils/DebugOutput.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/ValueCompressor.h" @@ -144,15 +145,20 @@ void doAnalysisAndCheckExact( const RawAliasSet &Computed = Results.getRawAliasSet(PtrId); RawAliasSet Expected; + // llvm::errs() << "For PtrId: #" << uint32_t(PtrId) << ":\n"; for (const auto &AliasVar : ExpectedAliasVars) { - Expected.insert(asId(*Compressor, IRDB, AliasVar)); + auto AliasId = asId(*Compressor, IRDB, AliasVar); + Expected.insert(AliasId); + // llvm::errs() << "> Insert #" << uint32_t(AliasId) + // << " into Expected due to " << AliasVar << '\n'; } // Soundness. Expected.foreach ([&](ValueId AliasId) { if (!Computed.contains(AliasId)) { ADD_FAILURE_AT(Loc.file_name(), Loc.line()) - << "Missing expected alias of " << PtrVar << ": " + << "Missing expected alias of " << PtrVar << "(#" << uint32_t(PtrId) + << "): #" << uint32_t(AliasId) << " as " << stringifyVal(*Compressor, AliasId); } }); @@ -244,7 +250,7 @@ TEST(AndersenOTFAATest, ContextInsensitiveCallsMerge) { {Call1, {Arg, Ret, Call1, Call2}}, {Call2, {Arg, Ret, Call1, Call2}}, }; - doAnalysisAndCheckExact("context_01_c_dbg.ll", ExpectedResults, true); + doAnalysisAndCheckExact("context_01_c_dbg.ll", ExpectedResults); } TEST(AndersenOTFAATest, SeparateFunctionsDontAlias) { @@ -292,6 +298,343 @@ TEST(AndersenOTFAATest, TransitiveCallChain) { doAnalysisAndCheckExact("context_03_c_dbg.ll", ExpectedResults); } +TEST(AndersenOTFAATest, DeepChainTwoObjectsMerge) { + // context_04_1: three-level identity chain (id3→id2→id1) called with both + // &x and &y. Context-insensitive: all params and rets of id1/id2/id3 and + // all four call sites alias each other AND with x/y (they share x_obj or + // y_obj as common pointee). x and y themselves do NOT alias each other. 
+ const TSL Id1Arg = TSL(ArgInFun{.Idx = 0, .InFunction = "id1"}); + const TSL Id2Arg = TSL(ArgInFun{.Idx = 0, .InFunction = "id2"}); + const TSL Id3Arg = TSL(ArgInFun{.Idx = 0, .InFunction = "id3"}); + const TSL Id1Ret = TSL(RetVal{.InFunction = "id1"}); + const TSL Id2Ret = TSL(RetVal{.InFunction = "id2"}); + const TSL Id3Ret = TSL(RetVal{.InFunction = "id3"}); + const TSL XX1 = TSL(LineColFunOp{.Line = 10, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL XX2 = TSL(LineColFunOp{.Line = 11, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL YY1 = TSL(LineColFunOp{.Line = 12, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL YY2 = TSL(LineColFunOp{.Line = 13, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + // %x / %y: the alloca pointers passed to id3; recovered as arg 0 of + // respective call sites (operand 0 of a CallInst = first argument). + const TSL XAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 10, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 12, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {Id1Arg, Id2Arg, Id3Arg, Id1Ret, Id2Ret, + Id3Ret, XX1, XX2, YY1, YY2}; + // Chain members alias each other and both allocas (share x_obj or y_obj). + std::vector ChainWithBoth = Chain; + ChainWithBoth.push_back(XAlloca); + ChainWithBoth.push_back(YAlloca); + GTMap ExpectedResults; + for (const auto &ChainV : Chain) { + ExpectedResults[ChainV] = ChainWithBoth; + } + // x alloca aliases the chain (via x_obj) but NOT y. + std::vector XAliases = Chain; + XAliases.push_back(XAlloca); + ExpectedResults[XAlloca] = XAliases; + // y alloca aliases the chain (via y_obj) but NOT x. 
+ std::vector YAliases = Chain; + YAliases.push_back(YAlloca); + ExpectedResults[YAlloca] = YAliases; + + // llvm::errs() << "ExpectedResults[XAlloca]: " + // << PrettyPrinter{ExpectedResults[XAlloca]} << '\n'; + // llvm::errs() << "ExpectedResults[YAlloca]: " + // << PrettyPrinter{ExpectedResults[YAlloca]} << '\n'; + + doAnalysisAndCheckExact("context_04_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, RecursiveSelfAlias) { + // context_08: selfRecursion(Ptr) calls itself with Ptr, forming a cycle in + // the constraint graph. SCC collapsing must merge the recursive call result + // with the formal parameter and the two call-site results in main. + const TSL Ptr = TSL(ArgInFun{.Idx = 0, .InFunction = "selfRecursion"}); + const TSL Ret = TSL(RetVal{.InFunction = "selfRecursion"}); + // int *x = selfRecursion(kptr) at line 15 + const TSL X = TSL(LineColFunOp{.Line = 15, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + // int *y = selfRecursion(kptr) at line 19 + const TSL Y = TSL(LineColFunOp{.Line = 19, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const std::vector All = {Ptr, Ret, X, Y}; + GTMap ExpectedResults; + for (const auto &V : All) { + ExpectedResults[V] = All; + } + doAnalysisAndCheckExact("context_08_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, MutualRecursionAlias) { + // context_10_0: Forth and Back call each other with the same pointer; both + // called from main with &k. The mutual recursion forces all four + // param/ret nodes and the two call-site results to alias. 
+ const TSL ForthPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Forth"}); + const TSL BackPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Back"}); + const TSL ForthRet = TSL(RetVal{.InFunction = "Forth"}); + const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); + // int *x = Back(&k) at line 26 + const TSL X = TSL(LineColFunOp{.Line = 26, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + // int *y = Back(&k) at line 30 + const TSL Y = TSL(LineColFunOp{.Line = 30, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const std::vector All = {ForthPtr, BackPtr, ForthRet, BackRet, X, Y}; + GTMap ExpectedResults; + for (const auto &V : All) { + ExpectedResults[V] = All; + } + doAnalysisAndCheckExact("context_10_0_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, ReturnSecondArgContextInsensitive) { + // context_12_1: argretq(p,q) returns q. Two call sites swap which + // argument is &x and which is &y. Context-insensitive: p, q, and the + // return value all receive both &x and &y, so they all alias each other. + const TSL P = TSL(ArgInFun{.Idx = 0, .InFunction = "argretq"}); + const TSL Q = TSL(ArgInFun{.Idx = 1, .InFunction = "argretq"}); + const TSL Ret = TSL(RetVal{.InFunction = "argretq"}); + // int *xx1 = argretq(&y, &x) at line 8 + const TSL XX1 = TSL(LineColFunOp{.Line = 8, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + // int *yy1 = argretq(&x, &y) at line 9 + const TSL YY1 = TSL(LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const std::vector All = {P, Q, Ret, XX1, YY1}; + GTMap ExpectedResults; + for (const auto &V : All) { + ExpectedResults[V] = All; + } + doAnalysisAndCheckExact("context_12_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, FuncPtrCallbackIdentity) { + // context_14_1: callback(Func) returns Func — identity on function pointers. + // Two call sites pass &ret0 and &ret1 respectively. 
OTF must discover + // both callees. The formal parameter and return value of callback must + // alias (they point to the same set of function objects). + const TSL Func = TSL(ArgInFun{.Idx = 0, .InFunction = "callback"}); + const TSL Ret = TSL(RetVal{.InFunction = "callback"}); + const GTMap ExpectedResults = { + {Func, {Func, Ret}}, + {Ret, {Func, Ret}}, + }; + doAnalysisAndCheckExact("context_14_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, RecursionTwoObjectsMerge) { + // context_09_0: selfRecursion called with &k and &l. + // Context-insensitive: Ptr receives both; all four alias. + // k and l alias the chain (via their objects) but not each other. + const TSL Ptr = TSL(ArgInFun{.Idx = 0, .InFunction = "selfRecursion"}); + const TSL Ret = TSL(RetVal{.InFunction = "selfRecursion"}); + const TSL CallX = TSL(LineColFunOp{.Line = 15, .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL CallY = TSL(LineColFunOp{.Line = 16, .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL KAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 15, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 16, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {Ptr, Ret, CallX, CallY}; + GTMap ExpectedResults; + std::vector ChainAndBoth = Chain; + ChainAndBoth.push_back(KAlloca); + ChainAndBoth.push_back(LAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + std::vector KAliases = Chain; + KAliases.push_back(KAlloca); + ExpectedResults[KAlloca] = KAliases; + std::vector LAliases = Chain; + LAliases.push_back(LAlloca); + ExpectedResults[LAlloca] = LAliases; + doAnalysisAndCheckExact("context_09_0_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, MutualRecursionTwoObjects) { + // 
context_10_1: Forth↔Back mutual recursion, called with &k and &l. + // All four params/rets and four call-site results alias. + // k and l each alias all eight but not each other. + const TSL ForthPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Forth"}); + const TSL BackPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Back"}); + const TSL ForthRet = TSL(RetVal{.InFunction = "Forth"}); + const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); + // xx1=Back(&k) line 27, xx2=Back(&k) line 29, yy1=Back(&l) line 31, yy2=Back(&l) line 33 + const auto MkCall = [](uint32_t Line) { + return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + }; + const TSL XX1 = MkCall(27); + const TSL XX2 = MkCall(29); + const TSL YY1 = MkCall(31); + const TSL YY2 = MkCall(33); + const TSL KAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 27, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 31, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {ForthPtr, BackPtr, ForthRet, BackRet, + XX1, XX2, YY1, YY2}; + GTMap ExpectedResults; + std::vector ChainAndBoth = Chain; + ChainAndBoth.push_back(KAlloca); + ChainAndBoth.push_back(LAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + std::vector KAliases = Chain; + KAliases.push_back(KAlloca); + ExpectedResults[KAlloca] = KAliases; + std::vector LAliases = Chain; + LAliases.push_back(LAlloca); + ExpectedResults[LAlloca] = LAliases; + doAnalysisAndCheckExact("context_10_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, ThreeWayMutualRecursion) { + // context_11_0: Forth↔Back↔Stop three-way mutual recursion. + // All six params/rets and both call-site results alias. 
+ const TSL ForthPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Forth"}); + const TSL BackPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Back"}); + const TSL StopPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Stop"}); + const TSL ForthRet = TSL(RetVal{.InFunction = "Forth"}); + const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); + const TSL StopRet = TSL(RetVal{.InFunction = "Stop"}); + // x=Back(&k) line 36, y=Forth(&l) line 37 + const TSL CallX = TSL(LineColFunOp{.Line = 36, .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL CallY = TSL(LineColFunOp{.Line = 37, .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL KAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 36, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 37, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {ForthPtr, BackPtr, StopPtr, + ForthRet, BackRet, StopRet, + CallX, CallY}; + GTMap ExpectedResults; + std::vector ChainAndBoth = Chain; + ChainAndBoth.push_back(KAlloca); + ChainAndBoth.push_back(LAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + std::vector KAliases = Chain; + KAliases.push_back(KAlloca); + ExpectedResults[KAlloca] = KAliases; + std::vector LAliases = Chain; + LAliases.push_back(LAlloca); + ExpectedResults[LAlloca] = LAliases; + doAnalysisAndCheckExact("context_11_0_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, ThreeArgReturnQContextInsensitive) { + // context_13_1: argretq(p,q,r) returns q. Two call sites pass all-x and + // all-y. Context-insensitive: all three params and the return merge. + // x and y allocas alias the group but not each other. 
+ const TSL ArgP = TSL(ArgInFun{.Idx = 0, .InFunction = "argretq"}); + const TSL ArgQ = TSL(ArgInFun{.Idx = 1, .InFunction = "argretq"}); + const TSL ArgR = TSL(ArgInFun{.Idx = 2, .InFunction = "argretq"}); + const TSL Ret = TSL(RetVal{.InFunction = "argretq"}); + // xx1=argretq(&x,&x,&x) line 8, yy1=argretq(&y,&y,&y) line 9 + const TSL XX1 = TSL(LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL YY1 = TSL(LineColFunOp{.Line = 9, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL XAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 9, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {ArgP, ArgQ, ArgR, Ret, XX1, YY1}; + GTMap ExpectedResults; + std::vector ChainAndBoth = Chain; + ChainAndBoth.push_back(XAlloca); + ChainAndBoth.push_back(YAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + std::vector XAliases = Chain; + XAliases.push_back(XAlloca); + ExpectedResults[XAlloca] = XAliases; + std::vector YAliases = Chain; + YAliases.push_back(YAlloca); + ExpectedResults[YAlloca] = YAliases; + doAnalysisAndCheckExact("context_13_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, FuncPtrCallbackThreeWayMerge) { + // context_14_2: callback(Func) returns Func, called with &ret0, &ret1, + // &ret2. Func and Ret alias all three function values. The individual + // function values alias Func and Ret but NOT each other (disjoint pts sets). 
+ const TSL Func = TSL(ArgInFun{.Idx = 0, .InFunction = "callback"}); + const TSL Ret = TSL(RetVal{.InFunction = "callback"}); + const TSL Ret0 = TSL(FuncByName{.FuncName = "ret0"}); + const TSL Ret1 = TSL(FuncByName{.FuncName = "ret1"}); + const TSL Ret2 = TSL(FuncByName{.FuncName = "ret2"}); + const GTMap ExpectedResults = { + {Func, {Func, Ret, Ret0, Ret1, Ret2}}, + {Ret, {Func, Ret, Ret0, Ret1, Ret2}}, + {Ret0, {Ret0, Func, Ret}}, + {Ret1, {Ret1, Func, Ret}}, + {Ret2, {Ret2, Func, Ret}}, + }; + doAnalysisAndCheckExact("context_14_2_c_dbg.ll", ExpectedResults); +} + } // namespace int main(int Argc, char **Argv) { diff --git a/unittests/TestUtils/SrcCodeLocationEntry.h b/unittests/TestUtils/SrcCodeLocationEntry.h index 61f6b7c37f..b3456f7418 100644 --- a/unittests/TestUtils/SrcCodeLocationEntry.h +++ b/unittests/TestUtils/SrcCodeLocationEntry.h @@ -168,7 +168,7 @@ struct OperandOf { LineColFunOp Inst{}; friend bool operator<(OperandOf R1, OperandOf R2) noexcept { - return std::tie(R1.OperandIndex, R2.Inst) < + return std::tie(R1.OperandIndex, R1.Inst) < std::tie(R2.OperandIndex, R2.Inst); } friend bool operator==(OperandOf R1, OperandOf R2) noexcept { From c8c0260299d8fa52bb12c32ef501173045e38f96 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 19 May 2026 19:44:24 +0200 Subject: [PATCH 07/36] Perf improvement in AndersOTFAA --- .gitmodules | 3 + CMakeLists.txt | 7 ++ external/CRoaring | 1 + include/phasar/Pointer/RawAliasSet.h | 107 ++++++++++++++++++++--- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 105 ++++++++++++++++------ lib/Pointer/CMakeLists.txt | 3 + 6 files changed, 188 insertions(+), 38 deletions(-) create mode 160000 external/CRoaring diff --git a/.gitmodules b/.gitmodules index 350885ac54..4afb3bad23 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,6 @@ [submodule "external/json-schema-validator"] path = external/json-schema-validator url = https://github.com/pboettch/json-schema-validator.git +[submodule "external/CRoaring"] + path = 
external/CRoaring + url = https://github.com/fabianbs96/CRoaring.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bb9701c17..948bff787e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,7 +74,9 @@ set(RELEASE_CONFIGURATIONS RELWITHDEBINFO RELEASE CACHE INTERNAL "" FORCE) string(APPEND CMAKE_CXX_FLAGS " -MP -fstack-protector-strong -ffunction-sections -fdata-sections -pipe") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer") +string(APPEND CMAKE_C_FLAGS_DEBUG " -fno-omit-frame-pointer") string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer") +string(APPEND CMAKE_C_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer") string(APPEND CMAKE_CXX_FLAGS_RELEASE "") option(CMAKE_VISIBILITY_INLINES_HIDDEN "Hide inlined functions from the DSO table (default ON)" ON) @@ -123,6 +125,7 @@ if (NOT "${PHASAR_TARGET_ARCH_INTERNAL}" STREQUAL "") if (MARCH_SUPPORTED) message(STATUS "Target architecture '${PHASAR_TARGET_ARCH_INTERNAL}' enabled") string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=${PHASAR_TARGET_ARCH_INTERNAL}") + string(APPEND CMAKE_C_FLAGS_RELEASE " -march=${PHASAR_TARGET_ARCH_INTERNAL}") else() message(WARNING "Target architecture '${PHASAR_TARGET_ARCH_INTERNAL}' not supported. 
Fallback to generic build") endif() @@ -339,6 +342,10 @@ set(PHASAR_LLVM_VERSION 16 CACHE STRING "The LLVM major-version that PhASAR shou include(add_llvm) add_llvm() +# Roaring +set(ENABLE_ROARING_TESTS OFF) +add_subdirectory(external/CRoaring EXCLUDE_FROM_ALL) + # SVF option(PHASAR_USE_SVF "Use SVF for more options in alias analysis (default is OFF)" OFF) if(PHASAR_USE_SVF) diff --git a/external/CRoaring b/external/CRoaring new file mode 160000 index 0000000000..d3092b5b4f --- /dev/null +++ b/external/CRoaring @@ -0,0 +1 @@ +Subproject commit d3092b5b4f724b48542d2de14e32f08cd45a282c diff --git a/include/phasar/Pointer/RawAliasSet.h b/include/phasar/Pointer/RawAliasSet.h index 2aa70f0f31..58ae3f8117 100644 --- a/include/phasar/Pointer/RawAliasSet.h +++ b/include/phasar/Pointer/RawAliasSet.h @@ -11,9 +11,13 @@ #include "phasar/Utils/TypeTraits.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SparseBitVector.h" +#include "roaring/roaring.hh" + #include +#include namespace psr { @@ -33,6 +37,8 @@ concept IsRawAliasSet = requires(ASet &MutSet, const ASet &ConstSet, { ConstSet.contains(ValId) } -> std::convertible_to; // ConstSet.begin(); // ConstSet.end(); + + /// Iteration must be in ascending order ConstSet.foreach (DummyFn{}); MutSet |= ConstSet; MutSet &= ConstSet; @@ -52,11 +58,11 @@ concept IsRawAliasSet = requires(ASet &MutSet, const ASet &ConstSet, /// Satisfies \c IsRawAliasSet. /// /// \tparam IdT Integer-like id type (e.g., \c ValueId). 
-template class RawAliasSet { +template class LLVMRawAliasSet { public: using value_type = IdT; - RawAliasSet() = default; + LLVMRawAliasSet() = default; void insert(IdT Id) { Bits.set(uint32_t(Id)); } @@ -66,16 +72,23 @@ template class RawAliasSet { [[nodiscard]] bool contains(IdT Id) const { return Bits.test(uint32_t(Id)); } - LLVM_ATTRIBUTE_ALWAYS_INLINE void foreach ( - std::invocable auto Handler) const { + template HandlerFn> + LLVM_ATTRIBUTE_ALWAYS_INLINE void foreach (HandlerFn Handler) const { for (auto Bit : Bits) { - std::invoke(Handler, IdT(Bit)); + if constexpr (std::convertible_to, + bool>) { + if (!std::invoke(Handler, IdT(Bit))) { + break; + } + } else { + std::invoke(Handler, IdT(Bit)); + } } } - void operator|=(const RawAliasSet &Other) { Bits |= Other.Bits; } - void operator&=(const RawAliasSet &Other) { Bits &= Other.Bits; } - void operator-=(const RawAliasSet &Other) { + void operator|=(const LLVMRawAliasSet &Other) { Bits |= Other.Bits; } + void operator&=(const LLVMRawAliasSet &Other) { Bits &= Other.Bits; } + void operator-=(const LLVMRawAliasSet &Other) { Bits.intersectWithComplement(Other.Bits); } @@ -87,13 +100,13 @@ template class RawAliasSet { [[nodiscard]] auto begin() const noexcept { return Bits.begin(); } [[nodiscard]] auto end() const noexcept { return Bits.end(); } - [[nodiscard]] bool tryMergeWith(const RawAliasSet &Other) { + [[nodiscard]] bool tryMergeWith(const LLVMRawAliasSet &Other) { return Bits |= Other.Bits; } void erase(IdT Id) { Bits.reset(uint32_t(Id)); } - [[nodiscard]] bool operator==(const RawAliasSet &Other) const noexcept { + [[nodiscard]] bool operator==(const LLVMRawAliasSet &Other) const noexcept { return Bits == Other.Bits; } @@ -101,4 +114,78 @@ template class RawAliasSet { llvm::SparseBitVector<> Bits; // TODO: roaring::Roaring Bits; }; + +template class RoaringAliasSet { +public: + using value_type = IdT; + + RoaringAliasSet() = default; + + void insert(IdT Id) { Bits.add(uint32_t(Id)); } + + [[nodiscard]] 
bool tryInsert(IdT Id) { return Bits.addChecked(uint32_t(Id)); } + + [[nodiscard]] bool contains(IdT Id) const { + return Bits.contains(uint32_t(Id)); + } + + template HandlerFn> + LLVM_ATTRIBUTE_ALWAYS_INLINE void foreach (HandlerFn Handler) const { + return Bits.iterate( + [](uint32_t Id, void *HandlerPtr) { + auto &Handler = *(HandlerFn *)HandlerPtr; + if constexpr (std::convertible_to< + std::invoke_result_t, bool>) { + if (!std::invoke(Handler, IdT(Id))) { + return false; + } + } else { + std::invoke(Handler, IdT(Id)); + } + return true; + }, + &Handler); + } + + void operator|=(const RoaringAliasSet &Other) { Bits |= Other.Bits; } + void operator&=(const RoaringAliasSet &Other) { Bits &= Other.Bits; } + void operator-=(const RoaringAliasSet &Other) { Bits -= Other.Bits; } + [[nodiscard]] RoaringAliasSet operator-(const RoaringAliasSet &Other) { + return Bits - Other.Bits; + } + + [[nodiscard]] bool empty() const noexcept { return Bits.isEmpty(); } + [[nodiscard]] size_t size() const noexcept { return Bits.cardinality(); } + + void clear() noexcept { Bits.clear(); } + + [[nodiscard]] auto begin() const noexcept { return Bits.begin(); } + [[nodiscard]] auto end() const noexcept { return Bits.end(); } + + [[nodiscard]] bool tryMergeWith(const RoaringAliasSet &Other) { + auto OldSz = size(); + Bits |= Other.Bits; + return size() != OldSz; + } + + void erase(IdT Id) { Bits.remove(uint32_t(Id)); } + + // Bulk-inserts from a sorted, deduplicated array. + // Roaring constructs containers in O(N) for sorted input. 
+ void insertSorted(llvm::ArrayRef Sorted) { + Bits.addMany(Sorted.size(), Sorted.data()); + } + + [[nodiscard]] bool operator==(const RoaringAliasSet &Other) const noexcept { + return Bits == Other.Bits; + } + +private: + RoaringAliasSet(roaring::Roaring &&RR) : Bits(std::move(RR)) {} + + roaring::Roaring Bits{}; +}; + +template using RawAliasSet = RoaringAliasSet; + } // namespace psr diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 777103237a..6f0e3ba7b9 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -439,8 +440,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } - RawAliasSet NewPts = Nodes[U].PtsSet; - NewPts -= Nodes[V].PtsSet; + RawAliasSet NewPts = Nodes[U].PtsSet - Nodes[V].PtsSet; if (NewPts.empty()) { // LCD: direct back-edge V→U with pts(U)⊆pts(V) → 2-cycle, collapse. 
if (Nodes[V].AssignDstSet.contains(U)) { @@ -685,7 +685,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const RawAliasSet FPPts = Nodes[FPId].PtsSet; FPPts.foreach ([&](ValueId ObjId) { if (!Nodes.inbounds(ObjId)) { - return; + // Iteration is in sorted order + return false; } for (const auto &Var : LocalVC.id2vars(ObjId)) { const auto *Fun = llvm::dyn_cast_or_null( @@ -694,6 +695,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { connectCallee(C, Fun, Args, CSRetVal); } } + return true; }); }; @@ -715,7 +717,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const RawAliasSet FPPts = Nodes[Rec.FPId].PtsSet; FPPts.foreach ([&](ValueId ObjId) { if (!Nodes.inbounds(ObjId)) { - return; + // Iteration is in sorted order + return false; } for (const auto &Var : LocalVC.id2vars(ObjId)) { const auto *Fun = llvm::dyn_cast_or_null( @@ -724,6 +727,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { connectCallee(Rec.CS, Fun, Rec.Args, Rec.CSRetVal); } } + return true; }); } } @@ -733,20 +737,6 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { AndersenOTFResult buildResult() { const size_t NumLocal = LocalVC.size(); - // Reverse map: abstract object → all local IDs that point to it. - TypedVector> Obj2Ptrs(NumLocal); - for (auto VId : iota(NumLocal)) { - const ValueId RepId = rep(VId); - if (!Nodes.inbounds(RepId)) { - continue; - } - Nodes[RepId].PtsSet.foreach ([&](ValueId Obj) { - if (size_t(Obj) < NumLocal) { - Obj2Ptrs[Obj].insert(VId); - } - }); - } - // Map variable local IDs → external VC IDs. // Object nodes are internal only and do not appear in the external result. 
TypedVector> LocalToExt(NumLocal); @@ -767,30 +757,89 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } } - AndersenOTFResult Result; - Result.NumVars = ExternalVC.size(); - Result.AliasSets.resize(Result.NumVars); - + // Build rep → bitset of external IDs for all vars in that SCC. + TypedVector> RepToExtVIds(NumLocal); for (auto VId : iota(NumLocal)) { if (!LocalToExt[VId]) { continue; } - const ValueId ExtVId = *LocalToExt[VId]; const ValueId RepId = rep(VId); if (!Nodes.inbounds(RepId)) { continue; } + RepToExtVIds[RepId].push_back(*LocalToExt[VId]); + } + // Reverse map: abstract object → bitset of representatives pointing to it. + // Only representatives with at least one external variable are inserted. + TypedVector> Obj2Reps(NumLocal); + for (auto RepId : iota(NumLocal)) { + if (RepToExtVIds[RepId].empty()) { + continue; + } + if (!Nodes.inbounds(RepId)) { + continue; + } Nodes[RepId].PtsSet.foreach ([&](ValueId Obj) { - if (size_t(Obj) >= NumLocal) { - return; + if (size_t(Obj) < NumLocal) { + Obj2Reps[Obj].insert(RepId); + return true; } - Obj2Ptrs[Obj].foreach ([&](ValueId AliasLocalId) { - if (const auto &AliasExt = LocalToExt[AliasLocalId]) { - Result.AliasSets[ExtVId].insert(*AliasExt); + // Iteration is in sorted order + return false; + }); + } + + // Precompute per-object alias set: for each abstract object, the union of + // all external IDs of every representative that points to it. Built once + // here via sort+insertSorted so the main loop below can use fast |=. 
+ TypedVector> ObjToAliasExtVIds(NumLocal); + { + llvm::SmallVector Buf; + for (auto Obj : iota(NumLocal)) { + if (Obj2Reps[Obj].empty()) { + continue; + } + Obj2Reps[Obj].foreach ([&](ValueId AliasRepId) { + for (auto EId : RepToExtVIds[AliasRepId]) { + Buf.push_back(uint32_t(EId)); } }); + std::ranges::sort(Buf); + // Buf.erase(std::ranges::unique(Buf).begin(), Buf.end()); + ObjToAliasExtVIds[Obj].insertSorted(Buf); + Buf.clear(); + } + } + + AndersenOTFResult Result; + Result.NumVars = ExternalVC.size(); + Result.AliasSets.resize(Result.NumVars); + + for (auto RepId : iota(NumLocal)) { + const auto &MyExtVIds = RepToExtVIds[RepId]; + if (MyExtVIds.empty()) { + continue; + } + if (!Nodes.inbounds(RepId)) { + break; + } + + // Union the pre-built per-object alias sets for all pointees. + RawAliasSet AliasExtVIds; + Nodes[RepId].PtsSet.foreach ([&](ValueId Obj) { + if (size_t(Obj) >= NumLocal) { + // Iteration is in sorted order + return false; + } + AliasExtVIds |= ObjToAliasExtVIds[Obj]; + return true; }); + + // Broadcast to every external ID mapped to this representative. 
+ for (auto ExtVId : MyExtVIds) { + Result.AliasSets[ExtVId] |= AliasExtVIds; + } } return Result; diff --git a/lib/Pointer/CMakeLists.txt b/lib/Pointer/CMakeLists.txt index 66b1d2710f..6be2f9d0c2 100644 --- a/lib/Pointer/CMakeLists.txt +++ b/lib/Pointer/CMakeLists.txt @@ -10,6 +10,9 @@ add_phasar_library(phasar_pointer LLVM_LINK_COMPONENTS Support + LINK_PUBLIC + roaring::roaring + MODULE_FILES PhasarPointer.cppm ) From 992604dbbb490f414f2aac5b665cc1ffbec84c57 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 19 May 2026 20:01:06 +0200 Subject: [PATCH 08/36] Vibe-code delta propagation --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 27 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 6f0e3ba7b9..ffa2cc5e2a 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -79,6 +79,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { struct NodeInfo { RawAliasSet PtsSet; + RawAliasSet PendingPts; // Assignment edges: pts(this) ⊆ pts(dst) for each dst. llvm::SmallVector AssignDsts; llvm::SmallDenseSet AssignDstSet; // dedup guard @@ -202,6 +203,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // Merge pts sets. 
const bool PtsGrew = Nodes[Rep].PtsSet.tryMergeWith(NRPts); if (PtsGrew) { + Nodes[Rep].PendingPts |= NRPts; PropWorklist.push_back(Rep); } @@ -319,6 +321,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { grow(Ptr); grow(Obj); // grow before indexing Nodes[Ptr] if (Nodes[Ptr].PtsSet.tryInsert(Obj)) { + Nodes[Ptr].PendingPts.insert(Obj); PropWorklist.push_back(Ptr); } } @@ -334,6 +337,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (Nodes[Src].AssignDstSet.insert(Dst).second) { Nodes[Src].AssignDsts.push_back(Dst); if (!Nodes[Src].PtsSet.empty()) { + // New edge: Dst has never seen Src's pts history, so mark all of + // Src's current pts as pending (not just the incremental delta). + Nodes[Src].PendingPts |= Nodes[Src].PtsSet; PropWorklist.push_back(Src); } } @@ -423,7 +429,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void propagate() { while (!PropWorklist.empty()) { ValueId U = rep(PropWorklist.pop_back_val()); - if (!Nodes.inbounds(U)) { + if (!Nodes.inbounds(U) || Nodes[U].PendingPts.empty()) { continue; } @@ -433,6 +439,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { Dsts.push_back(rep(V)); } + // Drain before iterating Dsts: onNewPointee → addPointee may write + // to Nodes[U].PendingPts while we iterate, and merge() may resize Nodes. + RawAliasSet UPending = std::move(Nodes[U].PendingPts); + for (ValueId VSnap : Dsts) { // Re-resolve: a prior iteration's merge() may have changed the rep. const ValueId V = rep(VSnap); @@ -440,17 +450,22 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } - RawAliasSet NewPts = Nodes[U].PtsSet - Nodes[V].PtsSet; - if (NewPts.empty()) { - // LCD: direct back-edge V→U with pts(U)⊆pts(V) → 2-cycle, collapse. 
+ bool AddedAny = false; + UPending.foreach([&](ValueId Obj) { + if (Nodes[V].PtsSet.tryInsert(Obj)) { + Nodes[V].PendingPts.insert(Obj); + onNewPointee(V, Obj); + AddedAny = true; + } + }); + if (!AddedAny) { + // LCD: V has all of U's pending wave, so V.PtsSet ⊇ U.PtsSet. if (Nodes[V].AssignDstSet.contains(U)) { U = merge(U, V); } continue; } - Nodes[V].PtsSet |= NewPts; PropWorklist.push_back(V); - NewPts.foreach ([&](ValueId NewObj) { onNewPointee(V, NewObj); }); } } } From 8f3f88e9ded6fb94e6cf04cf3b7b6d77c23835d4 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 19 May 2026 20:15:14 +0200 Subject: [PATCH 09/36] Let AI write more tests --- .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 203 ++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index 851ed41cc1..cac62a2e77 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -635,6 +635,209 @@ TEST(AndersenOTFAATest, FuncPtrCallbackThreeWayMerge) { doAnalysisAndCheckExact("context_14_2_c_dbg.ll", ExpectedResults); } +TEST(AndersenOTFAATest, FourLevelChainTwoObjects) { + // context_05_1: 4-level identity chain (id4→id3→id2→id1), called 4 times + // with &x and &y. All params/rets and call sites merge (context-insensitive). + // x and y allocas alias the chain but not each other. 
+ const auto MkArg = [](llvm::StringRef Fn) { + return TSL(ArgInFun{.Idx = 0, .InFunction = Fn}); + }; + const auto MkRet = [](llvm::StringRef Fn) { + return TSL(RetVal{.InFunction = Fn}); + }; + const auto MkCall = [](uint32_t Line) { + return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + }; + const std::vector Chain = { + MkArg("id1"), MkArg("id2"), MkArg("id3"), MkArg("id4"), + MkRet("id1"), MkRet("id2"), MkRet("id3"), MkRet("id4"), + MkCall(11), MkCall(12), MkCall(13), MkCall(14), + }; + // arg 0 of call at line 11 is &x; arg 0 of call at line 13 is &y. + const TSL XAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 11, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 13, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + GTMap ExpectedResults; + auto ChainAndBoth = Chain; + ChainAndBoth.push_back(XAlloca); + ChainAndBoth.push_back(YAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + auto XAliases = Chain; + XAliases.push_back(XAlloca); + ExpectedResults[XAlloca] = XAliases; + auto YAliases = Chain; + YAliases.push_back(YAlloca); + ExpectedResults[YAlloca] = YAliases; + doAnalysisAndCheckExact("context_05_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, FourLevelChainVariantTwoObjects) { + // context_07: foo→bar→baz→buzz 4-level identity chain, called with &x and + // &y. All params/rets and both call sites alias; x and y don't alias. 
+ const auto MkArg = [](llvm::StringRef Fn) { + return TSL(ArgInFun{.Idx = 0, .InFunction = Fn}); + }; + const auto MkRet = [](llvm::StringRef Fn) { + return TSL(RetVal{.InFunction = Fn}); + }; + const auto MkCall = [](uint32_t Line) { + return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + }; + const std::vector Chain = { + MkArg("buzz"), MkArg("baz"), MkArg("bar"), MkArg("foo"), + MkRet("buzz"), MkRet("baz"), MkRet("bar"), MkRet("foo"), + MkCall(11), MkCall(12), + }; + const TSL XAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 11, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 12, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + GTMap ExpectedResults; + auto ChainAndBoth = Chain; + ChainAndBoth.push_back(XAlloca); + ChainAndBoth.push_back(YAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + auto XAliases = Chain; + XAliases.push_back(XAlloca); + ExpectedResults[XAlloca] = XAliases; + auto YAliases = Chain; + YAliases.push_back(YAlloca); + ExpectedResults[YAlloca] = YAliases; + doAnalysisAndCheckExact("context_07_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, RecursionFourCallSites) { + // context_09_1: selfRecursion called with &k (twice) and &l (twice). + // Context-insensitive: Ptr and Ret alias all 4 call sites. + // k and l each alias the chain but not each other. 
+ const TSL Ptr = TSL(ArgInFun{.Idx = 0, .InFunction = "selfRecursion"}); + const TSL Ret = TSL(RetVal{.InFunction = "selfRecursion"}); + const auto MkCall = [](uint32_t Line) { + return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + }; + const std::vector Chain = {Ptr, Ret, MkCall(15), MkCall(17), + MkCall(18), MkCall(20)}; + const TSL KAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 15, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 18, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + GTMap ExpectedResults; + auto ChainAndBoth = Chain; + ChainAndBoth.push_back(KAlloca); + ChainAndBoth.push_back(LAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + auto KAliases = Chain; + KAliases.push_back(KAlloca); + ExpectedResults[KAlloca] = KAliases; + auto LAliases = Chain; + LAliases.push_back(LAlloca); + ExpectedResults[LAlloca] = LAliases; + doAnalysisAndCheckExact("context_09_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, ThreeWayMutualRecursionFourCallSites) { + // context_11_1: Forth↔Back↔Stop three-way mutual recursion, called with &k + // (twice) and &l (twice). All six params/rets and all four call sites alias. + // k and l each alias the chain but not each other. 
+ const TSL ForthPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Forth"}); + const TSL BackPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Back"}); + const TSL StopPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Stop"}); + const TSL ForthRet = TSL(RetVal{.InFunction = "Forth"}); + const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); + const TSL StopRet = TSL(RetVal{.InFunction = "Stop"}); + const auto MkCall = [](uint32_t Line) { + return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + }; + const std::vector Chain = {ForthPtr, BackPtr, StopPtr, + ForthRet, BackRet, StopRet, + MkCall(36), MkCall(37), + MkCall(38), MkCall(39)}; + const TSL KAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 36, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 38, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + GTMap ExpectedResults; + auto ChainAndBoth = Chain; + ChainAndBoth.push_back(KAlloca); + ChainAndBoth.push_back(LAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + auto KAliases = Chain; + KAliases.push_back(KAlloca); + ExpectedResults[KAlloca] = KAliases; + auto LAliases = Chain; + LAliases.push_back(LAlloca); + ExpectedResults[LAlloca] = LAliases; + doAnalysisAndCheckExact("context_11_1_c_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, TwoArgSecondRetFourCallSites) { + // context_12_0: argretq(p,q) returns q. Four call sites mix &x and &y: + // argretq(&y,&x) twice and argretq(&x,&y) twice. + // Context-insensitive: p and q both receive {&x,&y}; all alias. + // x and y allocas each alias the group but not each other. 
+ const TSL P = TSL(ArgInFun{.Idx = 0, .InFunction = "argretq"}); + const TSL Q = TSL(ArgInFun{.Idx = 1, .InFunction = "argretq"}); + const TSL Ret = TSL(RetVal{.InFunction = "argretq"}); + const auto MkCall = [](uint32_t Line) { + return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + }; + const std::vector Chain = {P, Q, Ret, MkCall(8), MkCall(9), + MkCall(10), MkCall(11)}; + // arg 1 of call at line 8 is &x (argretq(&y, &x)); arg 0 is &y. + const TSL XAlloca = TSL(OperandOf{ + .OperandIndex = 1, + .Inst = LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + GTMap ExpectedResults; + auto ChainAndBoth = Chain; + ChainAndBoth.push_back(XAlloca); + ChainAndBoth.push_back(YAlloca); + for (const auto &Item : Chain) { + ExpectedResults[Item] = ChainAndBoth; + } + auto XAliases = Chain; + XAliases.push_back(XAlloca); + ExpectedResults[XAlloca] = XAliases; + auto YAliases = Chain; + YAliases.push_back(YAlloca); + ExpectedResults[YAlloca] = YAliases; + doAnalysisAndCheckExact("context_12_0_c_dbg.ll", ExpectedResults); +} + } // namespace int main(int Argc, char **Argv) { From 4619bcb3749bdf215c1dc2b780cc4648851cb35c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 20 May 2026 19:58:33 +0200 Subject: [PATCH 10/36] Handle global initializers --- .../PhasarLLVM/Pointer/LLVMGlobalInitCache.h | 92 +++++++++++++++++++ include/phasar/Pointer/RawAliasSet.h | 2 +- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 35 ++++--- .../Pointer/LLVMPointerAssignmentGraph.cpp | 72 ++------------- 4 files changed, 122 insertions(+), 79 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h 
b/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h new file mode 100644 index 0000000000..12e5fee836 --- /dev/null +++ b/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h @@ -0,0 +1,92 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/ValueCompressor.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Casting.h" + +#include +#include + +namespace psr { + +/// Memoised walker for global-variable pointer initializers. +/// +/// Traverses a \c llvm::Constant initializer and collects the \c ValueId of +/// every pointer-typed sub-constant it contains (direct pointer, GEP base, +/// or pointer elements of an aggregate). Results are cached so shared +/// sub-expressions are not revisited. +/// +/// Create one instance per analysis run; it is tied to a single +/// \c ValueCompressor via the \p GetVar callback. +struct GlobalInitCache { + std::unordered_map> + Cache; + + /// Returns the \c ValueId slice for all pointer-typed constants reachable + /// from \p Const. \p GetVar maps an \c llvm::Value* to a \c ValueId + /// (typically \c getOrInsertVar). 
+ template GetVarFn> + [[nodiscard]] llvm::ArrayRef + getOrCreate(const llvm::Constant *Const, GetVarFn &&GetVar) { + if (definitelyContainsNoPointer(Const)) { + return {}; + } + + auto [It, Inserted] = Cache.try_emplace(Const); + if (!Inserted) { + return It->second; + } + auto &Vec = It->second; + + if (llvm::isa(Const)) { + return {}; + } + + if (const auto *CGep = llvm::dyn_cast(Const)) { + // TODO: Properly handle constant GEPs + return getOrCreate( + llvm::cast(CGep->getPointerOperand()), GetVar); + } + + if (Const->getType()->isPointerTy()) { + Vec.push_back(std::invoke(GetVar, Const)); + return Vec; + } + + // TODO: Get rid of the recursion + + if (const auto *Agg = llvm::dyn_cast(Const)) { + if (Agg->getType()->isArrayTy() && + definitelyContainsNoPointer( + Agg->getType()->getArrayElementType())) { + return {}; + } + for (size_t I = 0, N = Agg->getNumOperands(); I < N; ++I) { + const auto *Elem = llvm::cast( + Agg->getAggregateElement(I)->stripPointerCastsAndAliases()); + auto Sub = getOrCreate(Elem, GetVar); + Vec.append(Sub.begin(), Sub.end()); + } + } + + // TODO: more + + return Vec; + } +}; + +} // namespace psr diff --git a/include/phasar/Pointer/RawAliasSet.h b/include/phasar/Pointer/RawAliasSet.h index 58ae3f8117..146846b221 100644 --- a/include/phasar/Pointer/RawAliasSet.h +++ b/include/phasar/Pointer/RawAliasSet.h @@ -150,7 +150,7 @@ template class RoaringAliasSet { void operator|=(const RoaringAliasSet &Other) { Bits |= Other.Bits; } void operator&=(const RoaringAliasSet &Other) { Bits &= Other.Bits; } void operator-=(const RoaringAliasSet &Other) { Bits -= Other.Bits; } - [[nodiscard]] RoaringAliasSet operator-(const RoaringAliasSet &Other) { + [[nodiscard]] RoaringAliasSet operator-(const RoaringAliasSet &Other) const { return Bits - Other.Bits; } diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index ffa2cc5e2a..a477fa5322 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ 
b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -10,6 +10,7 @@ #include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IotaIterator.h" @@ -179,18 +180,13 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // Snapshot all NonRep data before any addAssignEdge / grow calls that // may reallocate Nodes and invalidate references. - llvm::SmallVector NRAssignDsts = - std::move(Nodes[NonRep].AssignDsts); + auto NRAssignDsts = std::move(Nodes[NonRep].AssignDsts); Nodes[NonRep].AssignDstSet.clear(); const RawAliasSet NRPts = Nodes[NonRep].PtsSet; - llvm::SmallVector NRLoadDsts = - std::move(Nodes[NonRep].LoadDsts); - llvm::SmallVector NRStoreSrcs = - std::move(Nodes[NonRep].StoreSrcs); - llvm::SmallVector NRMemCopyAsSrc = - std::move(Nodes[NonRep].MemCopyAsSrc); - llvm::SmallVector NRMemCopyAsDst = - std::move(Nodes[NonRep].MemCopyAsDst); + auto NRLoadDsts = std::move(Nodes[NonRep].LoadDsts); + auto NRStoreSrcs = std::move(Nodes[NonRep].StoreSrcs); + auto NRMemCopyAsSrc = std::move(Nodes[NonRep].MemCopyAsSrc); + auto NRMemCopyAsDst = std::move(Nodes[NonRep].MemCopyAsDst); // Re-register NonRep's assign edges under Rep. for (ValueId Dst : NRAssignDsts) { @@ -201,14 +197,19 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } // Merge pts sets. + const auto OldRepPts = Nodes[Rep].PtsSet; const bool PtsGrew = Nodes[Rep].PtsSet.tryMergeWith(NRPts); if (PtsGrew) { Nodes[Rep].PendingPts |= NRPts; PropWorklist.push_back(Rep); + // Fire Rep's pre-existing load/store/memcopy constraints for pointees + // absorbed from NonRep that Rep didn't previously have. 
+ const auto Diff = NRPts - OldRepPts; + Diff.foreach ([&](ValueId NewObj) { onNewPointee(Rep, NewObj); }); } // Snapshot Rep's pts (after merge) for retroactive constraint firing. - const RawAliasSet RepPts = Nodes[Rep].PtsSet; + const auto RepPts = Nodes[Rep].PtsSet; // Transfer NonRep's load constraints and retroactively fire them for // Rep's existing pts members. @@ -451,7 +452,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } bool AddedAny = false; - UPending.foreach([&](ValueId Obj) { + UPending.foreach ([&](ValueId Obj) { if (Nodes[V].PtsSet.tryInsert(Obj)) { Nodes[V].PendingPts.insert(Obj); onNewPointee(V, Obj); @@ -473,6 +474,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- IR translation ------------------------------------------------- void initGlobals() { + GlobalInitCache GCache; for (const auto &G : IRDB.getModule()->globals()) { if (definitelyContainsNoPointer(G.getValueType())) { continue; @@ -480,6 +482,15 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const ValueId VarId = getOrInsertVar(PAGVariable(&G)); const ValueId ObjId = getOrInsertObj(PAGVariable(&G)); addPointee(VarId, ObjId); + if (!G.hasInitializer()) { + continue; + } + for (ValueId SrcId : + GCache.getOrCreate(G.getInitializer(), [&](const llvm::Value *V) { + return getOrInsertVar(PAGVariable(V)); + })) { + addStore(VarId, SrcId); + } } propagate(); } diff --git a/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp b/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp index 63ebf47d58..4a0800c1ee 100644 --- a/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp +++ b/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp @@ -1,6 +1,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" #include "phasar/PhasarLLVM/Utils/LLVMFunctionDataFlowFacts.h" #include 
"phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Pointer/PointerAssignmentGraph.h" @@ -35,67 +36,6 @@ std::string psr::to_string(PAGVariable Var) { namespace { -struct GlobalCache { - const llvm::DataLayout &DL; // NOLINT - // Due to the recursion in getOrCreateGCacheEntry, we need pointer stability - std::unordered_map> - Cache{}; - - [[nodiscard]] llvm::ArrayRef getOrCreateGCacheEntry( - LLVMPBStrategyRef Strategy, const llvm::Constant *Const, - std::invocable auto GetVariable) { - if (definitelyContainsNoPointer(Const)) { - return {}; - } - - auto [It, Inserted] = Cache.try_emplace(Const); - if (!Inserted) { - return It->second; - } - - auto &Vec = It->second; - - // We do not care about null here - if (llvm::isa(Const)) { - return {}; - } - - if (const auto *CGep = llvm::dyn_cast(Const)) { - // TODO: Properly handle constant GEPs - return getOrCreateGCacheEntry( - Strategy, llvm::cast(CGep->getPointerOperand()), - GetVariable); - } - - if (Const->getType()->isPointerTy()) { - Vec.push_back(GetVariable(Const, Strategy)); - - return Vec; - } - - // TODO: Get rid of the recursion - - if (const auto *Arr = llvm::dyn_cast(Const)) { - if (Arr->getType()->isArrayTy() && - definitelyContainsNoPointer(Arr->getType()->getArrayElementType())) { - return {}; - } - - size_t ArrayLen = Arr->getNumOperands(); - for (size_t I = 0; I < ArrayLen; ++I) { - auto *Elem = llvm::cast( - Arr->getAggregateElement(I)->stripPointerCastsAndAliases()); - auto ElemVars = getOrCreateGCacheEntry(Strategy, Elem, GetVariable); - Vec.append(ElemVars.begin(), ElemVars.end()); - } - return Vec; - } - - // TODO: more - - return Vec; - } -}; struct PAGMappedLibrarySummary { const library_summary::LLVMFunctionDataFlowFacts &Facts; // NOLINT @@ -202,7 +142,7 @@ struct [[clang::internal_linkage]] LLVMPAGBuilder::PAGBuildData { void initializeGlobals(const LLVMProjectIRDB &IRDB, LLVMPBStrategyRef Strategy) { - GlobalCache GCache{IRDB.getModule()->getDataLayout()}; + GlobalInitCache GCache; 
for (const auto &Glob : IRDB.getModule()->globals()) { if (definitelyContainsNoPointer(Glob.getValueType())) { @@ -215,12 +155,12 @@ struct [[clang::internal_linkage]] LLVMPAGBuilder::PAGBuildData { } } - void initializeGlobal(GlobalCache &GCache, LLVMPBStrategyRef Strategy, + void initializeGlobal(GlobalInitCache &GCache, LLVMPBStrategyRef Strategy, const llvm::GlobalVariable &Glob) { auto GlobObj = getVariable(&Glob, Strategy); - auto Stores = GCache.getOrCreateGCacheEntry( - Strategy, Glob.getInitializer(), - [this](const llvm::Value *V, LLVMPBStrategyRef Strategy) { + auto Stores = GCache.getOrCreate( + Glob.getInitializer(), + [this, Strategy](const llvm::Value *V) { return getVariable(V, Strategy); }); From 0f54781ef166e68e97909792569567c673eada47 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 20 May 2026 20:15:46 +0200 Subject: [PATCH 11/36] Reduce unnecessary copies --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 62 +++++++++++------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index a477fa5322..d457b5cd90 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include #include @@ -233,8 +234,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (Nodes[Rep].MemCopyAsSrcSet.insert(D).second) { Nodes[Rep].MemCopyAsSrc.push_back(D); if (Nodes.inbounds(D)) { - // Snapshot DstPtr's pts: addAssignEdge may resize Nodes. 
- const RawAliasSet DstPts = Nodes[D].PtsSet; + const auto &DstPts = Nodes[D].PtsSet; RepPts.foreach ([&](ValueId O1) { DstPts.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); }); @@ -247,8 +247,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (Nodes[Rep].MemCopyAsDstSet.insert(S).second) { Nodes[Rep].MemCopyAsDst.push_back(S); if (Nodes.inbounds(S)) { - // Snapshot SrcPtr's pts: addAssignEdge may resize Nodes. - const RawAliasSet SrcPts = Nodes[S].PtsSet; + const auto &SrcPts = Nodes[S].PtsSet; SrcPts.foreach ([&](ValueId O1) { RepPts.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); }); @@ -311,10 +310,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // INVARIANT: every method resolves all ids through rep() first, then calls // grow() for all ids before accessing Nodes by reference. Any grow() call // may reallocate the Nodes backing array, so no NodeInfo& must be held - // across a grow() call or across any call that may invoke grow() (i.e. - // addAssignEdge, addPointee, etc.). Where the existing pts set must be - // iterated while addAssignEdge is called inside, the pts set is first - // copied into a local snapshot. + // across a grow() call. addAssignEdge does not call grow(), so references + // into Nodes remain valid across it. void addPointee(ValueId Ptr, ValueId Obj) { Ptr = rep(Ptr); @@ -333,8 +330,15 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (Src == Dst) { return; } - grow(Src); - grow(Dst); // grow before indexing Nodes[Src] + + if (!Nodes.inbounds(Src) || !Nodes.inbounds(Dst)) [[unlikely]] { + llvm::report_fatal_error( + "Connecting nodes which are not allocated yet. 
Node allocation " + "should happen through getOrInsertVar or getOrInsertObj"); + } + + // grow(Src); + // grow(Dst); // grow before indexing Nodes[Src] if (Nodes[Src].AssignDstSet.insert(Dst).second) { Nodes[Src].AssignDsts.push_back(Dst); if (!Nodes[Src].PtsSet.empty()) { @@ -350,9 +354,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { Ptr = rep(Ptr); Dst = rep(Dst); grow(Ptr); - grow(Dst); // grow before accessing Nodes[Ptr] - // Snapshot pts: addAssignEdge inside the lambda may resize Nodes. - const RawAliasSet ExistingPts = Nodes[Ptr].PtsSet; + grow(Dst); + const auto &ExistingPts = Nodes[Ptr].PtsSet; ExistingPts.foreach ([&](ValueId Obj) { addAssignEdge(Obj, Dst); }); if (Nodes[Ptr].LoadDstSet.insert(Dst).second) { Nodes[Ptr].LoadDsts.push_back(Dst); @@ -363,9 +366,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { Ptr = rep(Ptr); Src = rep(Src); grow(Ptr); - grow(Src); // grow before accessing Nodes[Ptr] - // Snapshot pts: addAssignEdge inside the lambda may resize Nodes. - const RawAliasSet ExistingPts = Nodes[Ptr].PtsSet; + grow(Src); + const auto &ExistingPts = Nodes[Ptr].PtsSet; ExistingPts.foreach ([&](ValueId Obj) { addAssignEdge(Src, Obj); }); if (Nodes[Ptr].StoreSrcSet.insert(Src).second) { Nodes[Ptr].StoreSrcs.push_back(Src); @@ -376,11 +378,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { SrcPtr = rep(SrcPtr); DstPtr = rep(DstPtr); grow(SrcPtr); - grow(DstPtr); // grow before accessing Nodes[SrcPtr/DstPtr] - // Snapshot both pts sets: addAssignEdge inside the lambdas may resize - // Nodes, invalidating any reference into it. 
- const RawAliasSet SrcPts = Nodes[SrcPtr].PtsSet; - const RawAliasSet DstPts = Nodes[DstPtr].PtsSet; + grow(DstPtr); + const auto &SrcPts = Nodes[SrcPtr].PtsSet; + const auto &DstPts = Nodes[DstPtr].PtsSet; SrcPts.foreach ([&](ValueId O1) { DstPts.foreach ([&](ValueId O2) { addAssignEdge(O1, O2); }); }); @@ -396,12 +396,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void onNewPointee(ValueId PtrRep, ValueId NewObj) { assert(Nodes.inbounds(PtrRep)); - // Snapshot all constraint lists before any addAssignEdge call: grow() - // inside addAssignEdge may reallocate Nodes, invalidating references. - const auto LoadDsts = Nodes[PtrRep].LoadDsts; - const auto StoreSrcs = Nodes[PtrRep].StoreSrcs; - const auto MemSrcs = Nodes[PtrRep].MemCopyAsSrc; - const auto MemDsts = Nodes[PtrRep].MemCopyAsDst; + const auto &LoadDsts = Nodes[PtrRep].LoadDsts; + const auto &StoreSrcs = Nodes[PtrRep].StoreSrcs; + const auto &MemSrcs = Nodes[PtrRep].MemCopyAsSrc; + const auto &MemDsts = Nodes[PtrRep].MemCopyAsDst; for (ValueId Dst : LoadDsts) { addAssignEdge(NewObj, Dst); @@ -413,16 +411,14 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!Nodes.inbounds(DstPtr)) { continue; } - // Snapshot DstPtr's pts: addAssignEdge may resize Nodes. - const RawAliasSet DstPts = Nodes[DstPtr].PtsSet; + const auto &DstPts = Nodes[DstPtr].PtsSet; DstPts.foreach ([&](ValueId O2) { addAssignEdge(NewObj, O2); }); } for (ValueId SrcPtr : MemDsts) { if (!Nodes.inbounds(SrcPtr)) { continue; } - // Snapshot SrcPtr's pts: addAssignEdge may resize Nodes. 
- const RawAliasSet SrcPts = Nodes[SrcPtr].PtsSet; + const auto &SrcPts = Nodes[SrcPtr].PtsSet; SrcPts.foreach ([&](ValueId O1) { addAssignEdge(O1, NewObj); }); } } @@ -440,8 +436,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { Dsts.push_back(rep(V)); } - // Drain before iterating Dsts: onNewPointee → addPointee may write - // to Nodes[U].PendingPts while we iterate, and merge() may resize Nodes. + // Drain before iterating Dsts: addAssignEdge inside onNewPointee/merge() + // may write to Nodes[U].PendingPts while we iterate. RawAliasSet UPending = std::move(Nodes[U].PendingPts); for (ValueId VSnap : Dsts) { From 532c62180c69d987a9b31e745bd196814f1dc2ce Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 26 May 2026 20:20:59 +0200 Subject: [PATCH 12/36] Fix globals + fnptr handling --- external/CRoaring | 2 +- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 31 ++++++++--- test/llvm_test_code/pointers/CMakeLists.txt | 3 ++ .../pointers/andersen_otf_global_init.c | 9 ++++ .../pointers/andersen_otf_merge_load.c | 22 ++++++++ .../pointers/andersen_otf_vtable.cpp | 16 ++++++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 54 +++++++++++++++++++ 7 files changed, 128 insertions(+), 9 deletions(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_global_init.c create mode 100644 test/llvm_test_code/pointers/andersen_otf_merge_load.c create mode 100644 test/llvm_test_code/pointers/andersen_otf_vtable.cpp diff --git a/external/CRoaring b/external/CRoaring index d3092b5b4f..5505f1bf1a 160000 --- a/external/CRoaring +++ b/external/CRoaring @@ -1 +1 @@ -Subproject commit d3092b5b4f724b48542d2de14e32f08cd45a282c +Subproject commit 5505f1bf1a62d9e7adad798b418ce873ddff7b1d diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index d457b5cd90..bbdec9817c 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -269,12 +269,14 @@ struct 
[[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!llvm::isa(V)) { const ValueId VId = getOrInsertVar(PAGVariable(V)); - // A function used as a value (e.g. stored into a function-pointer - // variable) is an addressable abstract object: pts(F) = {F}. - // Without this, pts(fp_alloca) never gains F and OTF call resolution - // silently produces no callees. if (llvm::isa(V)) { + // Function address is its own abstract object: pts(F) = {F}. addPointee(VId, VId); + } else if (const auto *GVar = llvm::dyn_cast(V)) { + // Global variable used as a pointer: ensure its object exists so + // pts(var_G) = {obj_G} (e.g. `return &x` where x is a global). + const ValueId OId = getOrInsertObj(PAGVariable(GVar)); + addPointee(VId, OId); } std::invoke(Handler, VId); return; @@ -294,6 +296,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const ValueId GId = getOrInsertVar(PAGVariable(GObj)); if (llvm::isa(GObj)) { addPointee(GId, GId); + } else if (const auto *GVar = + llvm::dyn_cast(GObj)) { + const ValueId OId = getOrInsertObj(PAGVariable(GVar)); + addPointee(GId, OId); } std::invoke(Handler, GId); continue; @@ -481,10 +487,19 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!G.hasInitializer()) { continue; } - for (ValueId SrcId : - GCache.getOrCreate(G.getInitializer(), [&](const llvm::Value *V) { - return getOrInsertVar(PAGVariable(V)); - })) { + for (ValueId SrcId : GCache.getOrCreate( + G.getInitializer(), [&](const llvm::Value *V) { + const ValueId VId = getOrInsertVar(PAGVariable(V)); + if (llvm::isa(V)) { + // Function address is its own abstract object (self-pointing). 
+ addPointee(VId, VId); + } else if (const auto *GV = + llvm::dyn_cast(V)) { + const ValueId OId = getOrInsertObj(PAGVariable(GV)); + addPointee(VId, OId); + } + return VId; + })) { addStore(VarId, SrcId); } } diff --git a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index fb255af43f..c4178a8f15 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -1,6 +1,9 @@ set(lca_files andersen_otf_interproc.c andersen_otf_fp.c + andersen_otf_global_init.c + andersen_otf_merge_load.c + andersen_otf_vtable.cpp basic_01.c basic_02.c basic_03.c diff --git a/test/llvm_test_code/pointers/andersen_otf_global_init.c b/test/llvm_test_code/pointers/andersen_otf_global_init.c new file mode 100644 index 0000000000..3f4ddbe1d0 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_global_init.c @@ -0,0 +1,9 @@ +// Global pointer @p is initialised to &@x. +// Loading from @p must yield a pointer that aliases @x (Bug 2 soundness). +int x = 0; +int *p = &x; + +int main() { + int *q = p; + return 0; +} diff --git a/test/llvm_test_code/pointers/andersen_otf_merge_load.c b/test/llvm_test_code/pointers/andersen_otf_merge_load.c new file mode 100644 index 0000000000..1645247649 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_merge_load.c @@ -0,0 +1,22 @@ +// h->f->h cycle; h returns *p (the load result). +// After both h(&px) and h(&py), h's return value must alias x and y. 
+static int *f(int **p); + +static int *h(int **p) { + f(p); + return *p; +} + +static int *f(int **p) { + return h(p); +} + +int main() { + int x = 0; + int y = 0; + int *px = &x; + int *py = &y; + h(&px); + h(&py); + return 0; +} diff --git a/test/llvm_test_code/pointers/andersen_otf_vtable.cpp b/test/llvm_test_code/pointers/andersen_otf_vtable.cpp new file mode 100644 index 0000000000..9fa9eca0f4 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_vtable.cpp @@ -0,0 +1,16 @@ +// Virtual dispatch via a pointer forces the vtable lookup path. +// call_get's return value must alias @x (returned by A::get). +struct A { + virtual int *get(); +}; + +int x; +int *A::get() { return &x; } + +static int *call_get(A *a) { return a->get(); } + +int main() { + A a; + int *p = call_get(&a); + return 0; +} diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index cac62a2e77..fb6a87250a 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -838,6 +838,60 @@ TEST(AndersenOTFAATest, TwoArgSecondRetFourCallSites) { doAnalysisAndCheckExact("context_12_0_c_dbg.ll", ExpectedResults); } +TEST(AndersenOTFAATest, VTableDispatch) { + // Virtual call via A* in call_get must resolve through the vtable. + // A::get() returns @x, so call_get's return must alias @x. + const TSL CallGetRet = + TSL(RetVal{.InFunction = "_ZL8call_getP1A"}); + const TSL X = TSL(GlobalVar{.Name = "x"}); + const GTMap ExpectedResults = { + {CallGetRet, {CallGetRet, X}}, + {X, {X, CallGetRet}}, + }; + doAnalysisAndCheckExact("andersen_otf_vtable_cpp_dbg.ll", ExpectedResults); +} + +TEST(AndersenOTFAATest, GlobalPtrInitializer) { + // @p = global ptr @x; loading from @p must alias @x (Bug 2 soundness). 
+ const TSL LoadQ = + TSL(LineColFunOp{.Line = 7, + .Col = 12, + .InFunction = "main", + .OpCode = llvm::Instruction::Load}); + const TSL X = TSL(GlobalVar{.Name = "x"}); + const GTMap ExpectedResults = { + {LoadQ, {LoadQ, X}}, + {X, {X, LoadQ}}, + }; + doAnalysisAndCheckExact("andersen_otf_global_init_c_dbg.ll", + ExpectedResults); +} + +TEST(AndersenOTFAATest, MergeLoadConstraint) { + // h->f->h cycle; h returns *p. + // ret(h) must alias x and y after h(&px) and h(&py) (Bug 1 soundness). + const TSL RetH = TSL(RetVal{.InFunction = "h"}); + const TSL VarX = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 17, + .Col = 8, + .InFunction = "main", + .OpCode = llvm::Instruction::Store}}); + const TSL VarY = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 18, + .Col = 8, + .InFunction = "main", + .OpCode = llvm::Instruction::Store}}); + const GTMap ExpectedResults = { + {RetH, {RetH, VarX, VarY}}, + {VarX, {RetH, VarX}}, + {VarY, {RetH, VarY}}, + }; + doAnalysisAndCheckExact("andersen_otf_merge_load_c_dbg.ll", + ExpectedResults); +} + } // namespace int main(int Argc, char **Argv) { From 4789a3a052f4e3057fd52d26f87186c2e9f390e8 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 27 May 2026 18:42:41 +0200 Subject: [PATCH 13/36] Better vtable handling --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 173 ++++++++++++------ test/llvm_test_code/pointers/CMakeLists.txt | 1 + .../pointers/andersen_otf_vtable2.cpp | 22 +++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 16 ++ 4 files changed, 155 insertions(+), 57 deletions(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_vtable2.cpp diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index bbdec9817c..4fbe63c42f 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -9,13 +9,17 @@ #include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" +#include 
"phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" +#include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/LibrarySummary.h" #include "phasar/Utils/UnionFind.h" +#include "phasar/Utils/ValueCompressor.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -109,6 +113,14 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { std::optional CSRetVal; }; + struct VCallRecord { + const llvm::CallBase *CS; + ValueId VtablePtrId; + uint64_t VtableIndex; + ArgList Args; + std::optional CSRetVal; + }; + // ---- Data fields ---------------------------------------------------- const LLVMProjectIRDB &IRDB; // NOLINT @@ -124,6 +136,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { TypedVector Nodes; llvm::SmallVector UnresolvedFPCalls; + llvm::SmallVector UnresolvedVCalls; llvm::DenseMap> ConnectedCallees; llvm::SmallVector PropWorklist; @@ -487,19 +500,19 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!G.hasInitializer()) { continue; } - for (ValueId SrcId : GCache.getOrCreate( - G.getInitializer(), [&](const llvm::Value *V) { - const ValueId VId = getOrInsertVar(PAGVariable(V)); - if (llvm::isa(V)) { - // Function address is its own abstract object (self-pointing). 
- addPointee(VId, VId); - } else if (const auto *GV = - llvm::dyn_cast(V)) { - const ValueId OId = getOrInsertObj(PAGVariable(GV)); - addPointee(VId, OId); - } - return VId; - })) { + for (ValueId SrcId : + GCache.getOrCreate(G.getInitializer(), [&](const llvm::Value *V) { + const ValueId VId = getOrInsertVar(PAGVariable(V)); + if (llvm::isa(V)) { + // Function address is its own abstract object (self-pointing). + addPointee(VId, VId); + } else if (const auto *GV = + llvm::dyn_cast(V)) { + const ValueId OId = getOrInsertObj(PAGVariable(GV)); + addPointee(VId, OId); + } + return VId; + })) { addStore(VarId, SrcId); } } @@ -676,6 +689,70 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { propagate(); } + void resolveVtableCall(const llvm::CallBase *CS, ValueId VtablePtrId, + uint64_t VtableIndex, const ArgList &Args, + std::optional CSRetVal) { + if (!Nodes.inbounds(VtablePtrId)) { + // return; + llvm::report_fatal_error("Invalid Vtable Id #" + + llvm::Twine(uint32_t(VtablePtrId))); + } + // Snapshot: connectCallee→propagate() may grow pts(VtablePtrId). 
+ const RawAliasSet VPPts = Nodes[VtablePtrId].PtsSet; + VPPts.foreach ([&](ValueId ObjId) { + if (!Nodes.inbounds(ObjId)) { + return false; + } + for (const auto &Var : LocalVC.id2vars(ObjId)) { + const auto *GV = llvm::dyn_cast_or_null( + Var.getBase().valueOrNull()); + if (!GV || !GV->hasName() || + !GV->getName().starts_with(DIBasedTypeHierarchy::VTablePrefix) || + !GV->hasInitializer()) { + continue; + } + const auto *VTStruct = + llvm::dyn_cast(GV->getInitializer()); + if (!VTStruct) { + continue; + } + auto VFs = LLVMVFTable::getVFVectorFromIRVTable(*VTStruct); + if (VtableIndex >= VFs.size()) { + continue; + } + const auto *Callee = VFs[VtableIndex]; + if (!Callee || !isConsistentCall(CS, Callee)) { + continue; + } + connectCallee(CS, Callee, Args, CSRetVal); + } + return true; + }); + } + + void resolveFPCall(const llvm::CallBase *CS, ValueId FPId, + const ArgList &Args, std::optional CSRetVal) { + if (!Nodes.inbounds(FPId)) { + llvm::report_fatal_error("Invalid FPId"); + } + // Snapshot pts(FPId): connectCallee→propagate() may grow pts(FPId). + const RawAliasSet FPPts = Nodes[FPId].PtsSet; + FPPts.foreach ([&](ValueId ObjId) { + if (!Nodes.inbounds(ObjId)) { + // Iteration is in sorted order + return false; + } + for (const auto &Var : LocalVC.id2vars(ObjId)) { + const auto *Fun = + llvm::dyn_cast_or_null(Var.getBase().valueOrNull()); + if (Fun && isConsistentCall(CS, Fun)) { + connectCallee(CS, Fun, Args, CSRetVal); + } + } + return true; + }); + } + void handleCall(const llvm::CallBase *C) { if (C->isInlineAsm() || C->isDebugOrPseudoInst()) { return; @@ -711,61 +788,42 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } - // Indirect call: connect already-known targets, record for fixpoint. - const ValueId FPId = getOrInsertVar(PAGVariable(FnPtr)); - - const auto ConnectKnownTargets = [&]() { - if (!Nodes.inbounds(FPId)) { - return; - } - // Snapshot pts(FPId): connectCallee→propagate() may grow pts(FPId). 
- const RawAliasSet FPPts = Nodes[FPId].PtsSet; - FPPts.foreach ([&](ValueId ObjId) { - if (!Nodes.inbounds(ObjId)) { - // Iteration is in sorted order - return false; - } - for (const auto &Var : LocalVC.id2vars(ObjId)) { - const auto *Fun = llvm::dyn_cast_or_null( - Var.getBase().valueOrNull()); - if (Fun) { - connectCallee(C, Fun, Args, CSRetVal); - } - } - return true; + // Virtual call: read the concrete vtable at the specific slot index. + if (auto VCallInfo = getVFTIndexAndVT(C)) { + auto [VtablePtr, VtableIndex] = *VCallInfo; + const ValueId VtablePtrId = getOrInsertVar(PAGVariable(VtablePtr)); + resolveVtableCall(C, VtablePtrId, VtableIndex, Args, CSRetVal); + UnresolvedVCalls.push_back(VCallRecord{ + .CS = C, + .VtablePtrId = VtablePtrId, + .VtableIndex = VtableIndex, + .Args = std::move(Args), + .CSRetVal = CSRetVal, }); - }; + return; + } - ConnectKnownTargets(); + // Indirect call: connect already-known targets, record for fixpoint. + const ValueId FPId = getOrInsertVar(PAGVariable(FnPtr)); + resolveFPCall(C, FPId, Args, CSRetVal); UnresolvedFPCalls.push_back(FPCallRecord{ .CS = C, .FPId = FPId, - .Args = {Args.begin(), Args.end()}, + .Args = std::move(Args), .CSRetVal = CSRetVal, }); } void checkUnresolvedFPCalls() { for (const auto &Rec : UnresolvedFPCalls) { - if (!Nodes.inbounds(Rec.FPId)) { - continue; - } - // Snapshot pts(FPId): connectCallee→propagate() may grow it. 
- const RawAliasSet FPPts = Nodes[Rec.FPId].PtsSet; - FPPts.foreach ([&](ValueId ObjId) { - if (!Nodes.inbounds(ObjId)) { - // Iteration is in sorted order - return false; - } - for (const auto &Var : LocalVC.id2vars(ObjId)) { - const auto *Fun = llvm::dyn_cast_or_null( - Var.getBase().valueOrNull()); - if (Fun) { - connectCallee(Rec.CS, Fun, Rec.Args, Rec.CSRetVal); - } - } - return true; - }); + resolveFPCall(Rec.CS, Rec.FPId, Rec.Args, Rec.CSRetVal); + } + } + + void checkUnresolvedVCalls() { + for (const auto &Rec : UnresolvedVCalls) { + resolveVtableCall(Rec.CS, Rec.VtablePtrId, Rec.VtableIndex, Rec.Args, + Rec.CSRetVal); } } @@ -897,6 +955,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { propagate(); } checkUnresolvedFPCalls(); + checkUnresolvedVCalls(); } while (!FunctionWorklist.empty()); return buildResult(); diff --git a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index c4178a8f15..406b4b3dc4 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -4,6 +4,7 @@ set(lca_files andersen_otf_global_init.c andersen_otf_merge_load.c andersen_otf_vtable.cpp + andersen_otf_vtable2.cpp basic_01.c basic_02.c basic_03.c diff --git a/test/llvm_test_code/pointers/andersen_otf_vtable2.cpp b/test/llvm_test_code/pointers/andersen_otf_vtable2.cpp new file mode 100644 index 0000000000..4ea6f652eb --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_vtable2.cpp @@ -0,0 +1,22 @@ +// Two virtual methods in the same vtable. +// call_getX (slot 0) must alias @x; call_getY (slot 1) must alias @y. +// With imprecise (all-slots) vtable handling both rets would alias both +// globals; the slot-specific path must keep them separate. 
+struct B { + virtual int *getX(); + virtual int *getY(); +}; + +int x, y; +int *B::getX() { return &x; } +int *B::getY() { return &y; } + +static int *call_getX(B *b) { return b->getX(); } +static int *call_getY(B *b) { return b->getY(); } + +int main() { + B b; + int *px = call_getX(&b); + int *py = call_getY(&b); + return 0; +} diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index fb6a87250a..37e1c3b004 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -892,6 +892,22 @@ TEST(AndersenOTFAATest, MergeLoadConstraint) { ExpectedResults); } +TEST(AndersenOTFAATest, VTableDispatchPrecision) { + // B has two virtual methods: getX (slot 0) returns @x, getY (slot 1) + // returns @y. Per-slot dispatch must keep the two return values separate. + const TSL RetGetX = TSL(RetVal{.InFunction = "_ZL9call_getXP1B"}); + const TSL RetGetY = TSL(RetVal{.InFunction = "_ZL9call_getYP1B"}); + const TSL X = TSL(GlobalVar{.Name = "x"}); + const TSL Y = TSL(GlobalVar{.Name = "y"}); + const GTMap ExpectedResults = { + {RetGetX, {RetGetX, X}}, + {X, {X, RetGetX}}, + {RetGetY, {RetGetY, Y}}, + {Y, {Y, RetGetY}}, + }; + doAnalysisAndCheckExact("andersen_otf_vtable2_cpp_dbg.ll", ExpectedResults); +} + } // namespace int main(int Argc, char **Argv) { From ed0b6cb4bbc7a58d320c0a1b30343e5b5080e0cd Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 27 May 2026 19:14:34 +0200 Subject: [PATCH 14/36] Fix minor bug in vtable handling + add failing test case for too early fixpoint --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 3 +- test/llvm_test_code/pointers/CMakeLists.txt | 1 + .../andersen_otf_fp_already_processed.c | 51 +++++++++++++++++++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 19 +++++++ 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_fp_already_processed.c diff --git 
a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 4fbe63c42f..de6f9179ec 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -692,8 +692,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void resolveVtableCall(const llvm::CallBase *CS, ValueId VtablePtrId, uint64_t VtableIndex, const ArgList &Args, std::optional CSRetVal) { + VtablePtrId = rep(VtablePtrId); if (!Nodes.inbounds(VtablePtrId)) { - // return; llvm::report_fatal_error("Invalid Vtable Id #" + llvm::Twine(uint32_t(VtablePtrId))); } @@ -732,6 +732,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { void resolveFPCall(const llvm::CallBase *CS, ValueId FPId, const ArgList &Args, std::optional CSRetVal) { + FPId = rep(FPId); if (!Nodes.inbounds(FPId)) { llvm::report_fatal_error("Invalid FPId"); } diff --git a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index 406b4b3dc4..5b24e19728 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -3,6 +3,7 @@ set(lca_files andersen_otf_fp.c andersen_otf_global_init.c andersen_otf_merge_load.c + andersen_otf_fp_already_processed.c andersen_otf_vtable.cpp andersen_otf_vtable2.cpp basic_01.c diff --git a/test/llvm_test_code/pointers/andersen_otf_fp_already_processed.c b/test/llvm_test_code/pointers/andersen_otf_fp_already_processed.c new file mode 100644 index 0000000000..ce08b548c8 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_fp_already_processed.c @@ -0,0 +1,51 @@ +// Demonstrates Bug 2: outer fixpoint exits when FunctionWorklist is empty +// even though checkUnresolvedFPCalls just grew pts for a call site that was +// already examined earlier in the same pass. +// +// Processing order (LIFO FunctionWorklist; main pushes D, A, B): +// pop B → call2 (g_fp2()) deferred, pts={}. 
+// pop A → call1 (g_fp1(get_y)) deferred, pts={} (D not yet run). +// pop D → relay processed (g_fp2=get_x), g_fp1=relay set. +// After D, propagation: pts(g_fp2_load)={get_x}, pts(g_fp1_load)={relay}. +// +// checkUnresolvedFPCalls: [call2, call1] +// call2: pts(g_fp2_load)={get_x} → connects get_x. ret(B) gets x. +// call1: pts(g_fp1_load)={relay} → connects relay with arg get_y +// → relay already processed → propagate → g_fp2 gains get_y. +// FunctionWorklist still empty → outer loop exits. call2 re-check skipped. +// +// Expected (sound): ret(B) must alias both x and y. + +int x, y; + +static int *get_x(void) { return &x; } +static int *get_y(void) { return &y; } + +static int *(*g_fp2)(void); +static void (*g_fp1)(int *(*)(void)); + +static void relay(int *(*cb)(void)) { g_fp2 = cb; } + +// Processed first (B pushed last by main). +// g_fp2 is still unset, so call2 deferred with pts={}. +static int *B(void) { return g_fp2(); } + +// Processed second (A pushed second by main). +// g_fp1 is still unset (D not yet run), so call1 deferred with pts={}. +static void A(void) { g_fp1(get_y); } + +// Processed third (D pushed first by main). +// Ensures relay, get_x, get_y are all processed before checkUnresolved runs. 
+static void D(void) { + get_x(); + get_y(); + relay(get_x); + g_fp1 = relay; +} + +int main(void) { + D(); // pushed first → bottom of stack → processed third + A(); // pushed second → processed second + B(); // pushed third → top of stack → processed first + return 0; +} diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index 37e1c3b004..a85070036c 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -892,6 +892,25 @@ TEST(AndersenOTFAATest, MergeLoadConstraint) { ExpectedResults); } +TEST(AndersenOTFAATest, AlreadyProcessedCalleePropagation) { + // andersen_otf_fp_already_processed: main pushes D, A, B → LIFO processes + // B first (call2 deferred, pts={}), A second (call1 deferred, pts={}), + // D third (relay/get_x/get_y processed, g_fp1=relay, g_fp2=get_x set). + // checkUnresolvedFPCalls: call2 sees pts={get_x}, call1 connects already- + // processed relay with get_y → g_fp2 gains get_y — but call2 already ran. + // The outer loop must re-check so ret(B) aliases both &x and &y. + const TSL RetB = TSL(RetVal{.InFunction = "B"}); + const TSL X = TSL(GlobalVar{.Name = "x"}); + const TSL Y = TSL(GlobalVar{.Name = "y"}); + const GTMap ExpectedResults = { + {RetB, {RetB, X, Y}}, + {X, {X, RetB}}, + {Y, {Y, RetB}}, + }; + doAnalysisAndCheckExact("andersen_otf_fp_already_processed_c_dbg.ll", + ExpectedResults); +} + TEST(AndersenOTFAATest, VTableDispatchPrecision) { // B has two virtual methods: getX (slot 0) returns @x, getY (slot 1) // returns @y. Per-slot dispatch must keep the two return values separate. 
From 057076a046ba5309b5e56580978f91aa5a4a850d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 27 May 2026 19:42:52 +0200 Subject: [PATCH 15/36] Let claude fix the early fixpoint bug --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 40 +++++++++++++++--------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index de6f9179ec..6d8da26218 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -655,16 +655,16 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Call-graph co-refinement --------------------------------------- - void connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, + bool connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, llvm::ArrayRef> Args, std::optional CSRetVal) { if (Callee->isDeclaration()) { - return; + return false; } const ValueId CalleeId = getOrInsertVar(PAGVariable(Callee)); if (!ConnectedCallees[CS].insert(CalleeId).second) { - return; + return false; } if (Reachable.insert(Callee).second) { @@ -687,9 +687,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } propagate(); + return true; } - void resolveVtableCall(const llvm::CallBase *CS, ValueId VtablePtrId, + bool resolveVtableCall(const llvm::CallBase *CS, ValueId VtablePtrId, uint64_t VtableIndex, const ArgList &Args, std::optional CSRetVal) { VtablePtrId = rep(VtablePtrId); @@ -697,6 +698,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { llvm::report_fatal_error("Invalid Vtable Id #" + llvm::Twine(uint32_t(VtablePtrId))); } + bool NewEdge = false; // Snapshot: connectCallee→propagate() may grow pts(VtablePtrId). 
const RawAliasSet VPPts = Nodes[VtablePtrId].PtsSet; VPPts.foreach ([&](ValueId ObjId) { @@ -724,18 +726,20 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!Callee || !isConsistentCall(CS, Callee)) { continue; } - connectCallee(CS, Callee, Args, CSRetVal); + NewEdge |= connectCallee(CS, Callee, Args, CSRetVal); } return true; }); + return NewEdge; } - void resolveFPCall(const llvm::CallBase *CS, ValueId FPId, + bool resolveFPCall(const llvm::CallBase *CS, ValueId FPId, const ArgList &Args, std::optional CSRetVal) { FPId = rep(FPId); if (!Nodes.inbounds(FPId)) { llvm::report_fatal_error("Invalid FPId"); } + bool NewEdge = false; // Snapshot pts(FPId): connectCallee→propagate() may grow pts(FPId). const RawAliasSet FPPts = Nodes[FPId].PtsSet; FPPts.foreach ([&](ValueId ObjId) { @@ -747,11 +751,12 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const auto *Fun = llvm::dyn_cast_or_null(Var.getBase().valueOrNull()); if (Fun && isConsistentCall(CS, Fun)) { - connectCallee(CS, Fun, Args, CSRetVal); + NewEdge |= connectCallee(CS, Fun, Args, CSRetVal); } } return true; }); + return NewEdge; } void handleCall(const llvm::CallBase *C) { @@ -815,17 +820,21 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { }); } - void checkUnresolvedFPCalls() { + bool checkUnresolvedFPCalls() { + bool NewEdge = false; for (const auto &Rec : UnresolvedFPCalls) { - resolveFPCall(Rec.CS, Rec.FPId, Rec.Args, Rec.CSRetVal); + NewEdge |= resolveFPCall(Rec.CS, Rec.FPId, Rec.Args, Rec.CSRetVal); } + return NewEdge; } - void checkUnresolvedVCalls() { + bool checkUnresolvedVCalls() { + bool NewEdge = false; for (const auto &Rec : UnresolvedVCalls) { - resolveVtableCall(Rec.CS, Rec.VtablePtrId, Rec.VtableIndex, Rec.Args, - Rec.CSRetVal); + NewEdge |= resolveVtableCall(Rec.CS, Rec.VtablePtrId, Rec.VtableIndex, + Rec.Args, Rec.CSRetVal); } + return NewEdge; } // ---- Result construction -------------------------------------------- @@ 
-946,6 +955,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { AndersenOTFResult run() { initGlobals(); + bool Changed{}; do { while (!FunctionWorklist.empty()) { const auto *F = FunctionWorklist.pop_back_val(); @@ -955,9 +965,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { processFunction(F); propagate(); } - checkUnresolvedFPCalls(); - checkUnresolvedVCalls(); - } while (!FunctionWorklist.empty()); + Changed = checkUnresolvedFPCalls(); + Changed |= checkUnresolvedVCalls(); + } while (!FunctionWorklist.empty() || Changed); return buildResult(); } From 76020c194556dd334a0e10969ef899f7948514bc Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 27 May 2026 20:08:22 +0200 Subject: [PATCH 16/36] minor --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 81 ++++++++++-------------- 1 file changed, 35 insertions(+), 46 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 6d8da26218..2c20583b3a 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -129,7 +129,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { ValueCompressor LocalVC{}; // internal variable+object nodes llvm::SmallVector FunctionWorklist; - llvm::DenseSet Reachable; + llvm::DenseSet Queued; // ever pushed to worklist llvm::DenseSet Processed; UnionFind SCCUf; @@ -148,7 +148,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { ValueCompressor &VC) : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), ExternalVC(VC) { for (const auto *F : Entries) { - if (Reachable.insert(F).second) { + if (Queued.insert(F).second) { FunctionWorklist.push_back(F); } } @@ -156,13 +156,12 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Node growth ---------------------------------------------------- - NodeInfo &grow(ValueId V) { + void grow(ValueId V) { const auto Idx = size_t(V); if (Idx >= 
Nodes.size()) { Nodes.resize(Idx + 1); SCCUf.grow(Idx + 1); } - return Nodes[V]; } ValueId getOrInsertVar(PAGVariable Var) { @@ -177,6 +176,16 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return Id; } + // pts(VarId) for global objects: functions self-point (the address IS + // the abstract object); global variables point to their object node. + void addGlobalPointee(const llvm::GlobalObject *GO, ValueId VarId) { + if (llvm::isa(GO)) { + addPointee(VarId, VarId); + } else if (const auto *GVar = llvm::dyn_cast(GO)) { + addPointee(VarId, getOrInsertObj(PAGVariable(GVar))); + } + } + [[nodiscard]] ValueId rep(ValueId V) const { return SCCUf.find(V); } // Merges the SCCs containing A and B. Returns the new representative. @@ -282,14 +291,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!llvm::isa(V)) { const ValueId VId = getOrInsertVar(PAGVariable(V)); - if (llvm::isa(V)) { - // Function address is its own abstract object: pts(F) = {F}. - addPointee(VId, VId); - } else if (const auto *GVar = llvm::dyn_cast(V)) { - // Global variable used as a pointer: ensure its object exists so - // pts(var_G) = {obj_G} (e.g. `return &x` where x is a global). 
- const ValueId OId = getOrInsertObj(PAGVariable(GVar)); - addPointee(VId, OId); + if (const auto *GO = llvm::dyn_cast(V)) { + addGlobalPointee(GO, VId); } std::invoke(Handler, VId); return; @@ -307,13 +310,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } if (const auto *GObj = llvm::dyn_cast(Op)) { const ValueId GId = getOrInsertVar(PAGVariable(GObj)); - if (llvm::isa(GObj)) { - addPointee(GId, GId); - } else if (const auto *GVar = - llvm::dyn_cast(GObj)) { - const ValueId OId = getOrInsertObj(PAGVariable(GVar)); - addPointee(GId, OId); - } + addGlobalPointee(GObj, GId); std::invoke(Handler, GId); continue; } @@ -356,8 +353,6 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { "should happen through getOrInsertVar or getOrInsertObj"); } - // grow(Src); - // grow(Dst); // grow before indexing Nodes[Src] if (Nodes[Src].AssignDstSet.insert(Dst).second) { Nodes[Src].AssignDsts.push_back(Dst); if (!Nodes[Src].PtsSet.empty()) { @@ -503,13 +498,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { for (ValueId SrcId : GCache.getOrCreate(G.getInitializer(), [&](const llvm::Value *V) { const ValueId VId = getOrInsertVar(PAGVariable(V)); - if (llvm::isa(V)) { - // Function address is its own abstract object (self-pointing). 
- addPointee(VId, VId); - } else if (const auto *GV = - llvm::dyn_cast(V)) { - const ValueId OId = getOrInsertObj(PAGVariable(GV)); - addPointee(VId, OId); + if (const auto *GO = llvm::dyn_cast(V)) { + addGlobalPointee(GO, VId); } return VId; })) { @@ -530,6 +520,13 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } } + void addPtrAlias(const llvm::Value *V, const llvm::Value *Src) { + forEachOpId(Src, [&](ValueId OpId) { + LocalVC.addAlias(AndersenVar{PAGVariable(V), false}, OpId); + grow(OpId); + }); + } + void processInstruction(const llvm::Instruction &I) { if (const auto *Alloca = llvm::dyn_cast(&I)) { const ValueId VarId = getOrInsertVar(PAGVariable(Alloca)); @@ -568,22 +565,15 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // Casts: alias result to stripped operand (field-insensitive). if (const auto *Cast = llvm::dyn_cast(&I)) { - if (definitelyContainsNoPointer(Cast)) { - return; + if (!definitelyContainsNoPointer(Cast)) { + addPtrAlias(Cast, Cast->getOperand(0)); } - forEachOpId(Cast->getOperand(0), [&](ValueId OpId) { - LocalVC.addAlias(AndersenVar{PAGVariable(Cast), false}, OpId); - grow(OpId); - }); return; } // GEPs: alias result to base pointer (field-insensitive). if (const auto *GEP = llvm::dyn_cast(&I)) { - forEachOpId(GEP->getPointerOperand(), [&](ValueId OpId) { - LocalVC.addAlias(AndersenVar{PAGVariable(GEP), false}, OpId); - grow(OpId); - }); + addPtrAlias(GEP, GEP->getPointerOperand()); } } @@ -667,7 +657,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return false; } - if (Reachable.insert(Callee).second) { + if (Queued.insert(Callee).second) { FunctionWorklist.push_back(Callee); } @@ -846,18 +836,16 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // Object nodes are internal only and do not appear in the external result. 
TypedVector> LocalToExt(NumLocal); for (auto VId : iota(NumLocal)) { - ValueId FirstExtId{}; - bool HasFirst = false; + std::optional FirstExtId; for (const auto &V : LocalVC.id2vars(VId)) { if (V.isObject()) { continue; } - if (!HasFirst) { + if (!FirstExtId) { FirstExtId = ExternalVC.insert(V.getBase()).first; - HasFirst = true; LocalToExt[VId] = FirstExtId; } else { - ExternalVC.addAlias(V.getBase(), FirstExtId); + ExternalVC.addAlias(V.getBase(), *FirstExtId); } } } @@ -911,7 +899,6 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } }); std::ranges::sort(Buf); - // Buf.erase(std::ranges::unique(Buf).begin(), Buf.end()); ObjToAliasExtVIds[Obj].insertSorted(Buf); Buf.clear(); } @@ -927,7 +914,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } if (!Nodes.inbounds(RepId)) { - break; + break; // iota is monotone; all subsequent IDs exceed Nodes.size() } // Union the pre-built per-object alias sets for all pointees. @@ -963,6 +950,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } processFunction(F); + // Drain pending pts for functions that make no pointer-relevant + // calls (connectCallee would otherwise be the only propagate site). 
propagate(); } Changed = checkUnresolvedFPCalls(); From a1be4936139ed80b9077e1a27fee745ace980d41 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 28 May 2026 18:18:05 +0200 Subject: [PATCH 17/36] minor --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 2c20583b3a..61f570a58f 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -37,6 +37,7 @@ #include #include +#include using namespace psr; @@ -452,7 +453,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // Drain before iterating Dsts: addAssignEdge inside onNewPointee/merge() // may write to Nodes[U].PendingPts while we iterate. - RawAliasSet UPending = std::move(Nodes[U].PendingPts); + RawAliasSet UPending = std::exchange(Nodes[U].PendingPts, {}); for (ValueId VSnap : Dsts) { // Re-resolve: a prior iteration's merge() may have changed the rep. @@ -757,11 +758,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // Build one entry per call argument: empty inner vector = non-pointer. 
ArgList Args; for (const auto &Arg : C->args()) { - llvm::SmallVector ArgIds; + auto &ArgIds = Args.emplace_back(); if (!definitelyContainsNoPointer(Arg.get())) { forEachOpId(Arg.get(), [&](ValueId Id) { ArgIds.push_back(Id); }); } - Args.push_back(std::move(ArgIds)); } std::optional CSRetVal; From fd3e394e7cc2e06de2d507c87a3da542e1e34ece Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 28 May 2026 19:44:24 +0200 Subject: [PATCH 18/36] Expose call-graph built by AndersenOTFAA + add some configurable soundness with extern functions --- .../phasar/PhasarLLVM/Pointer/AndersenOTFAA.h | 13 +++- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 65 ++++++++++++++++--- test/llvm_test_code/pointers/CMakeLists.txt | 1 + .../pointers/andersen_otf_extern_callback.c | 15 +++++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 42 ++++++++++++ 5 files changed, 124 insertions(+), 12 deletions(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_extern_callback.c diff --git a/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h b/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h index ef2bde9325..3f9789e236 100644 --- a/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h +++ b/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h @@ -9,12 +9,14 @@ * Fabian Schiebel and others *****************************************************************************/ +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" #include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAA.h" #include "phasar/Pointer/RawAliasSet.h" #include "phasar/Pointer/UnionFindAA.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/NonNullPtr.h" +#include "phasar/Utils/Soundness.h" #include "phasar/Utils/TypedVector.h" #include "phasar/Utils/ValueCompressor.h" @@ -36,6 +38,7 @@ class LLVMProjectIRDB; struct AndersenOTFResult { TypedVector> AliasSets; size_t NumVars{}; + LLVMBasedCallGraph CG; [[nodiscard]] static constexpr bool 
isCached() noexcept { return true; } [[nodiscard]] constexpr size_t size() const noexcept { return NumVars; } @@ -73,7 +76,8 @@ class AndersenOTFSolver { public: explicit AndersenOTFSolver(const LLVMProjectIRDB &IRDB, llvm::ArrayRef Entries, - ValueCompressor &VC) noexcept; + ValueCompressor &VC, + Soundness S = Soundness::Soundy) noexcept; /// Run the full OTF fixpoint and return the alias-analysis result. [[nodiscard]] AndersenOTFResult solve(); @@ -84,6 +88,7 @@ class AndersenOTFSolver { NonNullPtr IRDB; llvm::ArrayRef Entries; NonNullPtr> VC; + Soundness S; }; // ---- Factory functions ------------------------------------------------ @@ -93,13 +98,15 @@ class AndersenOTFSolver { [[nodiscard]] AndersenOTFResult computeAndersenOTFRaw( const LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints, - MaybeUniquePtr> VC = nullptr); + MaybeUniquePtr> VC = nullptr, + Soundness S = Soundness::Soundy); /// Runs the Andersen OTF fixpoint and returns an \c LLVMUnionFindAliasIterator /// that implements \c IsLLVMAliasIterator. 
[[nodiscard]] LLVMUnionFindAliasIterator computeAndersenOTF(const LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints, - MaybeUniquePtr> VC = nullptr); + MaybeUniquePtr> VC = nullptr, + Soundness S = Soundness::Soundy); } // namespace psr diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 61f570a58f..47158f13b9 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -18,6 +18,7 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/LibrarySummary.h" +#include "phasar/Utils/Soundness.h" #include "phasar/Utils/UnionFind.h" #include "phasar/Utils/ValueCompressor.h" @@ -128,6 +129,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const llvm::DataLayout &DL; // NOLINT ValueCompressor &ExternalVC; // NOLINT – caller-visible output ValueCompressor LocalVC{}; // internal variable+object nodes + Soundness SoundnessFlag; llvm::SmallVector FunctionWorklist; llvm::DenseSet Queued; // ever pushed to worklist @@ -140,17 +142,25 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { llvm::SmallVector UnresolvedVCalls; llvm::DenseMap> ConnectedCallees; + CallGraphBuilder CGBuilder; llvm::SmallVector PropWorklist; // ---- Constructor ---------------------------------------------------- SolverData(const LLVMProjectIRDB &IRDB, llvm::ArrayRef Entries, - ValueCompressor &VC) - : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), ExternalVC(VC) { + ValueCompressor &VC, Soundness S) + : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), ExternalVC(VC), + SoundnessFlag(S) { + + CGBuilder.reserve(IRDB.getNumFunctions()); for (const auto *F : Entries) { if (Queued.insert(F).second) { FunctionWorklist.push_back(F); + + // entry functions may be missed in the CG, if they are never called + // explicitly in the code + std::ignore = CGBuilder.addFunctionVertex(F); } } } @@ -646,10 +656,43 @@ struct 
[[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Call-graph co-refinement --------------------------------------- + // For each argument, add every function in pts(ArgId) to the worklist + // as an entry point. Used when a callee is a declaration and we want to + // treat fn-ptr arguments as reachable callbacks (Soundy / Sound mode). + void addFnPtrArgsAsEntries( + llvm::ArrayRef> Args) { + for (const auto &ArgIds : Args) { + for (ValueId ArgId : ArgIds) { + ArgId = rep(ArgId); + if (!Nodes.inbounds(ArgId)) { + continue; + } + Nodes[ArgId].PtsSet.foreach ([&](ValueId ObjId) { + if (!Nodes.inbounds(ObjId)) { + return false; + } + for (const auto &Var : LocalVC.id2vars(ObjId)) { + const auto *Fun = llvm::dyn_cast_or_null( + Var.getBase().valueOrNull()); + if (Fun && !Fun->isDeclaration() && + Queued.insert(Fun).second) { + FunctionWorklist.push_back(Fun); + std::ignore = CGBuilder.addFunctionVertex(Fun); + } + } + return true; + }); + } + } + } + bool connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, llvm::ArrayRef> Args, std::optional CSRetVal) { if (Callee->isDeclaration()) { + if (SoundnessFlag != Soundness::Unsound) { + addFnPtrArgsAsEntries(Args); + } return false; } @@ -657,6 +700,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (!ConnectedCallees[CS].insert(CalleeId).second) { return false; } + CGBuilder.addCallEdge(CS, Callee); if (Queued.insert(Callee).second) { FunctionWorklist.push_back(Callee); @@ -934,6 +978,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } } + Result.CG = CGBuilder.consumeCallGraph(); return Result; } @@ -966,11 +1011,11 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { AndersenOTFSolver::AndersenOTFSolver( const LLVMProjectIRDB &IRDB, llvm::ArrayRef Entries, - ValueCompressor &VC) noexcept - : IRDB(IRDB), Entries(Entries), VC(VC) {} + ValueCompressor &VC, Soundness S) noexcept + : IRDB(IRDB), Entries(Entries), VC(VC), S(S) {} 
AndersenOTFResult AndersenOTFSolver::solve() { - SolverData Impl{*IRDB, Entries, *VC}; + SolverData Impl{*IRDB, Entries, *VC, S}; return Impl.run(); } @@ -979,22 +1024,24 @@ AndersenOTFResult AndersenOTFSolver::solve() { AndersenOTFResult psr::computeAndersenOTFRaw(const LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints, - MaybeUniquePtr> VC) { + MaybeUniquePtr> VC, + Soundness S) { if (!VC) { VC = std::make_unique>(); } - AndersenOTFSolver Solver(IRDB, EntryPoints, *VC); + AndersenOTFSolver Solver(IRDB, EntryPoints, *VC, S); return Solver.solve(); } LLVMUnionFindAliasIterator psr::computeAndersenOTF(const LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints, - MaybeUniquePtr> VC) { + MaybeUniquePtr> VC, + Soundness S) { if (!VC) { VC = std::make_unique>(); } - AndersenOTFSolver Solver(IRDB, EntryPoints, *VC); + AndersenOTFSolver Solver(IRDB, EntryPoints, *VC, S); auto Res = Solver.solve(); return LLVMUnionFindAliasIterator{std::move(Res), std::move(VC)}; } diff --git a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index 5b24e19728..266dcc33fc 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -4,6 +4,7 @@ set(lca_files andersen_otf_global_init.c andersen_otf_merge_load.c andersen_otf_fp_already_processed.c + andersen_otf_extern_callback.c andersen_otf_vtable.cpp andersen_otf_vtable2.cpp basic_01.c diff --git a/test/llvm_test_code/pointers/andersen_otf_extern_callback.c b/test/llvm_test_code/pointers/andersen_otf_extern_callback.c new file mode 100644 index 0000000000..b6b08eb29c --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_extern_callback.c @@ -0,0 +1,15 @@ +// Soundness test: close_stdout is passed as a fn-ptr arg to the external +// register_callback (a declaration). At Soundy/Sound, the solver must +// treat close_stdout as a reachable entry point and analyse its body, +// discovering flush_impl as a callee. At Unsound neither should appear. 
+void flush_impl(void) {} + +void close_stdout(void) { flush_impl(); } + +// External: only a declaration, body not available in this module. +void register_callback(void (*f)(void)); + +int main(void) { + register_callback(close_stdout); + return 0; +} diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index a85070036c..b432aed2d1 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -927,6 +927,48 @@ TEST(AndersenOTFAATest, VTableDispatchPrecision) { doAnalysisAndCheckExact("andersen_otf_vtable2_cpp_dbg.ll", ExpectedResults); } +TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { + // andersen_otf_extern_callback: main passes @close_stdout to the + // declaration-only register_callback. close_stdout calls flush_impl. + // + // Soundy: both must appear as CG vertices (entry-point promotion). + // Unsound: neither must appear (no processing of external callbacks). 
+ auto IRDB = LLVMProjectIRDB::loadOrExit( + PathToLLFiles + "andersen_otf_extern_callback_c_dbg.ll"); + + const auto *CloseStdout = IRDB.getFunctionDefinition("close_stdout"); + const auto *FlushImpl = IRDB.getFunctionDefinition("flush_impl"); + const auto *MainFn = IRDB.getFunctionDefinition("main"); + ASSERT_NE(MainFn, nullptr); + ASSERT_NE(CloseStdout, nullptr); + ASSERT_NE(FlushImpl, nullptr); + + auto HasCGVertex = [](const LLVMBasedCallGraph &Graph, + const llvm::Function *Fun) { + return llvm::is_contained(Graph.getAllVertexFunctions(), Fun); + }; + + { + auto Cmp = std::make_unique>(); + auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), + Soundness::Soundy); + EXPECT_TRUE(HasCGVertex(Res.CG, CloseStdout)) + << "close_stdout must be a CG vertex at Soundy"; + EXPECT_TRUE(HasCGVertex(Res.CG, FlushImpl)) + << "flush_impl must be a CG vertex at Soundy"; + } + + { + auto Cmp = std::make_unique>(); + auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), + Soundness::Unsound); + EXPECT_FALSE(HasCGVertex(Res.CG, CloseStdout)) + << "close_stdout must not be a CG vertex at Unsound"; + EXPECT_FALSE(HasCGVertex(Res.CG, FlushImpl)) + << "flush_impl must not be a CG vertex at Unsound"; + } +} + } // namespace int main(int Argc, char **Argv) { From c13038798d97e3f84174473de0a43c2d255ea4ba Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 30 May 2026 12:09:30 +0200 Subject: [PATCH 19/36] Debug missing callees in AndersenOTFAA --- .../ControlFlow/Resolver/Resolver.cpp | 4 +- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 19 +++++---- test/llvm_test_code/pointers/CMakeLists.txt | 1 + .../pointers/andersen_otf_fp_struct_field.c | 24 ++++++++++++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 39 +++++++++++++++++++ 5 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp 
index db0423a194..b6791285ce 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -77,7 +77,9 @@ psr::getVFTIndexAndVT(const llvm::CallBase *CallSite) { const auto *GEP = llvm::dyn_cast(Load->getPointerOperand()); - if (GEP == nullptr) { + // Vtable GEPs index into a pointer array with a single index. + // Multi-index GEPs (e.g. struct field access) are not vtable patterns. + if (GEP == nullptr || GEP->getNumOperands() != 2) { return std::nullopt; } diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 47158f13b9..d77ec11d03 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -659,8 +659,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // For each argument, add every function in pts(ArgId) to the worklist // as an entry point. Used when a callee is a declaration and we want to // treat fn-ptr arguments as reachable callbacks (Soundy / Sound mode). 
- void addFnPtrArgsAsEntries( - llvm::ArrayRef> Args) { + void + addFnPtrArgsAsEntries(llvm::ArrayRef> Args) { for (const auto &ArgIds : Args) { for (ValueId ArgId : ArgIds) { ArgId = rep(ArgId); @@ -674,8 +674,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { for (const auto &Var : LocalVC.id2vars(ObjId)) { const auto *Fun = llvm::dyn_cast_or_null( Var.getBase().valueOrNull()); - if (Fun && !Fun->isDeclaration() && - Queued.insert(Fun).second) { + if (Fun && !Fun->isDeclaration() && Queued.insert(Fun).second) { FunctionWorklist.push_back(Fun); std::ignore = CGBuilder.addFunctionVertex(Fun); } @@ -689,6 +688,12 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { bool connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, llvm::ArrayRef> Args, std::optional CSRetVal) { + const ValueId CalleeId = getOrInsertVar(PAGVariable(Callee)); + if (!ConnectedCallees[CS].insert(CalleeId).second) { + return false; + } + CGBuilder.addCallEdge(CS, Callee); + if (Callee->isDeclaration()) { if (SoundnessFlag != Soundness::Unsound) { addFnPtrArgsAsEntries(Args); @@ -696,12 +701,6 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return false; } - const ValueId CalleeId = getOrInsertVar(PAGVariable(Callee)); - if (!ConnectedCallees[CS].insert(CalleeId).second) { - return false; - } - CGBuilder.addCallEdge(CS, Callee); - if (Queued.insert(Callee).second) { FunctionWorklist.push_back(Callee); } diff --git a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index 266dcc33fc..3648a52708 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -5,6 +5,7 @@ set(lca_files andersen_otf_merge_load.c andersen_otf_fp_already_processed.c andersen_otf_extern_callback.c + andersen_otf_fp_struct_field.c andersen_otf_vtable.cpp andersen_otf_vtable2.cpp basic_01.c diff --git a/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c 
b/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c new file mode 100644 index 0000000000..1566578d20 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c @@ -0,0 +1,24 @@ +// Test: function pointer stored in a struct field via an initializer function, +// then retrieved and called indirectly. Mirrors the obstack chunkfun pattern. +// Expected: the indirect call in do_call() must have target() as a callee. + +struct Ctx { + void *(*fn)(void *); +}; + +static void *target(void *arg) { return arg; } + +static void init_ctx(struct Ctx *ctx, void *(*fn)(void *)) { + ctx->fn = fn; +} + +static void *do_call(struct Ctx *ctx, void *arg) { + return ctx->fn(arg); // indirect call via struct field +} + +int main(void) { + struct Ctx ctx; + init_ctx(&ctx, target); + do_call(&ctx, (void *)0); + return 0; +} diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index b432aed2d1..fd7dd62136 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -13,7 +13,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/Support/raw_ostream.h" #include "SrcCodeLocationEntry.h" @@ -969,6 +971,43 @@ TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { } } +TEST(AndersenOTFAATest, FnPtrStoredInStructField) { + // Function pointer stored into a struct field by an initializer, then + // retrieved and called indirectly. The indirect call in do_call() must + // have target() as a callee. 
+ auto IRDB = LLVMProjectIRDB::loadOrExit( + PathToLLFiles + "andersen_otf_fp_struct_field_c_dbg.ll"); + + const auto *DoCall = IRDB.getFunctionDefinition("do_call"); + const auto *Target = IRDB.getFunctionDefinition("target"); + const auto *MainFn = IRDB.getFunctionDefinition("main"); + ASSERT_NE(MainFn, nullptr); + ASSERT_NE(DoCall, nullptr); + ASSERT_NE(Target, nullptr); + + auto Cmp = std::make_unique>(); + auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get()); + + // Find the indirect call instruction in do_call. + const llvm::CallBase *IndirectCS = nullptr; + for (const auto &I : llvm::instructions(DoCall)) { + const auto *CS = llvm::dyn_cast(&I); + if (!CS || CS->isDebugOrPseudoInst()) { + continue; + } + if (!llvm::isa( + CS->getCalledOperand()->stripPointerCastsAndAliases())) { + IndirectCS = CS; + break; + } + } + ASSERT_NE(IndirectCS, nullptr) << "No indirect call found in do_call"; + + const auto &Callees = Res.CG.getCalleesOfCallAt(IndirectCS); + EXPECT_TRUE(llvm::is_contained(Callees, Target)) + << "target() must be a callee of the indirect call in do_call()"; +} + } // namespace int main(int Argc, char **Argv) { From b47acc1346bcf93bc9d4de390142ab9c5deb5cd5 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 30 May 2026 13:25:12 +0200 Subject: [PATCH 20/36] minor perf improvement --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 30 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index d77ec11d03..f4b16f073a 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -472,14 +472,30 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } - bool AddedAny = false; - UPending.foreach ([&](ValueId Obj) { - if (Nodes[V].PtsSet.tryInsert(Obj)) { - Nodes[V].PendingPts.insert(Obj); - onNewPointee(V, Obj); - AddedAny = true; + const bool AddedAny = [&] { + bool 
AddedAny = false; + constexpr size_t DiffThreshold = 32; + // operator- is expensive, but it is definitely a lot faster than the + // foreach loop if UPending is large + if (UPending.size() > DiffThreshold) { + auto Diff = UPending - Nodes[V].PtsSet; + AddedAny = !Diff.empty(); + if (AddedAny) { + Nodes[V].PtsSet |= Diff; + Nodes[V].PendingPts |= Diff; + Diff.foreach ([this, V](ValueId Obj) { onNewPointee(V, Obj); }); + } + } else { + UPending.foreach ([&](ValueId Obj) { + if (Nodes[V].PtsSet.tryInsert(Obj)) { + Nodes[V].PendingPts.insert(Obj); + onNewPointee(V, Obj); + AddedAny = true; + } + }); } - }); + return AddedAny; + }(); if (!AddedAny) { // LCD: V has all of U's pending wave, so V.PtsSet ⊇ U.PtsSet. if (Nodes[V].AssignDstSet.contains(U)) { From 098d60bfa116aaa9b4d227ac523cd185bca47832 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 30 May 2026 14:55:13 +0200 Subject: [PATCH 21/36] minor --- include/phasar/Pointer/RawAliasSet.h | 8 +++++++- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 22 ++++++++++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/include/phasar/Pointer/RawAliasSet.h b/include/phasar/Pointer/RawAliasSet.h index 146846b221..279e956805 100644 --- a/include/phasar/Pointer/RawAliasSet.h +++ b/include/phasar/Pointer/RawAliasSet.h @@ -43,6 +43,7 @@ concept IsRawAliasSet = requires(ASet &MutSet, const ASet &ConstSet, MutSet |= ConstSet; MutSet &= ConstSet; MutSet -= ConstSet; + { ConstSet - ConstSet } -> std::convertible_to; { ConstSet == ConstSet } noexcept -> std::convertible_to; { ConstSet != ConstSet } noexcept -> std::convertible_to; { MutSet.tryMergeWith(ConstSet) } -> std::convertible_to; @@ -92,6 +93,12 @@ template class LLVMRawAliasSet { Bits.intersectWithComplement(Other.Bits); } + [[nodiscard]] LLVMRawAliasSet operator-(const LLVMRawAliasSet &Other) const { + LLVMRawAliasSet Ret; + Ret.Bits = Bits - Other.Bits; + return Ret; + } + [[nodiscard]] bool empty() const noexcept { return Bits.empty(); } 
[[nodiscard]] size_t size() const noexcept { return Bits.count(); } @@ -112,7 +119,6 @@ template class LLVMRawAliasSet { private: llvm::SparseBitVector<> Bits; - // TODO: roaring::Roaring Bits; }; template class RoaringAliasSet { diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index f4b16f073a..8f737ec952 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -231,15 +231,17 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } // Merge pts sets. - const auto OldRepPts = Nodes[Rep].PtsSet; - const bool PtsGrew = Nodes[Rep].PtsSet.tryMergeWith(NRPts); - if (PtsGrew) { - Nodes[Rep].PendingPts |= NRPts; - PropWorklist.push_back(Rep); - // Fire Rep's pre-existing load/store/memcopy constraints for pointees - // absorbed from NonRep that Rep didn't previously have. - const auto Diff = NRPts - OldRepPts; - Diff.foreach ([&](ValueId NewObj) { onNewPointee(Rep, NewObj); }); + { + const auto OldRepPts = Nodes[Rep].PtsSet; + const bool PtsGrew = Nodes[Rep].PtsSet.tryMergeWith(NRPts); + if (PtsGrew) { + // Fire Rep's pre-existing load/store/memcopy constraints for pointees + // absorbed from NonRep that Rep didn't previously have. + const auto Diff = NRPts - OldRepPts; + Nodes[Rep].PendingPts |= Diff; + PropWorklist.push_back(Rep); + Diff.foreach ([&](ValueId NewObj) { onNewPointee(Rep, NewObj); }); + } } // Snapshot Rep's pts (after merge) for retroactive constraint firing. 
@@ -474,7 +476,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { const bool AddedAny = [&] { bool AddedAny = false; - constexpr size_t DiffThreshold = 32; + constexpr size_t DiffThreshold = 16; // operator- is expensive, but it is definitely a lot faster than the // foreach loop if UPending is large if (UPending.size() > DiffThreshold) { From c8621e7120f5588d4f951739354abfd297bea55f Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 3 Jun 2026 18:34:38 +0200 Subject: [PATCH 22/36] Add library-summary handling to AndersenOTFAA. XXX: Should we allow passing-in an instance of LLVMFunctionDataFlowFacts? --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 47 ++++++++++++++++++- test/llvm_test_code/pointers/CMakeLists.txt | 1 + .../pointers/andersen_otf_libc.c | 12 +++++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 21 +++++++++ 4 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 test/llvm_test_code/pointers/andersen_otf_libc.c diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 8f737ec952..e0cfea22f5 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -15,8 +15,10 @@ #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" +#include "phasar/PhasarLLVM/Utils/LLVMFunctionDataFlowFacts.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/LibCSummary.h" #include "phasar/Utils/LibrarySummary.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/UnionFind.h" @@ -130,6 +132,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { ValueCompressor &ExternalVC; // NOLINT – caller-visible output ValueCompressor LocalVC{}; // internal variable+object nodes Soundness SoundnessFlag; + library_summary::LLVMFunctionDataFlowFacts 
LibFacts; llvm::SmallVector FunctionWorklist; llvm::DenseSet Queued; // ever pushed to worklist @@ -151,7 +154,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { llvm::ArrayRef Entries, ValueCompressor &VC, Soundness S) : IRDB(IRDB), DL(IRDB.getModule()->getDataLayout()), ExternalVC(VC), - SoundnessFlag(S) { + SoundnessFlag(S), LibFacts(library_summary::readFromFDFF( + getLibCSummary(), [&IRDB](llvm::StringRef Name) { + return IRDB.getFunction(Name); + })) { CGBuilder.reserve(IRDB.getNumFunctions()); for (const auto *F : Entries) { @@ -703,6 +709,41 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } } + void applyLibrarySummary( + const library_summary::LLVMFunctionDataFlowFacts::ParameterMappingTy + &LibSum, + const llvm::Function *Fun, + llvm::ArrayRef> Args, + std::optional CSRetVal) { + const size_t NumParams = Fun->arg_size(); + for (const auto &[ParamIdx, Dests] : LibSum) { + if (ParamIdx >= NumParams || ParamIdx >= Args.size() || + !Fun->getArg(ParamIdx)->getType()->isPointerTy()) { + continue; + } + for (const auto &DestFact : Dests) { + if (const auto *DestParam = + DestFact.dyn_cast()) { + if (DestParam->Index >= Args.size()) { + continue; + } + for (ValueId DstId : Args[DestParam->Index]) { + for (ValueId SrcId : Args[ParamIdx]) { + addStore(DstId, SrcId); + } + } + } else { + if (!CSRetVal) { + continue; + } + for (ValueId SrcId : Args[ParamIdx]) { + addAssignEdge(SrcId, *CSRetVal); + } + } + } + } + } + bool connectCallee(const llvm::CallBase *CS, const llvm::Function *Callee, llvm::ArrayRef> Args, std::optional CSRetVal) { @@ -713,6 +754,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { CGBuilder.addCallEdge(CS, Callee); if (Callee->isDeclaration()) { + if (const auto *LibSum = LibFacts.getFactsForFunctionOrNull(Callee)) { + applyLibrarySummary(*LibSum, Callee, Args, CSRetVal); + return false; + } if (SoundnessFlag != Soundness::Unsound) { addFnPtrArgsAsEntries(Args); } diff --git 
a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index 3648a52708..b278fc82cd 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -56,6 +56,7 @@ set(lca_files set(lca_files_mem2reg andersen_otf_interproc.c andersen_otf_fp.c + andersen_otf_libc.c basic_01.c basic_02.c basic_03.c diff --git a/test/llvm_test_code/pointers/andersen_otf_libc.c b/test/llvm_test_code/pointers/andersen_otf_libc.c new file mode 100644 index 0000000000..1d7ab0440e --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_libc.c @@ -0,0 +1,12 @@ +// strcpy(dst, src) library summary: +// param 0 (dst) -> ReturnValue => ret aliases dst +// param 1 (src) -> Parameter{0} => *dst = src +// The return value of strcpy must alias buf (arg 0). +#include + +int main(void) { + char buf[64]; + char *p = strcpy(buf, "hello"); + (void)p; + return 0; +} diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index fd7dd62136..41f7cc15f4 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -971,6 +971,27 @@ TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { } } +TEST(AndersenOTFAATest, LibCSummaryStrcpyReturnAliasesDst) { + // strcpy(buf, "hello") summary: param 0 (dst) -> ReturnValue. + // The call result must alias buf (arg 0); they share the same buffer object. + // This exercises the ReturnValue branch of applyLibrarySummary(). 
+ const TSL Call = TSL(LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL Buf = TSL(OperandOf{ + .OperandIndex = 0, + .Inst = LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const GTMap ExpectedResults = { + {Call, {Call, Buf}}, + {Buf, {Buf, Call}}, + }; + doAnalysisAndCheckExact("andersen_otf_libc_c_m2r_dbg.ll", ExpectedResults); +} + TEST(AndersenOTFAATest, FnPtrStoredInStructField) { // Function pointer stored into a struct field by an initializer, then // retrieved and called indirectly. The indirect call in do_call() must From 9876945ea0f7dc659319e7ffadc94d70f5bfa0ad Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 3 Jun 2026 19:58:03 +0200 Subject: [PATCH 23/36] Best-effort approach to more precisely handle calls through hand-rolled vtables --- .../ControlFlow/Resolver/Resolver.h | 13 +++ .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 13 +++ .../ControlFlow/Resolver/Resolver.cpp | 23 +++++ lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 95 +++++++++++++++++++ lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 33 +++++++ test/llvm_test_code/pointers/CMakeLists.txt | 1 + .../pointers/andersen_otf_struct_vtable.c | 17 ++++ .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 40 ++++++++ 8 files changed, 235 insertions(+) create mode 100644 test/llvm_test_code/pointers/andersen_otf_struct_vtable.c diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index b2cba8ae3a..12b91c72c0 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace llvm { class Instruction; @@ -50,6 +51,18 @@ getVFTIndex(const llvm::CallBase *CallSite); [[nodiscard]] std::optional> getVFTIndexAndVT(const llvm::CallBase *CallSite); +/// Detects the pattern \c 
call(load(GEP(base, const_indices...))) with a +/// typed (>=3-operand) GEP, i.e. an indirect call through a struct function +/// pointer field. Distinct from the 2-operand raw-pointer C++ vptr case +/// handled by \c getVFTIndexAndVT. +/// +/// Returns \c {base_ptr, all_GEP_indices, gep_source_elem_ty} on match, +/// or \c std::nullopt otherwise. +[[nodiscard]] std::optional< + std::tuple, + llvm::Type *>> +getStructVCallInfo(const llvm::CallBase *CallSite); + /// Assuming that `CallSite` is a call to a non-static member function, /// retrieves the type of the receiver. Returns nullptr, if the receiver-type /// could not be extracted diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index dcad26415a..371fe5d196 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -19,6 +19,7 @@ #include "phasar/Utils/Utilities.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instruction.h" @@ -360,6 +361,18 @@ getVaListTagOrNull(const llvm::Function &Fun); [[nodiscard]] bool isVaListAlloca(const llvm::AllocaInst &Alloc); [[nodiscard]] const llvm::DIType *stripPointerTypes(const llvm::DIType *DITy); + +/// Walk a constant initializer along a GEP index path and return the +/// \c Function* at the leaf, or nullptr. +/// +/// \p Indices mirrors GEP index semantics: +/// - \c Indices[0] is the outer "pointer-array" index: +/// \c ConstantArray -> selects the element; \c ConstantStruct -> +/// must be 0 (pointer-arithmetic no-op, struct is not an array). +/// - \c Indices[1+] navigate recursively through ConstantAggregate. 
+[[nodiscard]] const llvm::Function * +walkConstInitPath(const llvm::Constant *Init, + llvm::ArrayRef Indices); } // namespace psr #endif diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index b6791285ce..47b8e99f7e 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -37,12 +37,14 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include +#include using namespace psr; @@ -320,3 +322,24 @@ Resolver::create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, llvm_unreachable("All possible callgraph algorithms should be handled in the " "above switch"); } + +std::optional, + llvm::Type *>> +psr::getStructVCallInfo(const llvm::CallBase *CallSite) { + const auto *Load = + llvm::dyn_cast(CallSite->getCalledOperand()); + if (!Load) { + return std::nullopt; + } + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + if (!GEP || GEP->getNumOperands() < 3 || !GEP->hasAllConstantIndices()) { + return std::nullopt; + } + llvm::SmallVector Indices; + for (const llvm::Use &Idx : GEP->indices()) { + Indices.push_back(llvm::cast(Idx.get())->getZExtValue()); + } + return {{GEP->getPointerOperand(), std::move(Indices), + GEP->getSourceElementType()}}; +} diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index e0cfea22f5..8c9e5e9275 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -125,6 
+126,16 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { std::optional CSRetVal; }; + struct StructVCallRecord { + const llvm::CallBase *CS; + ValueId BaseId; // pts(BaseId) = struct objects + ValueId FPId; // pts(FPId) = fn objects (field-insensitive fallback) + llvm::SmallVector Indices; // all GEP indices + llvm::Type *GEPElemTy; // GEP source element type (for type check) + ArgList Args; + std::optional CSRetVal; + }; + // ---- Data fields ---------------------------------------------------- const LLVMProjectIRDB &IRDB; // NOLINT @@ -143,6 +154,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { llvm::SmallVector UnresolvedFPCalls; llvm::SmallVector UnresolvedVCalls; + llvm::SmallVector UnresolvedStructVCalls; llvm::DenseMap> ConnectedCallees; CallGraphBuilder CGBuilder; @@ -830,6 +842,55 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return NewEdge; } + bool resolveStructVCall(const StructVCallRecord &Rec) { + const ValueId BaseId = rep(Rec.BaseId); + if (!Nodes.inbounds(BaseId)) { + llvm::report_fatal_error("Invalid BaseId in resolveStructVCall"); + } + bool NewEdge = false; + bool NeedFPFallback = false; + // Snapshot: connectCallee->propagate() may grow pts(BaseId). + const RawAliasSet BasePts = Nodes[BaseId].PtsSet; + BasePts.foreach ([&](ValueId ObjId) { + if (!Nodes.inbounds(ObjId)) { + return false; + } + for (const auto &Var : LocalVC.id2vars(ObjId)) { + // Resolve GlobalAlias to the underlying GlobalVariable. + const llvm::Value *Val = Var.getBase().valueOrNull(); + if (const auto *GA = llvm::dyn_cast_or_null(Val)) { + Val = GA->getAliaseeObject(); + } + const auto *GV = llvm::dyn_cast_or_null(Val); + if (!GV || !GV->isConstant() || !GV->hasInitializer()) { + NeedFPFallback = true; + continue; + } + // Type check: GV must be of GEPElemTy or [N x GEPElemTy]. + // Field-insensitive aliasing can put wrong-type objects in pts. 
+ llvm::Type *const GVTy = GV->getValueType(); + if (GVTy != Rec.GEPElemTy) { + const auto *ArrTy = llvm::dyn_cast(GVTy); + if (!ArrTy || ArrTy->getElementType() != Rec.GEPElemTy) { + NeedFPFallback = true; + continue; + } + } + const auto *Callee = + walkConstInitPath(GV->getInitializer(), Rec.Indices); + if (!Callee || !isConsistentCall(Rec.CS, Callee)) { + continue; + } + NewEdge |= connectCallee(Rec.CS, Callee, Rec.Args, Rec.CSRetVal); + } + return true; + }); + if (NeedFPFallback) { + NewEdge |= resolveFPCall(Rec.CS, Rec.FPId, Rec.Args, Rec.CSRetVal); + } + return NewEdge; + } + bool resolveFPCall(const llvm::CallBase *CS, ValueId FPId, const ArgList &Args, std::optional CSRetVal) { FPId = rep(FPId); @@ -905,6 +966,31 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return; } + // Struct-field vtable call: call(load(GEP(base, const_indices...))) + // with a typed (>=3-operand) GEP. Resolve via global initializer for + // const globals; fall back to FP resolution for non-const objects. + if (auto SVInfo = getStructVCallInfo(C)) { + auto &[BasePtr, Indices, GEPElemTy] = *SVInfo; + const auto *Load = llvm::cast(C->getCalledOperand()); + const ValueId BaseId = getOrInsertVar(PAGVariable(BasePtr)); + const ValueId FPId = getOrInsertVar(PAGVariable(Load)); + StructVCallRecord Rec{ + .CS = C, + .BaseId = BaseId, + .FPId = FPId, + .Indices = std::move(Indices), + .GEPElemTy = GEPElemTy, + .Args = std::move(Args), + .CSRetVal = CSRetVal, + }; + resolveStructVCall(Rec); + // llvm::errs() << "[handleCall]: Adding struct-vcall-record #" + // << UnresolvedStructVCalls.size() << " at " + // << llvmIRToString(C) << '\n'; + UnresolvedStructVCalls.push_back(std::move(Rec)); + return; + } + // Indirect call: connect already-known targets, record for fixpoint. 
const ValueId FPId = getOrInsertVar(PAGVariable(FnPtr)); resolveFPCall(C, FPId, Args, CSRetVal); @@ -933,6 +1019,14 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { return NewEdge; } + bool checkUnresolvedStructVCalls() { + bool NewEdge = false; + for (const auto &Rec : UnresolvedStructVCalls) { + NewEdge |= resolveStructVCall(Rec); + } + return NewEdge; + } + // ---- Result construction -------------------------------------------- AndersenOTFResult buildResult() { @@ -1063,6 +1157,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } Changed = checkUnresolvedFPCalls(); Changed |= checkUnresolvedVCalls(); + Changed |= checkUnresolvedStructVCalls(); } while (!FunctionWorklist.empty() || Changed); return buildResult(); diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index adca056732..91eab08530 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -734,3 +734,36 @@ const llvm::DIType *psr::stripPointerTypes(const llvm::DIType *DITy) { } return DITy; } + +const llvm::Function * +psr::walkConstInitPath(const llvm::Constant *Init, + llvm::ArrayRef Indices) { + if (Indices.empty()) { + return llvm::dyn_cast( + Init->stripPointerCastsAndAliases()); + } + const uint64_t Idx0 = Indices[0]; + const llvm::Constant *Elem = nullptr; + if (const auto *CA = llvm::dyn_cast(Init)) { + if (Idx0 >= CA->getNumOperands()) { + return nullptr; + } + Elem = CA->getOperand(Idx0); + } else if (llvm::isa(Init)) { + if (Idx0 != 0) { + return nullptr; + } + Elem = Init; // struct: idx0 is pointer-arithmetic no-op, stay here + } else { + return nullptr; + } + for (const uint64_t Idx : Indices.drop_front(1)) { + const auto *Agg = llvm::dyn_cast(Elem); + if (!Agg || Idx >= Agg->getNumOperands()) { + return nullptr; + } + Elem = Agg->getOperand(Idx); + } + return llvm::dyn_cast_or_null( + Elem->stripPointerCastsAndAliases()); +} diff --git 
a/test/llvm_test_code/pointers/CMakeLists.txt b/test/llvm_test_code/pointers/CMakeLists.txt index b278fc82cd..664f175ef4 100644 --- a/test/llvm_test_code/pointers/CMakeLists.txt +++ b/test/llvm_test_code/pointers/CMakeLists.txt @@ -57,6 +57,7 @@ set(lca_files_mem2reg andersen_otf_interproc.c andersen_otf_fp.c andersen_otf_libc.c + andersen_otf_struct_vtable.c basic_01.c basic_02.c basic_03.c diff --git a/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c b/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c new file mode 100644 index 0000000000..0c25016a20 --- /dev/null +++ b/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c @@ -0,0 +1,17 @@ +// Test: hand-rolled C vtable via const struct global. +// ops->write(...) must resolve precisely to myWrite, not myRead. +// Field-insensitive analysis would add both; the struct-vtable path +// reads the initializer at the specific field index. + +static int myRead(void *ctx) { return 0; } +static int myWrite(void *ctx, int v) { return v; } + +struct Ops { int (*read)(void *); int (*write)(void *, int); }; + +static const struct Ops myOps = { myRead, myWrite }; + +int dispatch(const struct Ops *ops, void *ctx, int v) { + return ops->write(ctx, v); +} + +int main(void) { return dispatch(&myOps, 0, 42); } diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index 41f7cc15f4..ebe13c4151 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -1029,6 +1029,46 @@ TEST(AndersenOTFAATest, FnPtrStoredInStructField) { << "target() must be a callee of the indirect call in do_call()"; } +TEST(AndersenOTFAATest, StructVtableDispatch) { + // Hand-rolled C vtable: const struct Ops { read, write }. + // ops->write(...) must resolve to myWrite only, not myRead. + // Without the struct-vtable path, field-insensitive analysis adds both. 
+ auto IRDB = LLVMProjectIRDB::loadOrExit( + PathToLLFiles + "andersen_otf_struct_vtable_c_m2r_dbg.ll"); + + const auto *DispatchFn = IRDB.getFunctionDefinition("dispatch"); + const auto *MyRead = IRDB.getFunctionDefinition("myRead"); + const auto *MyWrite = IRDB.getFunctionDefinition("myWrite"); + const auto *MainFn = IRDB.getFunctionDefinition("main"); + ASSERT_NE(MainFn, nullptr); + ASSERT_NE(DispatchFn, nullptr); + ASSERT_NE(MyRead, nullptr); + ASSERT_NE(MyWrite, nullptr); + + auto Cmp = std::make_unique>(); + auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get()); + + const llvm::CallBase *IndirectCS = nullptr; + for (const auto &I : llvm::instructions(DispatchFn)) { + const auto *CS = llvm::dyn_cast(&I); + if (!CS || CS->isDebugOrPseudoInst()) { + continue; + } + if (!llvm::isa( + CS->getCalledOperand()->stripPointerCastsAndAliases())) { + IndirectCS = CS; + break; + } + } + ASSERT_NE(IndirectCS, nullptr) << "No indirect call found in dispatch()"; + + const auto &Callees = Res.CG.getCalleesOfCallAt(IndirectCS); + EXPECT_TRUE(llvm::is_contained(Callees, MyWrite)) + << "myWrite must be a callee of ops->write(...)"; + EXPECT_FALSE(llvm::is_contained(Callees, MyRead)) + << "myRead must not be a callee of ops->write(...) 
(field 1, not 0)"; +} + } // namespace int main(int Argc, char **Argv) { From ec5b5a3c1dacb42d4eda3fc7768e759d2cbb881c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 12:55:09 +0200 Subject: [PATCH 24/36] Small deduplication --- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 42 +++++++++++++++++- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 44 +++++-------------- .../Pointer/LLVMPointerAssignmentGraph.cpp | 43 +++--------------- 3 files changed, 58 insertions(+), 71 deletions(-) diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 371fe5d196..f0b63fd96e 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -20,12 +20,17 @@ #include "phasar/Utils/Utilities.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" +#include +#include #include #include @@ -296,6 +301,40 @@ definitelyContainsNoPointer(const llvm::Type *Ty) noexcept { definitelyContainsNoPointer(Val->getType()); } +/// Strips pointer-cast and alias wrappers from \p V, then invokes \p Handler +/// for each concrete underlying value: +/// - If \p V is not a ConstantExpr after stripping, Handler is called once +/// with the stripped value. +/// - If \p V is a ConstantExpr, the expression tree is walked and Handler +/// is called for each GlobalObject leaf. 
+template HandlerT> +void forEachPointerOperand(const llvm::Value *V, HandlerT Handler) { + V = V->stripPointerCastsAndAliases(); + const auto *CExpr = llvm::dyn_cast(V); + if (!CExpr) [[likely]] { + std::invoke(Handler, V); + return; + } + + llvm::SmallPtrSet Seen = {V}; + llvm::SmallVector WL = {CExpr}; + do { + const auto *Curr = WL.pop_back_val(); + for (const auto *Op : Curr->operand_values()) { + if (definitelyContainsNoPointer(Op) || !Seen.insert(Op).second) { + continue; + } + if (const auto *GObj = llvm::dyn_cast(Op)) { + std::invoke(Handler, static_cast(GObj)); + continue; + } + if (const auto *OpUser = llvm::dyn_cast(Op)) { + WL.push_back(OpUser); + } + } + } while (!WL.empty()); +} + /// Approximates, whether the given LLVM value may be address-taken, i.e., /// whether its pointer value is used for other purposes than just /// store/load/gep. @@ -371,8 +410,7 @@ getVaListTagOrNull(const llvm::Function &Fun); /// must be 0 (pointer-arithmetic no-op, struct is not an array). /// - \c Indices[1+] navigate recursively through ConstantAggregate. 
[[nodiscard]] const llvm::Function * -walkConstInitPath(const llvm::Constant *Init, - llvm::ArrayRef Indices); +walkConstInitPath(const llvm::Constant *Init, llvm::ArrayRef Indices); } // namespace psr #endif diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 8c9e5e9275..fab9187f15 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -22,6 +22,7 @@ #include "phasar/Utils/LibrarySummary.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/UnionFind.h" +#include "phasar/Utils/Utilities.h" #include "phasar/Utils/ValueCompressor.h" #include "llvm/ADT/DenseMap.h" @@ -315,41 +316,18 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { // ---- Operand traversal ---------------------------------------------- void forEachOpId(const llvm::Value *V, std::invocable auto Handler) { - V = V->stripPointerCastsAndAliases(); - if (definitelyContainsNoPointer(V)) { + const llvm::Value *Stripped = V->stripPointerCastsAndAliases(); + if (definitelyContainsNoPointer(Stripped)) { return; } - - if (!llvm::isa(V)) { - const ValueId VId = getOrInsertVar(PAGVariable(V)); - if (const auto *GO = llvm::dyn_cast(V)) { - addGlobalPointee(GO, VId); - } - std::invoke(Handler, VId); - return; - } - - // Walk ConstantExpr chains to find the underlying GlobalObject(s). 
- llvm::SmallDenseSet Seen = {V}; - llvm::SmallVector WL = { - llvm::cast(V)}; - do { - const auto *Curr = WL.pop_back_val(); - for (const auto *Op : Curr->operand_values()) { - if (definitelyContainsNoPointer(Op) || !Seen.insert(Op).second) { - continue; - } - if (const auto *GObj = llvm::dyn_cast(Op)) { - const ValueId GId = getOrInsertVar(PAGVariable(GObj)); - addGlobalPointee(GObj, GId); - std::invoke(Handler, GId); - continue; - } - if (const auto *User = llvm::dyn_cast(Op)) { - WL.push_back(User); - } - } - } while (!WL.empty()); + psr::forEachPointerOperand( + Stripped, [this, Handler = copyOrRef(Handler)](const llvm::Value *Op) { + const ValueId VId = getOrInsertVar(PAGVariable(Op)); + if (const auto *GO = llvm::dyn_cast(Op)) { + addGlobalPointee(GO, VId); + } + std::invoke(Handler, VId); + }); } // ---- Constraint insertion ------------------------------------------- diff --git a/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp b/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp index 4a0800c1ee..99127e37c0 100644 --- a/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp +++ b/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp @@ -8,6 +8,7 @@ #include "phasar/Utils/BitSet.h" #include "phasar/Utils/LibCSummary.h" #include "phasar/Utils/MapUtils.h" +#include "phasar/Utils/Utilities.h" #include "phasar/Utils/ValueCompressor.h" #include "llvm/ADT/STLExtras.h" @@ -36,7 +37,6 @@ std::string psr::to_string(PAGVariable Var) { namespace { - struct PAGMappedLibrarySummary { const library_summary::LLVMFunctionDataFlowFacts &Facts; // NOLINT @@ -158,11 +158,10 @@ struct [[clang::internal_linkage]] LLVMPAGBuilder::PAGBuildData { void initializeGlobal(GlobalInitCache &GCache, LLVMPBStrategyRef Strategy, const llvm::GlobalVariable &Glob) { auto GlobObj = getVariable(&Glob, Strategy); - auto Stores = GCache.getOrCreate( - Glob.getInitializer(), - [this, Strategy](const llvm::Value *V) { - return getVariable(V, Strategy); - }); + auto Stores = 
GCache.getOrCreate(Glob.getInitializer(), + [this, Strategy](const llvm::Value *V) { + return getVariable(V, Strategy); + }); for (auto Src : Stores) { // NOTE: We don't consider this a POI for now; probably, that's fine @@ -280,36 +279,8 @@ struct [[clang::internal_linkage]] LLVMPAGBuilder::PAGBuildData { static void handleOperand(const llvm::Value *RawOp, std::invocable auto Handler) { - RawOp = RawOp->stripPointerCastsAndAliases(); - const auto *RawOpCExpr = llvm::dyn_cast(RawOp); - if (!RawOpCExpr) [[likely]] { - // fast-path: - return (void)std::invoke(Handler, RawOp); - } - - llvm::SmallDenseSet Seen = {RawOp}; - llvm::SmallVector WL = {RawOpCExpr}; - do { - const auto *Curr = WL.pop_back_val(); - for (const auto *Op : Curr->operand_values()) { - if (definitelyContainsNoPointer(Op) || !Seen.insert(Op).second) { - continue; - } - - if (const auto *GObj = llvm::dyn_cast(Op)) { - std::invoke(Handler, GObj); - continue; - } - - // TODO: Handle constant GEP! - - if (const auto *OpUser = llvm::dyn_cast(Op)) { - WL.push_back(OpUser); - continue; - } - } - - } while (!WL.empty()); + // TODO: Handle constant GEP! 
+ psr::forEachPointerOperand(RawOp, copyOrRef(Handler)); } void handleStore(LLVMPBStrategyRef Strategy, const llvm::StoreInst *Store) { From d96da4adac9d45609a2039c53d490c27c5ff06b7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 13:40:51 +0200 Subject: [PATCH 25/36] Small manual refactorings --- .../phasar/PhasarLLVM/Pointer/AndersenOTFAA.h | 15 +- include/phasar/Pointer/RawAliasSet.h | 101 +++++ lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 63 +--- .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 350 ++++++++++-------- 4 files changed, 323 insertions(+), 206 deletions(-) diff --git a/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h b/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h index 3f9789e236..8b718379de 100644 --- a/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h +++ b/include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h @@ -37,11 +37,12 @@ class LLVMProjectIRDB; /// \c LLVMUnionFindAliasIterator. struct AndersenOTFResult { TypedVector> AliasSets; - size_t NumVars{}; LLVMBasedCallGraph CG; [[nodiscard]] static constexpr bool isCached() noexcept { return true; } - [[nodiscard]] constexpr size_t size() const noexcept { return NumVars; } + [[nodiscard]] constexpr size_t size() const noexcept { + return AliasSets.size(); + } [[nodiscard]] RawAliasSet getRawAliasSet(ValueId Var) const noexcept { @@ -95,11 +96,11 @@ class AndersenOTFSolver { /// Runs the Andersen OTF fixpoint and returns the raw alias-analysis result /// (no LLVM-value wrapping). If \p VC is null, a fresh one is allocated. 
-[[nodiscard]] AndersenOTFResult computeAndersenOTFRaw( - const LLVMProjectIRDB &IRDB, - llvm::ArrayRef EntryPoints, - MaybeUniquePtr> VC = nullptr, - Soundness S = Soundness::Soundy); +[[nodiscard]] AndersenOTFResult +computeAndersenOTFRaw(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints, + MaybeUniquePtr> VC = nullptr, + Soundness S = Soundness::Soundy); /// Runs the Andersen OTF fixpoint and returns an \c LLVMUnionFindAliasIterator /// that implements \c IsLLVMAliasIterator. diff --git a/include/phasar/Pointer/RawAliasSet.h b/include/phasar/Pointer/RawAliasSet.h index 279e956805..f92994230d 100644 --- a/include/phasar/Pointer/RawAliasSet.h +++ b/include/phasar/Pointer/RawAliasSet.h @@ -10,6 +10,7 @@ *****************************************************************************/ #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SparseBitVector.h" @@ -50,6 +51,17 @@ concept IsRawAliasSet = requires(ASet &MutSet, const ASet &ConstSet, { MutSet.clear() } noexcept; { ConstSet.empty() } noexcept -> std::convertible_to; { ConstSet.size() } noexcept -> std::convertible_to; + { + // Merges the ConstSet into MutSet, as with tryMergeWith, but invokes a + // callback for each element that was newly inserted.The Diff will be + // materialized and merged into that out-param + MutSet.mergeWithDiff(ConstSet, DummyFn{}, MutSet) + } -> std::convertible_to; + { + // Merges the ConstSet into MutSet, as with tryMergeWith, but invokes a + // callback for each element that was newly inserted. + MutSet.mergeWithDiff(ConstSet, DummyFn{}) + } -> std::convertible_to; }; /// Sparse bit-set used to represent alias sets in union-find analyses. 
@@ -117,7 +129,36 @@ template class LLVMRawAliasSet { return Bits == Other.Bits; } + bool mergeWithDiff(const LLVMRawAliasSet &Other, + std::invocable auto WithNewElem, + LLVMRawAliasSet &IntoDiff) { + return mergeWithDiffImpl(Other, copyOrRef(WithNewElem), &IntoDiff); + } + + bool mergeWithDiff(const LLVMRawAliasSet &Other, + std::invocable auto WithNewElem) { + return mergeWithDiffImpl(Other, copyOrRef(WithNewElem), nullptr); + } + private: + bool mergeWithDiffImpl(const LLVMRawAliasSet &Other, + std::invocable auto WithNewElem, + LLVMRawAliasSet *IntoDiff) { + auto Diff = Other.Bits - Bits; + if (Diff.empty()) { + return false; + } + + Bits |= Diff; + if (IntoDiff) { + IntoDiff->Bits |= Diff; + } + for (auto Elem : Diff) { + std::invoke(WithNewElem, IdT(Elem)); + } + return true; + } + llvm::SparseBitVector<> Bits; }; @@ -186,6 +227,66 @@ template class RoaringAliasSet { return Bits == Other.Bits; } + bool mergeWithDiff(const RoaringAliasSet &Other, + std::invocable auto WithNewElem) { + constexpr size_t DiffThreshold = 16; + // operator- is expensive, but it is definitely a lot faster than the + // foreach loop if UPending is large + + if (Other.size() > DiffThreshold) { + RoaringAliasSet Diff = Other - *this; + if (Diff.empty()) { + return false; + } + + *this |= Diff; + + Diff.foreach (copyOrRef(WithNewElem)); + return true; + } + + bool Ret = false; + Other.foreach ([&](IdT Elem) { + if (tryInsert(Elem)) { + std::invoke(WithNewElem, Elem); + Ret = true; + } + }); + return Ret; + } + + bool mergeWithDiff(const RoaringAliasSet &Other, + std::invocable auto WithNewElem, + RoaringAliasSet &IntoDiff) { + constexpr size_t DiffThreshold = 16; + // operator- is expensive, but it is definitely a lot faster than the + // foreach loop if Other is large + + if (Other.size() > DiffThreshold) { + RoaringAliasSet Diff = Other - *this; + if (Diff.empty()) { + return false; + } + + *this |= Diff; + IntoDiff |= Diff; + + Diff.foreach (copyOrRef(WithNewElem)); + return true; + 
} + + bool Ret = false; + Other.foreach ([&](IdT Elem) { + if (tryInsert(Elem)) { + IntoDiff.insert(Elem); + std::invoke(WithNewElem, Elem); + Ret = true; + } + }); + + return Ret; + } + private: RoaringAliasSet(roaring::Roaring &&RR) : Bits(std::move(RR)) {} diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index fab9187f15..14de1650f2 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -250,18 +250,9 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } // Merge pts sets. - { - const auto OldRepPts = Nodes[Rep].PtsSet; - const bool PtsGrew = Nodes[Rep].PtsSet.tryMergeWith(NRPts); - if (PtsGrew) { - // Fire Rep's pre-existing load/store/memcopy constraints for pointees - // absorbed from NonRep that Rep didn't previously have. - const auto Diff = NRPts - OldRepPts; - Nodes[Rep].PendingPts |= Diff; - PropWorklist.push_back(Rep); - Diff.foreach ([&](ValueId NewObj) { onNewPointee(Rep, NewObj); }); - } - } + Nodes[Rep].PtsSet.mergeWithDiff( + NRPts, [&](ValueId NewObj) { onNewPointee(Rep, NewObj); }, + Nodes[Rep].PendingPts); // Snapshot Rep's pts (after merge) for retroactive constraint firing. 
const auto RepPts = Nodes[Rep].PtsSet; @@ -470,30 +461,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } - const bool AddedAny = [&] { - bool AddedAny = false; - constexpr size_t DiffThreshold = 16; - // operator- is expensive, but it is definitely a lot faster than the - // foreach loop if UPending is large - if (UPending.size() > DiffThreshold) { - auto Diff = UPending - Nodes[V].PtsSet; - AddedAny = !Diff.empty(); - if (AddedAny) { - Nodes[V].PtsSet |= Diff; - Nodes[V].PendingPts |= Diff; - Diff.foreach ([this, V](ValueId Obj) { onNewPointee(V, Obj); }); - } - } else { - UPending.foreach ([&](ValueId Obj) { - if (Nodes[V].PtsSet.tryInsert(Obj)) { - Nodes[V].PendingPts.insert(Obj); - onNewPointee(V, Obj); - AddedAny = true; - } - }); - } - return AddedAny; - }(); + const bool AddedAny = Nodes[V].PtsSet.mergeWithDiff( + UPending, [this, V](ValueId Obj) { onNewPointee(V, Obj); }, + Nodes[V].PendingPts); + if (!AddedAny) { // LCD: V has all of U's pending wave, so V.PtsSet ⊇ U.PtsSet. 
if (Nodes[V].AssignDstSet.contains(U)) { @@ -1052,7 +1023,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { continue; } Nodes[RepId].PtsSet.foreach ([&](ValueId Obj) { - if (size_t(Obj) < NumLocal) { + if (Obj2Reps.inbounds(Obj)) { Obj2Reps[Obj].insert(RepId); return true; } @@ -1067,11 +1038,11 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { TypedVector> ObjToAliasExtVIds(NumLocal); { llvm::SmallVector Buf; - for (auto Obj : iota(NumLocal)) { - if (Obj2Reps[Obj].empty()) { + for (const auto &[Obj, Reps] : Obj2Reps.enumerate()) { + if (Reps.empty()) { continue; } - Obj2Reps[Obj].foreach ([&](ValueId AliasRepId) { + Reps.foreach ([&](ValueId AliasRepId) { for (auto EId : RepToExtVIds[AliasRepId]) { Buf.push_back(uint32_t(EId)); } @@ -1082,13 +1053,11 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } } - AndersenOTFResult Result; - Result.NumVars = ExternalVC.size(); - Result.AliasSets.resize(Result.NumVars); + AndersenOTFResult Result{}; + Result.AliasSets.resize(ExternalVC.size()); - for (auto RepId : iota(NumLocal)) { - const auto &MyExtVIds = RepToExtVIds[RepId]; - if (MyExtVIds.empty()) { + for (const auto &[RepId, ExtVIds] : RepToExtVIds.enumerate()) { + if (ExtVIds.empty()) { continue; } if (!Nodes.inbounds(RepId)) { @@ -1107,7 +1076,7 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { }); // Broadcast to every external ID mapped to this representative. 
- for (auto ExtVId : MyExtVIds) { + for (auto ExtVId : ExtVIds) { Result.AliasSets[ExtVId] |= AliasExtVIds; } } diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index ebe13c4151..7caa4fa499 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -14,8 +14,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/Support/raw_ostream.h" #include "SrcCodeLocationEntry.h" @@ -71,11 +71,7 @@ void dumpAnalysisState(const ValueCompressor &Compressor, } llvm::errs() << "}\n"; llvm::errs() << "AliasSets: {\n"; - for (auto VId : iota(Results.NumVars)) { - if (!Results.AliasSets.inbounds(VId)) { - continue; - } - + for (const auto &[VId, Aliases] : Results.AliasSets.enumerate()) { bool First = true; for (const auto &Var : Compressor.id2vars(VId)) { llvm::errs() << " " << to_string(Var); @@ -88,13 +84,13 @@ void dumpAnalysisState(const ValueCompressor &Compressor, continue; } - if (Results.AliasSets[VId].empty()) { + if (Aliases.empty()) { llvm::errs() << " aliases: EMPTY\n"; continue; } llvm::errs() << " aliases: {\n"; - Results.AliasSets[VId].foreach ([&](ValueId AId) { + Aliases.foreach ([&](ValueId AId) { llvm::errs() << " " << stringifyVal(Compressor, AId) << '\n'; }); llvm::errs() << " }\n"; @@ -463,20 +459,26 @@ TEST(AndersenOTFAATest, RecursionTwoObjectsMerge) { // k and l alias the chain (via their objects) but not each other. 
const TSL Ptr = TSL(ArgInFun{.Idx = 0, .InFunction = "selfRecursion"}); const TSL Ret = TSL(RetVal{.InFunction = "selfRecursion"}); - const TSL CallX = TSL(LineColFunOp{.Line = 15, .Col = 0, + const TSL CallX = TSL(LineColFunOp{.Line = 15, + .Col = 0, .InFunction = "main", .OpCode = llvm::Instruction::Call}); - const TSL CallY = TSL(LineColFunOp{.Line = 16, .Col = 0, + const TSL CallY = TSL(LineColFunOp{.Line = 16, + .Col = 0, .InFunction = "main", .OpCode = llvm::Instruction::Call}); - const TSL KAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 15, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL LAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 16, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL KAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 15, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 16, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); const std::vector Chain = {Ptr, Ret, CallX, CallY}; GTMap ExpectedResults; std::vector ChainAndBoth = Chain; @@ -502,25 +504,32 @@ TEST(AndersenOTFAATest, MutualRecursionTwoObjects) { const TSL BackPtr = TSL(ArgInFun{.Idx = 0, .InFunction = "Back"}); const TSL ForthRet = TSL(RetVal{.InFunction = "Forth"}); const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); - // xx1=Back(&k) line 27, xx2=Back(&k) line 29, yy1=Back(&l) line 31, yy2=Back(&l) line 33 + // xx1=Back(&k) line 27, xx2=Back(&k) line 29, yy1=Back(&l) line 31, + // yy2=Back(&l) line 33 const auto MkCall = [](uint32_t Line) { - return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); + return TSL(LineColFunOp{.Line = Line, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); }; const TSL XX1 = 
MkCall(27); const TSL XX2 = MkCall(29); const TSL YY1 = MkCall(31); const TSL YY2 = MkCall(33); - const TSL KAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 27, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL LAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 31, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL KAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 27, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 31, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); const std::vector Chain = {ForthPtr, BackPtr, ForthRet, BackRet, - XX1, XX2, YY1, YY2}; + XX1, XX2, YY1, YY2}; GTMap ExpectedResults; std::vector ChainAndBoth = Chain; ChainAndBoth.push_back(KAlloca); @@ -547,23 +556,28 @@ TEST(AndersenOTFAATest, ThreeWayMutualRecursion) { const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); const TSL StopRet = TSL(RetVal{.InFunction = "Stop"}); // x=Back(&k) line 36, y=Forth(&l) line 37 - const TSL CallX = TSL(LineColFunOp{.Line = 36, .Col = 0, + const TSL CallX = TSL(LineColFunOp{.Line = 36, + .Col = 0, .InFunction = "main", .OpCode = llvm::Instruction::Call}); - const TSL CallY = TSL(LineColFunOp{.Line = 37, .Col = 0, + const TSL CallY = TSL(LineColFunOp{.Line = 37, + .Col = 0, .InFunction = "main", .OpCode = llvm::Instruction::Call}); - const TSL KAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 36, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL LAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 37, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const std::vector Chain = {ForthPtr, BackPtr, StopPtr, - ForthRet, BackRet, StopRet, - CallX, CallY}; + const TSL 
KAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 36, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 37, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {ForthPtr, BackPtr, StopPtr, ForthRet, + BackRet, StopRet, CallX, CallY}; GTMap ExpectedResults; std::vector ChainAndBoth = Chain; ChainAndBoth.push_back(KAlloca); @@ -589,18 +603,26 @@ TEST(AndersenOTFAATest, ThreeArgReturnQContextInsensitive) { const TSL ArgR = TSL(ArgInFun{.Idx = 2, .InFunction = "argretq"}); const TSL Ret = TSL(RetVal{.InFunction = "argretq"}); // xx1=argretq(&x,&x,&x) line 8, yy1=argretq(&y,&y,&y) line 9 - const TSL XX1 = TSL(LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); - const TSL YY1 = TSL(LineColFunOp{.Line = 9, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); - const TSL XAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL YAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 9, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL XX1 = TSL(LineColFunOp{.Line = 8, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL YY1 = TSL(LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); + const TSL XAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 8, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); const std::vector Chain = {ArgP, ArgQ, ArgR, Ret, XX1, YY1}; GTMap 
ExpectedResults; std::vector ChainAndBoth = Chain; @@ -639,8 +661,8 @@ TEST(AndersenOTFAATest, FuncPtrCallbackThreeWayMerge) { TEST(AndersenOTFAATest, FourLevelChainTwoObjects) { // context_05_1: 4-level identity chain (id4→id3→id2→id1), called 4 times - // with &x and &y. All params/rets and call sites merge (context-insensitive). - // x and y allocas alias the chain but not each other. + // with &x and &y. All params/rets and call sites merge + // (context-insensitive). x and y allocas alias the chain but not each other. const auto MkArg = [](llvm::StringRef Fn) { return TSL(ArgInFun{.Idx = 0, .InFunction = Fn}); }; @@ -648,23 +670,29 @@ TEST(AndersenOTFAATest, FourLevelChainTwoObjects) { return TSL(RetVal{.InFunction = Fn}); }; const auto MkCall = [](uint32_t Line) { - return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); + return TSL(LineColFunOp{.Line = Line, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); }; const std::vector Chain = { MkArg("id1"), MkArg("id2"), MkArg("id3"), MkArg("id4"), MkRet("id1"), MkRet("id2"), MkRet("id3"), MkRet("id4"), - MkCall(11), MkCall(12), MkCall(13), MkCall(14), + MkCall(11), MkCall(12), MkCall(13), MkCall(14), }; // arg 0 of call at line 11 is &x; arg 0 of call at line 13 is &y. 
- const TSL XAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 11, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL YAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 13, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL XAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 11, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 13, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); GTMap ExpectedResults; auto ChainAndBoth = Chain; ChainAndBoth.push_back(XAlloca); @@ -691,22 +719,27 @@ TEST(AndersenOTFAATest, FourLevelChainVariantTwoObjects) { return TSL(RetVal{.InFunction = Fn}); }; const auto MkCall = [](uint32_t Line) { - return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); + return TSL(LineColFunOp{.Line = Line, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); }; const std::vector Chain = { - MkArg("buzz"), MkArg("baz"), MkArg("bar"), MkArg("foo"), - MkRet("buzz"), MkRet("baz"), MkRet("bar"), MkRet("foo"), - MkCall(11), MkCall(12), + MkArg("buzz"), MkArg("baz"), MkArg("bar"), MkArg("foo"), MkRet("buzz"), + MkRet("baz"), MkRet("bar"), MkRet("foo"), MkCall(11), MkCall(12), }; - const TSL XAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 11, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL YAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 12, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL XAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 11, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = + 
TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 12, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); GTMap ExpectedResults; auto ChainAndBoth = Chain; ChainAndBoth.push_back(XAlloca); @@ -730,19 +763,25 @@ TEST(AndersenOTFAATest, RecursionFourCallSites) { const TSL Ptr = TSL(ArgInFun{.Idx = 0, .InFunction = "selfRecursion"}); const TSL Ret = TSL(RetVal{.InFunction = "selfRecursion"}); const auto MkCall = [](uint32_t Line) { - return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); + return TSL(LineColFunOp{.Line = Line, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); }; - const std::vector Chain = {Ptr, Ret, MkCall(15), MkCall(17), - MkCall(18), MkCall(20)}; - const TSL KAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 15, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL LAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 18, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {Ptr, Ret, MkCall(15), + MkCall(17), MkCall(18), MkCall(20)}; + const TSL KAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 15, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 18, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); GTMap ExpectedResults; auto ChainAndBoth = Chain; ChainAndBoth.push_back(KAlloca); @@ -770,21 +809,26 @@ TEST(AndersenOTFAATest, ThreeWayMutualRecursionFourCallSites) { const TSL BackRet = TSL(RetVal{.InFunction = "Back"}); const TSL StopRet = TSL(RetVal{.InFunction = "Stop"}); const auto MkCall = [](uint32_t Line) { - return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); + return 
TSL(LineColFunOp{.Line = Line, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); }; - const std::vector Chain = {ForthPtr, BackPtr, StopPtr, - ForthRet, BackRet, StopRet, - MkCall(36), MkCall(37), - MkCall(38), MkCall(39)}; - const TSL KAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 36, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL LAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 38, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const std::vector Chain = {ForthPtr, BackPtr, StopPtr, ForthRet, + BackRet, StopRet, MkCall(36), MkCall(37), + MkCall(38), MkCall(39)}; + const TSL KAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 36, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL LAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 38, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); GTMap ExpectedResults; auto ChainAndBoth = Chain; ChainAndBoth.push_back(KAlloca); @@ -810,20 +854,26 @@ TEST(AndersenOTFAATest, TwoArgSecondRetFourCallSites) { const TSL Q = TSL(ArgInFun{.Idx = 1, .InFunction = "argretq"}); const TSL Ret = TSL(RetVal{.InFunction = "argretq"}); const auto MkCall = [](uint32_t Line) { - return TSL(LineColFunOp{.Line = Line, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}); + return TSL(LineColFunOp{.Line = Line, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}); }; - const std::vector Chain = {P, Q, Ret, MkCall(8), MkCall(9), - MkCall(10), MkCall(11)}; + const std::vector Chain = {P, Q, Ret, MkCall(8), + MkCall(9), MkCall(10), MkCall(11)}; // arg 1 of call at line 8 is &x (argretq(&y, &x)); arg 0 is &y. 
- const TSL XAlloca = TSL(OperandOf{ - .OperandIndex = 1, - .Inst = LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); - const TSL YAlloca = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 8, .Col = 0, .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL XAlloca = + TSL(OperandOf{.OperandIndex = 1, + .Inst = LineColFunOp{.Line = 8, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); + const TSL YAlloca = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 8, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); GTMap ExpectedResults; auto ChainAndBoth = Chain; ChainAndBoth.push_back(XAlloca); @@ -843,8 +893,7 @@ TEST(AndersenOTFAATest, TwoArgSecondRetFourCallSites) { TEST(AndersenOTFAATest, VTableDispatch) { // Virtual call via A* in call_get must resolve through the vtable. // A::get() returns @x, so call_get's return must alias @x. - const TSL CallGetRet = - TSL(RetVal{.InFunction = "_ZL8call_getP1A"}); + const TSL CallGetRet = TSL(RetVal{.InFunction = "_ZL8call_getP1A"}); const TSL X = TSL(GlobalVar{.Name = "x"}); const GTMap ExpectedResults = { {CallGetRet, {CallGetRet, X}}, @@ -855,43 +904,40 @@ TEST(AndersenOTFAATest, VTableDispatch) { TEST(AndersenOTFAATest, GlobalPtrInitializer) { // @p = global ptr @x; loading from @p must alias @x (Bug 2 soundness). 
- const TSL LoadQ = - TSL(LineColFunOp{.Line = 7, - .Col = 12, - .InFunction = "main", - .OpCode = llvm::Instruction::Load}); + const TSL LoadQ = TSL(LineColFunOp{.Line = 7, + .Col = 12, + .InFunction = "main", + .OpCode = llvm::Instruction::Load}); const TSL X = TSL(GlobalVar{.Name = "x"}); const GTMap ExpectedResults = { {LoadQ, {LoadQ, X}}, {X, {X, LoadQ}}, }; - doAnalysisAndCheckExact("andersen_otf_global_init_c_dbg.ll", - ExpectedResults); + doAnalysisAndCheckExact("andersen_otf_global_init_c_dbg.ll", ExpectedResults); } TEST(AndersenOTFAATest, MergeLoadConstraint) { // h->f->h cycle; h returns *p. // ret(h) must alias x and y after h(&px) and h(&py) (Bug 1 soundness). const TSL RetH = TSL(RetVal{.InFunction = "h"}); - const TSL VarX = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 17, - .Col = 8, - .InFunction = "main", - .OpCode = llvm::Instruction::Store}}); - const TSL VarY = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 18, - .Col = 8, - .InFunction = "main", - .OpCode = llvm::Instruction::Store}}); + const TSL VarX = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 17, + .Col = 8, + .InFunction = "main", + .OpCode = llvm::Instruction::Store}}); + const TSL VarY = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 18, + .Col = 8, + .InFunction = "main", + .OpCode = llvm::Instruction::Store}}); const GTMap ExpectedResults = { {RetH, {RetH, VarX, VarY}}, {VarX, {RetH, VarX}}, {VarY, {RetH, VarY}}, }; - doAnalysisAndCheckExact("andersen_otf_merge_load_c_dbg.ll", - ExpectedResults); + doAnalysisAndCheckExact("andersen_otf_merge_load_c_dbg.ll", ExpectedResults); } TEST(AndersenOTFAATest, AlreadyProcessedCalleePropagation) { @@ -952,8 +998,8 @@ TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { { auto Cmp = std::make_unique>(); - auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), - Soundness::Soundy); + auto Res = + computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), 
Soundness::Soundy); EXPECT_TRUE(HasCGVertex(Res.CG, CloseStdout)) << "close_stdout must be a CG vertex at Soundy"; EXPECT_TRUE(HasCGVertex(Res.CG, FlushImpl)) @@ -962,8 +1008,8 @@ TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { { auto Cmp = std::make_unique>(); - auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), - Soundness::Unsound); + auto Res = + computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), Soundness::Unsound); EXPECT_FALSE(HasCGVertex(Res.CG, CloseStdout)) << "close_stdout must not be a CG vertex at Unsound"; EXPECT_FALSE(HasCGVertex(Res.CG, FlushImpl)) @@ -979,12 +1025,12 @@ TEST(AndersenOTFAATest, LibCSummaryStrcpyReturnAliasesDst) { .Col = 0, .InFunction = "main", .OpCode = llvm::Instruction::Call}); - const TSL Buf = TSL(OperandOf{ - .OperandIndex = 0, - .Inst = LineColFunOp{.Line = 9, - .Col = 0, - .InFunction = "main", - .OpCode = llvm::Instruction::Call}}); + const TSL Buf = + TSL(OperandOf{.OperandIndex = 0, + .Inst = LineColFunOp{.Line = 9, + .Col = 0, + .InFunction = "main", + .OpCode = llvm::Instruction::Call}}); const GTMap ExpectedResults = { {Call, {Call, Buf}}, {Buf, {Buf, Call}}, From 8f03c6032dacadcf4ca2987ab382883b593e7b00 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 13:51:17 +0200 Subject: [PATCH 26/36] minor in test --- .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index 7caa4fa499..2d36c6e188 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -125,27 +125,26 @@ void doAnalysisAndCheckExact( Entries.push_back(Func); } - auto Compressor = std::make_unique>(); - AndersenOTFResult Results = - computeAndersenOTFRaw(IRDB, Entries, Compressor.get()); + ValueCompressor Compressor; + AndersenOTFResult Results = computeAndersenOTFRaw(IRDB, 
Entries, &Compressor); // Build domain from all values explicitly named in the GT. llvm::SmallDenseSet Domain; for (const auto &[PtrVar, ExpectedAliasVars] : ExpectedResults) { - Domain.insert(asId(*Compressor, IRDB, PtrVar)); + Domain.insert(asId(Compressor, IRDB, PtrVar)); for (const auto &AliasVar : ExpectedAliasVars) { - Domain.insert(asId(*Compressor, IRDB, AliasVar)); + Domain.insert(asId(Compressor, IRDB, AliasVar)); } } for (const auto &[PtrVar, ExpectedAliasVars] : ExpectedResults) { - const auto PtrId = asId(*Compressor, IRDB, PtrVar); - const RawAliasSet &Computed = Results.getRawAliasSet(PtrId); + const auto PtrId = asId(Compressor, IRDB, PtrVar); + const auto &Computed = Results.getRawAliasSet(PtrId); RawAliasSet Expected; // llvm::errs() << "For PtrId: #" << uint32_t(PtrId) << ":\n"; for (const auto &AliasVar : ExpectedAliasVars) { - auto AliasId = asId(*Compressor, IRDB, AliasVar); + auto AliasId = asId(Compressor, IRDB, AliasVar); Expected.insert(AliasId); // llvm::errs() << "> Insert #" << uint32_t(AliasId) // << " into Expected due to " << AliasVar << '\n'; @@ -157,7 +156,7 @@ void doAnalysisAndCheckExact( ADD_FAILURE_AT(Loc.file_name(), Loc.line()) << "Missing expected alias of " << PtrVar << "(#" << uint32_t(PtrId) << "): #" << uint32_t(AliasId) << " as " - << stringifyVal(*Compressor, AliasId); + << stringifyVal(Compressor, AliasId); } }); @@ -168,12 +167,12 @@ void doAnalysisAndCheckExact( } ADD_FAILURE_AT(Loc.file_name(), Loc.line()) << "Unexpected alias of " << PtrVar << ": " - << stringifyVal(*Compressor, VId); + << stringifyVal(Compressor, VId); }); } if (DumpResults || ::testing::Test::HasFailure()) { - dumpAnalysisState(*Compressor, Results); + dumpAnalysisState(Compressor, Results); } } @@ -215,13 +214,13 @@ TEST(AndersenOTFAATest, FuncByNameInVC) { const auto *MainFn = IRDB.getFunctionDefinition("main"); ASSERT_NE(MainFn, nullptr); - auto Compressor = std::make_unique>(); + ValueCompressor Compressor; [[maybe_unused]] auto Results = - 
computeAndersenOTFRaw(IRDB, {MainFn}, Compressor.get()); + computeAndersenOTFRaw(IRDB, {MainFn}, &Compressor); const auto *IdFn = IRDB.getFunctionDefinition("id"); ASSERT_NE(IdFn, nullptr); - auto MaybeId = Compressor->getOrNull(IdFn); + auto MaybeId = Compressor.getOrNull(IdFn); EXPECT_TRUE(MaybeId.has_value()) << "Function 'id' not in VC — address-taken functions must be inserted"; } @@ -997,9 +996,8 @@ TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { }; { - auto Cmp = std::make_unique>(); auto Res = - computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), Soundness::Soundy); + computeAndersenOTFRaw(IRDB, {MainFn}, nullptr, Soundness::Soundy); EXPECT_TRUE(HasCGVertex(Res.CG, CloseStdout)) << "close_stdout must be a CG vertex at Soundy"; EXPECT_TRUE(HasCGVertex(Res.CG, FlushImpl)) @@ -1007,9 +1005,8 @@ TEST(AndersenOTFAATest, SoundnessFnPtrToExternalDecl) { } { - auto Cmp = std::make_unique>(); auto Res = - computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get(), Soundness::Unsound); + computeAndersenOTFRaw(IRDB, {MainFn}, nullptr, Soundness::Unsound); EXPECT_FALSE(HasCGVertex(Res.CG, CloseStdout)) << "close_stdout must not be a CG vertex at Unsound"; EXPECT_FALSE(HasCGVertex(Res.CG, FlushImpl)) @@ -1052,8 +1049,7 @@ TEST(AndersenOTFAATest, FnPtrStoredInStructField) { ASSERT_NE(DoCall, nullptr); ASSERT_NE(Target, nullptr); - auto Cmp = std::make_unique>(); - auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get()); + auto Res = computeAndersenOTFRaw(IRDB, {MainFn}); // Find the indirect call instruction in do_call. 
const llvm::CallBase *IndirectCS = nullptr; @@ -1091,8 +1087,7 @@ TEST(AndersenOTFAATest, StructVtableDispatch) { ASSERT_NE(MyRead, nullptr); ASSERT_NE(MyWrite, nullptr); - auto Cmp = std::make_unique>(); - auto Res = computeAndersenOTFRaw(IRDB, {MainFn}, Cmp.get()); + auto Res = computeAndersenOTFRaw(IRDB, {MainFn}); const llvm::CallBase *IndirectCS = nullptr; for (const auto &I : llvm::instructions(DispatchFn)) { From 86cdd07db3243d3f14699428b311e733b41063cb Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 17:39:40 +0200 Subject: [PATCH 27/36] Add MemorySSA to AndersenOTFAA + let AI debug a soundness-bug. Root-cause was integral stores being found as reaching definition for a ptr-load --- .../phasar/PhasarLLVM/Pointer/MemSSAUtils.h | 51 +++++++++ lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 40 +++++++ .../Pointer/LLVMPointerAssignmentGraph.cpp | 105 ++++-------------- lib/PhasarLLVM/Pointer/MemSSAUtils.cpp | 68 ++++++++++++ 4 files changed, 183 insertions(+), 81 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h create mode 100644 lib/PhasarLLVM/Pointer/MemSSAUtils.cpp diff --git a/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h b/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h new file mode 100644 index 0000000000..443f2f5de8 --- /dev/null +++ b/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h @@ -0,0 +1,51 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" + +namespace psr { + +// Bundle of per-function analyses for the built-in MemorySSA provider. +// Members are declared in initialization order: each field depends only on +// the ones before it. +struct MemSSABundle { + llvm::AssumptionCache AC; + llvm::DominatorTree DT; + llvm::BasicAAResult BAA; + llvm::AAResults AA; + llvm::MemorySSA MSSA; + + explicit MemSSABundle(llvm::Function &F, const llvm::TargetLibraryInfo *TLI); +}; + +/// Walks the MemorySSA def chain rooted at MA, collecting all StoreInst +/// reaching definitions into ReachingDefs. +/// Returns true if a LiveOnEntry def is reachable (value may come from outside +/// the function). In that case, ReachingDefs may be incompletely populated. +[[nodiscard]] bool collectReachingDefs( + llvm::MemoryAccess *MA, const llvm::MemorySSA &MSSA, + llvm::SmallPtrSetImpl &ReachingDefs, + llvm::SmallPtrSetImpl &Visited); + +/// Collects all store instructions that may define the value loaded from the +/// given load. Forwards to the above collectReachingDefs overload. 
+[[nodiscard]] bool collectReachingDefs( + const llvm::LoadInst *Load, llvm::MemorySSA &MSSA, + llvm::SmallPtrSetImpl &ReachingDefs); + +} // namespace psr diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 14de1650f2..586915f08f 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -13,6 +13,7 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" +#include "phasar/PhasarLLVM/Pointer/MemSSAUtils.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" #include "phasar/PhasarLLVM/Utils/LLVMFunctionDataFlowFacts.h" @@ -29,7 +30,11 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -146,6 +151,10 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { Soundness SoundnessFlag; library_summary::LLVMFunctionDataFlowFacts LibFacts; + llvm::TargetLibraryInfoWrapperPass TLA{}; + std::optional MSSABundle{}; + llvm::MemorySSA *CurrentMemSSA = nullptr; + llvm::SmallVector FunctionWorklist; llvm::DenseSet Queued; // ever pushed to worklist llvm::DenseSet Processed; @@ -506,6 +515,8 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { } void processFunction(const llvm::Function *F) { + MSSABundle.emplace(const_cast(*F), &TLA.getTLI(*F)); + CurrentMemSSA = &MSSABundle->MSSA; for (const auto &Arg : F->args()) { if (!definitelyContainsNoPointer(&Arg)) { (void)getOrInsertVar(PAGVariable(&Arg)); @@ -587,6 +598,35 @@ struct 
[[clang::internal_linkage]] AndersenOTFSolver::SolverData { if (definitelyContainsNoPointer(L)) { return; } + if (CurrentMemSSA) { + llvm::SmallPtrSet Defs; + const bool HasLiveOnEntry = collectReachingDefs(L, *CurrentMemSSA, Defs); + if (!HasLiveOnEntry) { + if (Defs.size() == 1) { + const auto *ValueOp = (*Defs.begin())->getValueOperand(); + if (!llvm::isa(ValueOp) && + !definitelyContainsNoPointer(ValueOp)) { + addPtrAlias(L, ValueOp); + return; + } + // Non-pointer or ConstantExpr store value: fall through to addLoad. + } else { + const ValueId DstId = getOrInsertVar(PAGVariable(L)); + bool AnyEdge = false; + for (const auto *Def : Defs) { + forEachOpId(Def->getValueOperand(), [&](ValueId SrcId) { + addAssignEdge(SrcId, DstId); + AnyEdge = true; + }); + } + if (AnyEdge) { + return; + } + // All reaching stores have non-pointer value operands: + // fall through to addLoad. + } + } + } const ValueId DstId = getOrInsertVar(PAGVariable(L)); forEachOpId(L->getPointerOperand(), [&](ValueId PtrId) { addLoad(PtrId, DstId); }); diff --git a/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp b/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp index 3f20cb2203..d3b0ffae77 100644 --- a/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp +++ b/lib/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.cpp @@ -2,6 +2,7 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" +#include "phasar/PhasarLLVM/Pointer/MemSSAUtils.h" #include "phasar/PhasarLLVM/Utils/LLVMFunctionDataFlowFacts.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Pointer/PointerAssignmentGraph.h" @@ -12,15 +13,11 @@ #include "phasar/Utils/ValueCompressor.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include 
"llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" @@ -76,60 +73,6 @@ struct PAGMappedLibrarySummary { } }; -// Bundle of per-function analyses for the built-in MemorySSA provider. -// Members are declared in initialization order: each field depends only on -// the ones before it. MSSA is constructed last in the body (after -// AA.addAAResult) because MemorySSA is neither movable nor copyable. -struct MemSSABundle { - llvm::AssumptionCache AC; - llvm::DominatorTree DT; - llvm::BasicAAResult BAA; - llvm::AAResults AA; - llvm::MemorySSA MSSA; - - explicit MemSSABundle(llvm::Function &F, const llvm::TargetLibraryInfo *TLI) - : AC(F), DT(F), - BAA(F.getParent()->getDataLayout(), F, assertNotNull(TLI), AC, &DT), - AA([](const auto *TLI, auto *BAA) { - llvm::AAResults AA(*TLI); - AA.addAAResult(*BAA); - return AA; - }(TLI, &BAA)), - MSSA(F, &AA, &DT) {} -}; - -// returns HasLiveOnEntry -static bool -collectReachingDefs(llvm::MemoryAccess *MA, const llvm::MemorySSA &MSSA, - llvm::SmallPtrSetImpl &Defs, - llvm::SmallPtrSetImpl &Visited) { - if (!Visited.insert(MA).second) { - return false; - } - if (MSSA.isLiveOnEntryDef(MA)) { - return true; - } - if (auto *Def = llvm::dyn_cast(MA)) { - // We only care about stores for now - if (const auto *St = - llvm::dyn_cast(Def->getMemoryInst())) { - Defs.insert(St); - return false; - } - return true; - } - if (auto *Phi = llvm::dyn_cast(MA)) { - for (const auto &Inc : Phi->incoming_values()) { - bool LOE = collectReachingDefs(llvm::cast(Inc.get()), - MSSA, Defs, Visited); - if (LOE) { - return true; - } - } - } - return false; -} - } // namespace struct [[clang::internal_linkage]] LLVMPAGBuilder::PAGBuildData { @@ -384,33 +327,33 @@ struct [[clang::internal_linkage]] LLVMPAGBuilder::PAGBuildData { } if (CurrentMemSSA) { - if (auto *Access = 
CurrentMemSSA->getMemoryAccess(Ld)) { - auto *Clobber = - CurrentMemSSA->getWalker()->getClobberingMemoryAccess(Access); - llvm::SmallPtrSet Defs; - llvm::SmallPtrSet Visited; - const bool HasLiveOnEntry = - collectReachingDefs(Clobber, *CurrentMemSSA, Defs, Visited); - - if (!HasLiveOnEntry) { - - if (Defs.size() == 1) { - const auto *ValueOp = (*Defs.begin())->getValueOperand(); - if (!llvm::isa(ValueOp)) { - VC.addAlias(Ld, getVariable(ValueOp, Strategy)); - return; - } + llvm::SmallPtrSet Defs; + const bool HasLiveOnEntry = collectReachingDefs(Ld, *CurrentMemSSA, Defs); + if (!HasLiveOnEntry) { + if (Defs.size() == 1) { + const auto *ValueOp = (*Defs.begin())->getValueOperand(); + if (!llvm::isa(ValueOp) && + !definitelyContainsNoPointer(ValueOp)) { + VC.addAlias(Ld, getVariable(ValueOp, Strategy)); + return; } + } - auto LoadObj = getVariable(Ld, Strategy); - for (const auto *Def : Defs) { - handleOperand(Def->getValueOperand(), [&](const auto *ValOp) { - Strategy.onAddEdge(getVariable(ValOp, Strategy), LoadObj, - Assign{}, Ld); - }); - } + auto LoadObj = getVariable(Ld, Strategy); + bool AddedAny = false; + for (const auto *Def : Defs) { + handleOperand(Def->getValueOperand(), [&](const auto *ValOp) { + Strategy.onAddEdge(getVariable(ValOp, Strategy), LoadObj, Assign{}, + Ld); + AddedAny = true; + }); + } + + if (AddedAny) { return; } + // All reaching stores have non-pointer value operands: + // fall through to addEdge. } } diff --git a/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp b/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp new file mode 100644 index 0000000000..90c9839578 --- /dev/null +++ b/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp @@ -0,0 +1,68 @@ +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Pointer/MemSSAUtils.h" + +#include "phasar/Utils/Utilities.h" + +using namespace psr; + +MemSSABundle::MemSSABundle(llvm::Function &F, + const llvm::TargetLibraryInfo *TLI) + : AC(F), DT(F), + BAA(F.getParent()->getDataLayout(), F, assertNotNull(TLI), AC, &DT), + AA([](const auto *TLI, auto *BAA) { + llvm::AAResults AA(*TLI); + AA.addAAResult(*BAA); + return AA; + }(TLI, &BAA)), + MSSA(F, &AA, &DT) {} + +bool psr::collectReachingDefs( + llvm::MemoryAccess *MA, const llvm::MemorySSA &MSSA, + llvm::SmallPtrSetImpl &ReachingDefs, + llvm::SmallPtrSetImpl &Visited) { + if (!Visited.insert(MA).second) { + return false; + } + if (MSSA.isLiveOnEntryDef(MA)) { + return true; + } + if (auto *Def = llvm::dyn_cast(MA)) { + // We only care about stores for now + if (const auto *St = + llvm::dyn_cast(Def->getMemoryInst())) { + ReachingDefs.insert(St); + return false; + } + return true; + } + if (auto *Phi = llvm::dyn_cast(MA)) { + for (const auto &Inc : Phi->incoming_values()) { + bool LOE = collectReachingDefs(llvm::cast(Inc.get()), + MSSA, ReachingDefs, Visited); + if (LOE) { + return true; + } + } + } + return false; +} + +bool psr::collectReachingDefs( + const llvm::LoadInst *Load, llvm::MemorySSA &MSSA, + llvm::SmallPtrSetImpl &ReachingDefs) { + if (auto *Access = MSSA.getMemoryAccess(Load)) { + auto *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(Access); + llvm::SmallPtrSet Visited; + return collectReachingDefs(Clobber, MSSA, ReachingDefs, Visited); + } + + return true; +} From 9eeb88a1753d22a78ef8e2bab537a7897a9d205d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 17:48:53 +0200 Subject: [PATCH 28/36] Add other lightweight alias oracles from LLVM to MemSSABundle for slightly better precision --- include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h | 4 ++++ 
lib/PhasarLLVM/Pointer/MemSSAUtils.cpp | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h b/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h index 443f2f5de8..f9fd5aa5c9 100644 --- a/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h +++ b/include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h @@ -14,7 +14,9 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" @@ -26,6 +28,8 @@ namespace psr { struct MemSSABundle { llvm::AssumptionCache AC; llvm::DominatorTree DT; + llvm::TypeBasedAAResult TBAA; + llvm::ScopedNoAliasAAResult SNA; llvm::BasicAAResult BAA; llvm::AAResults AA; llvm::MemorySSA MSSA; diff --git a/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp b/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp index 90c9839578..c56e9d6db9 100644 --- a/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp +++ b/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp @@ -15,13 +15,15 @@ using namespace psr; MemSSABundle::MemSSABundle(llvm::Function &F, const llvm::TargetLibraryInfo *TLI) - : AC(F), DT(F), + : AC(F), DT(F), TBAA(), SNA(), BAA(F.getParent()->getDataLayout(), F, assertNotNull(TLI), AC, &DT), - AA([](const auto *TLI, auto *BAA) { + AA([](const auto *TLI, auto *TBAA, auto *SNA, auto *BAA) { llvm::AAResults AA(*TLI); + AA.addAAResult(*TBAA); + AA.addAAResult(*SNA); AA.addAAResult(*BAA); return AA; - }(TLI, &BAA)), + }(TLI, &TBAA, &SNA, &BAA)), MSSA(F, &AA, &DT) {} bool psr::collectReachingDefs( From b499c67f051b886f17cc4272e587fecc491dde67 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 18:02:57 +0200 Subject: [PATCH 29/36] Remove stale commented-out debug print in handleCall Co-Authored-By: Claude Sonnet 4.6 --- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp 
| 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 586915f08f..1543867b09 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -973,9 +973,6 @@ struct [[clang::internal_linkage]] AndersenOTFSolver::SolverData { .CSRetVal = CSRetVal, }; resolveStructVCall(Rec); - // llvm::errs() << "[handleCall]: Adding struct-vcall-record #" - // << UnresolvedStructVCalls.size() << " at " - // << llvmIRToString(C) << '\n'; UnresolvedStructVCalls.push_back(std::move(Rec)); return; } From 8d527c645429b533184bf94644ed1cd7df0e6d2f Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 18:21:17 +0200 Subject: [PATCH 30/36] pre-commit --- .../phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h | 5 ++--- .../phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h | 11 +++++------ lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 8 +++----- .../pointers/andersen_otf_fp_struct_field.c | 6 ++---- .../llvm_test_code/pointers/andersen_otf_merge_load.c | 4 +--- .../pointers/andersen_otf_struct_vtable.c | 7 +++++-- 6 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 12b91c72c0..fe97e30657 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -58,9 +58,8 @@ getVFTIndexAndVT(const llvm::CallBase *CallSite); /// /// Returns \c {base_ptr, all_GEP_indices, gep_source_elem_ty} on match, /// or \c std::nullopt otherwise. 
-[[nodiscard]] std::optional< - std::tuple, - llvm::Type *>> +[[nodiscard]] std::optional, llvm::Type *>> getStructVCallInfo(const llvm::CallBase *CallSite); /// Assuming that `CallSite` is a call to a non-static member function, diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h b/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h index 12e5fee836..59ec22d22c 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h @@ -40,8 +40,8 @@ struct GlobalInitCache { /// from \p Const. \p GetVar maps an \c llvm::Value* to a \c ValueId /// (typically \c getOrInsertVar). template GetVarFn> - [[nodiscard]] llvm::ArrayRef - getOrCreate(const llvm::Constant *Const, GetVarFn &&GetVar) { + [[nodiscard]] llvm::ArrayRef getOrCreate(const llvm::Constant *Const, + GetVarFn &&GetVar) { if (definitelyContainsNoPointer(Const)) { return {}; } @@ -58,8 +58,8 @@ struct GlobalInitCache { if (const auto *CGep = llvm::dyn_cast(Const)) { // TODO: Properly handle constant GEPs - return getOrCreate( - llvm::cast(CGep->getPointerOperand()), GetVar); + return getOrCreate(llvm::cast(CGep->getPointerOperand()), + GetVar); } if (Const->getType()->isPointerTy()) { @@ -71,8 +71,7 @@ struct GlobalInitCache { if (const auto *Agg = llvm::dyn_cast(Const)) { if (Agg->getType()->isArrayTy() && - definitelyContainsNoPointer( - Agg->getType()->getArrayElementType())) { + definitelyContainsNoPointer(Agg->getType()->getArrayElementType())) { return {}; } for (size_t I = 0, N = Agg->getNumOperands(); I < N; ++I) { diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index d6859370c1..9f8f2bbca8 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -747,12 +747,10 @@ const llvm::DIType *psr::stripPointerTypes(const llvm::DIType *DITy) { return DITy; } -const llvm::Function * -psr::walkConstInitPath(const llvm::Constant *Init, - 
llvm::ArrayRef Indices) { +const llvm::Function *psr::walkConstInitPath(const llvm::Constant *Init, + llvm::ArrayRef Indices) { if (Indices.empty()) { - return llvm::dyn_cast( - Init->stripPointerCastsAndAliases()); + return llvm::dyn_cast(Init->stripPointerCastsAndAliases()); } const uint64_t Idx0 = Indices[0]; const llvm::Constant *Elem = nullptr; diff --git a/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c b/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c index 1566578d20..ab2414b2cf 100644 --- a/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c +++ b/test/llvm_test_code/pointers/andersen_otf_fp_struct_field.c @@ -8,12 +8,10 @@ struct Ctx { static void *target(void *arg) { return arg; } -static void init_ctx(struct Ctx *ctx, void *(*fn)(void *)) { - ctx->fn = fn; -} +static void init_ctx(struct Ctx *ctx, void *(*fn)(void *)) { ctx->fn = fn; } static void *do_call(struct Ctx *ctx, void *arg) { - return ctx->fn(arg); // indirect call via struct field + return ctx->fn(arg); // indirect call via struct field } int main(void) { diff --git a/test/llvm_test_code/pointers/andersen_otf_merge_load.c b/test/llvm_test_code/pointers/andersen_otf_merge_load.c index 1645247649..5a12a1afc5 100644 --- a/test/llvm_test_code/pointers/andersen_otf_merge_load.c +++ b/test/llvm_test_code/pointers/andersen_otf_merge_load.c @@ -7,9 +7,7 @@ static int *h(int **p) { return *p; } -static int *f(int **p) { - return h(p); -} +static int *f(int **p) { return h(p); } int main() { int x = 0; diff --git a/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c b/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c index 0c25016a20..b5c8d3beae 100644 --- a/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c +++ b/test/llvm_test_code/pointers/andersen_otf_struct_vtable.c @@ -6,9 +6,12 @@ static int myRead(void *ctx) { return 0; } static int myWrite(void *ctx, int v) { return v; } -struct Ops { int (*read)(void *); int (*write)(void *, int); }; 
+struct Ops { + int (*read)(void *); + int (*write)(void *, int); +}; -static const struct Ops myOps = { myRead, myWrite }; +static const struct Ops myOps = {myRead, myWrite}; int dispatch(const struct Ops *ops, void *ctx, int v) { return ops->write(ctx, v); From af9e847dc78c018e29c652f35305d5c7c162d945 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 18:30:57 +0200 Subject: [PATCH 31/36] Fix compilation with LLVM > 19 --- lib/PhasarLLVM/Pointer/MemSSAUtils.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp b/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp index c56e9d6db9..0d1f024ef9 100644 --- a/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp +++ b/lib/PhasarLLVM/Pointer/MemSSAUtils.cpp @@ -15,7 +15,12 @@ using namespace psr; MemSSABundle::MemSSABundle(llvm::Function &F, const llvm::TargetLibraryInfo *TLI) - : AC(F), DT(F), TBAA(), SNA(), + : AC(F), DT(F), TBAA( +#if LLVM_VERSION_MAJOR > 19 + /*UsingTypeSanitizer=*/false +#endif + ), + SNA(), BAA(F.getParent()->getDataLayout(), F, assertNotNull(TLI), AC, &DT), AA([](const auto *TLI, auto *TBAA, auto *SNA, auto *BAA) { llvm::AAResults AA(*TLI); @@ -24,7 +29,8 @@ MemSSABundle::MemSSABundle(llvm::Function &F, AA.addAAResult(*BAA); return AA; }(TLI, &TBAA, &SNA, &BAA)), - MSSA(F, &AA, &DT) {} + MSSA(F, &AA, &DT) { +} bool psr::collectReachingDefs( llvm::MemoryAccess *MA, const llvm::MemorySSA &MSSA, From 886dac8df30faf5310781ee5aac4567b6df53d10 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 18:42:40 +0200 Subject: [PATCH 32/36] Add missing parts from phasarllvm/pointer to C++20 module --- include/phasar/PhasarLLVM/Pointer.h | 6 ++++ lib/PhasarLLVM/ControlFlow/ControlFlow.cppm | 1 + lib/PhasarLLVM/Pointer/Pointer.cppm | 38 ++++++++++++++------- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/phasar/PhasarLLVM/Pointer.h b/include/phasar/PhasarLLVM/Pointer.h index be473838bc..336d28a97c 100644 --- 
a/include/phasar/PhasarLLVM/Pointer.h +++ b/include/phasar/PhasarLLVM/Pointer.h @@ -12,10 +12,16 @@ #include "phasar/Config/phasar-config.h" // for PHASAR_USE_SVF #include "phasar/PhasarLLVM/Pointer/AliasAnalysisView.h" +#include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" #include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" +#include "phasar/PhasarLLVM/Pointer/LLVMPointsToInfo.h" #include "phasar/PhasarLLVM/Pointer/LLVMPointsToUtils.h" +#include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAA.h" +#include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/MemSSAUtils.h" #ifdef PHASAR_USE_SVF #include "phasar/PhasarLLVM/Pointer/SVF/SVFPointsToSet.h" diff --git a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm index 629bd45daf..fcba61a7e5 100644 --- a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm +++ b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm @@ -26,6 +26,7 @@ using psr::getEntryFunctionsMut; using psr::getNonPureVirtualVFTEntry; using psr::getReceiverType; using psr::getReceiverTypeName; +using psr::getStructVCallInfo; using psr::getVFTIndex; using psr::GlobalCtorsDtorsModel; using psr::ICFGBase; diff --git a/lib/PhasarLLVM/Pointer/Pointer.cppm b/lib/PhasarLLVM/Pointer/Pointer.cppm index 92661ac141..4ee1f0d16e 100644 --- a/lib/PhasarLLVM/Pointer/Pointer.cppm +++ b/lib/PhasarLLVM/Pointer/Pointer.cppm @@ -1,33 +1,47 @@ module; -#include "phasar/Config/phasar-config.h" -#include "phasar/PhasarLLVM/Pointer/AliasAnalysisView.h" -#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h" -#include "phasar/PhasarLLVM/Pointer/LLVMPointsToInfo.h" -#include 
"phasar/PhasarLLVM/Pointer/LLVMPointsToUtils.h" - -#ifdef PHASAR_USE_SVF -#include "phasar/PhasarLLVM/Pointer/SVF/SVFPointsToSet.h" -#endif +#include "phasar/PhasarLLVM/Pointer.h" export module phasar.llvm.pointer; export namespace psr { using psr::AliasAnalysisView; using psr::AliasInfoTraits; +using psr::AndersenOTFResult; +using psr::AndersenOTFSolver; +using psr::collectReachingDefs; +using psr::computeAndersenOTF; +using psr::computeAndersenOTFRaw; +using psr::computeBotCtxIndSensUnionFindAA; +using psr::computeBotCtxIndSensUnionFindAARaw; +using psr::computeBotCtxSensUnionFindAA; +using psr::computeBotCtxSensUnionFindAARaw; +using psr::computeCtxIndSensUnionFindAA; +using psr::computeCtxIndSensUnionFindAARaw; +using psr::computeCtxSensUnionFindAA; +using psr::computeCtxSensUnionFindAARaw; +using psr::computeIndSensUnionFindAA; +using psr::computeIndSensUnionFindAARaw; +using psr::computeUnionFindAA; +using psr::computeUnionFindAARaw; using psr::FilteredLLVMAliasSet; using psr::FunctionAliasView; +using psr::GlobalInitCache; using psr::isInterestingPointer; using psr::LLVMAliasInfo; using psr::LLVMAliasInfoRef; using psr::LLVMAliasIteratorRef; using psr::LLVMAliasSet; using psr::LLVMAliasSetData; +using psr::LLVMLocalUnionFindAliasIterator; +using psr::LLVMLocalUnionFindAliasIteratorMixin; using psr::LLVMPointsToIterator; using psr::LLVMPointsToIteratorRef; +using psr::llvmUnionFindAliasHandler; +using psr::LLVMUnionFindAliasIterator; +using psr::LLVMUnionFindAliasIteratorMixin; +using psr::MemSSABundle; +using psr::pag::LLVMCGProvider; #ifdef PHASAR_USE_SVF using psr::createLLVMSVFPointsToIterator; From 175025a816196390a4fd4541166b67646a844a90 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 18:49:10 +0200 Subject: [PATCH 33/36] Let AI fix a LLVM-version compatibility issue in AndersenOTFAATest --- .../PhasarLLVM/Pointer/AndersenOTFAATest.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git 
a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp index 2d36c6e188..d941b3e4d3 100644 --- a/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp +++ b/unittests/PhasarLLVM/Pointer/AndersenOTFAATest.cpp @@ -919,16 +919,19 @@ TEST(AndersenOTFAATest, MergeLoadConstraint) { // h->f->h cycle; h returns *p. // ret(h) must alias x and y after h(&px) and h(&py) (Bug 1 soundness). const TSL RetH = TSL(RetVal{.InFunction = "h"}); + // Operand 1 (pointer) of "int x = 0" / "int y = 0" stores — stable across + // LLVM versions (unlike the px/py initialization stores whose debug + // location moved from first-use to declaration site between LLVM 16 and 22). const TSL VarX = - TSL(OperandOf{.OperandIndex = 0, - .Inst = LineColFunOp{.Line = 17, - .Col = 8, + TSL(OperandOf{.OperandIndex = 1, + .Inst = LineColFunOp{.Line = 13, + .Col = 7, .InFunction = "main", .OpCode = llvm::Instruction::Store}}); const TSL VarY = - TSL(OperandOf{.OperandIndex = 0, - .Inst = LineColFunOp{.Line = 18, - .Col = 8, + TSL(OperandOf{.OperandIndex = 1, + .Inst = LineColFunOp{.Line = 14, + .Col = 7, .InFunction = "main", .OpCode = llvm::Instruction::Store}}); const GTMap ExpectedResults = { From 8d71110feffd4957cfdbbcb4a6567fd3f2f0b2d0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 19:22:39 +0200 Subject: [PATCH 34/36] Fix dependency between phasar_llvm_controlflow and phasar_llvm_pointer --- .../ControlFlow/Resolver/Resolver.h | 29 +--- .../PhasarLLVM/Utils/VirtualCallUtils.h | 52 +++++++ .../ControlFlow/Resolver/Resolver.cpp | 128 ----------------- lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp | 2 +- lib/PhasarLLVM/Pointer/CMakeLists.txt | 1 + lib/PhasarLLVM/Utils/VirtualCallUtils.cpp | 135 ++++++++++++++++++ 6 files changed, 190 insertions(+), 157 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Utils/VirtualCallUtils.h create mode 100644 lib/PhasarLLVM/Utils/VirtualCallUtils.cpp diff --git 
a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index fe97e30657..fbbc70d21f 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -18,16 +18,15 @@ #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Utils/VirtualCallUtils.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/DerivedTypes.h" #include #include #include -#include namespace llvm { class Instruction; @@ -42,26 +41,6 @@ class LLVMVFTableProvider; class DIBasedTypeHierarchy; enum class CallGraphAnalysisType; -/// Assuming that `CallSite` is a virtual call through a vtable, retrieves the -/// index in the vtable of the virtual function called. -[[nodiscard]] std::optional -getVFTIndex(const llvm::CallBase *CallSite); - -/// Similar to getVFTIndex(), but also returns a pointer to the vtable -[[nodiscard]] std::optional> -getVFTIndexAndVT(const llvm::CallBase *CallSite); - -/// Detects the pattern \c call(load(GEP(base, const_indices...))) with a -/// typed (>=3-operand) GEP, i.e. an indirect call through a struct function -/// pointer field. Distinct from the 2-operand raw-pointer C++ vptr case -/// handled by \c getVFTIndexAndVT. -/// -/// Returns \c {base_ptr, all_GEP_indices, gep_source_elem_ty} on match, -/// or \c std::nullopt otherwise. -[[nodiscard]] std::optional, llvm::Type *>> -getStructVCallInfo(const llvm::CallBase *CallSite); - /// Assuming that `CallSite` is a call to a non-static member function, /// retrieves the type of the receiver. 
Returns nullptr, if the receiver-type /// could not be extracted @@ -76,12 +55,6 @@ getReceiverType(const llvm::CallBase *CallSite); [[nodiscard]] std::string getReceiverTypeName(const llvm::CallBase *CallSite); -/// Checks whether the signature of `DestFun` matches the required withature of -/// `CallSite`, such that `DestFun` qualifies as callee-candidate, if `CallSite` -/// is an indirect/virtual call. -[[nodiscard]] bool isConsistentCall(const llvm::CallBase *CallSite, - const llvm::Function *DestFun); - [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); diff --git a/include/phasar/PhasarLLVM/Utils/VirtualCallUtils.h b/include/phasar/PhasarLLVM/Utils/VirtualCallUtils.h new file mode 100644 index 0000000000..9cd766ff74 --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/VirtualCallUtils.h @@ -0,0 +1,52 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "llvm/ADT/SmallVector.h" + +#include +#include +#include + +namespace llvm { +class CallBase; +class Value; +class Type; +class Function; +} // namespace llvm + +namespace psr { + +/// Assuming that `CallSite` is a virtual call through a vtable, retrieves the +/// index in the vtable of the virtual function called. +[[nodiscard]] std::optional +getVFTIndex(const llvm::CallBase *CallSite); + +/// Similar to getVFTIndex(), but also returns a pointer to the vtable +[[nodiscard]] std::optional> +getVFTIndexAndVT(const llvm::CallBase *CallSite); + +/// Detects the pattern \c call(load(GEP(base, const_indices...))) with a +/// typed (>=3-operand) GEP, i.e. an indirect call through a struct function +/// pointer field. 
Distinct from the 2-operand raw-pointer C++ vptr case +/// handled by \c getVFTIndexAndVT. +/// +/// Returns \c {base_ptr, all_GEP_indices, gep_source_elem_ty} on match, +/// or \c std::nullopt otherwise. +[[nodiscard]] std::optional, llvm::Type *>> +getStructVCallInfo(const llvm::CallBase *CallSite); + +/// Checks whether the signature of `DestFun` matches the required signature of +/// `CallSite`, such that `DestFun` qualifies as callee-candidate, if `CallSite` +/// is an indirect/virtual call. +[[nodiscard]] bool isConsistentCall(const llvm::CallBase *CallSite, + const llvm::Function *DestFun); +} // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 1014145452..c4a59d60ab 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -49,50 +49,6 @@ using namespace psr; -std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { - // deal with a virtual member function - // retrieve the vtable entry that is called - const auto *Load = - llvm::dyn_cast(CallSite->getCalledOperand()); - if (Load == nullptr) { - return std::nullopt; - } - const auto *GEP = - llvm::dyn_cast(Load->getPointerOperand()); - if (GEP == nullptr) { - return std::nullopt; - } - if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { - return CI->getZExtValue(); - } - return std::nullopt; -} - -std::optional> -psr::getVFTIndexAndVT(const llvm::CallBase *CallSite) { - // deal with a virtual member function - // retrieve the vtable entry that is called - const auto *Load = - llvm::dyn_cast(CallSite->getCalledOperand()); - if (Load == nullptr) { - return std::nullopt; - } - - const auto *GEP = - llvm::dyn_cast(Load->getPointerOperand()); - // Vtable GEPs index into a pointer array with a single index. - // Multi-index GEPs (e.g. struct field access) are not vtable patterns.
- if (GEP == nullptr || GEP->getNumOperands() != 2) { - return std::nullopt; - } - - if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { - return {{GEP->getPointerOperand(), CI->getZExtValue()}}; - } - - return std::nullopt; -} - const llvm::DIType *psr::getReceiverType(const llvm::CallBase *CallSite) { if (!CallSite || CallSite->arg_empty() || (CallSite->hasStructRetAttr() && CallSite->arg_size() < 2)) { @@ -143,69 +99,6 @@ std::string psr::getReceiverTypeName(const llvm::CallBase *CallSite) { return ""; } -bool psr::isConsistentCall(const llvm::CallBase *CallSite, - const llvm::Function *DestFun) { - if (CallSite->arg_size() < DestFun->arg_size()) { - return false; - } - if (CallSite->arg_size() != DestFun->arg_size() && !DestFun->isVarArg()) { - return false; - } - - for (const auto &[Param, ArgOp] : - llvm::zip_first(DestFun->args(), CallSite->args())) { - - const auto *ParamTy = Param.getType(); - const auto *ArgTy = ArgOp->getType(); - - if (ParamTy == ArgTy) { - // Trivial equality - continue; - } - - if (ParamTy->getTypeID() != ArgTy->getTypeID()) { - // Trivial non-equality, e.g. PointerType and IntegerType - return false; - } - - if (ParamTy->isPointerTy()) { - if (Param.hasByValAttr() != - CallSite->isByValArgument(ArgOp.getOperandNo())) { - return false; - } - - const auto *ParamSRetTy = Param.getParamStructRetType(); - const auto *ArgSRetTy = - CallSite->getParamStructRetType(ArgOp.getOperandNo()); - if ((ParamSRetTy != nullptr) != (ArgSRetTy != nullptr)) { - return false; - } - - if (ParamSRetTy && ArgSRetTy) { - // TODO: For better precision, compare the sret types as well - // Trivial non-equality, e.g. PointerType and IntegerType - if (ParamSRetTy->getTypeID() != ArgSRetTy->getTypeID()) { - // Trivial non-equality, e.g. 
PointerType and IntegerType - return false; - } - } - } - - if (ParamTy->isStructTy()) { - // Copied comment from struct-case in isTypeMatchForFunctionArgument(): - // > Well, we could do sanity checks here, but if the analysed code is - // > insane we would miss callees, so we don't do that. - - continue; - } - - // Types are non-equal and we could not find a reason to treat the same - return false; - } - - return true; -} - bool psr::isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP) { assert(Inst != nullptr); @@ -372,24 +265,3 @@ Resolver::create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, llvm_unreachable("All possible callgraph algorithms should be handled in the " "above switch"); } - -std::optional, - llvm::Type *>> -psr::getStructVCallInfo(const llvm::CallBase *CallSite) { - const auto *Load = - llvm::dyn_cast(CallSite->getCalledOperand()); - if (!Load) { - return std::nullopt; - } - const auto *GEP = - llvm::dyn_cast(Load->getPointerOperand()); - if (!GEP || GEP->getNumOperands() < 3 || !GEP->hasAllConstantIndices()) { - return std::nullopt; - } - llvm::SmallVector Indices; - for (const llvm::Use &Idx : GEP->indices()) { - Indices.push_back(llvm::cast(Idx.get())->getZExtValue()); - } - return {{GEP->getPointerOperand(), std::move(Indices), - GEP->getSourceElementType()}}; -} diff --git a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp index 1543867b09..b8f9118e43 100644 --- a/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp +++ b/lib/PhasarLLVM/Pointer/AndersenOTFAA.cpp @@ -9,7 +9,6 @@ #include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h" -#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h" #include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h" @@ -18,6 +17,7 @@ #include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" #include 
"phasar/PhasarLLVM/Utils/LLVMFunctionDataFlowFacts.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/PhasarLLVM/Utils/VirtualCallUtils.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/LibCSummary.h" #include "phasar/Utils/LibrarySummary.h" diff --git a/lib/PhasarLLVM/Pointer/CMakeLists.txt b/lib/PhasarLLVM/Pointer/CMakeLists.txt index 1736a41820..bb01878905 100644 --- a/lib/PhasarLLVM/Pointer/CMakeLists.txt +++ b/lib/PhasarLLVM/Pointer/CMakeLists.txt @@ -9,6 +9,7 @@ add_phasar_library(phasar_llvm_pointer phasar_controlflow phasar_llvm_utils phasar_llvm_db + phasar_llvm_typehierarchy LLVM_LINK_COMPONENTS Core diff --git a/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp b/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp new file mode 100644 index 0000000000..6134e61917 --- /dev/null +++ b/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp @@ -0,0 +1,135 @@ +#include "phasar/PhasarLLVM/Utils/VirtualCallUtils.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" + +using namespace psr; + +std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { + // deal with a virtual member function + // retrieve the vtable entry that is called + const auto *Load = + llvm::dyn_cast(CallSite->getCalledOperand()); + if (Load == nullptr) { + return std::nullopt; + } + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + if (GEP == nullptr) { + return std::nullopt; + } + if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { + return CI->getZExtValue(); + } + return std::nullopt; +} + +std::optional> +psr::getVFTIndexAndVT(const llvm::CallBase *CallSite) { + // deal with a virtual member function + // retrieve the vtable entry that is called + const auto *Load = + llvm::dyn_cast(CallSite->getCalledOperand()); + if (Load == nullptr) { + return std::nullopt; + } + + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + // Vtable GEPs index into a pointer array with a single index. 
+ // Multi-index GEPs (e.g. struct field access) are not vtable patterns. + if (GEP == nullptr || GEP->getNumOperands() != 2) { + return std::nullopt; + } + + if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { + return {{GEP->getPointerOperand(), CI->getZExtValue()}}; + } + + return std::nullopt; +} + +std::optional, + llvm::Type *>> +psr::getStructVCallInfo(const llvm::CallBase *CallSite) { + const auto *Load = + llvm::dyn_cast(CallSite->getCalledOperand()); + if (!Load) { + return std::nullopt; + } + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + if (!GEP || GEP->getNumOperands() < 3 || !GEP->hasAllConstantIndices()) { + return std::nullopt; + } + llvm::SmallVector Indices; + for (const llvm::Use &Idx : GEP->indices()) { + Indices.push_back(llvm::cast(Idx.get())->getZExtValue()); + } + return {{GEP->getPointerOperand(), std::move(Indices), + GEP->getSourceElementType()}}; +} + +bool psr::isConsistentCall(const llvm::CallBase *CallSite, + const llvm::Function *DestFun) { + if (CallSite->arg_size() < DestFun->arg_size()) { + return false; + } + if (CallSite->arg_size() != DestFun->arg_size() && !DestFun->isVarArg()) { + return false; + } + + for (const auto &[Param, ArgOp] : + llvm::zip_first(DestFun->args(), CallSite->args())) { + + const auto *ParamTy = Param.getType(); + const auto *ArgTy = ArgOp->getType(); + + if (ParamTy == ArgTy) { + // Trivial equality + continue; + } + + if (ParamTy->getTypeID() != ArgTy->getTypeID()) { + // Trivial non-equality, e.g. 
PointerType and IntegerType + return false; + } + + if (ParamTy->isPointerTy()) { + if (Param.hasByValAttr() != + CallSite->isByValArgument(ArgOp.getOperandNo())) { + return false; + } + + const auto *ParamSRetTy = Param.getParamStructRetType(); + const auto *ArgSRetTy = + CallSite->getParamStructRetType(ArgOp.getOperandNo()); + if ((ParamSRetTy != nullptr) != (ArgSRetTy != nullptr)) { + return false; + } + + if (ParamSRetTy && ArgSRetTy) { + // TODO: For better precision, compare the sret types as well + // Trivial non-equality, e.g. PointerType and IntegerType + if (ParamSRetTy->getTypeID() != ArgSRetTy->getTypeID()) { + // Trivial non-equality, e.g. PointerType and IntegerType + return false; + } + } + } + + if (ParamTy->isStructTy()) { + // Copied comment from struct-case in isTypeMatchForFunctionArgument(): + // > Well, we could do sanity checks here, but if the analysed code is + // > insane we would miss callees, so we don't do that. + + continue; + } + + // Types are non-equal and we could not find a reason to treat the same + return false; + } + + return true; +} \ No newline at end of file From 718cc0b703945f5477238363860676efd606dd25 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 19:28:26 +0200 Subject: [PATCH 35/36] pre-commit --- lib/PhasarLLVM/Utils/VirtualCallUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp b/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp index 6134e61917..87cd85b0dc 100644 --- a/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp +++ b/lib/PhasarLLVM/Utils/VirtualCallUtils.cpp @@ -132,4 +132,4 @@ bool psr::isConsistentCall(const llvm::CallBase *CallSite, } return true; -} \ No newline at end of file +} From d902f2ad0482029d22eb2eb4edda9e02d98675b5 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Jun 2026 20:00:15 +0200 Subject: [PATCH 36/36] Fix CRoaring install --- CMakeLists.txt | 9 +++++++-- Config.cmake.in | 8 ++++++++ 2 files changed, 15 
insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 948bff787e..7579e6292d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -343,8 +343,13 @@ include(add_llvm) add_llvm() # Roaring -set(ENABLE_ROARING_TESTS OFF) -add_subdirectory(external/CRoaring EXCLUDE_FROM_ALL) + +find_package(roaring QUIET) +if(NOT TARGET roaring::roaring) + set(ENABLE_ROARING_TESTS OFF) + add_subdirectory(external/CRoaring) + set(PHASAR_PROVIDE_CROARING ON) +endif() # SVF option(PHASAR_USE_SVF "Use SVF for more options in alias analysis (default is OFF)" OFF) diff --git a/Config.cmake.in b/Config.cmake.in index 085a277031..ffdc52fbaa 100644 --- a/Config.cmake.in +++ b/Config.cmake.in @@ -15,6 +15,14 @@ set(PHASAR_USE_LLVM_FAT_LIB @USE_LLVM_FAT_LIB@) set(PHASAR_BUILD_DYNLIB @PHASAR_BUILD_DYNLIB@) set(PHASAR_USE_Z3 @PHASAR_USE_Z3@) set(PHASAR_BUILD_MODULES @PHASAR_BUILD_MODULES@) +set(PHASAR_PROVIDE_CROARING @PHASAR_PROVIDE_CROARING@) + +if (PHASAR_PROVIDE_CROARING) + # TODO: Is that path portable? + include("${CMAKE_CURRENT_LIST_DIR}/../roaring/roaring-targets.cmake") +else() + find_dependency(roaring) +endif() if (PHASAR_USE_Z3) find_dependency(Z3 REQUIRED)