Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
8908155
Initial vibe-coded impl of andersen analysis with OTF CG construction
fabianbs96 Apr 23, 2026
c59d591
Add online cycle detection
fabianbs96 Apr 23, 2026
d06b6ec
Merge branch 'development' into f-AndersOTFAA
fabianbs96 Apr 29, 2026
ace582c
Fix reference invalidation, missing retroactive firing, and arg alias…
fabianbs96 Apr 29, 2026
c244b9f
Merge branch 'development' into f-AndersOTFAA
fabianbs96 May 3, 2026
8bba47d
Vibe code some tests + identify bug that converts many alias sets int…
fabianbs96 May 3, 2026
87641c6
Reduce the size of AndersenVar by half (sth the AI apparently could n…
fabianbs96 May 3, 2026
a289f5f
Add AndersenOTF tests for deep chains, recursion, and function pointers
fabianbs96 May 14, 2026
c8c0260
Perf improvement in AndersOTFAA
fabianbs96 May 19, 2026
992604d
Vibe-code delta propagation
fabianbs96 May 19, 2026
8f3f88e
Let AI write more tests
fabianbs96 May 19, 2026
4619bcb
Handle global initializers
fabianbs96 May 20, 2026
0f54781
Reduce unnecessary copies
fabianbs96 May 20, 2026
532c621
Fix globals + fnptr handling
fabianbs96 May 26, 2026
4789a3a
Better vtable handling
fabianbs96 May 27, 2026
ed0b6cb
Fix minor bug in vtable handling + add failing test case for too earl…
fabianbs96 May 27, 2026
057076a
Let claude fix the early fixpoint bug
fabianbs96 May 27, 2026
76020c1
minor
fabianbs96 May 27, 2026
a1be493
minor
fabianbs96 May 28, 2026
fd3e394
Expose call-graph built by AndersenOTFAA + add some configurable soun…
fabianbs96 May 28, 2026
c130387
Debug missing callees in AndersenOTFAA
fabianbs96 May 30, 2026
b47acc1
minor perf improvement
fabianbs96 May 30, 2026
098d60b
minor
fabianbs96 May 30, 2026
c8621e7
Add library-summary handling to AndersenOTFAA.
fabianbs96 Jun 3, 2026
9876945
Best-effort approach to more precisely handle calls through hand-roll…
fabianbs96 Jun 3, 2026
ec5b5a3
Small deduplication
fabianbs96 Jun 4, 2026
d96da4a
Small manual refactorings
fabianbs96 Jun 4, 2026
8f03c60
minor in test
fabianbs96 Jun 4, 2026
b0d6a79
Merge branch 'development' into f-AndersOTFAA
fabianbs96 Jun 4, 2026
86cdd07
Add MemorySSA to AndersenOTFAA + let AI debug a soundness-bug.
fabianbs96 Jun 4, 2026
9eeb88a
Add other lightweight alias oracles from LLVM to MemSSABundle for sli…
fabianbs96 Jun 4, 2026
b499c67
Remove stale commented-out debug print in handleCall
fabianbs96 Jun 4, 2026
8d527c6
pre-commit
fabianbs96 Jun 4, 2026
af9e847
Fix compilation with LLVM > 19
fabianbs96 Jun 4, 2026
886dac8
Add missing parts from phasarllvm/pointer to C++20 module
fabianbs96 Jun 4, 2026
175025a
Let AI fix a LLVM-version compatibility issue in AndersenOTFAATest
fabianbs96 Jun 4, 2026
8d71110
Fix dependency between phasar_llvm_controlflow and phasar_llvm_pointer
fabianbs96 Jun 4, 2026
718cc0b
pre-commit
fabianbs96 Jun 4, 2026
d902f2a
Fix CRoaring install
fabianbs96 Jun 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@
[submodule "external/json-schema-validator"]
path = external/json-schema-validator
url = https://github.com/pboettch/json-schema-validator.git
[submodule "external/CRoaring"]
path = external/CRoaring
url = https://github.com/fabianbs96/CRoaring.git
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ set(RELEASE_CONFIGURATIONS RELWITHDEBINFO RELEASE CACHE INTERNAL "" FORCE)

string(APPEND CMAKE_CXX_FLAGS " -MP -fstack-protector-strong -ffunction-sections -fdata-sections -pipe")
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer")
string(APPEND CMAKE_C_FLAGS_DEBUG " -fno-omit-frame-pointer")
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer")
string(APPEND CMAKE_C_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer")
string(APPEND CMAKE_CXX_FLAGS_RELEASE "")

option(CMAKE_VISIBILITY_INLINES_HIDDEN "Hide inlined functions from the DSO table (default ON)" ON)
Expand Down Expand Up @@ -123,6 +125,7 @@ if (NOT "${PHASAR_TARGET_ARCH_INTERNAL}" STREQUAL "")
if (MARCH_SUPPORTED)
message(STATUS "Target architecture '${PHASAR_TARGET_ARCH_INTERNAL}' enabled")
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=${PHASAR_TARGET_ARCH_INTERNAL}")
string(APPEND CMAKE_C_FLAGS_RELEASE " -march=${PHASAR_TARGET_ARCH_INTERNAL}")
else()
message(WARNING "Target architecture '${PHASAR_TARGET_ARCH_INTERNAL}' not supported. Fallback to generic build")
endif()
Expand Down Expand Up @@ -339,6 +342,15 @@ set(PHASAR_LLVM_VERSION 16 CACHE STRING "The LLVM major-version that PhASAR shou
include(add_llvm)
add_llvm()

# Roaring

find_package(roaring QUIET)
if(NOT TARGET roaring::roaring)
set(ENABLE_ROARING_TESTS OFF)
add_subdirectory(external/CRoaring)
set(PHASAR_PROVIDE_CROARING ON)
endif()

# SVF
option(PHASAR_USE_SVF "Use SVF for more options in alias analysis (default is OFF)" OFF)
if(PHASAR_USE_SVF)
Expand Down
8 changes: 8 additions & 0 deletions Config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ set(PHASAR_USE_LLVM_FAT_LIB @USE_LLVM_FAT_LIB@)
set(PHASAR_BUILD_DYNLIB @PHASAR_BUILD_DYNLIB@)
set(PHASAR_USE_Z3 @PHASAR_USE_Z3@)
set(PHASAR_BUILD_MODULES @PHASAR_BUILD_MODULES@)
set(PHASAR_PROVIDE_CROARING @PHASAR_PROVIDE_CROARING@)

if (PHASAR_PROVIDE_CROARING)
# TODO: Is that path portable?
include("${CMAKE_CURRENT_LIST_DIR}/../roaring/roaring-targets.cmake")
else()
find_dependency(roaring)
endif()

if (PHASAR_USE_Z3)
find_dependency(Z3 REQUIRED)
Expand Down
1 change: 1 addition & 0 deletions external/CRoaring
Submodule CRoaring added at 5505f1
17 changes: 1 addition & 16 deletions include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
#define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_

#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h"
#include "phasar/PhasarLLVM/Utils/VirtualCallUtils.h"
#include "phasar/Utils/MaybeUniquePtr.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"

#include <memory>
#include <optional>
Expand All @@ -41,15 +41,6 @@ class LLVMVFTableProvider;
class DIBasedTypeHierarchy;
enum class CallGraphAnalysisType;

/// Assuming that `CallSite` is a virtual call through a vtable, retrieves the
/// index in the vtable of the virtual function called.
[[nodiscard]] std::optional<unsigned>
getVFTIndex(const llvm::CallBase *CallSite);

/// Similar to getVFTIndex(), but also returns a pointer to the vtable
[[nodiscard]] std::optional<std::pair<const llvm::Value *, uint64_t>>
getVFTIndexAndVT(const llvm::CallBase *CallSite);

/// Assuming that `CallSite` is a call to a non-static member function,
/// retrieves the type of the receiver. Returns nullptr, if the receiver-type
/// could not be extracted
Expand All @@ -64,12 +55,6 @@ getReceiverType(const llvm::CallBase *CallSite);

[[nodiscard]] std::string getReceiverTypeName(const llvm::CallBase *CallSite);

/// Checks whether the signature of `DestFun` matches the required signature of
/// `CallSite`, such that `DestFun` qualifies as callee-candidate, if `CallSite`
/// is an indirect/virtual call.
[[nodiscard]] bool isConsistentCall(const llvm::CallBase *CallSite,
const llvm::Function *DestFun);

[[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst,
const LLVMVFTableProvider &VTP);

Expand Down
6 changes: 6 additions & 0 deletions include/phasar/PhasarLLVM/Pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,16 @@

#include "phasar/Config/phasar-config.h" // for PHASAR_USE_SVF
#include "phasar/PhasarLLVM/Pointer/AliasAnalysisView.h"
#include "phasar/PhasarLLVM/Pointer/AndersenOTFAA.h"
#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h"
#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h"
#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h"
#include "phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h"
#include "phasar/PhasarLLVM/Pointer/LLVMPointsToInfo.h"
#include "phasar/PhasarLLVM/Pointer/LLVMPointsToUtils.h"
#include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAA.h"
#include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAliasSet.h"
#include "phasar/PhasarLLVM/Pointer/MemSSAUtils.h"

#ifdef PHASAR_USE_SVF
#include "phasar/PhasarLLVM/Pointer/SVF/SVFPointsToSet.h"
Expand Down
113 changes: 113 additions & 0 deletions include/phasar/PhasarLLVM/Pointer/AndersenOTFAA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#pragma once

/******************************************************************************
* Copyright (c) 2026 Fabian Schiebel.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of LICENSE.txt.
*
* Contributors:
* Fabian Schiebel and others
*****************************************************************************/

#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h"
#include "phasar/PhasarLLVM/Pointer/LLVMPointerAssignmentGraph.h"
#include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAA.h"
#include "phasar/Pointer/RawAliasSet.h"
#include "phasar/Pointer/UnionFindAA.h"
#include "phasar/Utils/MaybeUniquePtr.h"
#include "phasar/Utils/NonNullPtr.h"
#include "phasar/Utils/Soundness.h"
#include "phasar/Utils/TypedVector.h"
#include "phasar/Utils/ValueCompressor.h"

#include "llvm/ADT/ArrayRef.h"

namespace llvm {
class Function;
} // namespace llvm

namespace psr {

class LLVMProjectIRDB;

/// Result of the Andersen-style on-the-fly (OTF) points-to analysis, exposed
/// as alias information.
///
/// Two values may-alias iff their points-to sets have at least one abstract
/// object in common. The type models \c UnionFindAAResult (checked by the
/// \c static_assert below the struct), so it can be wrapped by
/// \c LLVMUnionFindAliasIterator.
struct AndersenOTFResult {
  /// Alias sets, indexed by \c ValueId.
  TypedVector<ValueId, RawAliasSet<ValueId>> AliasSets;
  /// Call graph stored alongside the alias sets.
  LLVMBasedCallGraph CG;

  /// Always reports \c true for this result type.
  [[nodiscard]] static constexpr bool isCached() noexcept { return true; }

  /// Number of entries in \c AliasSets.
  [[nodiscard]] constexpr size_t size() const noexcept {
    return AliasSets.size();
  }

  /// Returns a copy of the alias set stored for \p Var. If \p Var is not a
  /// valid index into \c AliasSets, a default-constructed set is returned.
  [[nodiscard]] RawAliasSet<ValueId>
  getRawAliasSet(ValueId Var) const noexcept {
    if (AliasSets.inbounds(Var)) {
      return AliasSets[Var];
    }
    return {};
  }

  /// Returns \c true if \p Var1 and \p Var2 are the same id, or if \p Var1 is
  /// a valid index and its alias set contains \p Var2. Only \p Var1 is
  /// bounds-checked against \c AliasSets.
  [[nodiscard]] bool mayAlias(ValueId Var1, ValueId Var2) const noexcept {
    return Var1 == Var2 ||
           (AliasSets.inbounds(Var1) && AliasSets[Var1].contains(Var2));
  }
};

static_assert(UnionFindAAResult<AndersenOTFResult>);

/// Andersen-style inclusion-based points-to analysis that co-refines the call
/// graph and points-to sets in a single fixpoint.
///
/// Unlike the staged pipeline (resolver → PA), this solver owns its own
/// function-worklist loop: direct calls add callees immediately; indirect
/// calls are resolved as \c pts(fp) grows.
///
/// Phase 1: context- and field-insensitive.
///
/// Lifetime: the solver keeps \c Entries as an \c llvm::ArrayRef, which is a
/// non-owning view, so the array passed to the constructor must outlive the
/// solver. NOTE(review): \c IRDB and \c VC are held via \c NonNullPtr and are
/// presumably also non-owning; confirm against the constructor definition.
class AndersenOTFSolver {
public:
/// \param IRDB    The IR database to analyze.
/// \param Entries Functions handed to the solver as entry points (stored as
///                a non-owning view, see the lifetime note on the class).
/// \param VC      Compressor for \c PAGVariable, taken by reference.
/// \param S       Soundness setting; defaults to \c Soundness::Soundy.
explicit AndersenOTFSolver(const LLVMProjectIRDB &IRDB,
llvm::ArrayRef<const llvm::Function *> Entries,
ValueCompressor<PAGVariable> &VC,
Soundness S = Soundness::Soundy) noexcept;

/// Run the full OTF fixpoint and return the alias-analysis result.
[[nodiscard]] AndersenOTFResult solve();

private:
// Only forward-declared here; the definition is not part of this header.
struct SolverData;

// Constructor arguments retained for solve(). Declaration order is also
// the member-initialization order.
NonNullPtr<const LLVMProjectIRDB> IRDB;
llvm::ArrayRef<const llvm::Function *> Entries;
NonNullPtr<ValueCompressor<PAGVariable>> VC;
Soundness S;
};

// ---- Factory functions ------------------------------------------------

/// Runs the Andersen OTF fixpoint and returns the raw alias-analysis result
/// (no LLVM-value wrapping). If \p VC is null, a fresh one is allocated.
///
/// \param IRDB        The IR database to analyze.
/// \param EntryPoints Functions passed to the analysis as entry points.
/// \param VC          Optional \c ValueCompressor to use; defaults to null.
/// \param S           Soundness setting; defaults to \c Soundness::Soundy.
/// \return The \c AndersenOTFResult produced by the analysis.
[[nodiscard]] AndersenOTFResult
computeAndersenOTFRaw(const LLVMProjectIRDB &IRDB,
llvm::ArrayRef<const llvm::Function *> EntryPoints,
MaybeUniquePtr<ValueCompressor<PAGVariable>> VC = nullptr,
Soundness S = Soundness::Soundy);

/// Runs the Andersen OTF fixpoint and returns an \c LLVMUnionFindAliasIterator
/// that implements \c IsLLVMAliasIterator.
///
/// \param IRDB        The IR database to analyze.
/// \param EntryPoints Functions passed to the analysis as entry points.
/// \param VC          Optional \c ValueCompressor to use; defaults to null.
///                    NOTE(review): presumably a null \p VC is replaced by a
///                    freshly allocated one, as documented for
///                    \c computeAndersenOTFRaw; confirm in the definition.
/// \param S           Soundness setting; defaults to \c Soundness::Soundy.
[[nodiscard]] LLVMUnionFindAliasIterator<AndersenOTFResult>
computeAndersenOTF(const LLVMProjectIRDB &IRDB,
llvm::ArrayRef<const llvm::Function *> EntryPoints,
MaybeUniquePtr<ValueCompressor<PAGVariable>> VC = nullptr,
Soundness S = Soundness::Soundy);

} // namespace psr
91 changes: 91 additions & 0 deletions include/phasar/PhasarLLVM/Pointer/LLVMGlobalInitCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#pragma once

/******************************************************************************
* Copyright (c) 2026 Fabian Schiebel.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of LICENSE.txt.
*
* Contributors:
* Fabian Schiebel and others
*****************************************************************************/

#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h"
#include "phasar/Utils/ValueCompressor.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Casting.h"

#include <concepts>
#include <unordered_map>

namespace psr {

/// Memoised walker for global-variable pointer initializers.
///
/// Traverses a \c llvm::Constant initializer and collects the \c ValueId of
/// every pointer-typed sub-constant it contains (direct pointer, GEP base,
/// or pointer elements of an aggregate). Results are cached so shared
/// sub-expressions are not revisited.
///
/// Create one instance per analysis run; it is tied to a single
/// \c ValueCompressor via the \p GetVar callback.
struct GlobalInitCache {
  /// Memoised result per visited constant. An empty vector is a valid cached
  /// answer ("no pointers found").
  std::unordered_map<const llvm::Constant *, llvm::SmallVector<ValueId, 1>>
      Cache;

  /// Returns the \c ValueId slice for all pointer-typed constants reachable
  /// from \p Const. \p GetVar maps an \c llvm::Value* to a \c ValueId
  /// (typically \c getOrInsertVar).
  ///
  /// A non-empty returned slice points into \c Cache and therefore lives only
  /// as long as this object and the corresponding cache entry.
  ///
  /// Repeated calls with the same \p Const yield the same ids: every branch
  /// that produces a non-empty result also stores it in the cache entry.
  template <std::invocable<const llvm::Value *> GetVarFn>
  [[nodiscard]] llvm::ArrayRef<ValueId> getOrCreate(const llvm::Constant *Const,
                                                    GetVarFn &&GetVar) {
    if (definitelyContainsNoPointer(Const)) {
      return {};
    }

    // A null pointer contributes nothing; answer before touching the cache so
    // no empty entry is created for it.
    if (llvm::isa<llvm::ConstantPointerNull>(Const)) {
      return {};
    }

    auto [It, Inserted] = Cache.try_emplace(Const);
    if (!Inserted) {
      return It->second;
    }
    // The recursive calls below may insert into Cache and rehash it. For
    // std::unordered_map this invalidates iterators (It) but not references
    // to elements, so Vec stays valid for the rest of this function.
    auto &Vec = It->second;

    if (const auto *CGep = llvm::dyn_cast<llvm::GEPOperator>(Const)) {
      // TODO: Properly handle constant GEPs
      // Store the base's result in this constant's own entry. Returning the
      // base slice without caching it would leave the entry empty, and a
      // later lookup of the same GEP would then report no pointers.
      auto Base = getOrCreate(
          llvm::cast<llvm::Constant>(CGep->getPointerOperand()), GetVar);
      Vec.append(Base.begin(), Base.end());
      return Vec;
    }

    if (Const->getType()->isPointerTy()) {
      Vec.push_back(std::invoke(GetVar, Const));
      return Vec;
    }

    // TODO: Get rid of the recursion

    if (const auto *Agg = llvm::dyn_cast<llvm::ConstantAggregate>(Const)) {
      // Arrays whose element type cannot hold a pointer need no per-element
      // walk; the (empty) cache entry already records that answer.
      if (Agg->getType()->isArrayTy() &&
          definitelyContainsNoPointer(Agg->getType()->getArrayElementType())) {
        return {};
      }
      for (unsigned I = 0, N = Agg->getNumOperands(); I < N; ++I) {
        const auto *Elem = llvm::cast<llvm::Constant>(
            Agg->getAggregateElement(I)->stripPointerCastsAndAliases());
        auto Sub = getOrCreate(Elem, GetVar);
        Vec.append(Sub.begin(), Sub.end());
      }
    }

    // TODO: more

    return Vec;
  }
};

} // namespace psr
55 changes: 55 additions & 0 deletions include/phasar/PhasarLLVM/Pointer/MemSSAUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma once

/******************************************************************************
* Copyright (c) 2026 Fabian Schiebel.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of LICENSE.txt.
*
* Contributors:
* Fabian Schiebel and others
*****************************************************************************/

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

namespace psr {

/// Bundle of per-function analyses for the built-in MemorySSA provider.
///
/// Members are declared in initialization order: each field depends only on
/// the ones before it. C++ initializes non-static data members in declaration
/// order, so reordering these fields changes the construction order and can
/// break that dependency chain.
struct MemSSABundle {
llvm::AssumptionCache AC;
llvm::DominatorTree DT;
// Individual alias-analysis results.
llvm::TypeBasedAAResult TBAA;
llvm::ScopedNoAliasAAResult SNA;
llvm::BasicAAResult BAA;
// NOTE(review): presumably aggregates the individual AA results above;
// confirm in the constructor definition.
llvm::AAResults AA;
llvm::MemorySSA MSSA;

/// Builds all analyses of the bundle for \p F.
/// \param F   The function to analyze.
/// \param TLI Target-library info handed to the analyses.
///            NOTE(review): nullability is not visible here; confirm in the
///            constructor definition.
explicit MemSSABundle(llvm::Function &F, const llvm::TargetLibraryInfo *TLI);
};

/// Walks the MemorySSA def chain rooted at MA, collecting all StoreInst
/// reaching definitions into ReachingDefs.
/// Returns true if a LiveOnEntry def is reachable (value may come from outside
/// the function). In that case, ReachingDefs may be incompletely populated.
///
/// \param MA           Memory access at which the walk starts.
/// \param MSSA         MemorySSA of the function containing \p MA.
/// \param ReachingDefs Output set receiving the reaching store instructions.
/// \param Visited      NOTE(review): presumably the set of accesses already
///                     walked, so that each access is processed once; confirm
///                     against the definition.
[[nodiscard]] bool collectReachingDefs(
llvm::MemoryAccess *MA, const llvm::MemorySSA &MSSA,
llvm::SmallPtrSetImpl<const llvm::StoreInst *> &ReachingDefs,
llvm::SmallPtrSetImpl<llvm::MemoryAccess *> &Visited);

/// Collects all store instructions that may define the value loaded from the
/// given load. Forwards to the collectReachingDefs overload that takes an
/// \c llvm::MemoryAccess.
///
/// \param Load         The load whose reaching stores are requested.
/// \param MSSA         MemorySSA of the function containing \p Load.
/// \param ReachingDefs Output set receiving the reaching store instructions.
/// \return NOTE(review): presumably the same meaning as for the
///         MemoryAccess-based overload (true if a LiveOnEntry def is
///         reachable); confirm against the definition.
[[nodiscard]] bool collectReachingDefs(
const llvm::LoadInst *Load, llvm::MemorySSA &MSSA,
llvm::SmallPtrSetImpl<const llvm::StoreInst *> &ReachingDefs);

} // namespace psr
Loading
Loading