From eaf1d69a99269d4e3ae6e62f38df67bb1728a8f9 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 26 May 2026 21:04:33 +0200 Subject: [PATCH 1/3] StepTHn: speedup filling Replaces 4 virtual calls to GetAt / SetAt / AddAt with just one to updateBin. --- Framework/Core/include/Framework/StepTHn.h | 25 +++++++++++++++++++++- Framework/Core/src/StepTHn.cxx | 21 ++---------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/Framework/Core/include/Framework/StepTHn.h b/Framework/Core/include/Framework/StepTHn.h index 0302f604eae39..165ad8270f206 100644 --- a/Framework/Core/include/Framework/StepTHn.h +++ b/Framework/Core/include/Framework/StepTHn.h @@ -58,7 +58,7 @@ class StepTHn : public TNamed virtual Long64_t Merge(TCollection* list) = 0; TAxis* GetAxis(int i) { return mPrototype->GetAxis(i); } - void Sumw2(){}; // TODO: added for compatibiltiy with registry, but maybe it would be useful also in StepTHn as toggle for error weights + void Sumw2() {}; // TODO: added for compatibiltiy with registry, but maybe it would be useful also in StepTHn as toggle for error weights protected: void init(); @@ -67,6 +67,7 @@ class StepTHn : public TNamed void deleteContainers(); Long64_t getGlobalBinIndex(const Int_t* binIdx); + virtual void updateBin(int iStep, Long64_t bin, double weight) = 0; Long64_t mNBins; // number of total bins Int_t mNVars; // number of variables @@ -107,6 +108,28 @@ class StepTHnT : public StepTHn } } + void updateBin(int iStep, Long64_t bin, double weight) override + { + if (!mValues[iStep]) { + mValues[iStep] = createArray(); + LOGF(info, "Created values container for step %d", iStep); + } + + if (weight != 1.) { + if (!mSumw2[iStep]) { + mSumw2[iStep] = createArray(mValues[iStep]); + LOGF(info, "Created sumw2 container for step %d", iStep); + } + } + + auto* arr = static_cast(mValues[iStep])->GetArray(); + arr[bin] += weight; + if (mSumw2[iStep]) { + auto* sw2 = static_cast(mSumw2[iStep])->GetArray(); + sw2[bin] += weight * weight; + } + } + ClassDef(StepTHnT, 1) // THn like container }; diff --git a/Framework/Core/src/StepTHn.cxx b/Framework/Core/src/StepTHn.cxx index bb0109db2c97f..ed4a544a44c44 100644 --- a/Framework/Core/src/StepTHn.cxx +++ b/Framework/Core/src/StepTHn.cxx @@ -424,7 +424,7 @@ void StepTHn::Fill(int iStep, int nParams, double positionAndWeight[]) mLastBins[i] = tmpBin; mLastVars[i] = positionAndWeight[i]; } - //Printf("%d", tmpBin); + // Printf("%d", tmpBin); // under/overflow not supported if (tmpBin < 1 || tmpBin > mNbinsCache[i]) { @@ -436,24 +436,7 @@ void StepTHn::Fill(int iStep, int nParams, double positionAndWeight[]) // Printf("%lld", bin); } - if (!mValues[iStep]) { - mValues[iStep] = createArray(); - LOGF(info, "Created values container for step %d", iStep); - } - - if (weight != 1.) { - // initialize with already filled entries (which have been filled with weight == 1), in this case mSumw2 := mValues - if (!mSumw2[iStep]) { - mSumw2[iStep] = createArray(mValues[iStep]); - LOGF(info, "Created sumw2 container for step %d", iStep); - } - } - - // TODO probably slow; add StepTHnT::add ? - mValues[iStep]->SetAt(mValues[iStep]->GetAt(bin) + weight, bin); - if (mSumw2[iStep]) { - mSumw2[iStep]->SetAt(mSumw2[iStep]->GetAt(bin) + weight * weight, bin); - } + updateBin(iStep, bin, weight); } template class StepTHnT; From a69d2a1a57a4f114a4d53ac19451a673b09243c1 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 27 May 2026 09:01:09 +0200 Subject: [PATCH 2/3] StepTHn: do a pre lookup for the bin using a uniform grid Rather than doing a O(log N) binary search for the correct bin, precompute a uniform index for a O(1) lookup and find the actual bin using a short linear search. --- Framework/Core/include/Framework/StepTHn.h | 20 +++++-- Framework/Core/src/StepTHn.cxx | 63 ++++++++++++++++++++-- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/Framework/Core/include/Framework/StepTHn.h b/Framework/Core/include/Framework/StepTHn.h index 165ad8270f206..9efddc51cb6ae 100644 --- a/Framework/Core/include/Framework/StepTHn.h +++ b/Framework/Core/include/Framework/StepTHn.h @@ -77,10 +77,22 @@ class StepTHn : public TNamed THnBase** mTarget; //! target histogram - TAxis** mAxisCache; //! cache axis pointers (about 50% of the time in Fill is spent in GetAxis otherwise) - Int_t* mNbinsCache; //! cache Nbins per axis - Double_t* mLastVars; //! caching of last used bins (in many loops some vars are the same for a while) - Int_t* mLastBins; //! caching of last used bins (in many loops some vars are the same for a while) + TAxis** mAxisCache; //! cache axis pointers (about 50% of the time in Fill is spent in GetAxis otherwise) + Int_t* mNbinsCache; //! cache Nbins per axis + Double_t* mLastVars; //! caching of last used bins (in many loops some vars are the same for a while) + Int_t* mLastBins; //! caching of last used bins (in many loops some vars are the same for a while) + + // Fast bin lookup table: for each axis, maps a quantized position to an approximate bin. + static constexpr Int_t kLookupSize = 1024; // number of slots per axis + struct AxisLookup { + Double_t invSlotWidth; // 1.0 / slot width for fast index computation + Double_t xmin; // axis minimum + Double_t xmax; // axis maximum + const Double_t* edges; // pointer to bin edges array (nBins+1 entries) + Int_t nBins; // number of bins + Int_t table[kLookupSize]; // slot -> bin index (1-based, TAxis convention) + }; + AxisLookup* mLookup; //! per-axis lookup tables THnSparse* mPrototype; // not filled used as prototype histogram for axis functionality etc. diff --git a/Framework/Core/src/StepTHn.cxx b/Framework/Core/src/StepTHn.cxx index ed4a544a44c44..f916cbf2eff8a 100644 --- a/Framework/Core/src/StepTHn.cxx +++ b/Framework/Core/src/StepTHn.cxx @@ -39,6 +39,7 @@ StepTHn::StepTHn() : mNBins(0), mNbinsCache(nullptr), mLastVars(nullptr), mLastBins(nullptr), + mLookup(nullptr), mPrototype(nullptr) { // Default constructor (for streaming) @@ -55,6 +56,7 @@ StepTHn::StepTHn(const Char_t* name, const Char_t* title, const Int_t nSteps, co mNbinsCache(nullptr), mLastVars(nullptr), mLastBins(nullptr), + mLookup(nullptr), mPrototype(nullptr) { // Constructor @@ -130,6 +132,7 @@ StepTHn::StepTHn(const StepTHn& c) : mNBins(c.mNBins), mNbinsCache(nullptr), mLastVars(nullptr), mLastBins(nullptr), + mLookup(nullptr), mPrototype(nullptr) { // @@ -152,6 +155,7 @@ StepTHn::~StepTHn() delete[] mNbinsCache; delete[] mLastVars; delete[] mLastBins; + delete[] mLookup; delete mPrototype; } @@ -392,13 +396,35 @@ void StepTHn::Fill(int iStep, int nParams, double positionAndWeight[]) LOGF(fatal, "Fill called with invalid number of parameters (%d vs %d)", mNVars, nParams); } - // fill axis cache + // fill axis cache and build lookup tables on first call if (!mAxisCache) { mAxisCache = new TAxis*[mNVars]; mNbinsCache = new Int_t[mNVars]; + mLookup = new AxisLookup[mNVars]; for (Int_t i = 0; i < mNVars; i++) { mAxisCache[i] = mPrototype->GetAxis(i); mNbinsCache[i] = mAxisCache[i]->GetNbins(); + + // Build lookup table for this axis + auto& lut = mLookup[i]; + lut.nBins = mNbinsCache[i]; + lut.edges = mAxisCache[i]->GetXbins()->GetArray(); + lut.xmin = mAxisCache[i]->GetXmin(); + lut.xmax = mAxisCache[i]->GetXmax(); + + if (lut.edges && lut.xmax > lut.xmin) { + // Variable-width binning: build slot->bin mapping + Double_t slotWidth = (lut.xmax - lut.xmin) / kLookupSize; + lut.invSlotWidth = 1.0 / slotWidth; + for (Int_t s = 0; s < kLookupSize; s++) { + Double_t x = lut.xmin + (s + 0.5) * slotWidth; + lut.table[s] = mAxisCache[i]->FindBin(x); + } + } else { + // Uniform binning or degenerate axis: direct formula, no table needed + lut.edges = nullptr; + lut.invSlotWidth = lut.xmax > lut.xmin ? lut.nBins / (lut.xmax - lut.xmin) : 0; + } } mLastVars = new Double_t[mNVars]; @@ -420,11 +446,39 @@ void StepTHn::Fill(int iStep, int nParams, double positionAndWeight[]) if (mLastVars[i] == positionAndWeight[i]) { tmpBin = mLastBins[i]; } else { - tmpBin = mAxisCache[i]->FindBin(positionAndWeight[i]); + const auto& lut = mLookup[i]; + Double_t x = positionAndWeight[i]; + + if (!lut.edges) { + // Uniform binning: direct computation + tmpBin = 1 + static_cast((x - lut.xmin) * lut.invSlotWidth); + if (tmpBin < 1) { + tmpBin = 0; // underflow + } else if (tmpBin > lut.nBins) { + tmpBin = lut.nBins + 1; // overflow + } + } else { + // Variable-width binning: lookup table + refine + Int_t slot = static_cast((x - lut.xmin) * lut.invSlotWidth); + if (slot < 0 || slot >= kLookupSize) { + tmpBin = (slot < 0) ? 0 : lut.nBins + 1; // under/overflow + } else { + tmpBin = lut.table[slot]; + // Refine: the lookup gives an approximate bin, check edges + // Move left if x is below the lower edge of tmpBin + while (tmpBin > 1 && x < lut.edges[tmpBin - 1]) { + tmpBin--; + } + // Move right if x is at or above the upper edge of tmpBin + while (tmpBin < lut.nBins && x >= lut.edges[tmpBin]) { + tmpBin++; + } + } + } + mLastBins[i] = tmpBin; - mLastVars[i] = positionAndWeight[i]; + mLastVars[i] = x; } - // Printf("%d", tmpBin); // under/overflow not supported if (tmpBin < 1 || tmpBin > mNbinsCache[i]) { @@ -433,7 +487,6 @@ void StepTHn::Fill(int iStep, int nParams, double positionAndWeight[]) // bins start from 0 here bin += tmpBin - 1; - // Printf("%lld", bin); } updateBin(iStep, bin, weight); From ce923dffbcd4db05de99085fd360f42ace1603b6 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Wed, 27 May 2026 07:05:13 +0000 Subject: [PATCH 3/3] Please consider the following formatting changes --- Framework/Core/include/Framework/StepTHn.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Framework/Core/include/Framework/StepTHn.h b/Framework/Core/include/Framework/StepTHn.h index 9efddc51cb6ae..a1daaf50ef800 100644 --- a/Framework/Core/include/Framework/StepTHn.h +++ b/Framework/Core/include/Framework/StepTHn.h @@ -77,19 +77,19 @@ class StepTHn : public TNamed THnBase** mTarget; //! target histogram - TAxis** mAxisCache; //! cache axis pointers (about 50% of the time in Fill is spent in GetAxis otherwise) - Int_t* mNbinsCache; //! cache Nbins per axis - Double_t* mLastVars; //! caching of last used bins (in many loops some vars are the same for a while) - Int_t* mLastBins; //! caching of last used bins (in many loops some vars are the same for a while) + TAxis** mAxisCache; //! cache axis pointers (about 50% of the time in Fill is spent in GetAxis otherwise) + Int_t* mNbinsCache; //! cache Nbins per axis + Double_t* mLastVars; //! caching of last used bins (in many loops some vars are the same for a while) + Int_t* mLastBins; //! caching of last used bins (in many loops some vars are the same for a while) // Fast bin lookup table: for each axis, maps a quantized position to an approximate bin. - static constexpr Int_t kLookupSize = 1024; // number of slots per axis + static constexpr Int_t kLookupSize = 1024; // number of slots per axis struct AxisLookup { - Double_t invSlotWidth; // 1.0 / slot width for fast index computation - Double_t xmin; // axis minimum - Double_t xmax; // axis maximum - const Double_t* edges; // pointer to bin edges array (nBins+1 entries) - Int_t nBins; // number of bins + Double_t invSlotWidth; // 1.0 / slot width for fast index computation + Double_t xmin; // axis minimum + Double_t xmax; // axis maximum + const Double_t* edges; // pointer to bin edges array (nBins+1 entries) + Int_t nBins; // number of bins Int_t table[kLookupSize]; // slot -> bin index (1-based, TAxis convention) }; AxisLookup* mLookup; //! per-axis lookup tables