diff --git a/inst/include/TreeDist/lap_impl.h b/inst/include/TreeDist/lap_impl.h index a639ec62..9508e444 100644 --- a/inst/include/TreeDist/lap_impl.h +++ b/inst/include/TreeDist/lap_impl.h @@ -13,7 +13,17 @@ // Original algorithm: // R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm // for Dense and Sparse Linear Assignment Problems," Computing 38, -// 325-340, 1987. +// 325-340, 1987. Reference code lap.cpp v1.0 (4 Sept 1996) by +// Roy Jonker, MagicLogic Optimization Inc. +// +// Provenance: +// CHANGED 2025-08-01 by Martin Smith to +// optimize performance. +// CHANGED 2020-01-01 by Martin Smith for integration with R. +// CHANGED 2016-05-13 by Yong Yang in the column +// reduction part, per the MATLAB LAPJV implementation +// (Copyright (c) 2010, Yi Cao, all rights reserved). + #ifdef TREEDIST_LAP_IMPLEMENTATION @@ -105,7 +115,6 @@ cost lap(const lap_row dim, } // AUGMENTING ROW REDUCTION - auto& col_list = scratch.col_list; int loopcnt = 0; do { diff --git a/src/lap.cpp b/src/lap.cpp index 025ad2a4..c7cc8322 100644 --- a/src/lap.cpp +++ b/src/lap.cpp @@ -1,40 +1,13 @@ /************************************************************************ * - * lap.cpp - version 1.0 - 4 September 1996 - author: Roy Jonker @ MagicLogic Optimization Inc. - e-mail: roy_jonker@magiclogic.com - - Code for Linear Assignment Problem, according to - - "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear - Assignment Problems," Computing 38, 325-340, 1987 - - by - - R. Jonker and A. Volgenant, University of Amsterdam. - + * lap.cpp — R entry point (lapjv) for the Linear Assignment Problem. * - CHANGED 2025-08-01 by Martin Smith to optimize - performance. - CHANGED 2020-01-01 by Martin Smith for - integration with R. - CHANGED 2016-05-13 by Yong Yang(yongyanglink@gmail.com) in column reduction - part according to matlab version of LAPJV algorithm - https://github.com/yongyanghz/LAPJV-algorithm-c/blob/master/src/lap.cpp - (Copyright (c) 2010, Yi Cao All rights reserved)-- - https://www.mathworks.com/matlabcentral/fileexchange/26836-lapjv-jonker-volgenant-algorithm-for-linear-assignment-problem-v3-0: + * The lap() algorithm itself lives in inst/include/TreeDist/lap_impl.h, + * the single source of truth shared with downstream LinkingTo packages. + * See that header for the Jonker–Volgenant attribution and provenance. * *************************************************************************/ -// NOTE: The LAP hot loops are highly sensitive to instruction alignment and -// register allocation, which are affected by the TU's full include graph. -// Do NOT include lap_impl.h here — that header is for downstream LinkingTo -// consumers only. TreeDist's own lap() is compiled directly in this file -// to preserve the codegen context that was profiled and tuned. -// -// If the algorithm changes, update BOTH this file and lap_impl.h. - #include "lap.h" #include @@ -100,258 +73,8 @@ Rcpp::List lapjv(Rcpp::NumericMatrix &x, Rcpp::NumericVector &maxX) { ); } -inline bool nontrivially_less_than(cost a, cost b) noexcept { - return a + ((a > ROUND_PRECISION) ? 8 : 0) < b; -} - -/* This function is the jv shortest augmenting path algorithm to solve the - assignment problem */ -namespace TreeDist { - -#if defined(__GNUC__) && !defined(__clang__) -__attribute__((optimize("align-functions=64", "align-loops=16"))) -#endif -cost lap(const lap_row dim, - CostMatrix &input_cost, - std::vector &rowsol, - std::vector &colsol, - const bool allow_interrupt, - LapScratch &scratch) -{ - lap_row num_free = 0; - scratch.ensure(dim); - auto& v = scratch.v; - auto& matches = scratch.matches; - // matches must start at zero for the column-reduction counter - std::fill(matches.begin(), matches.begin() + dim, 0); - const cost* __restrict__ v_ptr = v.data(); - - // COLUMN REDUCTION - for (lap_col j = dim; j--; ) { // Reverse order gives better results. - - const auto [min, imin] = input_cost.findColMin(j); - v[j] = min; - ++matches[imin]; - - if (matches[imin] == 1) { - // Init assignment if minimum row assigned for first time. - rowsol[imin] = j; - colsol[j] = imin; - } else if (v_ptr[j] < v_ptr[rowsol[imin]]) { - const lap_col j1 = rowsol[imin]; - rowsol[imin] = j; - colsol[j] = imin; - colsol[j1] = -1; - } else { - colsol[j] = -1; // Row already assigned, column not assigned. - } - } - - // REDUCTION TRANSFER - auto& freeunassigned = scratch.freeunassigned; // List of unassigned rows. - - for (lap_row i = 0; i < dim; ++i) { - if (matches[i] == 0) { - // Fill list of unassigned 'free' rows. - freeunassigned[num_free++] = i; - } else if (matches[i] == 1) { - // Transfer reduction from rows that are assigned once. - const lap_col j1 = rowsol[i]; - const cost* row_i = input_cost.row(i); - cost min_cost; - if (j1 == 0) { - // Just worth the trouble to initialize with a realistic value - min_cost = row_i[1] - v_ptr[1]; - - for (lap_col j = 2; j < dim; ++j) { - const cost reduced_cost = row_i[j] - v_ptr[j]; - if (reduced_cost < min_cost) { - min_cost = reduced_cost; - } - } - } else { - min_cost = row_i[0] - v_ptr[0]; - - for (lap_col j = 1; j < dim; ++j) { - if (j == j1) continue; - - const cost reduced_cost = row_i[j] - v_ptr[j]; - if (reduced_cost < min_cost) { - min_cost = reduced_cost; - } - } - } - v[j1] -= min_cost; - } - } - - // AUGMENTING ROW REDUCTION - int loopcnt = 0; // do-loop to be done twice. - - do { - ++loopcnt; - - // Scan all free rows. - // In some cases, a free row may be replaced with another one to be - // scanned next. - lap_row previous_num_free = num_free; - num_free = 0; // Start list of rows still free after augmenting - // row reduction. - lap_row k = 0; - while (k < previous_num_free) { - // Find minimum and second minimum reduced cost over columns. - const lap_row i = freeunassigned[k++]; - const auto [umin, usubmin, min_idx, j2] = input_cost.findRowSubmin(&i, v); - lap_col j1 = min_idx; - - lap_row i0 = colsol[j1]; - const bool strictly_less = nontrivially_less_than(umin, usubmin); - if (strictly_less) { - // Change the reduction of the minimum column to increase the minimum - // reduced cost in the row to the subminimum. - v[j1] -= (usubmin - umin); - } else if (i0 > -1) { - // Minimum and subminimum equal. - // Minimum column j1 is assigned. - // Swap columns j1 and j2, as j2 may be unassigned. - j1 = j2; - i0 = colsol[j2]; - } - - // (Re-)assign i to j1, possibly de-assigning an i0. - rowsol[i] = j1; - colsol[j1] = i; - - if (i0 > -1) { // Minimum column j1 assigned earlier. - if (strictly_less) { - // Put in current k, and go back to that k. - // Continue augmenting path i - j1 with i0. - freeunassigned[--k] = i0; - if (allow_interrupt) Rcpp::checkUserInterrupt(); - } else { - // No further augmenting reduction possible. - // Store i0 in list of free rows for next phase. - freeunassigned[num_free++] = i0; - } - } - } - } while (loopcnt < 2); // Repeat once. - - // AUGMENT SOLUTION for each free row. - // Restrict-qualified local pointers enable the compiler to avoid - // reloads after stores in the Dijkstra inner loop. - cost* __restrict__ d_ptr = scratch.d.data(); - lap_row* __restrict__ pred_ptr = scratch.predecessor.data(); - lap_col* __restrict__ cl_ptr = scratch.col_list.data(); - - for (lap_row f = 0; f < num_free; ++f) { - bool unassignedfound = false; - lap_row free_row = freeunassigned[f]; // Start row of augmenting path. - const cost* __restrict__ free_row_cost = input_cost.row(free_row); - lap_col endofpath = 0; - lap_col last = 0; - lap_row i; - lap_col j1; - - // Dijkstra shortest path algorithm. - // Runs until unassigned column added to shortest path tree. - for (lap_col j = 0; j < dim; ++j) { - d_ptr[j] = free_row_cost[j] - v_ptr[j]; - pred_ptr[j] = free_row; - cl_ptr[j] = j; // Init column list. - } - - cost min = 0; - lap_col low = 0; // Columns in 0..low-1 are ready, now none. - lap_col up = 0; // Columns in low..up-1 are to be scanned for current minimum, now none. - // Columns in up..dim-1 are to be considered later to find new minimum; - // at this stage the list simply contains all columns. - - do { - if (up == low) { // No more columns to be scanned for current minimum. - last = low - 1; - - // Scan columns for up..dim-1 to find all indices for which new minimum occurs. - // Store these indices between low..up-1 (increasing up). - min = d_ptr[cl_ptr[up++]]; - - for (lap_dim k = up; k < dim; ++k) { - const lap_col j = cl_ptr[k]; - const cost h = d_ptr[j]; - if (h <= min) { - if (h < min) { // New minimum. - up = low; // Restart list at index low. - min = h; - } - // New index with same minimum, put on undex up, and extend list. - cl_ptr[k] = cl_ptr[up]; - cl_ptr[up++] = j; - } - } - // Check if any of the minimum columns happens to be unassigned. - // If so, we have an augmenting path right away. - for (lap_dim k = low; k < up; ++k) { - if (colsol[cl_ptr[k]] < 0) { - endofpath = cl_ptr[k]; - unassignedfound = true; - break; - } - } - } - - if (!unassignedfound) { - // Update 'distances' between free_row and all unscanned columns, - // via next scanned column. - j1 = cl_ptr[low++]; - i = colsol[j1]; - const cost* __restrict__ row_i = input_cost.row(i); - const cost h = row_i[j1] - v_ptr[j1] - min; - - for (lap_dim k = up; k < dim; ++k) { - const lap_col j = cl_ptr[k]; - cost v2 = row_i[j] - v_ptr[j] - h; - if (v2 < d_ptr[j]) { - pred_ptr[j] = i; - if (v2 == min) { // New column found at same minimum value - if (colsol[j] < 0) { - // If unassigned, shortest augmenting path is complete. - endofpath = j; - unassignedfound = true; - break; - } else { - // Else add to list to be scanned right away. - cl_ptr[k] = cl_ptr[up]; - cl_ptr[up++] = j; - } - } - d_ptr[j] = v2; - } - } - } - } while (!unassignedfound); - - // Update column prices. - for(lap_dim k = 0; k <= last; ++k) { - j1 = cl_ptr[k]; - v[j1] += d_ptr[j1] - min; - } - - // Reset row and column assignments along the alternating path. - do { - i = pred_ptr[endofpath]; - colsol[endofpath] = i; - j1 = endofpath; - endofpath = rowsol[i]; - rowsol[i] = j1; - } while (i != free_row); - } - - // Calculate optimal cost. - cost lapcost = 0; - for (lap_dim i = 0; i < dim; ++i) { - lapcost += input_cost(i, rowsol[i]); - } - - return lapcost; -} -} // namespace TreeDist +// Pull in the one true definition of TreeDist::lap(). Wire the interrupt +// macro to Rcpp so the package keeps its user-interrupt checks. +#define TREEDIST_CHECK_INTERRUPT() Rcpp::checkUserInterrupt() +#define TREEDIST_LAP_IMPLEMENTATION +#include