Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions inst/include/TreeDist/lap_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@
// Original algorithm:
// R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm
// for Dense and Sparse Linear Assignment Problems," Computing 38,
// 325-340, 1987.
// 325-340, 1987. Reference code lap.cpp v1.0 (4 Sept 1996) by
// Roy Jonker, MagicLogic Optimization Inc.
//
// Provenance:
// CHANGED 2025-08-01 by Martin Smith <martin.smith@durham.ac.uk> to
// optimize performance.
// CHANGED 2020-01-01 by Martin Smith for integration with R.
// CHANGED 2016-05-13 by Yong Yang <yongyanglink@gmail.com> in the column
// reduction part, per the MATLAB LAPJV implementation
// (Copyright (c) 2010, Yi Cao, all rights reserved).


#ifdef TREEDIST_LAP_IMPLEMENTATION

Expand Down Expand Up @@ -105,7 +115,6 @@ cost lap(const lap_row dim,
}

// AUGMENTING ROW REDUCTION
auto& col_list = scratch.col_list;
int loopcnt = 0;

do {
Expand Down
295 changes: 9 additions & 286 deletions src/lap.cpp
Original file line number Diff line number Diff line change
@@ -1,40 +1,13 @@
/************************************************************************
*
* lap.cpp
version 1.0 - 4 September 1996
author: Roy Jonker @ MagicLogic Optimization Inc.
e-mail: roy_jonker@magiclogic.com

Code for Linear Assignment Problem, according to

"A Shortest Augmenting Path Algorithm for Dense and Sparse Linear
Assignment Problems," Computing 38, 325-340, 1987

by

R. Jonker and A. Volgenant, University of Amsterdam.

* lap.cpp — R entry point (lapjv) for the Linear Assignment Problem.
*
CHANGED 2025-08-01 by Martin Smith <martin.smith@durham.ac.uk> to optimize
performance.
CHANGED 2020-01-01 by Martin Smith <martin.smith@durham.ac.uk> for
integration with R.
CHANGED 2016-05-13 by Yong Yang(yongyanglink@gmail.com) in column reduction
part according to matlab version of LAPJV algorithm
https://github.com/yongyanghz/LAPJV-algorithm-c/blob/master/src/lap.cpp
(Copyright (c) 2010, Yi Cao All rights reserved)--
https://www.mathworks.com/matlabcentral/fileexchange/26836-lapjv-jonker-volgenant-algorithm-for-linear-assignment-problem-v3-0:
* The lap() algorithm itself lives in inst/include/TreeDist/lap_impl.h,
* the single source of truth shared with downstream LinkingTo packages.
* See that header for the Jonker–Volgenant attribution and provenance.
*
*************************************************************************/

// NOTE: The LAP hot loops are highly sensitive to instruction alignment and
// register allocation, which are affected by the TU's full include graph.
// Do NOT include lap_impl.h here — that header is for downstream LinkingTo
// consumers only. TreeDist's own lap() is compiled directly in this file
// to preserve the codegen context that was profiled and tuned.
//
// If the algorithm changes, update BOTH this file and lap_impl.h.

#include "lap.h"
#include <Rcpp/Lightest>

Expand Down Expand Up @@ -100,258 +73,8 @@ Rcpp::List lapjv(Rcpp::NumericMatrix &x, Rcpp::NumericVector &maxX) {
);
}

inline bool nontrivially_less_than(cost a, cost b) noexcept {
return a + ((a > ROUND_PRECISION) ? 8 : 0) < b;
}

/* This function is the jv shortest augmenting path algorithm to solve the
assignment problem */
namespace TreeDist {

#if defined(__GNUC__) && !defined(__clang__)
__attribute__((optimize("align-functions=64", "align-loops=16")))
#endif
cost lap(const lap_row dim,
CostMatrix &input_cost,
std::vector<lap_col> &rowsol,
std::vector<lap_row> &colsol,
const bool allow_interrupt,
LapScratch &scratch)
{
lap_row num_free = 0;
scratch.ensure(dim);
auto& v = scratch.v;
auto& matches = scratch.matches;
// matches must start at zero for the column-reduction counter
std::fill(matches.begin(), matches.begin() + dim, 0);
const cost* __restrict__ v_ptr = v.data();

// COLUMN REDUCTION
for (lap_col j = dim; j--; ) { // Reverse order gives better results.

const auto [min, imin] = input_cost.findColMin(j);
v[j] = min;
++matches[imin];

if (matches[imin] == 1) {
// Init assignment if minimum row assigned for first time.
rowsol[imin] = j;
colsol[j] = imin;
} else if (v_ptr[j] < v_ptr[rowsol[imin]]) {
const lap_col j1 = rowsol[imin];
rowsol[imin] = j;
colsol[j] = imin;
colsol[j1] = -1;
} else {
colsol[j] = -1; // Row already assigned, column not assigned.
}
}

// REDUCTION TRANSFER
auto& freeunassigned = scratch.freeunassigned; // List of unassigned rows.

for (lap_row i = 0; i < dim; ++i) {
if (matches[i] == 0) {
// Fill list of unassigned 'free' rows.
freeunassigned[num_free++] = i;
} else if (matches[i] == 1) {
// Transfer reduction from rows that are assigned once.
const lap_col j1 = rowsol[i];
const cost* row_i = input_cost.row(i);
cost min_cost;
if (j1 == 0) {
// Just worth the trouble to initialize with a realistic value
min_cost = row_i[1] - v_ptr[1];

for (lap_col j = 2; j < dim; ++j) {
const cost reduced_cost = row_i[j] - v_ptr[j];
if (reduced_cost < min_cost) {
min_cost = reduced_cost;
}
}
} else {
min_cost = row_i[0] - v_ptr[0];

for (lap_col j = 1; j < dim; ++j) {
if (j == j1) continue;

const cost reduced_cost = row_i[j] - v_ptr[j];
if (reduced_cost < min_cost) {
min_cost = reduced_cost;
}
}
}
v[j1] -= min_cost;
}
}

// AUGMENTING ROW REDUCTION
int loopcnt = 0; // do-loop to be done twice.

do {
++loopcnt;

// Scan all free rows.
// In some cases, a free row may be replaced with another one to be
// scanned next.
lap_row previous_num_free = num_free;
num_free = 0; // Start list of rows still free after augmenting
// row reduction.
lap_row k = 0;
while (k < previous_num_free) {
// Find minimum and second minimum reduced cost over columns.
const lap_row i = freeunassigned[k++];
const auto [umin, usubmin, min_idx, j2] = input_cost.findRowSubmin(&i, v);
lap_col j1 = min_idx;

lap_row i0 = colsol[j1];
const bool strictly_less = nontrivially_less_than(umin, usubmin);
if (strictly_less) {
// Change the reduction of the minimum column to increase the minimum
// reduced cost in the row to the subminimum.
v[j1] -= (usubmin - umin);
} else if (i0 > -1) {
// Minimum and subminimum equal.
// Minimum column j1 is assigned.
// Swap columns j1 and j2, as j2 may be unassigned.
j1 = j2;
i0 = colsol[j2];
}

// (Re-)assign i to j1, possibly de-assigning an i0.
rowsol[i] = j1;
colsol[j1] = i;

if (i0 > -1) { // Minimum column j1 assigned earlier.
if (strictly_less) {
// Put in current k, and go back to that k.
// Continue augmenting path i - j1 with i0.
freeunassigned[--k] = i0;
if (allow_interrupt) Rcpp::checkUserInterrupt();
} else {
// No further augmenting reduction possible.
// Store i0 in list of free rows for next phase.
freeunassigned[num_free++] = i0;
}
}
}
} while (loopcnt < 2); // Repeat once.

// AUGMENT SOLUTION for each free row.
// Restrict-qualified local pointers enable the compiler to avoid
// reloads after stores in the Dijkstra inner loop.
cost* __restrict__ d_ptr = scratch.d.data();
lap_row* __restrict__ pred_ptr = scratch.predecessor.data();
lap_col* __restrict__ cl_ptr = scratch.col_list.data();

for (lap_row f = 0; f < num_free; ++f) {
bool unassignedfound = false;
lap_row free_row = freeunassigned[f]; // Start row of augmenting path.
const cost* __restrict__ free_row_cost = input_cost.row(free_row);
lap_col endofpath = 0;
lap_col last = 0;
lap_row i;
lap_col j1;

// Dijkstra shortest path algorithm.
// Runs until unassigned column added to shortest path tree.
for (lap_col j = 0; j < dim; ++j) {
d_ptr[j] = free_row_cost[j] - v_ptr[j];
pred_ptr[j] = free_row;
cl_ptr[j] = j; // Init column list.
}

cost min = 0;
lap_col low = 0; // Columns in 0..low-1 are ready, now none.
lap_col up = 0; // Columns in low..up-1 are to be scanned for current minimum, now none.
// Columns in up..dim-1 are to be considered later to find new minimum;
// at this stage the list simply contains all columns.

do {
if (up == low) { // No more columns to be scanned for current minimum.
last = low - 1;

// Scan columns for up..dim-1 to find all indices for which new minimum occurs.
// Store these indices between low..up-1 (increasing up).
min = d_ptr[cl_ptr[up++]];

for (lap_dim k = up; k < dim; ++k) {
const lap_col j = cl_ptr[k];
const cost h = d_ptr[j];
if (h <= min) {
if (h < min) { // New minimum.
up = low; // Restart list at index low.
min = h;
}
// New index with same minimum, put on undex up, and extend list.
cl_ptr[k] = cl_ptr[up];
cl_ptr[up++] = j;
}
}
// Check if any of the minimum columns happens to be unassigned.
// If so, we have an augmenting path right away.
for (lap_dim k = low; k < up; ++k) {
if (colsol[cl_ptr[k]] < 0) {
endofpath = cl_ptr[k];
unassignedfound = true;
break;
}
}
}

if (!unassignedfound) {
// Update 'distances' between free_row and all unscanned columns,
// via next scanned column.
j1 = cl_ptr[low++];
i = colsol[j1];
const cost* __restrict__ row_i = input_cost.row(i);
const cost h = row_i[j1] - v_ptr[j1] - min;

for (lap_dim k = up; k < dim; ++k) {
const lap_col j = cl_ptr[k];
cost v2 = row_i[j] - v_ptr[j] - h;
if (v2 < d_ptr[j]) {
pred_ptr[j] = i;
if (v2 == min) { // New column found at same minimum value
if (colsol[j] < 0) {
// If unassigned, shortest augmenting path is complete.
endofpath = j;
unassignedfound = true;
break;
} else {
// Else add to list to be scanned right away.
cl_ptr[k] = cl_ptr[up];
cl_ptr[up++] = j;
}
}
d_ptr[j] = v2;
}
}
}
} while (!unassignedfound);

// Update column prices.
for(lap_dim k = 0; k <= last; ++k) {
j1 = cl_ptr[k];
v[j1] += d_ptr[j1] - min;
}

// Reset row and column assignments along the alternating path.
do {
i = pred_ptr[endofpath];
colsol[endofpath] = i;
j1 = endofpath;
endofpath = rowsol[i];
rowsol[i] = j1;
} while (i != free_row);
}

// Calculate optimal cost.
cost lapcost = 0;
for (lap_dim i = 0; i < dim; ++i) {
lapcost += input_cost(i, rowsol[i]);
}

return lapcost;
}
} // namespace TreeDist
// Pull in the one true definition of TreeDist::lap(). Wire the interrupt
// macro to Rcpp so the package keeps its user-interrupt checks.
#define TREEDIST_CHECK_INTERRUPT() Rcpp::checkUserInterrupt()
#define TREEDIST_LAP_IMPLEMENTATION
#include <TreeDist/lap_impl.h>
Loading