From ade68cf38973383438fa21e9563991b97355f4bd Mon Sep 17 00:00:00 2001 From: nakul-krishnakumar Date: Wed, 27 May 2026 13:01:08 +0530 Subject: [PATCH 1/7] feat: add `ml/strided/dkmeans-init-plus-plus` --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../lib/dkmeans_init_plus_plus.js | 152 +++++++++++ .../dkmeans-init-plus-plus/lib/index.js | 94 +++++++ .../dkmeans-init-plus-plus/lib/main.js | 35 +++ .../dkmeans-init-plus-plus/lib/ndarray.js | 243 ++++++++++++++++++ 4 files changed, 524 insertions(+) create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js 
b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js new file mode 100644 index 000000000000..1e60b8be16cc --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js @@ -0,0 +1,152 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var isLayout = require( '@stdlib/blas/base/assert/is-layout' ); +var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' ); +var isColumnMajor = require( '@stdlib/ndarray/base/assert/is-column-major-string' ); +var max = require( '@stdlib/math/base/special/fast/max' ); +var format = require( '@stdlib/string/format' ); +var ndarray = require( './ndarray.js' ); + + +// MAIN // + +/** +* Initializes centroids by performing the k-means++ initialization procedure. +* +* ## Method +* +* The k-means++ algorithm for choosing initial centroids is as follows: +* +* 1. Select a data point uniformly at random from a data set \\( X \\). This data point is the first centroid and denoted \\( c_0 \\). +* +* 2. Compute the distance from each data point to \\( c_0 \\). Denote the distance between \\( c_j \\) and data point \\( m \\) as \\( d(x_m, c_j) \\). +* +* 3. 
Select the next centroid, \\( c_1 \\), at random from \\( X \\) with probability +* +* ```tex +* \frac{d^2(x_m, c_0)}{\sum_{j=0}^{n-1} d^2(x_j, c_0)} +* ``` +* +* where \\( n \\) is the number of data points. +* +* 4. To choose centroid \\( j \\), +* +* a. Compute the distances from each data point to each centroid and assign each data point to its closest centroid. +* +* b. For \\( i = 0,\ldots,n-1 \\) and \\( p = 0,\ldots,j-2 \\), select centroid \\( j \\) at random from \\( X \\) with probability +* +* ```tex +* \frac{d^2(x_i, c_p)}{\sum_{\{h; x_h \in C_p\}} d^2(x_h, c_p)} +* ``` +* +* where \\( C_p \\) is the set of all data points closest to centroid \\( c_p \\) and \\( x_i \\) belongs to \\( C_p \\). +* +* Stated more plainly, select each subsequent centroid with a probability proportional to the distance from the candidate data point to the closest centroid already chosen. +* +* 5. Repeat step `4` until \\( k \\) centroids have been chosen. +* +* ## References +* +* - Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. <http://dl.acm.org/citation.cfm?id=1283383.1283494>. 
+* +* @param {string} order - storage layout +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} M - number of data points +* @param {PositiveInteger} N - number of features +* @param {Float64Array} out - output array +* @param {integer} LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) +* @param {Float64Array} X - input array +* @param {integer} LDX - stride of the first dimension of `X` (a.k.a., leading dimension of the matrix `X`) +* @param {string} metric - distance metric +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {*} seed - PRNG seed +* @throws {TypeError} first argument must be a valid order +* @throws {RangeError} sixth argument must be greater than or equal to max(1,N) +* @throws {RangeError} eighth argument must be greater than or equal to max(1,N) +* @returns {Float64Array} centroids +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var v = dkmeansInitPlusPlus( 'row-major', k, M, N, out, 2, xbuf, 2, 'sqeuclidean', 3, 44 ); +* // returns [0,0,1,-1,1,1] +*/ +function dkmeansInitPlusPlus( order, k, M, N, out, LDO, X, LDX, metric, trials, seed ) { // eslint-disable-line max-len, max-params + var so1; + var so2; + var sx1; + var sx2; + var so; + var sx; + + if ( !isLayout( order ) ) { + throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) ); + } + if ( isRowMajor( order ) ) { + so = N; + sx = N; + } else { + so = k; + sx = M; + } + if ( LDO < max( 1, so ) ) { + throw new RangeError( format( 'invalid argument. Sixth argument must be greater than or equal to max(1,%d). 
Value: `%d`.', so, LDO ) ); + } + if ( LDX < max( 1, sx ) ) { + throw new RangeError( format( 'invalid argument. Eighth argument must be greater than or equal to max(1,%d). Value: `%d`.', sx, LDX ) ); + } + if ( isColumnMajor( order ) ) { + so1 = 1; + so2 = LDO; + + sx1 = 1; + sx2 = LDX; + } else { // order === 'row-major' + so1 = LDO; + so2 = 1; + + sx1 = LDX; + sx2 = 1; + } + return ndarray( k, M, N, out, so1, so2, 0, X, sx1, sx2, 0, metric, trials, seed ); // eslint-disable-line max-len +} + + +// EXPORTS // + +module.exports = dkmeansInitPlusPlus; diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js new file mode 100644 index 000000000000..89a2199da91b --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js @@ -0,0 +1,94 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +/** +* Initialize centroids by performing the k-means++ initialization procedure using alternative indexing semantics. 
+* +* @module @stdlib/ml/strided/dkmeans-init-plus-plus +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var v = dkmeansInitPlusPlus( 'row-major', k, M, N, out, 2, xbuf, 2, 'sqeuclidean', 3, 44 ); +* // returns [0,0,1,-1,1,1] +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var v = dkmeansInitPlusPlus.ndarray( k, M, N, out, 2, 1, 0, xbuf, 2, 1, 0, 'sqeuclidean', 3, 44 ); +* // returns [0,0,1,-1,1,1] +*/ + +// MODULES // + +var join = require( 'path' ).join; +var tryRequire = require( '@stdlib/utils/try-require' ); +var isError = require( '@stdlib/assert/is-error' ); +var main = require( './main.js' ); + + +// MAIN // + +var dkmeansInitPlusPlus; +var tmp = tryRequire( join( __dirname, './native.js' ) ); +if ( isError( tmp ) ) { + dkmeansInitPlusPlus = main; +} else { + dkmeansInitPlusPlus = tmp; +} + + +// EXPORTS // + +module.exports = dkmeansInitPlusPlus; + +// exports: { "ndarray": "dkmeansInitPlusPlus.ndarray" } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js new file mode 100644 index 000000000000..0745f2767c76 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js @@ -0,0 +1,35 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. 
+* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' ); +var dkmeansInitPlusPlus = require( './dkmeans_init_plus_plus.js' ); +var ndarray = require( './ndarray.js' ); + + +// MAIN // + +setReadOnly( dkmeansInitPlusPlus, 'ndarray', ndarray ); + + +// EXPORTS // + +module.exports = dkmeansInitPlusPlus; diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js new file mode 100644 index 000000000000..148c6fa986b9 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js @@ -0,0 +1,243 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var randint = require( '@stdlib/random/base/discrete-uniform' ).factory; +var randu = require( '@stdlib/random/base/mt19937' ).factory; +var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; +var Float64Array = require( '@stdlib/array/float64' ); +var PINF = require( '@stdlib/constants/float64/pinf' ); +var dsquaredEuclidean = require( '@stdlib/stats/strided/distances/dsquared-euclidean' ).ndarray; +var dcosine = require( '@stdlib/stats/strided/distances/dcosine-distance' ).ndarray; +var dcityblock = require( '@stdlib/stats/strided/distances/dcityblock' ).ndarray; +var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).ndarray; + + +// MAIN // + +/** +* Initializes centroids by performing the k-means++ initialization procedure using alternative indexing semantics. +* +* ## Method +* +* The k-means++ algorithm for choosing initial centroids is as follows: +* +* 1. Select a data point uniformly at random from a data set \\( X \\). This data point is the first centroid and denoted \\( c_0 \\). +* +* 2. Compute the distance from each data point to \\( c_0 \\). Denote the distance between \\( c_j \\) and data point \\( m \\) as \\( d(x_m, c_j) \\). +* +* 3. Select the next centroid, \\( c_1 \\), at random from \\( X \\) with probability +* +* ```tex +* \frac{d^2(x_m, c_0)}{\sum_{j=0}^{n-1} d^2(x_j, c_0)} +* ``` +* +* where \\( n \\) is the number of data points. +* +* 4. To choose centroid \\( j \\), +* +* a. Compute the distances from each data point to each centroid and assign each data point to its closest centroid. +* +* b. 
For \\( i = 0,\ldots,n-1 \\) and \\( p = 0,\ldots,j-2 \\), select centroid \\( j \\) at random from \\( X \\) with probability +* +* ```tex +* \frac{d^2(x_i, c_p)}{\sum_{\{h; x_h \in C_p\}} d^2(x_h, c_p)} +* ``` +* +* where \\( C_p \\) is the set of all data points closest to centroid \\( c_p \\) and \\( x_i \\) belongs to \\( C_p \\). 
+* +* Stated more plainly, select each subsequent centroid with a probability proportional to the distance from the candidate data point to the closest centroid already chosen. +* +* 5. Repeat step `4` until \\( k \\) centroids have been chosen. +* +* ## References +* +* - Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. <http://dl.acm.org/citation.cfm?id=1283383.1283494>. +* +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} M - number of data points +* @param {PositiveInteger} N - number of features +* @param {Float64Array} out - output array +* @param {integer} so1 - first stride length of `out` +* @param {integer} so2 - second stride length of `out` +* @param {NonNegativeInteger} oo - starting index of `out` +* @param {Float64Array} X - input array +* @param {integer} sx1 - first stride length of `X` +* @param {integer} sx2 - second stride length of `X` +* @param {NonNegativeInteger} ox - starting index of `X` +* @param {string} metric - distance metric +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {*} seed - PRNG seed +* @returns {Float64Array} centroids +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var v = dkmeansInitPlusPlus( k, M, N, out, 2, 1, 0, xbuf, 2, 1, 0, 'sqeuclidean', 3, 44 ); +* // returns [0,0,1,-1,1,1] +*/ +function dkmeansInitPlusPlus( k, M, N, out, so1, so2, oo, X, sx1, sx2, ox, metric, trials, seed ) { // eslint-disable-line max-len, max-params + var centroids; // array of indices + var dhash; + var 
randi; + var probs; + var rand; + var csum; + var bsum; + 
var dist; + var xidx; + var cidx; + var oidx; + var d2; + var bc; + var d; + var c; + var i; + var j; + var t; + var r; + + // Create seeded PRNGs: + rand = randu({ + 'seed': seed + }); + randi = randint({ + 'seed': rand() + }); + rand = rand.normalized; + + // 1. Select a data point at random for the first centroid... + c = randi( 0, M-1 ); + if ( k === 1 ) { + // For the trivial case of one centroid, we are done which means we can skip to setting the output centroid data... + return dcopy( N, X, sx2, ox + ( sx1*c ), out, so2, oo ); + } + centroids = [ c ]; + + if ( metric === 'sqeuclidean' ) { + dist = dsquaredEuclidean; + } else if ( metric === 'cosine' ) { + dist = dcosine; + } else if ( metric === 'cityblock' ) { + dist = dcityblock; + } else { + dist = dcorrelation; + } + + dhash = new Float64Array( M ); + for ( i = 0; i < M; i++ ) { + dhash[ i ] = PINF; // squared distance + } + // Create a scratch array for storing cumulative probabilities: + probs = new Float64Array( M ); + + // 2-5. For each data point, compute the distances to each centroid, find the closest centroid, and, based on the distance to the closest centroid, assign a probability to the data point to be chosen as centroid `c_j`... + for ( i = 1; i < k; i++ ) { + csum = 0.0; + xidx = ox; + cidx = ox + ( sx1*centroids[ i-1 ] ); + for ( j = 0; j < M; j++ ) { + d2 = dist( N, X, sx2, xidx, X, sx2, cidx ); + if ( d2 < dhash[ j ] ) { + dhash[ j ] = d2; + csum += d2; + } else { + csum += dhash[ j ]; + } + xidx += sx1; + } + + if ( csum === 0.0 ) { + centroids.push( randi( 0, M-1 ) ); + continue; + } + + // Compute the cumulative probabilities... + probs[ 0 ] = dhash[ 0 ] / csum; + for ( j = 1; j < M; j++ ) { + probs[ j ] = probs[ j-1 ] + ( dhash[ j ] / csum ); + } + // Based Arthur's and Vassilvitskii's paper "kmeans++: The Advantages of Careful Seeding" (see conclusion), randomly select candidate centroids and pick the candidate which minimizes the total squared distance... 
+ bsum = PINF; // best sum + bc = -1; // best candidate + for ( t = 0; t < trials; t++ ) { + // Use rejection sampling to handle edge case where the total cumulative probability does not equal unity due to accumulated floating-point errors and is less than `r` (*very* rarely should this require more than one iteration)... + c = -1; + + // Note: the following should never choose an already chosen centroid (why? because a centroid's minimum squared distance is `0`, which means it will either correspond to a cumulative probability of `0` or will correspond to a cumulative probability equal to the previous cumulative probability, thus leading to the equivalent of a no-op iteration) + while ( c === -1 ) { + r = rand(); // Note: `r` exists on the interval `[0,1)` + for ( j = 0; j < M; j++ ) { + if ( r < probs[ j ] ) { + c = j; + break; + } + } + } + // Compute the sum of squared distances were we to include the candidate centroid... + csum = 0.0; + cidx = ox + ( sx1 * c ); + for ( j = 0; j < M; j++ ) { + d = dist( N, X, sx2, ox + ( sx1*j ), X, sx2, cidx ); + if ( d < dhash[ j ] ) { + csum += d; + } else { + csum += dhash[ j ]; + } + } + // Determine if the candidate is the best candidate we have seen thus far... + if ( csum < bsum ) { + bsum = csum; + bc = c; + } + } + // Push the "best" candidate to our list of centroids: + centroids.push( bc ); + } + // 6. Set centroid data... + oidx = oo; + for ( i = 0; i < k; i++ ) { + // Note: the following is likely to be an "out-of-order" copy... + dcopy( N, X, sx2, ox + ( sx1*centroids[i] ), out, so2, oidx ); + oidx += so1; + } + return out; +} + + +// EXPORTS // + +module.exports = dkmeansInitPlusPlus; From 910e515da5e0708f2df60f2156e521f0db4dc862 Mon Sep 17 00:00:00 2001 From: nakul-krishnakumar Date: Wed, 27 May 2026 13:15:47 +0530 Subject: [PATCH 2/7] fix: add argument validation for `trials` --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. 
report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js | 4 ++++ .../ml/strided/dkmeans-init-plus-plus/lib/ndarray.js | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js index 1e60b8be16cc..50da5f0aabe0 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js @@ -81,6 +81,7 @@ var ndarray = require( './ndarray.js' ); * @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) * @param {*} seed - PRNG seed * @throws {TypeError} first argument must be a valid order +* @throws {TypeError} tenth argument must be a valid trials (>=1) * @throws {RangeError} sixth argument must be greater than or equal to max(1,N) * @throws {RangeError} eighth argument must be greater than or equal to max(1,N) * @returns {Float64Array} centroids @@ -117,6 +118,9 @@ function dkmeansInitPlusPlus( 
order, k, M, N, out, LDO, X, LDX, metric, trials, if ( !isLayout( order ) ) { throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) ); } + if ( trials < 1 ) { + throw new TypeError( format( 'invalid argument. Tenth argument must be a valid trials (>=1). Value: `%s`.', trials ) ); + } if ( isRowMajor( order ) ) { so = N; sx = N; diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js index 148c6fa986b9..b56081bb39cd 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js @@ -25,6 +25,7 @@ var randu = require( '@stdlib/random/base/mt19937' ).factory; var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; var Float64Array = require( '@stdlib/array/float64' ); var PINF = require( '@stdlib/constants/float64/pinf' ); +var format = require( '@stdlib/string/format' ); var dsquaredEuclidean = require( '@stdlib/stats/strided/distances/dsquared-euclidean' ).ndarray; var dcosine = require( '@stdlib/stats/strided/distances/dcosine-distance' ).ndarray; var dcityblock = require( '@stdlib/stats/strided/distances/dcityblock' ).ndarray; @@ -86,6 +87,7 @@ var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).nda * @param {string} metric - distance metric * @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) * @param {*} seed - PRNG seed +* @throws {TypeError} tenth argument must be a valid trials (>=1) * @returns {Float64Array} centroids * * @example @@ -130,6 +132,10 @@ function dkmeansInitPlusPlus( k, M, N, out, so1, so2, oo, X, sx1, sx2, ox, metri var t; var r; + if ( trials < 1 ) { + throw new TypeError( format( 'invalid argument. Thirteenth argument must be a valid trials (>=1). 
Value: `%s`.', trials ) ); + } + // Create seeded PRNGs: rand = randu({ 'seed': seed From 4d8fd8773aa5404c66435f01a772c11824f0c507 Mon Sep 17 00:00:00 2001 From: nakul-krishnakumar Date: Wed, 27 May 2026 22:10:20 +0530 Subject: [PATCH 3/7] test: add tests for `ml/strided/dkmeans-init-plus-plus --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: passed - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../test/fixtures/column_major.json | 53 ++++ .../fixtures/large-strides/column_major.json | 62 +++++ .../fixtures/large-strides/row_major.json | 58 ++++ .../fixtures/mixed-strides/column_major.json | 48 ++++ .../fixtures/mixed-strides/row_major.json | 50 ++++ .../negative-strides/column_major.json | 50 ++++ .../fixtures/negative-strides/row_major.json | 50 ++++ .../test/fixtures/offsets/column_major.json | 51 ++++ .../test/fixtures/offsets/row_major.json | 49 ++++ .../test/fixtures/row_major.json | 49 ++++ .../test/test.dkmeans_init_plus_plus.js | 247 +++++++++++++++++ .../dkmeans-init-plus-plus/test/test.js | 82 ++++++ .../test/test.ndarray.js | 260 ++++++++++++++++++ 13 files changed, 1109 insertions(+) 
create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json new file mode 100644 index 000000000000..a8c767b24db9 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json @@ -0,0 +1,53 @@ +{ + "order": "column-major", + "out": [ + 0.0, + 0.0, + 9999.0, + 9999.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 4, + "N": 2, + 
"strideO1": 1, + "strideO2": 4, + "offsetO": 0, + "LDO": 4, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 1.0, + 2.0, + 5.0, + 0.0, + 3.0, + 4.0, + 6.0, + 0.0 + ], + "strideX1": 1, + "strideX2": 4, + "offsetX": 0, + "LDX": 4, + "X_mat": [ + [ 1.0, 3.0 ], + [ 2.0, 4.0 ], + [ 5.0, 6.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 1.0, + 5.0, + 9999.0, + 9999.0, + 3.0, + 6.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json new file mode 100644 index 000000000000..80ae77ae1bf8 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json @@ -0,0 +1,62 @@ +{ + "order": "column-major", + "out": [ + 0.0, + 9999.0, + 0.0, + 9999.0, + 9999.0, + 9999.0, + 0.0, + 9999.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": 2, + "strideO2": 6, + "offsetO": 0, + "LDO": 3, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 1.0, + 9999.0, + 2.0, + 9999.0, + 0.0, + 9999.0, + 3.0, + 9999.0, + 4.0, + 9999.0, + 0.0, + 9999.0 + ], + "strideX1": 2, + "strideX2": 6, + "offsetX": 0, + "LDX": 3, + "X_mat": [ + [ 1.0, 3.0 ], + [ 2.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 1.0, + 9999.0, + 0.0, + 9999.0, + 9999.0, + 9999.0, + 3.0, + 9999.0, + 0.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json new file mode 100644 index 000000000000..39192a0c1b22 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json @@ -0,0 +1,58 @@ +{ + "order": "row-major", + "out": [ + 0.0, + 
9999.0, + 0.0, + 9999.0, + 0.0, + 9999.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": 4, + "strideO2": 2, + "offsetO": 0, + "LDO": 2, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 1.0, + 9999.0, + 2.0, + 9999.0, + 3.0, + 9999.0, + 4.0, + 9999.0, + 0.0, + 9999.0, + 0.0, + 9999.0 + ], + "strideX1": 4, + "strideX2": 2, + "offsetX": 0, + "LDX": 2, + "X_mat": [ + [ 1.0, 2.0 ], + [ 3.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 1.0, + 9999.0, + 2.0, + 9999.0, + 3.0, + 9999.0, + 4.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json new file mode 100644 index 000000000000..9e5d79575b43 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json @@ -0,0 +1,48 @@ +{ + "order": "column-major", + "out": [ + 0.0, + 0.0, + 9999.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": 1, + "strideO2": -3, + "offsetO": 3, + "LDO": 3, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 3.0, + 4.0, + 0.0, + 1.0, + 2.0, + 0.0 + ], + "strideX1": 1, + "strideX2": -3, + "offsetX": 3, + "LDX": 3, + "X_mat": [ + [ 1.0, 3.0 ], + [ 2.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 3.0, + 0.0, + 9999.0, + 1.0, + 0.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json new file mode 100644 index 000000000000..49fcd8f5b807 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json @@ -0,0 +1,50 @@ +{ + "order": "row-major", + "out": [ + 9999.0, + 9999.0, + 0.0, + 
0.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": -2, + "strideO2": 1, + "offsetO": 4, + "LDO": 2, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 0.0, + 0.0, + 3.0, + 4.0, + 1.0, + 2.0 + ], + "strideX1": -2, + "strideX2": 1, + "offsetX": 4, + "LDX": 2, + "X_mat": [ + [ 1.0, 2.0 ], + [ 3.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 9999.0, + 9999.0, + 3.0, + 4.0, + 1.0, + 2.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json new file mode 100644 index 000000000000..dd77c49804ba --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json @@ -0,0 +1,50 @@ +{ + "order": "column-major", + "out": [ + 9999.0, + 0.0, + 0.0, + 9999.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": -1, + "strideO2": -3, + "offsetO": 5, + "LDO": 3, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 0.0, + 4.0, + 3.0, + 0.0, + 2.0, + 1.0 + ], + "strideX1": -1, + "strideX2": -3, + "offsetX": 5, + "LDX": 3, + "X_mat": [ + [ 1.0, 3.0 ], + [ 2.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 9999.0, + 0.0, + 3.0, + 9999.0, + 0.0, + 1.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json new file mode 100644 index 000000000000..490d9bbd708e --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json @@ -0,0 +1,50 @@ +{ + "order": "row-major", + "out": [ + 9999.0, + 9999.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + 
"strideO1": -2, + "strideO2": -1, + "offsetO": 5, + "LDO": 2, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 0.0, + 0.0, + 4.0, + 3.0, + 2.0, + 1.0 + ], + "strideX1": -2, + "strideX2": -1, + "offsetX": 5, + "LDX": 2, + "X_mat": [ + [ 1.0, 2.0 ], + [ 3.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 9999.0, + 9999.0, + 4.0, + 3.0, + 2.0, + 1.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json new file mode 100644 index 000000000000..546d081bface --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json @@ -0,0 +1,51 @@ +{ + "order": "column-major", + "out": [ + 9999.0, + 0.0, + 0.0, + 9999.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": 1, + "strideO2": 3, + "offsetO": 1, + "LDO": 3, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 9999.0, + 1.0, + 2.0, + 0.0, + 3.0, + 4.0, + 0.0 + ], + "strideX1": 1, + "strideX2": 3, + "offsetX": 1, + "LDX": 3, + "X_mat": [ + [ 1.0, 3.0 ], + [ 2.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 9999.0, + 1.0, + 0.0, + 9999.0, + 3.0, + 0.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json new file mode 100644 index 000000000000..94633bf04965 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json @@ -0,0 +1,49 @@ +{ + "order": "row-major", + "out": [ + 9999.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 3, + "N": 2, + "strideO1": 2, + "strideO2": 1, + "offsetO": 1, + "LDO": 2, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + 
"X": [ + 9999.0, + 1.0, + 2.0, + 3.0, + 4.0, + 0.0, + 0.0 + ], + "strideX1": 2, + "strideX2": 1, + "offsetX": 1, + "LDX": 2, + "X_mat": [ + [ 1.0, 2.0 ], + [ 3.0, 4.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 9999.0, + 1.0, + 2.0, + 3.0, + 4.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json new file mode 100644 index 000000000000..d0d6c551c774 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json @@ -0,0 +1,49 @@ +{ + "order": "row-major", + "out": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "k": 2, + "M": 4, + "N": 2, + "strideO1": 2, + "strideO2": 1, + "offsetO": 0, + "LDO": 2, + "out_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "X": [ + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + 0.0, + 0.0 + ], + "strideX1": 2, + "strideX2": 1, + "offsetX": 0, + "LDX": 2, + "X_mat": [ + [ 1.0, 2.0 ], + [ 3.0, 4.0 ], + [ 5.0, 6.0 ], + [ 0.0, 0.0 ] + ], + "metric": "sqeuclidean", + "trials": 2, + "seed": 44, + "expected": [ + 1.0, + 2.0, + 5.0, + 6.0 + ] +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js new file mode 100644 index 000000000000..3f2072acfa23 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js @@ -0,0 +1,247 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len */ + +'use strict'; + +// MODULES // + +var tape = require( 'tape' ); +var Float64Array = require( '@stdlib/array/float64' ); +var isAlmostSameValue = require( '@stdlib/assert/is-almost-same-value' ); +var dkmeansInitPlusPlus = require( './../lib/dkmeans_init_plus_plus.js' ); + + +// FIXTURES // + +var ROW_MAJOR_DATA = require( './fixtures/row_major.json' ); +var COLUMN_MAJOR_DATA = require( './fixtures/column_major.json' ); + + +// TESTS // + +tape( 'main export is a function', function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dkmeansInitPlusPlus, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'the function has an arity of 11', function test( t ) { + t.strictEqual( dkmeansInitPlusPlus.length, 11, 'returns expected value' ); + t.end(); +}); + +tape( 'the function throws an error if provided a first argument which is not a valid order', function test( t ) { + var values; + var data; + var i; + + data = ROW_MAJOR_DATA; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop', + -5, + NaN, + true, + false, + null, + void 0, + [], + {}, + function noop() {} + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( value, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + }; + } +}); + +tape( 'the function throws 
an error if provided a sixth argument which is not a valid `LDO` value (row-major)', function test( t ) { + var values; + var data; + var i; + + data = ROW_MAJOR_DATA; + + values = [ + 0, + 1 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), value, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + }; + } +}); + +tape( 'the function throws an error if provided a sixth argument which is not a valid `LDO` value (column-major)', function test( t ) { + var values; + var data; + var i; + + data = COLUMN_MAJOR_DATA; + + values = [ + 0, + 1 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), value, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + }; + } +}); + +tape( 'the function throws an error if provided a eighth argument which is not a valid `LDX` value (row-major)', function test( t ) { + var values; + var data; + var i; + + data = ROW_MAJOR_DATA; + + values = [ + 0, + 1 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), value, data.metric, data.trials, data.seed ); + }; + } +}); + +tape( 'the function throws an error if provided a eighth argument which is not a valid `LDX ` value 
(column-major)', function test( t ) { + var values; + var data; + var i; + + data = COLUMN_MAJOR_DATA; + + values = [ + 0, + 1 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), value, data.metric, data.trials, data.seed ); + }; + } +}); + +tape( 'the function throws an error when trials is less than or equal to zero', function test( t ) { + var data; + + data = ROW_MAJOR_DATA; + t.throws( badValue( 0 ), RangeError, 'throws an error when provided ' + 0 ); + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, value, data.seed ); + }; + } +}); + +tape( 'the function returns a NaN array when M is less than or equal to zero', function test( t ) { + var data; + var out; + + data = ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.order, data.k, 0, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + + t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a NaN array when N is less than or equal to zero', function test( t ) { + var data; + var out; + + data = ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.order, data.k, data.M, 0, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + + t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns the centroids (row-major)', function test( t ) { + var expected; + var data; +
var out; + var i; + + data = ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (column-major)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.js new file mode 100644 index 000000000000..4df22faf26d4 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.js @@ -0,0 +1,82 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var tape = require( 'tape' ); +var proxyquire = require( 'proxyquire' ); +var IS_BROWSER = require( '@stdlib/assert/is-browser' ); +var dkmeansInitPlusPlus = require( './../lib' ); + + +// VARIABLES // + +var opts = { + 'skip': IS_BROWSER +}; + + +// TESTS // + +tape( 'main export is a function', function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dkmeansInitPlusPlus, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'attached to the main export is a method providing an ndarray interface', function test( t ) { + t.strictEqual( typeof dkmeansInitPlusPlus.ndarray, 'function', 'method is a function' ); + t.end(); +}); + +tape( 'if a native implementation is available, the main export is the native implementation', opts, function test( t ) { + var dkmeansInitPlusPlus = proxyquire( './../lib', { + '@stdlib/utils/try-require': tryRequire + }); + + t.strictEqual( dkmeansInitPlusPlus, mock, 'returns expected value' ); + t.end(); + + function tryRequire() { + return mock; + } + + function mock() { + // Mock... 
+ } +}); + +tape( 'if a native implementation is not available, the main export is a JavaScript implementation', opts, function test( t ) { + var dkmeansInitPlusPlus; + var main; + + main = require( './../lib/dkmeans_init_plus_plus.js' ); + + dkmeansInitPlusPlus = proxyquire( './../lib', { + '@stdlib/utils/try-require': tryRequire + }); + + t.strictEqual( dkmeansInitPlusPlus, main, 'returns expected value' ); + t.end(); + + function tryRequire() { + return new Error( 'Cannot find module' ); + } +}); diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js new file mode 100644 index 000000000000..37e933408ff5 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js @@ -0,0 +1,260 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +/* eslint-disable max-len, id-length */ + +'use strict'; + +// MODULES // + +var tape = require( 'tape' ); +var Float64Array = require( '@stdlib/array/float64' ); +var isAlmostSameValue = require( '@stdlib/assert/is-almost-same-value' ); +var dkmeansInitPlusPlus = require( './../lib/ndarray.js' ); + + +// FIXTURES // + +var ROW_MAJOR_DATA = require( './fixtures/row_major.json' ); +var COLUMN_MAJOR_DATA = require( './fixtures/column_major.json' ); +var OFFSET_ROW_MAJOR_DATA = require( './fixtures/offsets/row_major.json' ); +var OFFSET_COLUMN_MAJOR_DATA = require( './fixtures/offsets/column_major.json' ); +var NEGATIVE_STRIDES_ROW_MAJOR_DATA = require( './fixtures/negative-strides/row_major.json' ); +var NEGATIVE_STRIDES_COLUMN_MAJOR_DATA = require( './fixtures/negative-strides/column_major.json' ); +var MIXED_STRIDES_ROW_MAJOR_DATA = require( './fixtures/mixed-strides/row_major.json' ); +var MIXED_STRIDES_COLUMN_MAJOR_DATA = require( './fixtures/mixed-strides/column_major.json' ); +var LARGE_STRIDES_ROW_MAJOR_DATA = require( './fixtures/large-strides/row_major.json' ); +var LARGE_STRIDES_COLUMN_MAJOR_DATA = require( './fixtures/large-strides/column_major.json' ); + + +// TESTS // + +tape( 'main export is a function', function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dkmeansInitPlusPlus, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'the function has an arity of 14', function test( t ) { + t.strictEqual( dkmeansInitPlusPlus.length, 14, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a NaN array when M is less than or equal to zero (row-major)', function test( t ) { + var data; + var out; + + data = ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, 0, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + t.strictEqual( isAlmostSameValue( out, NaN, 
0), true, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a NaN array when N is less than or equal to zero (row-major)', function test( t ) { + var data; + var out; + + data = ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, 0, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a NaN array when M is less than or equal to zero (column-major)', function test( t ) { + var data; + var out; + + data = COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, 0, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a NaN array when N is less than or equal to zero (column-major)', function test( t ) { + var data; + var out; + + data = COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, 0, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns the centroids (row-major)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( 
data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (column-major)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (row-major, offsets)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = OFFSET_ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (column-major, offsets)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = OFFSET_COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 
'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (row-major, mixed strides)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = MIXED_STRIDES_ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (column-major, mixed strides)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = MIXED_STRIDES_COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (row-major, negative strides)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = NEGATIVE_STRIDES_ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns 
the centroids (column-major, negative strides)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = NEGATIVE_STRIDES_COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (row-major, large strides)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = LARGE_STRIDES_ROW_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function returns the centroids (column-major, large strides)', function test( t ) { + var expected; + var data; + var out; + var i; + + data = LARGE_STRIDES_COLUMN_MAJOR_DATA; + out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + + expected = new Float64Array( data.expected ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } + t.end(); +}); From e45a3aa91363a88654e2453124a23181ac8e95fb Mon Sep 17 00:00:00 2001 From: nakul-krishnakumar Date: 
Wed, 27 May 2026 22:11:37 +0530 Subject: [PATCH 4/7] bench: add JS benchmarks for `ml/strided/dkmeans-init-plus-plus` --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: passed - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../benchmark/benchmark.js | 159 ++++++++++++++++++ .../benchmark/benchmark.ndarray.js | 147 ++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js new file mode 100644 index 000000000000..3d89160dc469 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js @@ -0,0 +1,159 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors.
+* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var bench = require( '@stdlib/bench' ); +var uniform = require( '@stdlib/random/array/uniform' ); +var Float64Array = require( '@stdlib/array/float64' ); +var isnan = require( '@stdlib/math/base/assert/is-nan' ); +var pow = require( '@stdlib/math/base/special/pow' ); +var format = require( '@stdlib/string/format' ); +var pkg = require( './../package.json' ).name; +var dkmeansInitPlusPlus = require( './../lib/dkmeans_init_plus_plus.js' ); + + +// VARIABLES // + +var LAYOUTS = [ + 'row-major', + 'column-major' +]; + +var options = { + 'dtype': 'float64' +}; + + +// FUNCTIONS // + +/** +* Creates a benchmark function. +* +* @private +* @param {string} order - storage layout +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} M - number of data points +* @param {PositiveInteger} N - number of features +* @param {string} metric - distance metric +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @returns {Function} benchmark function +*/ +function createBenchmark( order, k, M, N, metric, trials ) { + var out = new Float64Array( k*N ); + var x = uniform( M*N, -100.0, 100.0, options ); + return benchmark; + + /** + * Benchmark function. 
+ * + * @private + * @param {Benchmark} b - benchmark instance + */ + function benchmark( b ) { + var c; + var i; + + b.tic(); + for ( i = 0; i < b.iterations; i++ ) { + c = dkmeansInitPlusPlus( order, k, M, N, out, ( order === 'column-major' ) ? k : N, x, ( order === 'column-major' ) ? M : N, metric, trials, 44 ); // eslint-disable-line max-len + if ( isnan( c[ i%(k*N) ] ) ) { + b.fail( 'should not return NaN' ); + } + } + b.toc(); + if ( isnan( c[ 0 ] ) ) { + b.fail( 'should not return NaN' ); + } + b.pass( 'benchmark finished' ); + b.end(); + } +} + + +// MAIN // + +/** +* Main execution sequence. +* +* @private +*/ +function main() { + var metrics; + var trials; + var min; + var max; + var M; + var N; + var k; + var t; + var m; + var i; + var j; + var f; + + min = 1; // 10^min + max = 4; // 10^max + + // Benchmark: vary order with defaults k=2, M=100, N=2, metric=sqeuclidean, trials=1... + for ( i = 0; i < LAYOUTS.length; i++ ) { + f = createBenchmark( LAYOUTS[ i ], 2, 100, 2, 'sqeuclidean', 1 ); + bench( format( '%s::vary_order:order=%s,k=2,M=100,N=2,metric=sqeuclidean,trials=1', pkg, LAYOUTS[ i ] ), f ); + } + + // Benchmark: vary number of data points (M) with defaults k=2, N=2, metric=sqeuclidean, trials=1... + for ( i = min; i <= max; i++ ) { + M = pow( 10, i ); + f = createBenchmark( 'row-major', 2, M, 2, 'sqeuclidean', 1 ); + bench( format( '%s::vary_M:order=row-major,k=2,M=%d,N=2,metric=sqeuclidean,trials=1', pkg, M ), f ); + } + + // Benchmark: vary number of features (N) with defaults k=2, M=10, metric=sqeuclidean, trials=1... + for ( i = min; i <= max; i++ ) { + N = pow( 10, i ); + f = createBenchmark( 'row-major', 2, 10, N, 'sqeuclidean', 1 ); + bench( format( '%s::vary_N:order=row-major,k=2,M=10,N=%d,metric=sqeuclidean,trials=1', pkg, N ), f ); + } + + // Benchmark: vary number of clusters (k) with defaults M=1000, N=2, metric=sqeuclidean, trials=1...
+ for ( i = 1; i <= 4; i++ ) { + k = pow( 2, i ); + f = createBenchmark( 'row-major', k, 1000, 2, 'sqeuclidean', 1 ); + bench( format( '%s::vary_k:order=row-major,k=%d,M=1000,N=2,metric=sqeuclidean,trials=1', pkg, k ), f ); + } + + // Benchmark: vary number of trials with defaults k=2, M=1000, N=2, metric=sqeuclidean... + trials = [ 1, 10, 100, 1000 ]; + for ( j = 0; j < trials.length; j++ ) { + t = trials[ j ]; + f = createBenchmark( 'row-major', 2, 1000, 2, 'sqeuclidean', t ); + bench( format( '%s::vary_trials:order=row-major,k=2,M=1000,N=2,metric=sqeuclidean,trials=%d', pkg, t ), f ); + } + + // Benchmark: vary metric with defaults k=2, M=1000, N=2, trials=1... + metrics = [ 'sqeuclidean', 'cosine', 'cityblock', 'correlation' ]; + for ( j = 0; j < metrics.length; j++ ) { + m = metrics[ j ]; + f = createBenchmark( 'row-major', 2, 1000, 2, m, 1 ); + bench( format( '%s::vary_metric:order=row-major,k=2,M=1000,N=2,metric=%s,trials=1', pkg, m ), f ); + } +} + +main(); diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js new file mode 100644 index 000000000000..12505912673d --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js @@ -0,0 +1,147 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
/**
* Creates a benchmark function.
*
* ## Notes
*
* -   The output buffer (`k*N` elements) and the input data (`M*N` uniformly distributed values on the interval `[-100,100)`) are allocated once, outside of the timed section.
*
* @private
* @param {PositiveInteger} k - number of clusters
* @param {PositiveInteger} M - number of data points
* @param {PositiveInteger} N - number of features
* @param {string} metric - distance metric
* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1)
* @returns {Function} benchmark function
*/
function createBenchmark( k, M, N, metric, trials ) {
	var out = new Float64Array( k*N );
	var x = uniform( M*N, -100.0, 100.0, options );
	return benchmark;

	/**
	* Benchmark function.
	*
	* @private
	* @param {Benchmark} b - benchmark instance
	*/
	function benchmark( b ) {
		var c;
		var i;

		b.tic();
		for ( i = 0; i < b.iterations; i++ ) {
			// Both `out` and `x` are passed with strides `(N,1)` and offset `0`, and a fixed seed (`44`) is used for every iteration:
			c = dkmeansInitPlusPlus( k, M, N, out, N, 1, 0, x, N, 1, 0, metric, trials, 44 ); // eslint-disable-line max-len

			// Inspect a varying element of the result within the timed loop:
			if ( isnan( c[ i%(k*N) ] ) ) {
				b.fail( 'should not return NaN' );
			}
		}
		b.toc();
		if ( isnan( c[ 0 ] ) ) {
			b.fail( 'should not return NaN' );
		}
		b.pass( 'benchmark finished' );
		b.end();
	}
}

/**
* Main execution sequence.
*
* Registers one benchmark per configuration, varying a single parameter at a time (number of data points, number of features, number of clusters, number of trials, and distance metric) while holding the remaining parameters fixed.
*
* @private
*/
function main() {
	var metrics;
	var trials;
	var min;
	var max;
	var M;
	var N;
	var k;
	var t;
	var m;
	var i;
	var j;
	var f;

	min = 1; // 10^min
	max = 4; // 10^max

	// Benchmark: vary number of data points (M) with defaults k=2, N=2, metric=sqeuclidean, trials=1...
	for ( i = min; i <= max; i++ ) {
		M = pow( 10, i );
		f = createBenchmark( 2, M, 2, 'sqeuclidean', 1 );
		bench( format( '%s::vary_M:k=2,M=%d,N=2,metric=sqeuclidean,trials=1', pkg, M ), f );
	}

	// Benchmark: vary number of features (N) with defaults k=2, M=10, metric=sqeuclidean, trials=1...
	for ( i = min; i <= max; i++ ) {
		N = pow( 10, i );
		f = createBenchmark( 2, 10, N, 'sqeuclidean', 1 );

		// NOTE: the label reports `M=10`, matching the number of data points actually benchmarked...
		bench( format( '%s::vary_N:k=2,M=10,N=%d,metric=sqeuclidean,trials=1', pkg, N ), f );
	}

	// Benchmark: vary number of clusters (k) with defaults M=1000, N=2, metric=sqeuclidean, trials=1...
	for ( i = 1; i <= 4; i++ ) {
		k = pow( 2, i );
		f = createBenchmark( k, 1000, 2, 'sqeuclidean', 1 );
		bench( format( '%s::vary_k:k=%d,M=1000,N=2,metric=sqeuclidean,trials=1', pkg, k ), f );
	}

	// Benchmark: vary number of trials with defaults k=2, M=1000, N=2, metric=sqeuclidean...
	trials = [ 1, 10, 100 ];
	for ( j = 0; j < trials.length; j++ ) {
		t = trials[ j ];
		f = createBenchmark( 2, 1000, 2, 'sqeuclidean', t );
		bench( format( '%s::vary_trials:k=2,M=1000,N=2,metric=sqeuclidean,trials=%d', pkg, t ), f );
	}

	// Benchmark: vary metric with defaults k=2, M=1000, N=2, trials=1...
	metrics = [ 'sqeuclidean', 'cosine', 'cityblock', 'correlation' ];
	for ( j = 0; j < metrics.length; j++ ) {
		m = metrics[ j ];
		f = createBenchmark( 2, 1000, 2, m, 1 );
		bench( format( '%s::vary_metric:k=2,M=1000,N=2,metric=%s,trials=1', pkg, m ), f );
	}
}
report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js | 2 +- .../@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js | 2 +- .../@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js index 50da5f0aabe0..ac2bab979598 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js @@ -31,7 +31,7 @@ var ndarray = require( './ndarray.js' ); // MAIN // /** -* Initializes centroids by performing the k-means++ initialization procedure. +* Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. 
* * ## Method * diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js index 89a2199da91b..19f4f3d116ca 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js @@ -19,7 +19,7 @@ 'use strict'; /** -* Initialize centroids by performing the k-means++ initialization procedure using alternative indexing semantics. +* Initialize centroids by performing the k-means++ initialization procedure on double-precision floating-point data points using alternative indexing semantics. * * @module @stdlib/ml/strided/dkmeans-init-plus-plus * diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js index b56081bb39cd..bb1ff8d31539 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js @@ -35,7 +35,7 @@ var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).nda // MAIN // /** -* Initializes centroids by performing the k-means++ initialization procedure using alternative indexing semantics. +* Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points using alternative indexing semantics. * * ## Method * @@ -135,6 +135,9 @@ function dkmeansInitPlusPlus( k, M, N, out, so1, so2, oo, X, sx1, sx2, ox, metri if ( trials < 1 ) { throw new TypeError( format( 'invalid argument. Thirteenth argument must be a valid trials (>=1). 
Value: `%s`.', trials ) ); } + if ( k < 1 || M < 1 || N < 1) { + return NaN; + } // Create seeded PRNGs: rand = randu({ From 78ebd7b6c974d2ca41aaa8dfd7ffa75d915668af Mon Sep 17 00:00:00 2001 From: nakul-krishnakumar Date: Tue, 2 Jun 2026 01:39:54 +0530 Subject: [PATCH 6/7] refactor: update according to code review --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../dkmeans-init-plus-plus/lib/base.js | 281 ++++++++++++++++++ .../lib/dkmeans_init_plus_plus.js | 104 +++---- .../dkmeans-init-plus-plus/lib/index.js | 24 +- .../dkmeans-init-plus-plus/lib/ndarray.js | 228 +++----------- 4 files changed, 391 insertions(+), 246 deletions(-) create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js new file mode 100644 index 000000000000..1af42e447dad --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js @@ -0,0 +1,281 @@ +/** +* 
@license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var discreteUniform = require( '@stdlib/random/base/discrete-uniform' ).factory; +var uniform = require( '@stdlib/random/base/uniform' ).factory; +var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; +var PINF = require( '@stdlib/constants/float64/pinf' ); +var dsquaredEuclidean = require( '@stdlib/stats/strided/distances/dsquared-euclidean' ).ndarray; +var dcosine = require( '@stdlib/stats/strided/distances/dcosine-distance' ).ndarray; +var dcityblock = require( '@stdlib/stats/strided/distances/dcityblock' ).ndarray; +var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).ndarray; +var accessors = require( '@stdlib/array/base/accessors' ); +var FLOAT64_EPS = require( '@stdlib/constants/float64/eps' ); + + +// MAIN // + +/** +* Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points using alternative indexing semantics. +* +* ## Method +* +* The k-means++ algorithm for choosing initial centroids is as follows: +* +* 1. Select a data point uniformly at random from a data set \\( X \\). This data point is first centroid and denoted \\( c_0 \\). +* +* 2. Compute the distance from each data point to \\( c_0 \\). Denote the distance between \\( c_j \\) and data point \\( m \\) as \\( d(x_m, c_j) \\). +* +* 3. 
Select the next centroid, \\( c_1 \\), at random from \\( X \\) with probability +* +* ```tex +* \frac{d^2(x_m, c_0)}{\sum_{j=0}^{n-1} d^2(x_j, c_0)} +* ``` +* +* where \\( n \\) is the number of data points. +* +* 4. To choose centroid \\( j \\), +* +* a. Compute the distances from each data point to each centroid and assign each data point to its closest centroid. +* +* b. For \\( i = 0,\ldots,n-1 \\) and \\( p = 0,\ldots,j-2 \\), select centroid \\( j \\) at random from \\( X \\) with probability +* +* ```tex +* \frac{d^2(x_i, c_p)}{\sum_{\{h; x_h \in C_p\}} d^2(x_h, c_p)} +* ``` +* +* where \\( C_p \\) is the set of all data points closest to centroid \\( c_p \\) and \\( x_i \\) belongs to \\( c_p \\). +* +* Stated more plainly, select each subsequent centroid with a probability proportional to the distance from the centroid to the closest centroid already chosen. +* +* 5. Repeat step `4` until \\( k \\) centroids have been chosen. +* +* ## References +* +* - Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual Acm-Siam Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. . 
+* +* @param {PositiveInteger} M - number of data points +* @param {PositiveInteger} N - number of features +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {string} metric - distance metric +* @param {Float64Array} X - input array +* @param {integer} sx1 - first stride length of `X` +* @param {integer} sx2 - second stride length of `X` +* @param {NonNegativeInteger} ox - starting index of `X` +* @param {Float64Array} out - output array +* @param {integer} so1 - first stride length of `out` +* @param {integer} so2 - second stride length of `out` +* @param {NonNegativeInteger} oo - starting index of `out` +* @param {Float64Array} W1 - first workspace array of size `2*M` for tracking squared distances and probabilities +* @param {integer} sw1 - stride length of `W1` +* @param {NonNegativeInteger} ow1 - starting index of `W1` +* @param {Float64Array} W2 - second workspace array for tracking centroid candidates +* @param {integer} sw2 - stride length of `W2` +* @param {NonNegativeInteger} ow2 - starting index of `W2` +* @param {Options} [options] - function options +* @param {PRNG} [options.prng] - pseudorandom number generator which generates uniformly distributed pseudorandom numbers +* @param {PRNGSeedMT19937} [options.seed] - pseudorandom number generator seed +* @param {PRNGStateMT19937} [options.state] - pseudorandom number generator state +* @param {boolean} [options.copy=true] - boolean indicating whether to copy a provided pseudorandom number generator state +* @returns {Float64Array} centroids +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 
1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var opts = { +* 'seed': 1234, +* } +* +* dkmeansInitPlusPlus( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); +* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] +*/ +function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ) { // eslint-disable-line max-len, max-params + var centroids; // array of indices + var randi; + var w1idx; + var w2idx; + var rand; + var csum; + var bsum; + var dist; + var xidx; + var cidx; + var oidx; + var opr; + var odh; + var s2; + var d2; + var bc; + var d; + var c; + var i; + var j; + var t; + var r; + + if ( arguments.length > 19 ) { + randi = discreteUniform( options ); + rand = uniform( 0.0, 1.0-FLOAT64_EPS, options ); + } else { + randi = discreteUniform(); + rand = uniform( 0.0, 1.0-FLOAT64_EPS ); + } + w2idx = ow2; + centroids = accessors( W2 ); + + // Resolve workspace offsets for the squared distances and cumulative probabilities. The workspace `W1` interleaves the two values for each data point, such that, for data point `i`, the squared distance resides at strided index `2*i` and the cumulative probability at strided index `2*i+1`... + odh = ow1; + opr = ow1 + sw1; + s2 = 2 * sw1; // stride between consecutive data points within `W1` + + // Initialize the squared distances to positive infinity... + w1idx = odh; + for ( i = 0; i < M; i++ ) { + W1[ w1idx ] = PINF; + w1idx += s2; + } + + // 1. Select a data point at random for the first centroid... + c = randi( 0, M-1 ); + if ( k === 1 ) { + // For the trivial case of one centroid, we are done which means we can skip to setting the output centroid data... + return dcopy( N, X, sx2, ox + ( sx1*c ), out, so2, oo ); + } + centroids.accessors[ 1 ]( W2, w2idx, c ); // set first centroid `c`. 
+ w2idx += sw2; + + if ( metric === 'sqeuclidean' ) { + dist = dsquaredEuclidean; + } else if ( metric === 'cosine' ) { + dist = dcosine; + } else if ( metric === 'cityblock' ) { + dist = dcityblock; + } else { + dist = dcorrelation; + } + + // 2-5. For each data point, compute the distances to each centroid, find the closest centroid, and, based on the distance to the closest centroid, assign a probability to the data point to be chosen as centroid `c_j`... + for ( i = 1; i < k; i++ ) { + csum = 0.0; + xidx = ox; + c = centroids.accessors[ 0 ]( W2, w2idx - sw2 ); // get the recently added centroid... + cidx = ox + ( sx1*c ); + w1idx = odh; + for ( j = 0; j < M; j++ ) { + d2 = dist( N, X, sx2, xidx, X, sx2, cidx ); + if ( d2 < W1[ w1idx ] ) { + W1[ w1idx ] = d2; + csum += d2; + } else { + csum += W1[ w1idx ]; + } + xidx += sx1; + w1idx += s2; + } + + if ( csum === 0.0 ) { + centroids.accessors[ 1 ]( W2, w2idx, randi( 0, M-1 ) ); + w2idx += sw2; + continue; + } + + // Compute the cumulative probabilities... + w1idx = opr; + W1[ w1idx ] = W1[ odh ] / csum; + for ( j = 1; j < M; j++ ) { + w1idx += s2; + W1[ w1idx ] = W1[ w1idx-s2 ] + ( W1[ odh + (j*s2) ] / csum ); + } + // Based Arthur's and Vassilvitskii's paper "kmeans++: The Advantages of Careful Seeding" (see conclusion), randomly select candidate centroids and pick the candidate which minimizes the total squared distance... + bsum = PINF; // best sum + bc = -1; // best candidate + for ( t = 0; t < trials; t++ ) { + // Use rejection sampling to handle edge case where the total cumulative probability does not equal unity due to accumulated floating-point errors and is less than `r` (*very* rarely should this require more than one iteration)... + c = -1; + + // Note: the following should never choose an already chosen centroid (why? 
because a centroid's minimum squared distance is `0`, which means it will either correspond to a cumulative probability of `0` or will correspond to a cumulative probability equal to the previous cumulative probability, thus leading to the equivalent of a no-op iteration) + while ( c === -1 ) { + r = rand(); // Note: `r` exists on the interval `[0,1)` + for ( j = 0; j < M; j++ ) { + if ( r < W1[ opr + (j*s2) ] ) { + c = j; + break; + } + } + } + // Compute the sum of squared distances were we to include the candidate centroid... + csum = 0.0; + cidx = ox + ( sx1 * c ); + w1idx = odh; + for ( j = 0; j < M; j++ ) { + d = dist( N, X, sx2, ox + ( sx1*j ), X, sx2, cidx ); + if ( d < W1[ w1idx ] ) { + csum += d; + } else { + csum += W1[ w1idx ]; + } + w1idx += s2; + } + // Determine if the candidate is the best candidate we have seen thus far... + if ( csum < bsum ) { + bsum = csum; + bc = c; + } + } + // Push the "best" candidate to our list of centroids: + centroids.accessors[ 1 ]( W2, w2idx, bc ); + w2idx += sw2; + } + // 6. Set centroid data... + oidx = oo; + for ( i = 0; i < k; i++ ) { + // Note: the following is likely to be an "out-of-order" copy... 
+ c = centroids.accessors[ 0 ]( W2, ow2 + ( i*sw2 ) ); + dcopy( N, X, sx2, ox + ( sx1*c ), out, so2, oidx ); + oidx += so1; + } + return out; +} + + +// EXPORTS // + +module.exports = dkmeansInitPlusPlus; diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js index ac2bab979598..6b6c8f294d6d 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js @@ -20,10 +20,13 @@ // MODULES // -var isLayout = require( '@stdlib/blas/base/assert/is-layout' ); -var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' ); var isColumnMajor = require( '@stdlib/ndarray/base/assert/is-column-major-string' ); -var max = require( '@stdlib/math/base/special/fast/max' ); +var resolveMetricStr = require( '@stdlib/ml/base/kmeans/metric-resolve-str' ); +var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' ); +var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ); +var stride2offset = require( '@stdlib/strided/base/stride2offset' ); +var isLayout = require( '@stdlib/blas/base/assert/is-layout' ); +var max = require( '@stdlib/math/base/special/max' ); var format = require( '@stdlib/string/format' ); var ndarray = require( './ndarray.js' ); @@ -33,67 +36,43 @@ var ndarray = require( './ndarray.js' ); /** * Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. * -* ## Method -* -* The k-means++ algorithm for choosing initial centroids is as follows: -* -* 1. Select a data point uniformly at random from a data set \\( X \\). This data point is first centroid and denoted \\( c_0 \\). -* -* 2. Compute the distance from each data point to \\( c_0 \\). 
Denote the distance between \\( c_j \\) and data point \\( m \\) as \\( d(x_m, c_j) \\). -* -* 3. Select the next centroid, \\( c_1 \\), at random from \\( X \\) with probability -* -* ```tex -* \frac{d^2(x_m, c_0)}{\sum_{j=0}^{n-1} d^2(x_j, c_0)} -* ``` -* -* where \\( n \\) is the number of data points. -* -* 4. To choose centroid \\( j \\), -* -* a. Compute the distances from each data point to each centroid and assign each data point to its closest centroid. -* -* b. For \\( i = 0,\ldots,n-1 \\) and \\( p = 0,\ldots,j-2 \\), select centroid \\( j \\) at random from \\( X \\) with probability -* -* ```tex -* \frac{d^2(x_i, c_p)}{\sum_{\{h; x_h \exits C_p\}} d^2(x_h, c_p)} -* ``` -* -* where \\( C_p \\) is the set of all data points closest to centroid \\( c_p \\) and \\( x_i \\) belongs to \\( c_p \\). -* -* Stated more plainly, select each subsequent centroid with a probability proportional to the distance from the centroid to the closest centroid already chosen. -* -* 5. Repeat step `4` until \\( k \\) centroids have been chosen. -* -* ## References -* -* - Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual Acm-Siam Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. . 
-* * @param {string} order - storage layout -* @param {PositiveInteger} k - number of clusters * @param {PositiveInteger} M - number of data points * @param {PositiveInteger} N - number of features -* @param {Float64Array} out - input array -* @param {integer} LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {string} metric - distance metric * @param {Float64Array} X - input array * @param {integer} LDX - stride of the first dimension of `x` (a.k.a., leading dimension of the matrix `x`) -* @param {string} metric - distance metric -* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) -* @param {*} seed - PRNG seed -* @throws {TypeError} first argument must be a valid order -* @throws {TypeError} tenth argument must be a valid trials (>=1) +* @param {Float64Array} out - output array +* @param {integer} LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) +* @param {Float64Array} W1 - first workspace array of size `2*M` for tracking squared distances and probabilities +* @param {integer} sw1 - stride length of `W1` +* @param {Float64Array} W2 - second workspace array for tracking centroid candidates +* @param {integer} sw2 - stride length of `W2` +* @param {Options} [options] - function options +* @param {PRNG} [options.prng] - pseudorandom number generator which generates uniformly distributed pseudorandom numbers +* @param {PRNGSeedMT19937} [options.seed] - pseudorandom number generator seed +* @param {PRNGStateMT19937} [options.state] - pseudorandom number generator state +* @param {boolean} [options.copy=true] - boolean indicating whether to copy a provided pseudorandom number generator state +* @throws {RangeError} first argument must be greater than or equal to third argument +* @throws {RangeError} 
fourth argument must be a positive integer +* @throws {TypeError} fifth argument must be a valid supported metric * @throws {RangeError} sixth argument must be greater than or equal to max(1,N) * @throws {RangeError} eighth argument must be greater than or equal to max(1,N) * @returns {Float64Array} centroids * * @example * var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); * * var k = 3; * var M = 5; * var N = 2; * * var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); * * // Specify data points: * var xbuf = new Float64Array([ @@ -104,10 +83,14 @@ var ndarray = require( './ndarray.js' ); * -1.0, 1.0 * ]); * -* var v = dkmeansInitPlusPlus( 'row-major', k, M, N, out, 2, xbuf, 2, 'sqeuclidean', 3, 44 ); -* // returns [0,0,1,-1,1,1] +* var opts = { +* 'seed': 1234, +* } +* +* dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); +* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] */ -function dkmeansInitPlusPlus( order, k, M, N, out, LDO, X, LDX, metric, trials, seed ) { // eslint-disable-line max-len, max-params +function dkmeansInitPlusPlus( order, M, N, k, trials, metric, X, LDX, out, LDO, W1, sw1, W2, sw2, options ) { // eslint-disable-line max-len, max-params var so1; var so2; var sx1; @@ -115,12 +98,21 @@ function dkmeansInitPlusPlus( order, k, M, N, out, LDO, X, LDX, metric, trials, var so; var sx; + if ( k > M ) { + throw new RangeError( format( 'invalid argument. First argument `M` must be greater than or equal to third argument `k`. Value: `M=%d, k=%d`.', M, k ) ); + } + if ( !isPositiveInteger( trials ) ) { + throw new RangeError( format( 'invalid argument. Fourth argument must be a be a positive integer. Value: `%d`.', trials ) ); + } + if ( resolveMetricStr( metric ) === null ) { + throw new TypeError( format( 'invalid argument. Fifth argument must be a valid supported metric. 
Value: `%s`.', metric ) ); + } + if ( k < 1 || M < 1 || N < 1 ) { + return out; + } if ( !isLayout( order ) ) { throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) ); } - if ( trials < 1 ) { - throw new TypeError( format( 'invalid argument. Tenth argument must be a valid trials (>=1). Value: `%s`.', trials ) ); - } if ( isRowMajor( order ) ) { so = N; sx = N; @@ -129,10 +121,10 @@ function dkmeansInitPlusPlus( order, k, M, N, out, LDO, X, LDX, metric, trials, sx = M; } if ( LDO < max( 1, so ) ) { - throw new RangeError( format( 'invalid argument. Fifth argument must be greater than or equal to max(1,%d). Value: `%d`.', so, LDO ) ); + throw new RangeError( format( 'invalid argument. Sixth argument must be greater than or equal to max(1,%d). Value: `%d`.', so, LDO ) ); } if ( LDX < max( 1, sx ) ) { - throw new RangeError( format( 'invalid argument. Fifth argument must be greater than or equal to max(1,%d). Value: `%d`.', sx, LDO ) ); + throw new RangeError( format( 'invalid argument. Eighth argument must be greater than or equal to max(1,%d). 
Value: `%d`.', sx, LDX ) ); } if ( isColumnMajor( order ) ) { so1 = 1; @@ -147,7 +139,7 @@ function dkmeansInitPlusPlus( order, k, M, N, out, LDO, X, LDX, metric, trials, sx1 = LDX; sx2 = 1; } - return ndarray( k, M, N, out, so1, so2, 0, X, sx1, sx2, 0, metric, trials, seed ); // eslint-disable-line max-len + return ndarray( M, N, k, trials, metric, X, sx1, sx2, 0, out, so1, so2, 0, W1, sw1, stride2offset( 2*M, sw1 ), W2, sw2, stride2offset( k, sw2 ), options ); // eslint-disable-line max-len } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js index 19f4f3d116ca..b920717626fd 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js @@ -19,12 +19,13 @@ 'use strict'; /** -* Initialize centroids by performing the k-means++ initialization procedure on double-precision floating-point data points using alternative indexing semantics. +* Initialize centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. 
* * @module @stdlib/ml/strided/dkmeans-init-plus-plus * * @example * var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); * var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' ); * * var k = 3; @@ -32,6 +33,8 @@ * var N = 2; * * var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); * * // Specify data points: * var xbuf = new Float64Array([ @@ -42,11 +45,16 @@ * -1.0, 1.0 * ]); * -* var v = dkmeansInitPlusPlus( 'row-major', k, M, N, out, 2, xbuf, 2, 'sqeuclidean', 3, 44 ); -* // returns [0,0,1,-1,1,1] +* var opts = { +* 'seed': 1234, +* } +* +* dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); +* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] * * @example * var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); * var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' ); * * var k = 3; @@ -54,6 +62,8 @@ * var N = 2; * * var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); * * // Specify data points: * var xbuf = new Float64Array([ @@ -64,8 +74,12 @@ * -1.0, 1.0 * ]); * -* var v = dkmeansInitPlusPlus.ndarray( k, M, N, out, 2, 1, 0, xbuf, 2, 1, 0, 'sqeuclidean', 3, 44 ); -* // returns [0,0,1,-1,1,1] +* var opts = { +* 'seed': 1234, +* } +* +* dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); +* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] */ // MODULES // diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js index bb1ff8d31539..8b0c1ff299ba 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js +++ 
b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js @@ -20,16 +20,10 @@ // MODULES // -var randint = require( '@stdlib/random/base/discrete-uniform' ).factory; -var randu = require( '@stdlib/random/base/mt19937' ).factory; -var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; -var Float64Array = require( '@stdlib/array/float64' ); -var PINF = require( '@stdlib/constants/float64/pinf' ); var format = require( '@stdlib/string/format' ); -var dsquaredEuclidean = require( '@stdlib/stats/strided/distances/dsquared-euclidean' ).ndarray; -var dcosine = require( '@stdlib/stats/strided/distances/dcosine-distance' ).ndarray; -var dcityblock = require( '@stdlib/stats/strided/distances/dcityblock' ).ndarray; -var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).ndarray; +var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ); +var resolveMetricStr = require( '@stdlib/ml/base/kmeans/metric-resolve-str' ); +var base = require( './base.js' ); // MAIN // @@ -37,67 +31,46 @@ var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).nda /** * Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points using alternative indexing semantics. * -* ## Method -* -* The k-means++ algorithm for choosing initial centroids is as follows: -* -* 1. Select a data point uniformly at random from a data set \\( X \\). This data point is first centroid and denoted \\( c_0 \\). -* -* 2. Compute the distance from each data point to \\( c_0 \\). Denote the distance between \\( c_j \\) and data point \\( m \\) as \\( d(x_m, c_j) \\). -* -* 3. Select the next centroid, \\( c_1 \\), at random from \\( X \\) with probability -* -* ```tex -* \frac{d^2(x_m, c_0)}{\sum_{j=0}^{n-1} d^2(x_j, c_0)} -* ``` -* -* where \\( n \\) is the number of data points. -* -* 4. To choose centroid \\( j \\), -* -* a. 
Compute the distances from each data point to each centroid and assign each data point to its closest centroid. -* -* b. For \\( i = 0,\ldots,n-1 \\) and \\( p = 0,\ldots,j-2 \\), select centroid \\( j \\) at random from \\( X \\) with probability -* -* ```tex -* \frac{d^2(x_i, c_p)}{\sum_{\{h; x_h \exits C_p\}} d^2(x_h, c_p)} -* ``` -* -* where \\( C_p \\) is the set of all data points closest to centroid \\( c_p \\) and \\( x_i \\) belongs to \\( c_p \\). -* -* Stated more plainly, select each subsequent centroid with a probability proportional to the distance from the centroid to the closest centroid already chosen. -* -* 5. Repeat step `4` until \\( k \\) centroids have been chosen. -* -* ## References -* -* - Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual Acm-Siam Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. . -* -* @param {PositiveInteger} k - number of clusters * @param {PositiveInteger} M - number of data points * @param {PositiveInteger} N - number of features -* @param {Float64Array} out - input array -* @param {integer} so1 - first stride length of `out` -* @param {integer} so2 - second stride length of `out` -* @param {NonNegativeInteger} oo - starting index of `out` +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {string} metric - distance metric * @param {Float64Array} X - input array * @param {integer} sx1 - first stride length of `X` * @param {integer} sx2 - second stride length of `X` * @param {NonNegativeInteger} ox - starting index of `X` -* @param {string} metric - distance metric -* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) -* @param {*} seed - PRNG seed -* @throws {TypeError} tenth argument must be a valid trials (>=1) +* @param 
{Float64Array} out - output array +* @param {integer} so1 - first stride length of `out` +* @param {integer} so2 - second stride length of `out` +* @param {NonNegativeInteger} oo - starting index of `out` +* @param {Float64Array} W1 - first workspace array of size `2*M` for tracking squared distances and probabilities +* @param {integer} sw1 - stride length of `W1` +* @param {NonNegativeInteger} ow1 - starting index of `W1` +* @param {Float64Array} W2 - second workspace array for tracking centroid candidates +* @param {integer} sw2 - stride length of `W2` +* @param {NonNegativeInteger} ow2 - starting index of `W2` +* @param {Options} [options] - function options +* @param {PRNG} [options.prng] - pseudorandom number generator which generates uniformly distributed pseudorandom numbers +* @param {PRNGSeedMT19937} [options.seed] - pseudorandom number generator seed +* @param {PRNGStateMT19937} [options.state] - pseudorandom number generator state +* @param {boolean} [options.copy=true] - boolean indicating whether to copy a provided pseudorandom number generator state +* @throws {RangeError} first argument must be greater than or equal to third argument +* @throws {RangeError} fourth argument must be a positive integer +* @throws {TypeError} fifth argument must be a valid supported metric * @returns {Float64Array} centroids * * @example * var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); * * var k = 3; * var M = 5; * var N = 2; * * var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); * * // Specify data points: * var xbuf = new Float64Array([ @@ -108,142 +81,27 @@ var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).nda * -1.0, 1.0 * ]); * -* var v = dkmeansInitPlusPlus( k, M, N, out, 2, 1, 0, xbuf, 2, 1, 0, 'sqeuclidean', 3, 44 ); -* // returns [0,0,1,-1,1,1] +* var opts = { +* 'seed': 1234, +* } +* +* dkmeansInitPlusPlus( M, N, k, 3, 
'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); +* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] */ -function dkmeansInitPlusPlus( k, M, N, out, so1, so2, oo, X, sx1, sx2, ox, metric, trials, seed ) { // eslint-disable-line max-len, max-params - var centroids; // array of indices - var dhash; - var randi; - var probs; - var rand; - var csum; - var bsum; - var dist; - var xidx; - var cidx; - var oidx; - var d2; - var bc; - var d; - var c; - var i; - var j; - var t; - var r; - - if ( trials < 1 ) { - throw new TypeError( format( 'invalid argument. Thirteenth argument must be a valid trials (>=1). Value: `%s`.', trials ) ); - } - if ( k < 1 || M < 1 || N < 1) { - return NaN; +function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ) { // eslint-disable-line max-len, max-params + if ( k > M ) { + throw new RangeError( format( 'invalid argument. First argument `M` must be greater than or equal to third argument `k`. Value: `M=%d, k=%d`.', M, k ) ); } - - // Create seeded PRNGs: - rand = randu({ - 'seed': seed - }); - randi = randint({ - 'seed': rand() - }); - rand = rand.normalized; - - // 1. Select a data point at random for the first centroid... - c = randi( 0, M-1 ); - if ( k === 1 ) { - // For the trivial case of one centroid, we are done which means we can skip to setting the output centroid data... - return dcopy( N, X, sx2, ox + ( sx1*c ), out, so2, oo ); - } - centroids = [ c ]; - - if ( metric === 'sqeuclidean' ) { - dist = dsquaredEuclidean; - } else if ( metric === 'cosine' ) { - dist = dcosine; - } else if ( metric === 'cityblock' ) { - dist = dcityblock; - } else { - dist = dcorrelation; - } - - dhash = new Float64Array( M ); - for ( i = 0; i < M; i++ ) { - dhash[ i ] = PINF; // squared distance + if ( !isPositiveInteger( trials ) ) { + throw new RangeError( format( 'invalid argument. Fourth argument must be a be a positive integer. 
Value: `%d`.', trials ) ); } - // Create a scratch array for storing cumulative probabilities: - probs = new Float64Array( M ); - - // 2-5. For each data point, compute the distances to each centroid, find the closest centroid, and, based on the distance to the closest centroid, assign a probability to the data point to be chosen as centroid `c_j`... - for ( i = 1; i < k; i++ ) { - csum = 0.0; - xidx = ox; - cidx = ox + ( sx1*centroids[ i-1 ] ); - for ( j = 0; j < M; j++ ) { - d2 = dist( N, X, sx2, xidx, X, sx2, cidx ); - if ( d2 < dhash[ j ] ) { - dhash[ j ] = d2; - csum += d2; - } else { - csum += dhash[ j ]; - } - xidx += sx1; - } - - if ( csum === 0.0 ) { - centroids.push( randi( 0, M-1 ) ); - continue; - } - - // Compute the cumulative probabilities... - probs[ 0 ] = dhash[ 0 ] / csum; - for ( j = 1; j < M; j++ ) { - probs[ j ] = probs[ j-1 ] + ( dhash[ j ] / csum ); - } - // Based Arthur's and Vassilvitskii's paper "kmeans++: The Advantages of Careful Seeding" (see conclusion), randomly select candidate centroids and pick the candidate which minimizes the total squared distance... - bsum = PINF; // best sum - bc = -1; // best candidate - for ( t = 0; t < trials; t++ ) { - // Use rejection sampling to handle edge case where the total cumulative probability does not equal unity due to accumulated floating-point errors and is less than `r` (*very* rarely should this require more than one iteration)... - c = -1; - - // Note: the following should never choose an already chosen centroid (why? 
because a centroid's minimum squared distance is `0`, which means it will either correspond to a cumulative probability of `0` or will correspond to a cumulative probability equal to the previous cumulative probability, thus leading to the equivalent of a no-op iteration) - while ( c === -1 ) { - r = rand(); // Note: `r` exists on the interval `[0,1)` - for ( j = 0; j < M; j++ ) { - if ( r < probs[ j ] ) { - c = j; - break; - } - } - } - // Compute the sum of squared distances were we to include the candidate centroid... - csum = 0.0; - cidx = ox + ( sx1 * c ); - for ( j = 0; j < M; j++ ) { - d = dist( N, X, sx2, ox + ( sx1*j ), X, sx2, cidx ); - if ( d < dhash[ j ] ) { - csum += d; - } else { - csum += dhash[ j ]; - } - } - // Determine if the candidate is the best candidate we have seen thus far... - if ( csum < bsum ) { - bsum = csum; - bc = c; - } - } - // Push the "best" candidate to our list of centroids: - centroids.push( bc ); + if ( resolveMetricStr( metric ) === null ) { + throw new TypeError( format( 'invalid argument. Fifth argument must be a valid supported metric. Value: `%s`.', metric ) ); } - // 6. Set centroid data... - oidx = oo; - for ( i = 0; i < k; i++ ) { - // Note: the following is likely to be an "out-of-order" copy... 
- dcopy( N, X, sx2, ox + ( sx1*centroids[i] ), out, so2, oidx ); - oidx += so1; + if ( k < 1 || M < 1 || N < 1 ) { + return out; } - return out; + return base( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ); // eslint-disable-line max-len } From e348fd77e39a6f9f3ac14ec6e54e94b23e0ca668 Mon Sep 17 00:00:00 2001 From: nakul-krishnakumar Date: Thu, 4 Jun 2026 02:47:58 +0530 Subject: [PATCH 7/7] feat: add `ml/strided/dkmeans-init-plus-plus` --- .../strided/dkmeans-init-plus-plus/README.md | 394 ++++++++++++++++ .../benchmark/benchmark.js | 54 ++- .../benchmark/benchmark.ndarray.js | 39 +- .../dkmeans-init-plus-plus/docs/repl.txt | 200 ++++++++ .../docs/types/index.d.ts | 240 ++++++++++ .../dkmeans-init-plus-plus/docs/types/test.ts | 430 ++++++++++++++++++ .../dkmeans-init-plus-plus/examples/index.js | 58 +++ .../dkmeans-init-plus-plus/lib/base.js | 45 +- .../lib/dkmeans_init_plus_plus.js | 148 ------ .../dkmeans-init-plus-plus/lib/index.js | 26 +- .../dkmeans-init-plus-plus/lib/main.js | 122 ++++- .../dkmeans-init-plus-plus/lib/ndarray.js | 22 +- .../dkmeans-init-plus-plus/package.json | 69 +++ .../test/fixtures/column_major.json | 26 +- .../fixtures/large-strides/column_major.json | 20 +- .../fixtures/large-strides/row_major.json | 24 +- .../fixtures/mixed-strides/column_major.json | 20 +- .../fixtures/mixed-strides/row_major.json | 22 +- .../negative-strides/column_major.json | 20 +- .../fixtures/negative-strides/row_major.json | 22 +- .../test/fixtures/offsets/column_major.json | 20 +- .../test/fixtures/offsets/row_major.json | 22 +- .../test/fixtures/row_major.json | 26 +- .../test/test.dkmeans_init_plus_plus.js | 160 +++++-- .../test/test.ndarray.js | 192 +++++++- 25 files changed, 2118 insertions(+), 303 deletions(-) create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/README.md create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/repl.txt create mode 100644 
lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/index.d.ts create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/test.ts create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/examples/index.js delete mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js create mode 100644 lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/package.json diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/README.md b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/README.md new file mode 100644 index 000000000000..28cb86bc32d3 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/README.md @@ -0,0 +1,394 @@ + + +# dkmeansInitPlusPlus + +> Initializes centroids by performing the [k-means++][kmeans-plus-plus] initialization procedure on double-precision floating-point data points. + + + +
+ +
+ + + + + +
+ +## Usage + +```javascript +var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' ); +``` + + + +#### dkmeansInitPlusPlus( order, M, N, k, trials, metric, X, LDX, out, LDO, W1, sw1, W2, sw2\[, options] ) + + + +Initializes centroids by performing the [k-means++][kmeans-plus-plus] initialization procedure on double-precision floating-point data points. + + + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); + +var k = 3; +var M = 5; +var N = 2; + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +/* + X = [ + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 1.0, -1.0 ], + [ -1.0, -1.0 ], + [ -1.0, 1.0 ] + ] +*/ +var X = new Float64Array( [ 0.0, 0.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0 ] ); + +var centroids = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', X, N, out, N, W1, 1, W2, 1 ); +``` + +The function has the following parameters: + +- **order**: storage layout. +- **M**: number of data points. +- **N**: number of features. +- **k**: number of clusters (i.e., the number of centroids to initialize). +- **trials**: number of candidate centroids to sample per iteration (must be `>= 1`). +- **metric**: distance metric. Must be one of the following: `'sqeuclidean'`, `'cosine'`, `'cityblock'`, or `'correlation'`. +- **X**: input matrix stored as a [`Float64Array`][@stdlib/array/float64]. +- **LDX**: stride of the first dimension of `X` (a.k.a., leading dimension of the matrix `X`). +- **out**: output matrix stored as a [`Float64Array`][@stdlib/array/float64]. +- **LDO**: stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`). +- **W1**: first workspace array of size `2*M` for tracking squared distances and probabilities. +- **sw1**: stride length of `W1`. +- **W2**: second workspace array for tracking centroid candidates. 
+- **sw2**: stride length of `W2`. + +The output matrix `out` has shape `(k, N)`, with one initialized centroid per row, and the input matrix `X` has shape `(M, N)`, with one data point per row. + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); + +var k = 2; +var M = 3; +var N = 2; + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +/* + X = [ + [ 0.0, 0.0 ], + [ 1.0, 3.0 ], + [ 2.0, 4.0 ] + ] +*/ +var X = new Float64Array( [ 0.0, 1.0, 2.0, 0.0, 3.0, 4.0 ] ); + +var centroids = dkmeansInitPlusPlus( 'column-major', M, N, k, 3, 'sqeuclidean', X, M, out, k, W1, 1, W2, 1 ); +``` + +Note that indexing is relative to the first index. To introduce an offset, use [`typed array`][mdn-typed-array] views. + + + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); + +var k = 3; +var M = 5; +var N = 2; + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +/* + X1 = [ + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 1.0, -1.0 ], + [ -1.0, -1.0 ], + [ -1.0, 1.0 ] + ] +*/ +var X = new Float64Array( [ 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0 ] ); + +var X1 = new Float64Array( X.buffer, X.BYTES_PER_ELEMENT*1 ); // start at 2nd element + +var centroids = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', X1, N, out, N, W1, 1, W2, 1 ); +``` + +The function accepts the following `options`: + +- **prng**: pseudorandom number generator for generating uniformly distributed pseudorandom numbers. If provided, the function **ignores** both the `state` and `seed` options. In order to seed the underlying pseudorandom number generator, one must seed the provided `prng` (assuming the provided `prng` is seedable). 
+- **seed**: pseudorandom number generator seed. +- **state**: a [`Uint32Array`][@stdlib/array/uint32] containing pseudorandom number generator state. If provided, the function ignores the `seed` option. +- **copy**: `boolean` indicating whether to copy a provided pseudorandom number generator state. Setting this option to `false` allows sharing state between two or more pseudorandom number generators. Setting this option to `true` ensures that an underlying generator has exclusive control over its internal state. Default: `true`. + +To use a custom PRNG as the underlying source of uniformly distributed pseudorandom numbers on the interval `[0,1)`, set the `prng` option. + + + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); +var minstd = require( '@stdlib/random/base/minstd' ); + +var k = 3; +var M = 5; +var N = 2; + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +/* + X = [ + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 1.0, -1.0 ], + [ -1.0, -1.0 ], + [ -1.0, 1.0 ] + ] +*/ +var X = new Float64Array( [ 0.0, 0.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0 ] ); + +var opts = { + 'prng': minstd.normalized +}; + +var centroids = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', X, N, out, N, W1, 1, W2, 1, opts ); +``` + +To seed the underlying pseudorandom number generator, set the `seed` option. 
+ + + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); + +var k = 3; +var M = 5; +var N = 2; + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +/* + X = [ + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 1.0, -1.0 ], + [ -1.0, -1.0 ], + [ -1.0, 1.0 ] + ] +*/ +var X = new Float64Array( [ 0.0, 0.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0 ] ); + +var opts = { + 'seed': 12345 +}; + +var centroids = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', X, N, out, N, W1, 1, W2, 1, opts ); +``` + + + +#### dkmeansInitPlusPlus.ndarray( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2\[, options] ) + + + +Initializes centroids by performing the [k-means++][kmeans-plus-plus] initialization procedure on double-precision floating-point data points using alternative indexing semantics. + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); + +var k = 3; +var M = 5; +var N = 2; + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +/* + X = [ + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 1.0, -1.0 ], + [ -1.0, -1.0 ], + [ -1.0, 1.0 ] + ] +*/ +var X = new Float64Array( [ 0.0, 0.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0 ] ); + +var centroids = dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', X, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); +``` + +The function has the following parameters: + +- **M**: number of data points. +- **N**: number of features. +- **k**: number of clusters (i.e., the number of centroids to initialize). +- **trials**: number of candidate centroids to sample per iteration (must be `>= 1`). +- **metric**: distance metric. 
Must be one of the following: `'sqeuclidean'`, `'cosine'`, `'cityblock'`, or `'correlation'`. +- **X**: input matrix stored as a [`Float64Array`][@stdlib/array/float64]. +- **sx1**: stride of the first dimension of `X`. +- **sx2**: stride of the second dimension of `X`. +- **ox**: starting index for `X`. +- **out**: output matrix stored as a [`Float64Array`][@stdlib/array/float64]. +- **so1**: stride of the first dimension of `out`. +- **so2**: stride of the second dimension of `out`. +- **oo**: starting index for `out`. +- **W1**: first workspace array of size `2*M` for tracking squared distances and probabilities. +- **sw1**: stride length of `W1`. +- **ow1**: starting index for `W1`. +- **W2**: second workspace array for tracking centroid candidates. +- **sw2**: stride length of `W2`. +- **ow2**: starting index for `W2`. + +
+ + + + + +
+ +## Notes + +- The k-means++ procedure is stochastic; providing the same `seed` yields reproducible centroids. +- Increasing the number of `trials` (the "greedy" k-means++ variant) samples multiple candidate centroids per iteration and keeps the candidate which minimizes the total squared distance, generally improving centroid quality at the cost of additional computation. + +
+ + + + + +
+ +## Examples + + + +```javascript +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); +var discreteUniform = require( '@stdlib/random/array/discrete-uniform' ); +var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' ); + +var k = 3; +var M = 10; +var N = 2; + +// Generate a random set of data points: +var X = discreteUniform( M*N, -50, 50, { + 'dtype': 'float64' +}); +console.log( X ); + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); + +// Allocate workspace arrays: +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +// Set PRNG options +var options = { + 'seed': 1234 +}; + +// Initialize centroids using the k-means++ procedure: +var centroids = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', X, N, out, N, W1, 1, W2, 1, options ); + +console.log( centroids ); +// => + +// Initialize centroids using the k-means++ procedure using alternative indexing semantics: +centroids = dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', X, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0, options ); + +console.log( centroids ); +// => +``` + +
+ + + +
+ +## References + +- Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. . + +
+ + + + + + + + + + + + + + diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js index 3d89160dc469..5a0bb4613216 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.js @@ -23,11 +23,12 @@ var bench = require( '@stdlib/bench' ); var uniform = require( '@stdlib/random/array/uniform' ); var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); var isnan = require( '@stdlib/math/base/assert/is-nan' ); var pow = require( '@stdlib/math/base/special/pow' ); var format = require( '@stdlib/string/format' ); var pkg = require( './../package.json' ).name; -var dkmeansInitPlusPlus = require( './../lib/dkmeans_init_plus_plus.js' ); +var dkmeansInitPlusPlus = require( './../lib' ); // VARIABLES // @@ -49,16 +50,33 @@ var options = { * * @private * @param {string} order - storage layout -* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {string} metric - distance metric * @param {PositiveInteger} M - number of data points * @param {PositiveInteger} N - number of features -* @param {string} metric - distance metric -* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {PositiveInteger} k - number of clusters * @returns {Function} benchmark function */ -function createBenchmark( order, k, M, N, metric, trials ) { - var out = new Float64Array( k*N ); - var x = uniform( M*N, -100.0, 100.0, options ); +function createBenchmark( order, trials, metric, M, N, k ) { + var LDX; + var LDO; + var out; + var W2; + var W1; + var x; + + out = new Float64Array( k*N ); + W1 = new Float64Array( 2*M ); + W2 = new Int32Array( k ); + x = uniform( M*N, -100.0, 
100.0, options ); + if ( order === 'row-major' ) { + LDX = N; + LDO = N; + } else { + LDX = M; + LDO = k; + } + return benchmark; /** @@ -73,8 +91,8 @@ function createBenchmark( order, k, M, N, metric, trials ) { b.tic(); for ( i = 0; i < b.iterations; i++ ) { - c = dkmeansInitPlusPlus( order, k, M, N, out, N, x, N, metric, trials, 44 ); // eslint-disable-line max-len - if ( isnan( c[ i%(k*N) ] ) ) { + c = dkmeansInitPlusPlus( order, M, N, k, trials, metric, x, LDX, out, LDO, W1, 1, W2, 1 ); // eslint-disable-line max-len + if ( isnan( c[ i%( k*N ) ] ) ) { b.fail( 'should not return NaN' ); } } @@ -113,29 +131,29 @@ function main() { max = 4; // 10^max // Benchmark: vary order with defaults k=2, M=100, N=2, metric=sqeuclidean, trials=1... - for ( i = 0; i <= LAYOUTS.length; i++ ) { - f = createBenchmark( LAYOUTS[ i ], 2, 100, 2, 'sqeuclidean', 1 ); - bench( format( '%s::vary_M:order=%s,k=2,M=100,N=2,metric=sqeuclidean,trials=1', pkg, LAYOUTS[ i ] ), f ); + for ( i = 0; i < LAYOUTS.length; i++ ) { + f = createBenchmark( LAYOUTS[ i ], 1, 'sqeuclidean', 100, 2, 2 ); + bench( format( '%s::vary_order:order=%s,k=2,M=100,N=2,metric=sqeuclidean,trials=1', pkg, LAYOUTS[ i ] ), f ); } // Benchmark: vary number of data points (M) with defaults k=2, N=2, metric=sqeuclidean, trials=1... for ( i = min; i <= max; i++ ) { M = pow( 10, i ); - f = createBenchmark( 'row-major', 2, M, 2, 'sqeuclidean', 1 ); + f = createBenchmark( 'row-major', 1, 'sqeuclidean', M, 2, 2 ); bench( format( '%s::vary_M:order=row-major,k=2,M=%d,N=2,metric=sqeuclidean,trials=1', pkg, M ), f ); } // Benchmark: vary number of features (N) with defaults k=2, M=10, metric=sqeuclidean, trials=1... 
for ( i = min; i <= max; i++ ) { N = pow( 10, i ); - f = createBenchmark( 'row-major', 2, 10, N, 'sqeuclidean', 1 ); - bench( format( '%s::vary_N:order=row-major,k=2,M=1000,N=%d,metric=sqeuclidean,trials=1', pkg, N ), f ); + f = createBenchmark( 'row-major', 1, 'sqeuclidean', 10, N, 2 ); + bench( format( '%s::vary_N:order=row-major,k=2,M=10,N=%d,metric=sqeuclidean,trials=1', pkg, N ), f ); } // Benchmark: vary number of clusters (k) with defaults M=1000, N=2, metric=sqeuclidean, trials=1... for ( i = 1; i <= 4; i++ ) { k = pow( 2, i ); - f = createBenchmark( 'row-major', k, 1000, 2, 'sqeuclidean', 1 ); + f = createBenchmark( 'row-major', 1, 'sqeuclidean', 1000, 2, k ); bench( format( '%s::vary_k:order=row-major,k=%d,M=1000,N=2,metric=sqeuclidean,trials=1', pkg, k ), f ); } @@ -143,7 +161,7 @@ function main() { trials = [ 1, 10, 100, 1000 ]; for ( j = 0; j < trials.length; j++ ) { t = trials[ j ]; - f = createBenchmark( 'row-major', 2, 1000, 2, 'sqeuclidean', t ); + f = createBenchmark( 'row-major', t, 'sqeuclidean', 1000, 2, 2 ); bench( format( '%s::vary_trials:order=row-major,k=2,M=1000,N=2,metric=sqeuclidean,trials=%d', pkg, t ), f ); } @@ -151,7 +169,7 @@ function main() { metrics = [ 'sqeuclidean', 'cosine', 'cityblock', 'correlation' ]; for ( j = 0; j < metrics.length; j++ ) { m = metrics[ j ]; - f = createBenchmark( 'row-major', 2, 1000, 2, m, 1 ); + f = createBenchmark( 'row-major', 1, m, 1000, 2, 2 ); bench( format( '%s::vary_metric:order=row-major,k=2,M=1000,N=2,metric=%s,trials=1', pkg, m ), f ); } } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js index 12505912673d..a3b4b5cfc07d 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/benchmark/benchmark.ndarray.js @@ -23,6 +23,7 @@ var bench = 
require( '@stdlib/bench' ); var uniform = require( '@stdlib/random/array/uniform' ); var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); var isnan = require( '@stdlib/math/base/assert/is-nan' ); var pow = require( '@stdlib/math/base/special/pow' ); var format = require( '@stdlib/string/format' ); @@ -43,16 +44,24 @@ var options = { * Creates a benchmark function. * * @private -* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {string} metric - distance metric * @param {PositiveInteger} M - number of data points * @param {PositiveInteger} N - number of features -* @param {string} metric - distance metric -* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {PositiveInteger} k - number of clusters * @returns {Function} benchmark function */ -function createBenchmark( k, M, N, metric, trials ) { - var out = new Float64Array( k*N ); - var x = uniform( M*N, -100.0, 100.0, options ); +function createBenchmark( trials, metric, M, N, k ) { + var out; + var W2; + var W1; + var x; + + out = new Float64Array( k*N ); + W1 = new Float64Array( 2*M ); + W2 = new Int32Array( k ); + x = uniform( M*N, -100.0, 100.0, options ); + return benchmark; /** @@ -67,8 +76,8 @@ function createBenchmark( k, M, N, metric, trials ) { b.tic(); for ( i = 0; i < b.iterations; i++ ) { - c = dkmeansInitPlusPlus( k, M, N, out, N, 1, 0, x, N, 1, 0, metric, trials, 44 ); // eslint-disable-line max-len - if ( isnan( c[ i%(k*N) ] ) ) { + c = dkmeansInitPlusPlus( M, N, k, trials, metric, x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // eslint-disable-line max-len + if ( isnan( c[ i%( k*N ) ] ) ) { b.fail( 'should not return NaN' ); } } @@ -109,29 +118,29 @@ function main() { // Benchmark: vary number of data points (M) with defaults k=2, N=2, metric=sqeuclidean, trials=1... 
for ( i = min; i <= max; i++ ) { M = pow( 10, i ); - f = createBenchmark( 2, M, 2, 'sqeuclidean', 1 ); + f = createBenchmark( 1, 'sqeuclidean', M, 2, 2 ); bench( format( '%s::vary_M:k=2,M=%d,N=2,metric=sqeuclidean,trials=1', pkg, M ), f ); } // Benchmark: vary number of features (N) with defaults k=2, M=10, metric=sqeuclidean, trials=1... for ( i = min; i <= max; i++ ) { N = pow( 10, i ); - f = createBenchmark( 2, 10, N, 'sqeuclidean', 1 ); - bench( format( '%s::vary_N:k=2,M=1000,N=%d,metric=sqeuclidean,trials=1', pkg, N ), f ); + f = createBenchmark( 1, 'sqeuclidean', 10, N, 2 ); + bench( format( '%s::vary_N:k=2,M=10,N=%d,metric=sqeuclidean,trials=1', pkg, N ), f ); } // Benchmark: vary number of clusters (k) with defaults M=1000, N=2, metric=sqeuclidean, trials=1... for ( i = 1; i <= 4; i++ ) { k = pow( 2, i ); - f = createBenchmark( k, 1000, 2, 'sqeuclidean', 1 ); + f = createBenchmark( 1, 'sqeuclidean', 1000, 2, k ); bench( format( '%s::vary_k:k=%d,M=1000,N=2,metric=sqeuclidean,trials=1', pkg, k ), f ); } // Benchmark: vary number of trials with defaults k=2, M=1000, N=2, metric=sqeuclidean... 
- trials = [ 1, 10, 100 ]; + trials = [ 1, 10, 100, 1000 ]; for ( j = 0; j < trials.length; j++ ) { t = trials[ j ]; - f = createBenchmark( 2, 1000, 2, 'sqeuclidean', t ); + f = createBenchmark( t, 'sqeuclidean', 1000, 2, 2 ); bench( format( '%s::vary_trials:k=2,M=1000,N=2,metric=sqeuclidean,trials=%d', pkg, t ), f ); } @@ -139,7 +148,7 @@ function main() { metrics = [ 'sqeuclidean', 'cosine', 'cityblock', 'correlation' ]; for ( j = 0; j < metrics.length; j++ ) { m = metrics[ j ]; - f = createBenchmark( 2, 1000, 2, m, 1 ); + f = createBenchmark( 1, m, 1000, 2, 2 ); bench( format( '%s::vary_metric:k=2,M=1000,N=2,metric=%s,trials=1', pkg, m ), f ); } } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/repl.txt b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/repl.txt new file mode 100644 index 000000000000..33b3a329195f --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/repl.txt @@ -0,0 +1,200 @@ + +{{alias}}( order, M, N, k, trials, metric, X, LDX, out, LDO, W1, sw1, W2, sw2[, options] ) + Initializes centroids by performing the k-means++ initialization procedure + on double-precision floating-point data points. + + Parameters + ---------- + order: string + Row-major (C-style) or column-major (Fortran-style) order. Must be + either 'row-major' or 'column-major'. + + M: integer + Number of data points. + + N: integer + Number of features. + + k: integer + Number of clusters. + + trials: integer + Number of potential centroids per iteration (>= 1). + + metric: string + Distance metric. + + X: Float64Array + Input Array. + + LDX: integer + Stride of the first dimension of `X` (a.k.a., leading dimension of the + matrix `X`). + + out: Float64Array + Output Array. + + LDO: integer + Stride of the first dimension of `out` (a.k.a., leading dimension of the + matrix `out`). + + W1: Float64Array + First workspace array of size `2*M` for tracking squared distances and + probabilities. 
+ + sw1: integer + Stride length of `W1`. + + W2: Int32Array + Second workspace array for tracking centroid candidates. + + sw2: integer + Stride length of `W2`. + + options.prng: Function (optional) + Pseudorandom number generator (PRNG) for generating uniformly + distributed pseudorandom numbers on the interval `[0,1)`. If provided, + the `state` and `seed` options are ignored. In order to seed the + underlying pseudorandom number generator, one must seed the provided + `prng` (assuming the provided `prng` is seedable). + + options.seed: integer|ArrayLikeObject (optional) + Pseudorandom number generator seed. The seed may be either a positive + unsigned 32-bit integer or, for arbitrary length seeds, an array-like + object containing unsigned 32-bit integers. + + options.state: Uint32Array (optional) + Pseudorandom number generator state. If provided, the `seed` option is + ignored. + + options.copy: boolean (optional) + Boolean indicating whether to copy a provided pseudorandom number + generator state. Setting this option to `false` allows sharing state + between two or more pseudorandom number generators. Setting this option + to `true` ensures that the underlying generator has exclusive control + over its internal state. Default: true. + + Returns + ------- + out: Float64Array + Centroids. + + Examples + -------- + > var out = new {{alias:@stdlib/array/float64}}( 4 ); + > var x = new {{alias:@stdlib/array/float64}}( [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] ); + > var W1 = new {{alias:@stdlib/array/float64}}( 2*3 ); + > var W2 = new {{alias:@stdlib/array/int32}}( 2 ); + > {{alias}}( 'row-major', 3, 2, 2, 2, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ) + [...] + + +{{alias}}.ndarray( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2[, options] ) + Initializes centroids by performing the k-means++ initialization procedure + on double-precision floating-point data points using alternative indexing + semantics. 
+ + While typed array views mandate a view offset based on the underlying + buffer, offset parameters support indexing semantics based on starting + indices. + + Parameters + ---------- + M: integer + Number of data points. + + N: integer + Number of features. + + k: integer + Number of clusters. + + trials: integer + Number of potential centroids per iteration (>= 1). + + metric: string + Distance metric. + + X: Float64Array + Input Array. + + sx1: integer + Stride of the first dimension of `X`. + + sx2: integer + Stride of the second dimension of `X`. + + ox: integer + Starting index of `X`. + + out: Float64Array + Output Array. + + so1: integer + Stride of the first dimension of `out`. + + so2: integer + Stride of the second dimension of `out`. + + oo: integer + Starting index of `out`. + + W1: Float64Array + First workspace array of size `2*M` for tracking squared distances and + probabilities. + + sw1: integer + Stride length of `W1`. + + ow1: integer + Starting index of `W1`. + + W2: Int32Array + Second workspace array for tracking centroid candidates. + + sw2: integer + Stride length of `W2`. + + ow2: integer + Starting index of `W2`. + + options.prng: Function (optional) + Pseudorandom number generator (PRNG) for generating uniformly + distributed pseudorandom numbers on the interval `[0,1)`. If provided, + the `state` and `seed` options are ignored. In order to seed the + underlying pseudorandom number generator, one must seed the provided + `prng` (assuming the provided `prng` is seedable). + + options.seed: integer|ArrayLikeObject (optional) + Pseudorandom number generator seed. The seed may be either a positive + unsigned 32-bit integer or, for arbitrary length seeds, an array-like + object containing unsigned 32-bit integers. + + options.state: Uint32Array (optional) + Pseudorandom number generator state. If provided, the `seed` option is + ignored. 
+ + options.copy: boolean (optional) + Boolean indicating whether to copy a provided pseudorandom number + generator state. Setting this option to `false` allows sharing state + between two or more pseudorandom number generators. Setting this option + to `true` ensures that the underlying generator has exclusive control + over its internal state. Default: true. + + Returns + ------- + out: Float64Array + Centroids. + + Examples + -------- + > var out = new {{alias:@stdlib/array/float64}}( 4 ); + > var x = new {{alias:@stdlib/array/float64}}( [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] ); + > var W1 = new {{alias:@stdlib/array/float64}}( 2*3 ); + > var W2 = new {{alias:@stdlib/array/int32}}( 2 ); + > {{alias}}.ndarray( 3, 2, 2, 2, 'sqeuclidean', x, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0 ) + [...] + + See Also + -------- + diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/index.d.ts b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/index.d.ts new file mode 100644 index 000000000000..9d956d1230cc --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/index.d.ts @@ -0,0 +1,240 @@ +/* +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +// TypeScript Version: 4.1 + +import { Layout } from '@stdlib/types/blas'; +import * as random from '@stdlib/types/random'; + +/** +* Interface defining function options. 
+*/ +interface Options { + /** + * Pseudorandom number generator which generates uniformly distributed pseudorandom numbers. + */ + prng?: random.PRNG; + + /** + * Pseudorandom number generator seed. + */ + seed?: random.PRNGSeedMT19937; + + /** + * Pseudorandom number generator state. + */ + state?: random.PRNGStateMT19937; + + /** + * Specifies whether to copy a provided pseudorandom number generator state (default: `true`). + */ + copy?: boolean; +} + +/** +* Interface describing `dkmeansInitPlusPlus`. +*/ +interface Routine { + /** + * Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. + * + * @param order - storage layout + * @param M - number of data points + * @param N - number of features + * @param k - number of clusters + * @param trials - number of potential centroids per iteration (>= 1) + * @param metric - distance metric + * @param X - input array + * @param LDX - stride of the first dimension of `X` (a.k.a., leading dimension of the matrix `X`) + * @param out - output array + * @param LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) + * @param W1 - first workspace array + * @param sw1 - stride length of `W1` + * @param W2 - second workspace array + * @param sw2 - stride length of `W2` + * @param options - function options + * @returns centroids + * + * @example + * var Float64Array = require( '@stdlib/array/float64' ); + * var Int32Array = require( '@stdlib/array/int32' ); + * + * var k = 3; + * var M = 5; + * var N = 2; + * + * var out = new Float64Array( k*N ); + * var W1 = new Float64Array( 2*M ); + * var W2 = new Int32Array( k ); + * + * // Specify data points: + * var xbuf = new Float64Array([ + * 0.0, 0.0, + * 1.0, 1.0, + * 1.0, -1.0, + * -1.0, -1.0, + * -1.0, 1.0 + * ]); + * + * var opts = { + * 'seed': 1234 + * }; + * + * dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); + * // 
out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] + */ + ( order: Layout, M: number, N: number, k: number, trials: number, metric: string, X: Float64Array, LDX: number, out: Float64Array, LDO: number, W1: Float64Array, sw1: number, W2: Int32Array, sw2: number, options?: Options ): Float64Array; + + /** + * Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points using alternative indexing semantics. + * + * @param M - number of data points + * @param N - number of features + * @param k - number of clusters + * @param trials - number of potential centroids per iteration (>= 1) + * @param metric - distance metric + * @param X - input array + * @param sx1 - first stride length of `X` + * @param sx2 - second stride length of `X` + * @param ox - starting index of `X` + * @param out - output array + * @param so1 - first stride length of `out` + * @param so2 - second stride length of `out` + * @param oo - starting index of `out` + * @param W1 - first workspace array + * @param sw1 - stride length of `W1` + * @param ow1 - starting index of `W1` + * @param W2 - second workspace array + * @param sw2 - stride length of `W2` + * @param ow2 - starting index of `W2` + * @param options - function options + * @returns centroids + * + * @example + * var Float64Array = require( '@stdlib/array/float64' ); + * var Int32Array = require( '@stdlib/array/int32' ); + * + * var k = 3; + * var M = 5; + * var N = 2; + * + * var out = new Float64Array( k*N ); + * var W1 = new Float64Array( 2*M ); + * var W2 = new Int32Array( k ); + * + * // Specify data points: + * var xbuf = new Float64Array([ + * 0.0, 0.0, + * 1.0, 1.0, + * 1.0, -1.0, + * -1.0, -1.0, + * -1.0, 1.0 + * ]); + * + * var opts = { + * 'seed': 1234 + * }; + * + * dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); + * // out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] + */ + ndarray( M: number, N: number, k: number, 
trials: number, metric: string, X: Float64Array, sx1: number, sx2: number, ox: number, out: Float64Array, so1: number, so2: number, oo: number, W1: Float64Array, sw1: number, ow1: number, W2: Int32Array, sw2: number, ow2: number, options?: Options ): Float64Array; +} + +/** +* Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. +* +* @param order - storage layout +* @param M - number of data points +* @param N - number of features +* @param k - number of clusters +* @param trials - number of potential centroids per iteration (>= 1) +* @param metric - distance metric +* @param X - input array +* @param LDX - stride of the first dimension of `X` (a.k.a., leading dimension of the matrix `X`) +* @param out - output array +* @param LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) +* @param W1 - first workspace array +* @param sw1 - stride length of `W1` +* @param W2 - second workspace array +* @param sw2 - stride length of `W2` +* @param options - function options +* @returns centroids +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var opts = { +* 'seed': 1234 +* }; +* +* var v = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); +* // returns [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = 
new Int32Array( k ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var opts = { +* 'seed': 1234 +* }; +* +* var v = dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); +* // returns [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] +*/ +declare var dkmeansInitPlusPlus: Routine; + + +// EXPORTS // + +export = dkmeansInitPlusPlus; diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/test.ts b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/test.ts new file mode 100644 index 000000000000..af370f2aa9eb --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/docs/types/test.ts @@ -0,0 +1,430 @@ +/* +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +import dkmeansInitPlusPlus = require( './index' ); + + +// TESTS // + +// The function returns a Float64Array... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + const options = { + 'seed': 1234 + }; + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectType Float64Array + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1, options ); // $ExpectType Float64Array +} + +// The compiler throws an error if the function is provided a first argument which is not a layout... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 5, M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( true, M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( null, M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( void 0, M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( [], M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( {}, M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a second argument which is not a number... 
+{ + const k = 3; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( 5 * N ); + const W1 = new Float64Array( 10 ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', '10', N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', true, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', null, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', void 0, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', [], N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', {}, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a third argument which is not a number... +{ + const k = 3; + const M = 5; + + const out = new Float64Array( k * 2 ); + const x = new Float64Array( M * 2 ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, '10', k, 3, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, true, k, 3, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, null, k, 3, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, void 0, k, 3, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, [], k, 3, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, {}, k, 3, 'sqeuclidean', x, 2, out, 2, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a fourth argument which is not a number... 
+{ + const M = 5; + const N = 2; + + const out = new Float64Array( 3 * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( 3 ); + + dkmeansInitPlusPlus( 'row-major', M, N, '10', 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, true, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, null, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, void 0, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, [], 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, {}, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a fifth argument which is not a number... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, '10', 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, true, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, null, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, void 0, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, [], 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, {}, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a sixth argument which is not a string... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 10, x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, true, x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, null, x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, void 0, x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, [], x, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, {}, x, N, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a seventh argument which is not a Float64Array... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', 10, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', true, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', null, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', void 0, N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', [], N, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', {}, N, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided an eighth argument which is not a number... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, '10', out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, true, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, null, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, void 0, out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, [], out, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, {}, out, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a ninth argument which is not a Float64Array... +{ + const k = 3; + const M = 5; + const N = 2; + + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, 10, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, true, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, null, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, void 0, N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, [], N, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, {}, N, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a tenth argument which is not a number... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, '10', W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, true, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, null, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, void 0, W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, [], W1, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, {}, W1, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided an eleventh argument which is not a Float64Array... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, 10, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, true, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, null, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, void 0, 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, [], 1, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, {}, 1, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a twelfth argument which is not a number... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, '10', W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, true, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, null, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, void 0, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, [], W2, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, {}, W2, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is provided a thirteenth argument which is not an Int32Array... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, 10, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, true, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, null, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, void 0, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, [], 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, {}, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W1, 1 ); // $ExpectError +} + +// The compiler throws an error if the function is 
provided a fourteenth argument which is not a number... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, '10' ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, true ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, null ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, void 0 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, [] ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, {} ); // $ExpectError +} + +// The compiler throws an error if the function is provided a fifteenth argument which is not an options object... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1, '10' ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1, true ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1, [] ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1, { 'seed': true } ); // $ExpectError +} + +// The compiler throws an error if the function is provided an unsupported number of arguments... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus(); // $ExpectError + dkmeansInitPlusPlus( 'row-major' ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean' ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2 ); // $ExpectError + dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', x, N, out, N, W1, 1, W2, 1, {}, 0 ); // $ExpectError +} + +// Attached to main export is an `ndarray` method which returns a Float64Array... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + const options = { + 'seed': 1234 + }; + + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectType Float64Array + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0, options ); // $ExpectType Float64Array +} + +// The compiler throws an error if the `ndarray` method is provided arguments of incorrect types... +{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus.ndarray( '10', N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, '10', k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, '10', 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, '10', 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 10, x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', 10, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, '10', 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, '10', 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, '10', out, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 
'sqeuclidean', x, N, 1, 0, 10, N, 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, '10', 1, 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, '10', 0, W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, '10', W1, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, 10, 1, 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, '10', 0, W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, '10', W2, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W1, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, '10', 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, '10' ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0, true ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0, { 'seed': true } ); // $ExpectError +} + +// The compiler throws an error if the `ndarray` method is provided an unsupported number of arguments... 
+{ + const k = 3; + const M = 5; + const N = 2; + + const out = new Float64Array( k * N ); + const x = new Float64Array( M * N ); + const W1 = new Float64Array( 2 * M ); + const W2 = new Int32Array( k ); + + dkmeansInitPlusPlus.ndarray(); // $ExpectError + dkmeansInitPlusPlus.ndarray( M ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean' ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1 ); // $ExpectError + dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', x, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0, {}, 0 ); // $ExpectError +} diff --git 
a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/examples/index.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/examples/index.js new file mode 100644 index 000000000000..8fef37e79e38 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/examples/index.js @@ -0,0 +1,58 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); +var discreteUniform = require( '@stdlib/random/array/discrete-uniform' ); +var dkmeansInitPlusPlus = require( './../lib' ); + +var k = 3; +var M = 10; +var N = 2; + +// Generate a random set of data points: +var X = discreteUniform( M*N, -50, 50, { + 'dtype': 'float64' +}); +console.log( X ); + +// Allocate an output matrix for the centroids: +var out = new Float64Array( k*N ); + +// Allocate workspace arrays: +var W1 = new Float64Array( 2*M ); +var W2 = new Int32Array( k ); + +// Set PRNG options: +var options = { + 'seed': 1234 +}; + +// Initialize centroids using the k-means++ procedure: +var centroids = dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', X, N, out, N, W1, 1, W2, 1, options ); + +console.log( centroids ); +// => + +// Initialize centroids using the k-means++ procedure using alternative indexing semantics: +centroids = dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', 
X, N, 1, 0, out, N, 1, 0, W1, 1, 0, W2, 1, 0, options ); + +console.log( centroids ); +// => diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js index 1af42e447dad..6f2fb0fee83d 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/base.js @@ -24,12 +24,14 @@ var discreteUniform = require( '@stdlib/random/base/discrete-uniform' ).factory; var uniform = require( '@stdlib/random/base/uniform' ).factory; var dcopy = require( '@stdlib/blas/base/dcopy' ).ndarray; var PINF = require( '@stdlib/constants/float64/pinf' ); +var hasOwnProp = require( '@stdlib/assert/has-own-property' ); var dsquaredEuclidean = require( '@stdlib/stats/strided/distances/dsquared-euclidean' ).ndarray; var dcosine = require( '@stdlib/stats/strided/distances/dcosine-distance' ).ndarray; var dcityblock = require( '@stdlib/stats/strided/distances/dcityblock' ).ndarray; var dcorrelation = require( '@stdlib/stats/strided/distances/dcorrelation' ).ndarray; var accessors = require( '@stdlib/array/base/accessors' ); var FLOAT64_EPS = require( '@stdlib/constants/float64/eps' ); +var INT32_MAX = require( '@stdlib/constants/int32/max' ); // MAIN // @@ -73,6 +75,7 @@ var FLOAT64_EPS = require( '@stdlib/constants/float64/eps' ); * * - Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual Acm-Siam Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. . 
* +* @private * @param {PositiveInteger} M - number of data points * @param {PositiveInteger} N - number of features * @param {PositiveInteger} k - number of clusters @@ -121,17 +124,18 @@ var FLOAT64_EPS = require( '@stdlib/constants/float64/eps' ); * ]); * * var opts = { -* 'seed': 1234, +* 'seed': 1234 * } * * dkmeansInitPlusPlus( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); -* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] +* // out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] */ -function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ) { // eslint-disable-line max-len, max-params - var centroids; // array of indices +function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ) { // eslint-disable-line max-len, max-params, max-statements + var centroids; // accessor for array of indices var randi; var w1idx; var w2idx; + var opts; var rand; var csum; var bsum; @@ -141,7 +145,6 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 var oidx; var opr; var odh; - var s2; var d2; var bc; var d; @@ -152,8 +155,19 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 var r; if ( arguments.length > 19 ) { - randi = discreteUniform( options ); - rand = uniform( 0.0, 1.0-FLOAT64_EPS, options ); + opts = options; + if ( hasOwnProp( opts, 'prng' ) ) { + randi = discreteUniform( opts ); + rand = uniform( 0.0, 1.0-FLOAT64_EPS, opts ); + } else if ( hasOwnProp( opts, 'seed' ) ) { + randi = discreteUniform( opts ); + rand = uniform( 0.0, 1.0-FLOAT64_EPS, { + 'seed': randi( 1, INT32_MAX ) + }); + } else { + randi = discreteUniform(); + rand = uniform( 0.0, 1.0-FLOAT64_EPS ); + } } else { randi = discreteUniform(); rand = uniform( 0.0, 1.0-FLOAT64_EPS ); @@ -161,16 +175,15 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, 
so1 w2idx = ow2; centroids = accessors( W2 ); - // Resolve workspace offsets for the squared distances and cumulative probabilities. The workspace `W1` interleaves the two values for each data point, such that, for data point `i`, the squared distance resides at strided index `2*i` and the cumulative probability at strided index `2*i+1`... + // Resolve workspace offsets for the squared distances and cumulative probabilities... odh = ow1; - opr = ow1 + sw1; - s2 = 2 * sw1; // stride between consecutive data points within `W1` + opr = ow1 + ( M*sw1 ); // Initialize the squared distances to positive infinity... w1idx = odh; for ( i = 0; i < M; i++ ) { W1[ w1idx ] = PINF; - w1idx += s2; + w1idx += sw1; } // 1. Select a data point at random for the first centroid... @@ -208,7 +221,7 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 csum += W1[ w1idx ]; } xidx += sx1; - w1idx += s2; + w1idx += sw1; } if ( csum === 0.0 ) { @@ -221,8 +234,8 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 w1idx = opr; W1[ w1idx ] = W1[ odh ] / csum; for ( j = 1; j < M; j++ ) { - w1idx += s2; - W1[ w1idx ] = W1[ w1idx-s2 ] + ( W1[ odh + (j*s2) ] / csum ); + w1idx += sw1; + W1[ w1idx ] = W1[ w1idx-sw1 ] + ( W1[ odh + ( j*sw1 ) ] / csum ); // probs[ i ] = probs[ i-1 ] + ( dhash[ ind ] / csum ) } // Based Arthur's and Vassilvitskii's paper "kmeans++: The Advantages of Careful Seeding" (see conclusion), randomly select candidate centroids and pick the candidate which minimizes the total squared distance... 
bsum = PINF; // best sum @@ -235,7 +248,7 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 while ( c === -1 ) { r = rand(); // Note: `r` exists on the interval `[0,1)` for ( j = 0; j < M; j++ ) { - if ( r < W1[ opr + (j*s2) ] ) { + if ( r < W1[ opr + ( j*sw1 ) ] ) { c = j; break; } @@ -252,7 +265,7 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 } else { csum += W1[ w1idx ]; } - w1idx += s2; + w1idx += sw1; } // Determine if the candidate is the best candidate we have seen thus far... if ( csum < bsum ) { diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js deleted file mode 100644 index 6b6c8f294d6d..000000000000 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/dkmeans_init_plus_plus.js +++ /dev/null @@ -1,148 +0,0 @@ -/** -* @license Apache-2.0 -* -* Copyright (c) 2026 The Stdlib Authors. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -'use strict'; - -// MODULES // - -var isColumnMajor = require( '@stdlib/ndarray/base/assert/is-column-major-string' ); -var resolveMetricStr = require( '@stdlib/ml/base/kmeans/metric-resolve-str' ); -var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' ); -var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ); -var stride2offset = require( '@stdlib/strided/base/stride2offset' ); -var isLayout = require( '@stdlib/blas/base/assert/is-layout' ); -var max = require( '@stdlib/math/base/special/max' ); -var format = require( '@stdlib/string/format' ); -var ndarray = require( './ndarray.js' ); - - -// MAIN // - -/** -* Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. -* -* @param {string} order - storage layout -* @param {PositiveInteger} M - number of data points -* @param {PositiveInteger} N - number of features -* @param {PositiveInteger} k - number of clusters -* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) -* @param {string} metric - distance metric -* @param {Float64Array} X - input array -* @param {integer} LDX - stride of the first dimension of `x` (a.k.a., leading dimension of the matrix `x`) -* @param {Float64Array} out - output array -* @param {integer} LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) -* @param {Float64Array} W1 - first workspace array of size `2*M` for tracking squared distances and probabilities -* @param {integer} sw1 - stride length of `W1` -* @param {Float64Array} W2 - second workspace array for tracking centroid candidates -* @param {integer} sw2 - stride length of `W2` -* @param {Options} [options] - function options -* @param {PRNG} [options.prng] - pseudorandom number generator which generates uniformly distributed pseudorandom numbers -* @param {PRNGSeedMT19937} [options.seed] - pseudorandom number generator seed -* @param 
{PRNGStateMT19937} [options.state] - pseudorandom number generator state -* @param {boolean} [options.copy=true] - boolean indicating whether to copy a provided pseudorandom number generator state -* @throws {RangeError} first argument must be greater than or equal to third argument -* @throws {RangeError} fourth argument must be a positive integer -* @throws {TypeError} fifth argument must be a valid supported metric -* @throws {RangeError} sixth argument must be greater than or equal to max(1,N) -* @throws {RangeError} eighth argument must be greater than or equal to max(1,N) -* @returns {Float64Array} centroids -* -* @example -* var Float64Array = require( '@stdlib/array/float64' ); -* var Int32Array = require( '@stdlib/array/int32' ); -* -* var k = 3; -* var M = 5; -* var N = 2; -* -* var out = new Float64Array( k*N ); -* var W1 = new Float64Array( 2*M ); -* var W2 = new Int32Array( k ); -* -* // Specify data points: -* var xbuf = new Float64Array([ -* 0.0, 0.0, -* 1.0, 1.0, -* 1.0, -1.0, -* -1.0, -1.0, -* -1.0, 1.0 -* ]); -* -* var opts = { -* 'seed': 1234, -* } -* -* dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); -* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] -*/ -function dkmeansInitPlusPlus( order, M, N, k, trials, metric, X, LDX, out, LDO, W1, sw1, W2, sw2, options ) { // eslint-disable-line max-len, max-params - var so1; - var so2; - var sx1; - var sx2; - var so; - var sx; - - if ( k > M ) { - throw new RangeError( format( 'invalid argument. First argument `M` must be greater than or equal to third argument `k`. Value: `M=%d, k=%d`.', M, k ) ); - } - if ( !isPositiveInteger( trials ) ) { - throw new RangeError( format( 'invalid argument. Fourth argument must be a be a positive integer. Value: `%d`.', trials ) ); - } - if ( resolveMetricStr( metric ) === null ) { - throw new TypeError( format( 'invalid argument. Fifth argument must be a valid supported metric. 
Value: `%s`.', metric ) ); - } - if ( k < 1 || M < 1 || N < 1 ) { - return out; - } - if ( !isLayout( order ) ) { - throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) ); - } - if ( isRowMajor( order ) ) { - so = N; - sx = N; - } else { - so = k; - sx = M; - } - if ( LDO < max( 1, so ) ) { - throw new RangeError( format( 'invalid argument. Sixth argument must be greater than or equal to max(1,%d). Value: `%d`.', so, LDO ) ); - } - if ( LDX < max( 1, sx ) ) { - throw new RangeError( format( 'invalid argument. Eighth argument must be greater than or equal to max(1,%d). Value: `%d`.', sx, LDX ) ); - } - if ( isColumnMajor( order ) ) { - so1 = 1; - so2 = LDO; - - sx1 = 1; - sx2 = LDX; - } else { // order === 'row-major' - so1 = LDO; - so2 = 1; - - sx1 = LDX; - sx2 = 1; - } - return ndarray( M, N, k, trials, metric, X, sx1, sx2, 0, out, so1, so2, 0, W1, sw1, stride2offset( 2*M, sw1 ), W2, sw2, stride2offset( k, sw2 ), options ); // eslint-disable-line max-len -} - - -// EXPORTS // - -module.exports = dkmeansInitPlusPlus; diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js index b920717626fd..fea6cc8c9fe2 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/index.js @@ -46,11 +46,11 @@ * ]); * * var opts = { -* 'seed': 1234, +* 'seed': 1234 * } * * dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); -* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] +* // out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] * * @example * var Float64Array = require( '@stdlib/array/float64' ); @@ -75,34 +75,26 @@ * ]); * * var opts = { -* 'seed': 1234, +* 'seed': 1234 * } * * dkmeansInitPlusPlus.ndarray( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); -* // out 
=> [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] +* // out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] */ + // MODULES // -var join = require( 'path' ).join; -var tryRequire = require( '@stdlib/utils/try-require' ); -var isError = require( '@stdlib/assert/is-error' ); -var main = require( './main.js' ); +var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' ); +var dkmeansInitPlusPlus = require( './main.js' ); +var ndarray = require( './ndarray.js' ); // MAIN // -var dkmeansInitPlusPlus; -var tmp = tryRequire( join( __dirname, './native.js' ) ); -if ( isError( tmp ) ) { - dkmeansInitPlusPlus = main; -} else { - dkmeansInitPlusPlus = tmp; -} +setReadOnly( dkmeansInitPlusPlus, 'ndarray', ndarray ); // EXPORTS // module.exports = dkmeansInitPlusPlus; - -// exports: { "ndarray": "dkmeansInitPlusPlus.ndarray" } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js index 0745f2767c76..3b503ffa971b 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/main.js @@ -20,14 +20,130 @@ // MODULES // -var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' ); -var dkmeansInitPlusPlus = require( './dkmeans_init_plus_plus.js' ); +var isColumnMajor = require( '@stdlib/ndarray/base/assert/is-column-major-string' ); +var resolveMetricStr = require( '@stdlib/ml/base/kmeans/metric-resolve-str' ); +var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' ); +var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ); +var stride2offset = require( '@stdlib/strided/base/stride2offset' ); +var isLayout = require( '@stdlib/blas/base/assert/is-layout' ); +var max = require( '@stdlib/math/base/special/max' ); +var format = require( '@stdlib/string/format' ); var ndarray = require( './ndarray.js' ); // MAIN // -setReadOnly( 
dkmeansInitPlusPlus, 'ndarray', ndarray ); +/** +* Initializes centroids by performing the k-means++ initialization procedure on double-precision floating-point data points. +* +* @param {string} order - storage layout +* @param {PositiveInteger} M - number of data points +* @param {PositiveInteger} N - number of features +* @param {PositiveInteger} k - number of clusters +* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1) +* @param {string} metric - distance metric +* @param {Float64Array} X - input array +* @param {integer} LDX - stride of the first dimension of `X` (a.k.a., leading dimension of the matrix `X`) +* @param {Float64Array} out - output array +* @param {integer} LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`) +* @param {Float64Array} W1 - first workspace array of size `2*M` for tracking squared distances and probabilities +* @param {integer} sw1 - stride length of `W1` +* @param {Int32Array} W2 - second workspace array for tracking centroid candidates +* @param {integer} sw2 - stride length of `W2` +* @param {Options} [options] - function options +* @param {PRNG} [options.prng] - pseudorandom number generator which generates uniformly distributed pseudorandom numbers +* @param {PRNGSeedMT19937} [options.seed] - pseudorandom number generator seed +* @param {PRNGStateMT19937} [options.state] - pseudorandom number generator state +* @param {boolean} [options.copy=true] - boolean indicating whether to copy a provided pseudorandom number generator state +* @throws {RangeError} second argument must be greater than or equal to fourth argument +* @throws {RangeError} fifth argument must be a positive integer +* @throws {TypeError} sixth argument must be a valid supported metric +* @throws {RangeError} eighth argument must be greater than or equal to max(1,N) +* @throws {RangeError} tenth argument must be greater than or equal to max(1,N) +* @returns {Float64Array} centroids +* 
+* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var Int32Array = require( '@stdlib/array/int32' ); +* +* var k = 3; +* var M = 5; +* var N = 2; +* +* var out = new Float64Array( k*N ); +* var W1 = new Float64Array( 2*M ); +* var W2 = new Int32Array( k ); +* +* // Specify data points: +* var xbuf = new Float64Array([ +* 0.0, 0.0, +* 1.0, 1.0, +* 1.0, -1.0, +* -1.0, -1.0, +* -1.0, 1.0 +* ]); +* +* var opts = { +* 'seed': 1234 +* } +* +* dkmeansInitPlusPlus( 'row-major', M, N, k, 3, 'sqeuclidean', xbuf, 2, out, 2, W1, 1, W2, 1, opts ); +* // out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] +*/ +function dkmeansInitPlusPlus( order, M, N, k, trials, metric, X, LDX, out, LDO, W1, sw1, W2, sw2, options ) { // eslint-disable-line max-len, max-params + var so1; + var so2; + var sx1; + var sx2; + var so; + var sx; + + if ( k > M ) { + throw new RangeError( format( 'invalid argument. Second argument `M` must be greater than or equal to fourth argument `k`. Value: `M=%d, k=%d`.', M, k ) ); + } + if ( !isPositiveInteger( trials ) ) { + throw new RangeError( format( 'invalid argument. Fifth argument must be a positive integer. Value: `%d`.', trials ) ); + } + if ( resolveMetricStr( metric ) === null ) { + throw new TypeError( format( 'invalid argument. Sixth argument must be a valid supported metric. Value: `%s`.', metric ) ); + } + if ( k < 1 || M < 1 || N < 1 ) { + return out; + } + if ( !isLayout( order ) ) { + throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) ); + } + if ( isRowMajor( order ) ) { + so = N; + sx = N; + } else { + so = k; + sx = M; + } + if ( LDX < max( 1, sx ) ) { + throw new RangeError( format( 'invalid argument. Eighth argument must be greater than or equal to max(1,%d). Value: `%d`.', sx, LDX ) ); + } + if ( LDO < max( 1, so ) ) { + throw new RangeError( format( 'invalid argument. Tenth argument must be greater than or equal to max(1,%d). 
Value: `%d`.', so, LDO ) ); + } + if ( isColumnMajor( order ) ) { + so1 = 1; + so2 = LDO; + + sx1 = 1; + sx2 = LDX; + } else { // order === 'row-major' + so1 = LDO; + so2 = 1; + + sx1 = LDX; + sx2 = 1; + } + if ( arguments.length > 14 ) { + return ndarray( M, N, k, trials, metric, X, sx1, sx2, 0, out, so1, so2, 0, W1, sw1, stride2offset( 2*M, sw1 ), W2, sw2, stride2offset( k, sw2 ), options ); // eslint-disable-line max-len + } + return ndarray( M, N, k, trials, metric, X, sx1, sx2, 0, out, so1, so2, 0, W1, sw1, stride2offset( 2*M, sw1 ), W2, sw2, stride2offset( k, sw2 ) ); // eslint-disable-line max-len +} // EXPORTS // diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js index 8b0c1ff299ba..a94fa356efd7 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/lib/ndarray.js @@ -21,6 +21,9 @@ // MODULES // var format = require( '@stdlib/string/format' ); +var isObject = require( '@stdlib/assert/is-plain-object' ); +var isFunction = require( '@stdlib/assert/is-function' ); +var hasOwnProp = require( '@stdlib/assert/has-own-property' ); var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ); var resolveMetricStr = require( '@stdlib/ml/base/kmeans/metric-resolve-str' ); var base = require( './base.js' ); @@ -82,18 +85,18 @@ var base = require( './base.js' ); * ]); * * var opts = { -* 'seed': 1234, +* 'seed': 1234 * } * * dkmeansInitPlusPlus( M, N, k, 3, 'sqeuclidean', xbuf, 2, 1, 0, out, 2, 1, 0, W1, 1, 0, W2, 1, 0, opts ); -* // out => [ 0.0, 0.0, 1.0, 1.0, -1.0, 1.0 ] +* // out => [ 1.0, -1.0, 0.0, 0.0, -1.0, 1.0 ] */ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ) { // eslint-disable-line max-len, max-params if ( k > M ) { throw new RangeError( format( 'invalid 
argument. First argument `M` must be greater than or equal to third argument `k`. Value: `M=%d, k=%d`.', M, k ) ); } if ( !isPositiveInteger( trials ) ) { - throw new RangeError( format( 'invalid argument. Fourth argument must be a be a positive integer. Value: `%d`.', trials ) ); + throw new RangeError( format( 'invalid argument. Fourth argument must be a positive integer. Value: `%d`.', trials ) ); } if ( resolveMetricStr( metric ) === null ) { throw new TypeError( format( 'invalid argument. Fifth argument must be a valid supported metric. Value: `%s`.', metric ) ); @@ -101,7 +104,18 @@ function dkmeansInitPlusPlus( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1 if ( k < 1 || M < 1 || N < 1 ) { return out; } - return base( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ); // eslint-disable-line max-len + if ( arguments.length > 19 ) { + if ( !isObject( options ) ) { + throw new TypeError( format( 'invalid argument. Options argument must be an object. Value: `%s`.', options ) ); + } + if ( hasOwnProp( options, 'prng' ) ) { + if ( !isFunction( options.prng ) ) { + throw new TypeError( format( 'invalid option. `%s` option must be a pseudorandom number generator function. 
Option: `%s`.', 'prng', options.prng ) ); + } + } + return base( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2, options ); // eslint-disable-line max-len + } + return base( M, N, k, trials, metric, X, sx1, sx2, ox, out, so1, so2, oo, W1, sw1, ow1, W2, sw2, ow2 ); // eslint-disable-line max-len } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/package.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/package.json new file mode 100644 index 000000000000..6e89ee3fdb05 --- /dev/null +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/package.json @@ -0,0 +1,69 @@ +{ + "name": "@stdlib/ml/strided/dkmeans-init-plus-plus", + "version": "0.0.0", + "description": "Initialize centroids by performing the k-means++ initialization procedure on double-precision floating-point data points.", + "license": "Apache-2.0", + "author": { + "name": "The Stdlib Authors", + "url": "https://github.com/stdlib-js/stdlib/graphs/contributors" + }, + "contributors": [ + { + "name": "The Stdlib Authors", + "url": "https://github.com/stdlib-js/stdlib/graphs/contributors" + } + ], + "main": "./lib", + "browser": "./lib/main.js", + "gypfile": false, + "directories": { + "benchmark": "./benchmark", + "doc": "./docs", + "example": "./examples", + "lib": "./lib", + "test": "./test" + }, + "types": "./docs/types", + "scripts": {}, + "homepage": "https://github.com/stdlib-js/stdlib", + "repository": { + "type": "git", + "url": "git://github.com/stdlib-js/stdlib.git" + }, + "bugs": { + "url": "https://github.com/stdlib-js/stdlib/issues" + }, + "dependencies": {}, + "devDependencies": {}, + "engines": { + "node": ">=0.10.0", + "npm": ">2.7.0" + }, + "os": [ + "aix", + "darwin", + "freebsd", + "linux", + "macos", + "openbsd", + "sunos", + "win32", + "windows" + ], + "keywords": [ + "stdlib", + "ml", + "strided", + "kmeans", + "kmeans-plus-plus", + "kmeans-init-plus-plus", + "ndarray", + "strided array", + 
"typed", + "array", + "float64", + "double", + "float64array" + ], + "__stdlib__": {} +} diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json index a8c767b24db9..49bdf79c6b95 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/column_major.json @@ -42,12 +42,30 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ - 1.0, - 5.0, + 2.0, + 0.0, 9999.0, 9999.0, - 3.0, - 6.0 + 4.0, + 0.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json index 80ae77ae1bf8..5c4cbd3e084c 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/column_major.json @@ -48,14 +48,30 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ - 1.0, + 2.0, 9999.0, 0.0, 9999.0, 9999.0, 9999.0, - 3.0, + 4.0, 9999.0, 0.0 ] diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json index 39192a0c1b22..b6f5c21b68b2 100644 --- 
a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/large-strides/row_major.json @@ -46,13 +46,29 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ - 1.0, + 3.0, 9999.0, - 2.0, + 4.0, 9999.0, - 3.0, + 0.0, 9999.0, - 4.0 + 0.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json index 9e5d79575b43..01edbb8cb09c 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/column_major.json @@ -38,11 +38,27 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ - 3.0, + 4.0, 0.0, 9999.0, - 1.0, + 2.0, 0.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json index 49fcd8f5b807..663b5eb110a9 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/mixed-strides/row_major.json @@ -39,12 +39,28 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": 
[ 9999.0, 9999.0, + 0.0, + 0.0, 3.0, - 4.0, - 1.0, - 2.0 + 4.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json index dd77c49804ba..f28f4b278692 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/column_major.json @@ -39,12 +39,28 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ 9999.0, 0.0, - 3.0, + 4.0, 9999.0, 0.0, - 1.0 + 2.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json index 490d9bbd708e..5036fe7c5f06 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/negative-strides/row_major.json @@ -39,12 +39,28 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ 9999.0, 9999.0, + 0.0, + 0.0, 4.0, - 3.0, - 2.0, - 1.0 + 3.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json index 546d081bface..f661177c7431 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json +++ 
b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/column_major.json @@ -40,12 +40,28 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ 9999.0, - 1.0, + 2.0, 0.0, 9999.0, - 3.0, + 4.0, 0.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json index 94633bf04965..a406a67cee08 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/offsets/row_major.json @@ -39,11 +39,27 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ 9999.0, - 1.0, - 2.0, 3.0, - 4.0 + 4.0, + 0.0, + 0.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json index d0d6c551c774..3fa4b5597717 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/fixtures/row_major.json @@ -40,10 +40,28 @@ "metric": "sqeuclidean", "trials": 2, "seed": 44, + "W1": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "strideW1": 1, + "offsetW1": 0, + "W2": [ + 0, + 0 + ], + "strideW2": 1, + "offsetW2": 0, "expected": [ - 1.0, - 2.0, - 5.0, - 6.0 + 3.0, + 4.0, + 0.0, + 0.0 ] } diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js 
b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js index 3f2072acfa23..de4ada615199 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.dkmeans_init_plus_plus.js @@ -24,6 +24,7 @@ var tape = require( 'tape' ); var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); var isAlmostSameValue = require( '@stdlib/assert/is-almost-same-value' ); var dkmeansInitPlusPlus = require( './../lib/dkmeans_init_plus_plus.js' ); @@ -42,17 +43,21 @@ tape( 'main export is a function', function test( t ) { t.end(); }); -tape( 'the function has an arity of 11', function test( t ) { - t.strictEqual( dkmeansInitPlusPlus.length, 11, 'returns expected value' ); +tape( 'the function has an arity of 15', function test( t ) { + t.strictEqual( dkmeansInitPlusPlus.length, 15, 'returns expected value' ); t.end(); }); tape( 'the function throws an error if provided a first argument which is not a valid order', function test( t ) { var values; var data; + var opts; var i; data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; values = [ 'foo', @@ -77,41 +82,88 @@ tape( 'the function throws an error if provided a first argument which is not a function badValue( value ) { return function badValue() { - dkmeansInitPlusPlus( value, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + dkmeansInitPlusPlus( value, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); }; } }); -tape( 'the function throws an error if provided a sixth argument which is not a valid `LDO` value (row-major)', function test( t ) { +tape( 'the 
function throws an error if `k` is greater than `M`', function test( t ) { + var data; + var opts; + + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; + + t.throws( badValue, RangeError, 'throws an error' ); + t.end(); + + function badValue() { + dkmeansInitPlusPlus( data.order, 1, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); + } +}); + +tape( 'the function throws an error when trials is less than or equal to zero', function test( t ) { + var data; + var opts; + + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; + + t.throws( badValue( 0 ), RangeError, 'throws an error when provided ' + 0 ); + t.throws( badValue( -1 ), RangeError, 'throws an error when provided ' + -1 ); + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.order, data.M, data.N, data.k, value, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); + }; + } +}); + +tape( 'the function throws an error if provided an unsupported metric', function test( t ) { var values; var data; + var opts; var i; data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; values = [ - 0, - 1 + 'foo', + 'bar', + 'beep', + 'boop' ]; for ( i = 0; i < values.length; i++ ) { - t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); } t.end(); function badValue( value ) { return function badValue() { - dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), value, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + dkmeansInitPlusPlus( data.order, data.M, data.N, 
data.k, data.trials, value, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); }; } }); -tape( 'the function throws an error if provided a sixth argument which is not a valid `LDO` value (column-major)', function test( t ) { +tape( 'the function throws an error if provided a tenth argument which is not a valid `LDO` value (row-major)', function test( t ) { var values; var data; + var opts; var i; - data = COLUMN_MAJOR_DATA; + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; values = [ 0, @@ -125,17 +177,21 @@ tape( 'the function throws an error if provided a sixth argument which is not a function badValue( value ) { return function badValue() { - dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), value, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + dkmeansInitPlusPlus( data.order, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), value, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); }; } }); -tape( 'the function throws an error if provided a eighth argument which is not a valid `LDX` value (row-major)', function test( t ) { +tape( 'the function throws an error if provided a tenth argument which is not a valid `LDO` value (column-major)', function test( t ) { var values; var data; + var opts; var i; - data = ROW_MAJOR_DATA; + data = COLUMN_MAJOR_DATA; + opts = { + 'seed': data.seed + }; values = [ 0, @@ -149,17 +205,21 @@ tape( 'the function throws an error if provided a eighth argument which is not a function badValue( value ) { return function badValue() { - dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), value, data.metric, data.trials, data.seed ); + dkmeansInitPlusPlus( 
data.order, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), value, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); }; } }); -tape( 'the function throws an error if provided a eighth argument which is not a valid `LDX ` value (column-major)', function test( t ) { +tape( 'the function throws an error if provided an eighth argument which is not a valid `LDX` value (row-major)', function test( t ) { var values; var data; + var opts; var i; - data = COLUMN_MAJOR_DATA; + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; values = [ 0, @@ -173,55 +233,93 @@ tape( 'the function throws an error if provided a eighth argument which is not a function badValue( value ) { return function badValue() { - dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), value, data.metric, data.trials, data.seed ); + dkmeansInitPlusPlus( data.order, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), value, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); }; } }); -tape( 'the function throws an error when trials is less than or equal to zero', function test( t ) { +tape( 'the function throws an error if provided an eighth argument which is not a valid `LDX` value (column-major)', function test( t ) { + var values; var data; + var opts; + var i; - data = ROW_MAJOR_DATA; - t.throws( badValue( 0 ), RangeError, 'throws an error when provided ' + 0 ); + data = COLUMN_MAJOR_DATA; + opts = { + 'seed': data.seed + }; + + values = [ + 0, + 1, + 2, + 3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } t.end(); function badValue( value ) { return function badValue() { - dkmeansInitPlusPlus( data.order, 
data.k, data.M, data.N, new Float64Array( data.out ), value, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + dkmeansInitPlusPlus( data.order, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), value, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); }; } }); -tape( 'the function returns a NaN array when M is less than or equal to zero', function test( t ) { +tape( 'the function returns the output array when k is less than one', function test( t ) { + var expected; var data; + var opts; var out; + var i; data = ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.order, data.k, 0, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.order, data.M, data.N, 0, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); - t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + expected = new Float64Array( data.out ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } t.end(); }); -tape( 'the function returns a NaN array when N is less than or equal to zero', function test( t ) { +tape( 'the function returns the output array when N is less than one', function test( t ) { + var expected; var data; + var opts; var out; + var i; data = ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.order, data.k, data.M, 0, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.order, data.M, 0, data.k, data.trials, 
data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); - t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + expected = new Float64Array( data.out ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } t.end(); }); tape( 'the function returns the centroids (row-major)', function test( t ) { var expected; var data; + var opts; var out; var i; data = ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.order, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -233,11 +331,15 @@ tape( 'the function returns the centroids (row-major)', function test( t ) { tape( 'the function returns the centroids (column-major)', function test( t ) { var expected; var data; + var opts; var out; var i; data = COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.order, data.k, data.M, data.N, new Float64Array( data.out ), data.LDO, new Float64Array( data.X ), data.LDX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.order, data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.LDX, new Float64Array( data.out ), data.LDO, new Float64Array( data.W1 ), data.strideW1, new Int32Array( data.W2 ), data.strideW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < 
expected.length; i++ ) { diff --git a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js index 37e933408ff5..486bd2f11ca2 100644 --- a/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js +++ b/lib/node_modules/@stdlib/ml/strided/dkmeans-init-plus-plus/test/test.ndarray.js @@ -24,6 +24,7 @@ var tape = require( 'tape' ); var Float64Array = require( '@stdlib/array/float64' ); +var Int32Array = require( '@stdlib/array/int32' ); var isAlmostSameValue = require( '@stdlib/assert/is-almost-same-value' ); var dkmeansInitPlusPlus = require( './../lib/ndarray.js' ); @@ -50,63 +51,170 @@ tape( 'main export is a function', function test( t ) { t.end(); }); -tape( 'the function has an arity of 14', function test( t ) { - t.strictEqual( dkmeansInitPlusPlus.length, 14, 'returns expected value' ); +tape( 'the function has an arity of 20', function test( t ) { + t.strictEqual( dkmeansInitPlusPlus.length, 20, 'returns expected value' ); t.end(); }); -tape( 'the function returns a NaN array when M is less than or equal to zero (row-major)', function test( t ) { +tape( 'the function throws an error if `k` is greater than `M`', function test( t ) { var data; + var opts; + + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; + + t.throws( badValue, RangeError, 'throws an error' ); + t.end(); + + function badValue() { + dkmeansInitPlusPlus( 1, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); + } +}); + +tape( 'the function throws an error when trials is less than or equal to zero', function test( t ) { + var data; + var opts; + + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + 
}; + + t.throws( badValue( 0 ), RangeError, 'throws an error when provided ' + 0 ); + t.throws( badValue( -1 ), RangeError, 'throws an error when provided ' + -1 ); + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.M, data.N, data.k, value, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); + }; + } +}); + +tape( 'the function throws an error if provided an unsupported metric', function test( t ) { + var values; + var data; + var opts; + var i; + + data = ROW_MAJOR_DATA; + opts = { + 'seed': data.seed + }; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop' + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, value, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); + }; + } +}); + +tape( 'the function returns the output array when k is less than one (row-major)', function test( t ) { + var expected; + var data; + var opts; var out; + var i; data = ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, 0, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, 0, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, 
data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); - t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + expected = new Float64Array( data.out ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } t.end(); }); -tape( 'the function returns a NaN array when N is less than or equal to zero (row-major)', function test( t ) { +tape( 'the function returns the output array when N is less than one (row-major)', function test( t ) { + var expected; var data; + var opts; var out; + var i; data = ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, 0, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, 0, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); - t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + expected = new Float64Array( data.out ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } t.end(); }); -tape( 'the function returns a NaN array when M is less than or equal to zero (column-major)', function test( t ) { +tape( 'the function returns the output array when k is less than one (column-major)', function test( t ) { + var expected; var data; + var opts; var out; + var i; data = 
COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, 0, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, 0, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); - t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + expected = new Float64Array( data.out ); + for ( i = 0; i < expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } t.end(); }); -tape( 'the function returns a NaN array when N is less than or equal to zero (column-major)', function test( t ) { +tape( 'the function returns the output array when N is less than one (column-major)', function test( t ) { + var expected; var data; + var opts; var out; + var i; data = COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, 0, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, 0, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); - t.strictEqual( isAlmostSameValue( out, NaN, 0), true, 'returns expected value' ); + expected = new Float64Array( data.out ); + for ( i = 0; i < 
expected.length; i++ ) { + t.strictEqual( isAlmostSameValue( out[ i ], expected[ i ], 0 ), true, 'returns expected value' ); + } t.end(); }); tape( 'the function returns the centroids (row-major)', function test( t ) { var expected; var data; + var opts; var out; var i; data = ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -118,11 +226,15 @@ tape( 'the function returns the centroids (row-major)', function test( t ) { tape( 'the function returns the centroids (column-major)', function test( t ) { var expected; var data; + var opts; var out; var i; data = COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -134,11 +246,15 @@ 
tape( 'the function returns the centroids (column-major)', function test( t ) { tape( 'the function returns the centroids (row-major, offsets)', function test( t ) { var expected; var data; + var opts; var out; var i; data = OFFSET_ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -150,11 +266,15 @@ tape( 'the function returns the centroids (row-major, offsets)', function test( tape( 'the function returns the centroids (column-major, offsets)', function test( t ) { var expected; var data; + var opts; var out; var i; data = OFFSET_COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -166,11 +286,15 @@ tape( 'the function returns the 
centroids (column-major, offsets)', function tes tape( 'the function returns the centroids (row-major, mixed strides)', function test( t ) { var expected; var data; + var opts; var out; var i; data = MIXED_STRIDES_ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -182,11 +306,15 @@ tape( 'the function returns the centroids (row-major, mixed strides)', function tape( 'the function returns the centroids (column-major, mixed strides)', function test( t ) { var expected; var data; + var opts; var out; var i; data = MIXED_STRIDES_COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -198,11 +326,15 @@ tape( 'the function returns the 
centroids (column-major, mixed strides)', functi tape( 'the function returns the centroids (row-major, negative strides)', function test( t ) { var expected; var data; + var opts; var out; var i; data = NEGATIVE_STRIDES_ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -214,11 +346,15 @@ tape( 'the function returns the centroids (row-major, negative strides)', functi tape( 'the function returns the centroids (column-major, negative strides)', function test( t ) { var expected; var data; + var opts; var out; var i; data = NEGATIVE_STRIDES_COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -230,11 +366,15 @@ tape( 'the function 
returns the centroids (column-major, negative strides)', fun tape( 'the function returns the centroids (row-major, large strides)', function test( t ) { var expected; var data; + var opts; var out; var i; data = LARGE_STRIDES_ROW_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) { @@ -246,11 +386,15 @@ tape( 'the function returns the centroids (row-major, large strides)', function tape( 'the function returns the centroids (column-major, large strides)', function test( t ) { var expected; var data; + var opts; var out; var i; data = LARGE_STRIDES_COLUMN_MAJOR_DATA; - out = dkmeansInitPlusPlus( data.k, data.M, data.N, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, data.metric, data.trials, data.seed ); + opts = { + 'seed': data.seed + }; + out = dkmeansInitPlusPlus( data.M, data.N, data.k, data.trials, data.metric, new Float64Array( data.X ), data.strideX1, data.strideX2, data.offsetX, new Float64Array( data.out ), data.strideO1, data.strideO2, data.offsetO, new Float64Array( data.W1 ), data.strideW1, data.offsetW1, new Int32Array( data.W2 ), data.strideW2, data.offsetW2, opts ); expected = new Float64Array( data.expected ); for ( i = 0; i < expected.length; i++ ) {