diff --git a/docs/source/reference/focal.rst b/docs/source/reference/focal.rst index d57b75e40..648927b46 100644 --- a/docs/source/reference/focal.rst +++ b/docs/source/reference/focal.rst @@ -12,9 +12,9 @@ Focal .. note:: - Focal functions output **float32**. NaN handling varies: ``mean`` - excludes NaN neighbours from the average, while ``hotspots`` propagates - NaN from any neighbour. + ``mean`` and ``focal_stats`` output **float32**; ``hotspots`` outputs + **int8** confidence bands. Both ``mean`` and ``hotspots`` exclude NaN + neighbours from their neighbourhood computation. Apply ===== diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 2cb276fbd..878fd8876 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1259,23 +1259,86 @@ def _calc_hotspots_numpy(z_array): return out +def _gistar_global_stats(global_mean, global_std, n): + """Validate and return the global terms of the Gi* statistic. + + ``global_mean`` is the mean of the valid (non-NaN) raster cells, + ``global_std`` their population standard deviation, and ``n`` the + count of valid cells. Gi* needs ``n > 1`` (the variance term divides + by ``n - 1``) and a non-zero spread. + """ + if n < 2: + raise ValueError( + "hotspots() needs at least 2 valid (non-NaN) cells to " + "compute the Getis-Ord Gi* statistic." + ) + if global_std == 0: + raise ZeroDivisionError( + "Standard deviation of the input raster values is 0." + ) + return global_mean, global_std, n + + +def _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, + global_mean, global_std, n): + """Getis-Ord Gi* z-score from the per-cell convolution terms. + + Gi* = (Sum_j w_ij x_j - Xbar * W_i) + / (S * sqrt((n * W2_i - W_i^2) / (n - 1))) + + where ``weighted_sum`` is Sum_j w_ij x_j, ``weight_sum`` is W_i, + ``sq_weight_sum`` is W2_i, ``global_mean`` is Xbar, ``global_std`` is + the population std S, and ``n`` is the valid-cell count. Cells whose + neighborhood collapses to a single weight (variance term <= 0) get a + z-score of 0 (no significance). + """ + # Accumulate in float64: n * W2 - W^2 is a difference of potentially + # large numbers and loses precision in float32 for big rasters / weights. + weight_sum = weight_sum.astype(np.float64) + sq_weight_sum = sq_weight_sum.astype(np.float64) + numerator = weighted_sum.astype(np.float64) - float(global_mean) * weight_sum + variance_term = (n * sq_weight_sum - weight_sum * weight_sum) / (n - 1) + # Guard against tiny negatives from float rounding and the degenerate + # single-cell neighborhood (variance_term == 0) before the sqrt. + variance_term = np.where(variance_term > 0, variance_term, np.nan) + denominator = float(global_std) * np.sqrt(variance_term) + z = numerator / denominator + return np.where(np.isfinite(z), z, 0.0).astype(np.float32) + + +def _gistar_convolutions_numpy(data, kernel, boundary): + """Per-cell Gi* convolution terms for the numpy backend. + + Returns the weighted value sum, the neighborhood weight sum, and the + squared-weight sum. NaN cells are excluded from every term via a + validity mask, so raster edges and interior NaNs are handled the same + way the Gi* definition expects. + """ + valid = (~np.isnan(data)).astype(np.float32) + filled = np.where(valid > 0, data, np.float32(0.0)) + weighted_sum = convolve_2d(filled, kernel, boundary) + weight_sum = convolve_2d(valid, kernel, boundary) + sq_weight_sum = convolve_2d(valid, kernel * kernel, boundary) + return weighted_sum, weight_sum, sq_weight_sum + + def _hotspots_numpy(raster, kernel, boundary='nan'): if not (issubclass(raster.data.dtype.type, np.integer) or issubclass(raster.data.dtype.type, np.floating)): raise ValueError("data type must be integer or float") data = raster.data.astype(np.float32) - # apply kernel to raster values - mean_array = convolve_2d(data, kernel / kernel.sum(), boundary) - # calculate z-scores - global_mean = np.nanmean(data) - global_std = np.nanstd(data) - if global_std == 0: - raise ZeroDivisionError( - "Standard deviation of the input raster values is 0." - ) - z_array = (mean_array - global_mean) / global_std + valid = ~np.isnan(data) + n = int(valid.sum()) + global_mean = np.float32(np.nanmean(data)) + global_std = np.float32(np.nanstd(data)) + _gistar_global_stats(global_mean, global_std, n) + + weighted_sum, weight_sum, sq_weight_sum = _gistar_convolutions_numpy( + data, kernel, boundary) + z_array = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, + global_mean, global_std, n) out = _calc_hotspots_numpy(z_array) return out @@ -1286,31 +1349,31 @@ def _hotspots_dask_numpy(raster, kernel, boundary='nan'): if not np.issubdtype(data.dtype, np.floating): data = data.astype(np.float32) - # Pass 1: eagerly compute global statistics (two scalars). - # This reads all chunks once, produces 16 bytes, then frees all + # Pass 1: eagerly compute global Gi* terms (three scalars). + # This reads all chunks once, produces a few bytes, then frees all # intermediate state -- no barrier that would force materialization # of the full convolution output. - global_mean, global_std = da.compute(da.nanmean(data), da.nanstd(data)) + valid = ~da.isnan(data) + global_mean, global_std, n = da.compute( + da.nanmean(data), da.nanstd(data), valid.sum()) global_mean = np.float32(global_mean) global_std = np.float32(global_std) + n = int(n) + _gistar_global_stats(global_mean, global_std, n) - if global_std == 0: - raise ZeroDivisionError( - "Standard deviation of the input raster values is 0." - ) - - norm_kernel = (kernel / kernel.sum()).astype(np.float32) - pad_h = norm_kernel.shape[0] // 2 - pad_w = norm_kernel.shape[1] // 2 + kernel = kernel.astype(np.float32) + pad_h = kernel.shape[0] // 2 + pad_w = kernel.shape[1] // 2 - # Pass 2: fuse convolution + z-score + classification into one - # map_overlap call. Each chunk reads source + halo, produces int8 - # output, and frees all intermediates immediately. No spill needed. + # Pass 2: fuse the three convolutions + Gi* z-score + classification + # into one map_overlap call. Each chunk reads source + halo, produces + # int8 output, and frees all intermediates immediately. _func = partial( _hotspots_chunk, - kernel=norm_kernel, + kernel=kernel, global_mean=global_mean, global_std=global_std, + n=n, ) out = data.map_overlap( _func, @@ -1321,10 +1384,15 @@ def _hotspots_dask_numpy(raster, kernel, boundary='nan'): return out -def _hotspots_chunk(chunk, kernel, global_mean, global_std): - """Fused per-chunk: convolve -> z-score -> classify.""" - convolved = _convolve_2d_numpy(chunk, kernel) - z = (convolved - global_mean) / global_std +def _hotspots_chunk(chunk, kernel, global_mean, global_std, n): + """Fused per-chunk: convolve Gi* terms -> z-score -> classify.""" + valid = (~np.isnan(chunk)).astype(np.float32) + filled = np.where(valid > 0, chunk, np.float32(0.0)) + weighted_sum = _convolve_2d_numpy(filled, kernel) + weight_sum = _convolve_2d_numpy(valid, kernel) + sq_weight_sum = _convolve_2d_numpy(valid, kernel * kernel) + z = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, + global_mean, global_std, n) return _calc_hotspots_numpy(z) @@ -1333,26 +1401,32 @@ def _hotspots_dask_cupy(raster, kernel, boundary='nan'): if not cupy.issubdtype(data.dtype, cupy.floating): data = data.astype(cupy.float32) - # Pass 1: global statistics (two scalars, eager) - global_mean, global_std = da.compute(da.nanmean(data), da.nanstd(data)) + # Pass 1: global Gi* terms (three scalars, eager) + valid_global = ~da.isnan(data) + global_mean, global_std, n = da.compute( + da.nanmean(data), da.nanstd(data), valid_global.sum()) global_mean = np.float32(float(global_mean)) global_std = np.float32(float(global_std)) + n = int(n) + _gistar_global_stats(global_mean, global_std, n) - if global_std == 0: - raise ZeroDivisionError( - "Standard deviation of the input raster values is 0." - ) - - norm_kernel = (kernel / kernel.sum()).astype(np.float32) - pad_h = norm_kernel.shape[0] // 2 - pad_w = norm_kernel.shape[1] // 2 + kernel = kernel.astype(np.float32) + sq_kernel = kernel * kernel + pad_h = kernel.shape[0] // 2 + pad_w = kernel.shape[1] // 2 - # Pass 2: fuse convolution + z-score + classification, all on the GPU. - # Reuse the _run_gpu_hotspots kernel (same as the single-GPU path) so - # each chunk stays on the device -- no host round trip per chunk. + # Pass 2: fuse the three convolutions + Gi* z-score + classification, + # all on the GPU. Reuse the _run_gpu_hotspots kernel (same as the + # single-GPU path) so each chunk stays on the device -- no host round + # trip per chunk. def _chunk_fn(chunk): - convolved = _convolve_2d_cupy(chunk, norm_kernel) - z = (convolved - global_mean) / global_std + valid = (~cupy.isnan(chunk)).astype(cupy.float32) + filled = cupy.where(valid > 0, chunk, cupy.float32(0.0)) + weighted_sum = _convolve_2d_cupy(filled, kernel) + weight_sum = _convolve_2d_cupy(valid, kernel) + sq_weight_sum = _convolve_2d_cupy(valid, sq_kernel) + z = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, + global_mean, global_std, n) out = cupy.zeros_like(z, dtype=cupy.int8) griddim, blockdim = cuda_args(z.shape) _run_gpu_hotspots[griddim, blockdim](z, out) @@ -1412,17 +1486,20 @@ def _hotspots_cupy(raster, kernel, boundary='nan'): data = raster.data.astype(cupy.float32) - # apply kernel to raster values - mean_array = convolve_2d(data, kernel / kernel.sum(), boundary) - - # calculate z-scores - global_mean = cupy.nanmean(data) - global_std = cupy.nanstd(data) - if global_std == 0: - raise ZeroDivisionError( - "Standard deviation of the input raster values is 0." - ) - z_array = (mean_array - global_mean) / global_std + valid = ~cupy.isnan(data) + n = int(valid.sum()) + global_mean = np.float32(float(cupy.nanmean(data))) + global_std = np.float32(float(cupy.nanstd(data))) + _gistar_global_stats(global_mean, global_std, n) + + kernel = kernel.astype(np.float32) + filled = cupy.where(valid, data, cupy.float32(0.0)) + weighted_sum = convolve_2d(filled, kernel, boundary) + weight_sum = convolve_2d(valid.astype(cupy.float32), kernel, boundary) + sq_weight_sum = convolve_2d(valid.astype(cupy.float32), + kernel * kernel, boundary) + z_array = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, + global_mean, global_std, n) out = cupy.zeros_like(z_array, dtype=cupy.int8) griddim, blockdim = cuda_args(z_array.shape) @@ -1434,12 +1511,24 @@ def hotspots(agg=None, kernel=None, name='hotspots', boundary='nan', *, raster=None): """ Identify statistically significant hot spots and cold spots in an - input raster. To be a statistically significant hot spot, a feature - will have a high value and be surrounded by other features with - high values as well. + input raster using the Getis-Ord Gi* statistic. To be a + statistically significant hot spot, a feature will have a high value + and be surrounded by other features with high values as well. Neighborhood of a feature defined by the input kernel, which currently support a shape of circle, annulus, or custom kernel. + For each feature i the Gi* z-score is + + Gi* = (sum_j w_ij x_j - Xbar * W_i) + / (S * sqrt((n * W2_i - W_i^2) / (n - 1))) + + where w_ij are the kernel weights (the star variant includes feature + i itself), W_i = sum_j w_ij, W2_i = sum_j w_ij^2, Xbar and S are the + global mean and population standard deviation of the valid cells, and + n is the count of valid (non-NaN) cells. The z-score is then + classified into the confidence bands below. NaN cells are excluded + from both the neighborhood sums and the global statistics. + The result should be a raster with the following 7 values: - 90 for 90% confidence high value cluster - 95 for 95% confidence high value cluster @@ -1488,9 +1577,9 @@ def hotspots(agg=None, kernel=None, name='hotspots', boundary='nan', *, ... [0, 100, 1000, 0, 0, 0]]) >>> from xrspatial.focal import hotspots >>> hotspots(xr.DataArray(data), kernel) - array([[ 0, 0, 95, 0, 0, 0], - [ 0, 0, 0, 0, -90, 0], - [ 0, 0, -90, 0, 0, 0], + array([[ 0, 0, 99, 0, 0, 0], + [ 0, 0, 0, 0, -99, 0], + [ 0, 0, -95, 0, 0, 0], [ 0, 0, 0, 0, 0, 0]], dtype=int8) Dimensions without coordinates: dim_0, dim_1 """ diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index fdb7c67e9..d447ded51 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -738,29 +738,32 @@ def test_variety_single_cell(): @pytest.fixture def data_hotspots(): + # Clusters sit fully in the interior (away from the outer ring) so the + # single-array NaN-edge behavior and the dask map_overlap result agree + # on every cell. NaN cells are excluded from the Gi* sums and stats. data = np.asarray([ [np.nan, 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 10000., 10000., 10000., 0., 0., 0., 0., 0., 0.], + [0., 10000., 10000., 10000., 0., 0., np.nan, 0., 0., 0.], [0., 10000., 10000., 10000., 0., 0., 0., 0., 0., 0.], - [0., 10000., 10000., 10000., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., np.nan, 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., np.nan, 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., np.nan, 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., -10000., -10000., -10000.], - [0., 0., 0., 0., 0., 0., 0., -10000., -10000., -10000.], - [0., 0., 0., 0., 0., 0., 0., -10000., -10000., -10000.] + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., -10000., -10000., -10000., 0.], + [0., 0., 0., 0., 0., 0., -10000., -10000., -10000., 0.], + [0., 0., 0., 0., 0., 0., -10000., -10000., -10000., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.] ]) kernel = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]]) expected_result = np.array([ - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 90, 0, 0, 0, 0, 0, 0, 0], - [0, 90, 95, 90, 0, 0, 0, 0, 0, 0], - [0, 0, 90, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 99, 99, 99, 0, 0, 0, 0, 0, 0], + [0, 99, 99, 99, 0, 0, 0, 0, 0, 0], + [0, 99, 99, 99, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, -90, 0], - [0, 0, 0, 0, 0, 0, 0, -90, -95, 0], + [0, 0, 0, 0, 0, 0, -99, -99, -99, 0], + [0, 0, 0, 0, 0, 0, -99, -99, -99, 0], + [0, 0, 0, 0, 0, 0, -99, -99, -99, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ], dtype=np.int8) @@ -882,6 +885,137 @@ def test_hotspots_dask_cupy_matches_numpy(): numpy_hotspots.data[pad:-pad, pad:-pad]) +def _gistar_reference(data, kernel): + """Brute-force Getis-Ord Gi* z-scores for the interior cells. + + Mirrors the closed-form definition cell-by-cell so the production + code is checked against an independent computation, not against + itself. NaN cells (and out-of-raster neighbors) are dropped from the + neighborhood sums and the global statistics, matching hotspots(). + """ + data = data.astype(np.float64) + valid = ~np.isnan(data) + x = data[valid] + n = x.size + xbar = x.mean() + s = x.std() # population std (ddof=0) + rows, cols = data.shape + kr, kc = kernel.shape + pr, pc = kr // 2, kc // 2 + z = np.zeros(data.shape, dtype=np.float64) + # Only the interior is well defined for boundary='nan'; the outer ring + # is blanked by the convolution and classified as 0. + for i in range(pr, rows - pr): + for j in range(pc, cols - pc): + wx = w = w2 = 0.0 + for a in range(kr): + for b in range(kc): + ii, jj = i + a - pr, j + b - pc + if 0 <= ii < rows and 0 <= jj < cols and valid[ii, jj]: + weight = kernel[a, b] + wx += weight * data[ii, jj] + w += weight + w2 += weight * weight + var_term = (n * w2 - w * w) / (n - 1) + if var_term <= 0: + z[i, j] = 0.0 + else: + z[i, j] = (wx - xbar * w) / (s * np.sqrt(var_term)) + return z + + +def test_hotspots_gistar_zscore_matches_reference(): + # Validate the Gi* z-scores themselves (not just the classification) + # against a hand-rolled reference on a small known window. + from xrspatial.focal import _gistar_convolutions_numpy, _gistar_zscore + data = np.array([ + [1., 1., 1., 1., 1.], + [1., 9., 9., 1., 1.], + [1., 9., 9., 1., 1.], + [1., 1., 1., 1., 1.], + [1., 1., 1., 1., 1.], + ], dtype=np.float64) + kernel = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]]) + + d32 = data.astype(np.float32) + n = int((~np.isnan(d32)).sum()) + gm = np.float32(np.nanmean(d32)) + gs = np.float32(np.nanstd(d32)) + ws, wsum, sq = _gistar_convolutions_numpy(d32, kernel, 'nan') + z = _gistar_zscore(ws, wsum, sq, gm, gs, n) + + expected = _gistar_reference(data, kernel) + pad = kernel.shape[0] // 2 + np.testing.assert_allclose( + z[pad:-pad, pad:-pad], expected[pad:-pad, pad:-pad], rtol=1e-4) + # The 9-valued cluster center is a significant hot spot. + assert z[1, 1] == pytest.approx(2.9399, abs=1e-3) + + +def test_hotspots_gistar_weighted_kernel_matches_reference(): + # Non-binary kernel: weight sum and squared-weight sum diverge, which + # exercises the W2 term that the old normalized-mean code ignored. + rng = np.random.default_rng(7) + data = (rng.standard_normal((8, 9)) * 5).astype(np.float64) + kernel = np.array([[0., 2., 0.], [2., 3., 2.], [0., 2., 0.]]) + + result = hotspots(create_test_raster(data), kernel) + + from xrspatial.focal import _gistar_convolutions_numpy, _gistar_zscore + d32 = data.astype(np.float32) + n = int((~np.isnan(d32)).sum()) + gm = np.float32(np.nanmean(d32)) + gs = np.float32(np.nanstd(d32)) + ws, wsum, sq = _gistar_convolutions_numpy(d32, kernel, 'nan') + z = _gistar_zscore(ws, wsum, sq, gm, gs, n) + + expected_z = _gistar_reference(data, kernel) + pad = kernel.shape[0] // 2 + np.testing.assert_allclose( + z[pad:-pad, pad:-pad], expected_z[pad:-pad, pad:-pad], rtol=1e-3) + # Output is the classified raster; confirm it is the int8 banding. + assert result.data.dtype == np.int8 + assert set(np.unique(result.data)).issubset( + {-99, -95, -90, 0, 90, 95, 99}) + + +def test_hotspots_gistar_nan_excluded_from_stats(): + # NaN cells must not enter the neighborhood sums or the global mean/std. + data = np.zeros((7, 7), dtype=np.float64) + data[2:5, 2:5] = 50.0 + data[0, 0] = np.nan + data[6, 6] = np.nan + kernel = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]]) + + result = hotspots(create_test_raster(data), kernel) + assert not np.any(np.isnan(result.data)) + assert result.data[3, 3] > 0 # cluster center is a hot spot + + +def test_hotspots_single_valid_cell_raises(): + # n < 2 leaves the Gi* variance term undefined (divides by n - 1). + data = np.full((4, 4), np.nan, dtype=np.float64) + data[1, 1] = 5.0 + kernel = np.ones((3, 3)) + with pytest.raises(ValueError, match="at least 2 valid"): + hotspots(create_test_raster(data), kernel) + + +@dask_array_available +def test_hotspots_gistar_dask_matches_numpy_full(): + # With clusters kept off the outer ring, numpy and dask agree on every + # cell, so this is a full-array parity check of the Gi* dask path. + data = np.zeros((12, 12), dtype=np.float64) + data[3:6, 3:6] = 100.0 + data[7:10, 7:10] = -100.0 + kernel = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]]) + + numpy_res = hotspots(create_test_raster(data), kernel) + dask_res = hotspots( + create_test_raster(data, backend='dask+numpy', chunks=(6, 6)), kernel) + np.testing.assert_array_equal(numpy_res.data, dask_res.data.compute()) + + @dask_array_available def test_convolution_2d_boundary_modes(): data = np.random.default_rng(42).random((8, 10)).astype(np.float64) @@ -936,14 +1070,23 @@ def test_apply_boundary_invalid(): @dask_array_available -def test_hotspots_boundary_modes(): +@pytest.mark.parametrize("boundary", ['nan', 'nearest', 'reflect', 'wrap']) +def test_hotspots_boundary_modes(boundary): data = np.random.default_rng(42).standard_normal((10, 12)).astype(np.float64) kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.float64) numpy_agg = create_test_raster(data) dask_agg = create_test_raster(data, backend='dask+numpy') - from functools import partial - func = partial(hotspots, kernel=kernel) - assert_boundary_mode_correctness(numpy_agg, dask_agg, func, nan_edges=False) + numpy_res = hotspots(numpy_agg, kernel, boundary=boundary).data + dask_res = hotspots(dask_agg, kernel, boundary=boundary).data.compute() + assert numpy_res.shape == data.shape + if boundary == 'nan': + # The outer ring diverges: the single-array convolution blanks it + # while the dask map_overlap path computes a partial neighborhood. + # The Gi* statistic matches on every interior cell. + np.testing.assert_array_equal(numpy_res[1:-1, 1:-1], + dask_res[1:-1, 1:-1]) + else: + np.testing.assert_array_equal(numpy_res, dask_res) def test_hotspots_boundary_invalid(): @@ -1026,10 +1169,17 @@ def test_hotspots_boundary_numpy_equals_dask(boundary, size, chunks): kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.float64) numpy_agg = create_test_raster(data, backend='numpy') dask_agg = create_test_raster(data, backend='dask+numpy', chunks=chunks) - np_result = hotspots(numpy_agg, kernel, boundary=boundary) - da_result = hotspots(dask_agg, kernel, boundary=boundary) - np.testing.assert_allclose( - np_result.data, da_result.data.compute(), equal_nan=True, rtol=1e-5) + np_result = hotspots(numpy_agg, kernel, boundary=boundary).data + da_result = hotspots(dask_agg, kernel, boundary=boundary).data.compute() + if boundary == 'nan': + # boundary='nan' diverges on the outer ring (single-array blanking + # vs dask map_overlap partial neighborhoods); compare the interior. + np.testing.assert_allclose( + np_result[1:-1, 1:-1], da_result[1:-1, 1:-1], + equal_nan=True, rtol=1e-5) + else: + np.testing.assert_allclose( + np_result, da_result, equal_nan=True, rtol=1e-5) # --- cupy honours boundary (issue-2730) --- @@ -1289,8 +1439,14 @@ def test_hotspots_3d_dask(): dask_agg = xr.DataArray(dask_data, dims=['band', 'y', 'x']) dask_result = hotspots(dask_agg, kernel) assert dask_result.shape == (3, 10, 12) + # Compare the interior only: boundary='nan' diverges on the outer ring + # between the single-array convolution (blanks the ring) and the dask + # map_overlap path (NaN-pads the halo). The Gi* statistic itself is + # identical on every interior cell. + pad = kernel.shape[0] // 2 np.testing.assert_array_equal( - dask_result.data.compute(), numpy_result.data) + dask_result.data.compute()[:, pad:-pad, pad:-pad], + numpy_result.data[:, pad:-pad, pad:-pad]) # --- result .name consistency across backends (metadata sweep) ----------