diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index 398a068d..a2061c04 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -24,7 +24,7 @@ normalize,2026-05-01,,,,rescale and standardize across all 4 backends. NaN/inf f perlin,2026-04-10T12:00:00Z,,,,Improved Perlin noise implementation correct. Fade/gradient functions verified. Backend-consistent. Continuous at cell boundaries. polygon_clip,2026-04-13T12:00:00Z,1197,,,crop=True + all_touched=True drops boundary pixels. Fix in PR #1200. polygonize,2026-05-29,2606,HIGH,5,"Cat 5 HIGH: dask connectivity=8 cross-chunk merge filled diagonal notch where same-value regions meet only at a corner across a chunk boundary; total area exceeded raster. Hole ring was dropped because containment tested hole[0] (on exterior at pinch). Fixed via _ring_interior_point in PR for #2606. numpy, dask+numpy, dask+cupy area parity now holds; 4-conn was already correct. cupy + dask+cupy paths validated on GPU host. Other cats clean: NaN masked on numpy/cupy float paths (tested), _is_close handles +/-inf via exact-equality short-circuit, atol/rtol/simplify_tolerance reject NaN/inf, integer GPU CCL matches numpy." -proximity,2026-03-30T15:00:00Z,,,,Direction >= boundary fragile but works due to truncated constant. Float32 truncation is design choice. No wrong-results bugs found. +proximity,2026-05-29,2721,MEDIUM,4;5,Bounded GREAT_CIRCLE on dask (both numpy+cupy) raised ValueError: map_overlap pad depth = max_distance/cellsize mixed metre distance with degree cellsize. numpy/cupy backends fine. Fixed by measuring per-pixel pitch with active metric (PR #2722). Cat1 float32 output is documented design choice; NaN/Inf masking via np.isfinite consistent; numpy GDAL-sweep matches exact nearest and cupy brute-force on tested grids. reproject,2026-05-29,2620,HIGH,5,"Cat5 backend inconsistency: cupy _resample_cupy (cupyx map_coordinates) diverged from numpy/native on pyproj-fallback CRS pairs (projected->projected, e.g. EPSG:32633->3857). Edge-band cval=0.0 bleed (all modes, ~534/pixel) + cubic B-spline vs Catmull-Rom (~0.45 interior). Fixed PR for #2620: route eager+dask cupy through _resample_cupy_native. Other files clean: _merge numpy/cupy structurally identical; _datum_grids/_vertical/_itrf use -0.5 pixel-center interp and self-inequality NaN checks; WGS84/GRS80 constants correct; curvature correction n/a (no geodesic gradient here). LOW (not fixed): _transform._bilinear_interp_2ch docstring claims parallel but isn't." resample,2026-05-29,2610,HIGH,3;5,"dask interp (nearest/bilinear) overlap depth=1 too small on downsample; block-centered source coord landed past chunk, map_coordinates clamped to edge -> wrong seam rows. Fixed PR #2627 via per-axis _downsample_radius. cupy+dask+cupy verified." sieve,2026-04-13T12:00:00Z,,,,Union-find CCL correct. NaN excluded from labeling. All backends funnel through _sieve_numpy. diff --git a/xrspatial/proximity.py b/xrspatial/proximity.py index b9b0e3c9..bb963d6b 100644 --- a/xrspatial/proximity.py +++ b/xrspatial/proximity.py @@ -27,7 +27,7 @@ class cupy(object): from xrspatial.pathfinding import _available_memory_bytes from xrspatial.utils import ( _validate_raster, - cuda_args, get_dataarray_resolution, has_cuda_and_cupy, + cuda_args, has_cuda_and_cupy, is_cupy_array, is_dask_cupy, ngjit, ) from xrspatial.dataset_support import supports_dataset @@ -426,16 +426,25 @@ def _process_dask_cupy(raster, x_coords, y_coords, target_values, max_distance, distance_metric, process_mode): """Dask+CuPy bounded proximity via map_overlap with per-chunk GPU kernel. - Each chunk (plus overlap padding of ``max_distance / cellsize`` pixels) - is processed on GPU independently. Only valid for finite max_distance + Each chunk (plus an overlap padding of ``max_distance`` converted to + pixels using the active distance metric) is processed on GPU + independently. Only valid for finite max_distance where the padding guarantees all relevant targets are visible within each overlapped chunk. """ import cupy as cp - cellsize_x, cellsize_y = get_dataarray_resolution(raster) - pad_y = int(max_distance / abs(cellsize_y) + 0.5) - pad_x = int(max_distance / abs(cellsize_x) + 0.5) + # Overlap depth in pixels, measured with the active distance_metric so + # that GREAT_CIRCLE (max_distance in metres) does not divide by a degree + # cellsize. See _process_dask for the same conversion. + dist_per_row = _distance( + x_coords[0], x_coords[0], + y_coords[0], y_coords[1], distance_metric) + dist_per_col = _distance( + x_coords[0], x_coords[1], + y_coords[0], y_coords[0], distance_metric) + pad_y = int(max_distance / dist_per_row + 0.5) + pad_x = int(max_distance / dist_per_col + 0.5) # Build 2D coordinate grids as dask+cupy arrays matching raster chunks. # Each chunk is small (chunk_h x chunk_w x 8 bytes); the full grid is @@ -1237,10 +1246,19 @@ def _process_dask(raster, xs, ys): ys = ys.rechunk({0: height, 1: width}) pad_y = pad_x = 0 else: - cellsize_x, cellsize_y = get_dataarray_resolution(raster) - # calculate padding for each chunk - pad_y = int(max_distance / cellsize_y + 0.5) - pad_x = int(max_distance / cellsize_x + 0.5) + # Convert max_distance to a per-chunk overlap depth in pixels. + # max_distance is expressed in the same unit as the chosen + # distance_metric, so the pixel pitch must be measured with that + # same metric. Using the raw degree cellsize for GREAT_CIRCLE + # (where max_distance is in metres) yields a meaningless depth. + dist_per_row = _distance( + x_coords[0], x_coords[0], + y_coords[0], y_coords[1], distance_metric) + dist_per_col = _distance( + x_coords[0], x_coords[1], + y_coords[0], y_coords[0], distance_metric) + pad_y = int(max_distance / dist_per_row + 0.5) + pad_x = int(max_distance / dist_per_col + 0.5) out = da.map_overlap( _process_numpy, diff --git a/xrspatial/tests/test_proximity.py b/xrspatial/tests/test_proximity.py index b3dedf87..d8de98d0 100644 --- a/xrspatial/tests/test_proximity.py +++ b/xrspatial/tests/test_proximity.py @@ -1074,6 +1074,43 @@ def test_no_scipy_dask_unbounded_memory_guard(): prox_mod.cKDTree = original_ckdtree +@pytest.mark.skipif(da is None, reason="dask is not installed") +@pytest.mark.parametrize("func", [proximity, allocation, direction]) +def test_great_circle_dask_bounded_matches_numpy(func): + """Bounded GREAT_CIRCLE on a dask raster must match the numpy result. + + Regression test: the map_overlap padding used the raw degree cellsize + while max_distance is in metres for GREAT_CIRCLE, producing an overlap + depth of hundreds of thousands of pixels and raising ValueError on + valid input. numpy and cupy backends handled the same call fine. + """ + height, width = 16, 24 + rng = np.random.RandomState(7) + data = (rng.rand(height, width) < 0.06).astype(np.float64) + _lon = np.linspace(-20, 20, width) + _lat = np.linspace(20, -20, height) + raster = xr.DataArray(data, dims=['lat', 'lon']) + raster['lon'] = _lon + raster['lat'] = _lat + + # bounded, smaller than the corner-to-corner great-circle distance + max_dist = 1.5e6 + numpy_result = func(raster, x='lon', y='lat', + max_distance=max_dist, + distance_metric='GREAT_CIRCLE') + + dask_raster = raster.copy() + dask_raster.data = da.from_array(data, chunks=(8, 12)) + dask_result = func(dask_raster, x='lon', y='lat', + max_distance=max_dist, + distance_metric='GREAT_CIRCLE') + + assert isinstance(dask_result.data, da.Array) + np.testing.assert_allclose( + dask_result.values, numpy_result.values, rtol=1e-5, equal_nan=True, + ) + + # --------------------------------------------------------------------------- # Coverage gaps closed for issue #2692 (test-only; no source change). #