From eccea5175361357167738afda6a6a36c339a4821 Mon Sep 17 00:00:00 2001 From: Walter Simson Date: Sun, 17 May 2026 03:05:04 +0000 Subject: [PATCH 1/5] Bump BINARY_VERSION to v1.4.0 (sm_120 / Blackwell) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidates the per-platform pins (v1.3.0, v1.3.1, v0.3.0rc3) into a single BINARY_VERSION used by all five mirror URLs. Picks up: - CUDA: sm_75;80;86;87;89;90;90a;100;120 (closes #656, #622 once verified on Blackwell hardware) - macOS OMP: linked against libhdf5.320 — current Homebrew ABI (likely closes #661 pending current-Homebrew smoke test) Also slots v0.6.2 (this release) into plans/release-strategy.md and bumps the downstream version slots, plus adds a v0.6.5 entry for the Intel Mac universal2 follow-up. Co-Authored-By: Claude Opus 4.7 (1M context) --- kwave/__init__.py | 6 ++-- plans/release-strategy.md | 63 +++++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/kwave/__init__.py b/kwave/__init__.py index fafc6f837..8fc1a52c7 100644 --- a/kwave/__init__.py +++ b/kwave/__init__.py @@ -13,7 +13,7 @@ # Constants and Configurations URL_BASE = "https://github.com/waltsims/" -BINARY_VERSION = "v1.3.0" +BINARY_VERSION = "v1.4.0" PREFIX = f"{URL_BASE}kspaceFirstOrder-{{}}-{{}}/releases/download/{BINARY_VERSION}/" PLATFORM = platform.system().lower() @@ -53,12 +53,12 @@ def get_windows_release_urls(architecture: str) -> list: URL_DICT = { "linux": { - "cuda": [URL_BASE + f"kspaceFirstOrder-CUDA-{PLATFORM}/releases/download/v1.3.1/{EXECUTABLE_PREFIX}CUDA"], + "cuda": [URL_BASE + f"kspaceFirstOrder-CUDA-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}CUDA"], "omp": [URL_BASE + f"kspaceFirstOrder-OMP-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}OMP"], }, "darwin": { "cuda": [], - "omp": [URL_BASE + f"k-wave-omp-{PLATFORM}/releases/download/v0.3.0rc3/{EXECUTABLE_PREFIX}OMP"], + "omp": [URL_BASE + 
f"k-wave-omp-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}OMP"], }, "windows": {architecture: get_windows_release_urls(architecture) for architecture in ARCHITECTURES}, } diff --git a/plans/release-strategy.md b/plans/release-strategy.md index cf20a9224..687b3174b 100644 --- a/plans/release-strategy.md +++ b/plans/release-strategy.md @@ -11,8 +11,10 @@ This release strategy brings the unified solver architecture to fruition. | **0.5.0** | Finalize master/main | Stabilize current codebase | | **0.6.0** | Python Solver + Unified API + Deprecation | Python solver, `kspaceFirstOrder()` kwargs, Future warnings | | **0.6.1** | C-order + Examples + Docs | C-order migration, 29 examples ported, 47 parity tests, docs cleanup | -| **0.6.2** | Tier 2 Features + Examples | Time-reversal, rect sensors, sound_speed_ref, port Tier 2 examples | -| **0.6.3** | Axisymmetric Support | Axisymmetric solver in new API, port AS examples | +| **0.6.2** | Binary refresh (sm_120 / Blackwell) | Bump URL pins to upstream v1.4.0 binaries with NVIDIA Blackwell support; closes #656, #622 | +| **0.6.3** | Tier 2 Features + Examples | Time-reversal, rect sensors, sound_speed_ref, port Tier 2 examples | +| **0.6.4** | Axisymmetric Support | Axisymmetric solver in new API, port AS examples | +| **0.6.5** | Broader Darwin coverage | Universal2 (arm64 + x86_64) OpenMP binary; restores Intel Mac support | | **0.7.0** | CLI (`kwp`) | Command-line interface for running simulations | | **1.0.0** | Clean Release | Remove deprecated code. Simple, readable, fast. 
| | **2.0.0** | Performance & Scale | nanobind CUDA, MPI, Devito, multi-GPU | @@ -153,7 +155,30 @@ Combined release: C-order migration, example restructure, parity tests, docs cle - macOS C++ hint in executor.py (scoped to linker errors) - Deleted: Makefile, Dockerfile, run_examples.py, notebook pipeline, dead CI workflows -### v0.6.2 — Tier 2 Features + Examples +### v0.6.2 — Binary refresh (sm_120 / Blackwell) + +**Goal:** Ship `kwave/__init__.py` URL pins that point at the v1.4.0 upstream binaries (sm_120 / Blackwell support). This is a thin release: no Python-side code changes, just pin bumps and a CHANGELOG entry. + +**Background:** The unified build pipeline (`kspacefirstorder-unified` @ `02026d05`) produced 5 binary artifacts on 2026-05-16. CUDA archs now include `sm_75;80;86;87;89;90;90a;100;120`. The 5-mirror release flow is itself being retired — see waltsims/kspacefirstorder-unified#13 for the consolidation that obviates a manual runbook. + +**Release sequencing (do in order):** +1. **Tag `v1.4.0` on `kspacefirstorder-unified` @ `02026d05`** — provenance pointer ("this SHA produced the v1.4.0 binaries"). Diff vs current HEAD is doc-only. +2. **Tag `v1.4.0` on each of the 5 mirror repos** with the corresponding artifact: + - `kspaceFirstOrder-CUDA-linux` ← `kspaceFirstOrder-cuda-linux-13.0.0/kspaceFirstOrder-CUDA` + - `kspaceFirstOrder-CUDA-windows` ← `kspaceFirstOrder-cuda-windows-13.0.0/kspaceFirstOrder-CUDA.exe` + - `kspaceFirstOrder-OMP-linux` ← `kspaceFirstOrder-openmp-linux-ubuntu-latest/kspaceFirstOrder-OMP` + - `kspaceFirstOrder-OMP-windows` ← `kspaceFirstOrder-openmp-windows-windows-latest/kspaceFirstOrder-OMP.exe` + - `k-wave-omp-darwin` ← `kspaceFirstOrder-openmp-darwin-macos-latest/kspaceFirstOrder-OMP` (arm64-only — see v0.6.5) +3. In `kwave/__init__.py`, collapse the per-platform version pins (`v1.3.0`, `v1.3.1`, `v0.3.0rc3`) into a single `BINARY_VERSION = "v1.4.0"` used by all five URLs. Open the k-wave-python PR. +4. 
Verify on a real Blackwell GPU (cc @aconesac or Brno team for RTX 5070 Ti). +5. Close issues [#656](https://github.com/waltsims/k-wave-python/issues/656) and [#622](https://github.com/waltsims/k-wave-python/issues/622) on release. +6. **Bonus close for [#661](https://github.com/waltsims/k-wave-python/issues/661) (macOS HDF5 ABI):** the new darwin OMP binary links `libhdf5.320.dylib` (current Homebrew ABI) — verified with `strings` on the artifact. If a current-Homebrew macOS smoke test runs clean, close #661 referencing the v1.4.0 `k-wave-omp-darwin` release. + +**Out of scope:** Darwin x86_64 (Intel Mac) coverage — the v1.4.0 OMP-darwin binary is arm64-only, which is a regression vs. older Intel-era releases. Tracked separately in v0.6.5. + +--- + +### v0.6.3 — Tier 2 Features + Examples **Goal:** Add solver features needed by Tier 2 examples, port those examples. @@ -174,12 +199,30 @@ Combined release: C-order migration, example restructure, parity tests, docs cle - Remove unused MATLAB collector infrastructure if parity tests replace it - Audit `kWaveSimulation_helper/` — delete helpers superseded by `kspaceFirstOrder()` -### v0.6.3 — Axisymmetric Support +### v0.6.4 — Axisymmetric Support Axisymmetric = dimensionality reduction (3D→2D or 2D→1D). Not a separate solver — wrapper around `kspaceFirstOrder()` with radial symmetry terms added to the wave equation. --- +### v0.6.5 — Broader Darwin coverage (Intel + Apple Silicon) + +**Goal:** Restore Intel Mac support for the OpenMP binary. The v1.4.0 release shipped a Mach-O `arm64`-only OMP binary (`kspaceFirstOrder-OMP` in `k-wave-omp-darwin` v1.4.0), which excludes every Intel Mac. Pre-2020 hardware is still the majority of macOS users in academic settings, and even on newer Apple Silicon machines x86_64 coverage is useful for Rosetta-only third-party stacks. 
+ +**Decision: ship a universal2 binary (arm64 + x86_64) — not two separate per-arch binaries.** Doubles the file size (~300 KB → ~600 KB, negligible) and avoids a per-arch download-selection step in `kwave/__init__.py`. The build flow already runs on `macos-latest` (Apple Silicon GitHub runners); switching to a universal build is a one-line CMake change plus a `libomp` install for both arches. + +**Tasks:** +1. **Build path:** Add `-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"` to the macOS leg of `ci-multi-platform.yml` in the unified repo, and ensure the Homebrew install step pulls a `libomp` that has both slices (Homebrew bottles are per-arch — may need `lipo` to combine, or build OMP from source). +2. **Verify:** `lipo -info kspaceFirstOrder-OMP` should report both `x86_64 arm64`. Smoke test on both an Apple Silicon Mac (native) and an Intel Mac (or `arch -x86_64` on AS via Rosetta). +3. **Release:** Tag `v1.4.1` (or whatever the next binary release is) on `k-wave-omp-darwin` with the universal binary, then bump `BINARY_VERSION` in `kwave/__init__.py`. +4. **No Python code changes** are needed — the existing `PLATFORM == "darwin"` branch in `kwave/__init__.py` doesn't distinguish architecture; a universal binary is consumed exactly the same way. + +**Future Mac topics (post-1.0, separate releases):** +- **CUDA on macOS:** Not feasible — Apple dropped NVIDIA driver support after macOS 10.13. The `darwin/cuda` slot in `URL_DICT` will stay empty indefinitely. +- **Metal / MPS backend for the Python solver:** Would unlock Apple Silicon GPU acceleration for the `backend="python"` path via something like `mlx` or PyTorch's MPS. Scope is the Python solver, not the C++ binary — best fit is the v2.x performance phase. + +--- + ## Phase 3: v1.0.0 - Clean Release **Goal:** Simple, readable, fast. Remove all deprecated code. @@ -298,8 +341,10 @@ result = kspaceFirstOrder(kgrid, medium, source, sensor, 1. ~~v0.5.0~~ ✅ Stabilize master 2. 
~~v0.6.0~~ ✅ Python solver + unified API + deprecations 3. ~~v0.6.1~~ ✅ C-order + examples + docs cleanup -4. **Next:** v0.6.2 — Tier 2 features + examples -5. **Then:** v0.6.3 — Axisymmetric support -6. **Then:** v0.7.0 — CLI (`kwp`) -7. **Then:** v1.0.0 — Clean release (delete deprecated code) -8. **Post-1.0:** Performance & scale based on profiling +4. **Next:** v0.6.2 — Binary refresh (sm_120 / Blackwell) +5. **Then:** v0.6.3 — Tier 2 features + examples +6. **Then:** v0.6.4 — Axisymmetric support +7. **Then:** v0.6.5 — Broader Darwin coverage (universal2 OMP) +8. **Then:** v0.7.0 — CLI (`kwp`) +9. **Then:** v1.0.0 — Clean release (delete deprecated code) +10. **Post-1.0:** Performance & scale based on profiling From 6306d872a74557bd1715be4f51d3e5b91ec2a359 Mon Sep 17 00:00:00 2001 From: Walter Simson Date: Sun, 17 May 2026 03:46:37 +0000 Subject: [PATCH 2/5] Guard Intel Mac against arm64-only darwin binary The v1.4.0 k-wave-omp-darwin binary is Mach-O arm64 only. Without a guard, Intel Mac users would silently download a binary they can't execute and hit "Bad CPU type in executable" (EBADARCH) at runtime. On Intel Mac: emit a RuntimeWarning at import explaining the constraint, and skip the darwin/omp URL so we don't waste bandwidth downloading a useless binary. backend="python" continues to work. Universal2 (arm64+x86_64) coverage is tracked for v0.6.5. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- kwave/__init__.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kwave/__init__.py b/kwave/__init__.py index 8fc1a52c7..9ae1e7085 100644 --- a/kwave/__init__.py +++ b/kwave/__init__.py @@ -3,6 +3,7 @@ import logging import os import platform +import warnings from pathlib import Path from typing import List from urllib.request import urlretrieve @@ -20,6 +21,18 @@ if PLATFORM not in ["linux", "windows", "darwin"]: raise NotImplementedError(f"k-wave-python is currently unsupported on this operating system: {PLATFORM}.") +# darwin C++ binary is arm64-only; universal2 coverage tracked for v0.6.5 +DARWIN_BINARY_ARCH = "arm64" +_darwin_unsupported = PLATFORM == "darwin" and platform.machine() != DARWIN_BINARY_ARCH +if _darwin_unsupported: + warnings.warn( + f"k-wave-python's macOS C++ binary is {DARWIN_BINARY_ARCH}-only. " + f"Detected {platform.machine()} — the C++ backend (backend='cpp') will not run on this machine. " + "Use backend='python' instead. 
Universal2 (Intel + Apple Silicon) coverage is tracked for v0.6.5.", + RuntimeWarning, + stacklevel=2, + ) + # TODO: install directly in to /bin/ directory system directory is no longer needed # TODO: deprecate in 0.5.0 BINARY_PATH = Path(__file__).parent / "bin" / PLATFORM @@ -58,7 +71,11 @@ def get_windows_release_urls(architecture: str) -> list: }, "darwin": { "cuda": [], - "omp": [URL_BASE + f"k-wave-omp-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}OMP"], + "omp": ( + [] + if _darwin_unsupported + else [URL_BASE + f"k-wave-omp-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}OMP"] + ), }, "windows": {architecture: get_windows_release_urls(architecture) for architecture in ARCHITECTURES}, } From 7f13647983913acdac791ac72d30845599ccee8d Mon Sep 17 00:00:00 2001 From: Walter Simson Date: Sun, 17 May 2026 04:02:44 +0000 Subject: [PATCH 3/5] Pin windows OMP to v1.3.0 (new v1.4.0 build has different DLL deps) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v1.4.0 OMP-windows build switched compiler/OpenMP/FFT stack (Intel compiler + Intel OpenMP + ? → MSVC + VCOMP + FFTW) and needs runtime DLLs that aren't packaged with the release: needed but not shipped: fftw3f.dll, VCOMP140.DLL, VCRUNTIME140_1.dll shipped but unneeded: cufft64_10.dll, libiomp5md.dll, libmmd.dll, svml_dispmd.dll Windows OMP doesn't benefit from the v1.4.0 Blackwell changes anyway (CUDA-only), so route it back to the working v1.3.0 binary until the build is fixed in kspacefirstorder-unified#14. CUDA-windows, all linux binaries, and darwin OMP continue to use v1.4.0. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- kwave/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kwave/__init__.py b/kwave/__init__.py index 9ae1e7085..d16cec1ff 100644 --- a/kwave/__init__.py +++ b/kwave/__init__.py @@ -15,7 +15,10 @@ # Constants and Configurations URL_BASE = "https://github.com/waltsims/" BINARY_VERSION = "v1.4.0" -PREFIX = f"{URL_BASE}kspaceFirstOrder-{{}}-{{}}/releases/download/{BINARY_VERSION}/" +# Windows OMP build switched compiler/OpenMP/FFT stack in v1.4.0 and now needs different +# runtime DLLs than v1.3.0 ships; pin windows OMP to v1.3.0 until the build packages its +# own DLLs (or links statically). Tracked in kspacefirstorder-unified#14. +WINDOWS_OMP_VERSION = "v1.3.0" PLATFORM = platform.system().lower() if PLATFORM not in ["linux", "windows", "darwin"]: @@ -57,11 +60,12 @@ def get_windows_release_urls(architecture: str) -> list: + version = WINDOWS_OMP_VERSION if architecture == "omp" else BINARY_VERSION specific_filenames = [EXECUTABLE_PREFIX + architecture + ".exe"] if architecture == "omp": specific_filenames += WINDOWS_DLLS - release_urls = [PREFIX.format(architecture.upper(), PLATFORM.lower()) + filename for filename in specific_filenames] - return release_urls + base = f"{URL_BASE}kspaceFirstOrder-{architecture.upper()}-{PLATFORM.lower()}/releases/download/{version}/" + return [base + filename for filename in specific_filenames] URL_DICT = { From 3963e80a8585e849badad31926d58a3803493806 Mon Sep 17 00:00:00 2001 From: Walter Simson Date: Mon, 18 May 2026 00:07:08 +0000 Subject: [PATCH 4/5] Bump BINARY_VERSION to v1.4.1 + version to 0.6.2 Linux + macOS pull from the rebuilt v1.4.1 mirror releases: - Linux binaries are now statically linked (CUDA + cufft + FFTW + libstdc++) and built on ubuntu-22.04 (glibc 2.35 floor). Restores the plug-and-play property the legacy Makefile build provided; permanent regression guard via check-linux-binary-deps.sh in unified. 
- Darwin binary picks up the fast-math fix (k-wave-omp-darwin#4) and the libhdf5.320 ABI refresh (closes #661). Windows stays pinned to v1.3.0 for both OMP and CUDA. v1.4.x windows releases don't bundle their runtime DLLs (different stacks for both flavors). The v1.4.x OMP DLL bundling is fixed in kspacefirstorder-unified#14 (awaiting production validation); CUDA DLL bundling is tracked separately in kspacefirstorder-unified#17. Pin gets flipped in v0.6.3 once both windows flavors are validated. Picks up via this bump: - #656, #622 (Blackwell sm_120 on Linux CUDA only; macOS has no CUDA build and Windows CUDA stays on v1.3.0) - #661 (macOS HDF5 ABI) - kspacefirstorder-unified#15 (Linux binary regression) Closes #738 --- kwave/__init__.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/kwave/__init__.py b/kwave/__init__.py index 27204db0a..31a7f7636 100644 --- a/kwave/__init__.py +++ b/kwave/__init__.py @@ -11,15 +11,20 @@ # Test installation with: # python3 -m pip install -i https://test.pypi.org/simple/ --extra-index-url=https://pypi.org/simple/ k-Wave-python==0.3.0 -__version__ = "0.6.1" +__version__ = "0.6.2" # Constants and Configurations URL_BASE = "https://github.com/waltsims/" -BINARY_VERSION = "v1.4.0" -# Windows OMP build switched compiler/OpenMP/FFT stack in v1.4.0 and now needs different -# runtime DLLs than v1.3.0 ships; pin windows OMP to v1.3.0 until the build packages its -# own DLLs (or links statically). Tracked in kspacefirstorder-unified#14. +BINARY_VERSION = "v1.4.1" +# Pin both Windows binaries to v1.3.0. v1.4.x windows builds switched compiler / +# OpenMP / FFT / CUDA runtime stacks and neither v1.4.x release ships its runtime +# DLLs (cufft, cudart, vcomp, vcruntime140_1, fftw3f, etc.). v1.3.0 binaries are +# self-contained with their Intel-era DLL bundle (listed in WINDOWS_DLLS below, +# downloaded with the OMP request and used by both .exe files since they share +# kwave/bin/windows/). 
OMP DLL bundling is fixed in kspacefirstorder-unified#14 +# (awaiting validation); CUDA DLL bundling is tracked in kspacefirstorder-unified#17. WINDOWS_OMP_VERSION = "v1.3.0" +WINDOWS_CUDA_VERSION = "v1.3.0" PLATFORM = platform.system().lower() if PLATFORM not in ["linux", "windows", "darwin"]: @@ -61,7 +66,7 @@ def get_windows_release_urls(architecture: str) -> list: - version = WINDOWS_OMP_VERSION if architecture == "omp" else BINARY_VERSION + version = WINDOWS_OMP_VERSION if architecture == "omp" else WINDOWS_CUDA_VERSION specific_filenames = [EXECUTABLE_PREFIX + architecture + ".exe"] if architecture == "omp": specific_filenames += WINDOWS_DLLS @@ -77,9 +82,7 @@ def get_windows_release_urls(architecture: str) -> list: "darwin": { "cuda": [], "omp": ( - [] - if _darwin_unsupported - else [URL_BASE + f"k-wave-omp-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}OMP"] + [] if _darwin_unsupported else [URL_BASE + f"k-wave-omp-{PLATFORM}/releases/download/{BINARY_VERSION}/{EXECUTABLE_PREFIX}OMP"] ), }, "windows": {architecture: get_windows_release_urls(architecture) for architecture in ARCHITECTURES}, From 92fd493abab813dfe57a5763528ed36790867564 Mon Sep 17 00:00:00 2001 From: Walter Simson Date: Mon, 18 May 2026 00:53:11 +0000 Subject: [PATCH 5/5] Update release strategy with v0.6.2 retrospective + carry-overs - v0.6.2 section rewritten as retrospective: v1.4.1 (not v1.4.0), Linux static linking, darwin fast-math fix, Windows pinned to v1.3.0 - v0.6.3 picks up the Windows pin flips (OMP validation + CUDA DLL packaging fix per unified#17) as carry-over items - New "Binary distribution maintenance" section captures the version-less pipeline work: mirror consolidation (unified#13) + Windows static linking (v1.5 follow-up) --- plans/release-strategy.md | 48 +++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/plans/release-strategy.md b/plans/release-strategy.md index 687b3174b..75c76094d 100644 --- 
a/plans/release-strategy.md +++ b/plans/release-strategy.md @@ -157,30 +157,30 @@ Combined release: C-order migration, example restructure, parity tests, docs cle ### v0.6.2 — Binary refresh (sm_120 / Blackwell) -**Goal:** Ship `kwave/__init__.py` URL pins that point at the v1.4.0 upstream binaries (sm_120 / Blackwell support). This is a thin release: no Python-side code changes, just pin bumps and a CHANGELOG entry. +**Goal:** Ship `kwave/__init__.py` URL pins that point at fixed upstream binaries with sm_120 / Blackwell support, the macOS HDF5 ABI refresh, and the macOS fast-math fix. -**Background:** The unified build pipeline (`kspacefirstorder-unified` @ `02026d05`) produced 5 binary artifacts on 2026-05-16. CUDA archs now include `sm_75;80;86;87;89;90;90a;100;120`. The 5-mirror release flow is itself being retired — see waltsims/kspacefirstorder-unified#13 for the consolidation that obviates a manual runbook. +**What actually shipped (retrospective):** v1.4.0 had to be revoked due to packaging regressions and a numerical bug. v0.6.2 ships against **v1.4.1** instead. Summary: -**Release sequencing (do in order):** -1. **Tag `v1.4.0` on `kspacefirstorder-unified` @ `02026d05`** — provenance pointer ("this SHA produced the v1.4.0 binaries"). Diff vs current HEAD is doc-only. -2. **Tag `v1.4.0` on each of the 5 mirror repos** with the corresponding artifact: - - `kspaceFirstOrder-CUDA-linux` ← `kspaceFirstOrder-cuda-linux-13.0.0/kspaceFirstOrder-CUDA` - - `kspaceFirstOrder-CUDA-windows` ← `kspaceFirstOrder-cuda-windows-13.0.0/kspaceFirstOrder-CUDA.exe` - - `kspaceFirstOrder-OMP-linux` ← `kspaceFirstOrder-openmp-linux-ubuntu-latest/kspaceFirstOrder-OMP` - - `kspaceFirstOrder-OMP-windows` ← `kspaceFirstOrder-openmp-windows-windows-latest/kspaceFirstOrder-OMP.exe` - - `k-wave-omp-darwin` ← `kspaceFirstOrder-openmp-darwin-macos-latest/kspaceFirstOrder-OMP` (arm64-only — see v0.6.5) -3. 
In `kwave/__init__.py`, collapse the per-platform version pins (`v1.3.0`, `v1.3.1`, `v0.3.0rc3`) into a single `BINARY_VERSION = "v1.4.0"` used by all five URLs. Open the k-wave-python PR. -4. Verify on a real Blackwell GPU (cc @aconesac or Brno team for RTX 5070 Ti). -5. Close issues [#656](https://github.com/waltsims/k-wave-python/issues/656) and [#622](https://github.com/waltsims/k-wave-python/issues/622) on release. -6. **Bonus close for [#661](https://github.com/waltsims/k-wave-python/issues/661) (macOS HDF5 ABI):** the new darwin OMP binary links `libhdf5.320.dylib` (current Homebrew ABI) — verified with `strings` on the artifact. If a current-Homebrew macOS smoke test runs clean, close #661 referencing the v1.4.0 `k-wave-omp-darwin` release. +- **Linux binaries** — `v1.4.1` rebuilt with static CUDA + cufft + FFTW + libstdc++ on ubuntu-22.04 (glibc 2.35 floor), restoring the plug-and-play property the legacy Makefile build had. Permanent regression guard added via `scripts/check-linux-binary-deps.sh` in the unified repo. Fixed in [kspacefirstorder-unified#15](https://github.com/waltsims/kspacefirstorder-unified/issues/15) and [#16](https://github.com/waltsims/kspacefirstorder-unified/pull/16). +- **macOS OMP** — `v1.4.1` picks up the fast-math fix ([k-wave-omp-darwin#4](https://github.com/waltsims/k-wave-omp-darwin/pull/4)) that prevents NaNs in absorbing-media simulations on arm64. ABI refresh (`libhdf5.320.dylib`) preserved from v1.4.0; closes [#661](https://github.com/waltsims/k-wave-python/issues/661). +- **Windows OMP + CUDA** — both pinned to **v1.3.0** for this release. v1.4.x windows builds switched compiler/OpenMP/FFT/CUDA-runtime stacks but neither bundles its runtime DLLs. v1.3.0 binaries are self-contained with the Intel-era DLL bundle (shipped via `WINDOWS_DLLS` with the OMP request and used by both `.exe` files since they share `kwave/bin/windows/`). 
OMP DLL bundling fix landed in [kspacefirstorder-unified#14](https://github.com/waltsims/kspacefirstorder-unified/issues/14) (awaiting production validation); CUDA bundling tracked in [kspacefirstorder-unified#17](https://github.com/waltsims/kspacefirstorder-unified/issues/17). Pin gets flipped in v0.6.3 once both windows flavors are validated end-to-end. +- **Chmod fix** — independent fix for `download_binaries` not setting the exec bit on Linux/macOS, surfaced during v0.6.2 validation on Colab ([#741](https://github.com/waltsims/k-wave-python/pull/741)). +- **Intel Mac guard** — `kwave/__init__.py` now emits a `RuntimeWarning` and skips the darwin OMP download on `darwin` x86_64, since the v1.4.x darwin binary is arm64-only. Universal2 coverage remains tracked in v0.6.5. -**Out of scope:** Darwin x86_64 (Intel Mac) coverage — the v1.4.0 OMP-darwin binary is arm64-only, which is a regression vs. older Intel-era releases. Tracked separately in v0.6.5. +**Closes:** [#622](https://github.com/waltsims/k-wave-python/issues/622), [#656](https://github.com/waltsims/k-wave-python/issues/656), [#661](https://github.com/waltsims/k-wave-python/issues/661), [#738](https://github.com/waltsims/k-wave-python/issues/738), [#740](https://github.com/waltsims/k-wave-python/issues/740). + +**Out of scope:** Darwin x86_64 (Intel Mac) coverage — v1.4.x OMP-darwin is arm64-only. Tracked in v0.6.5. --- -### v0.6.3 — Tier 2 Features + Examples +### v0.6.3 — Tier 2 Features + Examples + Windows binary pin flips + +**Goal:** Add solver features needed by Tier 2 examples, port those examples, and flip the Windows binary pins to v1.4.x once both flavors are validated. -**Goal:** Add solver features needed by Tier 2 examples, port those examples. +**Windows binary pin flips (carried over from v0.6.2):** +- **Validate `v1.4.1` windows-OMP** end-to-end on a Windows machine (smoke + small simulation). 
Bundle landed in [kspacefirstorder-unified#14](https://github.com/waltsims/kspacefirstorder-unified/issues/14); CI smoke passes; runtime sim not yet confirmed. +- **Fix `v1.4.x` windows-CUDA DLL packaging** in [kspacefirstorder-unified#17](https://github.com/waltsims/kspacefirstorder-unified/issues/17) — same DLL-bundling pattern as windows-omp, plus CUDA runtime DLLs (`cudart64_*.dll`, `cufft64_*.dll`) from `$env:CUDA_PATH\bin`. +- Once both validated: drop `WINDOWS_OMP_VERSION` + `WINDOWS_CUDA_VERSION` pins in `kwave/__init__.py` (both use `BINARY_VERSION`). Likely also: refactor `WINDOWS_DLLS` into per-architecture lists since OMP and CUDA need slightly different MSVC redist + runtime deps. **Features to add:** - **Time-reversal reconstruction** — needed by PR examples (`pr_2D_TR_*`, `pr_3D_TR_*`) @@ -223,6 +223,20 @@ Axisymmetric = dimensionality reduction (3D→2D or 2D→1D). Not a separate sol --- +## Binary distribution maintenance (no fixed version) + +Work on the upstream binary side that doesn't need its own k-wave-python release — it gets picked up by whatever version bump happens to be in flight. + +### Mirror consolidation (kspacefirstorder-unified#13) + +Collapse the 5 per-platform mirror repos (`kspaceFirstOrder-{CUDA,OMP}-{linux,windows}` + `k-wave-omp-darwin`) into the unified repo. Each cross-cutting change currently lands ~2-3× (proven by v1.4.0 / v1.4.1 taking ~12 PRs across repos); the consolidation retires that overhead. Cross-linked from k-wave-python#738 — flip `URL_DICT` to point at unified once the consolidation lands. Best fit for v1.5.0 or alongside whichever release first benefits. + +### Static-link Windows binaries (v1.5 follow-up) + +The v0.6.2 ship used the **bundle** approach for Windows OMP (DLLs alongside the .exe) because static-linking on Windows MSVC is more complex (`/MT` for CRT, vcpkg static port for FFTW, `VCOMP140` static linking is awkward, may need LLVM OpenMP). 
The Linux fix in unified#15 already uses static linking; Windows should follow once the broader v1.5 static-linking discipline is in scope. Track alongside mirror consolidation since both are "tidy up the binary pipeline" work. + +--- + ## Phase 3: v1.0.0 - Clean Release **Goal:** Simple, readable, fast. Remove all deprecated code.