diff --git a/.gitignore b/.gitignore index b773865..9468ae8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ mkl_fft/_pydfti.c mkl_fft/_pydfti.cpython*.so mkl_fft/_pydfti.*-win_amd64.pyd mkl_fft/src/mklfft.c + +# ASV benchmark artifacts +.asv/ diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..6370fdb --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,67 @@ +# mkl_fft ASV Benchmarks + +Performance benchmarks for [mkl_fft](https://github.com/IntelPython/mkl_fft) using +[Airspeed Velocity (ASV)](https://asv.readthedocs.io/en/stable/). + +## Coverage + +| File | API | Transforms | Dtypes | Sizes/Shapes | +|------|-----|-----------|--------|-------------| +| `bench_fft1d.py` | `mkl_fft` | `fft`, `ifft`, `rfft`, `irfft` | float32, float64, complex64, complex128 | power-of-two and non-power-of-two | +| `bench_fftnd.py` | `mkl_fft` | `fft2`, `ifft2`, `rfft2`, `irfft2`, `fftn`, `ifftn`, `rfftn`, `irfftn` | float32, float64, complex64, complex128 | square and non-square/non-cubic | +| `bench_interfaces.py` | `mkl_fft.interfaces.{numpy_fft, scipy_fft}` | All exported functions; selected by a `module` parameter. Hermitian 2-D/N-D (`hfft2`, `hfftn`) are scipy-only. | float32, float64, complex64, complex128 | power-of-two and cubic | +| `bench_memory.py` | `mkl_fft` | Peak RSS for 1-D, 2-D, and 3-D transforms | float32, float64, complex128 | power-of-two | + +## Threading + +Set `MKL_NUM_THREADS` in the environment before running ASV to control the +thread count used by MKL: + +```bash +MKL_NUM_THREADS=8 asv run --python=same --quick HEAD^! +``` + +If `MKL_NUM_THREADS` is not set, `__init__.py` applies a default: **4** threads +when the machine has 4 or more physical cores, or **1** (single-threaded) +otherwise. This keeps results comparable across CI machines in the shared pool +regardless of their total core count. 
Physical cores are detected via +`psutil.cpu_count(logical=False)` — hyperthreads are excluded per MKL +recommendation. + +## Notes on Measurement + +### DFTI descriptor warmup + +MKL creates a DFTI descriptor on the first FFT call for a given (size, dtype, +strides) combination and reuses it on subsequent calls. To avoid charging +that one-time cost to the first measured iteration, each timing benchmark's `setup` +performs an explicit warmup call after preparing the input array. ASV's +default `warmup_time` (0.1s) already amortizes this for sub-millisecond +transforms, but the explicit warmup makes the intent visible. + +## Running Benchmarks + +Prerequisites: + +```bash +pip install asv psutil +``` + +Run benchmarks against the current environment: + +```bash +asv run --python=same --quick HEAD^! +``` + +Compare two commits: + +```bash +asv continuous --python=same HEAD~1 HEAD +``` + +View results in a browser: + +```bash +asv publish +asv preview +``` diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 0000000..1e26be0 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,24 @@ +{ + "version": 1, + "project": "mkl_fft", + "project_url": "https://github.com/IntelPython/mkl_fft", + "show_commit_url": "https://github.com/IntelPython/mkl_fft/commit/", + "repo": "..", + "branches": [ + "master" + ], + "environment_type": "conda", + "conda_channels": [ + "https://software.repos.intel.com/python/conda/", + "conda-forge" + ], + "benchmark_dir": "benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html", + "build_cache_size": 2, + "default_benchmark_timeout": 500, + "regressions_thresholds": { + ".*": 0.3 + } +} diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 0000000..9b89c10 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1,21 @@ +"""ASV benchmarks for mkl_fft""" + +import os + +import psutil + +_MIN_THREADS = 4 # minimum 
physical cores required for multi-threaded mode + + +def _physical_cores(): + """Return physical core count; fall back to 1 (conservative).""" + return psutil.cpu_count(logical=False) or 1 + + +def _thread_count(): + physical = _physical_cores() + return str(_MIN_THREADS) if physical >= _MIN_THREADS else "1" + + +_THREADS = os.environ.get("MKL_NUM_THREADS", _thread_count()) +os.environ["MKL_NUM_THREADS"] = _THREADS diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py new file mode 100644 index 0000000..d209dbb --- /dev/null +++ b/benchmarks/benchmarks/_utils.py @@ -0,0 +1,73 @@ +"""Shared utilities for mkl_fft benchmarks.""" + +import numpy as np + +_RNG_SEED = 42 + + +def _make_input(rng, shape, dtype): + """Return an array of *shape* and *dtype*. + + Complex dtypes get non-zero imaginary parts for a realistic signal. + `shape` may be an int (1-D) or a tuple. + """ + dt = np.dtype(dtype) + s = (shape,) if isinstance(shape, int) else shape + if dt.kind == "c": + return (rng.standard_normal(s) + 1j * rng.standard_normal(s)).astype(dt) + return rng.standard_normal(s).astype(dt) + + +class BenchC2C: + """Base setup for complex-to-complex benchmarks. + + Subclasses define params, param_names, and time_* / peakmem_* methods. + Other positional params are ignored. + """ + + def setup(self, shape, dtype, *_): + rng = np.random.default_rng(_RNG_SEED) + self.x = _make_input(rng, shape, dtype) + + +# dtype axes +_DTYPES_ALL = ["float32", "float64", "complex64", "complex128"] +_DTYPES_REAL = ["float32", "float64"] +_DTYPES_REDUCED = ["float64", "complex128"] + +# shape/size axes shared across multiple files +_SHAPES_2D = [(64, 64), (128, 128), (256, 256), (512, 512)] +_SHAPES_2D_IFACE = [(64, 64), (256, 256), (512, 512)] +_SHAPES_3D = [(16, 16, 16), (32, 32, 32), (64, 64, 64)] + + +class BenchR2C: + """Base setup for real-to-complex / complex-to-real and Hermitian benchmarks. 
+ + Prepares: + self.x_real — real array of full shape (rfft / ihfft input) + self.x_complex — complex half-spectrum array (irfft / hfft input) + + DC (index 0 of the last axis) of x_complex has its imaginary part zeroed, + and when the full last-axis length is even the Nyquist bin imaginary part + is also zeroed, satisfying Hermitian symmetry expected by hfft / hfft2 / + hfftn. Extra positional params are accepted and ignored. + """ + + def setup(self, shape, dtype, *_): + rng = np.random.default_rng(_RNG_SEED) + cdtype = "complex64" if dtype == "float32" else "complex128" + if isinstance(shape, int): + n_last = shape + half_shape = shape // 2 + 1 + else: + n_last = shape[-1] + half_shape = shape[:-1] + (shape[-1] // 2 + 1,) + self.x_real = rng.standard_normal(shape).astype(dtype) + self.x_complex = ( + rng.standard_normal(half_shape) + + 1j * rng.standard_normal(half_shape) + ).astype(cdtype) + self.x_complex[..., 0] = self.x_complex[..., 0].real + if n_last % 2 == 0: + self.x_complex[..., -1] = self.x_complex[..., -1].real diff --git a/benchmarks/benchmarks/bench_fft1d.py b/benchmarks/benchmarks/bench_fft1d.py new file mode 100644 index 0000000..323b41a --- /dev/null +++ b/benchmarks/benchmarks/bench_fft1d.py @@ -0,0 +1,105 @@ +"""Benchmarks for 1-D FFT operations using the mkl_fft root API.""" + +import mkl_fft + +from ._utils import _DTYPES_ALL, _DTYPES_REAL, BenchC2C, BenchR2C + +_SIZES_POW2 = [64, 256, 1024, 4096, 16384, 65536] +_SIZES_NONPOW2 = [127, 509, 1000, 4001, 10007] + + +# --------------------------------------------------------------------------- +# Complex-to-complex 1-D (power-of-two sizes) +# --------------------------------------------------------------------------- + + +class BenchFFT1D(BenchC2C): + """Forward and inverse complex FFT — power-of-two sizes.""" + + params = [_SIZES_POW2, _DTYPES_ALL] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + super().setup(n, dtype) + # prime MKL DFTI descriptor cache + mkl_fft.fft(self.x) + 
mkl_fft.ifft(self.x) + + def time_fft(self, n, dtype): + mkl_fft.fft(self.x) + + def time_ifft(self, n, dtype): + mkl_fft.ifft(self.x) + + +# --------------------------------------------------------------------------- +# Real-to-complex / complex-to-real 1-D (power-of-two sizes) +# --------------------------------------------------------------------------- + + +class BenchRFFT1D(BenchR2C): + """Forward rfft and inverse irfft — power-of-two sizes.""" + + params = [_SIZES_POW2, _DTYPES_REAL] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + super().setup(n, dtype) + mkl_fft.rfft(self.x_real) + mkl_fft.irfft(self.x_complex, n=n) + + def time_rfft(self, n, dtype): + mkl_fft.rfft(self.x_real) + + def time_irfft(self, n, dtype): + mkl_fft.irfft(self.x_complex, n=n) + + +# --------------------------------------------------------------------------- +# Complex-to-complex 1-D (non-power-of-two sizes) +# --------------------------------------------------------------------------- + + +class BenchFFT1DNonPow2(BenchC2C): + """Forward and inverse complex FFT — non-power-of-two sizes. + + MKL uses a different code path for non-power-of-two transforms; + this suite catches regressions in that path. 
+ """ + + params = [_SIZES_NONPOW2, _DTYPES_ALL] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + super().setup(n, dtype) + mkl_fft.fft(self.x) + mkl_fft.ifft(self.x) + + def time_fft(self, n, dtype): + mkl_fft.fft(self.x) + + def time_ifft(self, n, dtype): + mkl_fft.ifft(self.x) + + +# --------------------------------------------------------------------------- +# Real-to-complex / complex-to-real 1-D (non-power-of-two sizes) +# --------------------------------------------------------------------------- + + +class BenchRFFT1DNonPow2(BenchR2C): + """Forward rfft and inverse irfft — non-power-of-two sizes.""" + + params = [_SIZES_NONPOW2, _DTYPES_REAL] + param_names = ["n", "dtype"] + + def setup(self, n, dtype): + super().setup(n, dtype) + mkl_fft.rfft(self.x_real) + mkl_fft.irfft(self.x_complex, n=n) + + def time_rfft(self, n, dtype): + mkl_fft.rfft(self.x_real) + + def time_irfft(self, n, dtype): + mkl_fft.irfft(self.x_complex, n=n) diff --git a/benchmarks/benchmarks/bench_fftnd.py b/benchmarks/benchmarks/bench_fftnd.py new file mode 100644 index 0000000..b200284 --- /dev/null +++ b/benchmarks/benchmarks/bench_fftnd.py @@ -0,0 +1,174 @@ +"""Benchmarks for 2-D and N-D FFT operations using the mkl_fft root API.""" + +import mkl_fft + +from ._utils import ( + _DTYPES_ALL, + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) + +# --------------------------------------------------------------------------- +# 2-D complex-to-complex (power-of-two, square + non-square) +# --------------------------------------------------------------------------- + + +class BenchFFT2D(BenchC2C): + """Forward and inverse 2-D FFT — square and non-square shapes.""" + + params = [ + _SHAPES_2D + [(256, 128), (512, 256)], + _DTYPES_ALL, + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + super().setup(shape, dtype) + # Prime MKL DFTI descriptor cache + mkl_fft.fft2(self.x) + mkl_fft.ifft2(self.x) + + def 
time_fft2(self, shape, dtype): + mkl_fft.fft2(self.x) + + def time_ifft2(self, shape, dtype): + mkl_fft.ifft2(self.x) + + +# --------------------------------------------------------------------------- +# 2-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class BenchRFFT2D(BenchR2C): + """Forward rfft2 and inverse irfft2.""" + + params = [_SHAPES_2D, _DTYPES_REAL] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + super().setup(shape, dtype) + mkl_fft.rfft2(self.x_real) + mkl_fft.irfft2(self.x_complex, s=shape) + + def time_rfft2(self, shape, dtype): + mkl_fft.rfft2(self.x_real) + + def time_irfft2(self, shape, dtype): + mkl_fft.irfft2(self.x_complex, s=shape) + + +# --------------------------------------------------------------------------- +# 2-D complex-to-complex (non-power-of-two) +# --------------------------------------------------------------------------- + + +class BenchFFT2DNonPow2(BenchC2C): + """Forward and inverse 2-D FFT — non-power-of-two sizes.""" + + params = [ + [ + (96, 96), + (100, 100), + (270, 270), + (500, 500), + (100, 200), # non-square non-pow2 + ], + _DTYPES_REDUCED, + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + super().setup(shape, dtype) + mkl_fft.fft2(self.x) + mkl_fft.ifft2(self.x) + + def time_fft2(self, shape, dtype): + mkl_fft.fft2(self.x) + + def time_ifft2(self, shape, dtype): + mkl_fft.ifft2(self.x) + + +# --------------------------------------------------------------------------- +# N-D complex-to-complex (3-D cubes + non-cubic shape) +# --------------------------------------------------------------------------- + + +class BenchFFTnD(BenchC2C): + """Forward and inverse N-D FFT.""" + + params = [ + _SHAPES_3D + [(32, 64, 128)], + _DTYPES_ALL, + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + super().setup(shape, dtype) + mkl_fft.fftn(self.x) + mkl_fft.ifftn(self.x) + + def 
time_fftn(self, shape, dtype): + mkl_fft.fftn(self.x) + + def time_ifftn(self, shape, dtype): + mkl_fft.ifftn(self.x) + + +# --------------------------------------------------------------------------- +# N-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class BenchRFFTnD(BenchR2C): + """Forward rfftn and inverse irfftn.""" + + params = [_SHAPES_3D, _DTYPES_REAL] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + super().setup(shape, dtype) + mkl_fft.rfftn(self.x_real) + mkl_fft.irfftn(self.x_complex, s=shape) + + def time_rfftn(self, shape, dtype): + mkl_fft.rfftn(self.x_real) + + def time_irfftn(self, shape, dtype): + mkl_fft.irfftn(self.x_complex, s=shape) + + +# --------------------------------------------------------------------------- +# N-D complex-to-complex (non-power-of-two 3-D) +# --------------------------------------------------------------------------- + + +class BenchFFTnDNonPow2(BenchC2C): + """Forward and inverse N-D FFT — non-power-of-two sizes.""" + + params = [ + [ + (24, 24, 24), + (30, 30, 30), + (50, 50, 50), + (30, 40, 50), # non-cubic non-pow2 + ], + _DTYPES_REDUCED, + ] + param_names = ["shape", "dtype"] + + def setup(self, shape, dtype): + super().setup(shape, dtype) + mkl_fft.fftn(self.x) + mkl_fft.ifftn(self.x) + + def time_fftn(self, shape, dtype): + mkl_fft.fftn(self.x) + + def time_ifftn(self, shape, dtype): + mkl_fft.ifftn(self.x) diff --git a/benchmarks/benchmarks/bench_interfaces.py b/benchmarks/benchmarks/bench_interfaces.py new file mode 100644 index 0000000..e26ac4a --- /dev/null +++ b/benchmarks/benchmarks/bench_interfaces.py @@ -0,0 +1,284 @@ +"""Benchmarks for mkl_fft.interfaces.{numpy_fft, scipy_fft}. + +A single ``module`` parameter selects the interface, following SciPy's +benchmark layout (scipy/benchmarks/benchmarks/fft_basic.py). 
+ +Covered transforms: + fft / ifft — 1-D C2C + rfft / irfft — 1-D R2C / C2R + hfft / ihfft — 1-D Hermitian + fft2 / ifft2 — 2-D C2C + rfft2 / irfft2 — 2-D R2C / C2R + hfft2 / ihfft2 — 2-D Hermitian (scipy_fft only) + fftn / ifftn — N-D C2C + rfftn / irfftn — N-D R2C / C2R + hfftn / ihfftn — N-D Hermitian (scipy_fft only) +""" + +from mkl_fft.interfaces import numpy_fft, scipy_fft + +from ._utils import ( + _DTYPES_ALL, + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D_IFACE, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) + +_SIZES_1D = [256, 1024, 16384] +_MODULES = ["numpy_fft", "scipy_fft"] +_MODULES_SCIPY_ONLY = ["scipy_fft"] +_MODULE_MAP = {"numpy_fft": numpy_fft, "scipy_fft": scipy_fft} + + +# --------------------------------------------------------------------------- +# 1-D complex-to-complex +# --------------------------------------------------------------------------- + + +class BenchC2C1D(BenchC2C): + """fft / ifft — 1-D.""" + + params = [_SIZES_1D, _DTYPES_ALL, _MODULES] + param_names = ["n", "dtype", "module"] + + def setup(self, n, dtype, module): + super().setup(n, dtype) + mod = _MODULE_MAP[module] + self.fft = mod.fft + self.ifft = mod.ifft + # Prime MKL DFTI descriptor cache + self.fft(self.x) + self.ifft(self.x) + + def time_fft(self, n, dtype, module): + self.fft(self.x) + + def time_ifft(self, n, dtype, module): + self.ifft(self.x) + + +# --------------------------------------------------------------------------- +# 1-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class BenchRC1D(BenchR2C): + """rfft / irfft — 1-D.""" + + params = [_SIZES_1D, _DTYPES_REAL, _MODULES] + param_names = ["n", "dtype", "module"] + + def setup(self, n, dtype, module): + super().setup(n, dtype) + mod = _MODULE_MAP[module] + self.rfft = mod.rfft + self.irfft = mod.irfft + self.rfft(self.x_real) + self.irfft(self.x_complex, n=n) + + def time_rfft(self, n, dtype, module): + self.rfft(self.x_real) + + def 
time_irfft(self, n, dtype, module): + self.irfft(self.x_complex, n=n) + + +# --------------------------------------------------------------------------- +# 1-D Hermitian +# hfft: input complex length n//2+1 → output real length n +# ihfft: input real length n → output complex length n//2+1 +# --------------------------------------------------------------------------- + + +class BenchHermitian1D(BenchR2C): + """hfft / ihfft — 1-D Hermitian. + + *dtype* is the **output** dtype of hfft (real); the inverse ihfft + takes the same real input and produces the corresponding complex output. + """ + + params = [_SIZES_1D, _DTYPES_REAL, _MODULES] + param_names = ["n", "dtype", "module"] + + def setup(self, n, dtype, module): + super().setup(n, dtype) + mod = _MODULE_MAP[module] + self.hfft = mod.hfft + self.ihfft = mod.ihfft + self.hfft(self.x_complex, n=n) + self.ihfft(self.x_real) + + def time_hfft(self, n, dtype, module): + self.hfft(self.x_complex, n=n) + + def time_ihfft(self, n, dtype, module): + self.ihfft(self.x_real) + + +# --------------------------------------------------------------------------- +# 2-D complex-to-complex +# --------------------------------------------------------------------------- + + +class BenchC2C2D(BenchC2C): + """fft2 / ifft2 — 2-D.""" + + params = [_SHAPES_2D_IFACE, _DTYPES_REDUCED, _MODULES] + param_names = ["shape", "dtype", "module"] + + def setup(self, shape, dtype, module): + super().setup(shape, dtype) + mod = _MODULE_MAP[module] + self.fft2 = mod.fft2 + self.ifft2 = mod.ifft2 + self.fft2(self.x) + self.ifft2(self.x) + + def time_fft2(self, shape, dtype, module): + self.fft2(self.x) + + def time_ifft2(self, shape, dtype, module): + self.ifft2(self.x) + + +# --------------------------------------------------------------------------- +# 2-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class BenchRC2D(BenchR2C): + """rfft2 / irfft2 — 2-D.""" + + params = 
[_SHAPES_2D_IFACE, _DTYPES_REAL, _MODULES] + param_names = ["shape", "dtype", "module"] + + def setup(self, shape, dtype, module): + super().setup(shape, dtype) + mod = _MODULE_MAP[module] + self.rfft2 = mod.rfft2 + self.irfft2 = mod.irfft2 + self.rfft2(self.x_real) + self.irfft2(self.x_complex, s=shape) + + def time_rfft2(self, shape, dtype, module): + self.rfft2(self.x_real) + + def time_irfft2(self, shape, dtype, module): + self.irfft2(self.x_complex, s=shape) + + +# --------------------------------------------------------------------------- +# 2-D Hermitian (scipy_fft only — not in numpy_fft interface) +# hfft2: input complex shape (M, N//2+1) → output real shape (M, N) +# ihfft2: input real shape (M, N) → output complex shape (M, N//2+1) +# --------------------------------------------------------------------------- + + +class BenchHermitian2D(BenchR2C): + """scipy_fft.hfft2 / ihfft2 — 2-D Hermitian. + + *dtype* is the **output** dtype of hfft2 (real). + """ + + params = [_SHAPES_2D_IFACE, _DTYPES_REAL, _MODULES_SCIPY_ONLY] + param_names = ["shape", "dtype", "module"] + + def setup(self, shape, dtype, module): + super().setup(shape, dtype) + mod = _MODULE_MAP[module] + self.hfft2 = mod.hfft2 + self.ihfft2 = mod.ihfft2 + self.hfft2(self.x_complex, s=shape) + self.ihfft2(self.x_real) + + def time_hfft2(self, shape, dtype, module): + self.hfft2(self.x_complex, s=shape) + + def time_ihfft2(self, shape, dtype, module): + self.ihfft2(self.x_real) + + +# --------------------------------------------------------------------------- +# N-D complex-to-complex +# --------------------------------------------------------------------------- + + +class BenchC2CND(BenchC2C): + """fftn / ifftn — N-D.""" + + params = [_SHAPES_3D, _DTYPES_REDUCED, _MODULES] + param_names = ["shape", "dtype", "module"] + + def setup(self, shape, dtype, module): + super().setup(shape, dtype) + mod = _MODULE_MAP[module] + self.fftn = mod.fftn + self.ifftn = mod.ifftn + self.fftn(self.x) + 
self.ifftn(self.x) + + def time_fftn(self, shape, dtype, module): + self.fftn(self.x) + + def time_ifftn(self, shape, dtype, module): + self.ifftn(self.x) + + +# --------------------------------------------------------------------------- +# N-D real-to-complex / complex-to-real +# --------------------------------------------------------------------------- + + +class BenchRCND(BenchR2C): + """rfftn / irfftn — N-D.""" + + params = [_SHAPES_3D, _DTYPES_REAL, _MODULES] + param_names = ["shape", "dtype", "module"] + + def setup(self, shape, dtype, module): + super().setup(shape, dtype) + mod = _MODULE_MAP[module] + self.rfftn = mod.rfftn + self.irfftn = mod.irfftn + self.rfftn(self.x_real) + self.irfftn(self.x_complex, s=shape) + + def time_rfftn(self, shape, dtype, module): + self.rfftn(self.x_real) + + def time_irfftn(self, shape, dtype, module): + self.irfftn(self.x_complex, s=shape) + + +# --------------------------------------------------------------------------- +# N-D Hermitian (scipy_fft only) +# hfftn: input complex, last axis length s[-1]//2+1 → output real shape s +# ihfftn: input real shape s → output complex, last axis length s[-1]//2+1 +# --------------------------------------------------------------------------- + + +class BenchHermitianND(BenchR2C): + """scipy_fft.hfftn / ihfftn — N-D Hermitian. + + *dtype* is the **output** dtype of hfftn (real). 
+ """ + + params = [_SHAPES_3D, _DTYPES_REAL, _MODULES_SCIPY_ONLY] + param_names = ["shape", "dtype", "module"] + + def setup(self, shape, dtype, module): + super().setup(shape, dtype) + mod = _MODULE_MAP[module] + self.hfftn = mod.hfftn + self.ihfftn = mod.ihfftn + self.hfftn(self.x_complex, s=shape) + self.ihfftn(self.x_real) + + def time_hfftn(self, shape, dtype, module): + self.hfftn(self.x_complex, s=shape) + + def time_ihfftn(self, shape, dtype, module): + self.ihfftn(self.x_real) diff --git a/benchmarks/benchmarks/bench_memory.py b/benchmarks/benchmarks/bench_memory.py new file mode 100644 index 0000000..46d7176 --- /dev/null +++ b/benchmarks/benchmarks/bench_memory.py @@ -0,0 +1,90 @@ +"""Peak-memory benchmarks for FFT operations. + +Measures peak RSS (resident set size) to detect memory regressions +in the mkl_fft root API across 1-D, 2-D, and 3-D transforms. +""" + +import mkl_fft + +from ._utils import ( + _DTYPES_REAL, + _DTYPES_REDUCED, + _SHAPES_2D, + _SHAPES_3D, + BenchC2C, + BenchR2C, +) + +_SIZES_1D = [1024, 16384, 65536, 262144] + + +# --------------------------------------------------------------------------- +# 1-D complex FFT +# --------------------------------------------------------------------------- + + +class PeakMemFFT1D(BenchC2C): + """Peak RSS for 1-D complex FFT.""" + + params = [_SIZES_1D, _DTYPES_REDUCED] + param_names = ["n", "dtype"] + + def peakmem_fft(self, n, dtype): + mkl_fft.fft(self.x) + + def peakmem_ifft(self, n, dtype): + mkl_fft.ifft(self.x) + + +# --------------------------------------------------------------------------- +# 1-D real FFT +# --------------------------------------------------------------------------- + + +class PeakMemRFFT1D(BenchR2C): + """Peak RSS for 1-D real FFT (forward and inverse).""" + + params = [_SIZES_1D, _DTYPES_REAL] + param_names = ["n", "dtype"] + + def peakmem_rfft(self, n, dtype): + mkl_fft.rfft(self.x_real) + + def peakmem_irfft(self, n, dtype): + mkl_fft.irfft(self.x_complex, n=n) + + +# 
--------------------------------------------------------------------------- +# 2-D complex FFT +# --------------------------------------------------------------------------- + + +class PeakMemFFT2D(BenchC2C): + """Peak RSS for 2-D complex FFT.""" + + params = [_SHAPES_2D, _DTYPES_REDUCED] + param_names = ["shape", "dtype"] + + def peakmem_fft2(self, shape, dtype): + mkl_fft.fft2(self.x) + + def peakmem_ifft2(self, shape, dtype): + mkl_fft.ifft2(self.x) + + +# --------------------------------------------------------------------------- +# N-D complex FFT (3-D) +# --------------------------------------------------------------------------- + + +class PeakMemFFTnD(BenchC2C): + """Peak RSS for N-D complex FFT (3-D shapes).""" + + params = [_SHAPES_3D, _DTYPES_REDUCED] + param_names = ["shape", "dtype"] + + def peakmem_fftn(self, shape, dtype): + mkl_fft.fftn(self.x) + + def peakmem_ifftn(self, shape, dtype): + mkl_fft.ifftn(self.x) diff --git a/pyproject.toml b/pyproject.toml index fda3e40..ab9b8dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3.10,<3.15" [project.optional-dependencies] +benchmark = ["asv>=0.6", "psutil"] scipy_interface = ["scipy>=1.10", "mkl-service"] test = ["pytest", "scipy>=1.10", "mkl-service"]