diff --git a/src/spikeinterface/core/__init__.py b/src/spikeinterface/core/__init__.py index d757d6d98b..d038ffbd29 100644 --- a/src/spikeinterface/core/__init__.py +++ b/src/spikeinterface/core/__init__.py @@ -44,8 +44,7 @@ inject_some_split_units, synthetize_spike_train_bad_isi, generate_templates, - NoiseGeneratorRecording, - noise_generator_recording, + MockRecording, generate_recording_by_size, InjectTemplatesRecording, inject_templates, @@ -190,3 +189,25 @@ load_waveforms, load_sorting_analyzer_or_waveforms, ) + + +def __getattr__(name): + if name in ("NoiseGeneratorRecording", "noise_generator_recording"): + import warnings + + warnings.warn( + f"Importing {name} from spikeinterface.core is deprecated. " + f"Import from spikeinterface.generation instead: " + f"`from spikeinterface.generation import {name}`. " + f"This will be removed in version 0.106.0.", + FutureWarning, + stacklevel=2, + ) + from spikeinterface.generation.noise_tools import NoiseGeneratorRecording, noise_generator_recording + + _map = { + "NoiseGeneratorRecording": NoiseGeneratorRecording, + "noise_generator_recording": noise_generator_recording, + } + return _map[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 1c9ece728f..d3f65cfcbb 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -60,13 +60,12 @@ def generate_recording( """ seed = _ensure_seed(seed) - recording = NoiseGeneratorRecording( + recording = MockRecording( num_channels=num_channels, sampling_frequency=sampling_frequency, durations=durations, dtype="float32", seed=seed, - strategy="tile_pregenerated", # block size is fixed to one second noise_block_size=int(sampling_frequency), ) @@ -1227,17 +1226,21 @@ def get_unit_spike_train(self, unit_id, start_frame: int | None = None, end_fram ## Noise generator zone ## -class NoiseGeneratorRecording(BaseRecording): +class MockRecording(BaseRecording): """ - A lazy recording that generates white noise samples if and only if `get_traces` is called. + A lazy recording that generates unit-variance white noise samples if and only if `get_traces` is called. - This done by tiling small noise chunk. + This is a lightweight testing utility for infrastructure tests, memory profiling, and benchmarks. + For noise with spatial correlations or per-channel noise levels, use + ``spikeinterface.generation.NoiseGeneratorRecording``. + + Noise is generated by pre-allocating a single noise block and tiling it across the requested + frame range. This is reproducible across different start/end frame calls with the same seed. 2 strategies to be reproducible across different start/end frame calls: * "tile_pregenerated": pregenerate a small noise block and tile it depending the start_frame/end_frame * "on_the_fly": generate on the fly small noise chunk and tile then. seed depend also on the noise block. - Parameters ---------- num_channels : int @@ -1246,26 +1249,19 @@ class NoiseGeneratorRecording(BaseRecording): The sampling frequency of the recorder. durations : list[float] The durations of each segment in seconds. Note that the length of this list is the number of segments. - noise_levels : float | np.ndarray, default: 1.0 - Std of the white noise (if an array, defined by per channels) - cov_matrix : np.ndarray | None, default: None - The covariance matrix of the noise dtype : np.dtype | str | None, default: "float32" The dtype of the recording. Note that only np.float32 and np.float64 are supported. seed : int | None, default: None The seed for np.random.default_rng. strategy : "tile_pregenerated" | "on_the_fly", default: "tile_pregenerated" - The strategy of generating noise chunk: - * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it - very fast and cusume only one noise block. - * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index - no memory preallocation but a bit more computaion (random) + The strategy of generating noise chunk. + # TODO: Remove on_the_fly strategy after discussion, see #4522. noise_block_size : int, default: 30000 - Size in sample of noise block. + Size in samples of the pre-generated noise block. Notes ----- - If modifying this function, ensure that only one call to malloc is made per call get_traces to + If modifying this class, ensure that only one call to malloc is made per call to get_traces to maintain the optimized memory profile. """ @@ -1274,38 +1270,22 @@ def __init__( num_channels: int, sampling_frequency: float, durations: list[float], - noise_levels: float | np.ndarray = 1.0, - cov_matrix: np.ndarray | None = None, dtype: np.dtype | str | None = "float32", seed: int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", noise_block_size: int = 30000, ): - channel_ids = [str(idx) for idx in np.arange(num_channels)] + channel_ids = [str(index) for index in np.arange(num_channels)] dtype = np.dtype(dtype).name # Cast to string for serialization if dtype not in ("float32", "float64"): raise ValueError(f"'dtype' must be 'float32' or 'float64' but is {dtype}") assert strategy in ("tile_pregenerated", "on_the_fly"), "'strategy' must be 'tile_pregenerated' or 'on_the_fly'" - if np.isscalar(noise_levels): - noise_levels = np.ones((1, num_channels)) * noise_levels - else: - noise_levels = np.asarray(noise_levels) - if len(noise_levels.shape) < 2: - noise_levels = noise_levels[np.newaxis, :] - - assert len(noise_levels[0]) == num_channels, "Noise levels should have a size of num_channels" - BaseRecording.__init__(self, sampling_frequency=sampling_frequency, channel_ids=channel_ids, dtype=dtype) num_segments = len(durations) - if cov_matrix is not None: - assert ( - cov_matrix.shape[0] == cov_matrix.shape[1] == num_channels - ), "cov_matrix should have a size (num_channels, num_channels)" - # very important here when multiprocessing and dump/load seed = _ensure_seed(seed) @@ -1315,13 +1295,11 @@ def __init__( for i in range(num_segments): num_samples = int(durations[i] * sampling_frequency) - rec_segment = NoiseGeneratorRecordingSegment( + rec_segment = MockRecordingSegment( num_samples, num_channels, sampling_frequency, noise_block_size, - noise_levels, - cov_matrix, dtype, segments_seeds[i], strategy, @@ -1332,8 +1310,6 @@ def __init__( "num_channels": num_channels, "durations": durations, "sampling_frequency": sampling_frequency, - "noise_levels": noise_levels, - "cov_matrix": cov_matrix, "dtype": dtype, "seed": seed, "strategy": strategy, @@ -1341,15 +1317,13 @@ def __init__( } -class NoiseGeneratorRecordingSegment(BaseRecordingSegment): +class MockRecordingSegment(BaseRecordingSegment): def __init__( self, num_samples, num_channels, sampling_frequency, noise_block_size, - noise_levels, - cov_matrix, dtype, seed, strategy, @@ -1361,25 +1335,13 @@ def __init__( self.num_samples = num_samples self.num_channels = num_channels self.noise_block_size = noise_block_size - self.noise_levels = noise_levels - self.cov_matrix = cov_matrix self.dtype = dtype self.seed = seed self.strategy = strategy if self.strategy == "tile_pregenerated": rng = np.random.default_rng(seed=self.seed) - - if self.cov_matrix is None: - self.noise_block = ( - rng.standard_normal(size=(self.noise_block_size, self.num_channels), dtype=self.dtype) - * noise_levels - ) - else: - self.noise_block = rng.multivariate_normal( - np.zeros(self.num_channels), self.cov_matrix, size=self.noise_block_size - ) - + self.noise_block = rng.standard_normal(size=(self.noise_block_size, self.num_channels), dtype=self.dtype) elif self.strategy == "on_the_fly": pass @@ -1413,14 +1375,7 @@ def get_traces( noise_block = self.noise_block elif self.strategy == "on_the_fly": rng = np.random.default_rng(seed=(self.seed, block_index)) - if self.cov_matrix is None: - noise_block = rng.standard_normal(size=(self.noise_block_size, self.num_channels), dtype=self.dtype) - else: - noise_block = rng.multivariate_normal( - np.zeros(self.num_channels), self.cov_matrix, size=self.noise_block_size - ) - - noise_block *= self.noise_levels + noise_block = rng.standard_normal(size=(self.noise_block_size, self.num_channels), dtype=self.dtype) if block_index == first_block_index: if first_block_index != last_block_index: @@ -1443,16 +1398,11 @@ def get_traces( return traces -noise_generator_recording = define_function_from_class( - source_class=NoiseGeneratorRecording, name="noise_generator_recording" -) - - def generate_recording_by_size( full_traces_size_GiB: float, seed: int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", -) -> NoiseGeneratorRecording: +) -> MockRecording: """ Generate a large lazy recording. This is a convenience wrapper around the NoiseGeneratorRecording class where only @@ -1490,7 +1440,7 @@ def generate_recording_by_size( num_samples = int(full_traces_size_bytes / (num_channels * dtype.itemsize)) durations = [num_samples / sampling_frequency] - recording = NoiseGeneratorRecording( + recording = MockRecording( durations=durations, sampling_frequency=sampling_frequency, num_channels=num_channels, @@ -2456,6 +2406,8 @@ def generate_ground_truth_recording( assert (nbefore + nafter) == templates.shape[1] # construct recording + from spikeinterface.generation.noise_tools import NoiseGeneratorRecording + noise_rec = NoiseGeneratorRecording( num_channels=num_channels, sampling_frequency=sampling_frequency, @@ -2482,3 +2434,25 @@ def generate_ground_truth_recording( sorting.name = "GroundTruthSorting" return recording, sorting + + +def __getattr__(name): + if name in ("NoiseGeneratorRecording", "noise_generator_recording"): + import warnings + + warnings.warn( + f"Importing {name} from spikeinterface.core.generate is deprecated. " + f"Import from spikeinterface.generation instead: " + f"`from spikeinterface.generation import {name}`. " + f"This will be removed in version 0.106.0.", + FutureWarning, + stacklevel=2, + ) + from spikeinterface.generation.noise_tools import NoiseGeneratorRecording, noise_generator_recording + + _map = { + "NoiseGeneratorRecording": NoiseGeneratorRecording, + "noise_generator_recording": noise_generator_recording, + } + return _map[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/spikeinterface/generation/__init__.py b/src/spikeinterface/generation/__init__.py index bf6158f031..e0fb98dbbd 100644 --- a/src/spikeinterface/generation/__init__.py +++ b/src/spikeinterface/generation/__init__.py @@ -13,7 +13,7 @@ scale_template_to_range, relocate_templates, ) -from .noise_tools import generate_noise +from .noise_tools import generate_noise, NoiseGeneratorRecording, noise_generator_recording from .splitting_tools import split_sorting_by_amplitudes, split_sorting_by_times @@ -43,8 +43,7 @@ inject_some_duplicate_units, inject_some_split_units, synthetize_spike_train_bad_isi, - NoiseGeneratorRecording, - noise_generator_recording, + MockRecording, InjectTemplatesRecording, inject_templates, ) diff --git a/src/spikeinterface/generation/noise_tools.py b/src/spikeinterface/generation/noise_tools.py index 96abf30fdb..f64a8abe72 100644 --- a/src/spikeinterface/generation/noise_tools.py +++ b/src/spikeinterface/generation/noise_tools.py @@ -1,6 +1,230 @@ import numpy as np +from typing import Literal -from spikeinterface.core.generate import NoiseGeneratorRecording +from spikeinterface.core import BaseRecording, BaseRecordingSegment +from spikeinterface.core.generate import _ensure_seed +from spikeinterface.core.core_tools import define_function_from_class + + +class NoiseGeneratorRecording(BaseRecording): + """ + A lazy recording that generates noise samples if and only if `get_traces` is called. + + This done by tiling small noise chunk. + + 2 strategies to be reproducible across different start/end frame calls: + * "tile_pregenerated": pregenerate a small noise block and tile it depending the start_frame/end_frame + * "on_the_fly": generate on the fly small noise chunk and tile then. seed depend also on the noise block. + + + Parameters + ---------- + num_channels : int + The number of channels. + sampling_frequency : float + The sampling frequency of the recorder. + durations : list[float] + The durations of each segment in seconds. Note that the length of this list is the number of segments. + noise_levels : float | np.ndarray, default: 1.0 + Std of the white noise (if an array, defined by per channels) + cov_matrix : np.ndarray | None, default: None + The covariance matrix of the noise + dtype : np.dtype | str | None, default: "float32" + The dtype of the recording. Note that only np.float32 and np.float64 are supported. + seed : int | None, default: None + The seed for np.random.default_rng. + strategy : "tile_pregenerated" | "on_the_fly", default: "tile_pregenerated" + The strategy of generating noise chunk: + * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it + very fast and cusume only one noise block. + * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index + no memory preallocation but a bit more computaion (random) + noise_block_size : int, default: 30000 + Size in sample of noise block. + + Notes + ----- + If modifying this function, ensure that only one call to malloc is made per call get_traces to + maintain the optimized memory profile. + """ + + def __init__( + self, + num_channels: int, + sampling_frequency: float, + durations: list[float], + noise_levels: float | np.ndarray = 1.0, + cov_matrix: np.ndarray | None = None, + dtype: np.dtype | str | None = "float32", + seed: int | None = None, + strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", + noise_block_size: int = 30000, + ): + + channel_ids = [str(index) for index in np.arange(num_channels)] + dtype = np.dtype(dtype).name # Cast to string for serialization + if dtype not in ("float32", "float64"): + raise ValueError(f"'dtype' must be 'float32' or 'float64' but is {dtype}") + assert strategy in ("tile_pregenerated", "on_the_fly"), "'strategy' must be 'tile_pregenerated' or 'on_the_fly'" + + if np.isscalar(noise_levels): + noise_levels = np.ones((1, num_channels)) * noise_levels + else: + noise_levels = np.asarray(noise_levels) + if len(noise_levels.shape) < 2: + noise_levels = noise_levels[np.newaxis, :] + + assert len(noise_levels[0]) == num_channels, "Noise levels should have a size of num_channels" + + BaseRecording.__init__(self, sampling_frequency=sampling_frequency, channel_ids=channel_ids, dtype=dtype) + + num_segments = len(durations) + + if cov_matrix is not None: + assert ( + cov_matrix.shape[0] == cov_matrix.shape[1] == num_channels + ), "cov_matrix should have a size (num_channels, num_channels)" + + # very important here when multiprocessing and dump/load + seed = _ensure_seed(seed) + + # we need one seed per segment + rng = np.random.default_rng(seed) + segments_seeds = [rng.integers(0, 2**63) for i in range(num_segments)] + + for i in range(num_segments): + num_samples = int(durations[i] * sampling_frequency) + rec_segment = NoiseGeneratorRecordingSegment( + num_samples, + num_channels, + sampling_frequency, + noise_block_size, + noise_levels, + cov_matrix, + dtype, + segments_seeds[i], + strategy, + ) + self.add_recording_segment(rec_segment) + + self._kwargs = { + "num_channels": num_channels, + "durations": durations, + "sampling_frequency": sampling_frequency, + "noise_levels": noise_levels, + "cov_matrix": cov_matrix, + "dtype": dtype, + "seed": seed, + "strategy": strategy, + "noise_block_size": noise_block_size, + } + + +class NoiseGeneratorRecordingSegment(BaseRecordingSegment): + def __init__( + self, + num_samples, + num_channels, + sampling_frequency, + noise_block_size, + noise_levels, + cov_matrix, + dtype, + seed, + strategy, + ): + assert seed is not None + + BaseRecordingSegment.__init__(self, sampling_frequency=sampling_frequency) + + self.num_samples = num_samples + self.num_channels = num_channels + self.noise_block_size = noise_block_size + self.noise_levels = noise_levels + self.cov_matrix = cov_matrix + self.dtype = dtype + self.seed = seed + self.strategy = strategy + + if self.strategy == "tile_pregenerated": + rng = np.random.default_rng(seed=self.seed) + + if self.cov_matrix is None: + self.noise_block = ( + rng.standard_normal(size=(self.noise_block_size, self.num_channels), dtype=self.dtype) + * noise_levels + ) + else: + self.noise_block = rng.multivariate_normal( + np.zeros(self.num_channels), self.cov_matrix, size=self.noise_block_size + ) + + elif self.strategy == "on_the_fly": + pass + + def get_num_samples(self) -> int: + return self.num_samples + + def get_traces( + self, + start_frame: int | None = None, + end_frame: int | None = None, + channel_indices: list | None = None, + ) -> np.ndarray: + + if start_frame is None: + start_frame = 0 + if end_frame is None: + end_frame = self.get_num_samples() + + start_frame_within_block = start_frame % self.noise_block_size + end_frame_within_block = end_frame % self.noise_block_size + num_samples = end_frame - start_frame + + traces = np.empty(shape=(num_samples, self.num_channels), dtype=self.dtype) + + first_block_index = start_frame // self.noise_block_size + last_block_index = end_frame // self.noise_block_size + + pos = 0 + for block_index in range(first_block_index, last_block_index + 1): + if self.strategy == "tile_pregenerated": + noise_block = self.noise_block + elif self.strategy == "on_the_fly": + rng = np.random.default_rng(seed=(self.seed, block_index)) + if self.cov_matrix is None: + noise_block = rng.standard_normal(size=(self.noise_block_size, self.num_channels), dtype=self.dtype) + else: + noise_block = rng.multivariate_normal( + np.zeros(self.num_channels), self.cov_matrix, size=self.noise_block_size + ) + + noise_block *= self.noise_levels + + if block_index == first_block_index: + if first_block_index != last_block_index: + end_first_block = self.noise_block_size - start_frame_within_block + traces[:end_first_block] = noise_block[start_frame_within_block:] + pos += end_first_block + else: + # special case when unique block + traces[:] = noise_block[start_frame_within_block : start_frame_within_block + num_samples] + elif block_index == last_block_index: + if end_frame_within_block > 0: + traces[pos:] = noise_block[:end_frame_within_block] + else: + traces[pos : pos + self.noise_block_size] = noise_block + pos += self.noise_block_size + + # slice channels + traces = traces if channel_indices is None else traces[:, channel_indices] + + return traces + + +noise_generator_recording = define_function_from_class( + source_class=NoiseGeneratorRecording, name="noise_generator_recording" +) def generate_noise( diff --git a/src/spikeinterface/preprocessing/silence_periods.py b/src/spikeinterface/preprocessing/silence_periods.py index 393c712919..1343d447b2 100644 --- a/src/spikeinterface/preprocessing/silence_periods.py +++ b/src/spikeinterface/preprocessing/silence_periods.py @@ -4,7 +4,8 @@ from .basepreprocessor import BasePreprocessor, BasePreprocessorSegment from spikeinterface.core import get_noise_levels -from spikeinterface.core.generate import NoiseGeneratorRecording +from spikeinterface.core.generate import MockRecording +from .normalize_scale import ScaleRecording from spikeinterface.core.job_tools import split_job_kwargs from spikeinterface.core.base import base_period_dtype @@ -87,16 +88,16 @@ def __init__( recording, return_in_uV=False, random_slices_kwargs=random_slices_kwargs ) - noise_generator = NoiseGeneratorRecording( + mock_noise = MockRecording( num_channels=recording.get_num_channels(), sampling_frequency=recording.sampling_frequency, durations=[recording.select_segments(i).get_duration() for i in range(recording.get_num_segments())], dtype=recording.dtype, seed=seed, - noise_levels=noise_levels, strategy="on_the_fly", noise_block_size=int(recording.sampling_frequency), ) + noise_generator = ScaleRecording(mock_noise, gain=noise_levels, dtype=recording.dtype) else: noise_generator = None