From 8adff23401e770ec158c8160e5f7e1ffb3b181fc Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Sat, 10 May 2025 04:07:58 -0400 Subject: [PATCH 1/4] added clean up logs functionality to cli. --- README.md | 1 + vec_inf/README.md | 1 + vec_inf/cli/_cli.py | 55 +++++++++++++++++++++++++++++++++++ vec_inf/client/_utils.py | 62 ++++++++++++++++++++++++++++++++++++++++ vec_inf/client/api.py | 51 ++++++++++++++++++++++++++++++++- 5 files changed, 169 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 52dbf6e..0a0537a 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ Note that there are other parameters that can also be added to the config but no * `metrics`: Streams performance metrics to the console. * `shutdown`: Shutdown a model by providing its Slurm job ID. * `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported. +* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, and/or `--job-id`. Use `--dry-run` to preview what would be deleted. For more details on the usage of these commands, refer to the [User Guide](https://vectorinstitute.github.io/vector-inference/user_guide/) diff --git a/vec_inf/README.md b/vec_inf/README.md index a7cceea..e0299a5 100644 --- a/vec_inf/README.md +++ b/vec_inf/README.md @@ -5,5 +5,6 @@ * `metrics`: Streams performance metrics to the console. * `shutdown`: Shutdown a model by providing its Slurm job ID. * `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported. +* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, and/or `--job-id`. Use `--dry-run` to preview what would be deleted. Use `--help` to see all available options diff --git a/vec_inf/cli/_cli.py b/vec_inf/cli/_cli.py index af102f0..c8e1d64 100644 --- a/vec_inf/cli/_cli.py +++ b/vec_inf/cli/_cli.py @@ -336,5 +336,60 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None: raise click.ClickException(f"Metrics check failed: {str(e)}") from e +@cli.command("cleanup") +@click.option("--log-dir", type=str, help="Path to SLURM log directory") +@click.option("--model-family", type=str, help="Filter by model family") +@click.option("--model-name", type=str, help="Filter by model name") +@click.option( + "--job-id", type=int, help="Only remove logs with this exact SLURM job ID" +) +@click.option("--dry-run", is_flag=True, help="List matching logs without deleting") +def cleanup_logs_cli( + log_dir: Optional[str], + model_family: Optional[str], + model_name: Optional[str], + job_id: Optional[int], + dry_run: bool, +) -> None: + """Clean up log files based on optional filters. + + Parameters + ---------- + log_dir : str or Path, optional + Root directory containing log files. Defaults to ~/.vec-inf-logs. + model_family : str, optional + Only delete logs for this model family. + model_name : str, optional + Only delete logs for this model name. + job_id : int, optional + If provided, only match directories with this exact SLURM job ID. + dry_run : bool + If True, return matching files without deleting them. 
+ """ + try: + client = VecInfClient() + matched = client.cleanup_logs( + log_dir=log_dir, + model_family=model_family, + model_name=model_name, + job_id=job_id, + dry_run=dry_run, + ) + + if not matched: + if dry_run: + click.echo("Dry run: no matching log directories found.") + else: + click.echo("No matching log directories were deleted.") + elif dry_run: + click.echo(f"Dry run: {len(matched)} directories would be deleted:") + for f in matched: + click.echo(f" - {f}") + else: + click.echo(f"Deleted {len(matched)} log directory(ies).") + except Exception as e: + raise click.ClickException(f"Cleanup failed: {str(e)}") from e + + if __name__ == "__main__": cli() diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index 64f7879..1256f58 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -285,3 +285,65 @@ def parse_launch_output(output: str) -> tuple[str, dict[str, str]]: config_dict[key.lower().replace(" ", "_")] = value return slurm_job_id, config_dict + + +def find_matching_dirs( + log_dir: Path, + model_family: Optional[str] = None, + model_name: Optional[str] = None, + job_id: Optional[int] = None, +) -> list[Path]: + """ + Find log directories based on filtering criteria. + + Parameters + ---------- + log_dir : Path + The base directory containing model family directories. + model_family : str, optional + Filter to only search inside this family. + model_name : str, optional + Filter to only match model names. + job_id : int, optional + Filter to only match this exact SLURM job ID. + + Returns + ------- + list[Path] + List of directories that match the criteria and can be deleted. + """ + matched = [] + + if not log_dir.exists() or not log_dir.is_dir(): + raise FileNotFoundError(f"Log directory does not exist: {log_dir}") + + if not model_family and not model_name and not job_id: + return [log_dir] + + # Iterate over model families + for family_dir in log_dir.iterdir(): + if not family_dir.is_dir(): + continue + if model_family and family_dir.name != model_family: + continue + + if model_family and not model_name and not job_id: + return [family_dir] + + for job_dir in family_dir.iterdir(): + if not job_dir.is_dir(): + continue + + try: + name_part, id_part = job_dir.name.rsplit(".", 1) + parsed_id = int(id_part) + except ValueError: + continue + + if model_name and name_part != model_name: + continue + if job_id is not None and parsed_id != job_id: + continue + + matched.append(job_dir) + return matched diff --git a/vec_inf/client/api.py b/vec_inf/client/api.py index 3073888..51c46b7 100644 --- a/vec_inf/client/api.py +++ b/vec_inf/client/api.py @@ -10,8 +10,10 @@ vec_inf.client.models : Data models for API responses """ +import shutil import time import warnings +from pathlib import Path from typing import Any, Optional, Union from vec_inf.client._exceptions import ( @@ -24,7 +26,7 @@ ModelStatusMonitor, PerformanceMetricsCollector, ) -from vec_inf.client._utils import run_bash_command +from vec_inf.client._utils import find_matching_dirs, run_bash_command from vec_inf.client.config import ModelConfig from vec_inf.client.models import ( LaunchOptions, @@ -60,6 +62,9 @@ class VecInfClient: wait_until_ready(slurm_job_id, timeout_seconds, poll_interval_seconds, log_dir) Wait for a model to become ready + cleanup_logs(log_dir, model_name, model_family, job_id, dry_run) + Remove logs from the log directory. 
+ Examples -------- >>> from vec_inf.api import VecInfClient @@ -300,3 +305,47 @@ def wait_until_ready( # Wait before checking again time.sleep(poll_interval_seconds) + + def cleanup_logs( + self, + log_dir: Optional[Union[str, Path]] = None, + model_family: Optional[str] = None, + model_name: Optional[str] = None, + job_id: Optional[int] = None, + dry_run: bool = False, + ) -> list[Path]: + """Remove logs from the log directory. + + Parameters + ---------- + log_dir : str or Path, optional + Root directory containing log files. Defaults to ~/.vec-inf-logs. + model_family : str, optional + Only delete logs for this model family. + model_name : str, optional + Only delete logs for this model name. + job_id : int, optional + If provided, only match directories with this exact SLURM job ID. + dry_run : bool + If True, return matching files without deleting them. + + Returns + ------- + list[Path] + List of deleted (or matched if dry_run) log file paths. + """ + log_root = Path(log_dir) if log_dir else Path.home() / ".vec-inf-logs" + matched = find_matching_dirs( + log_dir=log_root, + model_family=model_family, + model_name=model_name, + job_id=job_id, + ) + + if dry_run: + return matched + + for path in matched: + shutil.rmtree(path) + + return matched From 7106024b349ab25b41bc562a691e1775363a5f44 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Tue, 13 May 2025 21:40:41 -0400 Subject: [PATCH 2/4] added --before-job-id argument. --- README.md | 2 +- vec_inf/README.md | 2 +- vec_inf/cli/_cli.py | 9 +++++++++ vec_inf/client/_utils.py | 11 ++++++++--- vec_inf/client/api.py | 4 ++++ 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0a0537a..869095c 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ Note that there are other parameters that can also be added to the config but no * `metrics`: Streams performance metrics to the console. * `shutdown`: Shutdown a model by providing its Slurm job ID. * `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported. -* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, and/or `--job-id`. Use `--dry-run` to preview what would be deleted. +* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, `--job-id`, and/or `--before-job-id`. Use `--dry-run` to preview what would be deleted. For more details on the usage of these commands, refer to the [User Guide](https://vectorinstitute.github.io/vector-inference/user_guide/) diff --git a/vec_inf/README.md b/vec_inf/README.md index e0299a5..9b9b816 100644 --- a/vec_inf/README.md +++ b/vec_inf/README.md @@ -5,6 +5,6 @@ * `metrics`: Streams performance metrics to the console. * `shutdown`: Shutdown a model by providing its Slurm job ID. * `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported. -* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, and/or `--job-id`. Use `--dry-run` to preview what would be deleted. +* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, `--job-id`, and/or `--before-job-id`. Use `--dry-run` to preview what would be deleted. 
Use `--help` to see all available options diff --git a/vec_inf/cli/_cli.py b/vec_inf/cli/_cli.py index c8e1d64..d4649dd 100644 --- a/vec_inf/cli/_cli.py +++ b/vec_inf/cli/_cli.py @@ -343,12 +343,18 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None: @click.option( "--job-id", type=int, help="Only remove logs with this exact SLURM job ID" ) +@click.option( + "--before-job-id", + type=int, + help="Remove logs with job ID less than this value", +) @click.option("--dry-run", is_flag=True, help="List matching logs without deleting") def cleanup_logs_cli( log_dir: Optional[str], model_family: Optional[str], model_name: Optional[str], job_id: Optional[int], + before_job_id: Optional[int], dry_run: bool, ) -> None: """Clean up log files based on optional filters. @@ -363,6 +369,8 @@ def cleanup_logs_cli( Only delete logs for this model name. job_id : int, optional If provided, only match directories with this exact SLURM job ID. + before_job_id : int, optional + If provided, only delete logs with job ID less than this value. dry_run : bool If True, return matching files without deleting them. """ @@ -373,6 +381,7 @@ def cleanup_logs_cli( model_family=model_family, model_name=model_name, job_id=job_id, + before_job_id=before_job_id, dry_run=dry_run, ) diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index 1256f58..6bb4b85 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -292,6 +292,7 @@ def find_matching_dirs( model_family: Optional[str] = None, model_name: Optional[str] = None, job_id: Optional[int] = None, + before_job_id: Optional[int] = None, ) -> list[Path]: """ Find log directories based on filtering criteria. @@ -306,6 +307,8 @@ def find_matching_dirs( Filter to only match model names. job_id : int, optional Filter to only match this exact SLURM job ID. + before_job_id : int, optional + Filter to only include job IDs less than this value. Returns ------- @@ -317,17 +320,16 @@ def find_matching_dirs( if not log_dir.exists() or not log_dir.is_dir(): raise FileNotFoundError(f"Log directory does not exist: {log_dir}") - if not model_family and not model_name and not job_id: + if not model_family and not model_name and not job_id and not before_job_id: return [log_dir] - # Iterate over model families for family_dir in log_dir.iterdir(): if not family_dir.is_dir(): continue if model_family and family_dir.name != model_family: continue - if model_family and not model_name and not job_id: + if model_family and not model_name and not job_id and not before_job_id: return [family_dir] for job_dir in family_dir.iterdir(): @@ -344,6 +346,9 @@ def find_matching_dirs( continue if job_id is not None and parsed_id != job_id: continue + if before_job_id is not None and parsed_id >= before_job_id: + continue matched.append(job_dir) + return matched diff --git a/vec_inf/client/api.py b/vec_inf/client/api.py index 51c46b7..fa05d2d 100644 --- a/vec_inf/client/api.py +++ b/vec_inf/client/api.py @@ -312,6 +312,7 @@ def cleanup_logs( model_family: Optional[str] = None, model_name: Optional[str] = None, job_id: Optional[int] = None, + before_job_id: Optional[int] = None, dry_run: bool = False, ) -> list[Path]: """Remove logs from the log directory. @@ -326,6 +327,8 @@ def cleanup_logs( Only delete logs for this model name. job_id : int, optional If provided, only match directories with this exact SLURM job ID. + before_job_id : int, optional + If provided, only delete logs with job ID less than this value. 
dry_run : bool If True, return matching files without deleting them. @@ -340,6 +343,7 @@ def cleanup_logs( model_family=model_family, model_name=model_name, job_id=job_id, + before_job_id=before_job_id, ) if dry_run: From 28401a24fe8af056828706cb7dce7cfd651223b5 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Tue, 13 May 2025 22:42:51 -0400 Subject: [PATCH 3/4] tests added for find_matching_dirs. --- tests/vec_inf/client/test_utils.py | 133 +++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/tests/vec_inf/client/test_utils.py b/tests/vec_inf/client/test_utils.py index 930d6a1..0cdc2a0 100644 --- a/tests/vec_inf/client/test_utils.py +++ b/tests/vec_inf/client/test_utils.py @@ -8,6 +8,7 @@ from vec_inf.client._utils import ( MODEL_READY_SIGNATURE, + find_matching_dirs, get_base_url, is_server_running, load_config, @@ -208,3 +209,135 @@ def test_load_config_invalid_user_model(tmp_path): assert "validation error" in str(excinfo.value).lower() assert "model_type" in str(excinfo.value) assert "num_gpus" in str(excinfo.value) + + +def test_find_matching_dirs_only_model_family(tmp_path): + """Return model_family directory when only model_family is provided.""" + fam_dir = tmp_path / "famA" + fam_dir.mkdir() + (fam_dir / "modelA.1").mkdir() + (fam_dir / "modelB.2").mkdir() + + other_dir = tmp_path / "famB" + other_dir.mkdir() + (other_dir / "modelC.3").mkdir() + + matches = find_matching_dirs(log_dir=tmp_path, model_family="famA") + assert len(matches) == 1 + assert matches[0].name == "famA" + + +def test_find_matching_dirs_only_model_name(tmp_path): + """Return directories matching when only model_name is provided.""" + fam_a = tmp_path / "famA" + fam_a.mkdir() + (fam_a / "target.1").mkdir() + (fam_a / "other.2").mkdir() + + fam_b = tmp_path / "famB" + fam_b.mkdir() + (fam_b / "different.3").mkdir() + + matches = find_matching_dirs(log_dir=tmp_path, model_name="target") + result_names = [p.name for p in matches] + + assert "target.1" in result_names + assert "other.2" not in result_names + assert "different.3" not in result_names + + +def test_find_matching_dirs_only_job_id(tmp_path): + """Return directories matching exact job_id.""" + fam_dir = tmp_path / "fam" + fam_dir.mkdir() + (fam_dir / "modelA.10").mkdir() + (fam_dir / "modelB.20").mkdir() + (fam_dir / "modelC.30").mkdir() + + matches = find_matching_dirs(log_dir=tmp_path, job_id=10) + result_names = [p.name for p in matches] + + assert "modelA.10" in result_names + assert "modelB.20" not in result_names + assert "modelC.30" not in result_names + + +def test_find_matching_dirs_only_before_job_id(tmp_path): + """Return directories with job_id < before_job_id.""" + fam_dir = tmp_path / "famA" + fam_dir.mkdir() + (fam_dir / "modelA.1").mkdir() + (fam_dir / "modelA.5").mkdir() + (fam_dir / "modelA.100").mkdir() + + fam_dir = tmp_path / "famB" + fam_dir.mkdir() + (fam_dir / "modelB.30").mkdir() + + matches = find_matching_dirs(log_dir=tmp_path, before_job_id=50) + result_names = [p.name for p in matches] + + assert "modelA.1" in result_names + assert "modelA.5" in result_names + assert "modelA.100" not in result_names + assert "modelB.30" in result_names + + +def test_find_matching_dirs_family_and_before_job_id(tmp_path): + """Return directories under a given family with job IDs less than before_job_id.""" + fam_dir = tmp_path / "targetfam" + fam_dir.mkdir() + (fam_dir / "modelA.10").mkdir() + (fam_dir / "modelA.20").mkdir() + (fam_dir / "modelA.99").mkdir() + (fam_dir / "modelA.150").mkdir() + + other_fam = tmp_path / 
"otherfam" + other_fam.mkdir() + (other_fam / "modelB.5").mkdir() + (other_fam / "modelB.10").mkdir() + (other_fam / "modelB.100").mkdir() + + matches = find_matching_dirs( + log_dir=tmp_path, + model_family="targetfam", + before_job_id=100, + ) + + result_names = [p.name for p in matches] + + assert "modelA.10" in result_names + assert "modelA.20" in result_names + assert "modelA.99" in result_names + assert "modelA.150" not in result_names + assert all("otherfam" not in str(p) for p in matches) + + +def test_find_matching_dirs_with_family_model_name_and_before_job_id(tmp_path): + """Return matching dirs with model_family, model_name, and before_job_id filters.""" + fam_dir = tmp_path / "targetfam" + fam_dir.mkdir() + (fam_dir / "modelA.1").mkdir() + (fam_dir / "modelA.50").mkdir() + (fam_dir / "modelA.150").mkdir() + (fam_dir / "modelB.40").mkdir() + + other_fam = tmp_path / "otherfam" + other_fam.mkdir() + (other_fam / "modelC.20").mkdir() + + matches = find_matching_dirs( + log_dir=tmp_path, + model_family="targetfam", + model_name="modelA", + before_job_id=100, + ) + + result_names = [p.name for p in matches] + + assert "modelA.1" in result_names + assert "modelA.50" in result_names + assert "modelA.150" not in result_names + assert "modelB.40" not in result_names + assert all("modelB" not in p for p in result_names) + assert all("otherfam" not in str(p) for p in matches) From 7d79d765751f5024b75bed9ec85656c2a347e7b4 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Tue, 13 May 2025 23:12:40 -0400 Subject: [PATCH 4/4] added tests for cleanup in api and cli, updated var names in utils. --- tests/vec_inf/cli/test_cli.py | 74 +++++++++++++++++++++++ tests/vec_inf/client/test_api.py | 71 ++++++++++++++++++++++ tests/vec_inf/client/test_utils.py | 94 +++++++++++++++--------------- 3 files changed, 192 insertions(+), 47 deletions(-) diff --git a/tests/vec_inf/cli/test_cli.py b/tests/vec_inf/cli/test_cli.py index 155b091..ed7b721 100644 --- a/tests/vec_inf/cli/test_cli.py +++ b/tests/vec_inf/cli/test_cli.py @@ -531,3 +531,77 @@ def test_metrics_command_request_failed( in result.output ) assert "Connection refused" in result.output + + +def test_cli_cleanup_logs_dry_run(runner, tmp_path): + """Test CLI cleanup command in dry-run mode.""" + model_dir = tmp_path / "fam_a" / "model_a.123" + model_dir.mkdir(parents=True) + + result = runner.invoke( + cli, + [ + "cleanup", + "--log-dir", + str(tmp_path), + "--model-family", + "fam_a", + "--model-name", + "model_a", + "--dry-run", + ], + ) + + assert result.exit_code == 0 + assert "would be deleted" in result.output + assert "model_a.123" in result.output + + +def test_cli_cleanup_logs_delete(tmp_path): + """Test cleanup_logs CLI deletes matching directories when not in dry-run mode.""" + fam_dir = tmp_path / "fam_a" + fam_dir.mkdir() + (fam_dir / "model_a.1").mkdir() + + runner = CliRunner() + result = runner.invoke( + cli, + [ + "cleanup", + "--log-dir", + str(tmp_path), + "--model-family", + "fam_a", + "--model-name", + "model_a", + "--job-id", + "1", + ], + ) + + assert result.exit_code == 0 + assert "Deleted 1 log directory" in result.output + assert not (fam_dir / "model_a.1").exists() + + +def test_cli_cleanup_logs_no_match(tmp_path): + """Test cleanup_logs CLI when no directories match the filters.""" + fam_dir = tmp_path / "fam_a" + fam_dir.mkdir() + (fam_dir / "model_a.1").mkdir() + + runner = CliRunner() + result = runner.invoke( + cli, + [ + "cleanup", + "--log-dir", + str(tmp_path), + "--model-family", + "fam_b", + ], + ) + + assert 
result.exit_code == 0 + assert "No matching log directories were deleted." in result.output + assert (fam_dir / "model_a.1").exists() diff --git a/tests/vec_inf/client/test_api.py b/tests/vec_inf/client/test_api.py index 74dc398..3cdb2c1 100644 --- a/tests/vec_inf/client/test_api.py +++ b/tests/vec_inf/client/test_api.py @@ -128,3 +128,74 @@ def test_wait_until_ready(): assert result.server_status == ModelStatus.READY assert result.base_url == "http://gpu123:8080/v1" assert mock_status.call_count == 2 + + +def test_cleanup_logs_no_match(tmp_path): + """Test when cleanup_logs returns empty list.""" + fam_a = tmp_path / "fam_a" + model_a = fam_a / "model_a.999" + model_a.mkdir(parents=True) + + client = VecInfClient() + deleted = client.cleanup_logs( + log_dir=tmp_path, + model_family="fam_b", + dry_run=False, + ) + + assert deleted == [] + assert fam_a.exists() + assert model_a.exists() + + +def test_cleanup_logs_deletes_matching_dirs(tmp_path): + """Test that cleanup_logs deletes model directories matching filters.""" + fam_a = tmp_path / "fam_a" + fam_a.mkdir() + + model_a_1 = fam_a / "model_a.10" + model_a_2 = fam_a / "model_a.20" + model_b = fam_a / "model_b.30" + + model_a_1.mkdir() + model_a_2.mkdir() + model_b.mkdir() + + client = VecInfClient() + deleted = client.cleanup_logs( + log_dir=tmp_path, + model_family="fam_a", + model_name="model_a", + before_job_id=15, + dry_run=False, + ) + + assert deleted == [model_a_1] + assert not model_a_1.exists() + assert model_a_2.exists() + assert model_b.exists() + + +def test_cleanup_logs_matching_dirs_dry_run(tmp_path): + """Test that cleanup_logs find model directories matching filters.""" + fam_a = tmp_path / "fam_a" + fam_a.mkdir() + + model_a_1 = fam_a / "model_a.10" + model_a_2 = fam_a / "model_a.20" + + model_a_1.mkdir() + model_a_2.mkdir() + + client = VecInfClient() + deleted = client.cleanup_logs( + log_dir=tmp_path, + model_family="fam_a", + model_name="model_a", + before_job_id=15, + dry_run=True, + ) + + assert deleted == [model_a_1] + assert model_a_1.exists() + assert model_a_2.exists() diff --git a/tests/vec_inf/client/test_utils.py b/tests/vec_inf/client/test_utils.py index 0cdc2a0..f13baa1 100644 --- a/tests/vec_inf/client/test_utils.py +++ b/tests/vec_inf/client/test_utils.py @@ -213,28 +213,28 @@ def test_load_config_invalid_user_model(tmp_path): def test_find_matching_dirs_only_model_family(tmp_path): """Return model_family directory when only model_family is provided.""" - fam_dir = tmp_path / "famA" + fam_dir = tmp_path / "fam_a" fam_dir.mkdir() - (fam_dir / "modelA.1").mkdir() - (fam_dir / "modelB.2").mkdir() + (fam_dir / "model_a.1").mkdir() + (fam_dir / "model_b.2").mkdir() - other_dir = tmp_path / "famB" + other_dir = tmp_path / "fam_b" other_dir.mkdir() - (other_dir / "modelC.3").mkdir() + (other_dir / "model_c.3").mkdir() - matches = find_matching_dirs(log_dir=tmp_path, model_family="famA") + matches = find_matching_dirs(log_dir=tmp_path, model_family="fam_a") assert len(matches) == 1 - assert matches[0].name == "famA" + assert matches[0].name == "fam_a" def test_find_matching_dirs_only_model_name(tmp_path): """Return directories matching when only model_name is provided.""" - fam_a = tmp_path / "famA" + fam_a = tmp_path / "fam_a" fam_a.mkdir() (fam_a / "target.1").mkdir() (fam_a / "other.2").mkdir() - fam_b = tmp_path / "famB" + fam_b = tmp_path / "fam_b" fam_b.mkdir() (fam_b / "different.3").mkdir() @@ -250,53 +250,53 @@ def test_find_matching_dirs_only_job_id(tmp_path): """Return directories matching exact 
job_id.""" fam_dir = tmp_path / "fam" fam_dir.mkdir() - (fam_dir / "modelA.10").mkdir() - (fam_dir / "modelB.20").mkdir() - (fam_dir / "modelC.30").mkdir() + (fam_dir / "model_a.10").mkdir() + (fam_dir / "model_b.20").mkdir() + (fam_dir / "model_c.30").mkdir() matches = find_matching_dirs(log_dir=tmp_path, job_id=10) result_names = [p.name for p in matches] - assert "modelA.10" in result_names - assert "modelB.20" not in result_names - assert "modelC.30" not in result_names + assert "model_a.10" in result_names + assert "model_b.20" not in result_names + assert "model_c.30" not in result_names def test_find_matching_dirs_only_before_job_id(tmp_path): """Return directories with job_id < before_job_id.""" - fam_dir = tmp_path / "famA" + fam_dir = tmp_path / "fam_a" fam_dir.mkdir() - (fam_dir / "modelA.1").mkdir() - (fam_dir / "modelA.5").mkdir() - (fam_dir / "modelA.100").mkdir() + (fam_dir / "model_a.1").mkdir() + (fam_dir / "model_a.5").mkdir() + (fam_dir / "model_a.100").mkdir() - fam_dir = tmp_path / "famB" + fam_dir = tmp_path / "fam_b" fam_dir.mkdir() - (fam_dir / "modelB.30").mkdir() + (fam_dir / "model_b.30").mkdir() matches = find_matching_dirs(log_dir=tmp_path, before_job_id=50) result_names = [p.name for p in matches] - assert "modelA.1" in result_names - assert "modelA.5" in result_names - assert "modelA.100" not in result_names - assert "modelB.30" in result_names + assert "model_a.1" in result_names + assert "model_a.5" in result_names + assert "model_a.100" not in result_names + assert "model_b.30" in result_names def test_find_matching_dirs_family_and_before_job_id(tmp_path): """Return directories under a given family with job IDs less than before_job_id.""" fam_dir = tmp_path / "targetfam" fam_dir.mkdir() - (fam_dir / "modelA.10").mkdir() - (fam_dir / "modelA.20").mkdir() - (fam_dir / "modelA.99").mkdir() - (fam_dir / "modelA.150").mkdir() + (fam_dir / "model_a.10").mkdir() + (fam_dir / "model_a.20").mkdir() + (fam_dir / "model_a.99").mkdir() + (fam_dir / "model_a.150").mkdir() other_fam = tmp_path / "otherfam" other_fam.mkdir() - (other_fam / "modelB.5").mkdir() - (other_fam / "modelB.10").mkdir() - (other_fam / "modelB.100").mkdir() + (other_fam / "model_b.5").mkdir() + (other_fam / "model_b.10").mkdir() + (other_fam / "model_b.100").mkdir() matches = find_matching_dirs( log_dir=tmp_path, @@ -306,10 +306,10 @@ def test_find_matching_dirs_family_and_before_job_id(tmp_path): result_names = [p.name for p in matches] - assert "modelA.10" in result_names - assert "modelA.20" in result_names - assert "modelA.99" in result_names - assert "modelA.150" not in result_names + assert "model_a.10" in result_names + assert "model_a.20" in result_names + assert "model_a.99" in result_names + assert "model_a.150" not in result_names assert all("otherfam" not in str(p) for p in matches) @@ -317,27 +317,27 @@ def test_find_matching_dirs_with_family_model_name_and_before_job_id(tmp_path): """Return matching dirs with model_family, model_name, and before_job_id filters.""" fam_dir = tmp_path / "targetfam" fam_dir.mkdir() - (fam_dir / "modelA.1").mkdir() - (fam_dir / "modelA.50").mkdir() - (fam_dir / "modelA.150").mkdir() - (fam_dir / "modelB.40").mkdir() + (fam_dir / "model_a.1").mkdir() + (fam_dir / "model_a.50").mkdir() + (fam_dir / "model_a.150").mkdir() + (fam_dir / "model_b.40").mkdir() other_fam = tmp_path / "otherfam" other_fam.mkdir() - (other_fam / "modelC.20").mkdir() + (other_fam / "model_c.20").mkdir() matches = find_matching_dirs( log_dir=tmp_path, model_family="targetfam", - 
model_name="modelA", + model_name="model_a", before_job_id=100, ) result_names = [p.name for p in matches] - assert "modelA.1" in result_names - assert "modelA.50" in result_names - assert "modelA.150" not in result_names - assert "modelB.40" not in result_names - assert all("modelB" not in p for p in result_names) + assert "model_a.1" in result_names + assert "model_a.50" in result_names + assert "model_a.150" not in result_names + assert "model_b.40" not in result_names + assert all("model_b" not in p for p in result_names) assert all("otherfam" not in str(p) for p in matches)