From 149d23cd49d34504c3d59223b009176a61e51159 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 26 Feb 2026 23:02:55 +0530 Subject: [PATCH 1/3] [MNT] Diagnose and address long test runtimes (#1633) - Add global per-test timeout (600s) to pytest config - CI: report all test durations (--durations=0) for diagnosis - CI: add explicit --timeout=600 to prevent hanging tests - Optimize verify_cache_state fixture: scope function -> module - Add scripts/profile_tests.sh for local duration profiling --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 1 + scripts/profile_tests.sh | 27 +++++++++++++++++++++++++++ tests/conftest.py | 10 ++++++---- 4 files changed, 37 insertions(+), 7 deletions(-) create mode 100755 scripts/profile_tests.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc0995fc6..44fccc2e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -153,7 +153,7 @@ jobs: marks="not production_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -171,14 +171,14 @@ jobs: marks="production_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' env: OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }} run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" - name: Upload coverage if: matrix.code-cov && always() diff --git a/pyproject.toml b/pyproject.toml index 8c463968b..91235ba04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,6 +130,7 @@ log_level="DEBUG" testpaths = ["tests"] minversion = "7.0" xfail_strict = true +timeout = 600 filterwarnings=[ "ignore:the matrix subclass:PendingDeprecationWarning" ] diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh new file mode 100755 index 000000000..593700cff --- /dev/null +++ b/scripts/profile_tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Profile test durations to diagnose slow tests (Issue #1633) +# Usage: ./scripts/profile_tests.sh [marker_filter] +# +# Examples: +# ./scripts/profile_tests.sh # non-server tests +# ./scripts/profile_tests.sh "production_server" # production server tests only +# ./scripts/profile_tests.sh "sklearn" # sklearn tests only + +set -euo pipefail + +MARKER_FILTER="${1:-not production_server and not test_server}" + +echo "=== OpenML Test Duration Profiler ===" +echo "Marker filter: $MARKER_FILTER" +echo "Timeout per test: 300s" +echo "" + +pytest \ + --durations=0 \ + --timeout=300 \ + -q \ + -m "$MARKER_FILTER" \ + 2>&1 | tee test_durations_report.txt + +echo "" +echo "=== Report saved to test_durations_report.txt ===" diff --git a/tests/conftest.py b/tests/conftest.py index 1359e6247..bbb486b3d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -106,7 +106,8 @@ def delete_remote_files(tracker, flow_names) -> None: if "flow" in tracker: to_sort = list(zip(tracker["flow"], flow_names)) flow_deletion_order = [ - entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) + entity_id + for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) ] tracker["flow"] = [flow_deletion_order[1] for flow_id, _ in flow_deletion_order] @@ -275,7 +276,7 @@ def test_apikey_v2() -> str: return openml.config.get_test_servers()[APIVersion.V2]["apikey"] -@pytest.fixture(autouse=True, scope="function") +@pytest.fixture(autouse=True, scope="module") def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield @@ -324,11 +325,12 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): - return Path(__file__).parent / "files" + return Path(__file__).parent / "files" + @pytest.fixture def workdir(tmp_path): From c8fd9a9c6a81f498c24c8d977c6087078f769538 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 1 Mar 2026 21:36:51 +0530 Subject: [PATCH 2/3] Address review feedback: revert CI/conftest changes, improve profile script - Revert CI workflow to original --durations=20 (no timeout) - Remove global timeout from pyproject.toml - Revert conftest.py verify_cache_state scope to function - Update profile_tests.sh: accept CLI args (-m, -d, -t, -o) with defaults --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 1 - scripts/profile_tests.sh | 44 +++++++++++++++++++++++++++++--------- tests/conftest.py | 2 +- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 44fccc2e7..dc0995fc6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -153,7 +153,7 @@ jobs: marks="not production_server" fi - pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -171,14 +171,14 @@ jobs: marks="production_server" fi - pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' env: OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }} run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" - name: Upload coverage if: matrix.code-cov && always() diff --git a/pyproject.toml b/pyproject.toml index 91235ba04..8c463968b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,7 +130,6 @@ log_level="DEBUG" testpaths = ["tests"] minversion = "7.0" xfail_strict = true -timeout = 600 filterwarnings=[ "ignore:the matrix subclass:PendingDeprecationWarning" ] diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh index 593700cff..88e6f0ad7 100755 --- a/scripts/profile_tests.sh +++ b/scripts/profile_tests.sh @@ -1,27 +1,51 @@ #!/bin/bash # Profile test durations to diagnose slow tests (Issue #1633) -# Usage: ./scripts/profile_tests.sh [marker_filter] +# +# Usage: ./scripts/profile_tests.sh [options] +# +# Options: +# -m MARKER Pytest marker filter (default: "not production_server and not test_server") +# -d DURATION Number of slowest durations to show, 0 for all (default: 20) +# -t TIMEOUT Per-test timeout in seconds (default: 300) +# -o OUTPUT Output file path for the report (default: test_durations_report.txt) # # Examples: -# ./scripts/profile_tests.sh # non-server tests -# ./scripts/profile_tests.sh "production_server" # production server tests only -# ./scripts/profile_tests.sh "sklearn" # sklearn tests only +# ./scripts/profile_tests.sh +# ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 +# ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt set -euo pipefail -MARKER_FILTER="${1:-not production_server and not test_server}" +# Default values +MARKER_FILTER="not production_server and not test_server" +DURATIONS=20 +TIMEOUT=300 +OUTPUT_FILE="test_durations_report.txt" + +# Parse command line arguments +while getopts "m:d:t:o:" opt; do + case $opt in + m) MARKER_FILTER="$OPTARG" ;; + d) DURATIONS="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + o) OUTPUT_FILE="$OPTARG" ;; + *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;; + esac +done echo "=== OpenML Test Duration Profiler ===" echo "Marker filter: $MARKER_FILTER" -echo "Timeout per test: 300s" +echo "Durations to show: $DURATIONS" +echo "Timeout per test: ${TIMEOUT}s" +echo "Output file: $OUTPUT_FILE" echo "" pytest \ - --durations=0 \ - --timeout=300 \ + --durations="$DURATIONS" \ + --timeout="$TIMEOUT" \ -q \ -m "$MARKER_FILTER" \ - 2>&1 | tee test_durations_report.txt + 2>&1 | tee "$OUTPUT_FILE" echo "" -echo "=== Report saved to test_durations_report.txt ===" +echo "=== Report saved to $OUTPUT_FILE ===" diff --git a/tests/conftest.py b/tests/conftest.py index bbb486b3d..03aaafe2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -276,7 +276,7 @@ def test_apikey_v2() -> str: return openml.config.get_test_servers()[APIVersion.V2]["apikey"] -@pytest.fixture(autouse=True, scope="module") +@pytest.fixture(autouse=True, scope="function") def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield From 144cee960de36430c6b4088f79f12a4854fff39e Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 1 Mar 2026 22:11:47 +0530 Subject: [PATCH 3/3] Update profile_tests.sh: add -n workers, --dist=load, remove -q - Add -n flag for parallel workers (default: 4) - Add --dist=load to distribute tests across workers - Remove -q flag for full pytest output - Mimics exact pytest command used in CI --- scripts/profile_tests.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh index 88e6f0ad7..05a8cd2fe 100755 --- a/scripts/profile_tests.sh +++ b/scripts/profile_tests.sh @@ -7,12 +7,13 @@ # -m MARKER Pytest marker filter (default: "not production_server and not test_server") # -d DURATION Number of slowest durations to show, 0 for all (default: 20) # -t TIMEOUT Per-test timeout in seconds (default: 300) +# -n WORKERS Number of parallel workers (default: 4) # -o OUTPUT Output file path for the report (default: test_durations_report.txt) # # Examples: # ./scripts/profile_tests.sh # ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 -# ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt +# ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt set -euo pipefail @@ -20,16 +21,18 @@ set -euo pipefail MARKER_FILTER="not production_server and not test_server" DURATIONS=20 TIMEOUT=300 +NUM_WORKERS=4 OUTPUT_FILE="test_durations_report.txt" # Parse command line arguments -while getopts "m:d:t:o:" opt; do +while getopts "m:d:t:n:o:" opt; do case $opt in m) MARKER_FILTER="$OPTARG" ;; d) DURATIONS="$OPTARG" ;; t) TIMEOUT="$OPTARG" ;; + n) NUM_WORKERS="$OPTARG" ;; o) OUTPUT_FILE="$OPTARG" ;; - *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;; + *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file]" && exit 1 ;; esac done @@ -37,13 +40,15 @@ echo "=== OpenML Test Duration Profiler ===" echo "Marker filter: $MARKER_FILTER" echo "Durations to show: $DURATIONS" echo "Timeout per test: ${TIMEOUT}s" +echo "Workers: $NUM_WORKERS" echo "Output file: $OUTPUT_FILE" echo "" pytest \ + --dist=load \ + -n="$NUM_WORKERS" \ --durations="$DURATIONS" \ --timeout="$TIMEOUT" \ - -q \ -m "$MARKER_FILTER" \ 2>&1 | tee "$OUTPUT_FILE"