From 5e0b5544a89b23d38d910572fe8b49dc527d8f5d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 20 May 2026 16:19:34 -0500 Subject: [PATCH] ci+scripts: address review nits from #170 Follow-ups from Milagros' review of #170: - Add `.github/workflows/check-notebooks.yml` so `check_notebook_versions.py` runs on PRs that touch notebooks, `mkdocs.yaml`, or the script itself. Stale connection banners now fail the PR instead of relying on a contributor running the check manually. Kept as a separate lightweight workflow rather than wiring into `development.yml`, which only triggers on push to main (deploy). - `execute_notebooks.py`: route pre-cache warnings to stderr so a transient gitlab.com hiccup is visible in CI logs without silently re-introducing "Downloading file ..." chatter on the next refresh. Wrap the `skimage.data` import in try/except so the script degrades gracefully (and prints to stderr) when scikit-image isn't installed. - `check_notebook_versions.py`: drop the inner `break` so a notebook with multiple stale banners reports all of them, deduped via a set. --- .github/workflows/check-notebooks.yml | 19 +++++++++++++++++++ scripts/check_notebook_versions.py | 10 +++++----- scripts/execute_notebooks.py | 22 ++++++++++++++-------- 3 files changed, 38 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/check-notebooks.yml diff --git a/.github/workflows/check-notebooks.yml b/.github/workflows/check-notebooks.yml new file mode 100644 index 00000000..c0162bb4 --- /dev/null +++ b/.github/workflows/check-notebooks.yml @@ -0,0 +1,19 @@ +name: Check notebooks +on: + pull_request: + paths: + - 'src/tutorials/**/*.ipynb' + - 'src/how-to/**/*.ipynb' + - 'mkdocs.yaml' + - 'scripts/check_notebook_versions.py' + - '.github/workflows/check-notebooks.yml' +jobs: + check-versions: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Verify notebook DataJoint connection banners are current + run: python scripts/check_notebook_versions.py diff --git a/scripts/check_notebook_versions.py b/scripts/check_notebook_versions.py index 5bc9b3d9..3bfa2cf6 100755 --- a/scripts/check_notebook_versions.py +++ b/scripts/check_notebook_versions.py @@ -80,22 +80,22 @@ def main() -> int: notebooks.extend(p for p in d.rglob("*.ipynb") if ".ipynb_checkpoints" not in str(p)) notebooks.sort() - stale: list[tuple[Path, tuple[int, int, int]]] = [] + stale: dict[Path, set[tuple[int, int, int]]] = {} checked = 0 for nb in notebooks: had_banner = False for ver in iter_banner_versions(nb): had_banner = True if ver[0] != target_major or ver[1] != target_minor: - stale.append((nb.relative_to(repo), ver)) - break + stale.setdefault(nb.relative_to(repo), set()).add(ver) if had_banner: checked += 1 if stale: print(f"Stale DataJoint connection banner(s) (target: {target_major}.{target_minor}.x):") - for path, ver in stale: - print(f" {path}: found {ver[0]}.{ver[1]}.{ver[2]}") + for path, vers in sorted(stale.items()): + for ver in sorted(vers): + print(f" {path}: found {ver[0]}.{ver[1]}.{ver[2]}") print(f"\nRe-execute notebooks with MODE=EXECUTE or MODE=EXECUTE_PG.") return 1 diff --git a/scripts/execute_notebooks.py b/scripts/execute_notebooks.py index 8a30953a..fd5b7704 100644 --- a/scripts/execute_notebooks.py +++ b/scripts/execute_notebooks.py @@ -174,15 +174,21 @@ def main(): print(f" DJ_USER: {env.get('DJ_USER')}") # Pre-cache scikit-image datasets so the one-time "Downloading file ..." - # message doesn't leak into committed notebook outputs. + # message doesn't leak into committed notebook outputs. Warnings go to + # stderr so a transient gitlab.com hiccup is visible in CI logs without + # silently re-introducing the download chatter on the next refresh. print("\nPre-caching scikit-image datasets...") - import skimage.data as _sk_data - for _loader in (_sk_data.hubble_deep_field, _sk_data.human_mitosis): - try: - _loader() - print(f" cached: {_loader.__name__}") - except Exception as _e: - print(f" pre-cache warn: {_loader.__name__}: {_e}") + try: + import skimage.data as _sk_data + except ImportError as _e: + print(f" skipping pre-cache (scikit-image not installed): {_e}", file=sys.stderr) + else: + for _loader in (_sk_data.hubble_deep_field, _sk_data.human_mitosis): + try: + _loader() + print(f" cached: {_loader.__name__}") + except Exception as _e: + print(f" pre-cache warn: {_loader.__name__}: {_e}", file=sys.stderr) # Find notebooks notebooks = find_notebooks(args.base_path)