From 36ce47d4e7e5535859386937f19387abad616b3e Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 5 Jun 2026 17:30:46 +0200 Subject: [PATCH 1/3] Support fsspec specifiers for ScriptCreator NBs Addresses #42 --- docs/xcetool.md | 9 ++++++--- environment.yml | 1 + pyproject.toml | 1 + test/test_core.py | 10 ++++++++++ xcengine/core.py | 13 ++++++++++--- 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/docs/xcetool.md b/docs/xcetool.md index dece420..fb26634 100644 --- a/docs/xcetool.md +++ b/docs/xcetool.md @@ -13,9 +13,12 @@ details on usage and available options. Usage: `xcetool image build [OPTIONS] NOTEBOOK` -This is the main `xcetool` subcommand: it builds a container image from a supplied -notebook and environment file. If given the `--eoap` argument, it also generates -a CWL file defining a corresponding application package. +This is the main `xcetool` subcommand: it builds a container image from a +supplied notebook and environment file. If given the `--eoap` argument, it also +generates a CWL file defining a corresponding application package. The +NOTEBOOK argument can be a path to a local file, a URL, or any other string +which can be parsed by the [fsspec](https://filesystem-spec.readthedocs.io/) +library. Options: diff --git a/environment.yml b/environment.yml index 13a1707..b551bd1 100644 --- a/environment.yml +++ b/environment.yml @@ -18,6 +18,7 @@ dependencies: - cwltool - pytest - pytest-cov + - pytest-httpserver - pytz # Note: xcube is not required for the conversion itself, but is required diff --git a/pyproject.toml b/pyproject.toml index 2dbcc8d..99eee6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dev = [ "cwltool", "pytest", "pytest-cov", + "pytest-httpserver", "pytz" ] doc = [ diff --git a/test/test_core.py b/test/test_core.py index d6a6255..c27c60e 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -351,6 +351,16 @@ def test_script_creator_notebook_config(): assert config["container_image_tag"] == "my-tag" +def test_script_creator_notebook_config_http(httpserver): + http_path = "/mynotebook.ipynb" + nb_path = pathlib.Path(__file__).parent / "data" / "paramtest.ipynb" + httpserver.expect_request(http_path).respond_with_data(nb_path.read_bytes()) + script_creator = ScriptCreator(httpserver.url_for(http_path)) + config = script_creator.nb_params.config + assert config["environment_file"] == "my-environment.yml" + assert config["container_image_tag"] == "my-tag" + + def test_image_builder_notebook_config(tmp_path): nb_path = pathlib.Path(__file__).parent / "data" / "paramtest.ipynb" image_builder = ImageBuilder(nb_path, None, tmp_path, None) diff --git a/xcengine/core.py b/xcengine/core.py index 41c9332..dc3ac02 100644 --- a/xcengine/core.py +++ b/xcengine/core.py @@ -23,6 +23,7 @@ from docker.errors import BuildError from docker.models.containers import Container from docker.models.images import Image +import fsspec import nbconvert import nbformat import yaml @@ -37,13 +38,19 @@ class ScriptCreator: """Turn a Jupyter notebook into a set of scripts""" - nb_path: pathlib.Path + nb_path: pathlib.Path | str notebook: nbformat.NotebookNode nb_params: NotebookParameters = NotebookParameters({}) - def __init__(self, nb_path: pathlib.Path): + def __init__(self, nb_path: pathlib.Path | str): + """ + Instantiate a ScriptCreator for a specified notebook + + :param nb_path: filesystem path or fsspec-parseable specifier + (e.g. HTTP URL) to the input notebook + """ self.nb_path = nb_path - with open(nb_path) as fh: + with fsspec.open(str(nb_path)) as fh: self.notebook = nbformat.read(fh, as_version=4) self.process_params_cell() From 9dc22b535d3a054c34b76beb70c850c300442e69 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 5 Jun 2026 18:05:29 +0200 Subject: [PATCH 2/3] Support fsspec specifiers for ImageBuilder envs Addresses #42 --- docs/xcetool.md | 12 +++++++----- test/test_core.py | 35 ++++++++++++++++++++++++++--------- xcengine/core.py | 8 ++++---- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/docs/xcetool.md b/docs/xcetool.md index fb26634..42d2724 100644 --- a/docs/xcetool.md +++ b/docs/xcetool.md @@ -6,8 +6,8 @@ The command-line interface to xcengine is the command `xcetool`, which implements multiple subcommands and options for building and running container images and Application Packages. -You can use the `--help` flag for any `xcetool` command or subcommand to get more -details on usage and available options. +You can use the `--help` flag for any `xcetool` command or subcommand to get +more details on usage and available options. ### `xcetool image build` @@ -16,9 +16,9 @@ Usage: `xcetool image build [OPTIONS] NOTEBOOK` This is the main `xcetool` subcommand: it builds a container image from a supplied notebook and environment file. If given the `--eoap` argument, it also generates a CWL file defining a corresponding application package. The -NOTEBOOK argument can be a path to a local file, a URL, or any other string -which can be parsed by the [fsspec](https://filesystem-spec.readthedocs.io/) -library. +NOTEBOOK argument can be a path to a local file, an HTTP URL, or any other +string which can be parsed by the +[fsspec](https://filesystem-spec.readthedocs.io/) library. Options: @@ -28,6 +28,8 @@ Options: This option is mainly useful for debugging. - `-e`, `--environment` `FILE`: Conda environment file to use in Docker image. + This can be a path to a local file, an HTTP URL, or any other string which + can be parsed by the fsspec library. If no environment file is specified here or in the notebook, xcetool will look for a file called `environment.yml` in the notebook's directory. If all else fails, diff --git a/test/test_core.py b/test/test_core.py index c27c60e..29347af 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -380,24 +380,41 @@ def test_image_builder_write_dockerfile(tmp_path): @patch("docker.from_env") -@pytest.mark.parametrize("set_env", [False, True]) +@pytest.mark.parametrize("env_type", ["none", "local", "http"]) @pytest.mark.parametrize("skip_build", [False, True]) -def test_image_builder_build_dir(from_env_mock, tmp_path, set_env, skip_build): +def test_image_builder_build_dir( + from_env_mock, + tmp_path, + httpserver, + env_type, + skip_build +): client_mock = Mock(docker.client.DockerClient) client_mock.images.build.return_value = None, None from_env_mock.return_value = client_mock build_dir = tmp_path / "build" - env_path = tmp_path / "env2.yaml" + build_env_path = tmp_path / "env2.yaml" env_def = { "name": "foo", "channels": "bar", "dependencies": ["python >=3.13", "baz >=42.0"], } - env_path.write_text(yaml.safe_dump(env_def)) + build_env_path.write_text(yaml.safe_dump(env_def)) + env_http = "/env2.yaml" + + match env_type: + case "none": env_param = None + case "local": env_param = build_env_path + case "http": + httpserver.expect_request(env_http).respond_with_data(build_env_path.read_bytes()) + env_param = httpserver.url_for(env_http) + case _: + raise RuntimeError(f"Unknown env type {env_type}") + image_builder = ImageBuilder( pathlib.Path(__file__).parent / "data" / "noparamtest.ipynb", - env_path if set_env else None, + env_param, build_dir, None, ) @@ -406,11 +423,11 @@ def test_image_builder_build_dir(from_env_mock, tmp_path, set_env, skip_build): from_env_mock.assert_not_called() else: client_mock.images.build.assert_called() - env_path = build_dir / "environment.yml" - assert env_path.is_file() - output_env = yaml.safe_load(env_path.read_text()) + build_env_path = build_dir / "environment.yml" + assert build_env_path.is_file() + output_env = yaml.safe_load(build_env_path.read_text()) assert {"name", "channels", "dependencies"} <= set(output_env) - if set_env: + if env_type != "none": assert output_env["name"] == env_def["name"] assert output_env["channels"] == env_def["channels"] assert set(output_env["dependencies"]) >= set(env_def["dependencies"]) diff --git a/xcengine/core.py b/xcengine/core.py index dc3ac02..5cbd315 100644 --- a/xcengine/core.py +++ b/xcengine/core.py @@ -187,12 +187,12 @@ class ImageBuilder: """ tag_format: ClassVar[str] = "%Y.%m.%d.%H.%M.%S" - environment: pathlib.Path | None = None + environment: pathlib.Path | str | None = None def __init__( self, - notebook: pathlib.Path, - environment: pathlib.Path | None, + notebook: pathlib.Path | str, + environment: pathlib.Path | str | None, build_dir: pathlib.Path, tag: str | None, ): @@ -244,7 +244,7 @@ def build( ) -> Image | None: self.script_creator.convert_notebook_to_script(self.build_dir) if self.environment: - with open(self.environment, "r") as fh: + with fsspec.open(self.environment, "r") as fh: env_def = yaml.safe_load(fh) else: LOGGER.warning( From 86f76e691c4488f79dc97755f24c21e15e1f57b5 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 5 Jun 2026 18:34:36 +0200 Subject: [PATCH 3/3] Update changelog --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 018f7bb..5b5f69b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,8 @@ ## Changes in 0.1.3 (in development) +* Allow HTTP URLs and other fsspec-supported specifiers as notebook and + environment file paths (#42) + ## Changes in 0.1.2 * Improve handling of environment file specification (#63)