From 23763b06be1105a7d615a1f8165dcae61813f724 Mon Sep 17 00:00:00 2001 From: Charlie Laughton Date: Tue, 23 Jun 2026 12:23:01 +0800 Subject: [PATCH 1/6] Add support to FileHandling to open URLs (for reading only) --- crossflow/__init__.py | 2 +- crossflow/filehandling.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/crossflow/__init__.py b/crossflow/__init__.py index 70495fd..f276901 100644 --- a/crossflow/__init__.py +++ b/crossflow/__init__.py @@ -5,4 +5,4 @@ together command-line driven tools. """ -__version__ = "0.1.4" +__version__ = "0.1.5.dev0" diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py index 5bbde0c..58a4951 100644 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -76,8 +76,18 @@ def __init__(self, path, stage_point, must_exist=True): if not isinstance(path, (os.PathLike, str, bytes)): raise IOError(f"Error - illegal argument type {type(path)} for {path}") if must_exist: - if not os.path.exists(path): - raise IOError("Error - no such file") + if isinstance(path, str) and (path.startswith("http://") or + path.startswith("https://") or + path.startswith("ftp://")): + try: + source = fsspec.open(path) + with source as s: + s.read(1) + except Exception: + raise IOError("Error - no such file") + else: + if not os.path.exists(path): + raise IOError("Error - no such file") source = fsspec.open(path) ext = os.path.splitext(path)[1] self.path = path From 48023b73c1e4688da52b76f97936428a8d64628f Mon Sep 17 00:00:00 2001 From: Charlie Laughton Date: Tue, 23 Jun 2026 13:32:34 +0800 Subject: [PATCH 2/6] Add test for URLs in FileHandler.load --- tests/test_filehandling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_filehandling.py b/tests/test_filehandling.py index 241b6cd..938db95 100644 --- a/tests/test_filehandling.py +++ b/tests/test_filehandling.py @@ -50,6 +50,11 @@ def test_file_protocol(tmpdir): assert pf.read_text() == "content" +def test_url_protocol(): + fh = filehandling.FileHandler() + f = fh.load('https://raw.githubusercontent.com/HECBioSim/crossflow/refs/heads/main/README.md') + + """ def test_s3_protocol(tmpdir): d = tmpdir.mkdir('sub') From 40b48f5dc4dd89008ad3f5689d99ebf21b00a031 Mon Sep 17 00:00:00 2001 From: Charlie Laughton Date: Tue, 23 Jun 2026 13:50:45 +0800 Subject: [PATCH 3/6] Improve docstrings --- crossflow/filehandling.py | 52 +++++++++++++++++++++++++++++++++++++-- crossflow/tasks.py | 41 +++++++++++++++++++++++++++--- 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py index 58a4951..a2e5675 100644 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -16,7 +16,7 @@ filename_here = fh.save(filename_here) they inherit from os.PathLike so can be used anywhere a conventional path can -be used: +be used e.g. with open(fh) as f: ... @@ -44,6 +44,7 @@ def set_stage_point(stage_point): class FileHandler: """ Handle file operations + """ def __init__(self, stage_point=None): @@ -55,13 +56,27 @@ def __init__(self, stage_point=None): def load(self, path): """ Method to load file. + + args: + path (str): file path or URL + + returns: + FileHandle: a FileHandle object + """ return FileHandle(path, self.stage_point, must_exist=True) def create(self, path): """ - Method to load file. + Method to create a new file. + + args: + path (str): file path + + returns: + FileHandle: a FileHandle object + """ return FileHandle(path, self.stage_point, must_exist=False) @@ -70,6 +85,31 @@ def create(self, path): class FileHandle: """ A portable container for a file. + + a FileHandle object is instantiated with the path of an existing file on + an existing file system: + + fh = FileHandle('/path/to/file') + or: + fh = FileHandle('http://example.com/file.txt') + + and has a save() method that creates a local copy of that file: + + filename_here = fh.save(filename_here) + + FileHandle objects inherit from os.PathLike so can be used anywhere a + conventional path can be used: + + with open(fh) as f: + ... + + They can also be used to read and write binary and text data directly: + + data = fh.read_binary() + text = fh.read_text() + fh.write_binary(data) + fh.write_text(text) + """ def __init__(self, path, stage_point, must_exist=True): @@ -190,6 +230,10 @@ def read_text(self): def write_binary(self, data): """ A method for writing binary file formats + + args: + data (bytes): binary data to write + """ compressed_data = zlib.compress(data) @@ -204,6 +248,10 @@ def write_binary(self, data): def write_text(self, text): """ A wrapper for writing binary formatted text. + + args: + text (str): text data to write + """ self.write_binary(text.encode("utf-8")) diff --git a/crossflow/tasks.py b/crossflow/tasks.py index eefa778..54b0599 100644 --- a/crossflow/tasks.py +++ b/crossflow/tasks.py @@ -42,11 +42,19 @@ def _gen_filenames(pattern, n_files): class SubprocessTask: """ A task that runs a command-line executable + + Methods: + set_inputs: set the inputs the task requires + set_outputs: set the outputs the task produces + set_constant: set a constant for the task + run: execute the task + """ def __init__(self, template): """ - Arguments: + Initialize the SubprocessTask. + args: template (str): a template for the command to be executed """ self.template = template @@ -66,6 +74,10 @@ def __call__(self, *args): def set_inputs(self, inputs): """ Set the inputs the task requires + + args: + inputs (list): a list of input variable names + """ if not isinstance(inputs, list): raise TypeError( @@ -76,6 +88,10 @@ def set_inputs(self, inputs): def set_outputs(self, outputs): """ Set the outputs the task produces + + args: + outputs (list): a list of output variable names + """ if not isinstance(outputs, list): raise TypeError( @@ -88,6 +104,11 @@ def set_constant(self, key, value): Set a constant for the task If it was previously defined as an input variable, remove it from that list. + + args: + key (str): the name of the constant + value (str): the value of the constant + """ d = {"name": key} try: @@ -194,7 +215,8 @@ class FunctionTask: def __init__(self, func): """ - Arguments: + Initialize the FunctionTask. + args: func: the Python function to wrap """ self.func = func @@ -210,18 +232,31 @@ def __call__(self, *args): def set_inputs(self, inputs): """ Set the inputs the task requires + + args: + inputs (list): a list of input variable names + """ self.inputs = inputs def set_outputs(self, outputs): """ Set the outputs the task produces + + args: + outputs (list): a list of output variable names + """ self.outputs = outputs def set_constant(self, key, value): """ - Set a parameters for the task + Set a constant for the task + + args: + key (str): the name of the constant + value (str): the value of the constant + """ try: self.constants[key] = self.filehandler.load(value) From 308c529e56b812a924713b275719693a288efb05 Mon Sep 17 00:00:00 2001 From: Charlie Laughton Date: Wed, 24 Jun 2026 18:15:55 +0800 Subject: [PATCH 4/6] Add missing dependencies for URL support by fsspec --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d3486fd..1e91e07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,8 @@ dependencies = [ "dask", "distributed", "fsspec", + "requests", + "aiohttp" ] [project.urls] From b67db4425932752c1951842a6d54026deb3e7361 Mon Sep 17 00:00:00 2001 From: Charlie Laughton Date: Wed, 24 Jun 2026 18:32:11 +0800 Subject: [PATCH 5/6] Fix black linting moan --- crossflow/filehandling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py index a2e5675..13adbf7 100644 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -123,8 +123,8 @@ def __init__(self, path, stage_point, must_exist=True): source = fsspec.open(path) with source as s: s.read(1) - except Exception: - raise IOError("Error - no such file") + except Exception as e: + raise IOError("Error - no such file") from e else: if not os.path.exists(path): raise IOError("Error - no such file") From a866f55cf28d9222ae999ceeba14c694c53d4d97 Mon Sep 17 00:00:00 2001 From: James Gebbie-Rayet Date: Wed, 24 Jun 2026 12:04:30 +0100 Subject: [PATCH 6/6] fix pre-commit gripes --- crossflow/filehandling.py | 28 +++++++++++++++------------- crossflow/tasks.py | 4 ++-- tests/test_filehandling.py | 6 ++++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py index 13adbf7..e86ded6 100644 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -94,17 +94,17 @@ class FileHandle: fh = FileHandle('http://example.com/file.txt') and has a save() method that creates a local copy of that file: - + filename_here = fh.save(filename_here) FileHandle objects inherit from os.PathLike so can be used anywhere a conventional path can be used: - + with open(fh) as f: ... - + They can also be used to read and write binary and text data directly: - + data = fh.read_binary() text = fh.read_text() fh.write_binary(data) @@ -116,15 +116,17 @@ def __init__(self, path, stage_point, must_exist=True): if not isinstance(path, (os.PathLike, str, bytes)): raise IOError(f"Error - illegal argument type {type(path)} for {path}") if must_exist: - if isinstance(path, str) and (path.startswith("http://") or - path.startswith("https://") or - path.startswith("ftp://")): - try: - source = fsspec.open(path) - with source as s: - s.read(1) - except Exception as e: - raise IOError("Error - no such file") from e + if isinstance(path, str) and ( + path.startswith("http://") + or path.startswith("https://") + or path.startswith("ftp://") + ): + try: + source = fsspec.open(path) + with source as s: + s.read(1) + except Exception as e: + raise IOError("Error - no such file") from e else: if not os.path.exists(path): raise IOError("Error - no such file") diff --git a/crossflow/tasks.py b/crossflow/tasks.py index 54b0599..43afe1e 100644 --- a/crossflow/tasks.py +++ b/crossflow/tasks.py @@ -245,7 +245,7 @@ def set_outputs(self, outputs): args: outputs (list): a list of output variable names - + """ self.outputs = outputs @@ -256,7 +256,7 @@ def set_constant(self, key, value): args: key (str): the name of the constant value (str): the value of the constant - + """ try: self.constants[key] = self.filehandler.load(value) diff --git a/tests/test_filehandling.py b/tests/test_filehandling.py index 938db95..b393e55 100644 --- a/tests/test_filehandling.py +++ b/tests/test_filehandling.py @@ -52,9 +52,11 @@ def test_file_protocol(tmpdir): def test_url_protocol(): fh = filehandling.FileHandler() - f = fh.load('https://raw.githubusercontent.com/HECBioSim/crossflow/refs/heads/main/README.md') + f = fh.load( + "https://raw.githubusercontent.com/HECBioSim/crossflow/refs/heads/main/README.md" + ) + - """ def test_s3_protocol(tmpdir): d = tmpdir.mkdir('sub')