From 36ea8c54ef7b9e991af2c956975613136bf30305 Mon Sep 17 00:00:00 2001 From: Sergio Souza Costa Date: Tue, 12 May 2026 16:57:51 -0300 Subject: [PATCH 1/4] include documentation --- ...dependabot.yml => dependabot.yml.disabled} | 0 docs/index.md | 23 ++++ docs/platform/api-reference.md | 127 ++++++++++++++++++ docs/platform/data-management.md | 81 +++++++++++ docs/platform/executor-harness.md | 72 ++++++++++ docs/platform/executor.md | 100 ++++++++++++++ docs/platform/experiment-record.md | 74 ++++++++++ docs/platform/index.md | 69 ++++++++++ docs/platform/registry.md | 75 +++++++++++ docs/platform/researcher-guide.md | 85 ++++++++++++ docs/platform/security.md | 66 +++++++++ docs/platform/variable-mapping.md | 82 +++++++++++ mkdocs.yml | 35 +++++ 13 files changed, 889 insertions(+) rename .github/{dependabot.yml => dependabot.yml.disabled} (100%) create mode 100644 docs/index.md create mode 100644 docs/platform/api-reference.md create mode 100644 docs/platform/data-management.md create mode 100644 docs/platform/executor-harness.md create mode 100644 docs/platform/executor.md create mode 100644 docs/platform/experiment-record.md create mode 100644 docs/platform/index.md create mode 100644 docs/platform/registry.md create mode 100644 docs/platform/researcher-guide.md create mode 100644 docs/platform/security.md create mode 100644 docs/platform/variable-mapping.md create mode 100644 mkdocs.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml.disabled similarity index 100% rename from .github/dependabot.yml rename to .github/dependabot.yml.disabled diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..8cdaed4 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,23 @@ +# DisSModel Platform Documentation + +Welcome to the official documentation for the **DisSModel Platform**. + +The DisSModel Platform is a robust service designed for the execution, tracking, and reproduction of Land Use and Cover Change (LUCC) simulations. 
It provides a standardized environment for scientific modeling, ensuring that experiments are audit-ready and fully reproducible. + +## Key Features + +* **Immutable Provenance:** Every simulation is recorded with a complete snapshot of its configuration and data integrity. +* **Model Registry:** Decouples model parameters from implementation via version-controlled TOML files. +* **Scalable Execution:** Leverages Dask and subprocess isolation for high-performance simulation runs. +* **Researcher-Centric:** Designed to facilitate the transition from local Jupyter exploration to large-scale production. + +## Getting Started + +To understand how the platform works, we recommend starting with the following sections: + +1. [**Platform Overview**](platform/index.md) - Learn about the architecture and core components. +2. [**Researcher Guide**](platform/researcher-guide.md) - A step-by-step guide to developing and running models. +3. [**API Reference**](platform/api-reference.md) - Complete documentation of the available REST endpoints. + +--- +*This documentation was automatically generated from the source code.* diff --git a/docs/platform/api-reference.md b/docs/platform/api-reference.md new file mode 100644 index 0000000..3ba69aa --- /dev/null +++ b/docs/platform/api-reference.md @@ -0,0 +1,127 @@ +# REST API Reference + +The DisSModel Platform API is built with FastAPI. All endpoints (except `/health` and `/`) require the `X-API-Key` header for authentication. + +**Base URL:** `http://localhost:8000` + +## Job Submission + +### `POST /submit_job` +Submit a simulation job using a model registered in the [Registry](registry.md). + +**Body Schema:** +| Field | Type | Required | Description | +|---|---|---|---| +| `model_name` | `str` | Yes | Name of the registered model. | +| `input_dataset`| `str` | Yes | URI (`s3://`, `http://`, or local path). | +| `input_format` | `str` | Yes | `geotiff`, `vector`, or `zarr`. 
| +| `column_map` | `dict` | No | Mapping for vector columns. | +| `band_map` | `dict` | No | Mapping for raster bands. | +| `parameters` | `dict` | No | Overrides for TOML defaults. | +| `priority` | `str` | No | `high`, `normal` (default), or `low`. | + +**Example:** +```bash +curl -X POST http://localhost:8000/submit_job \ + -H "X-API-Key: dev-key" \ + -H "Content-Type: application/json" \ + -d '{ + "model_name": "coastal-v1", + "input_dataset": "s3://dissmodel-inputs/data.tif", + "input_format": "geotiff", + "parameters": {"threshold": 0.8} + }' +``` + +--- + +### `POST /submit_job_inline` +Submit a job with an inline TOML spec. Used for development and exploration. + +**Body Schema:** +| Field | Type | Required | Description | +|---|---|---|---| +| `model_spec_toml` | `str` | Yes | Full content of the TOML spec. | +| `input_dataset` | `str` | Yes | URI of the dataset. | +| `input_format` | `str` | Yes | `geotiff`, `vector`, or `zarr`. | +| `column_map` | `dict` | No | Mapping for vector columns. | +| `band_map` | `dict` | No | Mapping for raster bands. | +| `parameters` | `dict` | No | Overrides. | + +--- + +## Status & Tracking + +### `GET /job/{experiment_id}` +Return the current status and provenance record for a specific experiment. + +**Response:** `JobResponse` object (see [ExperimentRecord](experiment-record.md)). + +--- + +### `GET /jobs` +List simulations, optionally filtered by status. + +**Query Parameters:** +| Parameter | Type | Default | Description | +|---|---|---|---| +| `limit` | `int` | `100` | Max number of records to return. | +| `status` | `str` | `null` | Filter by status (`completed`, `failed`, etc.). | + +--- + +## Reproduce & Publish + +### `POST /experiments/{experiment_id}/reproduce` +Re-run an experiment using its original `resolved_spec` snapshot, independent of the current registry state. + +--- + +### `POST /experiments/{experiment_id}/publish` +Export a reproducibility package. Returns the full provenance as JSON. 
+ +--- + +## Model Registry + +### `GET /models` +List all registered models and their basic metadata. + +--- + +### `GET /models/{model_name}` +Return the full TOML spec for a specific model. + +--- + +### `POST /admin/sync` +Force an immediate `git pull` of the `dissmodel-configs` repository. + +--- + +## Data Management + +### `POST /data/upload` +Upload a dataset to the `dissmodel-inputs` bucket. + +**Body (Multipart Form):** +* `file`: The file to upload. +* `label`: A tag for the input directory (e.g., `baseline`). + +**Response:** +```json +{ + "uri": "s3://dissmodel-inputs/inputs/baseline/data.tif", + "checksum": "sha256...", + "size_mb": 45.2 +} +``` + +--- + +### `GET /download` +Generate a presigned URL for downloading a file from S3. + +**Query Parameters:** +* `uri`: The `s3://` URI. +* `expires_hours`: Validity period (default: `1`). diff --git a/docs/platform/data-management.md b/docs/platform/data-management.md new file mode 100644 index 0000000..144da00 --- /dev/null +++ b/docs/platform/data-management.md @@ -0,0 +1,81 @@ +# Data Management + +The DisSModel Platform handles input and output data using a uniform URI scheme. This allows simulations to run on local development machines or cloud workers without changing the model code. + +## URI Resolution + +The `ModelExecutor` uses the internal `_resolve()` logic to fetch data before the simulation starts. + +| URI Scheme | Resolution Logic | Typical Use Case | +|---|---|---| +| `s3://bucket/key` | Downloaded from MinIO to worker's local scratch. | **Production** (MinIO/AWS S3). | +| `http://...` | Downloaded via `urllib` to local scratch. | Public datasets (e.g., NASA, ESA). | +| `/abs/path/...` | Used directly as a local file path. | **Development** (Local machine). 
| + +### Implementation Detail + +```python +# Extracted from storage.py +def download_to_file(uri: str, dest: str) -> str: + if uri.startswith("s3://"): + bucket, key = _parse_s3(uri) + minio_client.fget_object(bucket, key, dest) + return dest + if uri.startswith("http://"): + urllib.request.urlretrieve(uri, dest) + return dest + return uri # Local path unchanged +``` + +## Storage Strategy + +| Situation | Data Location | Access Method | +|---|---|---| +| **Large Datasets** | MinIO `dissmodel-inputs` | Use `mc cp` (MinIO Client). | +| **Small Files (<100MB)** | MinIO `dissmodel-inputs` | Use `POST /data/upload` API. | +| **Simulation Results** | MinIO `dissmodel-outputs` | Auto-uploaded by the Platform. | +| **Local Testing** | Local Disk | Direct path in JobRequest. | + +## Uploading Data + +### Via MinIO Client (`mc`) + +For datasets larger than 100 MB, use the official MinIO client: + +```bash +# Configure the client +mc alias set inpe http://minio:9000 inpe_admin inpe_secret_2024 + +# Upload a GeoTIFF +mc cp my_dataset.tif inpe/dissmodel-inputs/baseline/maranhao_2024.tif +``` + +### Via API + +For small datasets: + +```bash +curl -X POST http://localhost:8000/data/upload \ + -H "X-API-Key: dev-key" \ + -F "file=@my_dataset.tif" \ + -F "label=baseline" +``` + +## Intake Catalogs + +The platform supports `Intake` for structured data access. If a `catalog.yaml` exists in the `dissmodel-configs` repository, executors can use it to load datasets by name instead of URI. + +**Example `catalog.yaml`:** +```yaml +sources: + maranhao_elevation: + driver: rasterio + args: + urlpath: s3://dissmodel-inputs/baseline/altimetry.tif + storage_options: + endpoint_url: http://minio:9000 +``` + +## Note on Local Paths + +During the MVP phase, local file paths continue to work if the worker has access to the same filesystem (e.g., in a Docker Compose development environment). However, for multi-node production clusters, **S3 URIs are mandatory**. 
diff --git a/docs/platform/executor-harness.md b/docs/platform/executor-harness.md new file mode 100644 index 0000000..85ab50b --- /dev/null +++ b/docs/platform/executor-harness.md @@ -0,0 +1,72 @@ +# ExecutorTestHarness + +The `ExecutorTestHarness` is a utility designed to ensure that a `ModelExecutor` implementation correctly follows the platform's contract. It is used both locally in Jupyter notebooks and automatically in CI/CD pipelines. + +## Purpose + +The harness bridges the gap between development and production: +1. **Contract Verification:** Checks if the class has the required methods and type hints. +2. **Structural Integrity:** Verifies if the executor correctly auto-registers in the `ExecutorRegistry`. +3. **Sanity Check:** Runs the `load/validate/run/save` cycle with sample data to catch runtime errors early. + +## Automated Checks + +The harness performs the following internal checks: + +| Method | Check Description | +|---|---| +| `_check_name` | Ensures the class name matches the registry key. | +| `_check_methods` | Verifies presence of `load`, `validate`, `run`, and `save`. | +| `_check_annotations` | Ensures type hints match the `ModelExecutor` ABC. | +| `run_contract_tests()` | Executes all structural checks above. | +| `run_with_sample_data()` | Executes the full lifecycle with a dummy `ExperimentRecord`. | + +## Usage Examples + +### 1. In a Jupyter Notebook + +Researchers should use the harness before opening a Pull Request to the `dissmodel-executors` repository. 
+ +```python +from dissmodel.executor.testing import ExecutorTestHarness +from my_new_executor import CoastalTiffExecutor + +# Step 1: Structural check +harness = ExecutorTestHarness(CoastalTiffExecutor) +if harness.run_contract_tests(): + print("✅ Contract is valid!") + +# Step 2: Runtime check with sample record +record = ExperimentRecord( + model_name="test", + resolved_spec={"model": {"parameters": {"taxa": 0.01}}}, + source={"uri": "local_test.tif", "type": "local"} +) +harness.run_with_sample_data(record) +``` + +### 2. In CI/CD (Pytest) + +The `dissmodel-platform` includes a script (`scripts/validate_executors.py`) that uses the harness to automatically validate all executors in the `executors/` folder. + +```python +# scripts/validate_executors.py logic +for name, cls in ExecutorRegistry.items(): + harness = ExecutorTestHarness(cls) + assert harness.run_contract_tests(), f"Executor {name} failed contract" +``` + +## Adding Custom Unit Tests + +While the harness covers the platform's requirements, you should add model-specific tests to verify mathematical correctness: + +```python +def test_coastal_logic(): + executor = CoastalTiffExecutor() + # Mocking data... + result = executor.run(mock_data, mock_record) + assert result.mean() > 0 # Custom assertion +``` + +!!! tip "Plugin Integration" + A `ModelExecutor` that passes the harness tests is guaranteed to be compatible with the platform's worker and API, significantly reducing integration bugs. diff --git a/docs/platform/executor.md b/docs/platform/executor.md new file mode 100644 index 0000000..c70cd52 --- /dev/null +++ b/docs/platform/executor.md @@ -0,0 +1,100 @@ +# ModelExecutor + +The `ModelExecutor` is the abstract base class (ABC) that every simulation plugin must implement. It defines the lifecycle of a simulation on the platform, from data loading to result persistence. 
+ +## Interface Definition + +```python +from abc import ABC, abstractmethod +from dissmodel.executor.schemas import ExperimentRecord + +class ModelExecutor(ABC): + """ + Base interface for all DisSModel platform executors. + Concrete subclasses must implement the load/validate/run/save cycle. + """ + + @abstractmethod + def load(self, record: ExperimentRecord) -> Any: + """ + Resolve URIs and load data into memory (e.g., Xarray, GeoDataFrame). + Should use record.source.uri and record.resolved_spec. + """ + pass + + @abstractmethod + def validate(self, data: Any, record: ExperimentRecord) -> None: + """ + Verify if the loaded data matches the model requirements. + Check CRS, dimensions, required bands/columns, and mapping. + Raise ValueError on failure. + """ + pass + + @abstractmethod + def run(self, data: Any, record: ExperimentRecord) -> Any: + """ + Execute the core simulation logic. + This is where the actual modeling happens. + """ + pass + + @abstractmethod + def save(self, result: Any, record: ExperimentRecord) -> str: + """ + Persist the result to the output storage (MinIO/S3). + Must return the final S3 URI and update record.output_sha256. + """ + pass +``` + +## Execution Lifecycle + +The platform's generic runner executes the following sequence: + +| Phase | Responsibility | Error Handling | +|---|---|---| +| **`load()`** | Fetch data from S3/HTTP; parse formats. | Retried by worker on network failure. | +| **`validate()`** | Contract verification (mapping, CRS). | Fails immediately (Invalid Request). | +| **`run()`** | Mathematical execution; temporal loops. | Logs stack trace to `ExperimentRecord`. | +| **`save()`** | Write artifacts; generate checksums. | Ensures output path is deterministic. | + +## Auto-Registration Mechanism + +Executors use Python's `__init_subclass__` to register themselves automatically in the `ExecutorRegistry`. The `class` field in the TOML spec connects the configuration to the implementation. 
+ +```python +# In the dissmodel core library (hook on the base class, inherited by every executor): +class ModelExecutor(ABC): + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + ExecutorRegistry.register(cls.__name__, cls) +``` + +## Executor Hierarchy + +The following hierarchy represents the standard executors available: + +* `ModelExecutor` (ABC) + * `RasterModelExecutor` (Base for GeoTIFF/NetCDF) + * `CoastalTiffExecutor` — Sea Level Rise logic for rasters. + * `LUCCExecutor` — Potential/Allocation flow for land-use change. + * `VectorModelExecutor` (Base for Shapefile/GPKG) + * `CoastalVectorExecutor` — High-resolution coastal flooding. + +### Key Differences: Raster vs. Vector + +| Feature | `CoastalTiffExecutor` | `CoastalVectorExecutor` | +|---|---|---| +| **Data Structure** | Xarray / Dask | GeoDataFrame (Pandas-like) | +| **Mapping** | `band_map` | `column_map` | +| **Connectivity** | 4/8-way Adjacency | Topology-based (libpysal) | +| **Scaling** | Pixel-based (Chunked) | Entity-based (Vectorized) | + +## The Generic Runner + +When a worker picks up a job, it calls `run_experiment(record)`, which spawns a subprocess running `worker.job_runner`. This subprocess: +1. Installs the required `package` from the spec. +2. Imports the `executor_module`. +3. Fetches the class from `ExecutorRegistry.get(class_key)`. +4. Calls `execute_lifecycle(executor, record)`. diff --git a/docs/platform/experiment-record.md b/docs/platform/experiment-record.md new file mode 100644 index 0000000..971be99 --- /dev/null +++ b/docs/platform/experiment-record.md @@ -0,0 +1,74 @@ +# ExperimentRecord + +The `ExperimentRecord` is the foundational object for reproducibility in the DisSModel Platform. It is an immutable Pydantic model (once stored) that captures the complete provenance of a simulation, including the model version, input data integrity, and the exact configuration used. + +## Purpose + +* **Auditability:** Every execution is logged with a unique `experiment_id`. 
+* **Immutability:** Once a job is submitted, its `resolved_spec` is snapshotted, ensuring that subsequent changes to the Model Registry do not affect historical results. +* **Reproducibility:** A record contains enough information to re-run the exact same simulation and verify the `output_sha256`. + +## Schema Definition + +| Field | Type | Description | +|---|---|---| +| `experiment_id` | `str` | Unique UUID/hash for the execution. | +| `model_name` | `str` | Name of the model in the registry (e.g., `coastal-dynamics-v1`). | +| `model_commit` | `str` | Git hash of the `dissmodel-configs` repo at submission time. | +| `code_version` | `str` | Version of the `dissmodel` library used. | +| `resolved_spec` | `dict` | Full snapshot of the TOML configuration (including defaults). | +| `source` | `DataSource` | Object containing `uri`, `type` (s3/http/local), and `checksum`. | +| `input_format` | `str` | Format of the input data (`geotiff`, `vector`, `zarr`). | +| `column_map` | `dict` | (Vector only) Mapping from canonical names to dataset columns. | +| `band_map` | `dict` | (Raster only) Mapping from canonical names to TIFF bands. | +| `parameters` | `dict` | Final resolved parameters (TOML defaults + request overrides). | +| `status` | `str` | `queued`, `running`, `completed`, `failed`, or `cancelled`. | +| `created_at` | `datetime` | UTC timestamp of job submission. | +| `output_path` | `str` | S3 URI where the result is stored. | +| `output_sha256` | `str` | SHA256 checksum of the generated output file. | +| `logs` | `list[str]` | Execution logs and lifecycle events. | +| `artifacts` | `dict` | Key-value store of additional outputs (e.g., profiling reports). | + +## Registry vs. 
Execution + +The following table distinguishes between what is fixed in the Model Registry (TOML) and what can vary per execution via the API request: + +| Feature | Registry (TOML) | Execution (API Request) | +|---|---|---| +| Model Class | **Fixed** | N/A | +| Package/Module | **Fixed** | N/A | +| Canonical Vocab | **Fixed** | N/A | +| Parameters | Defaults | **Overrides** | +| Input Dataset | N/A | **Required** | +| Variable Mapping| N/A | **Context-dependent** | + +## Example JSON Response + +`GET /job/{experiment_id}` + +```json +{ + "job_id": "exp_8f2d1c9e", + "experiment_id": "exp_8f2d1c9e", + "status": "completed", + "model_name": "coastal-v1", + "created_at": "2024-05-12T14:30:00Z", + "output_path": "s3://dissmodel-outputs/results/exp_8f2d1c9e.tif", + "output_sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "input_sha256": "d5a8c9b2...", + "logs": [ + "2024-05-12T14:30:00Z - Record created — model=coastal-v1 commit=a1b2c3d", + "2024-05-12T14:30:05Z - Dispatching to subprocess...", + "2024-05-12T14:30:45Z - Completed — val=0.5s | load=10s | run=25s | save=4s | total=39.5s" + ] +} +``` + +## Methodological Citation + +When publishing results, use the following template for the methodology section: + +> "Simulations were performed on the DisSModel Platform (v0.1.0) using the `coastal-v1` model (registry commit `a1b2c3d`). Input data integrity was verified via SHA256 (`d5a8c9b2...`). The complete execution provenance is preserved in ExperimentRecord `exp_8f2d1c9e`." + +!!! warning "Non-reproducible records" + Jobs submitted via `POST /submit_job_inline` will have `model_commit: 'local-inline'`. These records contain the full spec but are **not reproducible** via the registry and should not be used for final academic publication. 
diff --git a/docs/platform/index.md b/docs/platform/index.md new file mode 100644 index 0000000..dd460c1 --- /dev/null +++ b/docs/platform/index.md @@ -0,0 +1,69 @@ +# DisSModel Platform: Overview + +The **DisSModel Platform** is a FastAPI-powered service designed for the execution, tracking, and reproduction of Land Use and Cover Change (LUCC) simulations. Built upon the `dissmodel` core framework and integrated with the Pangeo ecosystem, it provides a robust infrastructure for scientific reproducibility, enabling researchers to transition seamlessly from local exploration in Jupyter notebooks to large-scale production runs. + +## Architecture Layers + +```text +┌───────────────────────────────────────────────────────────┐ +│ Researcher (Jupyter/CLI) │ +└──────────────┬───────────────────────────┬────────────────┘ + │ (HTTPS + X-API-Key) │ (S3/MinIO) + ▼ ▼ +┌───────────────────────────┐ ┌─────────────────────────┐ +│ FastAPI API │ │ MinIO Storage │ +│ (Job submission, Status) ◄─────► (Inputs, Outputs) │ +└──────────────┬────────────┘ └────────────▲────────────┘ + │ (Redis Queue) │ + ▼ │ +┌───────────────────────────┐ │ +│ Dask Worker │──────────────────┘ +│ (Generic Job Runner) │ +└──────────────┬────────────┘ + │ (Subprocess) + ▼ +┌───────────────────────────┐ +│ Model Executor │ +│ (LUCC, Coastal, etc.) │ +└───────────────────────────┘ +``` + +## Repository Ecosystem + +| Repository | Artifact | Lifecycle | +|---|---|---| +| `dissmodel` | Core Library | Fundamental logic, ABCs, and Geo-primitives. | +| `dissmodel-platform` | Web Service | This repo. Handles API, Queue, and Workers. | +| `dissmodel-configs` | Registry | Git-versioned TOML specs and calibrated coefficients. | +| `dissmodel-executors` | Plugins | Collection of concrete `ModelExecutor` implementations. 
| + +## Comparison + +| Feature | DisSModel Platform | LuccME / TerraME | Pure Pangeo | +|---|---|---|---| +| **Reproducibility** | Native (ExperimentRecord) | File-based | Manual / Scripted | +| **Scaling** | Dask-ready Workers | Shared Memory / Cluster | Highly Scalable | +| **Interface** | REST API / Python | Lua / CLI | Python | +| **Versioning** | Git-based TOML | Manual | Manual | + +## Documentation Sections + +* [ExperimentRecord](experiment-record.md): The immutable proof of provenance. +* [Model Registry](registry.md): Decoupling configuration from code via TOML. +* [ModelExecutor](executor.md): The core interface for simulation plugins. +* [Variable Mapping](variable-mapping.md): Canonical vocabulary for data decoupling. +* [API Reference](api-reference.md): Complete REST endpoint documentation. +* [Data Management](data-management.md): Handling S3, HTTP, and local datasets. +* [Executor Test Harness](executor-harness.md): Contract testing for developers. +* [Security](security.md): Authentication and internal credentials. +* [Researcher Guide](researcher-guide.md): Step-by-step from zero to publication. + +## Out of Scope (v0.1) + +| Feature | Status | +|---|---| +| Docker Sandbox | MVP runs in shared worker env (PR review required) | +| BDC / STAC Integration | Planned for Phase 2 | +| Automatic Zenodo Deposit | Manual JSON export for now | +| JWT / OAuth2 Auth | API Key only in MVP | +| Web Dashboard | API only; Jupyter used as frontend | diff --git a/docs/platform/registry.md b/docs/platform/registry.md new file mode 100644 index 0000000..08af88c --- /dev/null +++ b/docs/platform/registry.md @@ -0,0 +1,75 @@ +# Model Registry (TOML) + +The Model Registry is a centralized repository of git-versioned TOML files that define the configuration, parameters, and canonical vocabulary for simulations. This pattern decouples the model's configuration from its Python implementation. 
+ +## Design Principle: Configuration as Code + +By storing coefficients and model specifications in a separate `dissmodel-configs` repository, the platform ensures: +1. **Version Control:** Every change to a model parameter is tracked via git. +2. **Consistency:** All researchers use the same calibrated coefficients for a given model version. +3. **Transparency:** The exact spec used for an experiment is snapshotted in the [ExperimentRecord](experiment-record.md). + +## TOML Structure + +A typical model specification (`models/coastal-v1.toml`) looks like this: + +```toml +[model] +name = "coastal-v1" +description = "Coastal dynamics model with Sea Level Rise" +class = "CoastalTiffExecutor" # Registry key for the Python class +package = "dissmodel-executors==0.1.0" # PyPI/Git/Local package to install + +[model.parameters] +taxa_elevacao = 0.011 # Default value (can be overridden) +threshold = 0.5 + +[model.bands] # Canonical Vocabulary (Raster) +elevation = "alt" +land_use = "uso" + +[model.columns] # Canonical Vocabulary (Vector) +id_cell = "id" +soil_type = "solo" + +[[model.potential]] # Complex nested structures +name = "deforestation" +[model.potential.betas] +distance_to_roads = -0.45 +slope = 1.2 +``` + +## Canonical Vocabulary + +The `[model.bands]` and `[model.columns]` sections define the **Canonical Vocabulary**. The `ModelExecutor` code uses these generic names (e.g., `elevation`), while the `band_map` / `column_map` provided in the API request maps them to the actual names in the dataset (e.g., `SRTM_B1`). + +This allows the same model code to run on different datasets without modification. See [Variable Mapping](variable-mapping.md) for details. + +## Synchronization Flow + +The platform keeps its local cache in sync with the `dissmodel-configs` repository: + +1. **Git Push:** A researcher pushes a new TOML or updates an existing one to the `main` branch of `dissmodel-configs`. +2. 
**Background Sync:** Every 15 minutes (configurable), an `APScheduler` job runs `git pull` on the worker/API nodes. +3. **Cache Invalidation:** If changes are detected, the `lru_cache` of `load_model_spec()` is cleared. +4. **Instant Availability:** New models or parameters are immediately visible via `GET /models`. + +### Manual Synchronization + +To force an immediate sync without waiting for the scheduler: + +```bash +curl -X POST http://localhost:8000/admin/sync -H "X-API-Key: your-token" +``` + +## Inline Specs (Jupyter) + +For rapid exploration, you can bypass the registry using `POST /submit_job_inline`. + +!!! tip "Exploration vs. Production" + Use inline specs to test new model structures. Once the logic is stable, move the spec to the `dissmodel-configs` repo to gain git-versioning and full reproducibility. + +**Limitations of Inline Specs:** +* `model_commit` is marked as `local-inline`. +* Not reproducible via the standard registry flow. +* Intended only for development in Jupyter, before opening a PR to the configs repo. diff --git a/docs/platform/researcher-guide.md b/docs/platform/researcher-guide.md new file mode 100644 index 0000000..51fd689 --- /dev/null +++ b/docs/platform/researcher-guide.md @@ -0,0 +1,85 @@ +# Researcher Guide + +This guide takes you through the complete lifecycle of a simulation, from local development in a Jupyter notebook to large-scale execution and academic publication. + +## Phase 1: Local Development (Jupyter) + +The goal of this phase is to implement and test your simulation logic. + +1. **Installation:** + ```bash + pip install dissmodel dissmodel-platform + ``` +2. **Implementation:** + Create a new subclass of `ModelExecutor` in your notebook. Implement the `load`, `validate`, `run`, and `save` methods. +3. 
**Validation:** + Use the `ExecutorTestHarness` to verify your implementation: + ```python + from dissmodel.executor.testing import ExecutorTestHarness + harness = ExecutorTestHarness(MyExecutor) + harness.run_contract_tests() + ``` +4. **Integration:** + Move your class to a file (e.g., `my_model.py`) in the `dissmodel-executors` repository and open a Pull Request. + +## Phase 2: Model Registration + +Once your code is in the repository, you need to register the calibrated coefficients. + +1. **Calibration:** Calibrate your model parameters using your preferred tools (R, Python, etc.). +2. **Create TOML:** Add a new `.toml` file to `dissmodel-configs/models/`. Define the `class`, `package`, `parameters`, and canonical vocabulary. +3. **Sync:** `git push` your changes. The platform will sync automatically within 15 minutes, or you can force it: + ```bash + curl -X POST http://localhost:8000/admin/sync -H "X-API-Key: your-token" + ``` + +## Phase 3: Platform Execution + +Now you can run the simulation on the platform's production workers. + +1. **Upload Data:** + ```bash + curl -X POST http://localhost:8000/data/upload \ + -H "X-API-Key: your-token" -F "file=@large_dataset.tif" -F "label=baseline" + ``` +2. **Submit Job:** + ```bash + curl -X POST http://localhost:8000/submit_job \ + -H "X-API-Key: your-token" -H "Content-Type: application/json" -d '{ + "model_name": "my-model-v1", + "input_dataset": "s3://dissmodel-inputs/inputs/baseline/large_dataset.tif", + "input_format": "geotiff", + "parameters": {"taxa": 0.05} + }' + ``` +3. **Track Status:** + Use `GET /job/{id}` to follow the progress. + +## Phase 4: Reproduction & Publication + +To ensure your results are valid for a paper: + +1. **Verify Reproduction:** + ```bash + curl -X POST http://localhost:8000/experiments/{id}/reproduce -H "X-API-Key: your-token" + ``` + Compare the `output_sha256` of the original and the reproduced job. They must be identical. +2. **Publish Package:** + Use `POST /experiments/{id}/publish` to export the JSON provenance package. +3. 
**Methodological Citation:** + Include the `experiment_id`, `model_commit`, and `input_sha256` in your paper. + +## Mapping: Original Script → Platform + +| Script Concept | DisSModel Platform Equivalent | +|---|---| +| Hardcoded Paths | `DataSource` URIs (`s3://`) | +| Global Variables | `model.parameters` in TOML | +| Column/Band Names | [Variable Mapping](variable-mapping.md) | +| Local Loops | `ModelExecutor.run()` method | +| `plt.show()` | `Map` and `Chart` integration | +| Result saving | `ModelExecutor.save()` (auto-upload) | +| Log files | `ExperimentRecord.logs` | + +!!! tip "Full Reproducibility" + Always prefer submitting jobs via the Registry (`/submit_job`) for final results. Inline jobs (`/submit_job_inline`) are excellent for debugging but lack the git-versioned proof of provenance required for high-impact publications. diff --git a/docs/platform/security.md b/docs/platform/security.md new file mode 100644 index 0000000..20c4199 --- /dev/null +++ b/docs/platform/security.md @@ -0,0 +1,66 @@ +# Security & Authentication + +The DisSModel Platform implements a two-layered security architecture to protect simulation data and computational resources. + +## Architecture Overview + +```text +Researcher ──(1)──► FastAPI API ──(2)──► MinIO Storage +(External) (X-API-Key) (Internal Creds) +``` + +1. **Researcher → API:** Authenticated via the `X-API-Key` header. +2. **API/Worker → MinIO:** Uses internal service-to-service credentials that are never exposed to the researcher. + +## Authentication (X-API-Key) + +Every request to the platform (except health checks) must include a valid API key. 
+ +```python +# Extracted from main.py +_API_KEY_HEADER = APIKeyHeader(name="X-API-Key", auto_error=True) +_VALID_KEYS = set(os.getenv("API_KEYS", "dev-key").split(",")) + +async def require_api_key(key: str = Depends(_API_KEY_HEADER)) -> str: + if key not in _VALID_KEYS: + raise HTTPException(status_code=403, detail="Invalid API key") + return key +``` + +### Configuration via Environment + +API keys and MinIO credentials are configured using environment variables (or a `.env` file): + +```bash +# .env example +API_KEYS=researcher-1-token,researcher-2-token +MINIO_ACCESS_KEY=inpe_admin +MINIO_SECRET_KEY=inpe_secret_2024 +MINIO_ENDPOINT=minio:9000 +``` + +## Security Roadmap + +| Phase | Authentication | Execution Security | +|---|---|---| +| **MVP** | API Key (Static) | Manual PR Review of Executors | +| **Phase 2** | JWT / OpenID Connect | PyPI-only Plugins (Signed) | +| **Phase 3** | OAuth2 (INPE Accounts) | Docker/Wasm Sandboxing | + +## Pipeline Security (CI/CD) + +All Pull Requests to the `dissmodel-executors` repository undergo automated security scanning: + +* **Bandit:** Scans for common security issues in Python code (e.g., `subprocess` usage, hardcoded secrets). +* **Mypy:** Enforces static type checking to catch type-related bugs before execution. +* **Contract Testing:** Uses `ExecutorTestHarness` to ensure the executor implements the platform's `load/validate/run/save` contract and registers correctly. + +## Execution Guardrails + +Since workers run simulations as subprocesses, they have limited access to the system: +* **Read-only Root:** The worker filesystem is mostly read-only. +* **Scratch Space:** Each job is assigned a unique temporary directory for intermediate files. +* **Resource Limits:** CPU and Memory limits are enforced via Docker/Dask. + +!!! danger "Credential Protection" + Never commit API keys or `.env` files to git. Use the `X-API-Key` header exclusively for authentication. If a key is compromised, rotate it immediately in the platform's environment configuration. 
diff --git a/docs/platform/variable-mapping.md b/docs/platform/variable-mapping.md new file mode 100644 index 0000000..ffb97fc --- /dev/null +++ b/docs/platform/variable-mapping.md @@ -0,0 +1,82 @@ +# Variable Mapping + +Variable mapping is the mechanism that allows a single `ModelExecutor` to run on multiple datasets with different column or band names. By using a **Canonical Vocabulary**, we decouple the mathematical model from the data structure. + +## The Problem + +Imagine a Coastal model that requires "elevation" data. +* Dataset A calls it `altimetry`. +* Dataset B calls it `DEM_2024_meters`. +* Dataset C stores it in `Band 3` of a GeoTIFF. + +Without mapping, you would need three different versions of the model code. + +## The Solution: Canonical Vocabulary + +In the [Model Registry](registry.md) (TOML), we define the names the code expects: + +```toml +[model.bands] +elevation = "alt" # "elevation" is the canonical name; "alt" is the TOML default +land_use = "uso" +``` + +In the API request, the researcher provides a mapping to the actual dataset: + +### `column_map` (Vector Data) + +Mapping for GeoDataFrames (Shapefile, GeoPackage): + +```bash +curl -X POST http://localhost:8000/submit_job \ + -H "X-API-Key: your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "model_name": "coastal-v1", + "input_dataset": "s3://dissmodel-inputs/ilha_maranhao.gpkg", + "column_map": { + "elevation": "DEM_meters", + "land_use": "LULC_2023" + } + }' +``` + +### `band_map` (Raster Data) + +Mapping for GeoTIFFs where bands are identified by index (1-based) or name: + +```bash +curl -X POST http://localhost:8000/submit_job \ + -H "X-API-Key: your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "model_name": "coastal-v1", + "input_dataset": "s3://dissmodel-inputs/maranhao_srtm.tif", + "band_map": { + "elevation": 1, + "land_use": 2 + } + }' +``` + +## When to Omit Mapping + +If the dataset already uses the canonical names (e.g., it was generated by another 
DisSModel simulation or follows the project's standard naming convention), you can omit the `column_map`/`band_map` in the request. The platform will fall back to the defaults defined in the TOML spec. + +## Validation + +The `ModelExecutor.validate()` method is responsible for checking if the mapping is correct before the simulation starts. + +```python +# Extracted from a typical RasterModelExecutor.validate() +def validate(self, data, record): + expected_bands = record.resolved_spec["model"]["bands"].keys() + mapped_bands = record.band_map.values() + + for band in mapped_bands: + if band not in data.data_vars: + raise ValueError(f"Band '{band}' not found in input dataset.") +``` + +!!! tip "Automatic Catch" + By validating early, the platform prevents wasting computation time on a job that would eventually fail due to a missing column or band. diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..f25865a --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,35 @@ +site_name: DisSModel Platform Documentation +site_description: Documentation for the DisSModel Platform API and Worker system. 
+theme: + name: material + palette: + primary: indigo + accent: indigo + features: + - navigation.tabs + - content.code.copy + +plugins: + - search + +nav: + - Home: index.md + - Platform: + - Overview: platform/index.md + - ExperimentRecord: platform/experiment-record.md + - Model Registry: platform/registry.md + - ModelExecutor: platform/executor.md + - Variable Mapping: platform/variable-mapping.md + - API Reference: platform/api-reference.md + - Data Management: platform/data-management.md + - Executor Test Harness: platform/executor-harness.md + - Security & Auth: platform/security.md + - Researcher Guide: platform/researcher-guide.md + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - attr_list From 59c20a42e23ffd16d402bc5375a9a3d9ac2aae33 Mon Sep 17 00:00:00 2001 From: Sergio Souza Costa Date: Tue, 12 May 2026 16:58:17 -0300 Subject: [PATCH 2/4] update --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 230375a..af4cc18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Python +site configs __pycache__/ *.py[cod] From 748b0ee61f32e8907ed5f215b5ea2f34d7285650 Mon Sep 17 00:00:00 2001 From: Sergio Souza Costa Date: Sat, 13 Jun 2026 10:30:47 -0300 Subject: [PATCH 3/4] chore: housekeeping pre-release v0.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix all LambdaGeo → DisSModel org URLs in README and docs/deployment.md - Rename services/frontend/ → services/jupyter/ (reflects actual JupyterLab service) - Update docker-compose.yml, docker-compose.prod.yml and CI to reference new path - Remove || true from lint/typecheck/security CI jobs; fix underlying errors: - ruff: remove unused imports, move mid-file imports to module top (main.py) - mypy: add type: ignore[misc] for redis-py sync/async stub ambiguity (worker.py) - bandit: add nosec B104/B310 for intentional container bind and 
guarded urlretrieve - Add PYTHONPATH=$PWD/services to mypy CI step so worker imports resolve - Add CHANGELOG.md for v0.1.0 Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/ci.yml | 12 ++--- CHANGELOG.md | 45 +++++++++++++++++++ README.md | 10 ++--- docker-compose.prod.yml | 2 +- docker-compose.yml | 2 +- docs/deployment.md | 2 +- services/api/main.py | 18 +++----- services/{frontend => jupyter}/Dockerfile | 0 .../{frontend => jupyter}/jupyter_config.py | 0 .../{frontend => jupyter}/requirements.txt | 0 services/worker/storage.py | 3 +- services/worker/worker.py | 4 +- 12 files changed, 69 insertions(+), 29 deletions(-) create mode 100644 CHANGELOG.md rename services/{frontend => jupyter}/Dockerfile (100%) rename services/{frontend => jupyter}/jupyter_config.py (100%) rename services/{frontend => jupyter}/requirements.txt (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index baca9c7..31f09b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: run: pip install ruff - name: Run Lint - run: ruff check services/api/ services/worker/ --exclude __pycache__,*.apagar,*.pyc || true + run: ruff check services/api/ services/worker/ --exclude __pycache__,*.apagar,*.pyc typecheck: @@ -52,7 +52,7 @@ jobs: - name: Run Mypy run: | # Focamos apenas na lógica da API e Worker da plataforma - mypy --ignore-missing-imports services/api/main.py services/worker/worker.py || true + PYTHONPATH=$PWD/services mypy --ignore-missing-imports services/api/main.py services/worker/worker.py security: @@ -70,7 +70,7 @@ jobs: run: pip install bandit - name: Run Bandit - run: bandit -r services/api/ services/worker/ -ll -ii -f txt -o bandit-report.txt || true + run: bandit -r services/api/ services/worker/ -ll -ii -f txt -o bandit-report.txt - name: Upload Bandit Report uses: actions/upload-artifact@v4 @@ -193,11 +193,11 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max - - name: Build Frontend + - name: Build Jupyter 
uses: docker/build-push-action@v5 with: - context: ./services/frontend - file: ./services/frontend/Dockerfile + context: ./services/jupyter + file: ./services/jupyter/Dockerfile push: false cache-from: type=gha cache-to: type=gha,mode=max diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..78f0e68 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,45 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.0] - 2026-06-13 + +First public release of the DisSModel Platform MVP. + +### Added + +- **REST API** (`services/api`) — FastAPI gateway with `X-API-Key` authentication applied to all routes; endpoints for job submission (async and inline), status polling, reproduction, publishing, model listing, file upload/download, and admin sync. +- **Worker** (`services/worker`) — Redis-queue consumer that delegates execution to `dissmodel.executor.runner.execute_lifecycle`; publishes profiling metrics and `ExperimentRecord` JSON to MinIO. +- **JupyterLab** (`services/jupyter`) — Containerised notebook environment (port 8888) with `dissmodel`, `ipyleaflet`, `ipywidgets`, and `folium` pre-installed. +- **Streamlit CA Explorer** (`services/streamlit-ca`) — Interactive explorer for Cellular Automata models. +- **Streamlit SysDyn Explorer** (`services/streamlit-sysdyn`) — Interactive explorer for System Dynamics models. +- **Nginx reverse proxy** (`services/nginx`) — Routes `/dissmodel/jupyter`, `/dissmodel/api`, `/dissmodel/minio` in production. +- **Docker Compose** — Development (`docker-compose.yml`) and production (`docker-compose.prod.yml`) stacks with Redis, MinIO, config-sync sidecar, and all services. 
+- **CI pipeline** (`.github/workflows/ci.yml`) — Lint (ruff), type-check (mypy), security scan (bandit), API tests, worker executor validation, and Docker build jobs; all gates are enforced (no `|| true` bypasses). +- **Dependabot** — Automated dependency updates for `services/api`, `services/worker`, and `services/jupyter`. +- **Presigned URL generation** — Local HMAC signing for MinIO download links without extra network round-trips. +- **Config-sync sidecar** — Git-backed model registry auto-pulled into all services at runtime. +- **Executor contract validation** (`scripts/validate_executors.py`) — CI step that checks registered executors comply with the dissmodel interface before tests run. + +### Fixed + +- Moved mid-file imports (`hmac`, `hashlib`, `urllib.parse`, `datetime.timezone`) to module top in `services/api/main.py`. +- Removed unused imports (`json`, `timedelta`, `S3Error`, `start_sync_scheduler`, `reproduce_experiment`, `run_experiment`) from `services/api/main.py` and `services/worker/storage.py`. +- Added `# type: ignore[misc]` for redis-py sync/async stub ambiguity in `services/worker/worker.py`. +- Added `# nosec B104` and `# nosec B310` for intentional false positives in bandit scan. + +### Changed + +- Renamed `services/frontend/` → `services/jupyter/` to reflect that the service is JupyterLab, not a generic web frontend. +- Updated all repository URLs from `LambdaGeo/dissmodel-platform` → `DisSModel/dissmodel-platform` in `README.md` and `docs/deployment.md`. +- Updated core library link from `LambdaGeo/dissmodel` → `DisSModel/dissmodel`. +- Updated organisation name from `LambdaGeo / INPE` → `DisSModel / INPE` in `README.md` contact section. +- `typecheck` CI job now sets `PYTHONPATH=$PWD/services` so worker imports resolve correctly without stubs. 
+ +[Unreleased]: https://github.com/DisSModel/dissmodel-platform/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/DisSModel/dissmodel-platform/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 6645b05..52a2d05 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ An integrated environment for developing and running geospatial models, featurin ```bash # 1. Clone the repository -git clone https://github.com/LambdaGeo/dissmodel-platform.git +git clone https://github.com/DisSModel/dissmodel-platform.git cd dissmodel-platform # 2. Configure environment variables @@ -175,13 +175,13 @@ MIT License — see [LICENSE](LICENSE) ## 🙏 Acknowledgements -- [DisSModel](https://github.com/LambdaGeo/dissmodel) — Core modelling library +- [DisSModel](https://github.com/DisSModel/dissmodel) — Core modelling library - [Jupyter Project](https://jupyter.org/) — Development environment - [MinIO](https://min.io/) — S3-compatible object storage - [Pangeo](https://pangeo.io/) — Inspiration for cloud-native architecture ## 📞 Contact -- **Organisation:** LambdaGeo / INPE -- **Issues:** https://github.com/LambdaGeo/dissmodel-platform/issues -- **Discussions:** https://github.com/LambdaGeo/dissmodel-platform/discussions +- **Organisation:** DisSModel / INPE +- **Issues:** https://github.com/DisSModel/dissmodel-platform/issues +- **Discussions:** https://github.com/DisSModel/dissmodel-platform/discussions diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 5071523..ac6d5a1 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -19,7 +19,7 @@ services: #sudo chmod -R 775 ./workspace jupyter: build: - context: ./services/frontend + context: ./services/jupyter dockerfile: Dockerfile container_name: dissmodel-jupyter restart: unless-stopped diff --git a/docker-compose.yml b/docker-compose.yml index be6c280..5995613 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,7 +27,7 @@ services: jupyter: build: - context: ./services/frontend + context: 
./services/jupyter dockerfile: Dockerfile container_name: dissmodel-jupyter restart: unless-stopped diff --git a/docs/deployment.md b/docs/deployment.md index b3d14d6..2d6caad 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -11,7 +11,7 @@ ```bash # Clonar -git clone https://github.com/LambdaGeo/dissmodel-platform.git +git clone https://github.com/DisSModel/dissmodel-platform.git cd dissmodel-platform # Configurar diff --git a/services/api/main.py b/services/api/main.py index 1ae1f4a..4d3a4e8 100644 --- a/services/api/main.py +++ b/services/api/main.py @@ -1,13 +1,15 @@ # services/api/main.py from __future__ import annotations +import hashlib +import hmac import io -import json import logging import os from contextlib import asynccontextmanager -from datetime import datetime, timedelta +from datetime import datetime, timezone from typing import Optional +from urllib.parse import quote, urlencode import redis from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile @@ -15,10 +17,9 @@ from fastapi.security import APIKeyHeader from minio import Minio -from minio.error import S3Error -from worker.api_registry import list_models, load_model_spec, start_sync_scheduler, sync_configs -from worker.runner import build_record, build_record_inline, reproduce_experiment, run_experiment +from worker.api_registry import list_models, load_model_spec, sync_configs +from worker.runner import build_record, build_record_inline from dissmodel.executor.schemas import ExperimentRecord, InlineJobRequest, JobRequest, JobResponse # ── Logging ─────────────────────────────────────────────────────────────────── @@ -308,11 +309,6 @@ async def upload_dataset( } -import hmac -import hashlib -from urllib.parse import urlencode, quote -from datetime import timezone - def _presign_url(bucket: str, key: str, expires_seconds: int = 3600) -> str: """Gera presigned URL sem conexão de rede — cálculo local puro.""" server_url = os.getenv("MINIO_URL", 
"http://localhost:19000").rstrip("/") @@ -399,4 +395,4 @@ async def general_exception_handler(request, exc): if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) # nosec B104 \ No newline at end of file diff --git a/services/frontend/Dockerfile b/services/jupyter/Dockerfile similarity index 100% rename from services/frontend/Dockerfile rename to services/jupyter/Dockerfile diff --git a/services/frontend/jupyter_config.py b/services/jupyter/jupyter_config.py similarity index 100% rename from services/frontend/jupyter_config.py rename to services/jupyter/jupyter_config.py diff --git a/services/frontend/requirements.txt b/services/jupyter/requirements.txt similarity index 100% rename from services/frontend/requirements.txt rename to services/jupyter/requirements.txt diff --git a/services/worker/storage.py b/services/worker/storage.py index efc4fec..9d7b3ef 100644 --- a/services/worker/storage.py +++ b/services/worker/storage.py @@ -6,7 +6,6 @@ import os from minio import Minio -from minio.error import S3Error # ── Client ──────────────────────────────────────────────────────────────────── @@ -35,7 +34,7 @@ def download_to_file(uri: str, dest: str) -> str: if uri.startswith("http://") or uri.startswith("https://"): import urllib.request - urllib.request.urlretrieve(uri, dest) + urllib.request.urlretrieve(uri, dest) # nosec B310 return dest return uri # local path — return as-is diff --git a/services/worker/worker.py b/services/worker/worker.py index 1e5d320..3dd08da 100644 --- a/services/worker/worker.py +++ b/services/worker/worker.py @@ -86,10 +86,10 @@ def main() -> None: while True: try: # brpop blocks up to 5s and respects queue priority order - result = redis_client.brpop(QUEUES, timeout=5) + result = redis_client.brpop(QUEUES, timeout=5) # type: ignore[misc] if result: - _, experiment_id = result + _, experiment_id = result # type: ignore[misc] 
process_job(experiment_id) except KeyboardInterrupt: From b9613b95fb0aac9042ae8c9641ec5bba67864f27 Mon Sep 17 00:00:00 2001 From: Sergio Souza Costa Date: Sat, 13 Jun 2026 10:57:37 -0300 Subject: [PATCH 4/4] chore: reactivate dependabot with monthly schedule and add services/jupyter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Renamed dependabot.yml.disabled → dependabot.yml - Changed all pip and github-actions intervals: weekly → monthly - Added /services/jupyter to pip coverage Co-Authored-By: Claude Sonnet 4.6 --- .github/dependabot.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..57cdb58 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,29 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + commit-message: + prefix: "chore(deps):" + + - package-ecosystem: "pip" + directory: "/services/api" + schedule: + interval: "monthly" + commit-message: + prefix: "chore(deps):" + + - package-ecosystem: "pip" + directory: "/services/worker" + schedule: + interval: "monthly" + commit-message: + prefix: "chore(deps):" + + - package-ecosystem: "pip" + directory: "/services/jupyter" + schedule: + interval: "monthly" + commit-message: + prefix: "chore(deps):"