From 36d62797552c44929c06b21281435de049e40a6c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:09:25 -0700 Subject: [PATCH 01/40] chore: scaffold empty hm-pipeline-ir crate --- Cargo.lock | 9 +++++++++ Cargo.toml | 3 +++ crates/hm-pipeline-ir/Cargo.toml | 18 ++++++++++++++++++ crates/hm-pipeline-ir/src/lib.rs | 10 ++++++++++ 4 files changed, 40 insertions(+) create mode 100644 crates/hm-pipeline-ir/Cargo.toml create mode 100644 crates/hm-pipeline-ir/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 43ee1aa..3a59db5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1630,6 +1630,15 @@ dependencies = [ "serde_json", ] +[[package]] +name = "hm-pipeline-ir" +version = "0.0.0-dev" +dependencies = [ + "schemars 0.8.22", + "serde", + "serde_json", +] + [[package]] name = "hm-plugin-cloud" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index f49f751..6a05d9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ resolver = "2" members = [ "crates/hm", "crates/hm-plugin-protocol", + "crates/hm-pipeline-ir", "crates/hm-plugin-sdk", "crates/hm-plugin-docker", "crates/hm-plugin-output-human", @@ -13,6 +14,7 @@ members = [ default-members = [ "crates/hm", "crates/hm-plugin-protocol", + "crates/hm-pipeline-ir", "crates/hm-plugin-sdk", ] @@ -23,6 +25,7 @@ repository = "https://github.com/harmont-dev/harmont-cli" [workspace.dependencies] hm-plugin-protocol = { path = "crates/hm-plugin-protocol", version = "0.0.0-dev" } +hm-pipeline-ir = { path = "crates/hm-pipeline-ir", version = "0.0.0-dev" } hm-plugin-sdk = { path = "crates/hm-plugin-sdk", version = "0.0.0-dev" } serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/crates/hm-pipeline-ir/Cargo.toml b/crates/hm-pipeline-ir/Cargo.toml new file mode 100644 index 0000000..ac37361 --- /dev/null +++ b/crates/hm-pipeline-ir/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "hm-pipeline-ir" +version = "0.0.0-dev" +edition.workspace = true +license.workspace = true 
+repository.workspace = true +description = "Pipeline IR — the v0 wire-format schema consumed by hm." + +[dependencies] +serde = { workspace = true } +serde_json = { workspace = true } +schemars = { workspace = true } + +[dev-dependencies] +serde_json = { workspace = true } + +[lints] +workspace = true diff --git a/crates/hm-pipeline-ir/src/lib.rs b/crates/hm-pipeline-ir/src/lib.rs new file mode 100644 index 0000000..5e406df --- /dev/null +++ b/crates/hm-pipeline-ir/src/lib.rs @@ -0,0 +1,10 @@ +//! Pipeline IR, the v0 wire format consumed by the `hm` binary. +//! +//! Source of truth lives in two other places that must stay in sync +//! with this file: `harmont-pipeline/src/Harmont/Pipeline/Schema.hs` +//! (Haskell mirror) and `cidsl/py/harmont/__init__.py` (Python emitter). +//! Changing a field name here means changing it in both other places +//! in the same PR. + +#![forbid(unsafe_code)] +#![allow(clippy::multiple_crate_versions, clippy::cargo_common_metadata)] From 4b41b1bfa45c3141986cbaafb335046a0bf0666b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:11:10 -0700 Subject: [PATCH 02/40] refactor: move pipeline IR types to hm-pipeline-ir crate --- Cargo.lock | 1 + crates/hm-pipeline-ir/src/lib.rs | 65 +++++++++++++++ crates/hm-plugin-protocol/Cargo.toml | 1 + crates/hm-plugin-protocol/src/ir.rs | 115 +-------------------------- 4 files changed, 71 insertions(+), 111 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a59db5..2d64248 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1701,6 +1701,7 @@ name = "hm-plugin-protocol" version = "0.0.0-dev" dependencies = [ "chrono", + "hm-pipeline-ir", "insta", "schemars 0.8.22", "semver", diff --git a/crates/hm-pipeline-ir/src/lib.rs b/crates/hm-pipeline-ir/src/lib.rs index 5e406df..03cbd11 100644 --- a/crates/hm-pipeline-ir/src/lib.rs +++ b/crates/hm-pipeline-ir/src/lib.rs @@ -8,3 +8,68 @@ #![forbid(unsafe_code)] #![allow(clippy::multiple_crate_versions, clippy::cargo_common_metadata)] + +use 
std::collections::BTreeMap; + +use schemars::JsonSchema as DeriveJsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +pub struct Pipeline { + /// Must equal `"0"` — bumping this is reserved for breaking + /// schema changes, none of which are scheduled. The v0 schema + /// gains optional fields in-place (see `runner` below). + pub version: String, + #[serde(default)] + pub env: Option<BTreeMap<String, String>>, + #[serde(default)] + pub default_image: Option<String>, + pub steps: Vec<Step>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Step { + Command(Box<CommandStep>), + Wait(WaitStep), +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +pub struct CommandStep { + pub key: String, + #[serde(default)] + pub label: Option<String>, + pub cmd: String, + #[serde(default)] + pub builds_in: Option<String>, + #[serde(default)] + pub image: Option<String>, + #[serde(default)] + pub env: Option<BTreeMap<String, String>>, + #[serde(default)] + pub timeout_seconds: Option<u32>, + #[serde(default)] + pub cache: Option<Cache>, + /// Names the step-executor plugin that should run this step. + /// `None` ⇒ the default executor handles it (Docker, in the + /// shipped configuration). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub runner: Option<String>, + /// Plugin-specific extra fields. Validated by the executor + /// plugin's `StepExecutorSpec::step_schema` if it set one. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub runner_args: Option<serde_json::Value>, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +pub struct WaitStep { + #[serde(default)] + pub continue_on_failure: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +pub struct Cache { + pub policy: String, + #[serde(default)] + pub key: Option<String>, +} diff --git a/crates/hm-plugin-protocol/Cargo.toml b/crates/hm-plugin-protocol/Cargo.toml index ec74a4e..f8e92f8 100644 --- a/crates/hm-plugin-protocol/Cargo.toml +++ b/crates/hm-plugin-protocol/Cargo.toml @@ -7,6 +7,7 @@ repository.workspace = true description = "Wire-level data types shared between the hm binary and hm plugins." [dependencies] +hm-pipeline-ir = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } schemars = { workspace = true } diff --git a/crates/hm-plugin-protocol/src/ir.rs b/crates/hm-plugin-protocol/src/ir.rs index 8446ba9..44c1e3d 100644 --- a/crates/hm-plugin-protocol/src/ir.rs +++ b/crates/hm-plugin-protocol/src/ir.rs @@ -1,112 +1,5 @@ -//! Pipeline IR, the v0 wire format consumed by the `hm` binary. -//! -//! Source of truth lives in two other places that must stay in sync -//! with this file: `harmont-pipeline/src/Harmont/Pipeline/Schema.hs` -//! (Haskell mirror) and `cidsl/py/harmont/__init__.py` (Python emitter). -//! Changing a field name here means changing it in both other places -//! in the same PR. +//! Re-exports from `hm_pipeline_ir`. The canonical definitions now +//! live in the `hm-pipeline-ir` crate; this module keeps the +//! `hm_plugin_protocol::ir::*` import path working. 
-use std::collections::BTreeMap; - -use schemars::JsonSchema as DeriveJsonSchema; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct Pipeline { - /// Must equal `"0"` — bumping this is reserved for breaking - /// schema changes, none of which are scheduled. The v0 schema - /// gains optional fields in-place (see `runner` below). - pub version: String, - #[serde(default)] - pub env: Option<BTreeMap<String, String>>, - #[serde(default)] - pub default_image: Option<String>, - pub steps: Vec<Step>, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum Step { - Command(Box<CommandStep>), - Wait(WaitStep), -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct CommandStep { - pub key: String, - #[serde(default)] - pub label: Option<String>, - pub cmd: String, - #[serde(default)] - pub builds_in: Option<String>, - #[serde(default)] - pub image: Option<String>, - #[serde(default)] - pub env: Option<BTreeMap<String, String>>, - #[serde(default)] - pub timeout_seconds: Option<u32>, - #[serde(default)] - pub cache: Option<Cache>, - - /// Names the step-executor plugin that should run this step. - /// `None` ⇒ the default executor handles it (Docker, in the - /// shipped configuration). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub runner: Option<String>, - - /// Plugin-specific extra fields. Validated by the executor - /// plugin's `StepExecutorSpec::step_schema` if it set one. 
- #[serde(default, skip_serializing_if = "Option::is_none")] - pub runner_args: Option<serde_json::Value>, -} - -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct WaitStep { - #[serde(default)] - pub continue_on_failure: bool, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct Cache { - pub policy: String, - #[serde(default)] - pub key: Option<String>, -} - -#[cfg(test)] -#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] -mod tests { - use super::*; - - #[test] - fn parses_step_with_runner() { - let json = br#"{ - "version": "0", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "freestyle run", - "runner": "freestyle", "runner_args": {"region": "us"}} - ] - }"#; - let p: Pipeline = serde_json::from_slice(json).unwrap(); - let Step::Command(b) = &p.steps[1] else { - panic!("expected command") - }; - assert_eq!(b.runner.as_deref(), Some("freestyle")); - assert_eq!(b.runner_args.as_ref().unwrap()["region"], "us"); - } - - #[test] - fn parses_legacy_step_without_runner() { - let json = br#"{ - "version": "0", - "steps": [{"type": "command", "key": "a", "cmd": "echo a"}] - }"#; - let p: Pipeline = serde_json::from_slice(json).unwrap(); - let Step::Command(a) = &p.steps[0] else { - panic!("expected command") - }; - assert!(a.runner.is_none()); - assert!(a.runner_args.is_none()); - } -} +pub use hm_pipeline_ir::{Cache, CommandStep, Pipeline, Step, WaitStep}; From b987ee772affad37e387ee5f838b2e1e60fa01e0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:14:11 -0700 Subject: [PATCH 03/40] test: add pipeline IR parsing tests to hm-pipeline-ir --- crates/hm-pipeline-ir/tests/parse_pipeline.rs | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 crates/hm-pipeline-ir/tests/parse_pipeline.rs diff --git a/crates/hm-pipeline-ir/tests/parse_pipeline.rs 
b/crates/hm-pipeline-ir/tests/parse_pipeline.rs new file mode 100644 index 0000000..3082e49 --- /dev/null +++ b/crates/hm-pipeline-ir/tests/parse_pipeline.rs @@ -0,0 +1,41 @@ +#![allow( + clippy::cargo_common_metadata, + clippy::multiple_crate_versions, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic +)] + +use hm_pipeline_ir::{Pipeline, Step}; + +#[test] +fn parses_step_with_runner() { + let json = br#"{ + "version": "0", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "freestyle run", + "runner": "freestyle", "runner_args": {"region": "us"}} + ] + }"#; + let p: Pipeline = serde_json::from_slice(json).unwrap(); + let Step::Command(b) = &p.steps[1] else { + panic!("expected command") + }; + assert_eq!(b.runner.as_deref(), Some("freestyle")); + assert_eq!(b.runner_args.as_ref().unwrap()["region"], "us"); +} + +#[test] +fn parses_legacy_step_without_runner() { + let json = br#"{ + "version": "0", + "steps": [{"type": "command", "key": "a", "cmd": "echo a"}] + }"#; + let p: Pipeline = serde_json::from_slice(json).unwrap(); + let Step::Command(a) = &p.steps[0] else { + panic!("expected command") + }; + assert!(a.runner.is_none()); + assert!(a.runner_args.is_none()); +} From 6c4cca2e3fd0b58e66850718280168598c0ba42d Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:15:37 -0700 Subject: [PATCH 04/40] test: add pipeline IR schema snapshot test --- Cargo.lock | 1 + crates/hm-pipeline-ir/Cargo.toml | 2 + .../hm-pipeline-ir/tests/schema_snapshot.rs | 16 ++ .../snapshots/schema_snapshot__pipeline.snap | 168 ++++++++++++++++++ 4 files changed, 187 insertions(+) create mode 100644 crates/hm-pipeline-ir/tests/schema_snapshot.rs create mode 100644 crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap diff --git a/Cargo.lock b/Cargo.lock index 2d64248..d86a6b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1634,6 +1634,7 @@ dependencies = [ name = "hm-pipeline-ir" version = 
"0.0.0-dev" dependencies = [ + "insta", "schemars 0.8.22", "serde", "serde_json", diff --git a/crates/hm-pipeline-ir/Cargo.toml b/crates/hm-pipeline-ir/Cargo.toml index ac37361..086ad1e 100644 --- a/crates/hm-pipeline-ir/Cargo.toml +++ b/crates/hm-pipeline-ir/Cargo.toml @@ -12,6 +12,8 @@ serde_json = { workspace = true } schemars = { workspace = true } [dev-dependencies] +insta = { version = "1", features = ["json"] } +schemars = { workspace = true } serde_json = { workspace = true } [lints] diff --git a/crates/hm-pipeline-ir/tests/schema_snapshot.rs b/crates/hm-pipeline-ir/tests/schema_snapshot.rs new file mode 100644 index 0000000..421d672 --- /dev/null +++ b/crates/hm-pipeline-ir/tests/schema_snapshot.rs @@ -0,0 +1,16 @@ +#![allow( + clippy::cargo_common_metadata, + clippy::multiple_crate_versions, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic +)] + +use hm_pipeline_ir::Pipeline; +use schemars::schema_for; + +#[test] +fn pipeline_schema_is_stable() { + let schema = schema_for!(Pipeline); + insta::assert_json_snapshot!("pipeline", schema); +} diff --git a/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap b/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap new file mode 100644 index 0000000..8bb7cfe --- /dev/null +++ b/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap @@ -0,0 +1,168 @@ +--- +source: crates/hm-pipeline-ir/tests/schema_snapshot.rs +assertion_line: 15 +expression: schema +--- +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Pipeline", + "type": "object", + "required": [ + "steps", + "version" + ], + "properties": { + "version": { + "description": "Must equal `\"0\"` — bumping this is reserved for breaking schema changes, none of which are scheduled. 
The v0 schema gains optional fields in-place (see `runner` below).", + "type": "string" + }, + "env": { + "default": null, + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "string" + } + }, + "default_image": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "steps": { + "type": "array", + "items": { + "$ref": "#/definitions/Step" + } + } + }, + "definitions": { + "Step": { + "oneOf": [ + { + "type": "object", + "required": [ + "cmd", + "key", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "command" + ] + }, + "key": { + "type": "string" + }, + "label": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "cmd": { + "type": "string" + }, + "builds_in": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "image": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "env": { + "default": null, + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "string" + } + }, + "timeout_seconds": { + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "cache": { + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/Cache" + }, + { + "type": "null" + } + ] + }, + "runner": { + "description": "Names the step-executor plugin that should run this step. `None` ⇒ the default executor handles it (Docker, in the shipped configuration).", + "type": [ + "string", + "null" + ] + }, + "runner_args": { + "description": "Plugin-specific extra fields. Validated by the executor plugin's `StepExecutorSpec::step_schema` if it set one." 
+ } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "wait" + ] + }, + "continue_on_failure": { + "default": false, + "type": "boolean" + } + } + } + ] + }, + "Cache": { + "type": "object", + "required": [ + "policy" + ], + "properties": { + "policy": { + "type": "string" + }, + "key": { + "default": null, + "type": [ + "string", + "null" + ] + } + } + } + } +} From eb3cee7132b722453b832821c341e13d02257d88 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:16:00 -0700 Subject: [PATCH 05/40] docs: add hm-pipeline-ir to CLAUDE.md crate listing --- CLAUDE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CLAUDE.md b/CLAUDE.md index 0ab594b..12fd66e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,7 @@ The `cli/` directory is a Cargo workspace. - `crates/hm/` — the `hm` binary (today's CLI body). +- `crates/hm-pipeline-ir/` — pipeline IR schema (serde structs only, no runtime). - `crates/hm-plugin-protocol/` — wire types (serde structs only). - `crates/hm-plugin-sdk/` — authoring SDK for plugin writers. 
- `crates/hm-fixtures/` — test-only WASM plugins; compiled to From 591882b87ac04d7c9e298ba09ead0de6f51d9f4b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:32:07 -0700 Subject: [PATCH 06/40] Squashed 'dsls/harmont-py/' content from commit 2d47999 git-subtree-dir: dsls/harmont-py git-subtree-split: 2d47999cfbd5bffd1f7fd7894302791117ef1edb --- .github/workflows/ci.yml | 40 ++++ .github/workflows/release.yml | 60 ++++++ .gitignore | 13 ++ CLAUDE.md | 266 +++++++++++++++++++++++ LICENSE | 21 ++ README.md | 168 +++++++++++++++ RELEASING.md | 110 ++++++++++ harmont/__init__.py | 173 +++++++++++++++ harmont/_decorator.py | 68 ++++++ harmont/_deploy.py | 189 +++++++++++++++++ harmont/_deps.py | 203 ++++++++++++++++++ harmont/_envelope.py | 100 +++++++++ harmont/_keys.py | 121 +++++++++++ harmont/_registry.py | 44 ++++ harmont/_step.py | 99 +++++++++ harmont/_target.py | 104 +++++++++ harmont/_toolchain.py | 79 +++++++ harmont/_typing.py | 118 +++++++++++ harmont/_unwrap.py | 56 +++++ harmont/_validation.py | 11 + harmont/cache.py | 80 +++++++ harmont/cmake.py | 127 +++++++++++ harmont/composer.py | 109 ++++++++++ harmont/dev/__init__.py | 19 ++ harmont/dev/__main__.py | 71 +++++++ harmont/dev/_deployment.py | 47 ++++ harmont/dev/_factory.py | 153 +++++++++++++ harmont/dev/_port.py | 37 ++++ harmont/dev/_registry_dump.py | 100 +++++++++ harmont/dotnet.py | 116 ++++++++++ harmont/elm.py | 143 +++++++++++++ harmont/go.py | 117 ++++++++++ harmont/gradle.py | 137 ++++++++++++ harmont/haskell.py | 257 ++++++++++++++++++++++ harmont/json_emit.py | 69 ++++++ harmont/keygen.py | 156 ++++++++++++++ harmont/npm.py | 118 +++++++++++ harmont/ocaml.py | 145 +++++++++++++ harmont/perl.py | 86 ++++++++ harmont/pipeline.py | 172 +++++++++++++++ harmont/python.py | 141 ++++++++++++ harmont/ruby.py | 108 ++++++++++ harmont/rust.py | 139 ++++++++++++ harmont/triggers.py | 135 ++++++++++++ harmont/types.py | 12 ++ harmont/zig.py | 172 +++++++++++++++ pyproject.toml | 88 
++++++++ tests/__init__.py | 0 tests/conftest.py | 20 ++ tests/dev/__init__.py | 0 tests/dev/conftest.py | 20 ++ tests/dev/test_canonical_example.py | 47 ++++ tests/dev/test_decorator.py | 98 +++++++++ tests/dev/test_dep_marker.py | 42 ++++ tests/dev/test_deploy_factory.py | 77 +++++++ tests/dev/test_dump_cli.py | 60 ++++++ tests/dev/test_local_deployment.py | 78 +++++++ tests/dev/test_port_sentinel.py | 22 ++ tests/dev/test_registry_dump.py | 93 ++++++++ tests/dev/test_topo.py | 63 ++++++ tests/examples_render_conftest.py | 71 +++++++ tests/test_cache.py | 73 +++++++ tests/test_cmake.py | 68 ++++++ tests/test_composer.py | 76 +++++++ tests/test_decorator.py | 103 +++++++++ tests/test_deps.py | 80 +++++++ tests/test_dotnet.py | 78 +++++++ tests/test_elm.py | 133 ++++++++++++ tests/test_envelope.py | 186 ++++++++++++++++ tests/test_examples_render.py | 72 +++++++ tests/test_go.py | 91 ++++++++ tests/test_gradle.py | 78 +++++++ tests/test_har_28_example.py | 84 ++++++++ tests/test_haskell.py | 183 ++++++++++++++++ tests/test_haskell_cabal_alias.py | 35 +++ tests/test_json_emit.py | 192 +++++++++++++++++ tests/test_keygen.py | 318 ++++++++++++++++++++++++++++ tests/test_keys.py | 97 +++++++++ tests/test_npm.py | 117 ++++++++++ tests/test_ocaml.py | 69 ++++++ tests/test_perl.py | 62 ++++++ tests/test_pipeline.py | 36 ++++ tests/test_pipeline_fixtures.py | 83 ++++++++ tests/test_pipeline_lowering.py | 124 +++++++++++ tests/test_python.py | 133 ++++++++++++ tests/test_registry.py | 81 +++++++ tests/test_ruby.py | 74 +++++++ tests/test_rust.py | 168 +++++++++++++++ tests/test_sh_shorthand.py | 44 ++++ tests/test_step_chain.py | 90 ++++++++ tests/test_step_sh.py | 86 ++++++++ tests/test_strict_signature.py | 129 +++++++++++ tests/test_target.py | 109 ++++++++++ tests/test_target_cross_module.py | 73 +++++++ tests/test_target_fixtures.py | 154 ++++++++++++++ tests/test_target_unwrap.py | 79 +++++++ tests/test_toolchain.py | 108 ++++++++++ tests/test_toolchain_compose.py | 
82 +++++++ tests/test_triggers.py | 76 +++++++ tests/test_typing_markers.py | 74 +++++++ tests/test_validation.py | 30 +++ tests/test_zig.py | 67 ++++++ tests/test_zig_toolchain.py | 81 +++++++ 103 files changed, 9964 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 RELEASING.md create mode 100644 harmont/__init__.py create mode 100644 harmont/_decorator.py create mode 100644 harmont/_deploy.py create mode 100644 harmont/_deps.py create mode 100644 harmont/_envelope.py create mode 100644 harmont/_keys.py create mode 100644 harmont/_registry.py create mode 100644 harmont/_step.py create mode 100644 harmont/_target.py create mode 100644 harmont/_toolchain.py create mode 100644 harmont/_typing.py create mode 100644 harmont/_unwrap.py create mode 100644 harmont/_validation.py create mode 100644 harmont/cache.py create mode 100644 harmont/cmake.py create mode 100644 harmont/composer.py create mode 100644 harmont/dev/__init__.py create mode 100644 harmont/dev/__main__.py create mode 100644 harmont/dev/_deployment.py create mode 100644 harmont/dev/_factory.py create mode 100644 harmont/dev/_port.py create mode 100644 harmont/dev/_registry_dump.py create mode 100644 harmont/dotnet.py create mode 100644 harmont/elm.py create mode 100644 harmont/go.py create mode 100644 harmont/gradle.py create mode 100644 harmont/haskell.py create mode 100644 harmont/json_emit.py create mode 100644 harmont/keygen.py create mode 100644 harmont/npm.py create mode 100644 harmont/ocaml.py create mode 100644 harmont/perl.py create mode 100644 harmont/pipeline.py create mode 100644 harmont/python.py create mode 100644 harmont/ruby.py create mode 100644 harmont/rust.py create mode 100644 harmont/triggers.py create mode 100644 harmont/types.py create mode 100644 harmont/zig.py create mode 100644 pyproject.toml 
create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/dev/__init__.py create mode 100644 tests/dev/conftest.py create mode 100644 tests/dev/test_canonical_example.py create mode 100644 tests/dev/test_decorator.py create mode 100644 tests/dev/test_dep_marker.py create mode 100644 tests/dev/test_deploy_factory.py create mode 100644 tests/dev/test_dump_cli.py create mode 100644 tests/dev/test_local_deployment.py create mode 100644 tests/dev/test_port_sentinel.py create mode 100644 tests/dev/test_registry_dump.py create mode 100644 tests/dev/test_topo.py create mode 100644 tests/examples_render_conftest.py create mode 100644 tests/test_cache.py create mode 100644 tests/test_cmake.py create mode 100644 tests/test_composer.py create mode 100644 tests/test_decorator.py create mode 100644 tests/test_deps.py create mode 100644 tests/test_dotnet.py create mode 100644 tests/test_elm.py create mode 100644 tests/test_envelope.py create mode 100644 tests/test_examples_render.py create mode 100644 tests/test_go.py create mode 100644 tests/test_gradle.py create mode 100644 tests/test_har_28_example.py create mode 100644 tests/test_haskell.py create mode 100644 tests/test_haskell_cabal_alias.py create mode 100644 tests/test_json_emit.py create mode 100644 tests/test_keygen.py create mode 100644 tests/test_keys.py create mode 100644 tests/test_npm.py create mode 100644 tests/test_ocaml.py create mode 100644 tests/test_perl.py create mode 100644 tests/test_pipeline.py create mode 100644 tests/test_pipeline_fixtures.py create mode 100644 tests/test_pipeline_lowering.py create mode 100644 tests/test_python.py create mode 100644 tests/test_registry.py create mode 100644 tests/test_ruby.py create mode 100644 tests/test_rust.py create mode 100644 tests/test_sh_shorthand.py create mode 100644 tests/test_step_chain.py create mode 100644 tests/test_step_sh.py create mode 100644 tests/test_strict_signature.py create mode 100644 tests/test_target.py 
create mode 100644 tests/test_target_cross_module.py create mode 100644 tests/test_target_fixtures.py create mode 100644 tests/test_target_unwrap.py create mode 100644 tests/test_toolchain.py create mode 100644 tests/test_toolchain_compose.py create mode 100644 tests/test_triggers.py create mode 100644 tests/test_typing_markers.py create mode 100644 tests/test_validation.py create mode 100644 tests/test_zig.py create mode 100644 tests/test_zig_toolchain.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b2b777c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,40 @@ +name: CI + +on: + pull_request: + push: + branches: [main] + +permissions: + contents: read + +jobs: + test: + name: pytest + ruff + mypy + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install harmont + dev extras + run: pip install -e '.[dev]' + + - name: ruff check + run: ruff check . + + - name: mypy + run: mypy harmont + + - name: pytest + run: | + pytest -v \ + --deselect tests/test_gradle.py \ + --deselect tests/test_haskell.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..2be6deb --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,60 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: read + +jobs: + pypi-publish: + name: Publish to PyPI + runs-on: ubuntu-latest + environment: + # PyPI Trusted Publisher is scoped to this environment. Configure + # the matching publisher on https://pypi.org/manage/account/publishing/ + # before the first tag push (see RELEASING.md). 
+ name: release + url: https://pypi.org/project/harmont/ + permissions: + # `id-token: write` is the OIDC switch that pypa/gh-action-pypi-publish + # uses to mint a short-lived token PyPI accepts in lieu of an API token. + id-token: write + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Set version from tag + run: | + VERSION="${GITHUB_REF_NAME#v}" + echo "VERSION=$VERSION" >> "$GITHUB_ENV" + # Sed only the first match so this is a no-op if pyproject is + # already at the tagged version (a re-run with a corrected tag, + # for instance, shouldn't double-edit). + sed -i '0,/version = "0.0.0-dev"/s//version = "'"$VERSION"'"/' pyproject.toml + grep -n "^version" pyproject.toml + + - name: Install build + run: python -m pip install --upgrade build + + - name: Build sdist and wheel + run: python -m build + + - name: Inspect dist + run: | + ls -la dist/ + # Fail fast if either artifact is missing. + test -f dist/harmont-${VERSION}.tar.gz + test -f dist/harmont-${VERSION}-py3-none-any.whl + + - name: Publish to PyPI via Trusted Publishing + uses: pypa/gh-action-pypi-publish@release/v1 + # No `with:` block needed — the action defaults to using OIDC + # against the project's configured Trusted Publisher when + # `id-token: write` is granted (above). It picks up dist/* by + # default. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ca40f2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +__pycache__/ +*.pyc +*.pyo +.mypy_cache/ +.ruff_cache/ +.pytest_cache/ +.venv/ +venv/ +build/ +dist/ +*.egg-info/ +.coverage +htmlcov/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..36848b8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,266 @@ +# cidsl/py — Python chain DSL for Harmont pipelines + +> Read `PRINCIPLES.md` at the repo root before editing. 
Validation +> errors raised in `__post_init__` and from the lowering pass are +> user-facing to pipeline authors — keep them precise and fix-directed +> per § 5. + +A Python package that emits the v0 IR JSON for Harmont CI pipelines. +Runtime deps: `croniter` (HAR-9 schedule trigger validation). + +## How It Works + +`Step` is a frozen dataclass. `scratch()` returns a root `Step`; +`Step.sh(cmd, **kw)` returns a child carrying one shell command (use +`cwd="path"` to prepend `cd <path> && ` to the command); +`Step.fork(label=None)` returns a passthrough used to brand a branch. +`hm.sh(cmd, **kw)` is shorthand for `scratch().sh(cmd, **kw)` — start a +chain in one call. The `pipeline(*leaves, env=None)` factory walks back +from each leaf via `parent`, topo-sorts, and emits the v0 IR as a +Python dict. `pipeline_to_json(p)` serializes that dict (resolving +cache keys first via `harmont.keygen`) to the wire-format JSON string. + +## Build & Test + +```bash +python3 -m venv .venv && source .venv/bin/activate +pip install -e '.[dev]' + +pytest # all tests +pytest -v --tb=short +``` + +## Public surface + +`hm.pipeline` is **polymorphic**. When called with positional +`Step` arguments it is the factory — returns the v0 IR dict. When +called with no positionals (or a string slug) it is the HAR-9 +**decorator**: it registers the wrapped function as a CI pipeline. 
+ +Decorator form: + +```python +import harmont as hm + + +@hm.pipeline("default") +def default() -> hm.Step: + return hm.sh("echo hi", label="hi") +``` + +Factory form (used inside a pipeline definition that builds the dict +imperatively, and in unit tests): + +```python +hm.pipeline(hm.sh("echo hi"), default_image="alpine:3.20") +``` + +Stage 1 of rendering (`hm.dump_registry_json`) walks every +`.harmont/*.py`, imports each (which has the side effect of running +the decorators), assembles each registered pipeline via the factory, +and emits a `schema_version="1"` envelope keyed by slug, with each +pipeline's resolved v0 IR carried in the `definition` field. + +The full surface (all reachable through `hm.`): + +```python +pipeline(slug=None, *, name=None, triggers=(), allow_manual=True, + env=None, default_image=None) # decorator +pipeline(*leaves, env=None, default_image=None) # factory (v0 IR dict) +pipeline_to_json(p, **kw) # -> str (wire JSON) +dump_registry_json() # -> str (envelope JSON) + +target() # decorator: memoized building block + +sh(cmd, *, cwd=None, label=None, ...) # -> Step (= scratch().sh(cmd, ...)) +scratch() # -> Step (root) +Step.sh(cmd, *, cwd=None, ...) # -> Step +Step.fork(label=None) # -> Step +wait(*, continue_on_failure=False) # -> Step + +# trigger constructors (passed via `triggers=` on the decorator) +push(branch=..., tag=...) +pull_request(branches=..., types=...) +schedule(cron=...) 
+ +# cache helpers +ttl(duration) | on_change(*paths) | forever(env_keys=()) | compose(*policies) + +# language toolchains (call to construct; bare-form actions also work) +haskell(ghc=..., cabal="latest") # -> HaskellToolchain (cabal package via .package(path)) +rust(path=..., version="stable") # -> RustToolchain +npm(path=..., version="20") # -> NpmProject +elm(path=..., elm_version="0.19.1") # -> ElmProject +python(path=..., uv_version="latest") # -> PythonToolchain (uv-based) +go(path=..., version="1.23.2") # -> GoToolchain +gradle(path=..., jdk="21", kotlin=False) # -> GradleProject (Java + Kotlin) +cmake(path=..., lang="c"|"cpp") # -> CMakeProject +dotnet(path=..., channel="8.0") # -> DotnetProject +ruby(path=..., version="default") # -> RubyProject +ocaml(path=..., compiler="5.1.1") # -> OCamlProject +zig(version="0.13.0") # -> ZigToolchain (zig project via .project(path)) +zig(path=..., version="0.13.0") # -> ZigProject (one-shot) +perl(path=...) # -> PerlProject +composer(path=..., laravel=False) # -> ComposerProject (PHP + Laravel) +``` + +`Step` is opaque — pipeline authors do not read its attributes. + +### Reusable targets (HAR-28) + +`@hm.target()` decorates a parameterless function and memoizes its +return value per envelope render. Targets are the composition unit: + +```python +@hm.target() +def apt_base() -> hm.Step: + return hm.sh("apt-get update").sh("apt-get install -y python3 python3-venv") + +@hm.target() +def api(): + return hm.haskell(ghc="9.6.7").cabal(path="api") + +@hm.pipeline("ci") +def ci() -> tuple[hm.Step, ...]: + return (apt_base().sh("./run-smoke"), api()) +``` + +`@hm.target()` functions may return `Step`, `tuple[Step, ...]`, +`HaskellPackage`, `ElmProject`, `NpmProject`, or `RustToolchain`. 
+When such a value reaches the pipeline assembler it is unwrapped to +its default leaf: + +| Type | Default leaf | +|------|--------------| +| `HaskellPackage` | `.build()` | +| `RustToolchain` | `.build()` | +| `NpmProject` | `.install()` (npm-ci leaf) | +| `ElmProject` | `.make("src/Main.elm")` | + +Authors who want a different default call the explicit action +(`.test()`, `.lint()`, etc.) themselves. + +#### Fixture-style dependencies (typed markers) + +A target's parameters carry type annotations that tell the decorator +how to inject the value. Two markers are public: + +**`Target[T]`** — declares a dependency on another `@hm.target` by +parameter name. Static type-checkers see the parameter as `T`. + +**`Annotated[Step, BaseImage("X")]`** — declares a scratch-rooted +`Step` in image `"X"`. The first `.sh()` call on the parameter +inherits `image="X"`, so the first emitted IR step carries it. + +```python +from typing import Annotated + +import harmont as hm +from harmont.haskell import HaskellPackage, HaskellToolchain + +@hm.target() +def apt_base(base: Annotated[hm.Step, hm.BaseImage("ubuntu-24.04")]) -> hm.Step: + return base.sh("apt-get update").sh("apt-get install -y python3") + +@hm.target() +def ghc() -> HaskellToolchain: + return hm.haskell(ghc="9.6.7") + +@hm.target() +def api(ghc: hm.Target[HaskellToolchain]) -> HaskellPackage: + return ghc.cabal(path="api") + +@hm.pipeline("ci") +def ci( + apt_base: hm.Target[hm.Step], + api: hm.Target[HaskellPackage], +) -> tuple[hm.Step, ...]: + return (apt_base.sh("./run-smoke"), api) +``` + +Rules: + +- Every fixture parameter **must** carry a marker (`Target[T]` or + `Annotated[Step, BaseImage("...")]`) OR a default value. Unmarked + params raise at decoration time. +- `*args` / `**kwargs` / positional-only parameters are rejected. +- Duplicate target names raise at decoration time. Use + `@hm.target(name="...")` to disambiguate. +- Cycles raise `RuntimeError` listing the path. 
+ +Both markers unwrap cleanly under mypy and pyright via PEP 593 +(`Annotated`); `assert_type(apt_base, Step)` and the like pass +without suppressions. + +Memoization scope is one `dump_registry_json` render. Two targets +that both depend on `apt_base` share the same `Step`, so the v0 IR +contains one apt-base step with N children — not N copies. + +## Deployments — `@hm.deploy` and `hm.dev` + +`@hm.deploy` is a driver-agnostic decorator that registers a function +as a long-lived service. The function returns a `Deployment` value +produced by a driver-specific factory; v1 ships only the local Docker +driver via `hm.dev.deploy(...)`. Future cloud drivers (`hm.aws.deploy`, +`hm.fly.deploy`) plug in without touching the top-level decorator. + +```python +import harmont as hm + +@hm.deploy("hello") +def hello() -> hm.Deployment: + return hm.dev.deploy( + image="python:3.12-alpine", + cmd=["python", "-m", "http.server", "5678"], + port_mapping={5678: hm.dev.port()}, + ) + +@hm.deploy("greeter") +def greeter(hello: hm.Dep[hm.Deployment]) -> hm.Deployment: + return hm.dev.deploy( + image="python:3.12-alpine", + cmd=["python", "-m", "http.server", "5678"], + port_mapping={5678: hm.dev.port()}, + env={"HELLO_HOST": hello.name}, + ) +``` + +Public surface: + +```python +hm.deploy(slug=None, *, name=None) # decorator +hm.Dep[T] # PEP-593 fixture marker +hm.Deployment # abstract dataclass + +hm.dev.deploy(*, image=None, from_=None, cmd=None, + port_mapping=None, env=None, + volumes=None, workdir=None) # -> LocalDeployment +hm.dev.port() # OS-assigned host port sentinel +hm.dev.LocalDeployment # concrete subclass +hm.dev.dump_registry_json(*, worktree_root) # -> v0 JSON +``` + +`hm.dev.port()` is only valid as a value in `port_mapping`. The host +port is assigned by Docker (via `-p :`) at `hm dev up` +time; query it from another terminal with `hm dev port-of +`. Ports are fresh on every `hm dev up`. 
+ +The Rust CLI (`hm dev up`) shells out to `python -m harmont.dev +--dump-registry` to obtain the registry JSON. Schema is at +`docs/superpowers/specs/2026-05-21-hm-dev-deploy-design.md` § 1. + +## Cache keys + +`harmont.keygen.resolve_pipeline_keys` ports the algorithm previously +implemented in Scheme. `pipeline_to_json` calls it before +serialization, so every step whose policy is not `none` has a +deterministic `cache.key` baked into the wire-format JSON. + +## Snapshot lineage: `builds_in`, `image`, `default_image` + +A chain edge — `parent.sh(cmd, ...)` — emits `builds_in: ""` in the v0 IR JSON. The edge encodes both synchronisation (the +planner waits for the parent) and state inheritance (the local +executor reuses the parent's container; the cloud planner boots from +its snapshot). + +A step rooted at `scratch()` has `builds_in: null`. It boots from +`image="..."` locally (or the pipeline's `default_image`). The cloud +planner ignores `image`/`default_image` (it always boots from the +Freestyle base image). diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d3f90b3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Marko Vejnovic + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..56f0ab9 --- /dev/null +++ b/README.md @@ -0,0 +1,168 @@ +# harmont-py + +[![license](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) + +Python DSL for defining [Harmont](https://harmont.dev) CI pipelines. + +Pipelines are chains of shell commands, branched with `.fork()`, synchronized with `hm.wait()`, registered with a decorator, and rendered to a JSON IR. The companion [`harmont-cli`](https://github.com/harmont-dev/harmont-cli) consumes that IR and runs the pipeline locally in Docker or on the hosted Harmont cloud. + +The package installs as `harmont` and you import it as `harmont`: + +```python +import harmont as hm +``` + +## Quick start + +### 1. Write a pipeline + +A pipeline file lives at `.harmont/<name>.py` in your repo: + +```python +import harmont as hm + + +@hm.pipeline("hello") +def hello() -> hm.Step: + return ( + hm.sh("echo 'hello from harmont'", label="hello") + .sh("uname -a", label="env") + ) +``` + +### 2. Install + +Not yet on PyPI. Install from source (Python 3.11+): + +```sh +git clone https://github.com/harmont-dev/harmont-py +cd harmont-py +pip install -e . +``` + +If you arrived here from the [`harmont-cli`](https://github.com/harmont-dev/harmont-cli) Quick start, you already did this — skip to Step 3. + +Development extras (pytest, mypy, ruff): + +```sh +pip install -e '.[dev]' +``` + +### 3. Run + +Use the [Harmont CLI](https://github.com/harmont-dev/harmont-cli): + +```sh +hm run hello +``` + +`hm run` walks `.harmont/*.py`, imports each file (triggering the decorators), renders the registered pipeline to JSON, and executes it (locally in Docker by default, or against the cloud via `hm cloud run`). 
+ +## DSL surface + +| Primitive | Returns | What it does | +|---|---|---| +| `hm.sh(cmd, cwd=..., label=...)` | `Step` | Start a chain in one call (= `hm.scratch().sh(cmd, ...)`) | +| `hm.scratch()` | `Step` | Empty root; chain with `.sh(...)` for an explicit start | +| `Step.sh(cmd, cwd=..., ...)` | `Step` | Run a shell command; chained `.sh` shares container state | +| `Step.fork(label=...)` | `Step` | Branch a shared base into parallel work | +| `hm.wait()` | `Step` | Explicit synchronization barrier | +| `@hm.target()` | decorator | Reusable, memoized building block | +| `@hm.pipeline("slug")` | decorator | Register a pipeline (multiple per file are fine) | +| `hm.pipeline(*leaves, env=..., default_image=...)` | `dict` | Factory form — build the v0 IR dict directly (used in tests) | + +Cache policies (`hm.ttl`, `hm.on_change`, `hm.forever`, `hm.compose`), triggers (`hm.push`, `hm.pull_request`, `hm.schedule`), and matrix axes are documented in the module docstrings; start at `harmont/__init__.py`. + +## Language toolchains + +`harmont` ships first-class wrappers for the common toolchains. Each exposes the actions that make sense for that ecosystem (e.g. 
`.build()`, `.test()`, `.clippy()`, `.fmt()` for Rust; `.test()`, `.lint()`, `.fmt()`, `.typecheck()` for Python): + +| Call | Project type | +|---|---| +| `hm.rust(path=..., version="stable")` | cargo + clippy + rustfmt | +| `hm.haskell(ghc="9.6.7", cabal="latest")` | cabal (call `.cabal(path)` to build a package) | +| `hm.python(path=..., uv_version="latest")` | uv-based Python project | +| `hm.go(path=..., version="1.23.2")` | go build/test/vet/fmt | +| `hm.npm(path=..., version="20")` | npm + arbitrary scripts | +| `hm.gradle(path=..., jdk="21", kotlin=False)` | Java or Kotlin via Gradle | +| `hm.cmake(path=..., lang="c"\|"cpp")` | C/C++ via CMake + CTest | +| `hm.dotnet(path=..., channel="8.0")` | .NET via dotnet CLI | +| `hm.ruby(path=..., version="default")` | Bundler + Rake | +| `hm.ocaml(path=..., compiler="5.1.1")` | opam + Dune | +| `hm.zig(path=..., version="0.13.0")` | zig build/test/fmt | +| `hm.perl(path=...)` | cpanm + prove | +| `hm.composer(path=..., laravel=False)` | PHP / Laravel via Composer | +| `hm.elm(path=..., elm_version="0.19.1")` | Elm | + +Working examples for each toolchain live in [`harmont-cli/examples/`](https://github.com/harmont-dev/harmont-cli/tree/main/examples). + +## Composing with targets + +For larger pipelines, factor toolchain setup into `@hm.target()` and let pipelines depend on them by parameter name. `Target[T]` and `Annotated[Step, BaseImage("...")]` are typed markers that unwrap cleanly under mypy and pyright. 
+ +```python +from typing import Annotated + +import harmont as hm +from harmont.haskell import HaskellPackage, HaskellToolchain + + +@hm.target() +def apt_base(base: Annotated[hm.Step, hm.BaseImage("ubuntu-24.04")]) -> hm.Step: + return base.sh("apt-get update").sh("apt-get install -y python3") + + +@hm.target() +def ghc() -> HaskellToolchain: + return hm.haskell(ghc="9.6.7") + + +@hm.target() +def api(ghc: hm.Target[HaskellToolchain]) -> HaskellPackage: + return ghc.cabal(path="api") + + +@hm.pipeline("ci") +def ci( + apt_base: hm.Target[hm.Step], + api: hm.Target[HaskellPackage], +) -> tuple[hm.Step, ...]: + return (apt_base.sh("./run-smoke"), api) +``` + +Every fixture parameter must carry a marker or default value; unmarked parameters raise at decoration time. Memoization scope is one `dump_registry_json` render, so two targets that depend on the same `apt_base` share a single step. + +
+<details> +<summary>How rendering works</summary> + +`hm.sh(...).sh(...)` builds a chain of frozen `Step` dataclasses. Each `.sh()` returns a new `Step` carrying the parent reference. The `hm.pipeline()` factory walks back from each leaf, topo-sorts, and emits a `version: "0"` IR dict matching the schema in `harmont-pipeline` (Haskell side). + +When used as a decorator, `@hm.pipeline("slug")` registers the wrapped function with a module-level registry. `hm.dump_registry_json()` walks every `.harmont/*.py`, imports each (which triggers the decorators), and returns the full envelope. + +A chain edge — `parent.sh(cmd, ...)` — emits `builds_in: "<parent-key>"` in the v0 IR JSON. The edge encodes synchronisation and state inheritance: the local executor reuses the parent's container; the cloud planner boots from its snapshot. A step rooted at `scratch()` has `builds_in: null` and boots from `image="..."` (or the pipeline's `default_image`) locally; the cloud planner ignores `image` (it always boots from the Freestyle base). + +The JSON wire format and cache-key algorithm are stable; see module docstrings under `harmont/` for the contract. + +</details>
+ +## Build & test + +```sh +python3 -m venv .venv && source .venv/bin/activate +pip install -e '.[dev]' + +pytest # all tests +pytest -v --tb=short +mypy --strict harmont +ruff check . +``` + +`pytest` is configured to treat warnings as errors (`filterwarnings = ["error"]`). + +## See also + +- [`harmont-cli`](https://github.com/harmont-dev/harmont-cli) — the CLI that runs pipelines defined with this package (`hm run`). + +## License + +MIT. See [`LICENSE`](LICENSE). diff --git a/RELEASING.md b/RELEASING.md new file mode 100644 index 0000000..9b3b3d9 --- /dev/null +++ b/RELEASING.md @@ -0,0 +1,110 @@ +# Releasing harmont-py + +This package lives upstream at <https://github.com/harmont-dev/harmont-py> and is mirrored into the private Harmont monorepo as `cidsl/py/`. The monorepo is the source of truth; the public repo receives a `git subtree push`. + +## One-time setup + +```sh +gh repo create harmont-dev/harmont-py --public \ + --description "Python DSL for Harmont CI pipelines" \ + --homepage https://harmont.dev + +# Initial split from the monorepo (run from the monorepo root) +git subtree split --prefix=cidsl/py -b harmont-py-export +git push git@github.com:harmont-dev/harmont-py.git harmont-py-export:main +git branch -D harmont-py-export +``` + +## Ongoing sync (monorepo → public) + +```sh +git subtree push --prefix=cidsl/py git@github.com:harmont-dev/harmont-py.git main +``` + +If subtree-push fails because the public repo has commits that aren't in the monorepo's history, fall back to a fresh split: + +```sh +git subtree split --prefix=cidsl/py -b harmont-py-export +git push git@github.com:harmont-dev/harmont-py.git harmont-py-export:main +git branch -D harmont-py-export +``` + +## Pulling external contributions back (public → monorepo) + +```sh +git subtree pull --prefix=cidsl/py git@github.com:harmont-dev/harmont-py.git main --squash +``` + +## Cutting a release + +Versioning is **driven by git tags on the public mirror**. 
The release +workflow in `.github/workflows/release.yml` triggers on any tag matching +`v*`, seds the version from the tag into `pyproject.toml`, builds the +sdist and wheel, and publishes to PyPI via Trusted Publishing (OIDC — +no API tokens stored in the repo). + +### Prerequisites (one-time) + +1. **Configure the PyPI Trusted Publisher** on + <https://pypi.org/manage/project/harmont/settings/publishing/> with: + - Owner: `harmont-dev` + - Repository: `harmont-py` + - Workflow filename: `release.yml` + - Environment: `release` + + If the `harmont` project does not yet exist on PyPI, create it via a + one-off manual `twine upload` first (or use the "Add a pending + publisher" flow at <https://pypi.org/manage/account/publishing/>), + then add the Trusted Publisher. + +2. **Create the `release` GitHub Environment** on + <https://github.com/harmont-dev/harmont-py/settings/environments>. + Recommended protection rules: + - Deployment branches and tags → "Selected branches and tags" → + add tag rule `v*`. + - (Optional) required reviewers on the environment so a human has + to click "approve" before publish runs. + +### Releasing + +1. Update `CHANGELOG.md` or release notes locally if you keep them. +2. Tag from the monorepo (source of truth): + + ```sh + git tag v<version> + git subtree push --prefix=cidsl/py git@github.com:harmont-dev/harmont-py.git main + git push git@github.com:harmont-dev/harmont-py.git v<version> + ``` + + The tag has to land on the **public** repo for the workflow to fire. + The subtree-push lands the corresponding `main` commit there first + so the tag points at the right SHA. + +3. Watch the run: + + ```sh + gh run watch \ + "$(gh run list --repo harmont-dev/harmont-py --workflow release.yml \ + --limit 1 --json databaseId --jq '.[0].databaseId')" \ + --repo harmont-dev/harmont-py --exit-status + ``` + +4. Confirm the release on <https://pypi.org/project/harmont/>. +5. 
(Optional) Create a GitHub Release on the same tag with notes: + + ```sh + gh release create v --repo harmont-dev/harmont-py \ + --title "harmont v" --generate-notes + ``` + +### Troubleshooting + +- **`Trusted publishing exchange failed`:** the GH Environment name in + the workflow does not match the one configured on PyPI. Both must be + exactly `release`. +- **`File already exists`:** the version was already published to PyPI. + PyPI is append-only — bump the version, re-tag, re-push. +- **`No files to upload`:** the build step did not produce + `dist/*.tar.gz` and `dist/*.whl`. Inspect the `Build sdist and wheel` + step output. Most common cause: `setuptools` couldn't find a package + to build because `pyproject.toml` was mid-edit. diff --git a/harmont/__init__.py b/harmont/__init__.py new file mode 100644 index 0000000..ae07945 --- /dev/null +++ b/harmont/__init__.py @@ -0,0 +1,173 @@ +"""harmont — chain-style Python DSL for Harmont CI pipelines. + +The whole public surface: + + scratch() -> Step (root) + sh(cmd, **kw) -> Step (== scratch().sh(cmd, **kw)) + Step.sh(cmd, **kw) -> Step + Step.fork(label=None) -> Step + wait(*, continue_on_failure=False) -> Step + + pipeline(*leaves, env=None, default_image=None) -> dict (v0 IR) + pipeline_to_json(p, **kw) -> str + + @pipeline(slug, ..., triggers=[...], allow_manual=True) -> decorator + push(branch=..., tag=...) -> PushTrigger + pull_request(branches=..., types=...) -> PullRequestTrigger + schedule(cron=...) -> ScheduleTrigger + dump_registry_json() -> str (HAR-9 envelope) + +Cache helpers: ttl, on_change, forever, compose. + +``hm.pipeline`` is polymorphic. When called with positional ``Step`` +arguments it builds a v0 IR dict (the factory). When called with no +positionals or a string slug it returns a decorator that registers a +function as a CI pipeline (HAR-9). +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from . 
import _decorator, dev +from ._deploy import Deployment, deploy +from ._envelope import dump_registry_json +from ._step import Step, scratch, wait +from ._target import clear_target_cache, target # noqa: F401 clear_target_cache used by tests +from ._typing import BaseImage, Dep, Target +from .cache import ( + CacheCompose, + CacheForever, + CacheNone, + CacheOnChange, + CachePolicy, + CacheTTL, +) +from .cmake import cmake +from .composer import composer +from .dotnet import dotnet +from .elm import elm +from .go import go +from .gradle import gradle +from .haskell import haskell +from .npm import npm +from .ocaml import ocaml +from .perl import perl +from .pipeline import pipeline as _pipeline_factory +from .pipeline import pipeline_to_json +from .python import python +from .ruby import ruby +from .rust import rust +from .triggers import pull_request, push, schedule +from .types import Pipeline +from .zig import zig + +if TYPE_CHECKING: + from datetime import timedelta + + +def pipeline(*args: Any, **kwargs: Any) -> Any: + """Polymorphic entry point. + + - ``pipeline(*leaves, env=..., default_image=...)`` — every + positional arg is a :class:`Step`; returns the v0 IR dict (the + factory). + - ``pipeline(slug=None, *, name=..., triggers=..., allow_manual=..., + env=..., default_image=...)`` — no positionals or a string slug; + returns a decorator that registers the wrapped function in the + module-level :data:`~harmont._registry.REGISTRATIONS` table + (HAR-9). + + The discriminant is the *type* of the positional arguments: any + non-Step positional (including a string slug, or no positional at + all) routes to the decorator path. 
+ """ + if args and all(isinstance(a, Step) for a in args): + return _pipeline_factory(*args, **kwargs) + return _decorator.pipeline(*args, **kwargs) + + +def ttl(duration: timedelta) -> CacheTTL: + return CacheTTL(duration=duration) + + +def on_change(*paths: str) -> CacheOnChange: + return CacheOnChange(paths=tuple(paths)) + + +def forever(env_keys: tuple[str, ...] = ()) -> CacheForever: + return CacheForever(env_keys=env_keys) + + +def compose(*policies: CachePolicy) -> CacheCompose: + return CacheCompose(policies=tuple(policies)) + + +def sh( + cmd: str, + *, + cwd: str | None = None, + label: str | None = None, + cache: CachePolicy | None = None, + env: dict[str, str] | None = None, + timeout_seconds: int | None = None, + image: str | None = None, + key: str | None = None, +) -> Step: + """Shorthand for ``scratch().sh(cmd, ...)`` — start a chain in one call.""" + return scratch().sh( + cmd, + cwd=cwd, + label=label, + cache=cache, + env=env, + timeout_seconds=timeout_seconds, + image=image, + key=key, + ) + + +__all__ = [ + "BaseImage", + "CacheCompose", + "CacheForever", + "CacheNone", + "CacheOnChange", + "CachePolicy", + "CacheTTL", + "Dep", + "Deployment", + "Pipeline", + "Step", + "Target", + "cmake", + "compose", + "composer", + "deploy", + "dev", + "dotnet", + "dump_registry_json", + "elm", + "forever", + "go", + "gradle", + "haskell", + "npm", + "ocaml", + "on_change", + "perl", + "pipeline", + "pipeline_to_json", + "pull_request", + "push", + "python", + "ruby", + "rust", + "schedule", + "scratch", + "sh", + "target", + "ttl", + "wait", + "zig", +] diff --git a/harmont/_decorator.py b/harmont/_decorator.py new file mode 100644 index 0000000..69fe75c --- /dev/null +++ b/harmont/_decorator.py @@ -0,0 +1,68 @@ +"""@hm.pipeline decorator — see docs/superpowers/specs/2026-05-10-har-9-imperfect-dsl-design.md.""" +from __future__ import annotations + +import re +from functools import wraps +from typing import TYPE_CHECKING, Any + +from ._deps import 
call_with_deps, validate_target_signature +from ._registry import PipelineRegistration, register + +if TYPE_CHECKING: + from collections.abc import Callable + + from .triggers import Trigger + +_SLUG_RE = re.compile(r"^[a-z][a-z0-9-]{0,63}$") + + +def _validate_slug(slug: str) -> None: + if not _SLUG_RE.match(slug): + msg = ( + f"invalid pipeline slug {slug!r}\n" + f" → use lowercase letters, digits, and '-', " + f"start with a letter, max 64 chars" + ) + raise ValueError(msg) + + +def pipeline( + slug: str | None = None, + *, + name: str | None = None, + triggers: tuple[Trigger, ...] | list[Trigger] = (), + allow_manual: bool = True, + env: dict[str, str] | None = None, + default_image: str | None = None, +) -> Callable[[Callable[..., Any]], Callable[[], Any]]: + """Register a function as a CI pipeline. + + The wrapped function returns a :class:`Step`, a tuple of leaves + (:data:`Pipeline`), or any toolchain wrapper that + :func:`harmont._unwrap.as_leaves` can coerce. The function may + declare dependencies as parameters (pytest-style); each parameter + name is resolved against the global target registry. + """ + def decorator(fn: Callable[..., Any]) -> Callable[[], Any]: + validate_target_signature(fn) + resolved = slug if slug is not None else fn.__name__ + _validate_slug(resolved) + + @wraps(fn) + def wrapper() -> Any: + return call_with_deps(fn) + + register( + PipelineRegistration( + slug=resolved, + name=name if name is not None else resolved, + triggers=tuple(triggers), + allow_manual=allow_manual, + env=env, + default_image=default_image, + fn=wrapper, + ) + ) + return wrapper + + return decorator diff --git a/harmont/_deploy.py b/harmont/_deploy.py new file mode 100644 index 0000000..acac143 --- /dev/null +++ b/harmont/_deploy.py @@ -0,0 +1,189 @@ +"""Driver-agnostic deployment registry, decorator, and Dep marker. + +This module is intentionally driver-free. 
Concrete deployment types +(``LocalDeployment``, future ``AwsDeployment``, …) live in their own +driver subpackages (``harmont.dev``, future ``harmont.aws``). +The registry stores deployments polymorphically; CLI subcommands filter +by ``isinstance`` or by the ``driver`` discriminator. +""" +from __future__ import annotations + +import dataclasses +import re +from dataclasses import dataclass +from functools import wraps +from typing import TYPE_CHECKING, Any + +from ._deps import call_with_deps, validate_target_signature + +if TYPE_CHECKING: + from collections.abc import Callable + + +@dataclass(frozen=True) +class Deployment: + """Abstract deployment record. Subclassed per driver. + + ``name`` is the slug the user passed to ``@hm.deploy``. + ``driver`` is the discriminator string ("local" for ``hm.dev``). + """ + name: str + driver: str + + +# Registry: slug -> zero-arg callable that re-invokes the user-defined +# function with deps resolved. Same shape as REGISTRATIONS for pipelines. +DEPLOYMENTS: dict[str, Callable[[], Deployment]] = {} + + +_SLUG_RE = re.compile(r"^[a-z][a-z0-9-]{0,30}$") + + +def _validate_slug(slug: str) -> None: + """Raise ValueError if slug does not satisfy Docker container-name rules.""" + if not _SLUG_RE.match(slug): + msg = ( + f"hm: invalid deployment slug {slug!r}\n" + " → use lowercase letters, digits, and '-', " + "start with a letter, max 31 chars (Docker container name rules)" + ) + raise ValueError(msg) + + +def deploy( + slug: str | None = None, + *, + name: str | None = None, +) -> Callable[[Callable[..., Any]], Callable[[], Deployment]]: + """Register a function as a deployment. + + The wrapped function returns a :class:`Deployment` (typically the + output of :func:`harmont.dev.deploy` or any future driver's factory). + Parameters are resolved via the shared marker machinery: ``Target[T]``, + ``BaseImage[...]``, and ``Dep[T]`` (deployment-to-deployment refs). 
+ + Usage:: + + @hm.deploy("db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + @hm.deploy("api") + def api(db: hm.Dep[hm.Deployment]): + return hm.dev.deploy( + image="myapp:latest", + port_mapping={8000: hm.dev.port()}, + env={"DB_HOST": db.name}, + ) + + Args: + slug: Registry key. Must match ``^[a-z][a-z0-9-]{0,30}$`` + (Docker container-name rules). Defaults to ``fn.__name__``. + name: Reserved for future use as a human-readable display name. + Has no effect in v1; the slug is the public identity. + + Raises: + ValueError: On invalid or duplicate slug. + TypeError: On unmarkered parameters without defaults (raised by + the shared :func:`validate_target_signature`), or if + the wrapped function returns a non-Deployment value. + """ + del name # reserved-for-future-use; explicitly drop the unused binding + + def decorator(fn: Callable[..., Any]) -> Callable[[], Deployment]: + validate_target_signature(fn) + resolved_slug = slug if slug is not None else fn.__name__ + _validate_slug(resolved_slug) + if resolved_slug in DEPLOYMENTS: + msg = ( + f"hm: duplicate deployment slug {resolved_slug!r}\n" + " → each @hm.deploy must have a unique slug; " + "pass an explicit slug= to disambiguate" + ) + raise ValueError(msg) + + @wraps(fn) + def wrapper() -> Deployment: + value = call_with_deps(fn) + if not isinstance(value, Deployment): + msg = ( + f"hm.deploy({resolved_slug!r}) must return a Deployment, " + f"got {type(value).__name__}\n" + " → return the output of hm.dev.deploy(...) or another " + "driver's factory" + ) + raise TypeError(msg) + # Stamp the resolved slug into the returned dataclass so callers + # see name= regardless of what the factory left in `name`. + return dataclasses.replace(value, name=resolved_slug) + + DEPLOYMENTS[resolved_slug] = wrapper + return wrapper + + return decorator + + +def dep_graph() -> dict[str, tuple[str, ...]]: + """Return slug -> tuple of upstream slugs, in parameter order. 
+ + Walks DEPLOYMENTS; for each registered slug, introspects the wrapped + function's signature for ``Dep[T]`` parameters. Plain defaults and + Target/BaseImage markers do not produce edges in the deploy graph. + """ + import inspect + import typing as _typing + + from ._typing import _DepMarker + + out: dict[str, tuple[str, ...]] = {} + for slug, wrapper in DEPLOYMENTS.items(): + fn = wrapper.__wrapped__ # type: ignore[attr-defined] + sig = inspect.signature(fn) + hints = _typing.get_type_hints(fn, include_extras=True) + deps: list[str] = [] + for name in sig.parameters: + ann = hints.get(name) + if ann is None: + continue + if _typing.get_origin(ann) is None: + continue + metadata = _typing.get_args(ann)[1:] + if any(isinstance(m, _DepMarker) for m in metadata): + deps.append(name) + out[slug] = tuple(deps) + return out + + +def topo_order() -> list[str]: + """Topological ordering of DEPLOYMENTS by dep_graph; deps first. + + Raises RuntimeError on cycles. Stable under insertion order for + independent slugs (preserves decoration order within a level). + """ + g = dep_graph() + # Kahn's algorithm w/ stable level ordering (insertion-order of g). 
+ indeg: dict[str, int] = {} + for slug, upstreams in g.items(): + indeg[slug] = sum(1 for u in upstreams if u in g) + order: list[str] = [] + while True: + progressed = False + for slug in list(g.keys()): + if slug in order: + continue + if indeg[slug] == 0: + order.append(slug) + for downstream, upstreams in g.items(): + if slug in upstreams and downstream not in order: + indeg[downstream] -= 1 + progressed = True + if not progressed: + break + if len(order) != len(g): + unresolved = [s for s in g if s not in order] + msg = ( + f"hm: dep cycle among deployments: {', '.join(unresolved)}\n" + " → break the cycle, or factor shared state into a target" + ) + raise RuntimeError(msg) + return order diff --git a/harmont/_deps.py b/harmont/_deps.py new file mode 100644 index 0000000..a806c1f --- /dev/null +++ b/harmont/_deps.py @@ -0,0 +1,203 @@ +"""Shared dependency resolution for @hm.target and @hm.pipeline (HAR-28). + +Strict-marker model: +- ``Target[T]`` — resolve by parameter name from the global + target registry; raise if not found. +- ``BaseImage["X"]`` — inject a scratch-rooted ``Step(image=X)``. +- plain param with default — bind the default value. +- anything else — raise at decoration time via + :func:`validate_target_signature`. + +Cycle detection uses a module-level "currently resolving" stack keyed +by function name; the dump_registry_json render clears it at the +start of every render along with the target memoization cache. +""" + +from __future__ import annotations + +import inspect +import typing +from typing import TYPE_CHECKING, Any + +from ._step import Step +from ._typing import _TARGET_MARKER, _BaseImageMarker, _DepMarker + +if TYPE_CHECKING: + from collections.abc import Callable + + +_TARGETS_BY_NAME: dict[str, Callable[[], Any]] = {} +_RESOLVING: list[str] = [] + + +def register_named_target(name: str, fn: Callable[[], Any]) -> None: + """Register a named target. 
Raises on duplicate name.""" + if name in _TARGETS_BY_NAME: + msg = ( + f"hm: duplicate target name {name!r}\n" + " → each @hm.target must have a unique name; pass " + 'name="..." to disambiguate' + ) + raise ValueError(msg) + _TARGETS_BY_NAME[name] = fn + + +def clear_target_names() -> None: + """Reset the name registry and cycle-detection stack. Used by tests + and `clear_target_cache()` (the full reset used at test boundaries).""" + _TARGETS_BY_NAME.clear() + _RESOLVING.clear() + + +def _param_kind_error(param: inspect.Parameter) -> str | None: + """Return a fix-directed error message if `param` has a forbidden kind.""" + kind = param.kind + if kind == inspect.Parameter.VAR_POSITIONAL: + return ( + "hm: target functions cannot take *args\n" + " → declare each dependency as an explicit named parameter" + ) + if kind == inspect.Parameter.VAR_KEYWORD: + return ( + "hm: target functions cannot take **kwargs\n" + " → declare each dependency as an explicit named parameter" + ) + if kind == inspect.Parameter.POSITIONAL_ONLY: + return ( + f"hm: target functions cannot have positional-only " + f"parameters (got {param.name!r})\n" + " → remove the '/' marker; parameters must be name-resolvable" + ) + return None + + +def _marker_for(annotation: Any) -> object | None: + """Inspect an `Annotated[T, ...]` annotation and return the + hm-specific marker (a `_TargetMarker`, `_BaseImageMarker`, or + `_DepMarker`) if present, else None.""" + if typing.get_origin(annotation) is None: + return None + metadata = typing.get_args(annotation)[1:] + for meta in metadata: + if meta is _TARGET_MARKER: + return _TARGET_MARKER # type: ignore[no-any-return] + if isinstance(meta, _BaseImageMarker): + return meta + if isinstance(meta, _DepMarker): + return meta + return None + + +def _safe_get_type_hints(fn: Callable[..., Any]) -> dict[str, Any]: + """`typing.get_type_hints(fn, include_extras=True)` but tolerant of + forward references that fail to resolve — fall back to the raw + 
def validate_target_signature(fn: Callable[..., Any]) -> None:
    """Decoration-time validation of a target function's parameters.

    Raises ``TypeError`` when any parameter:

    - is ``*args``, ``**kwargs`` or positional-only, or
    - carries no hm marker in its annotation and has no default value.

    A parameter is accepted when ``_marker_for`` finds a marker in its
    annotation (``Target[T]``, ``Annotated[Step, BaseImage("...")]`` or
    ``Dep[T]``). A parameter without a marker is accepted when it has a
    default, which is what gets bound at resolution time.
    """
    sig = inspect.signature(fn)
    hints = _safe_get_type_hints(fn)
    for param in sig.parameters.values():
        kind_err = _param_kind_error(param)
        if kind_err is not None:
            raise TypeError(kind_err)
        if _marker_for(hints.get(param.name)) is not None:
            continue
        if param.default is not inspect.Parameter.empty:
            continue
        # BaseImage is a factory function, not a subscriptable alias, so
        # the hint must show the Annotated[...] call form; the older
        # BaseImage["..."] spelling would itself raise when evaluated.
        msg = (
            f"hm: parameter {param.name!r} has no marker and no default\n"
            " → annotate with Target[T] (target dep) or "
            'Annotated[Step, BaseImage("...")] (scratch image), '
            "or give it a default"
        )
        raise TypeError(msg)
def call_with_deps(fn: Callable[..., Any]) -> Any:
    """Invoke ``fn`` with its resolved dependencies, guarding against cycles.

    The function's ``__name__`` is pushed on the module-level
    ``_RESOLVING`` stack for the duration of the call and popped again
    even when resolution or the call itself raises. Finding the name
    already on the stack raises ``RuntimeError`` listing the chain.
    """
    # NOTE(review): the stack is keyed by fn.__name__, not by function
    # identity or registry name — two distinct targets that share a
    # function name would presumably be reported as a cycle; confirm.
    name = fn.__name__
    if name not in _RESOLVING:
        _RESOLVING.append(name)
        try:
            resolved = resolve_deps(fn)
            return fn(**resolved)
        finally:
            _RESOLVING.pop()

    cycle = " → ".join([*_RESOLVING, name])
    msg = (
        "hm: dependency cycle detected\n"
        f" → {cycle}\n"
        " fix: break the cycle, or extract a shared root target"
    )
    raise RuntimeError(msg)
def dump_registry_json(
    *,
    pipeline_org: str | None = None,
    now: int | None = None,
    base_path: Path | None = None,
    env: Mapping[str, str] | None = None,
) -> str:
    """Serialize every registered pipeline into the schema_version=1 envelope.

    Any argument left as ``None`` is filled in as follows:
        ``env``          <- ``os.environ``
        ``pipeline_org`` <- ``env["HARMONT_PIPELINE_ORG"]`` or ``"default"``
        ``now``          <- ``int(time.time())``
        ``base_path``    <- ``Path.cwd()``

    The target memoization cache is cleared before rendering, so memoized
    target values are shared within one render but never across renders.
    """
    clear_target_memo()

    env_map: Mapping[str, str] = os.environ if env is None else env
    if pipeline_org is None:
        org = env_map.get("HARMONT_PIPELINE_ORG", "default")
    else:
        org = pipeline_org
    render_now = int(time.time()) if now is None else now
    bp = Path.cwd() if base_path is None else base_path

    # Render in registration order; each entry is one pipeline object.
    rendered = []
    for reg in REGISTRATIONS:
        rendered.append(
            _render_one(
                reg,
                pipeline_org=org,
                now=render_now,
                base_path=bp,
                env=env_map,
            )
        )
    return json.dumps({"schema_version": "1", "pipelines": rendered})
def hash_key(parent_key: str, cmd: str, position: int) -> str:
    """Derive the fallback key: first 12 hex chars of a SHA-256 digest.

    The digest input is ``parent_key``, ``cmd`` and the decimal
    ``position``, each UTF-8 encoded and separated by a NUL byte so the
    field boundaries stay unambiguous.
    """
    fields = (parent_key, cmd, str(position))
    payload = b"\x00".join(field.encode("utf-8") for field in fields)
    return hashlib.sha256(payload).hexdigest()[:12]
+ natural_slugs: dict[int, str] = {} + for s in steps_list: + if s.key_override is not None: + overrides[id(s)] = s.key_override + if s.label is not None: + slug = slugify_label(s.label) + if slug: + natural_slugs[id(s)] = slug + + # Reserve every override; any natural slug that matches a reserved + # override is a collision for the slug claimant. + reserved = set(overrides.values()) + + # Detect slug collisions across every labeled step — including those + # with explicit overrides. An override-bearing step still "claims" + # its natural slug for collision purposes, so a peer with the same + # label can't quietly take it. + slug_counts: dict[str, int] = {} + for slug in natural_slugs.values(): + slug_counts[slug] = slug_counts.get(slug, 0) + 1 + + # The slug pool that non-override steps may draw from: only steps + # without a `key=` override are eligible to receive their slug. + label_slugs: dict[int, str] = { + sid: slug for sid, slug in natural_slugs.items() if sid not in overrides + } + + keys: dict[int, str] = {} + for position, s in enumerate(steps_list): + sid = id(s) + if sid in overrides: + keys[sid] = overrides[sid] + continue + candidate_slug = label_slugs.get(sid) + if ( + candidate_slug is not None + and candidate_slug not in reserved + and slug_counts[candidate_slug] == 1 + ): + keys[sid] = candidate_slug + reserved.add(candidate_slug) + continue + # Fall back to hash. Parent resolved key may not be in `keys` + # yet; use the empty string as a sentinel — call sites that + # need the resolved parent_key pass it explicitly via the + # lowering pass (see pipeline.py). 
def register(reg: PipelineRegistration) -> None:
    """Add ``reg`` to ``REGISTRATIONS``.

    Raises ``ValueError`` when an already-registered pipeline has the
    same slug; in that case the registry is left unchanged.
    """
    for existing in REGISTRATIONS:
        if existing.slug == reg.slug:
            msg = (
                f"duplicate pipeline slug {reg.slug!r}\n"
                " → each @hm.pipeline must have a unique slug"
            )
            raise ValueError(msg)
    REGISTRATIONS.append(reg)
@dataclass(frozen=True)
class Step:
    """Immutable node in a step chain; ``parent`` links it to its predecessor."""

    cmd: str | None = None
    # In-tree pointer to the step this one was chained from. This is the
    # object link, not the wire-format ``builds_in`` key string.
    parent: Step | None = None

    is_wait: bool = False
    continue_on_failure: bool = False
    label: str | None = None
    cache: CachePolicy | None = None
    env: dict[str, str] | None = None
    timeout_seconds: int | None = None
    # Docker base image override for this step.
    image: str | None = None
    # Step-executor plugin runner name; ``None`` selects the default.
    runner: str | None = None
    # Plugin-specific runner arguments.
    runner_args: dict[str, Any] | None = None
    # Manual key override; exposed to callers as the ``key=`` kwarg on
    # ``.sh()``. Named differently so it is not confused with the key
    # derived later for the step.
    key_override: str | None = None

    def sh(
        self,
        cmd: str,
        *,
        cwd: str | None = None,
        label: str | None = None,
        cache: CachePolicy | None = None,
        env: dict[str, str] | None = None,
        timeout_seconds: int | None = None,
        image: str | None = None,
        runner: str | None = None,
        runner_args: dict[str, Any] | None = None,
        key: str | None = None,
    ) -> Step:
        """Chain a command step onto this one and return the new step.

        ``cwd`` prefixes the command with ``cd <cwd> &&``; an empty
        string is rejected with ``ValueError``. ``key`` is stored as
        ``key_override``. The remaining keyword arguments are copied
        onto the new step unchanged.
        """
        if cwd is not None:
            if cwd == "":
                msg = (
                    "hm: cwd must be a non-empty path\n"
                    ' → omit cwd= to run in the workspace root, '
                    'or pass cwd="some/dir"'
                )
                raise ValueError(msg)
            cmd = f"cd {cwd} && {cmd}"

        # Image inheritance: only a command-less parent (a scratch or
        # fork node) hands its image down, and only when the caller did
        # not pass one explicitly. A parent with a real cmd never does.
        if image is None and self.cmd is None:
            image = self.image

        return Step(
            cmd=cmd,
            parent=self,
            label=label,
            cache=cache,
            env=env,
            timeout_seconds=timeout_seconds,
            image=image,
            runner=runner,
            runner_args=runner_args,
            key_override=key,
        )

    def fork(self, label: str | None = None) -> Step:
        """Return a command-less child of this step, optionally labelled."""
        return Step(cmd=None, parent=self, label=label)
Registers the wrapped function by name in the global registry + (``harmont._deps._TARGETS_BY_NAME``), so other targets can + declare it as a parameter. + 2. Memoizes the return value per envelope render so targets calling + other targets dedup correctly. + 3. Resolves any parameters declared by the wrapped function via + :func:`harmont._deps.call_with_deps` (cycle-aware). + +Pytest-style fixture form: + + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.target() + def venv(apt_base) -> hm.Step: + return apt_base.sh("python3 -m venv .venv") + +Explicit-call form is still supported: + + @hm.target() + def venv() -> hm.Step: + return apt_base().sh("python3 -m venv .venv") + +The cache lives in a module-level dict keyed by the wrapped function +object. :func:`harmont._envelope.dump_registry_json` clears it before +each render; tests clear it via the fixture pattern documented in +``cidsl/py/CLAUDE.md``. +""" + +from __future__ import annotations + +from functools import wraps +from typing import TYPE_CHECKING, Any + +from ._deps import ( + call_with_deps, + clear_target_names, + register_named_target, + validate_target_signature, +) + +if TYPE_CHECKING: + from collections.abc import Callable + + +_TARGET_CACHE: dict[Callable[..., Any], Any] = {} + + +def clear_target_memo() -> None: + """Reset only the per-render memoization cache. + + Called at the start of every envelope render so two consecutive + renders don't share cached ``Step`` values. The named-target + registry is NOT touched — it is populated once at decoration time + and must remain in place so pipeline fixture-style params can + resolve their dependencies during the same render. + """ + _TARGET_CACHE.clear() + + +def clear_target_cache() -> None: + """Reset target memoization AND the named-target registry. + + Test-only helper: between tests we want a clean slate. 
def target(
    *, name: str | None = None,
) -> Callable[[Callable[..., Any]], Callable[[], Any]]:
    """Decorator factory for reusable, memoized pipeline building blocks.

    The decorated function's signature is validated at decoration time.
    It is replaced by a zero-argument wrapper that resolves the declared
    parameters through ``call_with_deps`` and caches the result in
    ``_TARGET_CACHE``, keyed by the original function. The wrapper is
    registered under ``name``, or under ``fn.__name__`` when ``name`` is
    omitted.
    """
    def decorator(fn: Callable[..., Any]) -> Callable[[], Any]:
        validate_target_signature(fn)
        registry_key = fn.__name__ if name is None else name

        @wraps(fn)
        def wrapper() -> Any:
            try:
                return _TARGET_CACHE[fn]
            except KeyError:
                pass
            # First call since the cache was last cleared: compute the
            # value, then serve it from the cache like the original.
            _TARGET_CACHE[fn] = call_with_deps(fn)
            return _TARGET_CACHE[fn]

        register_named_target(registry_key, wrapper)
        return wrapper

    return decorator
def node_install_cmd(version: str) -> str:
    """NodeSource node-install command for a given Node release line.

    ``version`` may be a bare major (``"20"``), the NodeSource channel
    spelling (``"20.x"``, ``"lts.x"``), or a full version with an
    optional leading ``v`` (``"20.11.0"``, ``"v20.11.0"``). Only the
    part before the first dot is used, because the setup script is
    published per release line as ``setup_<major>.x``.

    Raises ``ValueError`` when no release line can be extracted.
    """
    # Keep only the leading component: "20.11.0" and "20.x" both map to
    # the "20" channel instead of producing a non-existent script name.
    major = version.strip().removeprefix("v").split(".", 1)[0]
    if not major:
        msg = (
            f"hm: invalid node version {version!r}\n"
            ' → pass a major version such as "20" or "20.x"'
        )
        raise ValueError(msg)
    return (
        f"curl -fsSL https://deb.nodesource.com/setup_{major}.x | bash - && "
        "apt-get install -y nodejs"
    )
+ +Two markers are public surface: + + Target[T] — declares a dependency on a registered target by + parameter name. The resolved value is typed `T` + (whatever the target returns — `Step`, + `HaskellPackage`, `ElmProject`, etc.). + + BaseImage(X) — used in ``Annotated[Step, BaseImage("X")]``. Declares + a scratch-rooted Step in image "X" as the parameter + value. The first ``.sh()`` call on the parameter + inherits ``image="X"``, so the first emitted IR step + carries it in the v0 wire format. + +Both surface as PEP 593 ``Annotated[...]`` so static type-checkers see +the concrete type (``Step``, ``HaskellPackage``, etc.) while the runtime +decorator reads the marker from ``typing.get_type_hints(include_extras=True)``. + +Examples: + + @hm.target() + def venv(apt_base: hm.Target[hm.Step]) -> hm.Step: + # mypy/pyright: apt_base is Step. assert_type passes. + return apt_base.sh("python3 -m venv .venv") + + @hm.target() + def apt_base( + base: Annotated[hm.Step, hm.BaseImage("ubuntu-24.04")], + ) -> hm.Step: + # mypy/pyright: base is Step. assert_type passes. + return base.sh("apt-get update") + +The callable ``BaseImage(...)`` form is preferred over the older +``BaseImage["..."]`` subscript form because type checkers parse the +hyphenated image string as arithmetic in subscript position. +""" + +from __future__ import annotations + +from typing import Annotated, TypeVar + +T = TypeVar("T") + + +class _TargetMarker: + """Sentinel class for Annotated metadata. The module-level + instance ``_TARGET_MARKER`` is the actual sentinel value.""" + + __slots__ = () + + def __repr__(self) -> str: + return "" + + +_TARGET_MARKER = _TargetMarker() + + +# Annotated with a TypeVar produces a generic alias; subscripting +# ``Target[Step]`` resolves to ``Annotated[Step, _TARGET_MARKER]``. 
+Target = Annotated[T, _TARGET_MARKER] + + +class _BaseImageMarker: + """Metadata holder for the BaseImage("...") annotation.""" + + __slots__ = ("image",) + + def __init__(self, image: str) -> None: + self.image = image + + def __repr__(self) -> str: + return f"" + + def __eq__(self, other: object) -> bool: + return isinstance(other, _BaseImageMarker) and self.image == other.image + + def __hash__(self) -> int: + return hash(("_BaseImageMarker", self.image)) + + +def BaseImage(image: str) -> _BaseImageMarker: # noqa: N802 — factory mimicking a type + """Annotation metadata factory. Use as + ``Annotated[Step, BaseImage("ubuntu-24.04")]``. + + The decorator injects a ``Step(image="ubuntu-24.04")`` (a scratch + root with the image set) as the parameter value. The first + ``.sh(...)`` call on it inherits the image so the first emitted + IR step carries ``image="ubuntu-24.04"`` in the v0 wire format. + """ + if not isinstance(image, str) or not image: + msg = ( + "hm: BaseImage(...) takes a non-empty image string\n" + ' → e.g. BaseImage("ubuntu-24.04")' + ) + raise TypeError(msg) + return _BaseImageMarker(image) + + +class _DepMarker: + """Sentinel class for Annotated metadata. Marks a parameter as a + dependency on another @hm.deploy by parameter name; the injected + value is the resolved Deployment. The module-level instance + ``_DEP_MARKER`` is the actual sentinel value embedded in + ``Annotated[T, _DEP_MARKER]`` by the ``Dep`` alias. + """ + + __slots__ = () + + def __repr__(self) -> str: + return "" + + +_DEP_MARKER = _DepMarker() + + +# hm.Dep[Deployment] (or a concrete subclass) -> Annotated[T, _DEP_MARKER]. +Dep = Annotated[T, _DEP_MARKER] diff --git a/harmont/_unwrap.py b/harmont/_unwrap.py new file mode 100644 index 0000000..c2ddb87 --- /dev/null +++ b/harmont/_unwrap.py @@ -0,0 +1,56 @@ +"""Coerce toolchain return values to ``tuple[Step, ...]`` (HAR-28). 
def as_leaves(obj: object) -> tuple[Step, ...]:
    """Flatten ``obj`` into a tuple of leaf Steps.

    A tuple or list is expanded item by item through ``_one`` (which
    handles nested sequences); any other value is passed to ``_one``
    directly. ``_one`` raises ``TypeError`` for unsupported values.
    """
    if not isinstance(obj, (tuple, list)):
        return _one(obj)
    return tuple(leaf for item in obj for leaf in _one(item))
Kept tiny on purpose.""" + +from __future__ import annotations + + +def validate_positive_int(value: int | None, field_name: str, container_name: str) -> None: + if value is None: + return + if not isinstance(value, int) or value < 1: + msg = f"{container_name}.{field_name} must be a positive integer; got {value!r}" + raise ValueError(msg) diff --git a/harmont/cache.py b/harmont/cache.py new file mode 100644 index 0000000..1b9da42 --- /dev/null +++ b/harmont/cache.py @@ -0,0 +1,80 @@ +"""Cache policies for layered VM snapshots. + +See docs/design/snapshots/2026-05-01-python-surface.md for the surface +and docs/design/snapshots/2026-05-01-data-model.md for the key formulas. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from datetime import timedelta + + +@dataclass(frozen=True) +class CachePolicy: + """Base — never instantiate directly. Use the helpers below.""" + + +@dataclass(frozen=True) +class CacheNone(CachePolicy): + """Always run the step; never cache its snapshot. + + Equivalent to today's behavior. Default for command steps. + """ + + +@dataclass(frozen=True) +class CacheForever(CachePolicy): + """Cache forever, keyed only on (command, parent, env_keys). + + Use for pure computations whose only inputs are visible to the planner. + DO NOT use for installs that fetch the public internet — package repos + drift; manual cache busts will be needed. + """ + + env_keys: tuple[str, ...] = () + + +@dataclass(frozen=True) +class CacheTTL(CachePolicy): + """Cache for `duration`; refresh once per window (UTC-midnight floored). + + Two builds within the same UTC day share a key; a build at 00:30 UTC + the next day rebuilds. + """ + + duration: timedelta + env_keys: tuple[str, ...] = () + + +@dataclass(frozen=True) +class CacheOnChange(CachePolicy): + """Rebuild whenever any file under `paths` changes. + + Paths are relative to the source-archive root. 
File hashes are + computed at render time by `harmont.keygen` (paths are read from + the source archive's checkout root). + + No `env_keys` field — file content already covers the invalidation + surface. + """ + + paths: tuple[str, ...] + + +@dataclass(frozen=True) +class CacheCompose(CachePolicy): + """Combine multiple policies. Cache hits ONLY when every sub-policy hits. + + Useful for "rebuild daily OR when these files change": + + CacheCompose(policies=( + CacheTTL(duration=timedelta(days=1)), + CacheOnChange(paths=("api/cabal.project",)), + )) + """ + + policies: tuple[CachePolicy, ...] diff --git a/harmont/cmake.py b/harmont/cmake.py new file mode 100644 index 0000000..4264848 --- /dev/null +++ b/harmont/cmake.py @@ -0,0 +1,127 @@ +"""CMake (C/C++) toolchain. + +Public surface lives on the module-level singleton :data:`cmake`. Call it +to construct a :class:`CMakeProject`, or use the bare-form action methods +(``cmake.configure()``, ``cmake.build()``, etc.) for a one-shot leaf. + +The chain is: + + scratch -> apt-base (build-essential, cmake, ninja-build, clang-format) + -> cmake-verify (cmake --version && clang-format --version, + cached forever) + -> action leaves + +The ``lang="cpp"`` switch swaps the label prefix from ``:c:`` to +``:cpp:`` only — cmake routes by ``CMakeLists.txt`` and the shell +commands are identical for both languages. 
@dataclass(frozen=True)
class CMakeProject:
    """A CMake project at ``path`` whose actions chain off ``installed``.

    Every action method returns the step produced by
    ``installed.sh(...)``. Extra keyword arguments are forwarded to that
    call; a missing or ``None`` ``label`` is replaced by a default of
    the form ``:<tag>: <action>``.
    """

    path: str
    installed: Step
    _tag: str

    def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step:
        """Run ``cmd`` on top of ``installed``, defaulting the label."""
        label = kw.pop("label", None)
        if label is None:
            label = default_label
        return self.installed.sh(cmd, label=label, **kw)

    def _configured(self) -> str:
        """Shell prefix shared by configure/build/test: cd in, then configure."""
        return f"cd {self.path} && cmake -S . -B build"

    def configure(self, **kw: Any) -> Step:
        """Configure the build tree only."""
        return self._emit(self._configured(), f":{self._tag}: configure", **kw)

    def build(self, **kw: Any) -> Step:
        """Configure, then build."""
        script = self._configured() + " && cmake --build build"
        return self._emit(script, f":{self._tag}: build", **kw)

    def test(self, **kw: Any) -> Step:
        """Configure, build, then run ctest with output on failure."""
        stages = [
            self._configured(),
            "cmake --build build",
            "ctest --test-dir build --output-on-failure",
        ]
        return self._emit(" && ".join(stages), f":{self._tag}: test", **kw)

    def fmt(self, **kw: Any) -> Step:
        """Dry-run clang-format over C/C++ sources under ``src`` and ``tests``."""
        finder = "find src tests -name '*.[ch]' -o -name '*.cpp' -o -name '*.hpp'"
        checker = "xargs clang-format --dry-run --Werror"
        script = f"cd {self.path} && {finder} | {checker}"
        return self._emit(script, f":{self._tag}: fmt", **kw)
abstraction. + +Chain: scratch -> apt-base (php-cli + extensions + composer + git + unzip) -> +composer-verify (``composer --version && php --version``, cached forever) -> +composer-deps (``composer install``, cached on ``composer.lock``) -> +action leaves. The ``laravel=True`` switch swaps ``.test()`` to +``php artisan test`` and changes the label prefix from ``:php:`` to +``:laravel:``. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ( + "php-cli", + "php-mbstring", + "php-xml", + "php-curl", + "php-sqlite3", + "composer", + "git", + "unzip", +) + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + + +@dataclass(frozen=True) +class ComposerProject: + path: str + installed: Step + _tag: str + _laravel: bool + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def test(self, **kw: Any) -> Step: + cmd = ( + f"cd {self.path} && php artisan test" + if self._laravel + else f"cd {self.path} && vendor/bin/phpunit" + ) + return self._emit(cmd, f":{self._tag}: test", **kw) + + def lint(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && vendor/bin/phpstan analyse", + f":{self._tag}: lint", + **kw, + ) + + +def _make_composer( + *, + path: str = ".", + laravel: bool = False, + image: str | None = None, + base: Step | None = None, +) -> ComposerProject: + tag = "laravel" if laravel else "php" + composer_verified = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd="composer --version && php --version", + install_cache=CacheForever(env_keys=()), + lang_tag=tag, + install_tag="composer", + image=image, + base=base, + ) + deps = composer_verified.sh( + f"cd {path} && composer 
install --no-interaction --prefer-dist", + label=f":{tag}: deps", + cache=CacheOnChange(paths=(f"{path}/composer.lock",)), + ) + return ComposerProject(path=path, installed=deps, _tag=tag, _laravel=laravel) + + +class _ComposerEntry: + def __call__( + self, + *, + path: str = ".", + laravel: bool = False, + image: str | None = None, + base: Step | None = None, + ) -> ComposerProject: + return _make_composer(path=path, laravel=laravel, image=image, base=base) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).lint(**action_kw) + + +composer = _ComposerEntry() diff --git a/harmont/dev/__init__.py b/harmont/dev/__init__.py new file mode 100644 index 0000000..060c33e --- /dev/null +++ b/harmont/dev/__init__.py @@ -0,0 +1,19 @@ +"""harmont.dev — local Docker deployment driver. + +Public surface: + + deploy(*, image=None, from_=None, cmd=None, + port_mapping=None, env=None, + volumes=None, workdir=None) -> LocalDeployment + port() -> _PortSentinel + LocalDeployment (concrete subclass) + dump_registry_json(*, worktree_root) -> str +""" +from __future__ import annotations + +from ._deployment import LocalDeployment +from ._factory import deploy +from ._port import port +from ._registry_dump import dump_registry_json + +__all__ = ["LocalDeployment", "deploy", "dump_registry_json", "port"] diff --git a/harmont/dev/__main__.py b/harmont/dev/__main__.py new file mode 100644 index 0000000..9366e61 --- /dev/null +++ b/harmont/dev/__main__.py @@ -0,0 +1,71 @@ +"""`python -m harmont.dev` — registry-dump entry point for the CLI. + +Walks ``.harmont/*.py`` (importing each by file path), letting +``@hm.deploy``-decorated functions register themselves into +``harmont._deploy.DEPLOYMENTS`` as a side effect. Then emits the +deployment registry JSON to stdout. 
+ +Errors go to stderr with exit code 1 (DSL error) or 2 (argparse +usage error), matching ``harmont``'s convention. +""" +from __future__ import annotations + +import argparse +import importlib.util +import sys +from pathlib import Path + + +def _import_path(path: Path) -> None: + spec = importlib.util.spec_from_file_location( + name=f"_harmont_dev_user_{path.stem}", + location=str(path), + ) + if spec is None or spec.loader is None: + msg = f"cannot load module from {path}" + raise RuntimeError(msg) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + +def _walk_harmont_dir(root: Path) -> None: + harmont_dir = root / ".harmont" + if not harmont_dir.is_dir(): + sys.stderr.write( + f"hm: no .harmont/ directory in {root}\n" + " → create .harmont/ and add @hm.deploy-decorated functions\n" + ) + sys.exit(1) + for py in sorted(harmont_dir.glob("*.py")): + _import_path(py) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(prog="python -m harmont.dev") + parser.add_argument( + "--dump-registry", + action="store_true", + help="walk .harmont/*.py and emit the v0 deployment registry JSON", + ) + parser.add_argument( + "--worktree-root", + type=Path, + default=None, + help="path to the worktree root; defaults to cwd", + ) + args = parser.parse_args(argv) + + if not args.dump_registry: + # parser.error() is NoReturn (calls sys.exit(2)); execution stops here. 
+ parser.error("nothing to do; pass --dump-registry") + + from harmont.dev import dump_registry_json + + root = args.worktree_root if args.worktree_root is not None else Path.cwd() + _walk_harmont_dir(root) + sys.stdout.write(dump_registry_json(worktree_root=root) + "\n") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/harmont/dev/_deployment.py b/harmont/dev/_deployment.py new file mode 100644 index 0000000..561a1cc --- /dev/null +++ b/harmont/dev/_deployment.py @@ -0,0 +1,47 @@ +"""LocalDeployment — the concrete dataclass for the local Docker driver. + +Construction is mediated by ``harmont.dev._factory.deploy(...)``; the +factory does input validation and coerces fields. ``__post_init__`` is +the last-line invariant check (driver must be 'local'). +""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from harmont._deploy import Deployment + +if TYPE_CHECKING: + from collections.abc import Mapping + + from harmont._step import Step + + from ._port import _PortSentinel + + +@dataclass(frozen=True) +class LocalDeployment(Deployment): + """Local Docker deployment record. + + Exactly one of ``image`` or ``from_step`` is non-None — enforced by + ``deploy(...)``. ``port_mapping`` keys are container ports (1..65535); + values are ``_PortSentinel`` (the ``hm.dev.port()`` singleton). + ``volumes`` maps host paths (relative or absolute) to container + paths (with optional ``:ro`` suffix). + """ + image: str | None + from_step: Step | None + cmd: tuple[str, ...] 
| None + port_mapping: Mapping[int, _PortSentinel] + env: Mapping[str, str] + volumes: Mapping[str, str] + workdir: str | None + + def __post_init__(self) -> None: + if self.driver != "local": + msg = ( + f"LocalDeployment.driver must be 'local', got {self.driver!r}\n" + " → use the harmont.dev._factory.deploy() function " + "instead of constructing LocalDeployment directly" + ) + raise ValueError(msg) diff --git a/harmont/dev/_factory.py b/harmont/dev/_factory.py new file mode 100644 index 0000000..8d2e9f8 --- /dev/null +++ b/harmont/dev/_factory.py @@ -0,0 +1,153 @@ +"""hm.dev.deploy(...) — the public factory for LocalDeployment. + +Validation is deliberately strict and fix-directed. The @hm.deploy +decorator only learns the slug at decoration time, so this factory +emits LocalDeployment with name="" — the decorator stamps the slug +in afterwards via dataclasses.replace. +""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ._deployment import LocalDeployment +from ._port import _PortSentinel + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from harmont._step import Step + + +def deploy( + *, + image: str | None = None, + from_: Step | None = None, + cmd: Iterable[str] | None = None, + port_mapping: Mapping[int, _PortSentinel] | None = None, + env: Mapping[str, str] | None = None, + volumes: Mapping[str, str] | None = None, + workdir: str | None = None, +) -> LocalDeployment: + """Construct a LocalDeployment. + + Exactly one of ``image`` or ``from_`` is required. ``port_mapping`` + keys are container ports (1..65535); values must be the + ``hm.dev.port()`` sentinel in v1. See the design spec § 1 for the + full validation table. + """ + if (image is None) == (from_ is None): + msg = ( + "hm.dev.deploy requires exactly one of `image=` or `from_=`, " + f"got image={image!r}, from_={from_!r}\n" + ' → pick one. Use `image="..."` for a published image, ' + "`from_=` to build from a Step chain." 
+ ) + raise ValueError(msg) + + pm = _validate_port_mapping(port_mapping) + env_resolved = _validate_env(env) + volumes_resolved = _validate_volumes(volumes) + cmd_resolved = _validate_cmd(cmd) + workdir_resolved = _validate_workdir(workdir) + + return LocalDeployment( + name="", # decorator stamps the slug in + driver="local", + image=image, + from_step=from_, + cmd=cmd_resolved, + port_mapping=pm, + env=env_resolved, + volumes=volumes_resolved, + workdir=workdir_resolved, + ) + + +def _validate_port_mapping( + pm: Mapping[int, _PortSentinel] | None, +) -> Mapping[int, _PortSentinel]: + if pm is None: + return {} + result: dict[int, _PortSentinel] = {} + for k, v in pm.items(): + if not isinstance(k, int) or k < 1 or k > 65535: + msg = ( + f"hm.dev.deploy port_mapping key must be int in 1..65535, " + f"got {k!r}\n" + " → keys are container ports the service listens on" + ) + raise ValueError(msg) + if not isinstance(v, _PortSentinel): + msg = ( + f"hm.dev.deploy port_mapping value must be hm.dev.port(), " + f"got {type(v).__name__}\n" + " → use hm.dev.port() to ask the OS for a free host port" + ) + raise TypeError(msg) + result[k] = v + return result + + +def _validate_env(env: Mapping[str, str] | None) -> Mapping[str, str]: + if env is None: + return {} + for k, v in env.items(): + if not isinstance(k, str): + msg = f"hm.dev.deploy env key must be str, got {type(k).__name__}" + raise TypeError(msg) + if not isinstance(v, str): + msg = ( + f"hm.dev.deploy env value for {k!r} must be str, " + f"got {type(v).__name__}\n" + " → call str(...) 
at the call site so the conversion is explicit" + ) + raise TypeError(msg) + return dict(env) + + +def _validate_volumes( + volumes: Mapping[str, str] | None, +) -> Mapping[str, str]: + if volumes is None: + return {} + for hp, cp in volumes.items(): + if not isinstance(hp, str) or not hp: + msg = ( + f"hm.dev.deploy volumes host path must be a non-empty str, " + f"got {hp!r} ({type(hp).__name__})" + ) + raise ValueError(msg) + if not isinstance(cp, str) or not cp.startswith("/"): + msg = ( + f"hm.dev.deploy volumes container path {cp!r} must start with " + "'/'; append ':ro' for read-only mounts (e.g. '/workspace:ro')" + ) + raise ValueError(msg) + return dict(volumes) + + +def _validate_cmd(cmd: Iterable[str] | None) -> tuple[str, ...] | None: + if cmd is None: + return None + items = tuple(cmd) + for x in items: + if not isinstance(x, str): + msg = ( + f"hm.dev.deploy cmd elements must be str, got {type(x).__name__}\n" + " → call str(...) at the call site so the conversion is explicit" + ) + raise TypeError(msg) + return items + + +def _validate_workdir(workdir: str | None) -> str | None: + if workdir is None: + return None + if not workdir.startswith("/"): + msg = ( + f"hm.dev.deploy workdir must be an absolute path, got {workdir!r}\n" + " → workdir is interpreted inside the container; " + "use a path that starts with '/'" + ) + raise ValueError(msg) + return workdir diff --git a/harmont/dev/_port.py b/harmont/dev/_port.py new file mode 100644 index 0000000..5bef7d5 --- /dev/null +++ b/harmont/dev/_port.py @@ -0,0 +1,37 @@ +"""hm.dev.port() — the OS-assigned-host-port sentinel. + +The sentinel is only meaningful as a value in +``hm.dev.deploy(..., port_mapping={CONTAINER_PORT: hm.dev.port()})``. +Any other position (env value, cmd arg, …) is rejected at the call +site that consumes it, with a fix-directed message per PRINCIPLES § 5. 
+""" +from __future__ import annotations + + +class _PortSentinel: + __slots__ = () + + def __repr__(self) -> str: + return "" + + def __eq__(self, other: object) -> bool: + return isinstance(other, _PortSentinel) + + def __hash__(self) -> int: + return hash(_PortSentinel) + + +_SINGLETON = _PortSentinel() + + +def port() -> _PortSentinel: + """Return the sentinel for an OS-assigned host port. + + Use only as a ``port_mapping`` value: + + hm.dev.deploy( + image="postgres:16", + port_mapping={5432: hm.dev.port()}, + ) + """ + return _SINGLETON diff --git a/harmont/dev/_registry_dump.py b/harmont/dev/_registry_dump.py new file mode 100644 index 0000000..8358c47 --- /dev/null +++ b/harmont/dev/_registry_dump.py @@ -0,0 +1,100 @@ +"""Local-driver registry dump. + +Walks ``harmont._deploy.DEPLOYMENTS`` in topo order, lowering each +``LocalDeployment`` to the JSON shape described in +``docs/superpowers/specs/2026-05-21-hm-dev-deploy-design.md`` § 1. +Non-local deployments are passed through as ``{"driver": X, +"_unhandled": true}`` so the CLI can render them in ``hm dev ls``. + +Step-chain deployments emit their pipeline as the existing v0 IR via +``harmont.pipeline()``; cache-keys are resolved through the standard +keygen path so the Rust executor can use the terminal key as the +build-image tag without re-running the algorithm. 
+""" +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from harmont._deploy import DEPLOYMENTS, Deployment, dep_graph, topo_order +from harmont._target import clear_target_memo +from harmont.keygen import resolve_pipeline_keys +from harmont.pipeline import pipeline as _assemble + +from ._deployment import LocalDeployment +from ._port import _PortSentinel + +_SENTINEL_WIRE = "__hm_dev_port__" + + +def _lower_local(d: LocalDeployment, deps: tuple[str, ...]) -> dict[str, Any]: + return { + "driver": "local", + "image": d.image, + "from": _lower_from_step(d.from_step) if d.from_step is not None else None, + "cmd": list(d.cmd) if d.cmd is not None else None, + "port_mapping": { + str(cport): _SENTINEL_WIRE + for cport, value in d.port_mapping.items() + if isinstance(value, _PortSentinel) + }, + "env": dict(d.env), + "volumes": dict(d.volumes), + "workdir": d.workdir, + "deps": list(deps), + } + + +def _lower_from_step(step: Any) -> dict[str, Any]: + """Lower a single Step (the deployment's `from_=`) into the v0 IR shape. + + The Step is treated as the terminal leaf of a one-pipeline IR. + Cache-keys are resolved via the existing keygen so the Rust side + can use them as image tags without re-running the algorithm. + """ + ir = _assemble(step) + resolve_pipeline_keys( + ir.get("steps", []), + pipeline_org="hm-dev", + pipeline_slug="hm-dev-build", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) + return {"type": "step_chain", "pipeline_v0": ir} + + +def dump_registry_json( + *, + worktree_root: Path | None = None, +) -> str: + """Emit the v0 deployment-registry JSON. + + ``worktree_root`` is recorded so the CLI can resolve relative + ``volumes`` paths and the worktree-hash label. Pass the value + yourself in tests; production use comes through the CLI shim + (``python -m harmont.dev --dump-registry --worktree-root ``). 
+ """ + clear_target_memo() + wt = Path(worktree_root) if worktree_root is not None else Path.cwd() + order = topo_order() + graph = dep_graph() + deployments: dict[str, dict[str, Any]] = {} + for slug in order: + value = DEPLOYMENTS[slug]() + if isinstance(value, LocalDeployment): + deployments[slug] = _lower_local(value, graph[slug]) + elif isinstance(value, Deployment): + deployments[slug] = {"driver": value.driver, "_unhandled": True} + else: + msg = ( + f"hm: @hm.deploy({slug!r}) returned {type(value).__name__}; " + "expected a Deployment subclass" + ) + raise TypeError(msg) + return json.dumps({ + "schema_version": "0", + "worktree": str(wt), + "deployments": deployments, + }) diff --git a/harmont/dotnet.py b/harmont/dotnet.py new file mode 100644 index 0000000..43fa617 --- /dev/null +++ b/harmont/dotnet.py @@ -0,0 +1,116 @@ +"""dotnet (C#) toolchain. + +Chain: scratch -> apt-base (curl, ca-certificates, libicu-dev) -> +dotnet-install (via Microsoft's dotnet-install.sh) -> action leaves. +The dotnet-install step is cached forever, keyed on the channel baked +into the command. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("curl", "ca-certificates", "libicu-dev") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_CHANNEL_RE = re.compile(r"^([0-9]+\.[0-9]+|LTS|STS)$") + +_INSTALL_SCRIPT = "/tmp/dotnet-install.sh" # noqa: S108 + + +def _dotnet_install_cmd(channel: str) -> str: + return ( + f"curl -fsSL https://dot.net/v1/dotnet-install.sh -o {_INSTALL_SCRIPT} && " + f"chmod +x {_INSTALL_SCRIPT} && " + f"{_INSTALL_SCRIPT} --channel {channel} --install-dir /usr/local/dotnet && " + "ln -sf /usr/local/dotnet/dotnet /usr/local/bin/dotnet && " + "dotnet --info" + ) + + +@dataclass(frozen=True) +class DotnetProject: + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def build(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && dotnet build", ":dotnet: build", **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && dotnet test", ":dotnet: test", **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && dotnet format --verify-no-changes", + ":dotnet: fmt", **kw, + ) + + +def _make_dotnet( + *, + path: str = ".", + channel: str = "8.0", + image: str | None = None, + base: Step | None = None, +) -> DotnetProject: + if not _CHANNEL_RE.match(channel): + msg = ( + f"hm.dotnet: invalid channel {channel!r}\n" + ' → use "8.0", "LTS", or "STS"' + ) + raise ValueError(msg) + installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_dotnet_install_cmd(channel), + install_cache=CacheForever(env_keys=()), + lang_tag="dotnet", + install_tag="install", + 
image=image, + base=base, + ) + return DotnetProject(path=path, installed=installed) + + +class _DotnetEntry: + def __call__( + self, + *, + path: str = ".", + channel: str = "8.0", + image: str | None = None, + base: Step | None = None, + ) -> DotnetProject: + return _make_dotnet(path=path, channel=channel, image=image, base=base) + + def build(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).build(**action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + +dotnet = _DotnetEntry() diff --git a/harmont/elm.py b/harmont/elm.py new file mode 100644 index 0000000..5a8f6c2 --- /dev/null +++ b/harmont/elm.py @@ -0,0 +1,143 @@ +"""Elm project abstraction (HAR-15). + +Public surface lives on the module-level singleton :data:`elm`. Call it +to construct an :class:`ElmProject`, or use the bare-form action +methods (``elm.make(...)``, ``elm.test()``, etc.) for a one-shot leaf. + +Chain shape: scratch -> apt-base -> nodesource node install -> elm +binary download -> action leaves. Node is required because elm-test, +elm-review, and elm-format all run under npx. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain, node_install_cmd +from .cache import CacheForever + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("curl", "ca-certificates") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[0-9]+(\.[0-9]+)+$") + + +def _elm_install_cmd(elm_version: str) -> str: + return ( + f"curl -fsSL https://github.com/elm/compiler/releases/download/" + f"{elm_version}/binary-for-linux-64-bit.gz -o /tmp/elm.gz && " + "gunzip /tmp/elm.gz && chmod +x /tmp/elm && " + "mv /tmp/elm /usr/local/bin/elm" + ) + + +@dataclass(frozen=True) +class ElmProject: + """Constructed via :func:`elm` (the ``hm.elm`` singleton).""" + + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def make(self, target: str, *, output: str | None = None, **kw: Any) -> Step: + suffix = f" --output={output}" if output is not None else "" + return self._emit( + f"cd {self.path} && elm make {target}{suffix}", + f":elm: make {target}", **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && npx --yes elm-test", + ":elm: test", **kw, + ) + + def review(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && npx --yes elm-review", + ":elm: review", **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && npx --yes elm-format --validate .", + ":elm: fmt", **kw, + ) + + +def _make_elm( + *, + path: str = ".", + elm_version: str = "0.19.1", + node_version: str = "20", + image: str | None = None, + base: Step | None = None, +) -> ElmProject: + if not _VERSION_RE.match(elm_version): + msg = ( + f"hm.elm: invalid elm_version {elm_version!r}\n" + ' → e.g. 
elm_version="0.19.1"' + ) + raise ValueError(msg) + node_installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=node_install_cmd(node_version), + install_cache=CacheForever(env_keys=()), + lang_tag="elm", + install_tag="node", + image=image, + base=base, + ) + elm_installed = node_installed.sh( + _elm_install_cmd(elm_version), + label=":elm: install", + cache=CacheForever(env_keys=()), + ) + return ElmProject(path=path, installed=elm_installed) + + +class _ElmEntry: + """Callable singleton — supports both object form and bare form.""" + + def __call__( + self, + *, + path: str = ".", + elm_version: str = "0.19.1", + node_version: str = "20", + image: str | None = None, + base: Step | None = None, + ) -> ElmProject: + return _make_elm( + path=path, elm_version=elm_version, node_version=node_version, + image=image, base=base, + ) + + def make(self, target: str, *, output: str | None = None, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).make(target, output=output, **action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def review(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).review(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + +elm = _ElmEntry() diff --git a/harmont/go.py b/harmont/go.py new file mode 100644 index 0000000..921e690 --- /dev/null +++ b/harmont/go.py @@ -0,0 +1,117 @@ +"""Go toolchain abstraction. + +Chain: scratch -> apt-base (curl, ca-certificates) -> go-install (download +official tarball to /usr/local/go) -> action leaves. The go-install step +is cached forever, keyed on the Go version in the command. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("curl", "ca-certificates", "git") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+(\.[0-9]+)?$") + + +def _go_install_cmd(version: str) -> str: + return ( + f"curl -fsSL https://go.dev/dl/go{version}.linux-amd64.tar.gz " + "-o /tmp/go.tgz && rm -rf /usr/local/go && " + "tar -C /usr/local -xzf /tmp/go.tgz && " + "ln -sf /usr/local/go/bin/go /usr/local/bin/go && " + "ln -sf /usr/local/go/bin/gofmt /usr/local/bin/gofmt && " + "go version" + ) + + +@dataclass(frozen=True) +class GoToolchain: + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def build(self, **kw: Any) -> Step: + return self._emit(f"cd {self.path} && go build ./...", ":go: build", **kw) + + def test(self, **kw: Any) -> Step: + return self._emit(f"cd {self.path} && go test ./...", ":go: test", **kw) + + def vet(self, **kw: Any) -> Step: + return self._emit(f"cd {self.path} && go vet ./...", ":go: vet", **kw) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f'cd {self.path} && test -z "$(gofmt -l .)"', + ":go: fmt", **kw, + ) + + +def _make_go( + *, + path: str = ".", + version: str = "1.23.2", + image: str | None = None, + base: Step | None = None, +) -> GoToolchain: + if not _VERSION_RE.match(version): + msg = ( + f"hm.go: invalid version {version!r}\n" + ' → use a Go version like "1.23.2"' + ) + raise ValueError(msg) + installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_go_install_cmd(version), + install_cache=CacheForever(env_keys=()), + lang_tag="go", + 
install_tag="install", + image=image, + base=base, + ) + return GoToolchain(path=path, installed=installed) + + +class _GoEntry: + def __call__( + self, + *, + path: str = ".", + version: str = "1.23.2", + image: str | None = None, + base: Step | None = None, + ) -> GoToolchain: + return _make_go(path=path, version=version, image=image, base=base) + + def build(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).build(**action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def vet(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).vet(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + +go = _GoEntry() diff --git a/harmont/gradle.py b/harmont/gradle.py new file mode 100644 index 0000000..29fb714 --- /dev/null +++ b/harmont/gradle.py @@ -0,0 +1,137 @@ +"""Gradle (Java/Kotlin) toolchain. + +Chain: scratch -> apt-base (curl, openjdk--jdk-headless) -> jdk-verify +(``java -version && gradle --version`` smoke test, cached forever) -> +action leaves running ``gradle`` directly. The verify step lets +``make_install_chain`` enforce its standard shape even though the +JDK install happens via apt; it also gives the pipeline UI a single +named step that confirms the JDK is operational. + +Gradle itself is installed from the official distribution zip into +``/opt/gradle`` and symlinked onto PATH. We deliberately do NOT rely +on a project-shipped ``./gradlew`` wrapper: for the examples and +small projects we want a working pipeline out of the box, not a +chicken-and-egg requirement that the user pre-populate ``gradlew``. +Pipelines that do ship a wrapper can still invoke it from their +own step layered on ``gradle.installed``. 
+ +The ``kotlin=True`` flag swaps the label prefix only — Gradle drives +both languages identically. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever + +if TYPE_CHECKING: + from ._step import Step + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_JDK_RE = re.compile(r"^(11|17|21)$") + +# Pinned Gradle version — bumping requires re-running the example +# pipelines locally to confirm tasks still work; older Gradle releases +# may not support newer Kotlin/Java toolchain features. +_GRADLE_VERSION = "8.10" + + +def _apt_packages(jdk: str) -> tuple[str, ...]: + return ("curl", "ca-certificates", "unzip", f"openjdk-{jdk}-jdk-headless") + + +def _install_cmd() -> str: + return ( + f"curl -fsSL https://services.gradle.org/distributions/" + f"gradle-{_GRADLE_VERSION}-bin.zip -o /tmp/gradle.zip && " + "unzip -q /tmp/gradle.zip -d /opt && " + f"ln -sf /opt/gradle-{_GRADLE_VERSION}/bin/gradle /usr/local/bin/gradle && " + "rm /tmp/gradle.zip && java -version && gradle --version" + ) + + +@dataclass(frozen=True) +class GradleProject: + path: str + installed: Step + _tag: str + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def build(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && gradle build", f":{self._tag}: build", **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && gradle test", f":{self._tag}: test", **kw, + ) + + def lint(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && gradle check", f":{self._tag}: lint", **kw, + ) + + +def _make_gradle( + *, + path: str = ".", + jdk: str = "21", + kotlin: bool = False, + image: str | None = None, + base: Step | None = None, +) -> 
GradleProject: + if not _JDK_RE.match(jdk): + msg = ( + f"hm.gradle: invalid jdk {jdk!r}\n" + ' → use "11", "17", or "21"' + ) + raise ValueError(msg) + tag = "kotlin" if kotlin else "java" + installed = make_install_chain( + apt_packages=_apt_packages(jdk), + install_cmd=_install_cmd(), + install_cache=CacheForever(env_keys=()), + lang_tag=tag, + install_tag="jdk", + image=image, + base=base, + ) + return GradleProject(path=path, installed=installed, _tag=tag) + + +class _GradleEntry: + def __call__( + self, + *, + path: str = ".", + jdk: str = "21", + kotlin: bool = False, + image: str | None = None, + base: Step | None = None, + ) -> GradleProject: + return _make_gradle(path=path, jdk=jdk, kotlin=kotlin, image=image, base=base) + + def build(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).build(**action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).lint(**action_kw) + + +gradle = _GradleEntry() diff --git a/harmont/haskell.py b/harmont/haskell.py new file mode 100644 index 0000000..810133c --- /dev/null +++ b/harmont/haskell.py @@ -0,0 +1,257 @@ +"""Haskell toolchain + package abstraction (HAR-15). + +Public surface lives on the module-level singleton :data:`haskell`. Call +it to construct a :class:`HaskellToolchain` (which then spawns one +:class:`HaskellPackage` per cabal package via ``.package(path)``), or +use the bare-form action methods (``haskell.build(path=..., ghc=...)``, +etc.) for a one-shot leaf. + +The chain is: + + scratch -> apt-base -> ghcup-install -> -deps -> -action + +``ghcup-install`` is cached forever (keyed on the GHC version baked +into the command). 
Each package's ``deps`` Step is cached +:class:`CacheOnChange` against the package's cabal files. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Any, overload + +from ._toolchain import make_install_chain +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ( + "curl", "ca-certificates", "build-essential", + "libgmp-dev", "libffi-dev", "libncurses-dev", "zlib1g-dev", +) + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[a-zA-Z0-9.-]+$") + + +def _ghcup_cmd(ghc: str, cabal: str) -> str: + # `fourmolu` backs `pkg.fmt()`. We pull a pre-built binary from + # the fourmolu GitHub releases rather than `cabal install fourmolu` + # because the latter compiles from source on every cold cache, + # adding ~10 minutes per pipeline first-run. `hlint` (for the + # rarely-used `pkg.hlint()`) and HLS are intentionally NOT + # installed here — pipelines that need them should layer their + # own step. + fourmolu_url = ( + "https://github.com/fourmolu/fourmolu/releases/download/" + "v0.18.0.0/fourmolu-0.18.0.0-linux-x86_64" + ) + return ( + "curl -fsSL https://downloads.haskell.org/~ghcup/x86_64-linux-ghcup " + "-o /usr/local/bin/ghcup && chmod +x /usr/local/bin/ghcup && " + f"ghcup install ghc {ghc} && ghcup install cabal {cabal} && " + f"ghcup set ghc {ghc} && ghcup set cabal {cabal} && " + "ln -sf /root/.ghcup/bin/* /usr/local/bin/ && " + f"curl -fsSL {fourmolu_url} -o /usr/local/bin/fourmolu && " + "chmod +x /usr/local/bin/fourmolu" + ) + + +@dataclass(frozen=True) +class HaskellPackage: + """One cabal package. Returned by :meth:`HaskellToolchain.package`. + + ``installed`` is the package's ``deps`` Step — the chain ancestor + every action leaf attaches to. 
Exposed so callers can chain custom + commands onto the deps-installed snapshot via ``pkg.installed.sh(...)``. + """ + + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def build(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && cabal build all", + f":haskell: {self.path} build", **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && cabal test all", + f":haskell: {self.path} test", **kw, + ) + + def lint(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && cabal build all --flag werror", + f":haskell: {self.path} lint", **kw, + ) + + def hlint(self, **kw: Any) -> Step: + return self._emit( + f"hlint {self.path}", + f":haskell: {self.path} hlint", **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f"fourmolu --mode check {self.path}", + f":haskell: {self.path} fmt", **kw, + ) + + +@dataclass(frozen=True) +class HaskellToolchain: + """Constructed via :func:`haskell` (the ``hm.haskell`` singleton). + + Holds the shared ``ghcup`` install Step. Spawn one + :class:`HaskellPackage` per cabal package via :meth:`package`. + """ + + ghc: str + cabal_version: str + installed: Step + + def package( + self, + path: str, + *, + cache_paths: tuple[str, ...] | None = None, + ) -> HaskellPackage: + if cache_paths is not None: + paths = cache_paths + else: + paths = ( + tuple(sorted(p.as_posix() for p in Path(path).glob("*.cabal"))) + + ((f"{path}/cabal.project",) if Path(path, "cabal.project").exists() else ()) + ) + deps = self.installed.sh( + f"cabal update && cd {path} && cabal build all --only-dependencies", + label=f":haskell: {path} deps", + cache=CacheOnChange(paths=paths), + ) + return HaskellPackage(path=path, installed=deps) + + def cabal( + self, + path: str, + *, + cache_paths: tuple[str, ...] 
| None = None, + ) -> HaskellPackage: + """Alias for :meth:`package`. Reads more naturally for cabal projects.""" + return self.package(path, cache_paths=cache_paths) + + +def _make_toolchain( + *, + ghc: str, + cabal: str, + image: str | None, + base: Step | None, +) -> HaskellToolchain: + installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_ghcup_cmd(ghc, cabal), + install_cache=CacheForever(env_keys=()), + lang_tag="haskell", + install_tag="ghcup", + image=image, + base=base, + ) + return HaskellToolchain(ghc=ghc, cabal_version=cabal, installed=installed) + + +def _validate_ghc(ghc: str | None) -> str: + if ghc is None: + msg = ( + "hm.haskell: ghc is required\n" + ' → pass ghc="9.6.7" (or another GHC version your packages support)' + ) + raise ValueError(msg) + if not _VERSION_RE.match(ghc): + msg = ( + f"hm.haskell: invalid ghc {ghc!r}\n" + ' → use a GHC version like "9.6.7"' + ) + raise ValueError(msg) + return ghc + + +class _HaskellEntry: + """Callable singleton — supports both object form and bare form.""" + + @overload + def __call__( + self, + *, + ghc: str, + cabal: str = ..., + image: str | None = ..., + base: Step | None = ..., + ) -> HaskellToolchain: ... + + @overload + def __call__( + self, + *, + ghc: str, + path: str, + cabal: str = ..., + image: str | None = ..., + base: Step | None = ..., + cache_paths: tuple[str, ...] | None = ..., + ) -> HaskellPackage: ... + + def __call__( + self, + *, + ghc: str | None = None, + cabal: str = "latest", + image: str | None = None, + base: Step | None = None, + path: str | None = None, + cache_paths: tuple[str, ...] 
| None = None, + ) -> HaskellToolchain | HaskellPackage: + ghc_v = _validate_ghc(ghc) + toolchain = _make_toolchain(ghc=ghc_v, cabal=cabal, image=image, base=base) + if path is None: + return toolchain + return toolchain.package(path, cache_paths=cache_paths) + + def _pkg(self, **kw: Any) -> HaskellPackage: + path = kw.pop("path", ".") + pkg = self(path=path, **kw) + assert isinstance(pkg, HaskellPackage) # noqa: S101 — narrow overload result + return pkg + + def build(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._pkg(**kw).build(**action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._pkg(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._pkg(**kw).lint(**action_kw) + + def hlint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._pkg(**kw).hlint(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._pkg(**kw).fmt(**action_kw) + + +haskell = _HaskellEntry() diff --git a/harmont/json_emit.py b/harmont/json_emit.py new file mode 100644 index 0000000..e71289e --- /dev/null +++ b/harmont/json_emit.py @@ -0,0 +1,69 @@ +"""Render a chain-DSL pipeline dict to the v0 IR JSON string. + +The wire format mirrors harmont-pipeline/src/Harmont/Pipeline/Schema.hs +exactly. Optional fields are omitted (not null); the only field that +emits JSON null is `builds_in` for scratch-rooted steps. + +Cache keys are resolved in keygen.resolve_pipeline_keys before +serialization, so the emitted JSON includes `cache.key` for every +step whose policy is not 'none'. 
+""" + +from __future__ import annotations + +import copy +import json +import os +import time +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Mapping + +from .keygen import resolve_pipeline_keys + + +def pipeline_to_json( + p: dict[str, Any], + *, + pipeline_org: str | None = None, + pipeline_slug: str | None = None, + now: int | None = None, + base_path: Path | None = None, + env: Mapping[str, str] | None = None, +) -> str: + """Render the pipeline dict (as returned by `pipeline(...)`) to JSON. + + Resolves cache keys before serialization. Defaults mirror the + environment hooks of the old Scheme renderer: + pipeline_org <- env["HARMONT_PIPELINE_ORG"] or "default" + pipeline_slug <- env["HARMONT_PIPELINE_SLUG"] or "default" + now <- int(time.time()) + base_path <- Path.cwd() + env <- os.environ + """ + env_map: Mapping[str, str] = env if env is not None else os.environ + org = ( + pipeline_org + if pipeline_org is not None + else env_map.get("HARMONT_PIPELINE_ORG", "default") + ) + slug = ( + pipeline_slug + if pipeline_slug is not None + else env_map.get("HARMONT_PIPELINE_SLUG", "default") + ) + render_now = now if now is not None else int(time.time()) + bp = base_path if base_path is not None else Path.cwd() + + body = copy.deepcopy(p) + resolve_pipeline_keys( + body.get("steps", []), + pipeline_org=org, + pipeline_slug=slug, + now=render_now, + base_path=bp, + env=env_map, + ) + return json.dumps(body, ensure_ascii=False, separators=(", ", ": ")) diff --git a/harmont/keygen.py b/harmont/keygen.py new file mode 100644 index 0000000..4dc1269 --- /dev/null +++ b/harmont/keygen.py @@ -0,0 +1,156 @@ +"""Cache-key resolver. + +Direct port of cidsl/lisp/src/harmont_macros.scm (resolve-cache-key +and helpers). Output bytes MUST match the Scheme version so cached +snapshots persisted before the Scheme removal remain reachable. 
+ +Algorithm (pre-image of the outer sha256): + + pipeline_org NUL pipeline_slug NUL step_key NUL + parent_resolved_key NUL policy_resolution + +policy_resolution branches: + none -> "none" (no key emitted) + forever -> "forever-" + sha256(cmd NUL env_subset) + ttl -> "ttl-N-" + sha256(cmd NUL env_subset) N = now // duration + on_change -> "sha-" + sha256(concat(file_hash(p) NUL for p in sorted)) + compose -> "compose-" + sha256(concat(resolve(sub) or "none")) + +The Scheme `cache-when` policy is removed (see HAR-16) — it required a +Scheme sandbox that no longer exists. +""" + +from __future__ import annotations + +import hashlib +from pathlib import Path # noqa: TC003 used at runtime in _path_hash +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Mapping + +NUL = "\x00" + + +def resolve_pipeline_keys( + steps: list[dict[str, Any]], + *, + pipeline_org: str, + pipeline_slug: str, + now: int, + base_path: Path, + env: Mapping[str, str], +) -> list[dict[str, Any]]: + """Walk `steps` in order. For every step whose cache policy is not + 'none', compute a deterministic sha256 cache key and inject it into + that step's `cache` dict as `cache["key"]`. 
Returns the same list + (mutated in place — callers may rely on identity).""" + resolved: dict[str, str] = {} + for step in steps: + if step.get("type") != "command": + continue + cache = step.get("cache") + if not cache or cache["policy"] == "none": + continue + cmd = step.get("cmd", "") + parent = step.get("builds_in") # str or None + parent_resolved = _lookup_parent(parent, resolved) + policy_res = _resolve_policy(cache, cmd, now, base_path, env) + key = _sha256_hex( + pipeline_org + + NUL + + pipeline_slug + + NUL + + step["key"] + + NUL + + parent_resolved + + NUL + + policy_res + ) + cache["key"] = key + resolved[step["key"]] = key + return steps + + +def _lookup_parent(parent: str | None, resolved: dict[str, str]) -> str: + if parent is None: + return "scratch" + key = resolved.get(parent) + if key is None: + msg = ( + f"step references builds_in {parent!r} which has no cached " + f"key (parent must be defined upstream and cached)" + ) + raise ValueError(msg) + return key + + +def _resolve_policy( + policy: dict[str, Any], + cmd: str, + now: int, + base_path: Path, + env: Mapping[str, str], +) -> str: + kind = policy["policy"] + if kind == "none": + return "none" + if kind == "forever": + env_keys = policy.get("env_keys", []) + return "forever-" + _sha256_hex(cmd + NUL + _env_subset(env_keys, env)) + if kind == "ttl": + duration = policy["duration_seconds"] + bucket = now // duration + env_keys = policy.get("env_keys", []) + return "ttl-" + str(bucket) + "-" + _sha256_hex(cmd + NUL + _env_subset(env_keys, env)) + if kind == "on_change": + paths = sorted(policy["paths"]) + pre = "".join(_path_hash(base_path / p) + NUL for p in paths) + return "sha-" + _sha256_hex(pre) + if kind == "compose": + subs = policy["sub_policies"] + parts = [ + _resolve_policy(sub, cmd, now, base_path, env) if sub["policy"] != "none" else "none" + for sub in subs + ] + return "compose-" + _sha256_hex("".join(parts)) + msg = f"resolve-policy-key: unknown policy {kind!r}" + raise 
ValueError(msg) + + +def _env_subset(env_keys: list[str], env: Mapping[str, str]) -> str: + sorted_keys = sorted(env_keys) + return "".join(k + "=" + env.get(k, "") + NUL for k in sorted_keys) + + +def _path_hash(path: Path) -> str: + """Hash a path's content for an `on_change` cache key. + + Files: hash the bytes. + + Directories: walk recursively in sorted order and fold each file's + POSIX-style relative path + content into one SHA-256 stream. Empty + directories hash to the empty stream's digest, which is stable. + + Missing paths fail loudly: ``on_change`` is a build-time invariant + and a typo should not silently weaken the cache key. + """ + if path.is_file(): + with path.open("rb") as fp: + return hashlib.sha256(fp.read()).hexdigest() + if path.is_dir(): + h = hashlib.sha256() + files = sorted(p for p in path.rglob("*") if p.is_file()) + for child in files: + rel = child.relative_to(path).as_posix() + h.update(rel.encode("utf-8")) + h.update(b"\x00") + h.update(child.read_bytes()) + h.update(b"\x00") + return h.hexdigest() + msg = f"on_change path does not exist: {path}" + raise FileNotFoundError(msg) + + +def _sha256_hex(s: str) -> str: + return hashlib.sha256(s.encode("utf-8")).hexdigest() diff --git a/harmont/npm.py b/harmont/npm.py new file mode 100644 index 0000000..212dd7e --- /dev/null +++ b/harmont/npm.py @@ -0,0 +1,118 @@ +"""Npm project abstraction (HAR-15).""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain, node_install_cmd +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("curl", "ca-certificates") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[0-9]+(\.x)?$") + + +@dataclass(frozen=True) +class NpmProject: + path: str + installed: Step # the `npm ci` step + + def _emit(self, cmd: str, 
default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def install(self) -> Step: + return self.installed + + def run(self, script: str, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && npm run {script}", + f":node: {script}", **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && npm test", + ":node: test", **kw, + ) + + def lint(self, **kw: Any) -> Step: + return self.run("lint", **kw) + + def fmt(self, **kw: Any) -> Step: + return self.run("fmt", **kw) + + +def _make_npm( + *, + path: str = ".", + version: str = "20", + image: str | None = None, + base: Step | None = None, +) -> NpmProject: + if not _VERSION_RE.match(version): + msg = ( + f"hm.npm: invalid version {version!r}\n" + ' → use a Node major version like "20" or "20.x"' + ) + raise ValueError(msg) + node_installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=node_install_cmd(version), + install_cache=CacheForever(env_keys=()), + lang_tag="node", + install_tag="install", + image=image, + base=base, + ) + npm_ci = node_installed.sh( + f"cd {path} && npm ci", + label=":node: deps", + cache=CacheOnChange(paths=(f"{path}/package-lock.json",)), + ) + return NpmProject(path=path, installed=npm_ci) + + +class _NpmEntry: + def __call__( + self, + *, + path: str = ".", + version: str = "20", + image: str | None = None, + base: Step | None = None, + ) -> NpmProject: + return _make_npm(path=path, version=version, image=image, base=base) + + def install(self, **kw: Any) -> Step: + # .install() returns the pre-existing npm-ci Step verbatim — it + # doesn't emit a new action, so it doesn't accept action kwargs + # (label/cache/env/...). Constructor kwargs only. 
+ return self(**kw).install() + + def run(self, script: str, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).run(script, **action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).lint(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + +npm = _NpmEntry() diff --git a/harmont/ocaml.py b/harmont/ocaml.py new file mode 100644 index 0000000..d41ebca --- /dev/null +++ b/harmont/ocaml.py @@ -0,0 +1,145 @@ +"""OCaml toolchain abstraction. + +Chain: scratch -> apt-base (opam + build deps) -> opam-init (opam switch +create ; installs dune + ocamlformat, cached forever per +compiler version) -> opam-deps (per-project ``opam install . --deps-only`` +when an .opam file exists, cached on the .opam files) -> action leaves +driven by dune. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ( + "opam", + "build-essential", + "git", + "m4", + "unzip", + "bubblewrap", +) + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") + + +def _opam_init_cmd(compiler: str) -> str: + return ( + "opam init -y --disable-sandboxing --bare && " + f"opam switch create {compiler} {compiler} && " + "eval $(opam env) && opam install -y dune ocamlformat" + ) + + +@dataclass(frozen=True) +class OCamlProject: + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def build(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && opam exec -- dune build", + ":ocaml: build", + **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && opam exec -- dune runtest", + ":ocaml: test", + **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && opam exec -- dune build @fmt", + ":ocaml: fmt", + **kw, + ) + + +def _make_ocaml( + *, + path: str = ".", + compiler: str = "5.1.1", + image: str | None = None, + base: Step | None = None, +) -> OCamlProject: + if not _VERSION_RE.match(compiler): + msg = ( + f"hm.ocaml: invalid compiler {compiler!r}\n" + ' → use a compiler version like "5.1.1"' + ) + raise ValueError(msg) + opam = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_opam_init_cmd(compiler), + install_cache=CacheForever(env_keys=()), + lang_tag="ocaml", + install_tag="opam", + image=image, + base=base, + ) + # Per-project deps step: install opam 
dependencies declared in any + # .opam files at `path`. Cached on those files so unchanged manifests + # short-circuit. Falls through harmlessly when there are no .opam + # files (the shell glob expands to nothing and `opam install` is + # skipped via the `[ -n "$o" ]` guard). + opam_files = tuple(sorted(p.as_posix() for p in Path(path).glob("*.opam"))) + deps_cmd = ( + f"cd {path} && " + "if ls *.opam >/dev/null 2>&1; then " + " opam install -y . --deps-only --with-test; " + "else " + ' echo "no .opam files; skipping deps"; ' + "fi" + ) + deps = opam.sh( + deps_cmd, + label=":ocaml: deps", + cache=CacheOnChange(paths=opam_files) if opam_files else CacheForever(env_keys=()), + ) + return OCamlProject(path=path, installed=deps) + + +class _OCamlEntry: + def __call__( + self, + *, + path: str = ".", + compiler: str = "5.1.1", + image: str | None = None, + base: Step | None = None, + ) -> OCamlProject: + return _make_ocaml(path=path, compiler=compiler, image=image, base=base) + + def build(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).build(**action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + +ocaml = _OCamlEntry() diff --git a/harmont/perl.py b/harmont/perl.py new file mode 100644 index 0000000..1007a73 --- /dev/null +++ b/harmont/perl.py @@ -0,0 +1,86 @@ +"""Perl toolchain abstraction. + +Chain: scratch -> apt-base (perl + cpanminus) -> cpanm-deps -> action +leaves. The cpanm-deps step is cached on the project's ``cpanfile``. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("perl", "cpanminus", "build-essential") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + + +@dataclass(frozen=True) +class PerlProject: + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && prove -lv t/", ":perl: test", **kw, + ) + + def lint(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && perlcritic lib/", ":perl: lint", **kw, + ) + + +def _make_perl( + *, + path: str = ".", + image: str | None = None, + base: Step | None = None, +) -> PerlProject: + cpanm_installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd="cpanm --notest --quiet Perl::Critic && perl --version", + install_cache=CacheForever(env_keys=()), + lang_tag="perl", + install_tag="cpanm", + image=image, + base=base, + ) + deps = cpanm_installed.sh( + f"cd {path} && cpanm --installdeps --notest .", + label=":perl: deps", + cache=CacheOnChange(paths=(f"{path}/cpanfile",)), + ) + return PerlProject(path=path, installed=deps) + + +class _PerlEntry: + def __call__( + self, + *, + path: str = ".", + image: str | None = None, + base: Step | None = None, + ) -> PerlProject: + return _make_perl(path=path, image=image, base=base) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).lint(**action_kw) + + +perl = _PerlEntry() 
diff --git a/harmont/pipeline.py b/harmont/pipeline.py new file mode 100644 index 0000000..e61cb1c --- /dev/null +++ b/harmont/pipeline.py @@ -0,0 +1,172 @@ +"""Pipeline factory + lowering pass. + +The factory walks back from each leaf via `Step.parent`, collects every +unique step (keyed by `id`, since structurally-equal forks must keep +distinct keys), topo-sorts by parent edges with a stable +leaf-then-DFS-pre tiebreaker, and lowers each step to a JSON-shaped +dict matching the v0 IR schema. + +Use `pipeline_to_json` from `json_emit` to emit the wire-format string. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from ._keys import resolve_keys +from .cache import ( + CacheCompose, + CacheForever, + CacheNone, + CacheOnChange, + CachePolicy, + CacheTTL, +) + +if TYPE_CHECKING: + from ._step import Step + + +def pipeline( + *leaves: Step, + env: dict[str, str] | None = None, + default_image: str | None = None, +) -> dict[str, Any]: + """Top-level factory. Returns a JSON-shaped dict (version "0"). + + ``default_image`` is the local-mode fallback Docker image: it + applies to every command step that lacks both a ``builds_in`` + parent and a per-step ``image`` override. + """ + if not leaves: + msg = ( + "pipeline must have at least one leaf — " + "pass the terminal step(s) of each branch as positional args" + ) + raise ValueError(msg) + out: dict[str, Any] = {"version": "0"} + if env is not None: + out["env"] = env + if default_image is not None: + out["default_image"] = default_image + out["steps"] = _lower_to_dicts(list(leaves)) + return out + + +def _lower_to_dicts(leaves: list[Step]) -> list[dict[str, Any]]: + """Walk back via `parent`, topo-sort, emit one dict per emitted step. + + `scratch` and `fork` nodes carry no command and are not emitted as + JSON steps; they exist only to set the `parent` of their children. 
+ """ + ordered = _topo_collect(leaves) + keys = resolve_keys([s for s in ordered if s.cmd is not None and not s.is_wait]) + out: list[dict[str, Any]] = [] + for s in ordered: + if s.is_wait: + d: dict[str, Any] = {"type": "wait"} + if s.continue_on_failure: + d["continue_on_failure"] = True + out.append(d) + continue + if s.cmd is None: + # scratch or fork — passthrough, not emitted + continue + parent_key = _resolved_parent_key(s, keys) + d = { + "type": "command", + "key": keys[id(s)], + "cmd": s.cmd, + "builds_in": parent_key, + } + if s.label is not None: + d["label"] = s.label + if s.cache is not None: + d["cache"] = _cache_to_dict(s.cache) + if s.env is not None: + d["env"] = s.env + if s.timeout_seconds is not None: + d["timeout_seconds"] = s.timeout_seconds + if s.image is not None: + d["image"] = s.image + if s.runner is not None: + d["runner"] = s.runner + if s.runner_args is not None: + d["runner_args"] = s.runner_args + out.append(d) + return out + + +def _topo_collect(leaves: list[Step]) -> list[Step]: + """Collect every Step reachable from `leaves` via `parent`, return them + in parent-before-child order. Tiebreak by leaf order, then DFS-pre on + each leaf chain (deterministic). Wait steps are inserted in their + leaf-tuple position.""" + seen: set[int] = set() + ordered: list[Step] = [] + + for leaf in leaves: + if leaf.is_wait: + ordered.append(leaf) + continue + chain: list[Step] = [] + node: Step | None = leaf + while node is not None: + if id(node) in seen: + break + chain.append(node) + node = node.parent + # chain is leaf -> root order; reverse for parent-first. 
+ for s in reversed(chain): + if id(s) in seen: + continue + seen.add(id(s)) + ordered.append(s) + return ordered + + +def _resolved_parent_key(s: Step, keys: dict[int, str]) -> str | None: + """Walk back through scratch/fork nodes to the nearest emitted ancestor.""" + node = s.parent + while node is not None: + if node.cmd is not None and not node.is_wait: + return keys[id(node)] + node = node.parent + return None + + +def _cache_to_dict(policy: CachePolicy) -> dict[str, Any]: + """Render a CachePolicy to its JSON-shape dict. + + Cache key resolution happens in keygen.resolve_pipeline_keys after + the pipeline structure is built. + """ + if isinstance(policy, CacheNone): + return {"policy": "none"} + if isinstance(policy, CacheForever): + return {"policy": "forever", "env_keys": list(policy.env_keys)} + if isinstance(policy, CacheTTL): + return { + "policy": "ttl", + "duration_seconds": int(policy.duration.total_seconds()), + "env_keys": list(policy.env_keys), + } + if isinstance(policy, CacheOnChange): + return {"policy": "on_change", "paths": list(policy.paths)} + if isinstance(policy, CacheCompose): + return { + "policy": "compose", + "sub_policies": [_cache_to_dict(p) for p in policy.policies], + } + msg = f"unknown CachePolicy: {type(policy).__name__}" + raise TypeError(msg) + + +from .json_emit import pipeline_to_json as _pipeline_to_json # noqa: E402 + + +def pipeline_to_json(p: dict[str, Any], **kw: Any) -> str: + """Convenience re-export so callers can do + ``harmont.pipeline_to_json(pipeline(...))`` without importing + `json_emit` directly. See `json_emit.pipeline_to_json` for kwargs.""" + return _pipeline_to_json(p, **kw) diff --git a/harmont/python.py b/harmont/python.py new file mode 100644 index 0000000..9c5396d --- /dev/null +++ b/harmont/python.py @@ -0,0 +1,141 @@ +"""Python (uv) toolchain abstraction. + +Public surface lives on the module-level singleton :data:`python`. 
Call +it to construct a :class:`PythonToolchain`, or use the bare-form action +methods (``python.test()``, ``python.lint()``, etc.) for a one-shot leaf. + +The chain is: + + scratch -> apt-base -> uv-install -> uv-sync -> action leaves + +The ``uv-install`` step is cached forever (keyed on the uv version baked +into the command). The ``uv-sync`` step is cached on the project's +``uv.lock`` and ``pyproject.toml``. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("curl", "ca-certificates", "python3", "python3-venv") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^([0-9]+\.[0-9]+\.[0-9]+|latest)$") + + +def _uv_install_cmd(uv_version: str) -> str: + pin = "" if uv_version == "latest" else f"UV_VERSION={uv_version} " + return ( + f"{pin}curl -LsSf https://astral.sh/uv/install.sh | sh && " + "ln -sf /root/.local/bin/uv /usr/local/bin/uv && uv --version" + ) + + +@dataclass(frozen=True) +class PythonToolchain: + path: str + installed: Step # uv-sync Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && uv run pytest", ":python: test", **kw, + ) + + def lint(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && uv run ruff check .", ":python: lint", **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && uv run ruff format --check .", + ":python: fmt", **kw, + ) + + def typecheck(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && uv run mypy .", ":python: typecheck", **kw, + ) + + +def _make_python( + *, + 
path: str = ".", + uv_version: str = "latest", + image: str | None = None, + base: Step | None = None, +) -> PythonToolchain: + if not _VERSION_RE.match(uv_version): + msg = ( + f"hm.python: invalid uv_version {uv_version!r}\n" + ' → use "latest" or a pinned version like "0.4.18"' + ) + raise ValueError(msg) + uv_installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_uv_install_cmd(uv_version), + install_cache=CacheForever(env_keys=()), + lang_tag="python", + install_tag="uv-install", + image=image, + base=base, + ) + # `--all-extras` pulls every `[project.optional-dependencies]` + # group declared in pyproject.toml. This matters because the + # action surface (`.lint()`, `.fmt()`, `.typecheck()`, `.test()`) + # depends on tools like `ruff`, `mypy`, `pytest` that authors + # almost always declare under an `[optional-dependencies] dev` + # extra rather than as runtime deps. Without `--all-extras`, + # `uv sync` only installs runtime deps and every action step + # fails with `Failed to spawn: : No such file or directory`. 
+ synced = uv_installed.sh( + f"cd {path} && uv sync --all-extras", + label=":python: uv-sync", + cache=CacheOnChange(paths=(f"{path}/uv.lock", f"{path}/pyproject.toml")), + ) + return PythonToolchain(path=path, installed=synced) + + +class _PythonEntry: + def __call__( + self, + *, + path: str = ".", + uv_version: str = "latest", + image: str | None = None, + base: Step | None = None, + ) -> PythonToolchain: + return _make_python(path=path, uv_version=uv_version, image=image, base=base) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).lint(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + def typecheck(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).typecheck(**action_kw) + + +python = _PythonEntry() diff --git a/harmont/ruby.py b/harmont/ruby.py new file mode 100644 index 0000000..55f05be --- /dev/null +++ b/harmont/ruby.py @@ -0,0 +1,108 @@ +"""Ruby toolchain abstraction. + +Chain: scratch -> apt-base (ruby-full, build-essential, git) -> +bundler-install (gem install bundler, cached forever) -> +bundle-deps (cached on Gemfile.lock) -> action leaves. + +The ``version`` parameter is validated as ``"default" | "X.Y" | "X.Y.Z"``; +``"default"`` installs whichever ruby-full ships in the apt repository. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever, CacheOnChange + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("ruby-full", "build-essential", "git") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^(default|[0-9]+\.[0-9]+(\.[0-9]+)?)$") + + +@dataclass(frozen=True) +class RubyProject: + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && bundle exec rspec", ":ruby: test", **kw, + ) + + def lint(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && bundle exec rubocop", ":ruby: lint", **kw, + ) + + +def _make_ruby( + *, + path: str = ".", + version: str = "default", + image: str | None = None, + base: Step | None = None, +) -> RubyProject: + if not _VERSION_RE.match(version): + msg = ( + f"hm.ruby: invalid version {version!r}\n" + ' → use "default" (apt) or a version like "3.2.2"' + ) + raise ValueError(msg) + if version != "default": + msg = ( + f"hm.ruby: pinned ruby version {version!r} not yet wired in\n" + ' → use version="default" (apt ruby-full); pinned versions need' + " rbenv/asdf support, which is not implemented yet" + ) + raise NotImplementedError(msg) + bundler_installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd="gem install bundler && bundle --version", + install_cache=CacheForever(env_keys=()), + lang_tag="ruby", + install_tag="bundler", + image=image, + base=base, + ) + deps = bundler_installed.sh( + f"cd {path} && bundle install", + label=":ruby: deps", + cache=CacheOnChange(paths=(f"{path}/Gemfile.lock",)), + ) + return RubyProject(path=path, 
installed=deps) + + +class _RubyEntry: + def __call__( + self, + *, + path: str = ".", + version: str = "default", + image: str | None = None, + base: Step | None = None, + ) -> RubyProject: + return _make_ruby(path=path, version=version, image=image, base=base) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(**action_kw) + + def lint(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).lint(**action_kw) + + +ruby = _RubyEntry() diff --git a/harmont/rust.py b/harmont/rust.py new file mode 100644 index 0000000..910bd57 --- /dev/null +++ b/harmont/rust.py @@ -0,0 +1,139 @@ +"""Rust toolchain abstraction (HAR-15). + +Public surface lives on the module-level singleton :data:`rust`. Call it +to construct a :class:`RustToolchain`, or use the bare-form action +methods (``rust.build()``, ``rust.test()``, etc.) for a one-shot leaf. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from ._toolchain import make_install_chain +from .cache import CacheForever + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ( + "curl", "ca-certificates", "build-essential", "pkg-config", "libssl-dev", +) + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[a-z0-9.-]+$") + + +def _rustup_cmd(version: str, components: tuple[str, ...]) -> str: + comps = ",".join(components) + return ( + "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | " + f"sh -s -- -y --default-toolchain {version} --profile minimal " + f"--component {comps} && . 
$HOME/.cargo/env && " + "rustc --version && cargo --version" + ) + + +@dataclass(frozen=True) +class RustToolchain: + """Constructed via :func:`rust` (the ``hm.rust`` singleton).""" + + path: str + installed: Step + + def _wrap(self, cargo: str) -> str: + return f". $HOME/.cargo/env && cd {self.path} && {cargo}" + + def _emit(self, cargo: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(self._wrap(cargo), **kw) + + def build(self, *, release: bool = False, **kw: Any) -> Step: + flag = " --release" if release else "" + return self._emit(f"cargo build{flag}", ":rust: build", **kw) + + def test(self, *, release: bool = False, **kw: Any) -> Step: + flag = " --release" if release else "" + return self._emit(f"cargo test{flag}", ":rust: test", **kw) + + def clippy(self, **kw: Any) -> Step: + return self._emit( + "cargo clippy --all-targets -- -D warnings", ":rust: clippy", **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit("cargo fmt --check", ":rust: fmt", **kw) + + def doc(self, **kw: Any) -> Step: + return self._emit("cargo doc --no-deps", ":rust: doc", **kw) + + +def _make_rust( + *, + path: str = ".", + version: str = "stable", + image: str | None = None, + components: tuple[str, ...] = ("clippy", "rustfmt"), + base: Step | None = None, +) -> RustToolchain: + if not _VERSION_RE.match(version): + msg = ( + f"hm.rust: invalid version {version!r}\n" + ' → use a rustup channel name (e.g. "stable") or a ' + 'pinned version (e.g. 
"1.81.0")' + ) + raise ValueError(msg) + installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_rustup_cmd(version, components), + install_cache=CacheForever(env_keys=()), + lang_tag="rust", + install_tag="rustup", + image=image, + base=base, + ) + return RustToolchain(path=path, installed=installed) + + +class _RustEntry: + """Callable singleton — supports both object form and bare form.""" + + def __call__( + self, + *, + path: str = ".", + version: str = "stable", + image: str | None = None, + components: tuple[str, ...] = ("clippy", "rustfmt"), + base: Step | None = None, + ) -> RustToolchain: + return _make_rust( + path=path, version=version, image=image, + components=components, base=base, + ) + + def build(self, *, release: bool = False, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).build(release=release, **action_kw) + + def test(self, *, release: bool = False, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).test(release=release, **action_kw) + + def clippy(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).clippy(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).fmt(**action_kw) + + def doc(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self(**kw).doc(**action_kw) + + +rust = _RustEntry() diff --git a/harmont/triggers.py b/harmont/triggers.py new file mode 100644 index 0000000..224982b --- /dev/null +++ b/harmont/triggers.py @@ -0,0 +1,135 @@ +"""Trigger DSL constructors and types. + +Three triggers in v1: push, pull_request, schedule. 
Each constructor +returns a frozen dataclass with a ``to_dict()`` method that produces the +wire-format JSON object documented in +docs/superpowers/specs/2026-05-10-har-9-imperfect-dsl-design.md. +""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from croniter import croniter + + +def _normalise_globs(value: str | list[str] | tuple[str, ...] | None) -> tuple[str, ...] | None: + if value is None: + return None + if isinstance(value, str): + return (value,) + return tuple(value) + + +@dataclass(frozen=True) +class Trigger: + """Base class. Concrete subclasses override ``to_dict``.""" + + def to_dict(self) -> dict[str, Any]: + raise NotImplementedError + + +@dataclass(frozen=True) +class PushTrigger(Trigger): + branches: tuple[str, ...] | None + tags: tuple[str, ...] | None + + def to_dict(self) -> dict[str, Any]: + out: dict[str, Any] = {"event": "push"} + if self.branches is not None: + out["branches"] = list(self.branches) + if self.tags is not None: + out["tags"] = list(self.tags) + return out + + +def push( + branch: str | list[str] | tuple[str, ...] | None = None, + tag: str | list[str] | tuple[str, ...] | None = None, +) -> PushTrigger: + """Trigger on a git push. + + Pass exactly one of ``branch`` or ``tag``. Each is a glob or list + of globs (``*`` matches any chars including ``/``; ``?`` matches one + char). + """ + if (branch is None) == (tag is None): + msg = ( + "hm.push: pass exactly one of branch or tag\n" + ' → e.g. hm.push(branch="main") or hm.push(tag="v*")' + ) + raise ValueError(msg) + return PushTrigger( + branches=_normalise_globs(branch), + tags=_normalise_globs(tag), + ) + + +_PR_TYPES = frozenset( + {"opened", "synchronize", "reopened", "closed", "ready_for_review"} +) +_DEFAULT_PR_TYPES = ("opened", "synchronize", "reopened") + + +@dataclass(frozen=True) +class PullRequestTrigger(Trigger): + branches: tuple[str, ...] | None + types: tuple[str, ...] 
+ + def to_dict(self) -> dict[str, Any]: + out: dict[str, Any] = {"event": "pull_request"} + if self.branches is not None: + out["branches"] = list(self.branches) + out["types"] = list(self.types) + return out + + +def pull_request( + branches: str | list[str] | tuple[str, ...] | None = None, + types: list[str] | tuple[str, ...] | None = None, +) -> PullRequestTrigger: + """Trigger on a GitHub pull_request event. + + ``branches`` filters by the PR's *target* branch. ``types`` selects + PR-action keywords; defaults to opened/synchronize/reopened (mirrors + GHA). + """ + resolved_types = tuple(types) if types is not None else _DEFAULT_PR_TYPES + if not resolved_types: + msg = "hm.pull_request: types must be non-empty" + raise ValueError(msg) + bad = [t for t in resolved_types if t not in _PR_TYPES] + if bad: + valid = ", ".join(sorted(_PR_TYPES)) + msg = ( + f"unknown pull_request type {bad[0]!r}\n" + f" → valid: {valid}" + ) + raise ValueError(msg) + return PullRequestTrigger( + branches=_normalise_globs(branches), + types=resolved_types, + ) + + +@dataclass(frozen=True) +class ScheduleTrigger(Trigger): + cron: str + + def to_dict(self) -> dict[str, Any]: + return {"event": "schedule", "cron": self.cron} + + +def schedule(cron: str) -> ScheduleTrigger: + """Trigger on a UTC cron schedule. + + ``cron`` is a five-field crontab expression (minute hour day month + dow). Always interpreted as UTC. + """ + if not croniter.is_valid(cron): + msg = ( + f"hm.schedule: invalid cron expression {cron!r}\n" + f" → five-field crontab, UTC, e.g. 
'0 4 * * *'" + ) + raise ValueError(msg) + return ScheduleTrigger(cron=cron) diff --git a/harmont/types.py b/harmont/types.py new file mode 100644 index 0000000..c8ae2fe --- /dev/null +++ b/harmont/types.py @@ -0,0 +1,12 @@ +"""Type aliases for the chain DSL.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Union + +if TYPE_CHECKING: + from ._step import Step + +EnvVars = dict[str, str] + +Pipeline = Union["Step", "tuple[Step, ...]"] diff --git a/harmont/zig.py b/harmont/zig.py new file mode 100644 index 0000000..dd0e9b1 --- /dev/null +++ b/harmont/zig.py @@ -0,0 +1,172 @@ +"""Zig toolchain abstraction. + +Chain: scratch -> apt-base (curl, xz-utils, ca-certificates) -> zig-install +(download tarball from ziglang.org, extract to /usr/local/zig) -> action +leaves. + +Two entry shapes: + + hm.zig(path=".") # one-shot: returns ZigProject directly + hm.zig() # multi-project: returns ZigToolchain + tc.project(path="lib-a") # spawn one ZigProject per subdir + +The toolchain form holds the shared zig-install Step. Two .project() +calls reuse it, so the emitted v0 IR contains a single :zig: install +node with N project chains fanning out from it. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, overload + +from ._toolchain import make_install_chain +from .cache import CacheForever + +if TYPE_CHECKING: + from ._step import Step + +APT_PACKAGES = ("curl", "ca-certificates", "xz-utils") + +_ACTION_KWARGS = frozenset(("cache", "env", "timeout_seconds", "label", "key")) + +_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") + + +def _zig_install_cmd(version: str) -> str: + tarball = f"zig-linux-x86_64-{version}.tar.xz" + url = f"https://ziglang.org/download/{version}/{tarball}" + return ( + f"curl -fsSL {url} -o /tmp/zig.tar.xz && " + "rm -rf /usr/local/zig && mkdir -p /usr/local/zig && " + "tar -xJf /tmp/zig.tar.xz -C /usr/local/zig --strip-components=1 && " + "ln -sf /usr/local/zig/zig /usr/local/bin/zig && zig version" + ) + + +@dataclass(frozen=True) +class ZigProject: + path: str + installed: Step + + def _emit(self, cmd: str, default_label: str, **kw: Any) -> Step: + if kw.get("label") is None: + kw["label"] = default_label + return self.installed.sh(cmd, **kw) + + def build(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && zig build", + f":zig: {self.path} build", **kw, + ) + + def test(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && zig build test", + f":zig: {self.path} test", **kw, + ) + + def fmt(self, **kw: Any) -> Step: + return self._emit( + f"cd {self.path} && zig fmt --check .", + f":zig: {self.path} fmt", **kw, + ) + + +@dataclass(frozen=True) +class ZigToolchain: + """Constructed via :func:`zig` when no ``path`` is supplied. + + Holds the shared zig-install Step. Spawn one :class:`ZigProject` + per subdir via :meth:`project`; all projects from one toolchain + share the same install Step, so the emitted IR contains a single + :zig: install node fanned out to N project chains. 
+ """ + + version: str + installed: Step + + def project(self, path: str = ".") -> ZigProject: + return ZigProject(path=path, installed=self.installed) + + +def _make_toolchain( + *, + version: str, + image: str | None, + base: Step | None, +) -> ZigToolchain: + if not _VERSION_RE.match(version): + msg = ( + f"hm.zig: invalid version {version!r}\n" + ' → use a Zig version like "0.13.0"' + ) + raise ValueError(msg) + installed = make_install_chain( + apt_packages=APT_PACKAGES, + install_cmd=_zig_install_cmd(version), + install_cache=CacheForever(env_keys=()), + lang_tag="zig", + install_tag="install", + image=image, + base=base, + ) + return ZigToolchain(version=version, installed=installed) + + +class _ZigEntry: + """Callable singleton — supports object form, toolchain form, and bare form.""" + + @overload + def __call__( + self, + *, + version: str = ..., + image: str | None = ..., + base: Step | None = ..., + ) -> ZigToolchain: ... + + @overload + def __call__( + self, + *, + path: str, + version: str = ..., + image: str | None = ..., + base: Step | None = ..., + ) -> ZigProject: ... 
+ + def __call__( + self, + *, + path: str | None = None, + version: str = "0.13.0", + image: str | None = None, + base: Step | None = None, + ) -> ZigToolchain | ZigProject: + toolchain = _make_toolchain(version=version, image=image, base=base) + if path is None: + return toolchain + return toolchain.project(path) + + def _project(self, **kw: Any) -> ZigProject: + path = kw.pop("path", ".") + proj = self(path=path, **kw) + assert isinstance(proj, ZigProject) # noqa: S101 — narrow overload result + return proj + + def build(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._project(**kw).build(**action_kw) + + def test(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._project(**kw).test(**action_kw) + + def fmt(self, **kw: Any) -> Step: + action_kw = {k: kw.pop(k) for k in list(kw) if k in _ACTION_KWARGS} + return self._project(**kw).fmt(**action_kw) + + +zig = _ZigEntry() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..459cab0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,88 @@ +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "harmont" +version = "0.0.0-dev" +description = "Python DSL for Harmont CI pipelines — emits v0 IR JSON" +license = "MIT" +license-files = ["LICENSE"] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "croniter>=1.4,<3", +] + +[project.urls] +Repository = "https://github.com/harmont-dev/harmont-py" +Homepage = "https://harmont.dev" + +[project.optional-dependencies] +dev = [ + "pytest>=7.4", + "pytest-cov>=4.1", + "mypy>=1.8", + "ruff>=0.2", +] + +[tool.setuptools.packages.find] +include = ["harmont*"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +filterwarnings = ["error"] +markers = [] + +[tool.mypy] +strict = true +packages = ["harmont"] + +[[tool.mypy.overrides]] +module = "croniter.*" 
+ignore_missing_imports = true + +[tool.ruff] +target-version = "py311" +line-length = 99 + +[tool.ruff.lint] +ignore = [] +select = [ + "E", # pycodestyle errors + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "UP", # pyupgrade + "B", # flake8-bugbear + "SIM", # flake8-simplify + "RUF", # ruff-specific rules + "S", # flake8-bandit (security) + "A", # flake8-builtins + "C4", # flake8-comprehensions + "PIE", # flake8-pie + "PT", # flake8-pytest-style + "RET", # flake8-return + "ARG", # flake8-unused-arguments + "T20", # flake8-print + "TID", # flake8-tidy-imports + "ERA", # eradicate (commented-out code) + "TRY", # tryceratops (exception anti-patterns) + "PERF", # perflint + "FBT", # flake8-boolean-trap (boolean positional args) + "DTZ", # flake8-datetimez (timezone-aware datetimes) + "EM", # flake8-errmsg (exception message extracted to var) + "G", # flake8-logging-format (logging f-strings forbidden) + "Q", # flake8-quotes (consistent quote style) + "SLF", # flake8-self (no private member access from outside) + "TCH", # flake8-type-checking (TYPE_CHECKING blocks) + "LOG", # flake8-logging (logger usage) + "RSE", # flake8-raise (no extra parens around raise) + "ISC", # flake8-implicit-str-concat + "PYI", # flake8-pyi (.pyi stubs; harmless if absent) + "INT", # flake8-gettext +] + +[tool.ruff.lint.per-file-ignores] +# Test files are allowed to use `assert` (S101) and don't need docstrings. +"tests/**" = ["S101", "ARG001"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..fdd4d07 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,20 @@ +"""Shared pytest fixtures for cidsl/py tests. + +The :func:`_chdir_to_repo_root` autouse fixture anchors every test's +working directory at the repo root so that toolchain abstractions +which glob the filesystem at construction time +(e.g. 
:func:`harmont.haskell.HaskellToolchain.package`) resolve real +files in ``api/``, ``freestyle/``, ``app/``, etc. +""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +_REPO_ROOT = Path(__file__).resolve().parents[3] + + +@pytest.fixture(autouse=True) +def _chdir_to_repo_root(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(_REPO_ROOT) diff --git a/tests/dev/__init__.py b/tests/dev/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dev/conftest.py b/tests/dev/conftest.py new file mode 100644 index 0000000..ba961a6 --- /dev/null +++ b/tests/dev/conftest.py @@ -0,0 +1,20 @@ +"""Per-test reset of every registry the deploy DSL touches.""" +from __future__ import annotations + +import pytest + +from harmont._deploy import DEPLOYMENTS +from harmont._registry import clear_registry +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset_registries(): + """Clear every module-level registry before each test so order is irrelevant.""" + DEPLOYMENTS.clear() + clear_registry() + clear_target_cache() + yield + DEPLOYMENTS.clear() + clear_registry() + clear_target_cache() diff --git a/tests/dev/test_canonical_example.py b/tests/dev/test_canonical_example.py new file mode 100644 index 0000000..f94ba04 --- /dev/null +++ b/tests/dev/test_canonical_example.py @@ -0,0 +1,47 @@ +"""End-to-end test mirroring the spec's canonical hello+greeter example. + +The deployments both use Python's stdlib `http.server` (no third-party +image dependency), which is the smallest practical "native language +facility" demonstration of an HTTP server in a harmont deployment. 
+""" +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +import harmont as hm + +if TYPE_CHECKING: + from pathlib import Path + + +def test_canonical_hello_greeter_dumps_expected_shape(tmp_path: Path) -> None: + @hm.deploy("hello") + def hello() -> hm.Deployment: + return hm.dev.deploy( + image="python:3.12-alpine", + cmd=["python", "-m", "http.server", "5678"], + port_mapping={5678: hm.dev.port()}, + ) + + @hm.deploy("greeter") + def greeter(hello: hm.Dep[hm.Deployment]) -> hm.Deployment: + return hm.dev.deploy( + image="python:3.12-alpine", + cmd=["python", "-m", "http.server", "5678"], + port_mapping={5678: hm.dev.port()}, + env={"HELLO_HOST": hello.name}, + ) + + raw = hm.dev.dump_registry_json(worktree_root=tmp_path) + out = json.loads(raw) + assert out["schema_version"] == "0" + assert list(out["deployments"].keys()) == ["hello", "greeter"] + assert out["deployments"]["greeter"]["deps"] == ["hello"] + assert out["deployments"]["hello"]["image"] == "python:3.12-alpine" + assert out["deployments"]["hello"]["cmd"] == [ + "python", "-m", "http.server", "5678", + ] + assert out["deployments"]["greeter"]["env"] == {"HELLO_HOST": "hello"} + assert out["deployments"]["hello"]["from"] is None + assert out["deployments"]["greeter"]["from"] is None diff --git a/tests/dev/test_decorator.py b/tests/dev/test_decorator.py new file mode 100644 index 0000000..1036d21 --- /dev/null +++ b/tests/dev/test_decorator.py @@ -0,0 +1,98 @@ +"""@hm.deploy decorator: registration, slug derivation, fixture injection.""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont._deploy import DEPLOYMENTS +from harmont.dev import LocalDeployment + + +def test_deploy_registers_under_explicit_slug(): + @hm.deploy("db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + assert "db" in DEPLOYMENTS + resolved = DEPLOYMENTS["db"]() + assert isinstance(resolved, LocalDeployment) + 
assert resolved.name == "db" # decorator stamped slug in + assert resolved.image == "postgres:16" + + +def test_deploy_uses_function_name_when_slug_omitted(): + @hm.deploy() + def redis(): + return hm.dev.deploy(image="redis:7", port_mapping={6379: hm.dev.port()}) + + assert "redis" in DEPLOYMENTS + + +def test_deploy_rejects_invalid_slug(): + with pytest.raises(ValueError, match="invalid deployment slug"): + @hm.deploy("Bad Slug") + def x(): + return hm.dev.deploy(image="x", port_mapping={5432: hm.dev.port()}) + + +def test_deploy_rejects_duplicate_slug(): + @hm.deploy("db") + def db1(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + with pytest.raises(ValueError, match="duplicate deployment slug"): + @hm.deploy("db") + def db2(): + return hm.dev.deploy(image="postgres:15", port_mapping={5432: hm.dev.port()}) + + +def test_deploy_requires_marker_on_param(): + # validate_target_signature (the shared validator used by @hm.target, + # @hm.pipeline, and @hm.deploy) raises TypeError for unmarkered params. + with pytest.raises(TypeError, match=r"parameter 'db' has no marker"): + @hm.deploy("api") + def api(db): # type: ignore[no-untyped-def] + return hm.dev.deploy(image="x", port_mapping={8000: hm.dev.port()}) + + +def test_deploy_injects_dep_value(): + @hm.deploy("db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + @hm.deploy("api") + def api(db: hm.Dep[hm.Deployment]): + # db.name comes from the resolved upstream Deployment + return hm.dev.deploy( + image="x", + port_mapping={8000: hm.dev.port()}, + env={"DB_HOST": db.name}, + ) + + resolved = DEPLOYMENTS["api"]() + assert resolved.env["DB_HOST"] == "db" + + +def test_deploy_with_explicit_name_arg(): + @hm.deploy("db", name="primary-db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + # The display name is held alongside the slug; the registry is keyed by slug. 
+ assert "db" in DEPLOYMENTS + # In v1 we don't expose `name` separately on the returned Deployment; + # the slug IS the public identity. The kwarg is reserved for future use. + + +def test_deploy_function_can_return_remote_driver_value(): + # Simulate a future driver: a function that returns a Deployment with + # driver != "local". The decorator must register it without complaint. + from harmont._deploy import Deployment + + @hm.deploy("prod-api") + def prod_api(): + return Deployment(name="", driver="aws") + + resolved = DEPLOYMENTS["prod-api"]() + assert resolved.driver == "aws" + assert resolved.name == "prod-api" diff --git a/tests/dev/test_dep_marker.py b/tests/dev/test_dep_marker.py new file mode 100644 index 0000000..21004ec --- /dev/null +++ b/tests/dev/test_dep_marker.py @@ -0,0 +1,42 @@ +"""hm.Dep[T] marker is detected; call_with_deps resolves it from DEPLOYMENTS.""" +from __future__ import annotations + +import pytest + +from harmont import Dep +from harmont._deploy import DEPLOYMENTS, Deployment +from harmont._deps import call_with_deps +from harmont._typing import _DepMarker + + +def test_dep_marker_alias_subscripts_to_annotated(): + # Dep is PEP-593 Annotated[T, _DEP_MARKER]; subscripting works at + # both static and runtime levels. + from typing import get_args, get_origin + + T = Dep[Deployment] # noqa: N806 + assert get_origin(T) is not None + args = get_args(T) + assert args[0] is Deployment + assert isinstance(args[1], _DepMarker) + + +def test_call_with_deps_resolves_dep_param_from_DEPLOYMENTS(): # noqa: N802 + # Register a fake deployment under the name "db". 
+ DEPLOYMENTS["db"] = lambda: Deployment(name="db", driver="local") + + def consumer(db: Dep[Deployment]) -> Deployment: + return db + + result = call_with_deps(consumer) + assert isinstance(result, Deployment) + assert result.name == "db" + + +def test_call_with_deps_raises_when_dep_unknown(): + def consumer(redis: Dep[Deployment]) -> Deployment: + return redis + + # Matches the Target precedent: TypeError + "hm: 'name' not found". + with pytest.raises(TypeError, match="hm: deployment 'redis' not found"): + call_with_deps(consumer) diff --git a/tests/dev/test_deploy_factory.py b/tests/dev/test_deploy_factory.py new file mode 100644 index 0000000..3bc9905 --- /dev/null +++ b/tests/dev/test_deploy_factory.py @@ -0,0 +1,77 @@ +"""hm.dev.deploy(...) field validation + LocalDeployment construction.""" +from __future__ import annotations + +import pytest + +from harmont._step import scratch +from harmont.dev import LocalDeployment, deploy, port + + +def test_deploy_with_raw_image_returns_local_deployment(): + d = deploy( + image="postgres:16", + port_mapping={5432: port()}, + env={"POSTGRES_PASSWORD": "dev"}, + ) + assert isinstance(d, LocalDeployment) + assert d.image == "postgres:16" + assert d.from_step is None + # name is set later by the @hm.deploy decorator; factory leaves it "" + assert d.name == "" + + +def test_deploy_with_from_step(): + s = scratch().sh("echo build", image="alpine:3.20") + d = deploy(from_=s, port_mapping={8000: port()}) + assert d.image is None + assert d.from_step is s + + +def test_deploy_requires_exactly_one_of_image_or_from(): + with pytest.raises(ValueError, match="exactly one of `image=` or `from_=`"): + deploy(port_mapping={5432: port()}) + with pytest.raises(ValueError, match="exactly one of `image=` or `from_=`"): + deploy(image="x", from_=scratch().sh("echo"), port_mapping={5432: port()}) + + +def test_port_mapping_keys_must_be_valid_container_ports(): + with pytest.raises(ValueError, match="port_mapping key must be int in"): + 
deploy(image="x", port_mapping={0: port()}) + with pytest.raises(ValueError, match="port_mapping key must be int in"): + deploy(image="x", port_mapping={70000: port()}) + with pytest.raises(ValueError, match="port_mapping key must be int in"): + deploy(image="x", port_mapping={"5432": port()}) # type: ignore[dict-item] + + +def test_port_mapping_values_must_be_hm_dev_port(): + with pytest.raises(TypeError, match=r"port_mapping value must be hm\.dev\.port"): + deploy(image="x", port_mapping={5432: 31337}) # type: ignore[dict-item] + + +def test_env_values_must_be_strings(): + with pytest.raises(TypeError, match="env value for 'PORT' must be str"): + deploy(image="x", port_mapping={5432: port()}, env={"PORT": 31337}) # type: ignore[dict-item] + + +def test_cmd_coerces_to_tuple_of_strings(): + d = deploy( + image="x", port_mapping={5432: port()}, cmd=["postgres", "-c", "shared_buffers=128MB"] + ) + assert d.cmd == ("postgres", "-c", "shared_buffers=128MB") + + +def test_cmd_rejects_non_string_elements(): + with pytest.raises(TypeError, match="cmd elements must be str"): + deploy(image="x", port_mapping={5432: port()}, cmd=["postgres", 5432]) # type: ignore[list-item] + + +def test_volumes_preserves_host_path_verbatim(): + # The factory keeps host paths verbatim; resolution to absolute + # worktree paths happens in _registry_dump.py. 
+ d = deploy(image="x", port_mapping={5432: port()}, volumes={".": "/workspace"}) + assert dict(d.volumes) == {".": "/workspace"} + + +def test_workdir_must_be_absolute(): + with pytest.raises(ValueError, match="workdir must be an absolute path"): + deploy(image="x", port_mapping={5432: port()}, workdir="workspace") diff --git a/tests/dev/test_dump_cli.py b/tests/dev/test_dump_cli.py new file mode 100644 index 0000000..f09386e --- /dev/null +++ b/tests/dev/test_dump_cli.py @@ -0,0 +1,60 @@ +"""`python -m harmont.dev --dump-registry` integration.""" +from __future__ import annotations + +import json +import subprocess +import sys +import textwrap +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + + +def test_dump_cli_walks_harmont_dir_and_prints_registry(tmp_path: Path): + pkg = tmp_path / ".harmont" + pkg.mkdir() + (pkg / "deploys.py").write_text(textwrap.dedent(""" + import harmont as hm + + @hm.deploy("db") + def db(): + return hm.dev.deploy( + image="postgres:16", + port_mapping={5432: hm.dev.port()}, + env={"POSTGRES_PASSWORD": "dev"}, + ) + """)) + result = subprocess.run( + [sys.executable, "-m", "harmont.dev", "--dump-registry"], + cwd=tmp_path, + capture_output=True, + text=True, + check=True, + ) + out = json.loads(result.stdout) + assert out["schema_version"] == "0" + assert out["worktree"] == str(tmp_path) + assert "db" in out["deployments"] + assert out["deployments"]["db"]["image"] == "postgres:16" + + +def test_dump_cli_errors_when_no_harmont_dir(tmp_path: Path): + result = subprocess.run( + [sys.executable, "-m", "harmont.dev", "--dump-registry"], + cwd=tmp_path, + capture_output=True, + text=True, + ) + assert result.returncode != 0 + assert "no .harmont/ directory" in result.stderr + + +def test_dump_cli_errors_on_bad_argument(tmp_path: Path): + result = subprocess.run( + [sys.executable, "-m", "harmont.dev", "--no-such-flag"], + cwd=tmp_path, + capture_output=True, + text=True, + ) + assert result.returncode == 2 # 
argparse default diff --git a/tests/dev/test_local_deployment.py b/tests/dev/test_local_deployment.py new file mode 100644 index 0000000..0c24ef0 --- /dev/null +++ b/tests/dev/test_local_deployment.py @@ -0,0 +1,78 @@ +"""Abstract Deployment + LocalDeployment construction tests.""" +from __future__ import annotations + +from collections.abc import Mapping + +import pytest + +from harmont._deploy import Deployment +from harmont._step import scratch +from harmont.dev import port +from harmont.dev._deployment import LocalDeployment +from harmont.dev._port import _PortSentinel + + +def test_deployment_is_abstract_dataclass(): + """Deployment carries name + driver, is frozen, and is constructible (sentinel-level).""" + d = Deployment(name="db", driver="local") + assert d.name == "db" + assert d.driver == "local" + with pytest.raises(AttributeError): + d.name = "other" # type: ignore[misc] # frozen + + +# --------------------------------------------------------------------------- +# Task 3: LocalDeployment tests +# --------------------------------------------------------------------------- + + +def test_local_deployment_is_a_deployment_with_driver_local(): + d = LocalDeployment( + name="db", + driver="local", + image="postgres:16", + from_step=None, + cmd=None, + port_mapping={5432: port()}, + env={}, + volumes={}, + workdir=None, + ) + assert isinstance(d, Deployment) + assert d.driver == "local" + assert d.image == "postgres:16" + + +def test_local_deployment_rejects_non_local_driver(): + with pytest.raises(ValueError, match="driver must be 'local'"): + LocalDeployment( + name="db", driver="aws", + image="postgres:16", from_step=None, cmd=None, + port_mapping={5432: port()}, + env={}, volumes={}, workdir=None, + ) + + +def test_local_deployment_holds_step_chain(): + s = scratch().sh("echo hi", image="alpine:3.20") + d = LocalDeployment( + name="api", driver="local", + image=None, from_step=s, cmd=None, + port_mapping={8000: port()}, + env={}, volumes={}, workdir=None, 
+ ) + assert d.from_step is s + assert d.image is None + + +def test_port_mapping_is_a_mapping_of_int_to_port_sentinel(): + d = LocalDeployment( + name="db", driver="local", + image="postgres:16", from_step=None, cmd=None, + port_mapping={5432: port()}, + env={}, volumes={}, workdir=None, + ) + assert isinstance(d.port_mapping, Mapping) + [(cport, sentinel)] = d.port_mapping.items() + assert cport == 5432 + assert isinstance(sentinel, _PortSentinel) diff --git a/tests/dev/test_port_sentinel.py b/tests/dev/test_port_sentinel.py new file mode 100644 index 0000000..6ef994f --- /dev/null +++ b/tests/dev/test_port_sentinel.py @@ -0,0 +1,22 @@ +"""hm.dev.port() sentinel: equality, repr, and structural use.""" +from __future__ import annotations + +from harmont.dev import port + + +def test_port_returns_sentinel_singleton(): + a = port() + b = port() + assert a is b # singleton — equality-by-identity is fine + assert a == b + + +def test_port_repr_is_stable_and_introspectable(): + assert repr(port()) == "" + + +def test_port_is_hashable(): + # frozen LocalDeployment uses port_mapping values inside a Mapping; + # being hashable means user code can put it in sets / tuple keys + # without surprise. 
+ assert {port(): 1}[port()] == 1 diff --git a/tests/dev/test_registry_dump.py b/tests/dev/test_registry_dump.py new file mode 100644 index 0000000..9aaa4af --- /dev/null +++ b/tests/dev/test_registry_dump.py @@ -0,0 +1,93 @@ +"""dump_registry_json — golden JSON shape for canonical examples.""" +from __future__ import annotations + +import json +from pathlib import Path + +import harmont as hm +from harmont._deploy import Deployment +from harmont.dev import dump_registry_json + + +def test_dump_minimal_local_deployment(): + @hm.deploy("db") + def db(): + return hm.dev.deploy( + image="postgres:16", + port_mapping={5432: hm.dev.port()}, + env={"POSTGRES_PASSWORD": "dev"}, + ) + + out = json.loads(dump_registry_json(worktree_root=Path("/tmp/wt"))) # noqa: S108 + assert out["schema_version"] == "0" + assert out["worktree"] == "/tmp/wt" # noqa: S108 + assert out["deployments"]["db"] == { + "driver": "local", + "image": "postgres:16", + "from": None, + "cmd": None, + "port_mapping": {"5432": "__hm_dev_port__"}, + "env": {"POSTGRES_PASSWORD": "dev"}, + "volumes": {}, + "workdir": None, + "deps": [], + } + + +def test_dump_with_cmd_workdir_volumes(): + @hm.deploy("db") + def db(): + return hm.dev.deploy( + image="postgres:16", + cmd=["postgres", "-c", "shared_buffers=128MB"], + port_mapping={5432: hm.dev.port()}, + volumes={".": "/workspace"}, + workdir="/workspace", + ) + + out = json.loads(dump_registry_json(worktree_root=Path("/tmp/wt"))) # noqa: S108 + e = out["deployments"]["db"] + assert e["cmd"] == ["postgres", "-c", "shared_buffers=128MB"] + assert e["workdir"] == "/workspace" + assert e["volumes"] == {".": "/workspace"} + + +def test_dump_with_deps_emits_deps_array_in_param_order(): + @hm.deploy("db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + @hm.deploy("api") + def api(db: hm.Dep[hm.Deployment]): + return hm.dev.deploy( + image="x", port_mapping={8000: hm.dev.port()}, + env={"DB_HOST": db.name}, + ) + + out = 
json.loads(dump_registry_json(worktree_root=Path("/tmp/wt"))) # noqa: S108 + assert out["deployments"]["api"]["deps"] == ["db"] + assert out["deployments"]["api"]["env"] == {"DB_HOST": "db"} + + +def test_dump_step_chain_emits_pipeline_v0_ir(): + @hm.deploy("api") + def api(): + return hm.dev.deploy( + from_=hm.sh("echo build", image="alpine:3.20"), + port_mapping={8000: hm.dev.port()}, + ) + + out = json.loads(dump_registry_json(worktree_root=Path("/tmp/wt"))) # noqa: S108 + f = out["deployments"]["api"]["from"] + assert f["type"] == "step_chain" + assert f["pipeline_v0"]["version"] == "0" + assert f["pipeline_v0"]["steps"][0]["cmd"] == "echo build" + + +def test_dump_non_local_driver_is_marked_unhandled(): + @hm.deploy("prod-api") + def prod_api(): + return Deployment(name="", driver="aws") + + out = json.loads(dump_registry_json(worktree_root=Path("/tmp/wt"))) # noqa: S108 + assert out["deployments"]["prod-api"] == {"driver": "aws", "_unhandled": True} diff --git a/tests/dev/test_topo.py b/tests/dev/test_topo.py new file mode 100644 index 0000000..17cd4fa --- /dev/null +++ b/tests/dev/test_topo.py @@ -0,0 +1,63 @@ +"""dep_graph extraction + topo_order on the deployment registry.""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont._deploy import dep_graph, topo_order + + +def test_dep_graph_empty_when_no_deps(): + @hm.deploy("db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + g = dep_graph() + assert g == {"db": ()} + + +def test_dep_graph_lists_param_names_in_order(): + @hm.deploy("db") + def db(): + return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + @hm.deploy("api") + def api(db: hm.Dep[hm.Deployment]): + return hm.dev.deploy(image="x", port_mapping={8000: hm.dev.port()}, + env={"DB": db.name}) + + g = dep_graph() + assert g == {"db": (), "api": ("db",)} + + +def test_topo_order_is_stable_and_deps_first(): + @hm.deploy("db") + def db(): + 
return hm.dev.deploy(image="postgres:16", port_mapping={5432: hm.dev.port()}) + + @hm.deploy("api") + def api(db: hm.Dep[hm.Deployment]): + return hm.dev.deploy(image="x", port_mapping={8000: hm.dev.port()}) + + @hm.deploy("web") + def web(api: hm.Dep[hm.Deployment]): + return hm.dev.deploy(image="x", port_mapping={3000: hm.dev.port()}) + + order = topo_order() + # db before api before web + assert order.index("db") < order.index("api") < order.index("web") + + +def test_topo_order_raises_on_cycle(): + from harmont._deploy import Deployment + + @hm.deploy("a") + def a(b: hm.Dep[hm.Deployment]): + return Deployment(name="", driver="local") + + @hm.deploy("b") + def b(a: hm.Dep[hm.Deployment]): + return Deployment(name="", driver="local") + + with pytest.raises(RuntimeError, match="dep cycle"): + topo_order() diff --git a/tests/examples_render_conftest.py b/tests/examples_render_conftest.py new file mode 100644 index 0000000..23b9978 --- /dev/null +++ b/tests/examples_render_conftest.py @@ -0,0 +1,71 @@ +"""Shared helpers for rendering external example pipelines. + +These tests render the pipeline definitions in harmont-cli/examples/ +to v0 IR JSON. They are gated behind HARMONT_CLI_PATH so they only +run when a sibling harmont-cli checkout is available. +""" +from __future__ import annotations + +import importlib.util +import os +import pathlib +import sys +from contextlib import contextmanager +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + + +def harmont_cli_examples_root() -> pathlib.Path | None: + raw = os.environ.get("HARMONT_CLI_PATH") + if not raw: + return None + p = pathlib.Path(raw) / "examples" + return p if p.is_dir() else None + + +@contextmanager +def isolated_registry() -> Iterator[None]: + """Snapshot and restore the global @hm.pipeline and @hm.target + registries so that each parametrized case renders against an + empty slate. 
Without this, every case would accumulate pipelines + from prior cases and duplicate slugs would raise. + """ + from harmont import _deps, _registry, _target + + saved_regs = list(_registry.REGISTRATIONS) + saved_targets_by_name = dict(_deps._TARGETS_BY_NAME) # noqa: SLF001 + saved_target_cache = dict(_target._TARGET_CACHE) # noqa: SLF001 + + _registry.clear_registry() + _deps.clear_target_names() + _target.clear_target_cache() + try: + yield + finally: + _registry.clear_registry() + _deps.clear_target_names() + _target.clear_target_cache() + _registry.REGISTRATIONS.extend(saved_regs) + _deps._TARGETS_BY_NAME.update(saved_targets_by_name) # noqa: SLF001 + _target._TARGET_CACHE.update(saved_target_cache) # noqa: SLF001 + + +def load_pipeline_module(example_dir: pathlib.Path) -> None: + """Load .harmont/pipeline.py from `example_dir`, executing decorator + side-effects. Run with cwd = example_dir so on_change cache paths + resolve correctly. + """ + pipeline_py = example_dir / ".harmont" / "pipeline.py" + spec = importlib.util.spec_from_file_location( + f"_harmont_example_{example_dir.name}", pipeline_py + ) + assert spec is not None + assert spec.loader is not None + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod + try: + spec.loader.exec_module(mod) + finally: + sys.modules.pop(spec.name, None) diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..c6b7a18 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,73 @@ +"""Unit tests for harmont.cache policy types.""" + +from __future__ import annotations + +import dataclasses +from datetime import timedelta + +import pytest + +from harmont.cache import ( + CacheCompose, + CacheForever, + CacheNone, + CacheOnChange, + CachePolicy, + CacheTTL, +) + + +def test_cache_none_is_a_cache_policy(): + p = CacheNone() + assert isinstance(p, CachePolicy) + + +def test_cache_none_is_frozen(): + p = CacheNone() + with pytest.raises(dataclasses.FrozenInstanceError): + 
p.foo = "bar" # type: ignore[attr-defined] + + +def test_cache_forever_default_env_keys_is_empty_tuple(): + p = CacheForever() + assert p.env_keys == () + + +def test_cache_forever_accepts_env_keys(): + p = CacheForever(env_keys=("ARCH", "VARIANT")) + assert p.env_keys == ("ARCH", "VARIANT") + + +def test_cache_ttl_requires_duration(): + p = CacheTTL(duration=timedelta(days=1)) + assert p.duration == timedelta(days=1) + assert p.env_keys == () + + +def test_cache_on_change_requires_paths(): + p = CacheOnChange(paths=("api/cabal.project",)) + assert p.paths == ("api/cabal.project",) + + +def test_cache_on_change_has_no_env_keys_field(): + """Per design spec — CacheOnChange's key already covers env-driven invalidation \ +by hashing files.""" + with pytest.raises(TypeError): + CacheOnChange(paths=("a",), env_keys=("X",)) # type: ignore[call-arg] + + +def test_cache_compose_takes_tuple_of_policies(): + p = CacheCompose( + policies=( + CacheTTL(duration=timedelta(days=1)), + CacheOnChange(paths=("a",)), + ) + ) + assert len(p.policies) == 2 + + +def test_cache_when_is_removed(): + import harmont + + assert not hasattr(harmont, "when") + assert not hasattr(harmont, "CacheWhen") diff --git a/tests/test_cmake.py b/tests/test_cmake.py new file mode 100644 index 0000000..808a6c2 --- /dev/null +++ b/tests/test_cmake.py @@ -0,0 +1,68 @@ +"""CMake (C/C++) toolchain tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def test_cmake_object_form_full_chain(): + cm = hm.cmake(path="svc") + p = hm.pipeline(cm.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("build-essential" in c for c in cmds) + assert any("cmake --version" in c for c in cmds) + assert any("cmake --build build" in c for c in cmds) + assert any("cmake -S . 
-B build" in c for c in cmds) + + +def test_cmake_actions_share_install(): + cm = hm.cmake(path="svc") + p = hm.pipeline(cm.configure(), cm.build(), cm.test(), cm.fmt(), + default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "cmake --version" in c]) == 1 + assert len([c for c in cmds if "apt-get install" in c]) == 1 + assert any("cmake -S . -B build" in c for c in cmds) + assert any("cmake --build build" in c for c in cmds) + assert any("ctest --test-dir build" in c for c in cmds) + assert any("clang-format --dry-run --Werror" in c for c in cmds) + + +def test_cmake_cpp_label_prefix(): + cm = hm.cmake(path=".", lang="cpp") + assert cm.build().label == ":cpp: build" + assert cm.test().label == ":cpp: test" + assert cm.fmt().label == ":cpp: fmt" + + +def test_cmake_c_label_prefix_default(): + cm = hm.cmake(path=".") + assert cm.build().label == ":c: build" + + +def test_cmake_invalid_lang_rejected(): + with pytest.raises(ValueError, match="lang"): + hm.cmake(lang="rust") + + +def test_cmake_bare_form_actions(): + p = hm.pipeline(hm.cmake.configure(), hm.cmake.build(), + hm.cmake.test(), hm.cmake.fmt()) + cmds = _cmds(p) + assert any("cmake -S . 
-B build" in c for c in cmds) + assert any("cmake --build build" in c for c in cmds) + assert any("ctest" in c for c in cmds) + assert any("clang-format" in c for c in cmds) + + +def test_cmake_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + cm = hm.cmake(path="svc", base=base) + p = hm.pipeline(cm.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("build-essential" in c for c in cmds) diff --git a/tests/test_composer.py b/tests/test_composer.py new file mode 100644 index 0000000..a1d2e73 --- /dev/null +++ b/tests/test_composer.py @@ -0,0 +1,76 @@ +"""Composer (PHP / Laravel) toolchain tests.""" +from __future__ import annotations + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_composer_object_form_full_chain(): + c = hm.composer(path="svc") + p = hm.pipeline(c.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("php-cli" in c_ for c_ in cmds) + assert any("composer" in c_ for c_ in cmds) + assert any("cd svc && composer install" in c_ for c_ in cmds) + assert any("cd svc && vendor/bin/phpunit" in c_ for c_ in cmds) + + +def test_composer_actions_share_install(): + c = hm.composer(path="svc") + p = hm.pipeline(c.test(), c.lint(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c_ for c_ in cmds if "php-cli" in c_]) == 1 + assert len([c_ for c_ in cmds if "composer install" in c_]) == 1 + assert any("vendor/bin/phpunit" in c_ for c_ in cmds) + assert any("vendor/bin/phpstan analyse" in c_ for c_ in cmds) + + +def test_composer_install_cached_on_lockfile(): + c = hm.composer(path="svc") + p = hm.pipeline(c.test()) + install = _step_by_substring(p, "composer install") + assert install["cache"]["policy"] 
== "on_change" + assert "svc/composer.lock" in install["cache"]["paths"] + + +def test_composer_laravel_swaps_test_action(): + c = hm.composer(path="svc", laravel=True) + p = hm.pipeline(c.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("php artisan test" in c_ for c_ in cmds) + assert not any("vendor/bin/phpunit" in c_ for c_ in cmds) + + +def test_composer_action_labels_auto_generated(): + c = hm.composer(path=".") + assert c.test().label == ":php: test" + assert c.lint().label == ":php: lint" + + +def test_composer_laravel_label_prefix(): + c = hm.composer(path=".", laravel=True) + assert c.test().label == ":laravel: test" + + +def test_composer_bare_form_actions(): + p = hm.pipeline(hm.composer.test(), hm.composer.lint()) + cmds = _cmds(p) + assert any("phpunit" in c for c in cmds) + assert any("phpstan" in c for c in cmds) + + +def test_composer_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + c = hm.composer(path="svc", base=base) + p = hm.pipeline(c.test(), default_image="ubuntu:24.04") + assert not any("apt-get install" in c_ for c_ in _cmds(p)) diff --git a/tests/test_decorator.py b/tests/test_decorator.py new file mode 100644 index 0000000..b682296 --- /dev/null +++ b/tests/test_decorator.py @@ -0,0 +1,103 @@ +"""@hm.pipeline decorator surface.""" +import pytest + +import harmont as hm +from harmont._registry import REGISTRATIONS, clear_registry + + +@pytest.fixture(autouse=True) +def _reset_registry(): + clear_registry() + yield + clear_registry() + + +def test_explicit_slug(): + @hm.pipeline("ci") + def whatever() -> hm.Step: + return hm.scratch().sh("echo hi", label="hi") + + assert len(REGISTRATIONS) == 1 + reg = REGISTRATIONS[0] + assert reg.slug == "ci" + assert reg.name == "ci" + assert reg.triggers == () + assert reg.allow_manual is True + assert reg.env is None + assert reg.default_image is None + + +def test_default_slug_from_function_name(): + @hm.pipeline() + def nightly() -> hm.Step: + return 
hm.scratch().sh("echo n") + + assert REGISTRATIONS[0].slug == "nightly" + + +def test_name_override(): + @hm.pipeline("ci", name="Continuous Integration") + def ci() -> hm.Step: + return hm.scratch().sh("echo") + + assert REGISTRATIONS[0].name == "Continuous Integration" + + +def test_forwards_env_and_default_image(): + @hm.pipeline("ci", env={"FOO": "bar"}, default_image="alpine:3.20") + def ci() -> hm.Step: + return hm.scratch().sh("echo") + + reg = REGISTRATIONS[0] + assert reg.env == {"FOO": "bar"} + assert reg.default_image == "alpine:3.20" + + +def test_allow_manual_false(): + @hm.pipeline("ci", allow_manual=False) + def ci() -> hm.Step: + return hm.scratch().sh("echo") + + assert REGISTRATIONS[0].allow_manual is False + + +def test_decorator_returns_function_unchanged(): + @hm.pipeline("ci") + def ci() -> hm.Step: + return hm.scratch().sh("echo hi") + + result = ci() + assert isinstance(result, hm.Step) + + +def test_invalid_slug_uppercase(): + with pytest.raises(ValueError, match="invalid pipeline slug 'CI'"): + @hm.pipeline("CI") + def ci() -> hm.Step: + return hm.scratch().sh("echo") + + +def test_invalid_slug_starts_with_digit(): + with pytest.raises(ValueError, match="invalid pipeline slug '1ci'"): + @hm.pipeline("1ci") + def x() -> hm.Step: + return hm.scratch().sh("echo") + + +def test_invalid_slug_too_long(): + long = "a" * 65 + with pytest.raises(ValueError, match="invalid pipeline slug"): + @hm.pipeline(long) + def x() -> hm.Step: + return hm.scratch().sh("echo") + + +def test_duplicate_slug_raises(): + @hm.pipeline("ci") + def a() -> hm.Step: + return hm.scratch().sh("echo") + + with pytest.raises(ValueError, match="duplicate pipeline slug"): + @hm.pipeline("ci") + def b() -> hm.Step: + return hm.scratch().sh("echo") diff --git a/tests/test_deps.py b/tests/test_deps.py new file mode 100644 index 0000000..e84acc6 --- /dev/null +++ b/tests/test_deps.py @@ -0,0 +1,80 @@ +"""resolve_deps signature introspection + cycle detection (HAR-28 follow-up). 
+ +Most behavioral coverage of the marker-driven resolver lives in +``test_strict_signature.py``. This file is now the residual set: +signature-kind rejection (``*args`` / ``**kwargs`` / positional-only), +default-value handling on plain params, and cycle detection. +""" +from __future__ import annotations + +import pytest + +import harmont as hm # noqa: TC001 used in annotations + tests subscript at runtime +from harmont._deps import ( + call_with_deps, + clear_target_names, + register_named_target, + resolve_deps, +) +from harmont._step import Step # noqa: TC001 used in annotations + isinstance checks + + +@pytest.fixture(autouse=True) +def _reset_named_targets(): + clear_target_names() + yield + clear_target_names() + + +def test_zero_param_fn_resolves_to_empty_kwargs(): + def fn() -> None: ... + + assert resolve_deps(fn) == {} + + +def test_default_used_when_param_has_no_marker(): + def fn(missing: str = "default") -> None: ... + + assert resolve_deps(fn) == {"missing": "default"} + + +def test_var_args_rejected(): + def fn(*args) -> None: ... + + with pytest.raises(TypeError, match="hm: target functions cannot take \\*args"): + resolve_deps(fn) + + +def test_var_kwargs_rejected(): + def fn(**kwargs) -> None: ... + + with pytest.raises(TypeError, match="hm: target functions cannot take \\*\\*kwargs"): + resolve_deps(fn) + + +def test_positional_only_param_rejected(): + def fn(x, /) -> None: ... + + with pytest.raises(TypeError, match="hm: target functions cannot have positional-only"): + resolve_deps(fn) + + +def test_cycle_detection_two_targets(): + # a depends on b, b depends on a. Resolving either must raise. 
+ a_calls = b_calls = 0 + + def a(b: hm.Target[Step]) -> str: + nonlocal a_calls + a_calls += 1 + return f"a({b})" + + def b(a: hm.Target[Step]) -> str: + nonlocal b_calls + b_calls += 1 + return f"b({a})" + + register_named_target("a", lambda: call_with_deps(a)) + register_named_target("b", lambda: call_with_deps(b)) + + with pytest.raises(RuntimeError, match="hm: dependency cycle"): + call_with_deps(a) diff --git a/tests/test_dotnet.py b/tests/test_dotnet.py new file mode 100644 index 0000000..6db33ec --- /dev/null +++ b/tests/test_dotnet.py @@ -0,0 +1,78 @@ +"""dotnet (C#) toolchain tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_dotnet_object_form_full_chain(): + dn = hm.dotnet(path="svc") + p = hm.pipeline(dn.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("dot.net/v1/dotnet-install.sh" in c for c in cmds) + assert any("cd svc && dotnet build" in c for c in cmds) + + +def test_dotnet_actions_share_install(): + dn = hm.dotnet(path="svc") + p = hm.pipeline(dn.build(), dn.test(), dn.fmt(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "dotnet-install" in c]) == 1 + assert any("dotnet build" in c for c in cmds) + assert any("dotnet test" in c for c in cmds) + assert any("dotnet format --verify-no-changes" in c for c in cmds) + + +def test_dotnet_channel_in_install_cmd(): + dn = hm.dotnet(path=".", channel="8.0") + p = hm.pipeline(dn.build()) + install = _step_by_substring(p, "dotnet-install") + assert "--channel 8.0" in install["cmd"] + + +def test_dotnet_invalid_channel_rejected(): + with pytest.raises(ValueError, match="channel"): + hm.dotnet(channel="bogus; rm 
-rf /") + + +def test_dotnet_action_labels_auto_generated(): + dn = hm.dotnet(path=".") + assert dn.build().label == ":dotnet: build" + assert dn.test().label == ":dotnet: test" + assert dn.fmt().label == ":dotnet: fmt" + + +def test_dotnet_bare_form_actions(): + p = hm.pipeline(hm.dotnet.build(), hm.dotnet.test(), hm.dotnet.fmt()) + cmds = _cmds(p) + assert any("dotnet build" in c for c in cmds) + assert any("dotnet test" in c for c in cmds) + assert any("dotnet format" in c for c in cmds) + + +def test_dotnet_install_cache_forever(): + dn = hm.dotnet(path=".") + p = hm.pipeline(dn.build()) + install = _step_by_substring(p, "dotnet-install") + assert install["cache"]["policy"] == "forever" + + +def test_dotnet_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + dn = hm.dotnet(path="svc", base=base) + p = hm.pipeline(dn.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("apt-get install" in c for c in cmds) + assert any("custom base" in c for c in cmds) diff --git a/tests/test_elm.py b/tests/test_elm.py new file mode 100644 index 0000000..06f90a4 --- /dev/null +++ b/tests/test_elm.py @@ -0,0 +1,133 @@ +"""Elm project abstraction tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + msg = f"no command step containing {needle!r}" + raise AssertionError(msg) + + +def test_elm_full_chain(): + elm = hm.elm(path="app") + p = hm.pipeline(elm.make("src/Main.elm"), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("apt-get install" in c for c in cmds) + assert any("deb.nodesource.com" in c for c in cmds) + assert any("elm/compiler/releases" in c for c in cmds) + assert any("cd app && elm make src/Main.elm" in c for c in 
cmds) + + +def test_elm_make_with_output(): + elm = hm.elm(path="app") + s = elm.make("src/Main.elm", output="/tmp/elm.js") # noqa: S108 + assert s.cmd is not None + assert "elm make src/Main.elm --output=/tmp/elm.js" in s.cmd + + +def test_elm_make_without_output(): + elm = hm.elm(path="app") + s = elm.make("src/Main.elm") + assert s.cmd is not None + assert "elm make src/Main.elm" in s.cmd + assert "--output" not in s.cmd + + +def test_elm_test_uses_npx(): + elm = hm.elm(path="app") + s = elm.test() + assert s.cmd is not None + assert "cd app && npx --yes elm-test" in s.cmd + + +def test_elm_review_uses_npx(): + elm = hm.elm(path="app") + s = elm.review() + assert s.cmd is not None + assert "cd app && npx --yes elm-review" in s.cmd + + +def test_elm_fmt_uses_npx(): + elm = hm.elm(path="app") + s = elm.fmt() + assert s.cmd is not None + assert "cd app && npx --yes elm-format --validate ." in s.cmd + + +def test_elm_version_in_install_cmd(): + elm = hm.elm(path=".", elm_version="0.19.1") + p = hm.pipeline(elm.make("src/Main.elm")) + install = _step_by_substring(p, "elm/compiler/releases") + assert "0.19.1" in install["cmd"] + + +def test_elm_invalid_version(): + with pytest.raises(ValueError, match="elm_version"): + hm.elm(elm_version="bad") + + +def test_elm_node_version_in_install_cmd(): + elm = hm.elm(path=".", node_version="22") + p = hm.pipeline(elm.make("src/Main.elm")) + node = _step_by_substring(p, "deb.nodesource.com") + assert "setup_22.x" in node["cmd"] + + +def test_elm_install_cache_forever(): + elm = hm.elm(path="app") + p = hm.pipeline(elm.make("src/Main.elm")) + elm_install = _step_by_substring(p, "elm/compiler/releases") + node_install = _step_by_substring(p, "deb.nodesource.com") + assert elm_install["cache"]["policy"] == "forever" + assert node_install["cache"]["policy"] == "forever" + + +def test_elm_action_labels(): + elm = hm.elm(path="app") + assert elm.make("src/Main.elm").label == ":elm: make src/Main.elm" + assert elm.test().label == 
":elm: test" + assert elm.review().label == ":elm: review" + assert elm.fmt().label == ":elm: fmt" + + +def test_elm_actions_share_install(): + elm = hm.elm(path="app") + p = hm.pipeline( + elm.make("src/Main.elm"), elm.test(), elm.review(), elm.fmt(), + default_image="ubuntu:24.04", + ) + cmds = _cmds(p) + assert len([c for c in cmds if "elm/compiler/releases" in c]) == 1 + + +def test_elm_with_base_skips_apt(): + base = hm.scratch().sh("base", label="base") + elm = hm.elm(path="app", base=base) + p = hm.pipeline(elm.make("src/Main.elm")) + cmds = _cmds(p) + # apt-base (curl + ca-certificates) is skipped. nodesource installer + # itself runs `apt-get install -y nodejs` so don't assert on + # apt-get; check the apt-base packages instead. + assert not any("ca-certificates" in c for c in cmds) + assert any("deb.nodesource.com" in c for c in cmds) + + +def test_elm_bare_form_make(): + p = hm.pipeline(hm.elm.make("src/Main.elm", path="app")) + cmds = _cmds(p) + assert any("cd app && elm make src/Main.elm" in c for c in cmds) + + +def test_elm_bare_form_forwards_action_kwargs(): + s = hm.elm.make("src/Main.elm", path=".", label=":elm: custom") + assert s.label == ":elm: custom" diff --git a/tests/test_envelope.py b/tests/test_envelope.py new file mode 100644 index 0000000..fb27844 --- /dev/null +++ b/tests/test_envelope.py @@ -0,0 +1,186 @@ +"""Envelope JSON shape — what api/cli consume.""" + +import json + +import pytest + +import harmont as hm +from harmont._deps import clear_target_names +from harmont._registry import clear_registry +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset_registry(): + clear_registry() + clear_target_cache() + clear_target_names() + yield + clear_registry() + clear_target_cache() + clear_target_names() + + +def test_empty_registry_emits_empty_pipelines_list(): + out = json.loads(hm.dump_registry_json()) + assert out == {"schema_version": "1", "pipelines": []} + + +def 
test_single_pipeline_no_triggers(): + @hm.pipeline("ci") + def ci() -> hm.Step: + return hm.scratch().sh("echo hi", label="hi") + + out = json.loads(hm.dump_registry_json()) + assert out["schema_version"] == "1" + assert len(out["pipelines"]) == 1 + p = out["pipelines"][0] + assert p["slug"] == "ci" + assert p["name"] == "ci" + assert p["allow_manual"] is True + assert p["triggers"] == [] + definition = p["definition"] + assert definition["version"] == "0" + steps = definition["steps"] + assert len(steps) == 1 + assert steps[0]["type"] == "command" + assert steps[0]["cmd"] == "echo hi" + assert steps[0]["label"] == "hi" + + +def test_pipeline_with_triggers(): + @hm.pipeline( + "ci", + triggers=[ + hm.push(branch="main"), + hm.pull_request(branches="main"), + hm.schedule(cron="0 4 * * *"), + ], + ) + def ci() -> hm.Step: + return hm.scratch().sh("echo") + + out = json.loads(hm.dump_registry_json()) + p = out["pipelines"][0] + assert p["triggers"] == [ + {"event": "push", "branches": ["main"]}, + { + "event": "pull_request", + "branches": ["main"], + "types": ["opened", "synchronize", "reopened"], + }, + {"event": "schedule", "cron": "0 4 * * *"}, + ] + + +def test_pipeline_with_tuple_leaves(): + @hm.pipeline("ci") + def ci() -> hm.Pipeline: + fork = hm.scratch().fork() + return (fork.sh("a"), fork.sh("b")) + + out = json.loads(hm.dump_registry_json()) + p = out["pipelines"][0] + cmds = sorted(s["cmd"] for s in p["definition"]["steps"] if s["type"] == "command") + assert cmds == ["a", "b"] + + +def test_pipeline_forwards_env_and_default_image_to_assemble(): + @hm.pipeline("ci", env={"CI": "true"}, default_image="alpine:3.20") + def ci() -> hm.Step: + return hm.scratch().sh("echo") + + out = json.loads(hm.dump_registry_json()) + definition = out["pipelines"][0]["definition"] + assert definition["default_image"] == "alpine:3.20" + assert definition["env"] == {"CI": "true"} + + +def test_envelope_resolves_cache_keys(tmp_path): + @hm.pipeline("ci") + def ci() -> hm.Step: 
+ return hm.scratch().sh("echo", label="run", cache=hm.forever()) + + out = json.loads( + hm.dump_registry_json( + pipeline_org="acme", + now=1700000000, + base_path=tmp_path, + env={}, + ) + ) + step = out["pipelines"][0]["definition"]["steps"][0] + assert step["cache"]["policy"] == "forever" + assert "key" in step["cache"] + assert len(step["cache"]["key"]) == 64 + + +def test_envelope_auto_unwraps_haskell_package(tmp_path, monkeypatch): + """A pipeline returning a HaskellPackage emits the build leaf.""" + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + + @hm.pipeline("ci") + def ci(): + return hm.haskell(ghc="9.6.7").cabal(path="api") + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + cmds = [s.get("cmd") for s in steps if s.get("type") == "command"] + assert any("cabal build all" in (c or "") for c in cmds) + + +def test_envelope_composes_targets_with_dedup(tmp_path, monkeypatch): + """Two pipelines depending on the same target share the target step.""" + from harmont._target import clear_target_cache + + clear_target_cache() + + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.pipeline("ci") + def ci() -> tuple[hm.Step, ...]: + return ( + apt_base().sh("cabal build"), + apt_base().sh("pytest"), + ) + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + apt_steps = [s for s in steps if s.get("cmd") == "apt-get update"] + assert len(apt_steps) == 1 # deduplicated via target memoization + children = [s for s in steps if s.get("builds_in") == apt_steps[0]["key"]] + assert len(children) == 2 + child_cmds = sorted(s["cmd"] for s in children) + assert child_cmds == ["cabal build", "pytest"] + + +def test_envelope_clears_target_cache_between_renders(): + """Two consecutive dump_registry_json calls must not share target state.""" + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.pipeline("ci") + 
def ci() -> hm.Step: + return apt_base() + + hm.dump_registry_json() + # After render, cache has one entry from the in-flight render. Trigger + # a second render and verify the cache is cleared at render start + # by re-running and confirming success (would TypeError otherwise if + # the first render's cached Step somehow propagated through dataclass + # frozen-equality into the second render's IR). + hm.dump_registry_json() + + +def test_envelope_wraps_typeerror_with_pipeline_slug(): + """Bad return from pipeline fn surfaces as TypeError naming the slug.""" + @hm.pipeline("broken") + def broken(): + return 42 # not a Step / tuple / toolchain wrapper + + with pytest.raises(TypeError, match=r"pipeline 'broken': invalid return value"): + hm.dump_registry_json() diff --git a/tests/test_examples_render.py b/tests/test_examples_render.py new file mode 100644 index 0000000..9270c5f --- /dev/null +++ b/tests/test_examples_render.py @@ -0,0 +1,72 @@ +"""End-to-end render checks against harmont-cli example pipelines. + +Gated: skipped when HARMONT_CLI_PATH is unset. CI sets it after +cloning harmont-cli. 
+""" +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +import pytest + +if TYPE_CHECKING: + import pathlib + +from .examples_render_conftest import ( + harmont_cli_examples_root, + isolated_registry, + load_pipeline_module, +) + +EXAMPLES_ROOT = harmont_cli_examples_root() + +pytestmark = pytest.mark.skipif( + EXAMPLES_ROOT is None, + reason="HARMONT_CLI_PATH not set or examples/ missing", +) + + +def _example_dirs() -> list[pathlib.Path]: + if EXAMPLES_ROOT is None: + return [] + return sorted( + p for p in EXAMPLES_ROOT.iterdir() + if p.is_dir() and (p / ".harmont" / "pipeline.py").is_file() + ) + + +EXAMPLE_IDS = [p.name for p in _example_dirs()] + + +@pytest.mark.parametrize("example_dir", _example_dirs(), ids=EXAMPLE_IDS) +def test_example_renders_to_v0_ir( + example_dir: pathlib.Path, monkeypatch: pytest.MonkeyPatch +) -> None: + import harmont as hm + + monkeypatch.chdir(example_dir) + with isolated_registry(): + load_pipeline_module(example_dir) + envelope_json = hm.dump_registry_json() + + envelope = json.loads(envelope_json) + assert envelope["schema_version"] == "1" + assert envelope["pipelines"], f"{example_dir.name}: no pipelines registered" + + ci_pipeline = next( + (p for p in envelope["pipelines"] if p["slug"] == "ci"), None + ) + assert ci_pipeline is not None, ( + f"{example_dir.name}: no 'ci' pipeline registered; " + f"got slugs {[p['slug'] for p in envelope['pipelines']]}" + ) + definition = ci_pipeline["definition"] + assert definition["version"] == "0" + assert definition.get("steps"), ( + f"{example_dir.name}: ci pipeline has no steps" + ) + assert definition.get("default_image"), ( + f"{example_dir.name}: ci pipeline missing default_image — local " + "executor falls back to alpine and apt-get-based examples die" + ) diff --git a/tests/test_go.py b/tests/test_go.py new file mode 100644 index 0000000..08ffcb4 --- /dev/null +++ b/tests/test_go.py @@ -0,0 +1,91 @@ +"""Go toolchain abstraction tests.""" +from 
__future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + msg = f"no command step containing {needle!r}" + raise AssertionError(msg) + + +def test_go_object_form_full_chain(): + go = hm.go(path="svc") + p = hm.pipeline(go.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("apt-get install" in c for c in cmds) + assert any("go.dev/dl/" in c for c in cmds) + assert any("cd svc && go build ./..." in c for c in cmds) + + +def test_go_actions_share_install_step(): + go = hm.go(path="svc") + p = hm.pipeline(go.build(), go.test(), go.vet(), go.fmt(), + default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "go.dev/dl/" in c]) == 1 + assert any("go build ./..." in c for c in cmds) + assert any("go test ./..." in c for c in cmds) + assert any("go vet ./..." 
in c for c in cmds) + assert any("gofmt -l" in c for c in cmds) + + +def test_go_install_cache_forever(): + go = hm.go(path=".") + p = hm.pipeline(go.build()) + install = _step_by_substring(p, "go.dev/dl/") + assert install["cache"]["policy"] == "forever" + + +def test_go_version_in_install_cmd(): + go = hm.go(path=".", version="1.23.2") + p = hm.pipeline(go.build()) + install = _step_by_substring(p, "go.dev/dl/") + assert "go1.23.2" in install["cmd"] + + +def test_go_invalid_version_rejected(): + with pytest.raises(ValueError, match="version"): + hm.go(version="bogus; rm -rf /") + + +def test_go_bare_form_actions(): + p = hm.pipeline(hm.go.build(), hm.go.test(), hm.go.vet(), hm.go.fmt()) + cmds = _cmds(p) + assert any("go build" in c for c in cmds) + assert any("go test" in c for c in cmds) + assert any("go vet" in c for c in cmds) + assert any("gofmt" in c for c in cmds) + + +def test_go_action_labels_auto_generated(): + go = hm.go(path=".") + assert go.build().label == ":go: build" + assert go.test().label == ":go: test" + assert go.vet().label == ":go: vet" + assert go.fmt().label == ":go: fmt" + + +def test_go_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + go = hm.go(path="svc", base=base) + p = hm.pipeline(go.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("apt-get install" in c for c in cmds) + assert any("custom base" in c for c in cmds) + + +def test_go_installed_escape_hatch_chains(): + go = hm.go(path="svc") + custom = go.installed.sh("cd svc && go generate ./...", label=":go: gen") + p = hm.pipeline(custom) + assert any("go generate" in c for c in _cmds(p)) diff --git a/tests/test_gradle.py b/tests/test_gradle.py new file mode 100644 index 0000000..4b14cd3 --- /dev/null +++ b/tests/test_gradle.py @@ -0,0 +1,78 @@ +"""Gradle (Java/Kotlin) toolchain tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s 
in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_gradle_object_form_full_chain(): + g = hm.gradle(path="svc") + p = hm.pipeline(g.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("openjdk-21-jdk-headless" in c for c in cmds) + assert any("cd svc && ./gradlew build" in c for c in cmds) + + +def test_gradle_actions_share_install(): + g = hm.gradle(path="svc") + p = hm.pipeline(g.build(), g.test(), g.lint(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "openjdk-21" in c]) == 1 + assert any("./gradlew build" in c for c in cmds) + assert any("./gradlew test" in c for c in cmds) + assert any("./gradlew check" in c for c in cmds) + + +def test_gradle_jdk_version_pinned(): + g = hm.gradle(path=".", jdk="17") + p = hm.pipeline(g.build()) + apt = _step_by_substring(p, "openjdk-17") + assert "openjdk-17-jdk-headless" in apt["cmd"] + + +def test_gradle_invalid_jdk_rejected(): + with pytest.raises(ValueError, match="jdk"): + hm.gradle(jdk="bogus") + + +def test_gradle_kotlin_switch_changes_label(): + g = hm.gradle(path="svc", kotlin=True) + assert g.build().label == ":kotlin: build" + assert g.test().label == ":kotlin: test" + assert g.lint().label == ":kotlin: lint" + + +def test_gradle_java_labels_default(): + g = hm.gradle(path="svc") + assert g.build().label == ":java: build" + assert g.test().label == ":java: test" + assert g.lint().label == ":java: lint" + + +def test_gradle_bare_form_actions(): + p = hm.pipeline(hm.gradle.build(), hm.gradle.test(), hm.gradle.lint()) + cmds = _cmds(p) + assert any("./gradlew build" in c for c in cmds) + assert any("./gradlew test" in c for c in cmds) + assert any("./gradlew check" in c for c in cmds) + + +def test_gradle_with_base_skips_apt(): + base = hm.scratch().sh("custom base", 
label="base") + g = hm.gradle(path="svc", base=base) + p = hm.pipeline(g.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("openjdk" in c for c in cmds) + assert any("custom base" in c for c in cmds) diff --git a/tests/test_har_28_example.py b/tests/test_har_28_example.py new file mode 100644 index 0000000..70e6730 --- /dev/null +++ b/tests/test_har_28_example.py @@ -0,0 +1,84 @@ +"""End-to-end: HAR-28 issue example renders to a valid envelope.""" +from __future__ import annotations + +import json + +import pytest + +import harmont as hm +from harmont._deps import clear_target_names +from harmont._registry import clear_registry +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset(tmp_path, monkeypatch): + clear_registry() + clear_target_cache() + clear_target_names() + # Toolchain `.cabal` glob reads disk for *.cabal files — give it an + # empty workspace so the test is hermetic. + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + (tmp_path / "freestyle").mkdir() + (tmp_path / "src").mkdir() + yield + clear_registry() + clear_target_cache() + clear_target_names() + + +def test_har_28_example_renders(): + @hm.target() + def apt_base(): + return hm.sh("apt-get update").sh("apt-get install -y python3 python3-venv python3-pip") + + @hm.target() + def venv(): + return ( + apt_base() + .sh("python3 -m venv .venv", cwd="cidsl/py") + .sh("pip install -e '.[dev]'", cwd="cidsl/py") + .sh("pytest -v", cwd="cidsl/py") + ) + + @hm.target() + def api(): + return hm.haskell(ghc="9.6.7").cabal(path="api") + + @hm.target() + def freestyle(): + return hm.haskell(ghc="9.6.7").cabal(path="freestyle") + + @hm.target() + def frontend(): + return hm.elm(path="src") + + @hm.pipeline("ci") + def ci(): + return (venv(), api(), freestyle(), frontend()) + + out = json.loads(hm.dump_registry_json()) + p = out["pipelines"][0] + steps = p["definition"]["steps"] + + cmds = [s.get("cmd") for s in steps if s.get("type") == 
"command"] + # Each leaf landed in the IR. + assert any("pytest -v" in (c or "") for c in cmds) + assert any("cabal build all" in (c or "") for c in cmds) + assert any("elm make src/Main.elm" in (c or "") for c in cmds) + + # apt-base used by the venv chain appears exactly once (memoized). + apt_update_steps = [s for s in steps if s.get("cmd") == "apt-get update"] + assert len(apt_update_steps) == 1 + + +def test_har_28_cwd_kwarg_renders_to_cd_prefix(): + @hm.pipeline("ci") + def ci(): + return hm.sh("pytest -v", cwd="cidsl/py") + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + cmds = [s["cmd"] for s in steps if s.get("type") == "command"] + assert "cd cidsl/py && pytest -v" in cmds diff --git a/tests/test_haskell.py b/tests/test_haskell.py new file mode 100644 index 0000000..c7020d2 --- /dev/null +++ b/tests/test_haskell.py @@ -0,0 +1,183 @@ +"""Haskell toolchain + package abstraction tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont.haskell import HaskellPackage, HaskellToolchain + +# The repo-root cwd these tests need (so default cache_paths globs +# `/*.cabal` against real files) is supplied by the autouse +# fixture in tests/conftest.py. 
+ + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + msg = f"no command step containing {needle!r}" + raise AssertionError(msg) + + +def test_haskell_constructor_returns_toolchain(): + ghc = hm.haskell(ghc="9.6.7") + assert isinstance(ghc, HaskellToolchain) + + +def test_haskell_with_path_returns_package(): + pkg = hm.haskell(ghc="9.6.7", path="freestyle") + assert isinstance(pkg, HaskellPackage) + + +def test_haskell_package_full_chain(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + p = hm.pipeline(api.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("apt-get install" in c for c in cmds) + assert any("ghcup install ghc 9.6.7" in c for c in cmds) + assert any("cabal build all --only-dependencies" in c for c in cmds) + assert any("cd api && cabal test all" in c for c in cmds) + + +def test_haskell_multi_package_shares_ghcup(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + fs = ghc.package("freestyle") + p = hm.pipeline(api.build(), fs.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "ghcup install" in c]) == 1 + assert len([c for c in cmds if "apt-get install" in c]) == 1 + deps = [c for c in cmds if "cabal build all --only-dependencies" in c] + assert len(deps) == 2 + assert any("cd api && cabal build all" in c for c in cmds) + assert any("cd freestyle && cabal build all" in c for c in cmds) + + +def test_haskell_ghcup_cache_forever(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + p = hm.pipeline(api.test()) + ghcup = _step_by_substring(p, "ghcup install") + assert ghcup["cache"]["policy"] == "forever" + + +def test_haskell_ghcup_version_in_cmd(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + p = hm.pipeline(api.test()) + ghcup = 
_step_by_substring(p, "ghcup install") + assert "ghc 9.6.7" in ghcup["cmd"] + + +def test_haskell_package_deps_cache_default(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + p = hm.pipeline(api.test()) + deps = _step_by_substring(p, "cabal build all --only-dependencies") + assert deps["cache"]["policy"] == "on_change" + assert deps["cache"]["paths"] == ["api/harmont-api.cabal", "api/cabal.project"] + + +def test_haskell_package_deps_cache_default_no_cabal_project(): + ghc = hm.haskell(ghc="9.6.7") + fs = ghc.package("freestyle") + p = hm.pipeline(fs.test()) + deps = _step_by_substring(p, "cabal build all --only-dependencies") + assert deps["cache"]["policy"] == "on_change" + assert deps["cache"]["paths"] == ["freestyle/freestyle.cabal"] + + +def test_haskell_package_deps_cache_explicit_paths(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api", cache_paths=("api/cabal.project", "api/harmont-api.cabal")) + p = hm.pipeline(api.test()) + deps = _step_by_substring(p, "cabal build all --only-dependencies") + assert deps["cache"]["paths"] == ["api/cabal.project", "api/harmont-api.cabal"] + + +def test_haskell_actions(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + assert "cd api && cabal build all" in (api.build().cmd or "") + assert "cd api && cabal test all" in (api.test().cmd or "") + assert "cd api && cabal build all --flag werror" in (api.lint().cmd or "") + assert "hlint api" in (api.hlint().cmd or "") + assert "fourmolu --mode check api" in (api.fmt().cmd or "") + + +def test_haskell_action_labels(): + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + assert api.build().label == ":haskell: api build" + assert api.test().label == ":haskell: api test" + assert api.lint().label == ":haskell: api lint" + assert api.hlint().label == ":haskell: api hlint" + assert api.fmt().label == ":haskell: api fmt" + + +def test_haskell_ghc_required(): + with pytest.raises(ValueError, match="ghc is required"): + hm.haskell() # type: 
ignore[call-overload] + + +def test_haskell_invalid_ghc_format(): + with pytest.raises(ValueError, match="invalid ghc"): + hm.haskell(ghc="9.6.7;rm") + + +def test_haskell_accepts_meta_tags(): + # ghcup accepts meta-tags; the DSL should not pre-reject them. + hm.haskell(ghc="latest") + hm.haskell(ghc="recommended") + + +def test_haskell_accepts_prerelease(): + hm.haskell(ghc="9.10.1-alpha1") + + +def test_haskell_image_set_on_apt_step(): + ghc = hm.haskell(ghc="9.6.7", image="ubuntu:22.04") + api = ghc.package("api") + p = hm.pipeline(api.test()) + apt = _step_by_substring(p, "apt-get install") + assert apt.get("image") == "ubuntu:22.04" + + +def test_haskell_with_base_skips_apt(): + base = hm.scratch().sh("base", label="base") + ghc = hm.haskell(ghc="9.6.7", base=base) + api = ghc.package("api") + p = hm.pipeline(api.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("apt-get install" in c for c in cmds) + assert any(c == "base" for c in cmds) + assert any("ghcup install" in c for c in cmds) + + +def test_haskell_installed_escape_hatch(): + ghc = hm.haskell(ghc="9.6.7") + custom = ghc.installed.sh("make openapi", label=":lock: openapi") + p = hm.pipeline(custom) + cmds = _cmds(p) + assert any("make openapi" in c for c in cmds) + + +def test_haskell_bare_form_single_package(): + p = hm.pipeline(hm.haskell.test(path="freestyle", ghc="9.6.7")) + cmds = _cmds(p) + assert any("cd freestyle && cabal test all" in c for c in cmds) + + +def test_haskell_bare_form_returns_step(): + s = hm.haskell.build(path="freestyle", ghc="9.6.7") + assert isinstance(s, hm.Step) + + +def test_haskell_bare_form_forwards_action_kwargs(): + s = hm.haskell.build(path="freestyle", ghc="9.6.7", label=":haskell: custom") + assert s.label == ":haskell: custom" diff --git a/tests/test_haskell_cabal_alias.py b/tests/test_haskell_cabal_alias.py new file mode 100644 index 0000000..cf994bb --- /dev/null +++ b/tests/test_haskell_cabal_alias.py @@ -0,0 +1,35 @@ 
+"""HaskellToolchain.cabal alias for .package (HAR-28).""" +from __future__ import annotations + +import harmont as hm +from harmont.haskell import HaskellPackage + + +def test_cabal_returns_haskell_package(tmp_path, monkeypatch): + # Run from tmp_path so the default cabal_paths glob doesn't try to + # read the real api/ directory. + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + pkg = hm.haskell(ghc="9.6.7").cabal(path="api") + assert isinstance(pkg, HaskellPackage) + assert pkg.path == "api" + + +def test_cabal_accepts_cache_paths(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + pkg = hm.haskell(ghc="9.6.7").cabal( + path="api", cache_paths=("api/api.cabal", "api/cabal.project") + ) + assert isinstance(pkg, HaskellPackage) + + +def test_cabal_equivalent_to_package(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + tc = hm.haskell(ghc="9.6.7") + via_cabal = tc.cabal(path="api") + via_package = tc.package(path="api") + # Same path, same shape (different Step instances since each call + # builds a new chain — but the .installed cmd should match). + assert via_cabal.path == via_package.path + assert via_cabal.installed.cmd == via_package.installed.cmd diff --git a/tests/test_json_emit.py b/tests/test_json_emit.py new file mode 100644 index 0000000..efcc4e9 --- /dev/null +++ b/tests/test_json_emit.py @@ -0,0 +1,192 @@ +"""JSON emitter — v0 IR output shape goldens. + +The wire format mirrors harmont-pipeline/src/Harmont/Pipeline/Schema.hs. +Optional fields are omitted (not null); `builds_in: null` only when +the step has no parent (scratch). 
Cache keys are resolved at render +time and embedded in cache.key.""" + +from __future__ import annotations + +import json +from datetime import timedelta +from pathlib import Path + +from harmont import ( + forever, + on_change, + pipeline, + scratch, + ttl, + wait, +) +from harmont.json_emit import pipeline_to_json + + +def _emit(p, **kw): + kw.setdefault("env", {}) + return json.loads(pipeline_to_json(p, now=0, base_path=Path("/tmp"), **kw)) # noqa: S108 + + +def test_minimal_command(): + p = pipeline(scratch().sh("echo hi", label="hello")) + out = _emit(p) + assert out == { + "version": "0", + "steps": [ + { + "type": "command", + "key": "hello", + "label": "hello", + "cmd": "echo hi", + "builds_in": None, + }, + ], + } + + +def test_chain_parent_key_in_builds_in(): + a = scratch().sh("install", label="install") + b = a.sh("build", label="build") + out = _emit(pipeline(b)) + by_key = {s["key"]: s for s in out["steps"]} + assert by_key["install"]["builds_in"] is None + assert by_key["build"]["builds_in"] == "install" + + +def test_wait_step(): + out = _emit(pipeline(scratch().sh("a", label="a"), wait())) + types = [s["type"] for s in out["steps"]] + assert types == ["command", "wait"] + + +def test_wait_continue_on_failure_emitted(): + out = _emit(pipeline(scratch().sh("a", label="a"), wait(continue_on_failure=True))) + assert out["steps"][-1] == {"type": "wait", "continue_on_failure": True} + + +def test_pipeline_env_emitted_as_object(): + out = _emit(pipeline(scratch().sh("a", label="a"), env={"CI": "true"})) + assert out["env"] == {"CI": "true"} + + +def test_default_image_emitted_when_set(): + out = _emit(pipeline(scratch().sh("a", label="a"), default_image="alpine:3")) + assert out["default_image"] == "alpine:3" + + +def test_cache_ttl_resolves_key(): + p = pipeline( + scratch().sh("apt-get install -y curl", label="apt", cache=ttl(timedelta(days=1))) + ) + out = _emit(p) + s = out["steps"][0] + assert s["cache"]["policy"] == "ttl" + assert 
s["cache"]["duration_seconds"] == 86400 + assert isinstance(s["cache"]["key"], str) + assert len(s["cache"]["key"]) == 64 + + +def test_cache_forever_with_env_keys_emitted(): + out = _emit( + pipeline(scratch().sh("x", label="x", cache=forever(env_keys=("FOO", "BAR")))), + env={"FOO": "1", "BAR": "2"}, + ) + s = out["steps"][0] + assert s["cache"]["policy"] == "forever" + assert s["cache"]["env_keys"] == ["FOO", "BAR"] + assert "key" in s["cache"] + + +def test_cache_on_change_paths_round_trip(tmp_path): + (tmp_path / "a.txt").write_bytes(b"contents") + (tmp_path / "b.txt").write_bytes(b"other") + out = json.loads( + pipeline_to_json( + pipeline(scratch().sh("make", label="m", cache=on_change("a.txt", "b.txt"))), + now=0, + base_path=tmp_path, + env={}, + ) + ) + s = out["steps"][0] + assert s["cache"]["policy"] == "on_change" + assert s["cache"]["paths"] == ["a.txt", "b.txt"] + assert "key" in s["cache"] + + +def test_no_optional_fields_when_not_set(): + out = _emit(pipeline(scratch().sh("x", label="x"))) + s = out["steps"][0] + assert "image" not in s + assert "env" not in s + assert "timeout_seconds" not in s + assert "cache" not in s + + +def test_timeout_seconds_emitted_when_set(): + out = _emit(pipeline(scratch().sh("x", label="x", timeout_seconds=300))) + assert out["steps"][0]["timeout_seconds"] == 300 + + +def test_image_emitted_when_set(): + out = _emit(pipeline(scratch().sh("x", label="x", image="alpine:3.19"))) + assert out["steps"][0]["image"] == "alpine:3.19" + + +def test_command_emits_runner_and_runner_args(): + out = _emit( + pipeline( + scratch().sh( + "cargo test", + label="t", + image="rust:1.82", + runner="freestyle", + runner_args={"region": "us"}, + ) + ) + ) + cmd = next(s for s in out["steps"] if s["type"] == "command") + assert cmd["runner"] == "freestyle" + assert cmd["runner_args"] == {"region": "us"} + + +def test_command_omits_runner_when_unset(): + out = _emit(pipeline(scratch().sh("echo hi", label="hi"))) + cmd = next(s for s in 
out["steps"] if s["type"] == "command") + assert "runner" not in cmd + assert "runner_args" not in cmd + + +def test_multi_leaf_pipeline_emits_all_command_steps(): + a = scratch().sh("a", label="a") + b = scratch().sh("b", label="b") + out = _emit(pipeline(a, b)) + keys = sorted(s["key"] for s in out["steps"] if s["type"] == "command") + assert keys == ["a", "b"] + + +def test_pipeline_org_and_slug_threaded_through_to_cache_key(): + """Different (org, slug) pairs produce different cache keys for the + same step. Mirrors the namespacing in harmont_macros.scm.""" + p = pipeline(scratch().sh("x", label="x", cache=forever())) + k1 = json.loads( + pipeline_to_json( + p, + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + pipeline_org="acme", + pipeline_slug="api", + ) + )["steps"][0]["cache"]["key"] + k2 = json.loads( + pipeline_to_json( + p, + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + pipeline_org="acme", + pipeline_slug="web", + ) + )["steps"][0]["cache"]["key"] + assert k1 != k2 diff --git a/tests/test_keygen.py b/tests/test_keygen.py new file mode 100644 index 0000000..88457e5 --- /dev/null +++ b/tests/test_keygen.py @@ -0,0 +1,318 @@ +"""Cache-key resolver — direct ports of the Scheme algorithm in +harmont_macros.scm. 
Keys must be byte-identical to what harmont-eval +produced pre-removal, so existing cached snapshots remain reachable.""" + +from __future__ import annotations + +import hashlib +import tempfile +from pathlib import Path + +import pytest + +from harmont.keygen import resolve_pipeline_keys + + +def _sha256_hex(s: str) -> str: + return hashlib.sha256(s.encode("utf-8")).hexdigest() + + +NUL = "\x00" + + +def test_none_policy_emits_no_key(): + steps = [ + { + "type": "command", + "key": "a", + "cmd": "echo", + "builds_in": None, + "cache": {"policy": "none"}, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) + assert "key" not in out[0]["cache"] + + +def test_forever_policy_key_matches_scheme_formula(): + steps = [ + { + "type": "command", + "key": "a", + "cmd": "echo hi", + "builds_in": None, + "cache": {"policy": "forever", "env_keys": []}, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) + inner = _sha256_hex("echo hi" + NUL + "") + policy_res = "forever-" + inner + expected = _sha256_hex( + "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res + ) + assert out[0]["cache"]["key"] == expected + + +def test_ttl_policy_key_includes_bucket(): + steps = [ + { + "type": "command", + "key": "a", + "cmd": "x", + "builds_in": None, + "cache": {"policy": "ttl", "duration_seconds": 3600, "env_keys": []}, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=7200, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) + inner = _sha256_hex("x" + NUL + "") + policy_res = "ttl-2-" + inner + expected = _sha256_hex( + "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res + ) + assert out[0]["cache"]["key"] == expected + + +def 
test_on_change_reads_file_contents(): + with tempfile.TemporaryDirectory() as d: + f = Path(d) / "file.txt" + f.write_bytes(b"hello") + steps = [ + { + "type": "command", + "key": "a", + "cmd": "make", + "builds_in": None, + "cache": {"policy": "on_change", "paths": ["file.txt"]}, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path(d), + env={}, + ) + file_hash = hashlib.sha256(b"hello").hexdigest() + inner = _sha256_hex(file_hash + NUL) + policy_res = "sha-" + inner + expected = _sha256_hex( + "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res + ) + assert out[0]["cache"]["key"] == expected + + +def test_on_change_handles_directory_paths(): + """A directory path in ``on_change`` hashes every file inside, + sorted, with its relative path included in the stream. Two builds + of the same tree produce the same key; touching a file under the + directory flips the key.""" + with tempfile.TemporaryDirectory() as d: + root = Path(d) + sub = root / "dir" + sub.mkdir() + (sub / "a.txt").write_bytes(b"alpha") + (sub / "b.txt").write_bytes(b"beta") + + steps = [ + { + "type": "command", + "key": "s", + "cmd": "make", + "builds_in": None, + "cache": {"policy": "on_change", "paths": ["dir/"]}, + }, + ] + out1 = resolve_pipeline_keys( + list(steps), + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=root, + env={}, + ) + key1 = out1[0]["cache"]["key"] + + # Same tree → same key. + out_again = resolve_pipeline_keys( + [dict(s, cache=dict(s["cache"])) for s in steps], + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=root, + env={}, + ) + assert out_again[0]["cache"]["key"] == key1 + + # Modify a file → key changes. 
+ (sub / "a.txt").write_bytes(b"alpha2") + out2 = resolve_pipeline_keys( + [dict(s, cache=dict(s["cache"])) for s in steps], + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=root, + env={}, + ) + assert out2[0]["cache"]["key"] != key1 + + +def test_on_change_missing_path_raises(): + with tempfile.TemporaryDirectory() as d: + steps = [ + { + "type": "command", + "key": "s", + "cmd": "make", + "builds_in": None, + "cache": {"policy": "on_change", "paths": ["nope/"]}, + }, + ] + with pytest.raises(FileNotFoundError, match="on_change path does not exist"): + resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path(d), + env={}, + ) + + +def test_env_keys_are_sorted_and_picked_up(): + steps = [ + { + "type": "command", + "key": "a", + "cmd": "echo", + "builds_in": None, + "cache": {"policy": "forever", "env_keys": ["BAR", "FOO"]}, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={"FOO": "1", "BAR": "2"}, + ) + env_str = "BAR=2" + NUL + "FOO=1" + NUL + inner = _sha256_hex("echo" + NUL + env_str) + policy_res = "forever-" + inner + expected = _sha256_hex( + "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res + ) + assert out[0]["cache"]["key"] == expected + + +def test_parent_key_chains_through_resolved_cache_keys(): + steps = [ + { + "type": "command", + "key": "a", + "cmd": "x", + "builds_in": None, + "cache": {"policy": "forever", "env_keys": []}, + }, + { + "type": "command", + "key": "b", + "cmd": "y", + "builds_in": "a", + "cache": {"policy": "forever", "env_keys": []}, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) + parent_key = out[0]["cache"]["key"] + inner_b = _sha256_hex("y" + NUL + "") + policy_res = "forever-" + inner_b + expected_b = 
_sha256_hex( + "default" + NUL + "default" + NUL + "b" + NUL + parent_key + NUL + policy_res + ) + assert out[1]["cache"]["key"] == expected_b + + +def test_compose_concatenates_subpolicies(): + steps = [ + { + "type": "command", + "key": "a", + "cmd": "z", + "builds_in": None, + "cache": { + "policy": "compose", + "sub_policies": [ + {"policy": "forever", "env_keys": []}, + {"policy": "none"}, + ], + }, + }, + ] + out = resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) + forever_inner = _sha256_hex("z" + NUL + "") + sub1 = "forever-" + forever_inner + sub2 = "none" + inner = _sha256_hex(sub1 + sub2) + policy_res = "compose-" + inner + expected = _sha256_hex( + "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res + ) + assert out[0]["cache"]["key"] == expected + + +def test_parent_without_cache_is_planerror(): + steps = [ + {"type": "command", "key": "a", "cmd": "x", "builds_in": None}, + { + "type": "command", + "key": "b", + "cmd": "y", + "builds_in": "a", + "cache": {"policy": "forever", "env_keys": []}, + }, + ] + with pytest.raises(ValueError, match="builds_in 'a' which has no cached key"): + resolve_pipeline_keys( + steps, + pipeline_org="default", + pipeline_slug="default", + now=0, + base_path=Path("/tmp"), # noqa: S108 + env={}, + ) diff --git a/tests/test_keys.py b/tests/test_keys.py new file mode 100644 index 0000000..f1af756 --- /dev/null +++ b/tests/test_keys.py @@ -0,0 +1,97 @@ +"""Key derivation: slug from label, hash fallback, collision resolution.""" + +from __future__ import annotations + +from harmont._keys import hash_key, resolve_keys, slugify_label +from harmont._step import scratch + + +def test_slugify_strips_emoji_shortcodes(): + assert slugify_label(":haskell: api build") == "api-build" + + +def test_slugify_lowercases_and_dashes_non_alnum(): + assert slugify_label("API Build (Test)") == "api-build-test" + + +def 
test_slugify_collapses_runs_of_dashes(): + assert slugify_label("foo -- bar") == "foo-bar" + + +def test_slugify_trims_leading_trailing_dashes(): + assert slugify_label(":fire: !!! foo !!!") == "foo" + + +def test_slugify_empty_returns_empty_string(): + assert slugify_label(":fire:") == "" + assert slugify_label("") == "" + + +def test_slugify_drops_non_ascii_letters(): + assert slugify_label("Café Build") == "caf-build" + + +def test_slugify_all_non_ascii_returns_empty_string(): + assert slugify_label("构建") == "" + + +def test_resolve_keys_falls_back_to_hash_for_non_ascii_only_label(): + s = scratch().sh("make", label="构建") + keys = resolve_keys([s]) + assert len(keys[id(s)]) == 12 # hash, since slug is empty + + +def test_hash_key_is_deterministic_12_hex_chars(): + h1 = hash_key("parent-key", "make build", 0) + h2 = hash_key("parent-key", "make build", 0) + assert h1 == h2 + assert len(h1) == 12 + assert all(c in "0123456789abcdef" for c in h1) + + +def test_hash_key_changes_with_inputs(): + a = hash_key("p", "make", 0) + b = hash_key("p", "make", 1) + c = hash_key("p", "test", 0) + d = hash_key("q", "make", 0) + assert len({a, b, c, d}) == 4 + + +def test_resolve_keys_uses_explicit_override(): + s = scratch().sh("make", key="my-key") + keys = resolve_keys([s]) + assert keys[id(s)] == "my-key" + + +def test_resolve_keys_uses_label_slug_when_unique(): + s = scratch().sh("make", label=":haskell: build") + keys = resolve_keys([s]) + assert keys[id(s)] == "build" + + +def test_resolve_keys_falls_back_to_hash_when_label_collides(): + a = scratch().sh("make a", label=":haskell: build") + b = scratch().sh("make b", label=":haskell: build") + keys = resolve_keys([a, b]) + # Both colliding labels fall through to hash-derived keys. 
+ assert keys[id(a)] != "build" + assert keys[id(b)] != "build" + assert len(keys[id(a)]) == 12 + assert keys[id(a)] != keys[id(b)] + + +def test_resolve_keys_falls_back_to_hash_when_no_label(): + s = scratch().sh("make") + keys = resolve_keys([s]) + assert len(keys[id(s)]) == 12 + + +def test_resolve_keys_explicit_override_wins_even_under_collision(): + a = scratch().sh("make a", label=":haskell: build", key="explicit-a") + b = scratch().sh("make b", label=":haskell: build") + keys = resolve_keys([a, b]) + assert keys[id(a)] == "explicit-a" + # `b` had a label that would have been "build", but `a` claimed + # "build" via override, so `b` falls to hash. + assert keys[id(b)] != "build" + assert len(keys[id(b)]) == 12 diff --git a/tests/test_npm.py b/tests/test_npm.py new file mode 100644 index 0000000..4d4a361 --- /dev/null +++ b/tests/test_npm.py @@ -0,0 +1,117 @@ +"""Npm project abstraction tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + msg = f"no command step containing {needle!r}" + raise AssertionError(msg) + + +def test_npm_full_chain(): + node = hm.npm(path="app/codegen") + p = hm.pipeline(node.install(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("apt-get install" in c for c in cmds) + assert any("deb.nodesource.com/setup_20" in c for c in cmds) + assert any("cd app/codegen && npm ci" in c for c in cmds) + + +def test_npm_actions_share_install(): + node = hm.npm(path="app/codegen") + p = hm.pipeline( + node.run("build"), node.test(), node.lint(), node.fmt(), + default_image="ubuntu:24.04", + ) + cmds = _cmds(p) + assert len([c for c in cmds if "npm ci" in c]) == 1 + assert any("cd app/codegen && npm run build" in c for c in cmds) + assert 
any("cd app/codegen && npm test" in c for c in cmds) + assert any("cd app/codegen && npm run lint" in c for c in cmds) + assert any("cd app/codegen && npm run fmt" in c for c in cmds) + + +def test_npm_run_script(): + node = hm.npm(path=".") + s = node.run("typecheck") + assert s.cmd is not None + assert "npm run typecheck" in s.cmd + + +def test_npm_version_in_install_cmd(): + node = hm.npm(path=".", version="22") + p = hm.pipeline(node.install()) + install = _step_by_substring(p, "deb.nodesource.com") + assert "setup_22.x" in install["cmd"] + + +def test_npm_invalid_version(): + with pytest.raises(ValueError, match="version"): + hm.npm(version="latest") + + +def test_npm_node_install_cache_forever(): + node = hm.npm(path="app/codegen") + p = hm.pipeline(node.install()) + install = _step_by_substring(p, "deb.nodesource.com") + assert install["cache"]["policy"] == "forever" + + +def test_npm_ci_cache_on_package_lock(): + node = hm.npm(path="app/codegen") + p = hm.pipeline(node.install()) + npm_ci = _step_by_substring(p, "npm ci") + assert npm_ci["cache"]["policy"] == "on_change" + assert "app/codegen/package-lock.json" in npm_ci["cache"]["paths"] + + +def test_npm_action_labels(): + node = hm.npm(path="app") + assert node.run("build").label == ":node: build" + assert node.test().label == ":node: test" + assert node.lint().label == ":node: lint" + assert node.fmt().label == ":node: fmt" + + +def test_npm_with_base_skips_apt(): + base = hm.scratch().sh("base step", label="base") + node = hm.npm(path="app", base=base) + p = hm.pipeline(node.install(), default_image="ubuntu:24.04") + cmds = _cmds(p) + # apt-base step (installing curl + ca-certificates) is skipped; the + # nodesource install still runs `apt-get install -y nodejs` though. 
+ assert not any("ca-certificates" in c for c in cmds) + assert any("deb.nodesource.com" in c for c in cmds) + + +def test_npm_installed_is_npm_ci_step(): + node = hm.npm(path="app") + assert node.installed.cmd is not None + assert "npm ci" in node.installed.cmd + + +def test_npm_bare_form_install(): + p = hm.pipeline(hm.npm.install()) + cmds = _cmds(p) + assert any("cd . && npm ci" in c for c in cmds) + + +def test_npm_bare_form_test(): + p = hm.pipeline(hm.npm.test(path="app")) + cmds = _cmds(p) + assert any("cd app && npm test" in c for c in cmds) + + +def test_npm_bare_form_forwards_action_kwargs(): + s = hm.npm.test(path=".", label=":node: custom") + assert s.label == ":node: custom" diff --git a/tests/test_ocaml.py b/tests/test_ocaml.py new file mode 100644 index 0000000..b9f379c --- /dev/null +++ b/tests/test_ocaml.py @@ -0,0 +1,69 @@ +"""OCaml toolchain tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_ocaml_object_form_full_chain(): + o = hm.ocaml(path="svc") + p = hm.pipeline(o.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("opam" in c for c in cmds) + assert any("opam switch create" in c for c in cmds) + assert any("cd svc && opam exec -- dune build" in c for c in cmds) + + +def test_ocaml_actions_share_install(): + o = hm.ocaml(path="svc") + p = hm.pipeline(o.build(), o.test(), o.fmt(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "opam switch create" in c]) == 1 + assert any("dune build" in c for c in cmds) + assert any("dune runtest" in c for c in cmds) + assert any("dune build @fmt" in c for c in cmds) + + +def test_ocaml_compiler_version_in_install(): 
+ o = hm.ocaml(path=".", compiler="5.1.1") + p = hm.pipeline(o.build()) + install = _step_by_substring(p, "opam switch create") + assert "5.1.1" in install["cmd"] + + +def test_ocaml_invalid_compiler_rejected(): + with pytest.raises(ValueError, match="compiler"): + hm.ocaml(compiler="oops!") + + +def test_ocaml_action_labels_auto_generated(): + o = hm.ocaml(path=".") + assert o.build().label == ":ocaml: build" + assert o.test().label == ":ocaml: test" + assert o.fmt().label == ":ocaml: fmt" + + +def test_ocaml_bare_form_actions(): + p = hm.pipeline(hm.ocaml.build(), hm.ocaml.test(), hm.ocaml.fmt()) + cmds = _cmds(p) + assert any("dune build" in c for c in cmds) + assert any("dune runtest" in c for c in cmds) + + +def test_ocaml_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + o = hm.ocaml(path="svc", base=base) + p = hm.pipeline(o.build(), default_image="ubuntu:24.04") + assert not any("apt-get install" in c for c in _cmds(p)) diff --git a/tests/test_perl.py b/tests/test_perl.py new file mode 100644 index 0000000..82ab596 --- /dev/null +++ b/tests/test_perl.py @@ -0,0 +1,62 @@ +"""Perl toolchain tests.""" +from __future__ import annotations + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_perl_object_form_full_chain(): + pl = hm.perl(path="svc") + p = hm.pipeline(pl.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("perl" in c and "cpanminus" in c for c in cmds) + assert any("cd svc && cpanm --installdeps" in c for c in cmds) + assert any("cd svc && prove -lv t/" in c for c in cmds) + + +def test_perl_actions_share_install(): + pl = hm.perl(path="svc") + p = hm.pipeline(pl.test(), pl.lint(), default_image="ubuntu:24.04") + cmds = 
_cmds(p) + assert len([c for c in cmds if "cpanminus" in c]) == 1 + assert len([c for c in cmds if "cpanm --installdeps" in c]) == 1 + assert any("prove -lv t/" in c for c in cmds) + assert any("perlcritic" in c for c in cmds) + + +def test_perl_cpanm_cached_on_cpanfile(): + pl = hm.perl(path="svc") + p = hm.pipeline(pl.test()) + deps = _step_by_substring(p, "cpanm --installdeps") + assert deps["cache"]["policy"] == "on_change" + assert "svc/cpanfile" in deps["cache"]["paths"] + + +def test_perl_action_labels_auto_generated(): + pl = hm.perl(path=".") + assert pl.test().label == ":perl: test" + assert pl.lint().label == ":perl: lint" + + +def test_perl_bare_form_actions(): + p = hm.pipeline(hm.perl.test(), hm.perl.lint()) + cmds = _cmds(p) + assert any("prove" in c for c in cmds) + assert any("perlcritic" in c for c in cmds) + + +def test_perl_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + pl = hm.perl(path="svc", base=base) + p = hm.pipeline(pl.test(), default_image="ubuntu:24.04") + assert not any("apt-get install" in c for c in _cmds(p)) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..ed7be7c --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,36 @@ +"""High-level pipeline-factory tests. Lowering details live in +test_pipeline_lowering.py; this file only covers the public factory.""" + +from __future__ import annotations + +import pytest + +from harmont import pipeline, scratch + + +def test_pipeline_returns_v2_dict(): + p = pipeline(scratch().sh("echo", label="echo")) + assert p["version"] == "0" + assert isinstance(p["steps"], list) + assert len(p["steps"]) == 1 + + +def test_pipeline_factory_rejects_no_leaves(): + # `harmont.pipeline` (re-exported) is a polymorphic facade: no-arg + # call routes to the @hm.pipeline decorator path. The factory's + # "at least one leaf" guard is tested via the submodule directly. 
+ from harmont.pipeline import pipeline as _factory + + with pytest.raises(ValueError, match="at least one leaf"): + _factory() + + +def test_pipeline_default_image_lowers_to_dict(): + p = pipeline( + scratch().sh("echo", label="a", image="ubuntu:24.04"), + default_image="alpine:3.20", + ) + assert p["default_image"] == "alpine:3.20" + step = p["steps"][0] + assert step["image"] == "ubuntu:24.04" + assert step["label"] == "a" diff --git a/tests/test_pipeline_fixtures.py b/tests/test_pipeline_fixtures.py new file mode 100644 index 0000000..8d915ab --- /dev/null +++ b/tests/test_pipeline_fixtures.py @@ -0,0 +1,83 @@ +"""@hm.pipeline fixture-style param resolution (HAR-28 follow-up).""" +from __future__ import annotations + +import json + +import pytest + +import harmont as hm +from harmont._registry import clear_registry +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset(): + clear_registry() + clear_target_cache() + yield + clear_registry() + clear_target_cache() + + +def test_zero_param_pipeline_still_works(): + @hm.pipeline("ci") + def ci() -> hm.Step: + return hm.sh("echo hi") + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + assert any(s.get("cmd") == "echo hi" for s in steps) + + +def test_pipeline_receives_target_as_param(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.pipeline("ci") + def ci(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("smoke") + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + cmds = [s.get("cmd") for s in steps] + assert "apt-get update" in cmds + assert "smoke" in cmds + + +def test_pipeline_multi_param_composes_targets(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.target() + def api(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("cabal build") + + @hm.target() + def 
py_test(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("pytest") + + @hm.pipeline("ci") + def ci( + api: hm.Target[hm.Step], + py_test: hm.Target[hm.Step], + ) -> tuple[hm.Step, ...]: + return (api, py_test) + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + apt = [s for s in steps if s.get("cmd") == "apt-get update"] + assert len(apt) == 1 # apt_base deduped via target memoization + cmds = sorted(s.get("cmd") for s in steps if s.get("type") == "command") + assert "cabal build" in cmds + assert "pytest" in cmds + + +def test_pipeline_with_unknown_param_raises(): + @hm.pipeline("ci") + def ci(no_such_target: hm.Target[hm.Step]) -> hm.Step: + return hm.sh("never reached") + + with pytest.raises(TypeError, match="target 'no_such_target' not found"): + hm.dump_registry_json() diff --git a/tests/test_pipeline_lowering.py b/tests/test_pipeline_lowering.py new file mode 100644 index 0000000..585b650 --- /dev/null +++ b/tests/test_pipeline_lowering.py @@ -0,0 +1,124 @@ +"""Lowering: walk leaves back to scratch, topo-sort, emit JSON-shaped dicts. + +The lowering pass returns intermediate Python dicts (the same shape +the JSON IR will have, before the codegen pass produces Scheme). This +test asserts on that intermediate, not on Scheme strings — Scheme +output is covered by test_codegen.py. 
+""" + +from __future__ import annotations + +import pytest + +from harmont._step import scratch, wait +from harmont.pipeline import _lower_to_dicts, pipeline + + +def test_single_chain_emits_three_command_dicts_in_parent_order(): + a = scratch().sh("step a", label="a") + b = a.sh("step b", label="b") + c = b.sh("step c", label="c") + dicts = _lower_to_dicts([c]) + assert [d["type"] for d in dicts] == ["command", "command", "command"] + assert [d["key"] for d in dicts] == ["a", "b", "c"] + assert dicts[0]["builds_in"] is None + assert dicts[1]["builds_in"] == "a" + assert dicts[2]["builds_in"] == "b" + + +def test_fork_node_is_not_emitted_children_inherit_grandparent(): + base = scratch().sh("install", label="install") + branch = base.fork(label="branch-a") + leaf = branch.sh("test", label="test") + dicts = _lower_to_dicts([leaf]) + keys = [d["key"] for d in dicts] + parents = {d["key"]: d["builds_in"] for d in dicts} + assert keys == ["install", "test"] + assert parents["install"] is None + assert parents["test"] == "install" + + +def test_two_branches_share_parent_key(): + base = scratch().sh("install", label="install") + a = base.fork(label="a").sh("test-a", label="test-a") + b = base.fork(label="b").sh("test-b", label="test-b") + dicts = _lower_to_dicts([a, b]) + parents = {d["key"]: d["builds_in"] for d in dicts} + assert parents["test-a"] == "install" + assert parents["test-b"] == "install" + + +def test_wait_step_emitted_in_position(): + a = scratch().sh("a", label="a") + b = scratch().sh("b", label="b") + c = scratch().sh("c", label="c") + dicts = _lower_to_dicts([a, b, wait(), c]) + types = [d["type"] for d in dicts] + assert "wait" in types + wait_idx = types.index("wait") + keys_before = [d["key"] for d in dicts[:wait_idx]] + keys_after = [d["key"] for d in dicts[wait_idx + 1 :]] + assert "a" in keys_before + assert "b" in keys_before + assert "c" in keys_after + + +def test_wait_continue_on_failure_carried_through(): + a = scratch().sh("a", label="a") + 
dicts = _lower_to_dicts([a, wait(continue_on_failure=True)]) + wait_dict = next(d for d in dicts if d["type"] == "wait") + assert wait_dict["continue_on_failure"] is True + + +def test_command_includes_label_env_timeout_when_set(): + s = scratch().sh( + "make", + label="build", + env={"CI": "true"}, + timeout_seconds=600, + ) + dicts = _lower_to_dicts([s]) + assert dicts[0]["label"] == "build" + assert dicts[0]["env"] == {"CI": "true"} + assert dicts[0]["timeout_seconds"] == 600 + + +def test_command_omits_optional_fields_when_unset(): + s = scratch().sh("make") + d = _lower_to_dicts([s])[0] + # Required fields present. + assert d["type"] == "command" + assert "key" in d + assert "cmd" in d + assert "builds_in" in d + # Optional fields omitted (not None) when unset. + assert "label" not in d + assert "env" not in d + assert "timeout_seconds" not in d + assert "cache" not in d + + +def test_pipeline_factory_collects_reachable_via_parent(): + base = scratch().sh("install", label="install") + leaf_a = base.fork(label="a").sh("test-a", label="test-a") + leaf_b = base.fork(label="b").sh("test-b", label="test-b") + p = pipeline(leaf_a, leaf_b, env={"CI": "true"}) + keys = [s["key"] for s in p["steps"]] + assert set(keys) == {"install", "test-a", "test-b"} + assert p["env"] == {"CI": "true"} + assert p["version"] == "0" + + +def test_pipeline_with_no_leaves_raises(): + with pytest.raises(ValueError, match="at least one leaf"): + pipeline() + + +def test_dedup_when_step_reachable_from_multiple_leaves(): + base = scratch().sh("install", label="install") + a = base.sh("a", label="a") + b = base.sh("b", label="b") + p = pipeline(a, b) + keys = [s["key"] for s in p["steps"]] + # `install` appears once even though it's reachable from both leaves. 
+ assert keys.count("install") == 1 diff --git a/tests/test_python.py b/tests/test_python.py new file mode 100644 index 0000000..0a86c63 --- /dev/null +++ b/tests/test_python.py @@ -0,0 +1,133 @@ +"""Python (uv) toolchain abstraction tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont.cache import CacheOnChange + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + msg = f"no command step containing {needle!r}" + raise AssertionError(msg) + + +def test_python_object_form_full_chain(): + py = hm.python(path="svc") + p = hm.pipeline(py.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("apt-get install" in c for c in cmds) + assert any("astral.sh/uv/install.sh" in c for c in cmds) + assert any("cd svc && uv sync" in c for c in cmds) + assert any("cd svc && uv run pytest" in c for c in cmds) + + +def test_python_actions_share_install_step(): + py = hm.python(path="svc") + p = hm.pipeline(py.test(), py.lint(), py.fmt(), py.typecheck(), + default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "astral.sh/uv/install.sh" in c]) == 1 + assert len([c for c in cmds if "apt-get install" in c]) == 1 + assert any("uv run pytest" in c for c in cmds) + assert any("uv run ruff check" in c for c in cmds) + assert any("uv run ruff format --check" in c for c in cmds) + assert any("uv run mypy" in c for c in cmds) + + +def test_python_sync_cached_on_change_of_lockfile(): + py = hm.python(path="svc") + p = hm.pipeline(py.test()) + sync = _step_by_substring(p, "uv sync") + assert sync["cache"]["policy"] == "on_change" + assert "svc/uv.lock" in sync["cache"]["paths"] + assert "svc/pyproject.toml" in sync["cache"]["paths"] + + +def test_python_install_cache_forever(): + py = 
hm.python(path=".") + p = hm.pipeline(py.test()) + install = _step_by_substring(p, "astral.sh/uv/install.sh") + assert install["cache"]["policy"] == "forever" + + +def test_python_bare_form_test(): + p = hm.pipeline(hm.python.test()) + cmds = _cmds(p) + assert any("cd . && uv run pytest" in c for c in cmds) + + +def test_python_bare_form_all_actions(): + p = hm.pipeline(hm.python.test(), hm.python.lint(), + hm.python.fmt(), hm.python.typecheck()) + cmds = _cmds(p) + assert any("pytest" in c for c in cmds) + assert any("ruff check" in c for c in cmds) + assert any("ruff format --check" in c for c in cmds) + assert any("mypy" in c for c in cmds) + + +def test_python_action_labels_auto_generated(): + py = hm.python(path=".") + assert py.test().label == ":python: test" + assert py.lint().label == ":python: lint" + assert py.fmt().label == ":python: fmt" + assert py.typecheck().label == ":python: typecheck" + + +def test_python_action_label_override(): + py = hm.python(path=".") + assert py.test(label=":python: smoke").label == ":python: smoke" + + +def test_python_action_cache_forwarded(): + py = hm.python(path=".") + s = py.test(cache=CacheOnChange(paths=("pyproject.toml",))) + assert s.cache == CacheOnChange(paths=("pyproject.toml",)) + + +def test_python_image_emitted_on_apt_step(): + py = hm.python(path=".", image="ubuntu:24.04") + p = hm.pipeline(py.test()) + apt = _step_by_substring(p, "apt-get install") + assert apt.get("image") == "ubuntu:24.04" + + +def test_python_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + py = hm.python(path="svc", base=base) + p = hm.pipeline(py.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("apt-get install" in c for c in cmds) + assert any("custom base" in c for c in cmds) + assert any("astral.sh/uv/install.sh" in c for c in cmds) + + +def test_python_installed_escape_hatch_chains(): + py = hm.python(path="svc") + custom = py.installed.sh( + "cd svc && uv run python -m mytool", 
+ label=":python: custom", + ) + p = hm.pipeline(custom) + cmds = _cmds(p) + assert any("mytool" in c for c in cmds) + + +def test_python_uv_version_in_install_cmd(): + py = hm.python(path=".", uv_version="0.4.18") + p = hm.pipeline(py.test()) + install = _step_by_substring(p, "astral.sh/uv/install.sh") + assert "UV_VERSION=0.4.18" in install["cmd"] + + +def test_python_invalid_uv_version_rejected(): + with pytest.raises(ValueError, match="uv_version"): + hm.python(uv_version="not a valid; version") diff --git a/tests/test_registry.py b/tests/test_registry.py new file mode 100644 index 0000000..54f45a4 --- /dev/null +++ b/tests/test_registry.py @@ -0,0 +1,81 @@ +"""Module-level pipeline registry.""" + +import pytest + +from harmont._registry import ( + REGISTRATIONS, + PipelineRegistration, + clear_registry, + register, +) + + +@pytest.fixture(autouse=True) +def _reset_registry(): + clear_registry() + yield + clear_registry() + + +def test_empty_registry(): + assert REGISTRATIONS == [] + + +def test_register_appends(): + reg = PipelineRegistration( + slug="ci", + name="CI", + triggers=(), + allow_manual=True, + env=None, + default_image=None, + fn=lambda: None, + ) + register(reg) + assert [reg] == REGISTRATIONS + + +def test_register_duplicate_slug_raises(): + fn = lambda: None # noqa: E731 — intentional inline stub per HAR-9 Task 1.2 plan + register( + PipelineRegistration( + slug="ci", + name="CI", + triggers=(), + allow_manual=True, + env=None, + default_image=None, + fn=fn, + ) + ) + with pytest.raises(ValueError, match="duplicate pipeline slug") as excinfo: + register( + PipelineRegistration( + slug="ci", + name="CI", + triggers=(), + allow_manual=True, + env=None, + default_image=None, + fn=fn, + ) + ) + assert "duplicate pipeline slug" in str(excinfo.value) + assert "ci" in str(excinfo.value) + + +def test_clear_resets(): + fn = lambda: None # noqa: E731 — intentional inline stub per HAR-9 Task 1.2 plan + register( + PipelineRegistration( + slug="ci", + 
name="CI", + triggers=(), + allow_manual=True, + env=None, + default_image=None, + fn=fn, + ) + ) + clear_registry() + assert REGISTRATIONS == [] diff --git a/tests/test_ruby.py b/tests/test_ruby.py new file mode 100644 index 0000000..8f63c92 --- /dev/null +++ b/tests/test_ruby.py @@ -0,0 +1,74 @@ +"""Ruby toolchain tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_ruby_object_form_full_chain(): + rb = hm.ruby(path="svc") + p = hm.pipeline(rb.test(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("ruby-full" in c for c in cmds) + assert any("gem install bundler" in c for c in cmds) + assert any("cd svc && bundle install" in c for c in cmds) + assert any("cd svc && bundle exec rspec" in c for c in cmds) + + +def test_ruby_actions_share_install(): + rb = hm.ruby(path="svc") + p = hm.pipeline(rb.test(), rb.lint(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "ruby-full" in c]) == 1 + assert any("bundle exec rspec" in c for c in cmds) + assert any("bundle exec rubocop" in c for c in cmds) + + +def test_ruby_bundle_install_cached_on_lockfile(): + rb = hm.ruby(path="svc") + p = hm.pipeline(rb.test()) + bundle = _step_by_substring(p, "bundle install") + assert bundle["cache"]["policy"] == "on_change" + assert "svc/Gemfile.lock" in bundle["cache"]["paths"] + + +def test_ruby_action_labels_auto_generated(): + rb = hm.ruby(path=".") + assert rb.test().label == ":ruby: test" + assert rb.lint().label == ":ruby: lint" + + +def test_ruby_bare_form_actions(): + p = hm.pipeline(hm.ruby.test(), hm.ruby.lint()) + cmds = _cmds(p) + assert any("rspec" in c for c in cmds) + assert any("rubocop" 
in c for c in cmds) + + +def test_ruby_invalid_version_rejected(): + with pytest.raises(ValueError, match="version"): + hm.ruby(version="bogus; oops") + + +def test_ruby_pinned_version_not_yet_supported(): + with pytest.raises(NotImplementedError, match="not yet wired in"): + hm.ruby(version="3.2.2") + + +def test_ruby_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + rb = hm.ruby(path="svc", base=base) + p = hm.pipeline(rb.test(), default_image="ubuntu:24.04") + assert not any("ruby-full" in c for c in _cmds(p)) diff --git a/tests/test_rust.py b/tests/test_rust.py new file mode 100644 index 0000000..0883cfd --- /dev/null +++ b/tests/test_rust.py @@ -0,0 +1,168 @@ +"""Rust toolchain abstraction tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont.cache import CacheOnChange + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + msg = f"no command step containing {needle!r}" + raise AssertionError(msg) + + +def test_rust_object_form_full_chain(): + rust = hm.rust(path="cli") + p = hm.pipeline(rust.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("apt-get install" in c for c in cmds) + assert any("sh.rustup.rs" in c for c in cmds) + assert any("cd cli && cargo build" in c for c in cmds) + + +def test_rust_actions_share_install_step(): + rust = hm.rust(path="cli") + p = hm.pipeline(rust.build(), rust.test(), rust.clippy(), rust.fmt(), rust.doc(), + default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "sh.rustup.rs" in c]) == 1 + assert len([c for c in cmds if "apt-get install" in c]) == 1 + assert any("cargo build" in c for c in cmds) + assert any("cargo test" in c for c in cmds) + assert any("cargo clippy --all-targets -- -D warnings" 
in c for c in cmds) + assert any("cargo fmt --check" in c for c in cmds) + assert any("cargo doc --no-deps" in c for c in cmds) + + +def test_rust_build_release_flag(): + rust = hm.rust(path=".") + s = rust.build(release=True) + assert s.cmd is not None + assert "cargo build --release" in s.cmd + + +def test_rust_test_release_flag(): + rust = hm.rust(path=".") + s = rust.test(release=True) + assert s.cmd is not None + assert "cargo test --release" in s.cmd + + +def test_rust_rustup_cache_forever(): + rust = hm.rust(path="cli") + p = hm.pipeline(rust.build()) + rustup = _step_by_substring(p, "sh.rustup.rs") + assert rustup["cache"]["policy"] == "forever" + + +def test_rust_default_components(): + rust = hm.rust(path=".") + p = hm.pipeline(rust.build()) + rustup = _step_by_substring(p, "sh.rustup.rs") + assert "--component clippy,rustfmt" in rustup["cmd"] + + +def test_rust_components_override(): + rust = hm.rust(path=".", components=("clippy",)) + p = hm.pipeline(rust.build()) + rustup = _step_by_substring(p, "sh.rustup.rs") + assert "--component clippy" in rustup["cmd"] + assert "rustfmt" not in rustup["cmd"] + + +def test_rust_version_in_rustup_cmd(): + rust = hm.rust(path=".", version="1.81.0") + p = hm.pipeline(rust.build()) + rustup = _step_by_substring(p, "sh.rustup.rs") + assert "--default-toolchain 1.81.0" in rustup["cmd"] + + +def test_rust_invalid_version_rejected(): + with pytest.raises(ValueError, match="version"): + hm.rust(version="not a valid; version") + + +def test_rust_installed_escape_hatch_chains(): + rust = hm.rust(path="cli") + custom = rust.installed.sh( + "cd cli && cargo build --release --features foo", + label=":rust: custom", + ) + p = hm.pipeline(custom) + cmds = _cmds(p) + assert any("--features foo" in c for c in cmds) + + +def test_rust_action_labels_auto_generated(): + rust = hm.rust(path=".") + assert rust.build().label == ":rust: build" + assert rust.test().label == ":rust: test" + assert rust.clippy().label == ":rust: clippy" + 
assert rust.fmt().label == ":rust: fmt" + assert rust.doc().label == ":rust: doc" + + +def test_rust_action_label_override(): + rust = hm.rust(path=".") + s = rust.build(label=":rust: dev build") + assert s.label == ":rust: dev build" + + +def test_rust_action_cache_forwarded(): + rust = hm.rust(path=".") + s = rust.build(cache=CacheOnChange(paths=("Cargo.lock",))) + assert s.cache == CacheOnChange(paths=("Cargo.lock",)) + + +def test_rust_image_emitted_on_apt_step(): + rust = hm.rust(path=".", image="alpine:3.20") + p = hm.pipeline(rust.build()) + apt = _step_by_substring(p, "apt-get install") + assert apt.get("image") == "alpine:3.20" + + +def test_rust_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + rust = hm.rust(path="cli", base=base) + p = hm.pipeline(rust.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert not any("apt-get install" in c for c in cmds) + assert any("custom base" in c for c in cmds) + assert any("sh.rustup.rs" in c for c in cmds) + assert any("cd cli && cargo build" in c for c in cmds) + + +def test_rust_bare_form_build(): + p = hm.pipeline(hm.rust.build()) + cmds = _cmds(p) + assert any("cd . 
&& cargo build" in c for c in cmds) + + +def test_rust_bare_form_all_actions(): + p = hm.pipeline(hm.rust.build(), hm.rust.test(), hm.rust.clippy(), + hm.rust.fmt(), hm.rust.doc()) + cmds = _cmds(p) + assert any("cargo build" in c for c in cmds) + assert any("cargo test" in c for c in cmds) + assert any("cargo clippy" in c for c in cmds) + assert any("cargo fmt --check" in c for c in cmds) + assert any("cargo doc --no-deps" in c for c in cmds) + + +def test_rust_bare_form_accepts_path_kwarg(): + p = hm.pipeline(hm.rust.test(path="cli")) + cmds = _cmds(p) + assert any("cd cli && cargo test" in c for c in cmds) + + +def test_rust_bare_form_forwards_action_kwargs(): + s = hm.rust.build(path="cli", label=":rust: custom") + assert s.label == ":rust: custom" diff --git a/tests/test_sh_shorthand.py b/tests/test_sh_shorthand.py new file mode 100644 index 0000000..6526d2f --- /dev/null +++ b/tests/test_sh_shorthand.py @@ -0,0 +1,44 @@ +"""hm.sh top-level shorthand (HAR-28).""" +from __future__ import annotations + +import harmont as hm +from harmont.cache import CacheNone + + +def test_hm_sh_returns_step_rooted_at_scratch(): + s = hm.sh("apt-get update") + assert isinstance(s, hm.Step) + assert s.parent is not None + assert s.parent.cmd is None + assert s.parent.parent is None + assert s.cmd == "apt-get update" + + +def test_hm_sh_chains_with_sh(): + s = hm.sh("apt-get update").sh("apt-get install -y python3") + assert s.cmd == "apt-get install -y python3" + assert s.parent is not None + assert s.parent.cmd == "apt-get update" + + +def test_hm_sh_accepts_all_step_sh_kwargs(): + s = hm.sh( + "make", + label="build", + cache=CacheNone(), + env={"CI": "true"}, + timeout_seconds=600, + image="alpine:3.20", + key="explicit", + ) + assert s.label == "build" + assert s.cache == CacheNone() + assert s.env == {"CI": "true"} + assert s.timeout_seconds == 600 + assert s.image == "alpine:3.20" + assert s.key_override == "explicit" + + +def test_hm_sh_cwd_kwarg(): + s = hm.sh("pytest 
-v", cwd="cidsl/py") + assert s.cmd == "cd cidsl/py && pytest -v" diff --git a/tests/test_step_chain.py b/tests/test_step_chain.py new file mode 100644 index 0000000..5e1ffcc --- /dev/null +++ b/tests/test_step_chain.py @@ -0,0 +1,90 @@ +"""Pure mechanics of the chain DSL — no codegen, no JSON.""" + +from __future__ import annotations + +from dataclasses import FrozenInstanceError + +import pytest + +from harmont._step import scratch, wait +from harmont.cache import CacheNone + + +def test_scratch_has_no_parent_no_cmd(): + s = scratch() + assert s.parent is None + assert s.cmd is None + assert s.is_wait is False + + +def test_sh_links_parent_and_sets_cmd(): + parent = scratch() + child = parent.sh("echo hi") + assert child.parent is parent + assert child.cmd == "echo hi" + assert child.is_wait is False + + +def test_sh_returns_new_instance_parent_unchanged(): + parent = scratch() + parent.sh("a") + parent.sh("b") + # parent must be untouched (frozen dataclass) + assert parent.parent is None + assert parent.cmd is None + + +def test_fork_makes_branded_passthrough(): + parent = scratch().sh("install") + branch = parent.fork(label="branch-a") + assert branch.parent is parent + assert branch.cmd is None + assert branch.label == "branch-a" + assert branch.is_wait is False + + +def test_fork_can_be_called_many_times_off_same_parent(): + parent = scratch().sh("install") + a = parent.fork(label="a") + b = parent.fork(label="b") + c = parent.fork() + assert {a.label, b.label, c.label} == {"a", "b", None} + assert a.parent is parent + assert b.parent is parent + assert c.parent is parent + + +def test_sh_kwargs_carried_through(): + s = scratch().sh( + "make", + label="build", + cache=CacheNone(), + env={"CI": "true"}, + timeout_seconds=600, + key="explicit-key", + ) + assert s.label == "build" + assert s.cache == CacheNone() + assert s.env == {"CI": "true"} + assert s.timeout_seconds == 600 + assert s.key_override == "explicit-key" + + +def test_step_is_frozen(): + s = 
scratch() + with pytest.raises(FrozenInstanceError): + s.cmd = "mutated" # type: ignore[misc] + + +def test_wait_has_no_cmd_no_parent_and_is_wait_true(): + w = wait() + assert w.parent is None + assert w.cmd is None + assert w.is_wait is True + + +def test_wait_continue_on_failure_recorded(): + w_default = wait() + w_continue = wait(continue_on_failure=True) + assert w_default.continue_on_failure is False + assert w_continue.continue_on_failure is True diff --git a/tests/test_step_sh.py b/tests/test_step_sh.py new file mode 100644 index 0000000..9b1dd0c --- /dev/null +++ b/tests/test_step_sh.py @@ -0,0 +1,86 @@ +"""Step.sh chain method + cwd= kwarg (HAR-28).""" +from __future__ import annotations + +from harmont._step import scratch +from harmont.cache import CacheNone + + +def test_sh_links_parent_and_sets_cmd(): + parent = scratch() + child = parent.sh("echo hi") + assert child.parent is parent + assert child.cmd == "echo hi" + assert child.is_wait is False + + +def test_sh_carries_all_kwargs(): + s = scratch().sh( + "make", + label="build", + cache=CacheNone(), + env={"CI": "true"}, + timeout_seconds=600, + key="explicit-key", + ) + assert s.label == "build" + assert s.cache == CacheNone() + assert s.env == {"CI": "true"} + assert s.timeout_seconds == 600 + assert s.key_override == "explicit-key" + + +def test_sh_cwd_prepends_cd(): + s = scratch().sh("pytest -v", cwd="cidsl/py") + assert s.cmd == "cd cidsl/py && pytest -v" + + +def test_sh_cwd_none_leaves_cmd_unchanged(): + s = scratch().sh("echo hi", cwd=None) + assert s.cmd == "echo hi" + + +def test_sh_cwd_empty_string_is_rejected(): + import pytest + + with pytest.raises(ValueError, match="hm: cwd must be a non-empty path"): + scratch().sh("echo", cwd="") + + +def test_sh_inherits_image_from_scratch_parent(): + """A scratch root with image= set propagates to its first .sh() child.""" + from harmont._step import Step + + root = Step(image="ubuntu-24.04") # scratch with image + child = root.sh("apt-get 
update") + assert child.image == "ubuntu-24.04" + + +def test_sh_image_inheritance_does_not_apply_to_grandchildren(): + """The inheritance is narrow: only scratch → first child. Subsequent + .sh() calls don't inherit from a non-scratch parent.""" + from harmont._step import Step + + root = Step(image="ubuntu-24.04") + first = root.sh("a") + second = first.sh("b") + assert first.image == "ubuntu-24.04" + assert second.image is None # parent has cmd, doesn't propagate + + +def test_sh_explicit_image_overrides_scratch_inheritance(): + """If the caller passes image= explicitly, it wins over inheritance.""" + from harmont._step import Step + + root = Step(image="ubuntu-24.04") + child = root.sh("a", image="alpine:3.20") + assert child.image == "alpine:3.20" + + +def test_sh_scratch_without_image_remains_none(): + """The existing scratch().sh() pattern is unchanged.""" + from harmont._step import scratch + + s = scratch().sh("echo") + assert s.image is None + + diff --git a/tests/test_strict_signature.py b/tests/test_strict_signature.py new file mode 100644 index 0000000..c5948da --- /dev/null +++ b/tests/test_strict_signature.py @@ -0,0 +1,129 @@ +"""Strict signature validation + Annotated-marker dispatch (HAR-28 follow-up).""" +from __future__ import annotations + +from typing import Annotated + +import pytest + +import harmont as hm +from harmont._deps import ( + clear_target_names, + register_named_target, + resolve_deps, + validate_target_signature, +) +from harmont._step import Step + + +@pytest.fixture(autouse=True) +def _reset(): + clear_target_names() + yield + clear_target_names() + + +def test_target_marker_resolves_via_registry(): + register_named_target("apt_base", lambda: Step(cmd="apt-get update")) + + def fn(apt_base: hm.Target[Step]) -> Step: # type: ignore[empty-body] + ... 
+ + kwargs = resolve_deps(fn) + assert isinstance(kwargs["apt_base"], Step) + assert kwargs["apt_base"].cmd == "apt-get update" + + +def test_target_marker_missing_target_raises(): + def fn(missing: hm.Target[Step]) -> Step: # type: ignore[empty-body] + ... + + with pytest.raises(TypeError, match="hm: target 'missing' not found"): + resolve_deps(fn) + + +def test_base_image_marker_injects_scratch_step_with_image(): + def fn(base: Annotated[Step, hm.BaseImage("ubuntu-24.04")]) -> Step: # type: ignore[empty-body] + ... + + kwargs = resolve_deps(fn) + base = kwargs["base"] + assert isinstance(base, Step) + assert base.parent is None + assert base.cmd is None + assert base.image == "ubuntu-24.04" + + +def test_base_image_then_sh_emits_step_with_image(): + """End-to-end: BaseImage param → .sh() → first emitted cmd step carries image.""" + def fn(base: Annotated[Step, hm.BaseImage("ubuntu-24.04")]) -> Step: # type: ignore[empty-body] + ... + + base = resolve_deps(fn)["base"] + first = base.sh("apt-get update") + assert first.image == "ubuntu-24.04" + + +def test_unannotated_param_with_no_default_is_strict_error(): + def fn(x) -> Step: # type: ignore[empty-body, no-untyped-def] + ... + + with pytest.raises(TypeError, match="hm: parameter 'x' has no marker"): + validate_target_signature(fn) + + +def test_plain_annotation_no_marker_no_default_is_strict_error(): + def fn(x: int) -> Step: # type: ignore[empty-body] + ... + + with pytest.raises(TypeError, match="hm: parameter 'x' has no marker"): + validate_target_signature(fn) + + +def test_plain_param_with_default_is_allowed(): + def fn(image_tag: str = "ubuntu:24.04") -> Step: # type: ignore[empty-body] + ... + + validate_target_signature(fn) # no raise + assert resolve_deps(fn) == {"image_tag": "ubuntu:24.04"} + + +def test_validate_signature_rejects_var_args(): + def fn(*args) -> Step: # type: ignore[empty-body, no-untyped-def] + ... 
+ + with pytest.raises(TypeError, match="hm: target functions cannot take \\*args"): + validate_target_signature(fn) + + +def test_validate_signature_rejects_var_kwargs(): + def fn(**kwargs) -> Step: # type: ignore[empty-body, no-untyped-def] + ... + + with pytest.raises(TypeError, match="hm: target functions cannot take \\*\\*kwargs"): + validate_target_signature(fn) + + +def test_validate_signature_rejects_positional_only(): + def fn(x: hm.Target[Step], /) -> Step: # type: ignore[empty-body] + ... + + with pytest.raises(TypeError, match="hm: target functions cannot have positional-only"): + validate_target_signature(fn) + + +def test_zero_param_fn_is_valid(): + def fn() -> Step: # type: ignore[empty-body] + ... + + validate_target_signature(fn) # no raise + assert resolve_deps(fn) == {} + + +def test_target_marker_strict_no_default_fallback(): + """Even with a default, Target marker requires the target to exist.""" + def fn(api: hm.Target[Step] = None) -> Step: # type: ignore[assignment,empty-body] + ... + + # The default annotation is parsed but Target is strict — must resolve. 
+ with pytest.raises(TypeError, match="hm: target 'api' not found"): + resolve_deps(fn) diff --git a/tests/test_target.py b/tests/test_target.py new file mode 100644 index 0000000..93e30ee --- /dev/null +++ b/tests/test_target.py @@ -0,0 +1,109 @@ +"""@hm.target() decorator — memoization + composition (HAR-28).""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont._deps import clear_target_names +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset_target_cache(): + clear_target_cache() + clear_target_names() + yield + clear_target_cache() + clear_target_names() + + +def test_target_returns_function_unchanged_in_signature(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + # callable with no args, returns a Step + result = apt_base() + assert isinstance(result, hm.Step) + assert result.cmd == "apt-get update" + + +def test_target_memoizes_within_one_render(): + call_count = 0 + + @hm.target() + def apt_base() -> hm.Step: + nonlocal call_count + call_count += 1 + return hm.sh("apt-get update") + + a = apt_base() + b = apt_base() + assert a is b + assert call_count == 1 + + +def test_clear_target_cache_resets_memoization(): + call_count = 0 + + @hm.target() + def apt_base() -> hm.Step: + nonlocal call_count + call_count += 1 + return hm.sh("apt-get update") + + apt_base() + clear_target_cache() + apt_base() + assert call_count == 2 + + +def test_composition_via_chaining_off_a_target(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.target() + def venv() -> hm.Step: + return apt_base().sh("python3 -m venv .venv") + + @hm.target() + def api() -> hm.Step: + return apt_base().sh("cabal build") + + v = venv() + a = api() + # Both targets chained off the SAME apt-base step (memoized). 
+ assert v.parent is a.parent + assert v.parent is not None + assert v.parent.cmd == "apt-get update" + + +def test_target_with_toolchain_return_passes_through(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + + @hm.target() + def api(): + return hm.haskell(ghc="9.6.7").cabal(path="api") + + from harmont.haskell import HaskellPackage + + result = api() + assert isinstance(result, HaskellPackage) + assert result.path == "api" + + +def test_target_called_inside_pipeline_uses_cached_value(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.target() + def venv() -> hm.Step: + return apt_base().sh("venv setup") + + # Direct invocation: same call returns same Step. + v1 = venv() + v2 = venv() + assert v1 is v2 diff --git a/tests/test_target_cross_module.py b/tests/test_target_cross_module.py new file mode 100644 index 0000000..ee7975c --- /dev/null +++ b/tests/test_target_cross_module.py @@ -0,0 +1,73 @@ +"""Cross-module target deps via global registry (HAR-28 follow-up).""" +from __future__ import annotations + +import json + +import pytest + +import harmont as hm +from harmont._registry import clear_registry +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset(): + clear_registry() + clear_target_cache() + yield + clear_registry() + clear_target_cache() + + +def test_target_in_module_a_consumed_by_target_in_module_b(): + """Simulate two .harmont/*.py files registering targets in one + envelope render. Module A defines apt_base; module B's target + depends on it by parameter name.""" + # Module A — defines apt_base. + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + # Module B — declares apt_base as a param (cross-module by name). + @hm.target() + def py_test(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("pytest -v", cwd="cidsl/py") + + # Module C — pipeline composes module B's target. 
+ @hm.pipeline("ci") + def ci(py_test: hm.Target[hm.Step]) -> hm.Step: + return py_test + + out = json.loads(hm.dump_registry_json()) + steps = out["pipelines"][0]["definition"]["steps"] + cmds = sorted(s.get("cmd") for s in steps if s.get("type") == "command") + assert "apt-get update" in cmds + assert "cd cidsl/py && pytest -v" in cmds + + +def test_duplicate_name_across_modules_raises(): + """Same target name registered twice (e.g. two modules both define + apt_base) raises at decoration time.""" + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("first") + + with pytest.raises(ValueError, match="duplicate target name 'apt_base'"): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("second") + + +def test_disambiguate_via_explicit_name(): + """Two modules with same fn name can coexist via name=.""" + @hm.target(name="apt_base_a") + def apt_base() -> hm.Step: + return hm.sh("first") + + @hm.target(name="apt_base_b") + def apt_base() -> hm.Step: # noqa: F811 + return hm.sh("second") + + from harmont._deps import _TARGETS_BY_NAME + assert "apt_base_a" in _TARGETS_BY_NAME + assert "apt_base_b" in _TARGETS_BY_NAME diff --git a/tests/test_target_fixtures.py b/tests/test_target_fixtures.py new file mode 100644 index 0000000..d63b2b6 --- /dev/null +++ b/tests/test_target_fixtures.py @@ -0,0 +1,154 @@ +"""@hm.target fixture-style param resolution (HAR-28 follow-up).""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont._deps import clear_target_names +from harmont._target import clear_target_cache + + +@pytest.fixture(autouse=True) +def _reset(): + clear_target_cache() + clear_target_names() + yield + clear_target_cache() + clear_target_names() + + +def test_zero_param_target_still_works(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + s = apt_base() + assert s.cmd == "apt-get update" + + +def test_target_param_receives_dependency_value(): + @hm.target() + def apt_base() -> 
hm.Step: + return hm.sh("apt-get update") + + @hm.target() + def venv(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("python3 -m venv .venv") + + v = venv() + assert v.parent is not None + assert v.parent.cmd == "apt-get update" + + +def test_multi_param_target(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("apt-get update") + + @hm.target() + def node_install() -> hm.Step: + return apt_base().sh("curl ... | bash") + + @hm.target() + def project( + apt_base: hm.Target[hm.Step], + node_install: hm.Target[hm.Step], + ): + # Both injected; we just verify both flow through. + return (apt_base, node_install) + + base, node = project() + assert base.cmd == "apt-get update" + assert "curl" in node.cmd + + +def test_param_named_after_unregistered_target_raises(): + @hm.target() + def venv(missing: hm.Target[hm.Step]) -> hm.Step: + return hm.sh("never reached") + + with pytest.raises(TypeError, match="target 'missing' not found"): + venv() + + +def test_duplicate_target_name_raises_at_decoration(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("a") + + with pytest.raises(ValueError, match="duplicate target name 'apt_base'"): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("b") + + +def test_explicit_name_override(): + # name= overrides the default (fn.__name__) registry key. A dash + # in the key is fine because we resolve via the registry directly, + # not via Python identifier rules. 
+ @hm.target(name="apt-base") + def whatever() -> hm.Step: + return hm.sh("apt-get update") + + from harmont._deps import _TARGETS_BY_NAME + assert "apt-base" in _TARGETS_BY_NAME + assert "whatever" not in _TARGETS_BY_NAME + + +def test_default_value_used_when_no_target_registered(): + @hm.target() + def maybe_extra(image_tag: str = "ubuntu:24.04") -> hm.Step: + return hm.sh(f"echo {image_tag}") + + s = maybe_extra() + assert s.cmd == "echo ubuntu:24.04" + + +def test_memoization_still_works_with_params(): + call_count = 0 + + @hm.target() + def apt_base() -> hm.Step: + nonlocal call_count + call_count += 1 + return hm.sh("apt-get update") + + @hm.target() + def venv(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("v") + + @hm.target() + def api(apt_base: hm.Target[hm.Step]) -> hm.Step: + return apt_base.sh("a") + + v = venv() + a = api() + # apt_base ran once; venv and api share its Step. + assert call_count == 1 + assert v.parent is a.parent + + +def test_cycle_between_two_targets_raises(): + # Hand-construct a cycle: a takes b, b takes a. + @hm.target() + def a(b: hm.Target[hm.Step]) -> hm.Step: + return b.sh("a") + + @hm.target() + def b(a: hm.Target[hm.Step]) -> hm.Step: + return a.sh("b") + + with pytest.raises(RuntimeError, match="dependency cycle"): + a() + + +def test_clear_target_cache_also_clears_name_registry(): + @hm.target() + def apt_base() -> hm.Step: + return hm.sh("a") + + from harmont._deps import _TARGETS_BY_NAME + + assert "apt_base" in _TARGETS_BY_NAME + clear_target_cache() + assert "apt_base" not in _TARGETS_BY_NAME diff --git a/tests/test_target_unwrap.py b/tests/test_target_unwrap.py new file mode 100644 index 0000000..a9e3aad --- /dev/null +++ b/tests/test_target_unwrap.py @@ -0,0 +1,79 @@ +"""as_leaves unwraps toolchain return values to (Step, ...) 
(HAR-28).""" +from __future__ import annotations + +import pytest + +import harmont as hm +from harmont._step import Step +from harmont._unwrap import as_leaves + + +def test_step_passes_through(): + s = hm.sh("echo hi") + out = as_leaves(s) + assert out == (s,) + + +def test_tuple_of_steps_passes_through(): + a = hm.sh("a") + b = hm.sh("b") + out = as_leaves((a, b)) + assert out == (a, b) + + +def test_list_of_steps_is_normalized_to_tuple(): + a = hm.sh("a") + out = as_leaves([a]) + assert out == (a,) + + +def test_haskell_package_unwraps_to_build(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + pkg = hm.haskell(ghc="9.6.7").cabal(path="api") + leaves = as_leaves(pkg) + assert len(leaves) == 1 + assert isinstance(leaves[0], Step) + assert "cabal build all" in leaves[0].cmd + + +def test_rust_toolchain_unwraps_to_build(): + tc = hm.rust(path="cli", version="stable") + leaves = as_leaves(tc) + assert len(leaves) == 1 + assert "cargo build" in leaves[0].cmd + + +def test_npm_project_unwraps_to_install(): + proj = hm.npm(path="app", version="20") + leaves = as_leaves(proj) + assert len(leaves) == 1 + # The default leaf is the npm-ci step itself. + assert "npm ci" in leaves[0].cmd + + +def test_elm_project_unwraps_to_make_main(): + proj = hm.elm(path="src") + leaves = as_leaves(proj) + assert len(leaves) == 1 + assert "elm make src/Main.elm" in leaves[0].cmd + + +def test_nested_tuple_is_flattened(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + (tmp_path / "api").mkdir() + pkg = hm.haskell(ghc="9.6.7").cabal(path="api") + a = hm.sh("a") + out = as_leaves((a, pkg, (a, a))) + # Order preserved; pkg unwrapped to its build leaf. 
+ assert len(out) == 4 + + +def test_unknown_type_raises_typeerror(): + with pytest.raises(TypeError, match=r"hm\.target: cannot use"): + as_leaves(42) # type: ignore[arg-type] + + +def test_unknown_type_message_lists_supported_types(): + with pytest.raises(TypeError, match=r"Step.*HaskellPackage.*ElmProject"): + as_leaves("oops") # type: ignore[arg-type] diff --git a/tests/test_toolchain.py b/tests/test_toolchain.py new file mode 100644 index 0000000..25c4195 --- /dev/null +++ b/tests/test_toolchain.py @@ -0,0 +1,108 @@ +"""Shared toolchain helpers — apt-install template and chain builder.""" +from __future__ import annotations + +from datetime import timedelta + +from harmont._step import scratch +from harmont._toolchain import ( + APT_TTL, + apt_install_cmd, + make_install_chain, + node_install_cmd, +) +from harmont.cache import CacheNone, CacheOnChange, CacheTTL + + +def test_apt_install_cmd_runs_update_and_install(): + out = apt_install_cmd(("curl", "git")) + assert "apt-get update" in out + assert "apt-get install -y curl git" in out + + +def test_apt_install_cmd_preserves_package_order(): + out = apt_install_cmd(("a", "b", "c")) + assert "a b c" in out + + +def test_apt_ttl_is_one_day(): + assert timedelta(days=1) == APT_TTL + + +def test_make_install_chain_default_emits_apt_then_tool(): + tool = make_install_chain( + apt_packages=("curl",), + install_cmd="install_tool.sh", + install_cache=CacheOnChange(paths=("lockfile",)), + lang_tag="lang", + install_tag="tool", + image=None, + base=None, + ) + apt = tool.parent + assert apt is not None + assert "apt-get install -y curl" in (apt.cmd or "") + assert apt.label == ":lang: apt-base" + assert isinstance(apt.cache, CacheTTL) + assert apt.cache.duration == APT_TTL + assert tool.cmd == "install_tool.sh" + assert tool.label == ":lang: tool" + assert isinstance(tool.cache, CacheOnChange) + assert tool.cache.paths == ("lockfile",) + + +def test_make_install_chain_with_base_skips_apt(): + base = scratch().sh("custom 
base", label="base") + tool = make_install_chain( + apt_packages=("curl",), + install_cmd="install.sh", + install_cache=CacheNone(), + lang_tag="lang", + install_tag="tool", + image=None, + base=base, + ) + assert tool.parent is base + assert tool.cmd == "install.sh" + assert tool.label == ":lang: tool" + + +def test_make_install_chain_image_set_on_apt_step_only(): + tool = make_install_chain( + apt_packages=("curl",), + install_cmd="install.sh", + install_cache=CacheNone(), + lang_tag="lang", + install_tag="tool", + image="ubuntu:24.04", + base=None, + ) + apt = tool.parent + assert apt is not None + assert apt.image == "ubuntu:24.04" + assert tool.image is None + + +def test_make_install_chain_image_ignored_with_base(): + base = scratch().sh("base") + tool = make_install_chain( + apt_packages=("curl",), + install_cmd="install.sh", + install_cache=CacheNone(), + lang_tag="lang", + install_tag="tool", + image="ubuntu:24.04", + base=base, + ) + assert tool.parent is base + assert tool.image is None + + +def test_node_install_cmd_setup_major(): + out = node_install_cmd("20") + assert "deb.nodesource.com/setup_20.x" in out + assert "apt-get install -y nodejs" in out + + +def test_node_install_cmd_strips_dot_x_suffix(): + out = node_install_cmd("20.x") + assert "deb.nodesource.com/setup_20.x" in out diff --git a/tests/test_toolchain_compose.py b/tests/test_toolchain_compose.py new file mode 100644 index 0000000..2c69ee1 --- /dev/null +++ b/tests/test_toolchain_compose.py @@ -0,0 +1,82 @@ +"""Cross-cutting toolchain composition tests (HAR-15).""" +from __future__ import annotations + +import harmont as hm + +# Several tests construct `ghc.package("api")`, whose default cache_paths +# globs `api/*.cabal` relative to cwd. The autouse fixture in +# tests/conftest.py pins cwd to the repo root so that glob resolves to +# the real `api/harmont-api.cabal`. 
+ + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def test_stack_npm_on_spec_step(): + """spec -> node install -> npm ci -> codegen. Used by dogfood.""" + spec = hm.scratch().sh("make openapi", label=":lock: spec") + node = hm.npm(path="app/codegen", base=spec) + p = hm.pipeline(node.install()) + cmds = _cmds(p) + assert any("make openapi" in c for c in cmds) + assert any("deb.nodesource.com" in c for c in cmds) + assert any("npm ci" in c for c in cmds) + # No apt-base step: base= skipped it. (Note: nodesource installer + # itself runs `apt-get install -y nodejs`, so don't assert on + # apt-get; check the apt-base sentinel `ca-certificates`.) + assert not any("ca-certificates" in c for c in cmds) + + +def test_stack_elm_on_npm(): + """npm -> elm composition. Elm forks off node-installed Step.""" + node = hm.npm(path="app/codegen") + elm = hm.elm(path="app", base=node.installed) + p = hm.pipeline(elm.make("src/Main.elm"), default_image="ubuntu:24.04") + cmds = _cmds(p) + # One apt-base (from npm) + assert len([c for c in cmds if "ca-certificates" in c]) == 1 + # node install (from npm) + elm install + assert any("npm ci" in c for c in cmds) + assert any("elm/compiler/releases" in c for c in cmds) + + +def test_escape_hatch_consistent_across_toolchains(): + """Every toolchain exposes .installed as a public Step.""" + rust = hm.rust(path=".") + ghc = hm.haskell(ghc="9.6.7") + api = ghc.package("api") + node = hm.npm(path=".") + elm = hm.elm(path=".") + assert isinstance(rust.installed, hm.Step) + assert isinstance(ghc.installed, hm.Step) + assert isinstance(api.installed, hm.Step) + assert isinstance(node.installed, hm.Step) + assert isinstance(elm.installed, hm.Step) + + +def test_deterministic_emission(): + """Two identical pipeline constructions emit equal IR dicts.""" + def build() -> dict: + rust = hm.rust(path="cli") + return hm.pipeline(rust.build(), rust.test(), + default_image="ubuntu:24.04") + + 
assert build() == build() + + +def test_mixed_pipeline_compiles(): + """A pipeline mixing all four toolchains lowers without error.""" + ghc = hm.haskell(ghc="9.6.7") + rust = hm.rust(path="cli") + node = hm.npm(path="app/codegen") + elm = hm.elm(path="app", base=node.installed) + p = hm.pipeline( + ghc.package("api").test(), + rust.test(), rust.clippy(), + node.install(), + elm.make("src/Main.elm"), + default_image="ubuntu:24.04", + ) + assert p["version"] == "0" + assert len(p["steps"]) > 0 diff --git a/tests/test_triggers.py b/tests/test_triggers.py new file mode 100644 index 0000000..c9ae58b --- /dev/null +++ b/tests/test_triggers.py @@ -0,0 +1,76 @@ +"""Trigger constructors — push/pull_request/schedule.""" +import pytest + +import harmont as hm + + +def test_push_branch_string(): + t = hm.push(branch="main") + assert t.to_dict() == {"event": "push", "branches": ["main"]} + + +def test_push_branch_list(): + t = hm.push(branch=["main", "release/*"]) + assert t.to_dict() == {"event": "push", "branches": ["main", "release/*"]} + + +def test_push_tag_string(): + t = hm.push(tag="v*") + assert t.to_dict() == {"event": "push", "tags": ["v*"]} + + +def test_push_both_branch_and_tag_raises(): + with pytest.raises(ValueError, match=r"hm\.push: pass exactly one of branch or tag"): + hm.push(branch="main", tag="v*") + + +def test_push_neither_raises(): + with pytest.raises(ValueError, match=r"hm\.push: pass exactly one of branch or tag"): + hm.push() + + +def test_pull_request_branches_string(): + t = hm.pull_request(branches="main") + assert t.to_dict() == { + "event": "pull_request", + "branches": ["main"], + "types": ["opened", "synchronize", "reopened"], + } + + +def test_pull_request_no_filter(): + t = hm.pull_request() + assert t.to_dict() == { + "event": "pull_request", + "types": ["opened", "synchronize", "reopened"], + } + + +def test_pull_request_types_override(): + t = hm.pull_request(types=["opened", "ready_for_review"]) + assert t.to_dict()["types"] == 
["opened", "ready_for_review"] + + +def test_pull_request_invalid_type(): + with pytest.raises(ValueError, match=r"unknown pull_request type 'merged'"): + hm.pull_request(types=["merged"]) + + +def test_pull_request_empty_types(): + with pytest.raises(ValueError, match=r"hm\.pull_request: types must be non-empty"): + hm.pull_request(types=[]) + + +def test_schedule_valid_cron(): + t = hm.schedule(cron="0 4 * * *") + assert t.to_dict() == {"event": "schedule", "cron": "0 4 * * *"} + + +def test_schedule_invalid_cron_raises(): + with pytest.raises(ValueError, match=r"hm\.schedule: invalid cron expression"): + hm.schedule(cron="not a cron") + + +def test_schedule_empty_cron_raises(): + with pytest.raises(ValueError, match=r"hm\.schedule: invalid cron expression"): + hm.schedule(cron="") diff --git a/tests/test_typing_markers.py b/tests/test_typing_markers.py new file mode 100644 index 0000000..336dc0a --- /dev/null +++ b/tests/test_typing_markers.py @@ -0,0 +1,74 @@ +"""Target[T] and BaseImage(X) annotation markers (HAR-28 follow-up).""" +from __future__ import annotations + +import typing +from typing import Annotated, get_args, get_type_hints + +import pytest + +import harmont as hm +from harmont._step import Step +from harmont._typing import _TARGET_MARKER, BaseImage, Target, _BaseImageMarker + + +def test_target_subscript_returns_annotated_with_marker(): + annot = Target[Step] + assert typing.get_origin(annot) is not None # Annotated[Step, ...] + args = get_args(annot) + assert args[0] is Step + assert _TARGET_MARKER in args[1:] + + +def test_target_with_different_types(): + from harmont.haskell import HaskellPackage + + annot = Target[HaskellPackage] + args = get_args(annot) + assert args[0] is HaskellPackage + + +def test_target_used_as_param_annotation_resolves_via_get_type_hints(): + def fn(api: Target[Step]) -> Step: # type: ignore[empty-body] + ... 
+ + hints = get_type_hints(fn, include_extras=True) + annot = hints["api"] + args = get_args(annot) + assert args[0] is Step + assert _TARGET_MARKER in args[1:] + + +def test_base_image_returns_marker_instance(): + marker = BaseImage("ubuntu-24.04") + assert isinstance(marker, _BaseImageMarker) + assert marker.image == "ubuntu-24.04" + + +def test_base_image_in_annotated_metadata(): + def fn(base: Annotated[Step, BaseImage("ubuntu-24.04")]) -> Step: # type: ignore[empty-body] + ... + + hints = get_type_hints(fn, include_extras=True) + annot = hints["base"] + args = get_args(annot) + assert args[0] is Step + markers = [a for a in args[1:] if isinstance(a, _BaseImageMarker)] + assert len(markers) == 1 + assert markers[0].image == "ubuntu-24.04" + + +def test_base_image_rejects_empty_string(): + with pytest.raises(TypeError, match="hm: BaseImage\\(\\.\\.\\.\\) takes a non-empty image"): + BaseImage("") + + +def test_base_image_rejects_non_string(): + with pytest.raises(TypeError, match="hm: BaseImage\\(\\.\\.\\.\\) takes a non-empty image"): + BaseImage(42) # type: ignore[arg-type] + + +def test_target_and_base_image_are_exported_from_harmont(): + assert hm.Target is Target + assert hm.BaseImage is BaseImage + assert "Target" in hm.__all__ + assert "BaseImage" in hm.__all__ diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 0000000..4cf2e40 --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,30 @@ +"""Tests for the small surviving validator set.""" + +from __future__ import annotations + +import pytest + +from harmont._validation import validate_positive_int + + +def test_positive_int_accepts_none(): + validate_positive_int(None, "f", "C") + + +def test_positive_int_accepts_one(): + validate_positive_int(1, "f", "C") + + +def test_positive_int_rejects_zero(): + with pytest.raises(ValueError, match="positive integer"): + validate_positive_int(0, "f", "C") + + +def test_positive_int_rejects_negative(): + with 
pytest.raises(ValueError, match="positive integer"): + validate_positive_int(-3, "f", "C") + + +def test_positive_int_rejects_non_int(): + with pytest.raises(ValueError, match="positive integer"): + validate_positive_int("12", "f", "C") # type: ignore[arg-type] diff --git a/tests/test_zig.py b/tests/test_zig.py new file mode 100644 index 0000000..2d40b53 --- /dev/null +++ b/tests/test_zig.py @@ -0,0 +1,67 @@ +"""Zig toolchain tests.""" +from __future__ import annotations + +import pytest + +import harmont as hm + + +def _cmds(p: dict) -> list[str]: + return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + + +def _step_by_substring(p: dict, needle: str) -> dict: + for s in p["steps"]: + if s.get("type") == "command" and needle in (s.get("cmd") or ""): + return s + raise AssertionError(needle) + + +def test_zig_object_form_full_chain(): + z = hm.zig(path="svc") + p = hm.pipeline(z.build(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert any("ziglang.org" in c for c in cmds) + assert any("cd svc && zig build" in c for c in cmds) + + +def test_zig_actions_share_install(): + z = hm.zig(path="svc") + p = hm.pipeline(z.build(), z.test(), z.fmt(), default_image="ubuntu:24.04") + cmds = _cmds(p) + assert len([c for c in cmds if "ziglang.org" in c]) == 1 + assert any("zig build test" in c for c in cmds) + assert any("zig fmt --check ." in c for c in cmds) + + +def test_zig_version_in_install_cmd(): + z = hm.zig(path=".", version="0.13.0") + p = hm.pipeline(z.build()) + install = _step_by_substring(p, "ziglang.org") + assert "0.13.0" in install["cmd"] + + +def test_zig_invalid_version_rejected(): + with pytest.raises(ValueError, match="version"): + hm.zig(version="oops!") + + +def test_zig_action_labels_auto_generated(): + z = hm.zig(path=".") + assert z.build().label == ":zig: . build" + assert z.test().label == ":zig: . test" + assert z.fmt().label == ":zig: . 
fmt" + + +def test_zig_bare_form_actions(): + p = hm.pipeline(hm.zig.build(), hm.zig.test(), hm.zig.fmt()) + cmds = _cmds(p) + assert any("zig build" in c for c in cmds) + assert any("zig fmt --check ." in c for c in cmds) + + +def test_zig_with_base_skips_apt(): + base = hm.scratch().sh("custom base", label="base") + z = hm.zig(path="svc", base=base) + p = hm.pipeline(z.build(), default_image="ubuntu:24.04") + assert not any("apt-get install" in c for c in _cmds(p)) diff --git a/tests/test_zig_toolchain.py b/tests/test_zig_toolchain.py new file mode 100644 index 0000000..0b86937 --- /dev/null +++ b/tests/test_zig_toolchain.py @@ -0,0 +1,81 @@ +"""Tests for ZigToolchain (the multi-project entry point for hm.zig).""" +from __future__ import annotations + +import json + +import harmont as hm +from harmont.zig import ZigProject, ZigToolchain + + +def test_zig_no_path_returns_toolchain() -> None: + """hm.zig() (without path=) returns a ZigToolchain — not a ZigProject.""" + tc = hm.zig() + assert isinstance(tc, ZigToolchain) + + +def test_zig_with_path_still_returns_project() -> None: + """hm.zig(path=".") preserves the current ZigProject return for back-compat.""" + proj = hm.zig(path=".") + assert isinstance(proj, ZigProject) + + +def test_toolchain_project_returns_zig_project() -> None: + tc = hm.zig() + proj = tc.project(path="lib-a") + assert isinstance(proj, ZigProject) + assert proj.path == "lib-a" + + +def test_two_projects_share_install_step() -> None: + """Critical: two .project() calls on the same toolchain reuse the + same installed Step. 
This is what makes ONE zig install fan out to + N project chains in the v0 IR.""" + tc = hm.zig() + a = tc.project(path="lib-a") + b = tc.project(path="lib-b") + assert a.installed is b.installed + + +def test_pipeline_with_shared_toolchain_emits_one_install() -> None: + """End-to-end: a pipeline that pulls two ZigProjects off the same + ZigToolchain must emit exactly one :zig: install node in the IR.""" + import harmont._registry as reg + import harmont._target as targets + reg.clear_registry() + targets.clear_target_cache() + + @hm.target() + def zig() -> ZigToolchain: + return hm.zig() + + @hm.target() + def lib_a(zig: hm.Target[ZigToolchain]) -> ZigProject: + return zig.project(path="lib-a") + + @hm.target() + def lib_b(zig: hm.Target[ZigToolchain]) -> ZigProject: + return zig.project(path="lib-b") + + @hm.pipeline("ci", default_image="ubuntu:24.04") + def ci( + lib_a: hm.Target[ZigProject], + lib_b: hm.Target[ZigProject], + ) -> tuple[hm.Step, ...]: + return (lib_a.build(), lib_b.build()) + + envelope = json.loads(hm.dump_registry_json()) + steps = envelope["pipelines"][0]["definition"]["steps"] + zig_installs = [s for s in steps if s.get("label") == ":zig: install"] + assert len(zig_installs) == 1, ( + f"expected exactly one :zig: install step, got " + f"{[s['key'] for s in zig_installs]}" + ) + + install_key = zig_installs[0]["key"] + lib_a_build = next(s for s in steps if "lib-a" in (s.get("label") or "")) + lib_b_build = next(s for s in steps if "lib-b" in (s.get("label") or "")) + assert lib_a_build["builds_in"] == install_key + assert lib_b_build["builds_in"] == install_key + + reg.clear_registry() + targets.clear_target_cache() From 240e2f99fbc182f2d5671eea5036d52f1584abd7 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:32:32 -0700 Subject: [PATCH 07/40] docs: add dsls/harmont-py pointer to root CLAUDE.md --- CLAUDE.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index beb73fe..0c53b1d 100644 --- 
a/CLAUDE.md +++ b/CLAUDE.md @@ -12,3 +12,8 @@ Run `cargo build` from the workspace root. Plugin fixtures need the `wasm32-wasip1` target; install with `rustup target add wasm32-wasip1`. For cross-cutting doctrine see [PRINCIPLES.md](../PRINCIPLES.md). + +## Python DSL + +`dsls/harmont-py/` — the `harmont` Python package (pipeline DSL). +See `dsls/harmont-py/CLAUDE.md` for DSL-specific context. From 6d84fc38d9a0465ef937c731b73cdbcc57105420 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:33:08 -0700 Subject: [PATCH 08/40] ci: install harmont-py from local tree in examples workflow --- .github/workflows/examples.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index 8f034f6..eccaebe 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -109,8 +109,7 @@ jobs: # externally-managed marker requires --break-system-packages. - name: Install harmont-py into system Python run: | - git clone --depth 1 https://github.com/harmont-dev/harmont-py /tmp/harmont-py - sudo /usr/bin/python3 -m pip install --break-system-packages /tmp/harmont-py + sudo /usr/bin/python3 -m pip install --break-system-packages dsls/harmont-py /usr/bin/python3 -c "import harmont; print('harmont', harmont.__file__)" - name: Download hm binary From 0c6b011c426e102f8114cb0aca57d1cd1c3fe4a6 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:33:25 -0700 Subject: [PATCH 09/40] ci: merge harmont-py lint/test into ci.yml, use local tree for integration --- .github/workflows/ci.yml | 63 +++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b82d72..3e0e067 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,40 @@ jobs: - name: cargo clippy (strict) run: cargo clippy --all-targets -p harmont-cli -- -D warnings + 
python-lint: + name: harmont-py (pytest + ruff + mypy) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install harmont + dev extras + working-directory: dsls/harmont-py + run: pip install -e '.[dev]' + + - name: ruff check + working-directory: dsls/harmont-py + run: ruff check . + + - name: mypy + working-directory: dsls/harmont-py + run: mypy harmont + + - name: pytest + working-directory: dsls/harmont-py + run: | + pytest -v \ + --deselect tests/test_gradle.py \ + --deselect tests/test_haskell.py + integration: name: docker-gated integration test runs-on: ubuntu-latest @@ -40,52 +74,27 @@ jobs: # main always runs it. if: github.event_name == 'push' || (github.event_name == 'pull_request' && !github.event.pull_request.draft) steps: - - name: Check out harmont-cli - uses: actions/checkout@v4 - with: - path: harmont-cli - - - name: Check out harmont-py (matching branch, with main fallback) - uses: actions/checkout@v4 - with: - repository: harmont-dev/harmont-py - ref: ${{ github.head_ref || github.ref_name }} - path: harmont-py - continue-on-error: true - id: checkout-py-branch - - - name: Fall back to harmont-py main - if: steps.checkout-py-branch.outcome != 'success' - uses: actions/checkout@v4 - with: - repository: harmont-dev/harmont-py - ref: main - path: harmont-py + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: targets: wasm32-wasip1 - uses: Swatinem/rust-cache@v2 - with: - workspaces: harmont-cli - uses: actions/setup-python@v5 with: python-version: "3.12" - name: Install harmont-py (editable) - working-directory: harmont-py - run: pip install -e . 
+ run: pip install -e dsls/harmont-py - name: cargo build --tests (with docker-integration feature) - working-directory: harmont-cli run: cargo build -p harmont-cli --tests --features docker-integration - name: Pre-pull python:3.12-alpine run: docker pull python:3.12-alpine - name: cargo test --features docker-integration -- --ignored - working-directory: harmont-cli env: HARMONT_PYTHON: python3 run: | From dc203ce7fb12f9ea625deb92bfd80d25d9bd52b6 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:34:12 -0700 Subject: [PATCH 10/40] ci: add PyPI publish job to release workflow --- .github/workflows/release.yml | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3e37b7b..c017c95 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -87,3 +87,44 @@ jobs: else cargo publish -p harmont-cli --token ${{ secrets.CRATES_IO_TOKEN }} --allow-dirty fi + + pypi-publish: + name: Publish to PyPI + runs-on: ubuntu-latest + environment: + name: release + url: https://pypi.org/project/harmont/ + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Set version from tag + run: | + VERSION="${GITHUB_REF_NAME#v}" + echo "VERSION=$VERSION" >> "$GITHUB_ENV" + sed -i '0,/version = "0.0.0-dev"/s//version = "'"$VERSION"'"/' dsls/harmont-py/pyproject.toml + grep -n "^version" dsls/harmont-py/pyproject.toml + + - name: Install build + run: python -m pip install --upgrade build + + - name: Build sdist and wheel + working-directory: dsls/harmont-py + run: python -m build + + - name: Inspect dist + working-directory: dsls/harmont-py + run: | + ls -la dist/ + test -f dist/harmont-${VERSION}.tar.gz + test -f dist/harmont-${VERSION}-py3-none-any.whl + + - name: Publish to PyPI via Trusted Publishing + uses: pypa/gh-action-pypi-publish@release/v1 + with: 
+ packages-dir: dsls/harmont-py/dist/ From 70f7613ad66cb9cdd74c79b2d1517357fbe6c802 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:34:23 -0700 Subject: [PATCH 11/40] docs: update RELEASING.md for monorepo workflow --- dsls/harmont-py/RELEASING.md | 113 +++++++---------------------------- 1 file changed, 20 insertions(+), 93 deletions(-) diff --git a/dsls/harmont-py/RELEASING.md b/dsls/harmont-py/RELEASING.md index 9b3b3d9..1c3ded7 100644 --- a/dsls/harmont-py/RELEASING.md +++ b/dsls/harmont-py/RELEASING.md @@ -1,110 +1,37 @@ -# Releasing harmont-py +# Releasing harmont (Python DSL) -This package lives upstream at and is mirrored into the private Harmont monorepo as `cidsl/py/`. The monorepo is the source of truth; the public repo receives a `git subtree push`. - -## One-time setup - -```sh -gh repo create harmont-dev/harmont-py --public \ - --description "Python DSL for Harmont CI pipelines" \ - --homepage https://harmont.dev - -# Initial split from the monorepo (run from the monorepo root) -git subtree split --prefix=cidsl/py -b harmont-py-export -git push git@github.com:harmont-dev/harmont-py.git harmont-py-export:main -git branch -D harmont-py-export -``` - -## Ongoing sync (monorepo → public) - -```sh -git subtree push --prefix=cidsl/py git@github.com:harmont-dev/harmont-py.git main -``` - -If subtree-push fails because the public repo has commits that aren't in the monorepo's history, fall back to a fresh split: - -```sh -git subtree split --prefix=cidsl/py -b harmont-py-export -git push git@github.com:harmont-dev/harmont-py.git harmont-py-export:main -git branch -D harmont-py-export -``` - -## Pulling external contributions back (public → monorepo) - -```sh -git subtree pull --prefix=cidsl/py git@github.com:harmont-dev/harmont-py.git main --squash -``` +The `harmont` Python package lives at `dsls/harmont-py/` in the +harmont-cli monorepo. It is published to PyPI alongside the Rust +crates when a version tag is pushed. 
## Cutting a release -Versioning is **driven by git tags on the public mirror**. The release -workflow in `.github/workflows/release.yml` triggers on any tag matching -`v*`, seds the version from the tag into `pyproject.toml`, builds the -sdist and wheel, and publishes to PyPI via Trusted Publishing (OIDC — -no API tokens stored in the repo). +Releases are driven by git tags on this repo. The release workflow +(`.github/workflows/release.yml`) triggers on any tag matching `v*`, +seds the version into `dsls/harmont-py/pyproject.toml`, builds the +sdist and wheel, and publishes to PyPI via Trusted Publishing (OIDC). -### Prerequisites (one-time) - -1. **Configure the PyPI Trusted Publisher** on - with: - - Owner: `harmont-dev` - - Repository: `harmont-py` - - Workflow filename: `release.yml` - - Environment: `release` - - If the `harmont` project does not yet exist on PyPI, create it via a - one-off manual `twine upload` first (or use the "Add a pending - publisher" flow at ), - then add the Trusted Publisher. - -2. **Create the `release` GitHub Environment** on - . - Recommended protection rules: - - Deployment branches and tags → "Selected branches and tags" → - add tag rule `v*`. - - (Optional) required reviewers on the environment so a human has - to click "approve" before publish runs. - -### Releasing - -1. Update `CHANGELOG.md` or release notes locally if you keep them. -2. Tag from the monorepo (source of truth): +1. Tag from the repo root: ```sh git tag v<version> - git subtree push --prefix=cidsl/py git@github.com:harmont-dev/harmont-py.git main - git push git@github.com:harmont-dev/harmont-py.git v + git push origin v<version> ``` - The tag has to land on the **public** repo for the workflow to fire. - The subtree-push lands the corresponding `main` commit there first - so the tag points at the right SHA. - -3. Watch the run: +2.
Watch the run: ```sh gh run watch \ - "$(gh run list --repo harmont-dev/harmont-py --workflow release.yml \ - --limit 1 --json databaseId --jq '.[0].databaseId')" \ - --repo harmont-dev/harmont-py --exit-status + "$(gh run list --workflow release.yml --limit 1 --json databaseId --jq '.[0].databaseId')" \ + --exit-status ``` -4. Confirm the release on . -5. (Optional) Create a GitHub Release on the same tag with notes: - - ```sh - gh release create v --repo harmont-dev/harmont-py \ - --title "harmont v" --generate-notes - ``` +3. Confirm the release on <https://pypi.org/project/harmont/>. -### Troubleshooting +## PyPI Trusted Publisher setup -- **`Trusted publishing exchange failed`:** the GH Environment name in - the workflow does not match the one configured on PyPI. Both must be - exactly `release`. -- **`File already exists`:** the version was already published to PyPI. - PyPI is append-only — bump the version, re-tag, re-push. -- **`No files to upload`:** the build step did not produce - `dist/*.tar.gz` and `dist/*.whl`. Inspect the `Build sdist and wheel` - step output. Most common cause: `setuptools` couldn't find a package - to build because `pyproject.toml` was mid-edit.
+Configure on <https://pypi.org/manage/project/harmont/settings/publishing/>: +- Owner: `harmont-dev` +- Repository: `harmont-cli` (this repo, not the archived harmont-py) +- Workflow filename: `release.yml` +- Environment: `release` From abf766ce8a31e932ef427b69a751fad48360c74f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:53:26 -0700 Subject: [PATCH 12/40] chore: add daggy and anyhow dependencies to hm-pipeline-ir --- Cargo.lock | 32 ++++++++++++++++++++++++++++++-- Cargo.toml | 2 ++ crates/hm-pipeline-ir/Cargo.toml | 2 ++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 588cc41..df51441 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -890,6 +890,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "daggy" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804169db156b21258a2545757336922d93dfa229892c75911a0ad141aa0ff241" +dependencies = [ + "petgraph 0.8.3", +] + [[package]] name = "deadpool" version = "0.12.3" @@ -1225,6 +1234,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flate2" version = "1.1.9" @@ -1634,6 +1649,8 @@ dependencies = [ name = "hm-pipeline-ir" version = "0.0.0-dev" dependencies = [ + "anyhow", + "daggy", "insta", "schemars 0.8.22", "serde", @@ -2627,7 +2644,18 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset", + "fixedbitset 0.4.2", + "indexmap 2.14.0", +] + +[[package]] +name = "petgraph" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum =
"8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset 0.5.7", + "hashbrown 0.15.5", "indexmap 2.14.0", ] @@ -4461,7 +4489,7 @@ dependencies = [ "im-rc", "indexmap 2.14.0", "log", - "petgraph", + "petgraph 0.6.5", "serde", "serde_derive", "serde_yaml", diff --git a/Cargo.toml b/Cargo.toml index 389ab2d..58e8eca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,8 @@ hm-plugin-protocol = { path = "crates/hm-plugin-protocol", version = "0.0.0-dev" hm-pipeline-ir = { path = "crates/hm-pipeline-ir", version = "0.0.0-dev" } hm-plugin-sdk = { path = "crates/hm-plugin-sdk", version = "0.0.0-dev" } hm-util = { path = "crates/hm-util", version = "0.0.0-dev" } +anyhow = "1" +daggy = "0.9" serde = { version = "1", features = ["derive"] } serde_json = "1" schemars = { version = "0.8", features = ["preserve_order", "semver", "uuid1", "chrono"] } diff --git a/crates/hm-pipeline-ir/Cargo.toml b/crates/hm-pipeline-ir/Cargo.toml index 086ad1e..4abaf1c 100644 --- a/crates/hm-pipeline-ir/Cargo.toml +++ b/crates/hm-pipeline-ir/Cargo.toml @@ -7,6 +7,8 @@ repository.workspace = true description = "Pipeline IR — the v0 wire-format schema consumed by hm." 
[dependencies] +anyhow = { workspace = true } +daggy = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } schemars = { workspace = true } From cb33ef579bb02db8c7488f1dead8cd1a4a44b60f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:56:01 -0700 Subject: [PATCH 13/40] feat: add daggy-backed PipelineGraph to hm-pipeline-ir --- crates/hm-pipeline-ir/src/graph.rs | 246 +++++++++++++++++++++ crates/hm-pipeline-ir/src/lib.rs | 2 + crates/hm-pipeline-ir/tests/graph_build.rs | 92 ++++++++ 3 files changed, 340 insertions(+) create mode 100644 crates/hm-pipeline-ir/src/graph.rs create mode 100644 crates/hm-pipeline-ir/tests/graph_build.rs diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs new file mode 100644 index 0000000..1b4e2d5 --- /dev/null +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -0,0 +1,246 @@ +use std::collections::BTreeMap; + +use anyhow::{Context, Result}; +use daggy::petgraph::visit::IntoNodeReferences; +use daggy::{Dag, NodeIndex, Walker}; + +use crate::{CommandStep, Pipeline, Step}; + +#[derive(Debug, Clone)] +pub struct NodeWeight { + pub step: CommandStep, + pub env: BTreeMap, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EdgeKind { + BuildsIn, + DependsOn, +} + +#[derive(Debug, Clone)] +pub struct PipelineGraph { + dag: Dag, + default_image: Option, +} + +struct FlatStep { + step: CommandStep, + extra_deps: Vec, +} + +impl PipelineGraph { + pub fn build(pipeline: &Pipeline) -> Result { + let flat = flatten_steps(&pipeline.steps); + let pipeline_env = pipeline.env.clone().unwrap_or_default(); + + let mut dag: Dag = Dag::new(); + let mut key_to_idx: BTreeMap = BTreeMap::new(); + + for f in &flat { + let mut env = pipeline_env.clone(); + if let Some(e) = &f.step.env { + env.extend(e.clone()); + } + let idx = dag.add_node(NodeWeight { + step: f.step.clone(), + env, + }); + key_to_idx.insert(f.step.key.clone(), idx); + } + + for f in &flat { + let child = 
key_to_idx[&f.step.key]; + + if let Some(parent_key) = &f.step.builds_in { + let parent = *key_to_idx.get(parent_key).ok_or_else(|| { + anyhow::anyhow!( + "step '{}' builds_in references unknown step '{}'", + f.step.key, + parent_key + ) + })?; + dag.add_edge(parent, child, EdgeKind::BuildsIn) + .context("cycle detected adding builds_in edge")?; + } + + for dep_key in &f.extra_deps { + let parent = *key_to_idx.get(dep_key).ok_or_else(|| { + anyhow::anyhow!( + "step '{}' has wait-barrier dep on unknown step '{}'", + f.step.key, + dep_key + ) + })?; + if f.step.builds_in.as_deref() == Some(dep_key) { + continue; + } + dag.add_edge(parent, child, EdgeKind::DependsOn) + .context("cycle detected adding wait-barrier edge")?; + } + } + + if let Some(default_img) = pipeline.default_image.as_deref() { + for idx in dag.graph().node_indices() { + let has_builds_in_parent = dag + .parents(idx) + .iter(&dag) + .any(|(e, _)| dag.edge_weight(e).copied() == Some(EdgeKind::BuildsIn)); + if !has_builds_in_parent { + if let Some(w) = dag.node_weight_mut(idx) { + if w.step.image.is_none() { + w.step.image = Some(default_img.to_string()); + } + } + } + } + } + + Ok(Self { + dag, + default_image: pipeline.default_image.clone(), + }) + } + + #[must_use] + pub fn node_count(&self) -> usize { + self.dag.node_count() + } + + #[must_use] + pub fn default_image(&self) -> Option<&str> { + self.default_image.as_deref() + } + + #[must_use] + pub fn node_weight(&self, idx: NodeIndex) -> &NodeWeight { + &self.dag[idx] + } + + #[must_use] + pub fn node_index_by_key(&self, key: &str) -> Option { + self.dag + .graph() + .node_references() + .find(|(_, w)| w.step.key == key) + .map(|(idx, _)| idx) + } + + #[must_use] + pub fn parent_keys(&self, idx: NodeIndex) -> Vec { + self.dag + .parents(idx) + .iter(&self.dag) + .map(|(_, parent_idx)| self.dag[parent_idx].step.key.clone()) + .collect() + } + + #[must_use] + pub fn builds_in_parent(&self, idx: NodeIndex) -> Option { + self.dag + .parents(idx) + 
.iter(&self.dag) + .find(|(e, _)| self.dag.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) + .map(|(_, parent_idx)| parent_idx) + } + + #[must_use] + pub fn builds_in_children(&self, idx: NodeIndex) -> Vec { + self.dag + .children(idx) + .iter(&self.dag) + .filter(|(e, _)| self.dag.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) + .map(|(_, child_idx)| child_idx) + .collect() + } + + #[must_use] + pub fn all_parents(&self, idx: NodeIndex) -> Vec { + self.dag + .parents(idx) + .iter(&self.dag) + .map(|(_, parent_idx)| parent_idx) + .collect() + } + + #[must_use] + pub fn is_chain_step(&self, idx: NodeIndex) -> bool { + self.builds_in_parent(idx).is_some_and(|parent| { + self.builds_in_children(parent).len() == 1 && self.all_parents(idx).len() == 1 + }) + } + + #[must_use] + pub fn chains(&self) -> Vec> { + let mut placed: BTreeMap = BTreeMap::new(); + let mut out: Vec> = Vec::new(); + let mut indices: Vec = self.dag.graph().node_indices().collect(); + indices.sort(); + for root in &indices { + if *placed.get(root).unwrap_or(&false) || self.is_chain_step(*root) { + continue; + } + let mut chain = vec![*root]; + placed.insert(*root, true); + let mut cur = *root; + while let Some(next) = self + .builds_in_children(cur) + .into_iter() + .find(|&c| self.is_chain_step(c)) + { + chain.push(next); + placed.insert(next, true); + cur = next; + } + out.push(chain); + } + out + } + + #[must_use] + pub fn chain_deps(&self, chains: &[Vec]) -> Vec> { + let mut chain_index: BTreeMap = BTreeMap::new(); + for (ci, ch) in chains.iter().enumerate() { + for &n in ch { + chain_index.insert(n, ci); + } + } + let mut out: Vec> = vec![Vec::new(); chains.len()]; + for (ci, ch) in chains.iter().enumerate() { + let mut seen = std::collections::BTreeSet::new(); + for &n in ch { + for parent in self.all_parents(n) { + let dep_ci = chain_index[&parent]; + if dep_ci != ci { + seen.insert(dep_ci); + } + } + } + out[ci] = seen.into_iter().collect(); + } + out + } + + pub fn 
node_indices(&self) -> impl Iterator + '_ { + self.dag.graph().node_indices() + } +} + +fn flatten_steps(steps: &[Step]) -> Vec { + let mut out: Vec = Vec::new(); + let mut implicit_wait_targets: Vec = Vec::new(); + for s in steps { + match s { + Step::Command(c) => { + out.push(FlatStep { + step: (**c).clone(), + extra_deps: implicit_wait_targets.clone(), + }); + } + Step::Wait(_) => { + implicit_wait_targets = out.iter().map(|f| f.step.key.clone()).collect(); + } + } + } + out +} diff --git a/crates/hm-pipeline-ir/src/lib.rs b/crates/hm-pipeline-ir/src/lib.rs index 03cbd11..1d02054 100644 --- a/crates/hm-pipeline-ir/src/lib.rs +++ b/crates/hm-pipeline-ir/src/lib.rs @@ -73,3 +73,5 @@ pub struct Cache { #[serde(default)] pub key: Option, } + +pub mod graph; diff --git a/crates/hm-pipeline-ir/tests/graph_build.rs b/crates/hm-pipeline-ir/tests/graph_build.rs new file mode 100644 index 0000000..05e4046 --- /dev/null +++ b/crates/hm-pipeline-ir/tests/graph_build.rs @@ -0,0 +1,92 @@ +#![allow( + clippy::cargo_common_metadata, + clippy::multiple_crate_versions, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic +)] + +use hm_pipeline_ir::graph::PipelineGraph; +use hm_pipeline_ir::Pipeline; + +fn decode(json: &[u8]) -> Pipeline { + serde_json::from_slice(json).unwrap() +} + +#[test] +fn builds_simple_chain() { + let p = decode(br#"{ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, + {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + assert_eq!(g.node_count(), 3); + assert_eq!(g.default_image(), Some("ubuntu:24.04")); +} + +#[test] +fn rejects_unknown_builds_in() { + let p = decode(br#"{ + "version": "0", + "steps": [ + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "missing"} + ] + }"#); + let err = 
PipelineGraph::build(&p).unwrap_err(); + assert!( + err.to_string().contains("missing") || err.to_string().contains("unknown"), + "error should mention the missing key: {err}" + ); +} + +#[test] +fn root_inherits_default_image() { + let p = decode(br#"{ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let node = g.node_weight(g.node_index_by_key("a").unwrap()); + assert_eq!(node.step.image.as_deref(), Some("ubuntu:24.04")); +} + +#[test] +fn child_does_not_inherit_default_image() { + let p = decode(br#"{ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let b = g.node_weight(g.node_index_by_key("b").unwrap()); + assert!(b.step.image.is_none()); +} + +#[test] +fn wait_inserts_implicit_deps() { + let p = decode(br#"{ + "version": "0", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b"}, + {"type": "wait"}, + {"type": "command", "key": "c", "cmd": "echo c"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let c = g.node_index_by_key("c").unwrap(); + let parents = g.parent_keys(c); + assert!(parents.contains(&"a".to_string())); + assert!(parents.contains(&"b".to_string())); +} From e741c626248c628e9d3f50d5911d8c0ad93d9853 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 15:57:22 -0700 Subject: [PATCH 14/40] test: add chain and chain_deps tests for PipelineGraph --- crates/hm-pipeline-ir/tests/graph_build.rs | 122 +++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/crates/hm-pipeline-ir/tests/graph_build.rs b/crates/hm-pipeline-ir/tests/graph_build.rs index 05e4046..aa7c4f1 100644 --- a/crates/hm-pipeline-ir/tests/graph_build.rs +++ 
b/crates/hm-pipeline-ir/tests/graph_build.rs @@ -90,3 +90,125 @@ fn wait_inserts_implicit_deps() { assert!(parents.contains(&"a".to_string())); assert!(parents.contains(&"b".to_string())); } + +#[test] +fn chain_detection() { + let p = decode(br#"{ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, + {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let a = g.node_index_by_key("a").unwrap(); + let b = g.node_index_by_key("b").unwrap(); + let c = g.node_index_by_key("c").unwrap(); + assert!(!g.is_chain_step(a)); + assert!(g.is_chain_step(b)); + assert!(g.is_chain_step(c)); +} + +#[test] +fn fork_breaks_chain() { + let p = decode(br#"{ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, + {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "a"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let b = g.node_index_by_key("b").unwrap(); + let c = g.node_index_by_key("c").unwrap(); + assert!(!g.is_chain_step(b)); + assert!(!g.is_chain_step(c)); +} + +#[test] +fn chains_partition_includes_every_node_once() { + let p = decode(br#"{ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, + {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"}, + {"type": "command", "key": "d", "cmd": "echo d", "builds_in": "a"}, + {"type": "command", "key": "e", "cmd": "echo e"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let chains = g.chains(); + let mut all_nodes: Vec<_> = chains.iter().flatten().copied().collect(); + all_nodes.sort(); + assert_eq!(all_nodes.len(), 5, 
"every node in exactly one chain"); + + let b = g.node_index_by_key("b").unwrap(); + let c = g.node_index_by_key("c").unwrap(); + let bc_chain = chains.iter().find(|ch| ch.contains(&b)).unwrap(); + assert_eq!(*bc_chain, vec![b, c]); +} + +#[test] +fn chain_deps_cross_chain() { + let p = decode(br#"{ + "version": "0", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, + {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"}, + {"type": "command", "key": "d", "cmd": "echo d", "builds_in": "a"}, + {"type": "command", "key": "e", "cmd": "echo e"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let chains = g.chains(); + let deps = g.chain_deps(&chains); + + let find_chain = |key: &str| -> usize { + let idx = g.node_index_by_key(key).unwrap(); + chains.iter().position(|ch| ch.contains(&idx)).unwrap() + }; + let a_ci = find_chain("a"); + let bc_ci = find_chain("b"); + let d_ci = find_chain("d"); + let e_ci = find_chain("e"); + + assert!(deps[a_ci].is_empty()); + assert_eq!(deps[bc_ci], vec![a_ci]); + assert_eq!(deps[d_ci], vec![a_ci]); + assert!(deps[e_ci].is_empty()); +} + +#[test] +fn chain_deps_subsumes_wait_barriers() { + let p = decode(br#"{ + "version": "0", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b"}, + {"type": "wait"}, + {"type": "command", "key": "c", "cmd": "echo c"} + ] + }"#); + let g = PipelineGraph::build(&p).unwrap(); + let chains = g.chains(); + let deps = g.chain_deps(&chains); + let find_chain = |key: &str| -> usize { + let idx = g.node_index_by_key(key).unwrap(); + chains.iter().position(|ch| ch.contains(&idx)).unwrap() + }; + let a_ci = find_chain("a"); + let b_ci = find_chain("b"); + let c_ci = find_chain("c"); + let mut c_deps = deps[c_ci].clone(); + c_deps.sort_unstable(); + let mut want = vec![a_ci, b_ci]; + want.sort_unstable(); + assert_eq!(c_deps, want); +} From 
dd75bec4ef97006110a2631ae074992c33d4b590 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:00:35 -0700 Subject: [PATCH 15/40] refactor: repoint scheduler at hm-pipeline-ir PipelineGraph --- Cargo.lock | 2 + crates/hm/Cargo.toml | 2 + crates/hm/src/orchestrator/graph.rs | 494 +----------------------- crates/hm/src/orchestrator/scheduler.rs | 17 +- 4 files changed, 14 insertions(+), 501 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index df51441..214e39c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1556,6 +1556,7 @@ dependencies = [ "clap", "comfy-table", "console 0.15.11", + "daggy", "dialoguer", "extism", "flate2", @@ -1563,6 +1564,7 @@ dependencies = [ "futures", "futures-util", "hex", + "hm-pipeline-ir", "hm-plugin-protocol", "hm-util", "ignore", diff --git a/crates/hm/Cargo.toml b/crates/hm/Cargo.toml index 45ec7da..90efbbb 100644 --- a/crates/hm/Cargo.toml +++ b/crates/hm/Cargo.toml @@ -67,8 +67,10 @@ futures-util = "0.3" bollard = "0.18" which = "6" extism = { workspace = true } +hm-pipeline-ir = { workspace = true } hm-plugin-protocol = { workspace = true } hm-util = { workspace = true } +daggy = { workspace = true } schemars = { workspace = true } semver = { workspace = true } once_cell = "1" diff --git a/crates/hm/src/orchestrator/graph.rs b/crates/hm/src/orchestrator/graph.rs index 52730e2..dcad175 100644 --- a/crates/hm/src/orchestrator/graph.rs +++ b/crates/hm/src/orchestrator/graph.rs @@ -1,493 +1 @@ -//! Chain-bounded DAG built from a wire-typed `Pipeline`. -//! -//! Each `Node` carries an owned wire `CommandStep`, so the scheduler -//! reads `runner`/`runner_args` directly at dispatch time — no -//! legacy schema round-trip. -//! -//! Two edge sets at the Node level: -//! -//! * `builds_in` — at most one parent per step. Lineage edge: child runs -//! inside the parent's container (locally) or boots from the parent's -//! snapshot (remotely). Implies a sync edge. -//! -//! 
* `depends_on` — synchronisation edges (no state inheritance). -//! Computed from the `builds_in` parent (always added) plus any -//! implicit barriers introduced by `wait` steps in the wire format. -//! v0 has no schema-level `depends_on` — chain DSL is the only -//! topology mechanism. -//! -//! After flattening wait barriers, the graph is a DAG over command -//! steps. A "lineage chain" is a maximal path of `builds_in` edges -//! with single-child branching at every internal node — these are -//! schedulable as a single long-lived container. - -use std::collections::BTreeMap; - -use anyhow::Result; -use hm_plugin_protocol::{CommandStep, Pipeline, Step}; - -#[derive(Debug, Clone)] -pub struct Node { - pub step: CommandStep, - /// Resolved final environment (pipeline.env merged with step.env). - pub env: BTreeMap, - /// `builds_in` parent's index, if any. - pub builds_in: Option, - /// Synchronisation edges (computed from `builds_in` and wait barriers). - pub depends_on: Vec, - /// `builds_in` children's indices. - pub builds_in_children: Vec, -} - -#[derive(Debug, Clone)] -pub struct Graph { - pub nodes: Vec, - pub default_image: Option, -} - -/// A flattened command step plus the keys of any prior steps a `wait` -/// barrier inserted between them and us. The `extra_deps` are step -/// keys, not graph indices — they're resolved during graph build. -struct FlatStep { - step: CommandStep, - extra_deps: Vec, -} - -impl Graph { - /// Build a synchronisation graph from a planner-emitted `Pipeline`. - /// - /// # Errors - /// - /// Returns an error if a step's `builds_in` or `depends_on` - /// references an unknown step key, or if the resulting graph is - /// cyclic. Each cycle error names both ends of the back edge so the - /// user can grep their pipeline for the offending pair. 
- pub fn build(pipeline: &Pipeline) -> Result { - let flat = flatten_steps(&pipeline.steps); - let key_to_idx: BTreeMap = flat - .iter() - .enumerate() - .map(|(i, f)| (f.step.key.clone(), i)) - .collect(); - let pipeline_env = pipeline.env.clone().unwrap_or_default(); - - let mut nodes: Vec = flat - .iter() - .map(|f| { - let mut env = pipeline_env.clone(); - if let Some(e) = &f.step.env { - env.extend(e.clone()); - } - Node { - step: f.step.clone(), - env, - builds_in: None, - depends_on: vec![], - builds_in_children: vec![], - } - }) - .collect(); - - for (i, f) in flat.iter().enumerate() { - if let Some(parent_key) = &f.step.builds_in { - let p = *key_to_idx.get(parent_key).ok_or_else(|| { - anyhow::anyhow!( - "step '{}' builds_in references unknown step '{}'", - f.step.key, - parent_key - ) - })?; - nodes[i].builds_in = Some(p); - nodes[p].builds_in_children.push(i); - if !nodes[i].depends_on.contains(&p) { - nodes[i].depends_on.push(p); - } - } - for dep_key in &f.extra_deps { - let p = *key_to_idx.get(dep_key).ok_or_else(|| { - anyhow::anyhow!( - "step '{}' has wait-barrier dep on unknown step '{}'", - f.step.key, - dep_key - ) - })?; - if !nodes[i].depends_on.contains(&p) { - nodes[i].depends_on.push(p); - } - } - } - - // Root steps (no `builds_in`) with no explicit `image` inherit - // the pipeline's `default_image`. Without this the docker - // plugin's `image_name::resolve_image` falls back to - // `alpine:latest`, which breaks every apt-based example. Patch - // at the host so the plugin stays pipeline-agnostic. - if let Some(default_img) = pipeline.default_image.as_deref() { - for node in &mut nodes { - if node.builds_in.is_none() && node.step.image.is_none() { - node.step.image = Some(default_img.to_string()); - } - } - } - - let g = Self { - nodes, - default_image: pipeline.default_image.clone(), - }; - g.assert_acyclic()?; - Ok(g) - } - - fn assert_acyclic(&self) -> Result<()> { - // Iterative DFS with grey/black colouring. 
When we enter a grey - // node, the parent we came from is the back-edge source; the - // grey node itself is the back-edge target. - let mut color = vec![0u8; self.nodes.len()]; // 0 white, 1 grey, 2 black - for start in 0..self.nodes.len() { - if color[start] == 0 { - let mut stack: Vec<(usize, Option, bool)> = vec![(start, None, false)]; - while let Some((n, parent, exiting)) = stack.pop() { - if exiting { - color[n] = 2; - continue; - } - if color[n] == 1 { - let target = &self.nodes[n].step.key; - match parent { - Some(p) => anyhow::bail!( - "cycle: '{}' is reachable from itself via '{}'", - target, - self.nodes[p].step.key - ), - None => anyhow::bail!("cycle through step '{target}'"), - } - } - color[n] = 1; - stack.push((n, parent, true)); - for &c in &self.nodes[n].depends_on { - if color[c] != 2 { - stack.push((c, Some(n), false)); - } - } - } - } - } - Ok(()) - } - - /// True iff `i` is the unique `builds_in` child of its parent and has - /// no other `depends_on` edges. A chain step can run via `docker exec` - /// in the parent's running container. - #[must_use] - pub fn is_chain_step(&self, i: usize) -> bool { - // depends_on length == 1 because the only edge is the implied - // builds_in dep. - self.nodes[i].builds_in.is_some_and(|p| { - self.nodes[p].builds_in_children.len() == 1 && self.nodes[i].depends_on.len() == 1 - }) - } - - /// For each chain in `chains`, return the set of OTHER chain indices - /// it depends on. A chain depends on another chain iff any node in - /// it has a `depends_on` edge into a node belonging to that other - /// chain. The result is deterministic (ascending chain index). - /// - /// Caller must pass the same partition `chains()` returned — - /// `chain_index[node]` is derived from `chains`. 
- #[must_use] - pub fn chain_deps(&self, chains: &[Vec]) -> Vec> { - let mut chain_index = vec![usize::MAX; self.nodes.len()]; - for (ci, ch) in chains.iter().enumerate() { - for &n in ch { - chain_index[n] = ci; - } - } - let mut out: Vec> = vec![Vec::new(); chains.len()]; - for (ci, ch) in chains.iter().enumerate() { - let mut seen: std::collections::BTreeSet = std::collections::BTreeSet::new(); - for &n in ch { - for &dep in &self.nodes[n].depends_on { - let dep_ci = chain_index[dep]; - if dep_ci != ci { - seen.insert(dep_ci); - } - } - } - out[ci] = seen.into_iter().collect(); - } - out - } - - /// Partition `nodes` into chain units. Each returned vec is a chain - /// `[root, step_1, step_2, …]` where `root` is **not** a chain step - /// and every subsequent index is a chain step descending from the - /// previous one via `builds_in`. Every node appears in exactly one - /// chain. The order of chains is deterministic (root index, ascending). - #[must_use] - pub fn chains(&self) -> Vec> { - let n = self.nodes.len(); - let mut placed = vec![false; n]; - let mut out: Vec> = Vec::new(); - for root in 0..n { - if placed[root] || self.is_chain_step(root) { - continue; - } - let mut chain = vec![root]; - placed[root] = true; - // Follow the unique chain-step child, if any. - let mut cur = root; - while let Some(&next) = self.nodes[cur] - .builds_in_children - .iter() - .find(|&&c| self.is_chain_step(c)) - { - chain.push(next); - placed[next] = true; - cur = next; - } - out.push(chain); - } - out - } -} - -/// Flatten v0 steps. Drops Wait nodes; for each command step that -/// follows a wait, records the keys of the steps that completed -/// before that wait as `extra_deps`. 
-fn flatten_steps(steps: &[Step]) -> Vec { - let mut out: Vec = Vec::new(); - let mut implicit_wait_targets: Vec = Vec::new(); - for s in steps { - match s { - Step::Command(c) => { - out.push(FlatStep { - step: (**c).clone(), - extra_deps: implicit_wait_targets.clone(), - }); - } - Step::Wait(_) => { - implicit_wait_targets = out.iter().map(|f| f.step.key.clone()).collect(); - } - } - } - out -} - -#[cfg(test)] -#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] -mod tests { - use super::*; - - /// Decode a wire `Pipeline` directly from JSON bytes. Mirrors the - /// legacy `local::schema::decode_plan` helper but uses the wire - /// types so the new module has no dependency on the legacy schema. - fn decode_wire(bytes: &[u8]) -> Pipeline { - serde_json::from_slice::(bytes).unwrap() - } - - #[test] - fn chain_detection() { - let json = br#"{ - "version":"0", - "default_image":"ubuntu:24.04", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b","builds_in":"a"}, - {"type":"command","key":"c","cmd":"echo c","builds_in":"b"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - assert!(!g.is_chain_step(0)); // root, no parent - assert!(g.is_chain_step(1)); - assert!(g.is_chain_step(2)); - } - - #[test] - fn fork_breaks_chain() { - let json = br#"{ - "version":"0", - "default_image":"ubuntu:24.04", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b","builds_in":"a"}, - {"type":"command","key":"c","cmd":"echo c","builds_in":"a"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - assert!(!g.is_chain_step(1)); // sibling exists, must commit a then run separately - assert!(!g.is_chain_step(2)); - } - - #[test] - fn wait_inserts_implicit_deps() { - let json = br#"{ - "version":"0", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b"}, - {"type":"wait"}, - 
{"type":"command","key":"c","cmd":"echo c"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - // c (index 2 since the wait is dropped) should depend on a (0) and b (1). - let c_idx = g.nodes.iter().position(|n| n.step.key == "c").unwrap(); - let a_idx = g.nodes.iter().position(|n| n.step.key == "a").unwrap(); - let b_idx = g.nodes.iter().position(|n| n.step.key == "b").unwrap(); - assert!(g.nodes[c_idx].depends_on.contains(&a_idx)); - assert!(g.nodes[c_idx].depends_on.contains(&b_idx)); - } - - #[test] - fn rejects_unknown_builds_in() { - let json = br#"{ - "version":"0", - "steps":[ - {"type":"command","key":"b","cmd":"echo b","builds_in":"missing"} - ] - }"#; - let p = decode_wire(json); - let err = Graph::build(&p).unwrap_err(); - assert!(err.to_string().contains("missing") || err.to_string().contains("unknown")); - } - - #[test] - fn chains_partition_includes_every_node_once() { - // Pipeline: - // a (root) -> b (chain) -> c (chain) - // -> d (fork) - // e (root, independent) - let json = br#"{ - "version":"0", - "default_image":"ubuntu:24.04", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b","builds_in":"a"}, - {"type":"command","key":"c","cmd":"echo c","builds_in":"b"}, - {"type":"command","key":"d","cmd":"echo d","builds_in":"a"}, - {"type":"command","key":"e","cmd":"echo e"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - let idx = |k: &str| g.nodes.iter().position(|n| n.step.key == k).unwrap(); - - // 'a' has two builds_in children, so neither b nor d is a chain - // step relative to a. But b -> c is a single-child chain. - let chains = g.chains(); - let mut all_nodes: Vec = chains.iter().flatten().copied().collect(); - all_nodes.sort_unstable(); - assert_eq!( - all_nodes, - vec![idx("a"), idx("b"), idx("c"), idx("d"), idx("e")] - ); - - // The chain containing 'b' must also contain 'c' in that order. 
- let bc_chain = chains - .iter() - .find(|ch| ch.contains(&idx("b"))) - .expect("b must be in some chain"); - assert_eq!(bc_chain, &vec![idx("b"), idx("c")]); - - // Every other chain is length-1. - for ch in &chains { - if ch != bc_chain { - assert_eq!(ch.len(), 1, "non-bc chain not singleton: {ch:?}"); - } - } - } - - #[test] - fn chain_deps_aggregates_cross_chain_edges() { - // Pipeline: - // a -> b (chain) -> c (chain) - // a -> d (fork) - // e (independent) - // Chains: [a], [b,c], [d], [e] - // chain_deps: [a] none, [b,c] {[a]}, [d] {[a]}, [e] none - let json = br#"{ - "version":"0", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b","builds_in":"a"}, - {"type":"command","key":"c","cmd":"echo c","builds_in":"b"}, - {"type":"command","key":"d","cmd":"echo d","builds_in":"a"}, - {"type":"command","key":"e","cmd":"echo e"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - let chains = g.chains(); - let deps = g.chain_deps(&chains); - - let find_chain = |key: &str| -> usize { - let idx = g.nodes.iter().position(|n| n.step.key == key).unwrap(); - chains.iter().position(|ch| ch.contains(&idx)).unwrap() - }; - let a_ci = find_chain("a"); - let bc_ci = find_chain("b"); - let d_ci = find_chain("d"); - let e_ci = find_chain("e"); - - assert!( - deps[a_ci].is_empty(), - "chain a has no deps: {:?}", - deps[a_ci] - ); - assert_eq!(deps[bc_ci], vec![a_ci]); - assert_eq!(deps[d_ci], vec![a_ci]); - assert!(deps[e_ci].is_empty()); - } - - #[test] - fn chain_deps_subsumes_wait_barriers() { - let json = br#"{ - "version":"0", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b"}, - {"type":"wait"}, - {"type":"command","key":"c","cmd":"echo c"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - let chains = g.chains(); - let deps = g.chain_deps(&chains); - let find_chain = |key: &str| -> usize { - let idx = 
g.nodes.iter().position(|n| n.step.key == key).unwrap(); - chains.iter().position(|ch| ch.contains(&idx)).unwrap() - }; - let a_ci = find_chain("a"); - let b_ci = find_chain("b"); - let c_ci = find_chain("c"); - let mut c_deps = deps[c_ci].clone(); - c_deps.sort_unstable(); - let mut want = vec![a_ci, b_ci]; - want.sort_unstable(); - assert_eq!(c_deps, want); - } - - #[test] - fn chains_root_is_never_a_chain_step() { - let json = br#"{ - "version":"0", - "steps":[ - {"type":"command","key":"a","cmd":"echo a"}, - {"type":"command","key":"b","cmd":"echo b","builds_in":"a"} - ] - }"#; - let p = decode_wire(json); - let g = Graph::build(&p).unwrap(); - for chain in g.chains() { - let root = chain[0]; - assert!(!g.is_chain_step(root), "chain root {root} is a chain step"); - for &step in &chain[1..] { - assert!(g.is_chain_step(step), "non-root {step} is not a chain step"); - } - } - } -} +pub use hm_pipeline_ir::graph::{EdgeKind, NodeWeight, PipelineGraph as Graph}; diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index d65d916..4835805 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -29,6 +29,8 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; +use daggy::NodeIndex; + use anyhow::{Context, Result}; use hm_plugin_protocol::{ ArchiveId, BuildEvent, ExecutorInput, PlanSummary, SnapshotRef, StepResult, @@ -156,7 +158,7 @@ pub async fn run( // its `committed_snapshot` under its node index. A fork-child // chain looks up its `builds_in` parent here to know what base // image to boot from. Mirrors legacy `SharedState::node_image`. - let node_image: Arc>> = Arc::new(Mutex::new(HashMap::new())); + let node_image: Arc>> = Arc::new(Mutex::new(HashMap::new())); // Spawn the output subscriber. Dispatches every BuildEvent to the // selected output-formatter plugin (default: `human`). @@ -166,7 +168,7 @@ pub async fn run( // Announce build start. 
let started_at = chrono::Utc::now(); let plan_summary = PlanSummary { - step_count: graph.nodes.len(), + step_count: graph.node_count(), chain_count: chains.len(), default_runner: "docker".into(), }; @@ -287,13 +289,13 @@ pub async fn run( async fn run_chain( chain_idx: usize, graph: &Graph, - chain_nodes: &[usize], + chain_nodes: &[NodeIndex], archive_id: ArchiveId, run_id: Uuid, registry: &Arc>, bus: &Arc, cancel: &CancellationToken, - node_image: &Arc>>, + node_image: &Arc>>, ) -> Result { // Seed from the cross-chain lineage map: if this chain's root has // a `builds_in` parent that already committed a snapshot, boot @@ -302,8 +304,7 @@ async fn run_chain( let chain_root = chain_nodes[0]; let mut parent_snapshot: Option = { let g = node_image.lock().await; - graph.nodes[chain_root] - .builds_in + graph.builds_in_parent(chain_root) .and_then(|p| g.get(&p).cloned()) }; @@ -311,12 +312,12 @@ async fn run_chain( if cancel.is_cancelled() { return Ok(0); } - let step_wire = graph.nodes[i].step.clone(); + let step_wire = graph.node_weight(i).step.clone(); // Keep a copy of the step key for diagnostics — `step_wire` is // moved into `ExecutorInput` below. 
let step_key = step_wire.key.clone(); let env_map: std::collections::BTreeMap = - graph.nodes[i].env.clone().into_iter().collect(); + graph.node_weight(i).env.clone(); let step_id = Uuid::new_v4(); bus.emit(BuildEvent::StepQueued { From 41b378d1aa697b3465eb36ae7981dfea8b54899d Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:03:18 -0700 Subject: [PATCH 16/40] test: update integration tests for PipelineGraph API --- crates/hm/tests/default_image_inheritance.rs | 13 ++++++++----- crates/hm/tests/runner_dispatch.rs | 5 +++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/crates/hm/tests/default_image_inheritance.rs b/crates/hm/tests/default_image_inheritance.rs index 0d845a3..5c53dbb 100644 --- a/crates/hm/tests/default_image_inheritance.rs +++ b/crates/hm/tests/default_image_inheritance.rs @@ -30,8 +30,9 @@ fn root_step_inherits_default_image() { ] }"#); let g = Graph::build(&p).expect("build graph"); + let idx = g.node_index_by_key("apt-base").unwrap(); assert_eq!( - g.nodes[0].step.image.as_deref(), + g.node_weight(idx).step.image.as_deref(), Some("ubuntu:24.04"), "root step must inherit pipeline default_image" ); @@ -48,8 +49,9 @@ fn root_step_explicit_image_wins() { ] }"#); let g = Graph::build(&p).expect("build graph"); + let idx = g.node_index_by_key("rust").unwrap(); assert_eq!( - g.nodes[0].step.image.as_deref(), + g.node_weight(idx).step.image.as_deref(), Some("rust:1.82"), "explicit per-step image must override default_image" ); @@ -70,9 +72,9 @@ fn child_step_unchanged_by_default_image() { ] }"#); let g = Graph::build(&p).expect("build graph"); - let child = g.nodes.iter().find(|n| n.step.key == "child").unwrap(); + let idx = g.node_index_by_key("child").unwrap(); assert!( - child.step.image.is_none(), + g.node_weight(idx).step.image.is_none(), "child step must not inherit default_image — chain steps boot from parent snapshot", ); } @@ -86,8 +88,9 @@ fn no_default_image_leaves_root_alone() { ] }"#); let g = 
Graph::build(&p).expect("build graph"); + let idx = g.node_index_by_key("k").unwrap(); assert!( - g.nodes[0].step.image.is_none(), + g.node_weight(idx).step.image.is_none(), "absent default_image must not synthesize an image" ); } diff --git a/crates/hm/tests/runner_dispatch.rs b/crates/hm/tests/runner_dispatch.rs index e475b8c..8d2eb66 100644 --- a/crates/hm/tests/runner_dispatch.rs +++ b/crates/hm/tests/runner_dispatch.rs @@ -83,8 +83,9 @@ async fn runner_field_dispatches_to_named_plugin() { // Sanity check: the graph must preserve `runner` from the IR. // This is the cheap fast-fail; the dispatch check below is the // load-bearing one. + let first = graph.node_index_by_key("fs-step").unwrap(); assert_eq!( - graph.nodes[0].step.runner.as_deref(), + graph.node_weight(first).step.runner.as_deref(), Some("freestyle"), "graph dropped `runner` field — A3's wire-type fix has regressed" ); @@ -92,7 +93,7 @@ async fn runner_field_dispatches_to_named_plugin() { // 3. Build the executor input exactly as the scheduler does // (orchestrator/scheduler.rs::run_chain). Cloning the wire // step preserves `runner` and `runner_args` verbatim. 
- let step_wire = graph.nodes[0].step.clone(); + let step_wire = graph.node_weight(first).step.clone(); let input = ExecutorInput { step: step_wire, workspace_archive_id: ArchiveId(Uuid::nil()), From 85eacc33ea614cd0af6878cb57e985c09025ebf5 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:38:07 -0700 Subject: [PATCH 17/40] chore: enable daggy serde-1 feature --- Cargo.lock | 3 +++ Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 214e39c..5a971fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -897,6 +897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804169db156b21258a2545757336922d93dfa229892c75911a0ad141aa0ff241" dependencies = [ "petgraph 0.8.3", + "serde", ] [[package]] @@ -2659,6 +2660,8 @@ dependencies = [ "fixedbitset 0.5.7", "hashbrown 0.15.5", "indexmap 2.14.0", + "serde", + "serde_derive", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 58e8eca..d4be031 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ hm-pipeline-ir = { path = "crates/hm-pipeline-ir", version = "0.0.0-dev" hm-plugin-sdk = { path = "crates/hm-plugin-sdk", version = "0.0.0-dev" } hm-util = { path = "crates/hm-util", version = "0.0.0-dev" } anyhow = "1" -daggy = "0.9" +daggy = { version = "0.9", features = ["serde-1"] } serde = { version = "1", features = ["derive"] } serde_json = "1" schemars = { version = "0.8", features = ["preserve_order", "semver", "uuid1", "chrono"] } From bde8b647e5cd68769debde72b6065c25e936a626 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:39:49 -0700 Subject: [PATCH 18/40] feat: add Serialize/Deserialize to NodeWeight and EdgeKind --- crates/hm-pipeline-ir/src/graph.rs | 7 +++- crates/hm-pipeline-ir/tests/graph_serde.rs | 49 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 crates/hm-pipeline-ir/tests/graph_serde.rs diff --git a/crates/hm-pipeline-ir/src/graph.rs 
b/crates/hm-pipeline-ir/src/graph.rs index 1b4e2d5..a9c3031 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -4,15 +4,18 @@ use anyhow::{Context, Result}; use daggy::petgraph::visit::IntoNodeReferences; use daggy::{Dag, NodeIndex, Walker}; +use serde::{Deserialize, Serialize}; + use crate::{CommandStep, Pipeline, Step}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct NodeWeight { pub step: CommandStep, pub env: BTreeMap, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] pub enum EdgeKind { BuildsIn, DependsOn, diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs new file mode 100644 index 0000000..769488d --- /dev/null +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -0,0 +1,49 @@ +#![allow( + clippy::cargo_common_metadata, + clippy::multiple_crate_versions, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic +)] + +use std::collections::BTreeMap; + +use hm_pipeline_ir::graph::{EdgeKind, NodeWeight}; +use hm_pipeline_ir::CommandStep; + +#[test] +fn node_weight_round_trips() { + let nw = NodeWeight { + step: CommandStep { + key: "a".into(), + label: Some("step A".into()), + cmd: "echo a".into(), + builds_in: None, + image: Some("ubuntu:24.04".into()), + env: None, + timeout_seconds: None, + cache: None, + runner: None, + runner_args: None, + }, + env: BTreeMap::from([("FOO".into(), "bar".into())]), + }; + let json = serde_json::to_string(&nw).unwrap(); + let back: NodeWeight = serde_json::from_str(&json).unwrap(); + assert_eq!(back.step.key, "a"); + assert_eq!(back.env.get("FOO").unwrap(), "bar"); +} + +#[test] +fn edge_kind_serializes_as_snake_case() { + assert_eq!(serde_json::to_string(&EdgeKind::BuildsIn).unwrap(), "\"builds_in\""); + assert_eq!(serde_json::to_string(&EdgeKind::DependsOn).unwrap(), "\"depends_on\""); +} + 
+#[test] +fn edge_kind_round_trips() { + let bi: EdgeKind = serde_json::from_str("\"builds_in\"").unwrap(); + assert_eq!(bi, EdgeKind::BuildsIn); + let dep: EdgeKind = serde_json::from_str("\"depends_on\"").unwrap(); + assert_eq!(dep, EdgeKind::DependsOn); +} From 84cf7d4ff40f7892a52472063c8cf637dbe8a02a Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:41:41 -0700 Subject: [PATCH 19/40] feat: make PipelineGraph serializable as wire type --- crates/hm-pipeline-ir/src/graph.rs | 13 +++- crates/hm-pipeline-ir/tests/graph_serde.rs | 36 +++++++++++ .../graph_serde__pipeline_graph_snapshot.snap | 60 +++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index a9c3031..c313e0f 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -21,10 +21,18 @@ pub enum EdgeKind { DependsOn, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct PipelineGraph { - dag: Dag, + #[serde(default = "default_version")] + version: String, + #[serde(default, skip_serializing_if = "Option::is_none")] default_image: Option, + #[serde(rename = "graph")] + dag: Dag, +} + +fn default_version() -> String { + "0".to_string() } struct FlatStep { @@ -100,6 +108,7 @@ impl PipelineGraph { } Ok(Self { + version: "0".to_string(), dag, default_image: pipeline.default_image.clone(), }) diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index 769488d..81f61b2 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -47,3 +47,39 @@ fn edge_kind_round_trips() { let dep: EdgeKind = serde_json::from_str("\"depends_on\"").unwrap(); assert_eq!(dep, EdgeKind::DependsOn); } + +use hm_pipeline_ir::graph::PipelineGraph; +use 
hm_pipeline_ir::Pipeline; + +fn build_test_graph() -> PipelineGraph { + let p: Pipeline = serde_json::from_value(serde_json::json!({ + "version": "0", + "default_image": "ubuntu:24.04", + "steps": [ + {"type": "command", "key": "a", "cmd": "echo a"}, + {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, + {"type": "command", "key": "c", "cmd": "echo c"} + ] + })).unwrap(); + PipelineGraph::build(&p).unwrap() +} + +#[test] +fn pipeline_graph_round_trips_through_json() { + let g = build_test_graph(); + let json = serde_json::to_string_pretty(&g).unwrap(); + let back: PipelineGraph = serde_json::from_str(&json).unwrap(); + assert_eq!(back.node_count(), 3); + assert_eq!(back.default_image(), Some("ubuntu:24.04")); + let a = back.node_index_by_key("a").unwrap(); + assert_eq!(back.node_weight(a).step.image.as_deref(), Some("ubuntu:24.04")); + let b = back.node_index_by_key("b").unwrap(); + assert!(back.builds_in_parent(b).is_some()); +} + +#[test] +fn pipeline_graph_snapshot() { + let g = build_test_graph(); + let json = serde_json::to_value(&g).unwrap(); + insta::assert_json_snapshot!(json); +} diff --git a/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap b/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap new file mode 100644 index 0000000..7d8214b --- /dev/null +++ b/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap @@ -0,0 +1,60 @@ +--- +source: crates/hm-pipeline-ir/tests/graph_serde.rs +expression: json +--- +{ + "default_image": "ubuntu:24.04", + "graph": { + "edge_property": "directed", + "edges": [ + [ + 0, + 1, + "builds_in" + ] + ], + "node_holes": [], + "nodes": [ + { + "env": {}, + "step": { + "builds_in": null, + "cache": null, + "cmd": "echo a", + "env": null, + "image": "ubuntu:24.04", + "key": "a", + "label": null, + "timeout_seconds": null + } + }, + { + "env": {}, + "step": { + "builds_in": "a", + "cache": null, + "cmd": "echo b", + "env": null, + 
"image": null, + "key": "b", + "label": null, + "timeout_seconds": null + } + }, + { + "env": {}, + "step": { + "builds_in": null, + "cache": null, + "cmd": "echo c", + "env": null, + "image": "ubuntu:24.04", + "key": "c", + "label": null, + "timeout_seconds": null + } + } + ] + }, + "version": "0" +} From b953bb19caa774211ee159dae125bdbc05420fa3 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:46:21 -0700 Subject: [PATCH 20/40] refactor: replace flat Pipeline wire format with petgraph-serde graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete Pipeline, Step, WaitStep types and the PipelineGraph::build() constructor. The JSON wire format is now the petgraph-serde graph directly — scheduler receives a pre-deserialized PipelineGraph instead of building one from a flat step list. CommandStep loses its builds_in field (edges carry that information). The anyhow dependency is removed from hm-pipeline-ir since nothing uses it anymore. Test files in hm-pipeline-ir (graph_build, parse_pipeline, schema_snapshot) will not compile until their fixtures are rewritten to the new graph format in the next task. 
--- Cargo.lock | 1 - crates/hm-pipeline-ir/Cargo.toml | 1 - crates/hm-pipeline-ir/src/graph.rs | 100 +----------------- crates/hm-pipeline-ir/src/lib.rs | 41 +------ crates/hm-pipeline-ir/tests/graph_serde.rs | 23 ++-- crates/hm-plugin-docker/src/image_name.rs | 1 - crates/hm-plugin-protocol/src/ir.rs | 2 +- crates/hm-plugin-protocol/src/lib.rs | 2 +- crates/hm-plugin-protocol/tests/round_trip.rs | 1 - crates/hm/src/commands/run/local.rs | 6 +- crates/hm/src/orchestrator/cache.rs | 1 - crates/hm/src/orchestrator/scheduler.rs | 4 +- crates/hm/tests/default_image_inheritance.rs | 63 ++++++----- crates/hm/tests/plugin_registry.rs | 1 - crates/hm/tests/runner_dispatch.rs | 29 ++--- 15 files changed, 77 insertions(+), 199 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a971fe..79fd041 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1652,7 +1652,6 @@ dependencies = [ name = "hm-pipeline-ir" version = "0.0.0-dev" dependencies = [ - "anyhow", "daggy", "insta", "schemars 0.8.22", diff --git a/crates/hm-pipeline-ir/Cargo.toml b/crates/hm-pipeline-ir/Cargo.toml index 4abaf1c..2d554b6 100644 --- a/crates/hm-pipeline-ir/Cargo.toml +++ b/crates/hm-pipeline-ir/Cargo.toml @@ -7,7 +7,6 @@ repository.workspace = true description = "Pipeline IR — the v0 wire-format schema consumed by hm." 
[dependencies] -anyhow = { workspace = true } daggy = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index c313e0f..225830d 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -1,12 +1,11 @@ use std::collections::BTreeMap; -use anyhow::{Context, Result}; use daggy::petgraph::visit::IntoNodeReferences; use daggy::{Dag, NodeIndex, Walker}; use serde::{Deserialize, Serialize}; -use crate::{CommandStep, Pipeline, Step}; +use crate::CommandStep; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NodeWeight { @@ -35,85 +34,7 @@ fn default_version() -> String { "0".to_string() } -struct FlatStep { - step: CommandStep, - extra_deps: Vec, -} - impl PipelineGraph { - pub fn build(pipeline: &Pipeline) -> Result { - let flat = flatten_steps(&pipeline.steps); - let pipeline_env = pipeline.env.clone().unwrap_or_default(); - - let mut dag: Dag = Dag::new(); - let mut key_to_idx: BTreeMap = BTreeMap::new(); - - for f in &flat { - let mut env = pipeline_env.clone(); - if let Some(e) = &f.step.env { - env.extend(e.clone()); - } - let idx = dag.add_node(NodeWeight { - step: f.step.clone(), - env, - }); - key_to_idx.insert(f.step.key.clone(), idx); - } - - for f in &flat { - let child = key_to_idx[&f.step.key]; - - if let Some(parent_key) = &f.step.builds_in { - let parent = *key_to_idx.get(parent_key).ok_or_else(|| { - anyhow::anyhow!( - "step '{}' builds_in references unknown step '{}'", - f.step.key, - parent_key - ) - })?; - dag.add_edge(parent, child, EdgeKind::BuildsIn) - .context("cycle detected adding builds_in edge")?; - } - - for dep_key in &f.extra_deps { - let parent = *key_to_idx.get(dep_key).ok_or_else(|| { - anyhow::anyhow!( - "step '{}' has wait-barrier dep on unknown step '{}'", - f.step.key, - dep_key - ) - })?; - if f.step.builds_in.as_deref() == Some(dep_key) { - continue; - } - 
dag.add_edge(parent, child, EdgeKind::DependsOn) - .context("cycle detected adding wait-barrier edge")?; - } - } - - if let Some(default_img) = pipeline.default_image.as_deref() { - for idx in dag.graph().node_indices() { - let has_builds_in_parent = dag - .parents(idx) - .iter(&dag) - .any(|(e, _)| dag.edge_weight(e).copied() == Some(EdgeKind::BuildsIn)); - if !has_builds_in_parent { - if let Some(w) = dag.node_weight_mut(idx) { - if w.step.image.is_none() { - w.step.image = Some(default_img.to_string()); - } - } - } - } - } - - Ok(Self { - version: "0".to_string(), - dag, - default_image: pipeline.default_image.clone(), - }) - } - #[must_use] pub fn node_count(&self) -> usize { self.dag.node_count() @@ -237,22 +158,3 @@ impl PipelineGraph { self.dag.graph().node_indices() } } - -fn flatten_steps(steps: &[Step]) -> Vec { - let mut out: Vec = Vec::new(); - let mut implicit_wait_targets: Vec = Vec::new(); - for s in steps { - match s { - Step::Command(c) => { - out.push(FlatStep { - step: (**c).clone(), - extra_deps: implicit_wait_targets.clone(), - }); - } - Step::Wait(_) => { - implicit_wait_targets = out.iter().map(|f| f.step.key.clone()).collect(); - } - } - } - out -} diff --git a/crates/hm-pipeline-ir/src/lib.rs b/crates/hm-pipeline-ir/src/lib.rs index 1d02054..8560326 100644 --- a/crates/hm-pipeline-ir/src/lib.rs +++ b/crates/hm-pipeline-ir/src/lib.rs @@ -1,10 +1,8 @@ //! Pipeline IR, the v0 wire format consumed by the `hm` binary. //! -//! Source of truth lives in two other places that must stay in sync -//! with this file: `harmont-pipeline/src/Harmont/Pipeline/Schema.hs` -//! (Haskell mirror) and `cidsl/py/harmont/__init__.py` (Python emitter). -//! Changing a field name here means changing it in both other places -//! in the same PR. +//! The wire format is a petgraph-serde graph. Nodes carry +//! `CommandStep` + resolved env; edges are `EdgeKind` (`BuildsIn` or +//! `DependsOn`). See `graph::PipelineGraph` for the top-level type. 
#![forbid(unsafe_code)] #![allow(clippy::multiple_crate_versions, clippy::cargo_common_metadata)] @@ -14,26 +12,6 @@ use std::collections::BTreeMap; use schemars::JsonSchema as DeriveJsonSchema; use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct Pipeline { - /// Must equal `"0"` — bumping this is reserved for breaking - /// schema changes, none of which are scheduled. The v0 schema - /// gains optional fields in-place (see `runner` below). - pub version: String, - #[serde(default)] - pub env: Option>, - #[serde(default)] - pub default_image: Option, - pub steps: Vec, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum Step { - Command(Box), - Wait(WaitStep), -} - #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] pub struct CommandStep { pub key: String, @@ -41,8 +19,6 @@ pub struct CommandStep { pub label: Option, pub cmd: String, #[serde(default)] - pub builds_in: Option, - #[serde(default)] pub image: Option, #[serde(default)] pub env: Option>, @@ -50,23 +26,12 @@ pub struct CommandStep { pub timeout_seconds: Option, #[serde(default)] pub cache: Option, - /// Names the step-executor plugin that should run this step. - /// `None` ⇒ the default executor handles it (Docker, in the - /// shipped configuration). #[serde(default, skip_serializing_if = "Option::is_none")] pub runner: Option, - /// Plugin-specific extra fields. Validated by the executor - /// plugin's `StepExecutorSpec::step_schema` if it set one. 
#[serde(default, skip_serializing_if = "Option::is_none")] pub runner_args: Option, } -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct WaitStep { - #[serde(default)] - pub continue_on_failure: bool, -} - #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] pub struct Cache { pub policy: String, diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index 81f61b2..b2acb9d 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -18,7 +18,6 @@ fn node_weight_round_trips() { key: "a".into(), label: Some("step A".into()), cmd: "echo a".into(), - builds_in: None, image: Some("ubuntu:24.04".into()), env: None, timeout_seconds: None, @@ -49,19 +48,23 @@ fn edge_kind_round_trips() { } use hm_pipeline_ir::graph::PipelineGraph; -use hm_pipeline_ir::Pipeline; fn build_test_graph() -> PipelineGraph { - let p: Pipeline = serde_json::from_value(serde_json::json!({ + serde_json::from_value(serde_json::json!({ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, - {"type": "command", "key": "c", "cmd": "echo c"} - ] - })).unwrap(); - PipelineGraph::build(&p).unwrap() + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c", "image": "ubuntu:24.04"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"] + ] + } + })).unwrap() } #[test] diff --git a/crates/hm-plugin-docker/src/image_name.rs b/crates/hm-plugin-docker/src/image_name.rs index f48c58e..1d4d98a 100644 --- a/crates/hm-plugin-docker/src/image_name.rs +++ b/crates/hm-plugin-docker/src/image_name.rs @@ -45,7 +45,6 @@ mod tests { key: "k".into(), label: 
None, cmd: "true".into(), - builds_in: None, image: image.map(String::from), env: None, timeout_seconds: None, diff --git a/crates/hm-plugin-protocol/src/ir.rs b/crates/hm-plugin-protocol/src/ir.rs index 44c1e3d..727be45 100644 --- a/crates/hm-plugin-protocol/src/ir.rs +++ b/crates/hm-plugin-protocol/src/ir.rs @@ -2,4 +2,4 @@ //! live in the `hm-pipeline-ir` crate; this module keeps the //! `hm_plugin_protocol::ir::*` import path working. -pub use hm_pipeline_ir::{Cache, CommandStep, Pipeline, Step, WaitStep}; +pub use hm_pipeline_ir::{Cache, CommandStep}; diff --git a/crates/hm-plugin-protocol/src/lib.rs b/crates/hm-plugin-protocol/src/lib.rs index 057977c..b755825 100644 --- a/crates/hm-plugin-protocol/src/lib.rs +++ b/crates/hm-plugin-protocol/src/lib.rs @@ -29,7 +29,7 @@ pub use host_abi::{ DockerStartArgs, KeyringArgs, KeyringSetArgs, KvScope, Level, LoopbackHandle, LoopbackRecvArgs, SocketHandle, SocketReadArgs, SocketWriteArgs, TtyConfirmArgs, TtyPromptArgs, }; -pub use ir::{Cache, CommandStep, Pipeline, Step, WaitStep}; +pub use ir::{Cache, CommandStep}; pub use manifest::{ Capability, ClapJson, JsonSchema, LifecycleHookSpec, OutputFormatterSpec, PluginManifest, StepExecutorSpec, SubcommandSpec, diff --git a/crates/hm-plugin-protocol/tests/round_trip.rs b/crates/hm-plugin-protocol/tests/round_trip.rs index 890564c..a871bc5 100644 --- a/crates/hm-plugin-protocol/tests/round_trip.rs +++ b/crates/hm-plugin-protocol/tests/round_trip.rs @@ -50,7 +50,6 @@ fn executor_input_round_trip() { key: "build".into(), label: None, cmd: "cargo build".into(), - builds_in: None, image: Some("rust:1.82".into()), env: None, timeout_seconds: None, diff --git a/crates/hm/src/commands/run/local.rs b/crates/hm/src/commands/run/local.rs index d05cb28..88e76e8 100644 --- a/crates/hm/src/commands/run/local.rs +++ b/crates/hm/src/commands/run/local.rs @@ -46,7 +46,7 @@ pub async fn run_pipeline_v0_one_shot( )) } -fn decode_plan_to_wire(bytes: &[u8]) -> anyhow::Result { +fn 
decode_plan_to_wire(bytes: &[u8]) -> anyhow::Result { serde_json::from_slice(bytes).map_err(|e| anyhow::anyhow!("decode pipeline JSON: {e}")) } @@ -91,12 +91,12 @@ pub async fn handle(args: RunArgs, _ctx: RunContext) -> Result { } let json = render_pipeline_json(&tools, &repo_root, &slug).await?; - let pipeline_wire = decode_plan_to_wire(&json)?; + let graph = decode_plan_to_wire(&json)?; let parallelism = args.parallelism.unwrap_or_else(|| { std::thread::available_parallelism().map_or(4, std::num::NonZeroUsize::get) }); let exit_code = - crate::orchestrator::run(pipeline_wire, repo_root, parallelism, args.format.clone()) + crate::orchestrator::run(graph, repo_root, parallelism, args.format.clone()) .await?; Ok(exit_code) } diff --git a/crates/hm/src/orchestrator/cache.rs b/crates/hm/src/orchestrator/cache.rs index 2f1bdf2..c2eddec 100644 --- a/crates/hm/src/orchestrator/cache.rs +++ b/crates/hm/src/orchestrator/cache.rs @@ -80,7 +80,6 @@ mod tests { key: "build".into(), label: None, cmd: "true".into(), - builds_in: None, image: None, env: None, timeout_seconds: None, diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index 4835805..1f7c7a1 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -60,13 +60,11 @@ use super::state::{self, OrchestratorState}; /// scheduler-level failure occurs. Non-zero step exit codes are /// surfaced via the returned `i32`, not as an Err. pub async fn run( - pipeline: hm_plugin_protocol::Pipeline, + graph: crate::orchestrator::graph::Graph, repo_root: PathBuf, parallelism: usize, format_name: String, ) -> Result { - // Build graph + chains directly from the wire-typed pipeline. 
- let graph = Graph::build(&pipeline).context("build graph")?; let chains = graph.chains(); let chain_deps = graph.chain_deps(&chains); diff --git a/crates/hm/tests/default_image_inheritance.rs b/crates/hm/tests/default_image_inheritance.rs index 5c53dbb..ba7438f 100644 --- a/crates/hm/tests/default_image_inheritance.rs +++ b/crates/hm/tests/default_image_inheritance.rs @@ -14,22 +14,24 @@ )] use harmont_cli::orchestrator::graph::Graph; -use hm_plugin_protocol::Pipeline; -fn decode(json: &[u8]) -> Pipeline { - serde_json::from_slice::(json).unwrap() +fn decode(json: &[u8]) -> Graph { + serde_json::from_slice::(json).unwrap() } #[test] fn root_step_inherits_default_image() { - let p = decode(br#"{ + let g = decode(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "apt-base", "cmd": "apt-get update"} - ] + "graph": { + "nodes": [ + {"step": {"key": "apt-base", "cmd": "apt-get update", "image": "ubuntu:24.04"}, "env": {}} + ], + "edge_property": "directed", + "edges": [] + } }"#); - let g = Graph::build(&p).expect("build graph"); let idx = g.node_index_by_key("apt-base").unwrap(); assert_eq!( g.node_weight(idx).step.image.as_deref(), @@ -40,15 +42,17 @@ fn root_step_inherits_default_image() { #[test] fn root_step_explicit_image_wins() { - let p = decode(br#"{ + let g = decode(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "rust", "cmd": "cargo build", - "image": "rust:1.82"} - ] + "graph": { + "nodes": [ + {"step": {"key": "rust", "cmd": "cargo build", "image": "rust:1.82"}, "env": {}} + ], + "edge_property": "directed", + "edges": [] + } }"#); - let g = Graph::build(&p).expect("build graph"); let idx = g.node_index_by_key("rust").unwrap(); assert_eq!( g.node_weight(idx).step.image.as_deref(), @@ -62,16 +66,20 @@ fn child_step_unchanged_by_default_image() { // Children boot from the parent's committed snapshot at runtime, // not from an image tag — leaving their image=None 
is the correct // wire state for chain steps. - let p = decode(br#"{ + let g = decode(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "parent", "cmd": "echo p"}, - {"type": "command", "key": "child", "cmd": "echo c", - "builds_in": "parent"} - ] + "graph": { + "nodes": [ + {"step": {"key": "parent", "cmd": "echo p", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "child", "cmd": "echo c"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"] + ] + } }"#); - let g = Graph::build(&p).expect("build graph"); let idx = g.node_index_by_key("child").unwrap(); assert!( g.node_weight(idx).step.image.is_none(), @@ -81,13 +89,16 @@ fn child_step_unchanged_by_default_image() { #[test] fn no_default_image_leaves_root_alone() { - let p = decode(br#"{ + let g = decode(br#"{ "version": "0", - "steps": [ - {"type": "command", "key": "k", "cmd": "true"} - ] + "graph": { + "nodes": [ + {"step": {"key": "k", "cmd": "true"}, "env": {}} + ], + "edge_property": "directed", + "edges": [] + } }"#); - let g = Graph::build(&p).expect("build graph"); let idx = g.node_index_by_key("k").unwrap(); assert!( g.node_weight(idx).step.image.is_none(), diff --git a/crates/hm/tests/plugin_registry.rs b/crates/hm/tests/plugin_registry.rs index 1f8b146..635ba4e 100644 --- a/crates/hm/tests/plugin_registry.rs +++ b/crates/hm/tests/plugin_registry.rs @@ -80,7 +80,6 @@ async fn dispatches_step_executor() { key: "build".into(), label: None, cmd: "true".into(), - builds_in: None, image: None, env: None, timeout_seconds: None, diff --git a/crates/hm/tests/runner_dispatch.rs b/crates/hm/tests/runner_dispatch.rs index 8d2eb66..99401cc 100644 --- a/crates/hm/tests/runner_dispatch.rs +++ b/crates/hm/tests/runner_dispatch.rs @@ -39,19 +39,25 @@ use std::collections::BTreeMap; use common::fixtures; use harmont_cli::orchestrator::graph::Graph; use harmont_cli::plugin::{PluginRegistry, RegistryConfig}; -use 
hm_plugin_protocol::{ArchiveId, CacheDecision, ExecutorInput, Pipeline, StepResult}; +use hm_plugin_protocol::{ArchiveId, CacheDecision, ExecutorInput, StepResult}; use uuid::Uuid; const PIPELINE_JSON: &[u8] = br#"{ "version": "0", - "steps": [ - { - "type": "command", - "key": "fs-step", - "cmd": "irrelevant; fixture ignores cmd", - "runner": "freestyle" - } - ] + "graph": { + "nodes": [ + { + "step": { + "key": "fs-step", + "cmd": "irrelevant; fixture ignores cmd", + "runner": "freestyle" + }, + "env": {} + } + ], + "edge_property": "directed", + "edges": [] + } }"#; #[tokio::test(flavor = "multi_thread")] @@ -76,9 +82,8 @@ async fn runner_field_dispatches_to_named_plugin() { }) .expect("load registry"); - // 2. Parse the IR and build the graph — the conversion under test. - let pipeline: Pipeline = serde_json::from_slice(PIPELINE_JSON).expect("parse pipeline"); - let graph = Graph::build(&pipeline).expect("build graph"); + // 2. Deserialize the graph directly from JSON — the new wire format. + let graph: Graph = serde_json::from_slice(PIPELINE_JSON).expect("parse graph"); // Sanity check: the graph must preserve `runner` from the IR. 
// This is the cheap fast-fail; the dispatch check below is the From 394cce88bc3ac9570fbf44d4595b2c8c16ef2137 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 16:51:35 -0700 Subject: [PATCH 21/40] test: rewrite Rust test fixtures to graph wire format --- crates/hm-pipeline-ir/tests/graph_build.rs | 200 ++++++++++-------- crates/hm-pipeline-ir/tests/graph_serde.rs | 1 + crates/hm-pipeline-ir/tests/parse_pipeline.rs | 41 ---- .../hm-pipeline-ir/tests/schema_snapshot.rs | 17 +- .../graph_serde__pipeline_graph_snapshot.snap | 4 +- ...apshot__command_step_schema_is_stable.snap | 93 ++++++++ .../snapshots/schema_snapshot__pipeline.snap | 168 --------------- 7 files changed, 216 insertions(+), 308 deletions(-) delete mode 100644 crates/hm-pipeline-ir/tests/parse_pipeline.rs create mode 100644 crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__command_step_schema_is_stable.snap delete mode 100644 crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap diff --git a/crates/hm-pipeline-ir/tests/graph_build.rs b/crates/hm-pipeline-ir/tests/graph_build.rs index aa7c4f1..fae10a5 100644 --- a/crates/hm-pipeline-ir/tests/graph_build.rs +++ b/crates/hm-pipeline-ir/tests/graph_build.rs @@ -7,84 +7,87 @@ )] use hm_pipeline_ir::graph::PipelineGraph; -use hm_pipeline_ir::Pipeline; -fn decode(json: &[u8]) -> Pipeline { +fn graph(json: &[u8]) -> PipelineGraph { serde_json::from_slice(json).unwrap() } #[test] fn builds_simple_chain() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, - {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c"}, "env": {}} + ], + 
"edge_property": "directed", + "edges": [ + [0, 1, "builds_in"], + [1, 2, "builds_in"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); assert_eq!(g.node_count(), 3); assert_eq!(g.default_image(), Some("ubuntu:24.04")); } -#[test] -fn rejects_unknown_builds_in() { - let p = decode(br#"{ - "version": "0", - "steps": [ - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "missing"} - ] - }"#); - let err = PipelineGraph::build(&p).unwrap_err(); - assert!( - err.to_string().contains("missing") || err.to_string().contains("unknown"), - "error should mention the missing key: {err}" - ); -} - #[test] fn root_inherits_default_image() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}} + ], + "edge_property": "directed", + "edges": [] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let node = g.node_weight(g.node_index_by_key("a").unwrap()); assert_eq!(node.step.image.as_deref(), Some("ubuntu:24.04")); } #[test] fn child_does_not_inherit_default_image() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let b = g.node_weight(g.node_index_by_key("b").unwrap()); assert!(b.step.image.is_none()); } #[test] fn wait_inserts_implicit_deps() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": 
"command", "key": "b", "cmd": "echo b"}, - {"type": "wait"}, - {"type": "command", "key": "c", "cmd": "echo c"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 2, "depends_on"], + [1, 2, "depends_on"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let c = g.node_index_by_key("c").unwrap(); let parents = g.parent_keys(c); assert!(parents.contains(&"a".to_string())); @@ -93,16 +96,22 @@ fn wait_inserts_implicit_deps() { #[test] fn chain_detection() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, - {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"], + [1, 2, "builds_in"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let a = g.node_index_by_key("a").unwrap(); let b = g.node_index_by_key("b").unwrap(); let c = g.node_index_by_key("c").unwrap(); @@ -113,16 +122,22 @@ fn chain_detection() { #[test] fn fork_breaks_chain() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, - {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "a"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + 
{"step": {"key": "c", "cmd": "echo c"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"], + [0, 2, "builds_in"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let b = g.node_index_by_key("b").unwrap(); let c = g.node_index_by_key("c").unwrap(); assert!(!g.is_chain_step(b)); @@ -131,18 +146,25 @@ fn fork_breaks_chain() { #[test] fn chains_partition_includes_every_node_once() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", "default_image": "ubuntu:24.04", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, - {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"}, - {"type": "command", "key": "d", "cmd": "echo d", "builds_in": "a"}, - {"type": "command", "key": "e", "cmd": "echo e"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c"}, "env": {}}, + {"step": {"key": "d", "cmd": "echo d"}, "env": {}}, + {"step": {"key": "e", "cmd": "echo e", "image": "ubuntu:24.04"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"], + [1, 2, "builds_in"], + [0, 3, "builds_in"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let chains = g.chains(); let mut all_nodes: Vec<_> = chains.iter().flatten().copied().collect(); all_nodes.sort(); @@ -156,17 +178,24 @@ fn chains_partition_includes_every_node_once() { #[test] fn chain_deps_cross_chain() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b", "builds_in": "a"}, - {"type": "command", "key": "c", "cmd": "echo c", "builds_in": "b"}, - {"type": "command", "key": "d", "cmd": "echo d", "builds_in": "a"}, - {"type": "command", "key": "e", "cmd": "echo e"} - ] + "graph": { + 
"nodes": [ + {"step": {"key": "a", "cmd": "echo a"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c"}, "env": {}}, + {"step": {"key": "d", "cmd": "echo d"}, "env": {}}, + {"step": {"key": "e", "cmd": "echo e"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 1, "builds_in"], + [1, 2, "builds_in"], + [0, 3, "builds_in"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let chains = g.chains(); let deps = g.chain_deps(&chains); @@ -187,16 +216,21 @@ fn chain_deps_cross_chain() { #[test] fn chain_deps_subsumes_wait_barriers() { - let p = decode(br#"{ + let g = graph(br#"{ "version": "0", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "echo b"}, - {"type": "wait"}, - {"type": "command", "key": "c", "cmd": "echo c"} - ] + "graph": { + "nodes": [ + {"step": {"key": "a", "cmd": "echo a"}, "env": {}}, + {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, + {"step": {"key": "c", "cmd": "echo c"}, "env": {}} + ], + "edge_property": "directed", + "edges": [ + [0, 2, "depends_on"], + [1, 2, "depends_on"] + ] + } }"#); - let g = PipelineGraph::build(&p).unwrap(); let chains = g.chains(); let deps = g.chain_deps(&chains); let find_chain = |key: &str| -> usize { diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index b2acb9d..a2c181a 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -59,6 +59,7 @@ fn build_test_graph() -> PipelineGraph { {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, {"step": {"key": "c", "cmd": "echo c", "image": "ubuntu:24.04"}, "env": {}} ], + "node_holes": [], "edge_property": "directed", "edges": [ [0, 1, "builds_in"] diff --git a/crates/hm-pipeline-ir/tests/parse_pipeline.rs b/crates/hm-pipeline-ir/tests/parse_pipeline.rs deleted file mode 100644 index 3082e49..0000000 --- 
a/crates/hm-pipeline-ir/tests/parse_pipeline.rs +++ /dev/null @@ -1,41 +0,0 @@ -#![allow( - clippy::cargo_common_metadata, - clippy::multiple_crate_versions, - clippy::unwrap_used, - clippy::expect_used, - clippy::panic -)] - -use hm_pipeline_ir::{Pipeline, Step}; - -#[test] -fn parses_step_with_runner() { - let json = br#"{ - "version": "0", - "steps": [ - {"type": "command", "key": "a", "cmd": "echo a"}, - {"type": "command", "key": "b", "cmd": "freestyle run", - "runner": "freestyle", "runner_args": {"region": "us"}} - ] - }"#; - let p: Pipeline = serde_json::from_slice(json).unwrap(); - let Step::Command(b) = &p.steps[1] else { - panic!("expected command") - }; - assert_eq!(b.runner.as_deref(), Some("freestyle")); - assert_eq!(b.runner_args.as_ref().unwrap()["region"], "us"); -} - -#[test] -fn parses_legacy_step_without_runner() { - let json = br#"{ - "version": "0", - "steps": [{"type": "command", "key": "a", "cmd": "echo a"}] - }"#; - let p: Pipeline = serde_json::from_slice(json).unwrap(); - let Step::Command(a) = &p.steps[0] else { - panic!("expected command") - }; - assert!(a.runner.is_none()); - assert!(a.runner_args.is_none()); -} diff --git a/crates/hm-pipeline-ir/tests/schema_snapshot.rs b/crates/hm-pipeline-ir/tests/schema_snapshot.rs index 421d672..816f34d 100644 --- a/crates/hm-pipeline-ir/tests/schema_snapshot.rs +++ b/crates/hm-pipeline-ir/tests/schema_snapshot.rs @@ -1,16 +1,7 @@ -#![allow( - clippy::cargo_common_metadata, - clippy::multiple_crate_versions, - clippy::unwrap_used, - clippy::expect_used, - clippy::panic -)] - -use hm_pipeline_ir::Pipeline; -use schemars::schema_for; +#![allow(clippy::cargo_common_metadata, clippy::multiple_crate_versions)] #[test] -fn pipeline_schema_is_stable() { - let schema = schema_for!(Pipeline); - insta::assert_json_snapshot!("pipeline", schema); +fn command_step_schema_is_stable() { + let schema = schemars::schema_for!(hm_pipeline_ir::CommandStep); + insta::assert_json_snapshot!(schema); } diff --git 
a/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap b/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap index 7d8214b..4a02d56 100644 --- a/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap +++ b/crates/hm-pipeline-ir/tests/snapshots/graph_serde__pipeline_graph_snapshot.snap @@ -1,5 +1,6 @@ --- source: crates/hm-pipeline-ir/tests/graph_serde.rs +assertion_line: 88 expression: json --- { @@ -18,7 +19,6 @@ expression: json { "env": {}, "step": { - "builds_in": null, "cache": null, "cmd": "echo a", "env": null, @@ -31,7 +31,6 @@ expression: json { "env": {}, "step": { - "builds_in": "a", "cache": null, "cmd": "echo b", "env": null, @@ -44,7 +43,6 @@ expression: json { "env": {}, "step": { - "builds_in": null, "cache": null, "cmd": "echo c", "env": null, diff --git a/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__command_step_schema_is_stable.snap b/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__command_step_schema_is_stable.snap new file mode 100644 index 0000000..7198ebe --- /dev/null +++ b/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__command_step_schema_is_stable.snap @@ -0,0 +1,93 @@ +--- +source: crates/hm-pipeline-ir/tests/schema_snapshot.rs +assertion_line: 6 +expression: schema +--- +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CommandStep", + "type": "object", + "required": [ + "cmd", + "key" + ], + "properties": { + "key": { + "type": "string" + }, + "label": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "cmd": { + "type": "string" + }, + "image": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "env": { + "default": null, + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "string" + } + }, + "timeout_seconds": { + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "cache": { + "default": null, + "anyOf": [ + { 
+ "$ref": "#/definitions/Cache" + }, + { + "type": "null" + } + ] + }, + "runner": { + "type": [ + "string", + "null" + ] + }, + "runner_args": true + }, + "definitions": { + "Cache": { + "type": "object", + "required": [ + "policy" + ], + "properties": { + "policy": { + "type": "string" + }, + "key": { + "default": null, + "type": [ + "string", + "null" + ] + } + } + } + } +} diff --git a/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap b/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap deleted file mode 100644 index 8bb7cfe..0000000 --- a/crates/hm-pipeline-ir/tests/snapshots/schema_snapshot__pipeline.snap +++ /dev/null @@ -1,168 +0,0 @@ ---- -source: crates/hm-pipeline-ir/tests/schema_snapshot.rs -assertion_line: 15 -expression: schema ---- -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Pipeline", - "type": "object", - "required": [ - "steps", - "version" - ], - "properties": { - "version": { - "description": "Must equal `\"0\"` — bumping this is reserved for breaking schema changes, none of which are scheduled. 
The v0 schema gains optional fields in-place (see `runner` below).", - "type": "string" - }, - "env": { - "default": null, - "type": [ - "object", - "null" - ], - "additionalProperties": { - "type": "string" - } - }, - "default_image": { - "default": null, - "type": [ - "string", - "null" - ] - }, - "steps": { - "type": "array", - "items": { - "$ref": "#/definitions/Step" - } - } - }, - "definitions": { - "Step": { - "oneOf": [ - { - "type": "object", - "required": [ - "cmd", - "key", - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "command" - ] - }, - "key": { - "type": "string" - }, - "label": { - "default": null, - "type": [ - "string", - "null" - ] - }, - "cmd": { - "type": "string" - }, - "builds_in": { - "default": null, - "type": [ - "string", - "null" - ] - }, - "image": { - "default": null, - "type": [ - "string", - "null" - ] - }, - "env": { - "default": null, - "type": [ - "object", - "null" - ], - "additionalProperties": { - "type": "string" - } - }, - "timeout_seconds": { - "default": null, - "type": [ - "integer", - "null" - ], - "format": "uint32", - "minimum": 0.0 - }, - "cache": { - "default": null, - "anyOf": [ - { - "$ref": "#/definitions/Cache" - }, - { - "type": "null" - } - ] - }, - "runner": { - "description": "Names the step-executor plugin that should run this step. `None` ⇒ the default executor handles it (Docker, in the shipped configuration).", - "type": [ - "string", - "null" - ] - }, - "runner_args": { - "description": "Plugin-specific extra fields. Validated by the executor plugin's `StepExecutorSpec::step_schema` if it set one." 
- } - } - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "wait" - ] - }, - "continue_on_failure": { - "default": false, - "type": "boolean" - } - } - } - ] - }, - "Cache": { - "type": "object", - "required": [ - "policy" - ], - "properties": { - "policy": { - "type": "string" - }, - "key": { - "default": null, - "type": [ - "string", - "null" - ] - } - } - } - } -} From d5faa11b91b076a97878783edfbc3daa51ea1caa Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 17:02:46 -0700 Subject: [PATCH 22/40] refactor: rewrite Python pipeline emitter for petgraph-serde graph format Replace _lower_to_dicts with _lower_to_graph in pipeline.py to emit the new graph-based wire format (nodes/edges/edge_property/node_holes) instead of a flat steps list. Update keygen.py to accept a graph dict and derive parent relationships from builds_in edges. Update all call sites (json_emit.py, _envelope.py, dev/_registry_dump.py). 
--- dsls/harmont-py/harmont/_envelope.py | 2 +- dsls/harmont-py/harmont/dev/_registry_dump.py | 2 +- dsls/harmont-py/harmont/json_emit.py | 10 +- dsls/harmont-py/harmont/keygen.py | 29 ++-- dsls/harmont-py/harmont/pipeline.py | 136 ++++++++++++++---- 5 files changed, 131 insertions(+), 48 deletions(-) diff --git a/dsls/harmont-py/harmont/_envelope.py b/dsls/harmont-py/harmont/_envelope.py index fe84936..6ada1dd 100644 --- a/dsls/harmont-py/harmont/_envelope.py +++ b/dsls/harmont-py/harmont/_envelope.py @@ -45,7 +45,7 @@ def _render_one( raise TypeError(msg) from e ir = _assemble(*leaves, env=reg.env, default_image=reg.default_image) resolve_pipeline_keys( - ir.get("steps", []), + ir.get("graph", {}), pipeline_org=pipeline_org, pipeline_slug=reg.slug, now=now, diff --git a/dsls/harmont-py/harmont/dev/_registry_dump.py b/dsls/harmont-py/harmont/dev/_registry_dump.py index 8358c47..2b40d89 100644 --- a/dsls/harmont-py/harmont/dev/_registry_dump.py +++ b/dsls/harmont-py/harmont/dev/_registry_dump.py @@ -55,7 +55,7 @@ def _lower_from_step(step: Any) -> dict[str, Any]: """ ir = _assemble(step) resolve_pipeline_keys( - ir.get("steps", []), + ir.get("graph", {}), pipeline_org="hm-dev", pipeline_slug="hm-dev-build", now=0, diff --git a/dsls/harmont-py/harmont/json_emit.py b/dsls/harmont-py/harmont/json_emit.py index e71289e..0bc324f 100644 --- a/dsls/harmont-py/harmont/json_emit.py +++ b/dsls/harmont-py/harmont/json_emit.py @@ -1,12 +1,12 @@ """Render a chain-DSL pipeline dict to the v0 IR JSON string. -The wire format mirrors harmont-pipeline/src/Harmont/Pipeline/Schema.hs -exactly. Optional fields are omitted (not null); the only field that -emits JSON null is `builds_in` for scratch-rooted steps. +The wire format uses petgraph-serde graph encoding: nodes carry +CommandStep dicts and edges encode ``builds_in`` / ``depends_on`` +relationships. 
Cache keys are resolved in keygen.resolve_pipeline_keys before serialization, so the emitted JSON includes `cache.key` for every -step whose policy is not 'none'. +node whose policy is not 'none'. """ from __future__ import annotations @@ -59,7 +59,7 @@ def pipeline_to_json( body = copy.deepcopy(p) resolve_pipeline_keys( - body.get("steps", []), + body.get("graph", {}), pipeline_org=org, pipeline_slug=slug, now=render_now, diff --git a/dsls/harmont-py/harmont/keygen.py b/dsls/harmont-py/harmont/keygen.py index 4dc1269..5fba539 100644 --- a/dsls/harmont-py/harmont/keygen.py +++ b/dsls/harmont-py/harmont/keygen.py @@ -33,27 +33,36 @@ def resolve_pipeline_keys( - steps: list[dict[str, Any]], + graph: dict[str, Any], *, pipeline_org: str, pipeline_slug: str, now: int, base_path: Path, env: Mapping[str, str], -) -> list[dict[str, Any]]: - """Walk `steps` in order. For every step whose cache policy is not +) -> dict[str, Any]: + """Walk graph nodes in order. For every node whose cache policy is not 'none', compute a deterministic sha256 cache key and inject it into - that step's `cache` dict as `cache["key"]`. Returns the same list - (mutated in place — callers may rely on identity).""" + that node's step ``cache`` dict as ``cache["key"]``. Returns the + same graph dict (mutated in place -- callers may rely on identity).""" + nodes = graph.get("nodes", []) + edges = graph.get("edges", []) + + # Build parent key map from builds_in edges. 
+ key_by_idx: dict[int, str] = {i: n["step"]["key"] for i, n in enumerate(nodes)} + parent_key_map: dict[str, str] = {} + for src, dst, kind in edges: + if kind == "builds_in": + parent_key_map[key_by_idx[dst]] = key_by_idx[src] + resolved: dict[str, str] = {} - for step in steps: - if step.get("type") != "command": - continue + for node in nodes: + step = node["step"] cache = step.get("cache") if not cache or cache["policy"] == "none": continue cmd = step.get("cmd", "") - parent = step.get("builds_in") # str or None + parent = parent_key_map.get(step["key"]) parent_resolved = _lookup_parent(parent, resolved) policy_res = _resolve_policy(cache, cmd, now, base_path, env) key = _sha256_hex( @@ -69,7 +78,7 @@ def resolve_pipeline_keys( ) cache["key"] = key resolved[step["key"]] = key - return steps + return graph def _lookup_parent(parent: str | None, resolved: dict[str, str]) -> str: diff --git a/dsls/harmont-py/harmont/pipeline.py b/dsls/harmont-py/harmont/pipeline.py index e61cb1c..cd41702 100644 --- a/dsls/harmont-py/harmont/pipeline.py +++ b/dsls/harmont-py/harmont/pipeline.py @@ -3,8 +3,8 @@ The factory walks back from each leaf via `Step.parent`, collects every unique step (keyed by `id`, since structurally-equal forks must keep distinct keys), topo-sorts by parent edges with a stable -leaf-then-DFS-pre tiebreaker, and lowers each step to a JSON-shaped -dict matching the v0 IR schema. +leaf-then-DFS-pre tiebreaker, and lowers each step to the petgraph-serde +graph format matching the v0 IR schema. Use `pipeline_to_json` from `json_emit` to emit the wire-format string. """ @@ -36,7 +36,7 @@ def pipeline( ``default_image`` is the local-mode fallback Docker image: it applies to every command step that lacks both a ``builds_in`` - parent and a per-step ``image`` override. + parent edge and a per-step ``image`` override. 
""" if not leaves: msg = ( @@ -45,56 +45,130 @@ def pipeline( ) raise ValueError(msg) out: dict[str, Any] = {"version": "0"} - if env is not None: - out["env"] = env if default_image is not None: out["default_image"] = default_image - out["steps"] = _lower_to_dicts(list(leaves)) + out["graph"] = _lower_to_graph( + list(leaves), env=env, default_image=default_image, + ) return out -def _lower_to_dicts(leaves: list[Step]) -> list[dict[str, Any]]: - """Walk back via `parent`, topo-sort, emit one dict per emitted step. +def _lower_to_graph( + leaves: list[Step], + *, + env: dict[str, str] | None = None, + default_image: str | None = None, +) -> dict[str, Any]: + """Walk back via `parent`, topo-sort, emit petgraph-serde graph dict. `scratch` and `fork` nodes carry no command and are not emitted as - JSON steps; they exist only to set the `parent` of their children. + graph nodes; they exist only to set the `parent` of their children. + Wait steps are not emitted as nodes — they are translated into + explicit ``depends_on`` edges. """ ordered = _topo_collect(leaves) - keys = resolve_keys([s for s in ordered if s.cmd is not None and not s.is_wait]) - out: list[dict[str, Any]] = [] + command_steps = [s for s in ordered if s.cmd is not None and not s.is_wait] + keys = resolve_keys(command_steps) + + # Assign integer node indices (dense, in emission order). + idx_by_id: dict[int, int] = {} + for i, s in enumerate(command_steps): + idx_by_id[id(s)] = i + + # Track which node indices have a builds_in parent (for default_image). + has_builds_in_parent: set[int] = set() + + nodes: list[dict[str, Any]] = [] + edges: list[list[Any]] = [] + + # Collect all command-step indices emitted before each wait barrier. + # When we encounter a wait, every step after the wait gets a + # depends_on edge from every step before the wait. + pre_wait_indices: list[int] = [] + # Pending depends_on sources (from the most recent wait barrier). 
+ pending_depends_on: list[int] = [] + for s in ordered: if s.is_wait: - d: dict[str, Any] = {"type": "wait"} - if s.continue_on_failure: - d["continue_on_failure"] = True - out.append(d) + # All command-step indices emitted so far (after the last wait) + # become sources for depends_on edges to subsequent steps. + pending_depends_on = list(pre_wait_indices) + pre_wait_indices = [] continue + if s.cmd is None: - # scratch or fork — passthrough, not emitted + # scratch or fork — passthrough, not emitted. continue - parent_key = _resolved_parent_key(s, keys) - d = { - "type": "command", - "key": keys[id(s)], + + node_idx = idx_by_id[id(s)] + step_key = keys[id(s)] + + # Build the CommandStep dict (no "type" or "builds_in" fields). + step_dict: dict[str, Any] = { + "key": step_key, "cmd": s.cmd, - "builds_in": parent_key, } if s.label is not None: - d["label"] = s.label + step_dict["label"] = s.label if s.cache is not None: - d["cache"] = _cache_to_dict(s.cache) - if s.env is not None: - d["env"] = s.env + step_dict["cache"] = _cache_to_dict(s.cache) if s.timeout_seconds is not None: - d["timeout_seconds"] = s.timeout_seconds + step_dict["timeout_seconds"] = s.timeout_seconds if s.image is not None: - d["image"] = s.image + step_dict["image"] = s.image if s.runner is not None: - d["runner"] = s.runner + step_dict["runner"] = s.runner if s.runner_args is not None: - d["runner_args"] = s.runner_args - out.append(d) - return out + step_dict["runner_args"] = s.runner_args + + # Merge per-step env with pipeline-level env. + merged_env: dict[str, str] = {} + if env: + merged_env.update(env) + if s.env: + merged_env.update(s.env) + + nodes.append({"step": step_dict, "env": merged_env}) + + # builds_in edge from parent. 
+ parent_key = _resolved_parent_key(s, keys) + if parent_key is not None: + parent_idx = _find_idx_by_key(parent_key, command_steps, keys, idx_by_id) + edges.append([parent_idx, node_idx, "builds_in"]) + has_builds_in_parent.add(node_idx) + + # depends_on edges from pre-wait steps. + for dep_idx in pending_depends_on: + edges.append([dep_idx, node_idx, "depends_on"]) + + pre_wait_indices.append(node_idx) + + # Apply default_image to root nodes (those without a builds_in parent). + if default_image is not None: + for i, node in enumerate(nodes): + if i not in has_builds_in_parent and "image" not in node["step"]: + node["step"]["image"] = default_image + + return { + "nodes": nodes, + "node_holes": [], + "edge_property": "directed", + "edges": edges, + } + + +def _find_idx_by_key( + key: str, + command_steps: list[Step], + keys: dict[int, str], + idx_by_id: dict[int, int], +) -> int: + """Return the node index for the step with the given resolved key.""" + for s in command_steps: + if keys[id(s)] == key: + return idx_by_id[id(s)] + msg = f"BUG: no step with key {key!r}" + raise KeyError(msg) def _topo_collect(leaves: list[Step]) -> list[Step]: From 4e59ed8b72076d04bc91141b25b19bd117cb4255 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 17:08:50 -0700 Subject: [PATCH 23/40] test: update all Python tests for petgraph-serde graph wire format Rewrite assertions across 26 test files to match the new graph-based IR: p["steps"] -> p["graph"]["nodes"], step["type"]/step["builds_in"] removed, parent relationships read from edges, wait barriers become depends_on edges, env merged into node dicts. 
--- .../tests/dev/test_registry_dump.py | 2 +- dsls/harmont-py/tests/test_cmake.py | 2 +- dsls/harmont-py/tests/test_composer.py | 8 +- dsls/harmont-py/tests/test_dotnet.py | 8 +- dsls/harmont-py/tests/test_elm.py | 8 +- dsls/harmont-py/tests/test_envelope.py | 64 +++-- dsls/harmont-py/tests/test_examples_render.py | 4 +- dsls/harmont-py/tests/test_go.py | 8 +- dsls/harmont-py/tests/test_gradle.py | 8 +- dsls/harmont-py/tests/test_har_28_example.py | 18 +- dsls/harmont-py/tests/test_haskell.py | 8 +- dsls/harmont-py/tests/test_json_emit.py | 132 ++++++--- dsls/harmont-py/tests/test_keygen.py | 272 ++++++++++-------- dsls/harmont-py/tests/test_npm.py | 8 +- dsls/harmont-py/tests/test_ocaml.py | 8 +- dsls/harmont-py/tests/test_perl.py | 8 +- dsls/harmont-py/tests/test_pipeline.py | 6 +- .../tests/test_pipeline_fixtures.py | 18 +- .../tests/test_pipeline_lowering.py | 134 +++++---- dsls/harmont-py/tests/test_python.py | 8 +- dsls/harmont-py/tests/test_ruby.py | 8 +- dsls/harmont-py/tests/test_rust.py | 8 +- .../tests/test_target_cross_module.py | 4 +- .../tests/test_toolchain_compose.py | 4 +- dsls/harmont-py/tests/test_zig.py | 8 +- dsls/harmont-py/tests/test_zig_toolchain.py | 33 ++- 26 files changed, 483 insertions(+), 314 deletions(-) diff --git a/dsls/harmont-py/tests/dev/test_registry_dump.py b/dsls/harmont-py/tests/dev/test_registry_dump.py index 9aaa4af..d2e3906 100644 --- a/dsls/harmont-py/tests/dev/test_registry_dump.py +++ b/dsls/harmont-py/tests/dev/test_registry_dump.py @@ -81,7 +81,7 @@ def api(): f = out["deployments"]["api"]["from"] assert f["type"] == "step_chain" assert f["pipeline_v0"]["version"] == "0" - assert f["pipeline_v0"]["steps"][0]["cmd"] == "echo build" + assert f["pipeline_v0"]["graph"]["nodes"][0]["step"]["cmd"] == "echo build" def test_dump_non_local_driver_is_marked_unhandled(): diff --git a/dsls/harmont-py/tests/test_cmake.py b/dsls/harmont-py/tests/test_cmake.py index 808a6c2..e182cce 100644 --- a/dsls/harmont-py/tests/test_cmake.py 
+++ b/dsls/harmont-py/tests/test_cmake.py @@ -7,7 +7,7 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def test_cmake_object_form_full_chain(): diff --git a/dsls/harmont-py/tests/test_composer.py b/dsls/harmont-py/tests/test_composer.py index a1d2e73..0b9aef4 100644 --- a/dsls/harmont-py/tests/test_composer.py +++ b/dsls/harmont-py/tests/test_composer.py @@ -5,13 +5,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_dotnet.py b/dsls/harmont-py/tests/test_dotnet.py index 6db33ec..bf26066 100644 --- a/dsls/harmont-py/tests/test_dotnet.py +++ b/dsls/harmont-py/tests/test_dotnet.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_elm.py b/dsls/harmont-py/tests/test_elm.py index 06f90a4..ef09672 100644 --- a/dsls/harmont-py/tests/test_elm.py +++ b/dsls/harmont-py/tests/test_elm.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> 
dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] msg = f"no command step containing {needle!r}" raise AssertionError(msg) diff --git a/dsls/harmont-py/tests/test_envelope.py b/dsls/harmont-py/tests/test_envelope.py index fb27844..269e486 100644 --- a/dsls/harmont-py/tests/test_envelope.py +++ b/dsls/harmont-py/tests/test_envelope.py @@ -1,4 +1,4 @@ -"""Envelope JSON shape — what api/cli consume.""" +"""Envelope JSON shape -- what api/cli consume.""" import json @@ -21,6 +21,36 @@ def _reset_registry(): clear_target_names() +def _graph_nodes(definition): + return definition["graph"]["nodes"] + + +def _graph_edges(definition): + return definition["graph"]["edges"] + + +def _step_cmds(definition): + return [n["step"].get("cmd") for n in _graph_nodes(definition)] + + +def _builds_in_children(definition, parent_key): + """Return nodes whose builds_in parent is parent_key.""" + nodes = _graph_nodes(definition) + key_by_idx = {i: n["step"]["key"] for i, n in enumerate(nodes)} + parent_idx = None + for i, n in enumerate(nodes): + if n["step"]["key"] == parent_key: + parent_idx = i + break + if parent_idx is None: + return [] + children = [] + for src, dst, kind in _graph_edges(definition): + if kind == "builds_in" and src == parent_idx: + children.append(nodes[dst]) + return children + + def test_empty_registry_emits_empty_pipelines_list(): out = json.loads(hm.dump_registry_json()) assert out == {"schema_version": "1", "pipelines": []} @@ -41,11 +71,10 @@ def ci() -> hm.Step: assert p["triggers"] == [] definition = p["definition"] assert definition["version"] == "0" - steps = definition["steps"] - assert len(steps) == 1 - assert steps[0]["type"] == "command" - assert steps[0]["cmd"] == "echo hi" - assert steps[0]["label"] == "hi" + nodes = _graph_nodes(definition) + assert len(nodes) == 1 + assert nodes[0]["step"]["cmd"] 
== "echo hi" + assert nodes[0]["step"]["label"] == "hi" def test_pipeline_with_triggers(): @@ -81,7 +110,7 @@ def ci() -> hm.Pipeline: out = json.loads(hm.dump_registry_json()) p = out["pipelines"][0] - cmds = sorted(s["cmd"] for s in p["definition"]["steps"] if s["type"] == "command") + cmds = sorted(n["step"]["cmd"] for n in _graph_nodes(p["definition"])) assert cmds == ["a", "b"] @@ -93,7 +122,9 @@ def ci() -> hm.Step: out = json.loads(hm.dump_registry_json()) definition = out["pipelines"][0]["definition"] assert definition["default_image"] == "alpine:3.20" - assert definition["env"] == {"CI": "true"} + # Pipeline-level env is merged into node env dicts. + for node in _graph_nodes(definition): + assert node["env"].get("CI") == "true" def test_envelope_resolves_cache_keys(tmp_path): @@ -109,7 +140,7 @@ def ci() -> hm.Step: env={}, ) ) - step = out["pipelines"][0]["definition"]["steps"][0] + step = _graph_nodes(out["pipelines"][0]["definition"])[0]["step"] assert step["cache"]["policy"] == "forever" assert "key" in step["cache"] assert len(step["cache"]["key"]) == 64 @@ -125,8 +156,8 @@ def ci(): return hm.haskell(ghc="9.6.7").cabal(path="api") out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - cmds = [s.get("cmd") for s in steps if s.get("type") == "command"] + nodes = _graph_nodes(out["pipelines"][0]["definition"]) + cmds = [n["step"].get("cmd") for n in nodes] assert any("cabal build all" in (c or "") for c in cmds) @@ -148,12 +179,13 @@ def ci() -> tuple[hm.Step, ...]: ) out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - apt_steps = [s for s in steps if s.get("cmd") == "apt-get update"] - assert len(apt_steps) == 1 # deduplicated via target memoization - children = [s for s in steps if s.get("builds_in") == apt_steps[0]["key"]] + definition = out["pipelines"][0]["definition"] + nodes = _graph_nodes(definition) + apt_nodes = [n for n in nodes if n["step"].get("cmd") == 
"apt-get update"] + assert len(apt_nodes) == 1 # deduplicated via target memoization + children = _builds_in_children(definition, apt_nodes[0]["step"]["key"]) assert len(children) == 2 - child_cmds = sorted(s["cmd"] for s in children) + child_cmds = sorted(n["step"]["cmd"] for n in children) assert child_cmds == ["cabal build", "pytest"] diff --git a/dsls/harmont-py/tests/test_examples_render.py b/dsls/harmont-py/tests/test_examples_render.py index 9270c5f..66b0bc8 100644 --- a/dsls/harmont-py/tests/test_examples_render.py +++ b/dsls/harmont-py/tests/test_examples_render.py @@ -63,8 +63,8 @@ def test_example_renders_to_v0_ir( ) definition = ci_pipeline["definition"] assert definition["version"] == "0" - assert definition.get("steps"), ( - f"{example_dir.name}: ci pipeline has no steps" + assert definition.get("graph", {}).get("nodes"), ( + f"{example_dir.name}: ci pipeline has no nodes" ) assert definition.get("default_image"), ( f"{example_dir.name}: ci pipeline missing default_image — local " diff --git a/dsls/harmont-py/tests/test_go.py b/dsls/harmont-py/tests/test_go.py index 08ffcb4..3bf69a1 100644 --- a/dsls/harmont-py/tests/test_go.py +++ b/dsls/harmont-py/tests/test_go.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] msg = f"no command step containing {needle!r}" raise AssertionError(msg) diff --git a/dsls/harmont-py/tests/test_gradle.py b/dsls/harmont-py/tests/test_gradle.py index 4b14cd3..3c9149f 100644 --- a/dsls/harmont-py/tests/test_gradle.py +++ b/dsls/harmont-py/tests/test_gradle.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if 
s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_har_28_example.py b/dsls/harmont-py/tests/test_har_28_example.py index 70e6730..e51181a 100644 --- a/dsls/harmont-py/tests/test_har_28_example.py +++ b/dsls/harmont-py/tests/test_har_28_example.py @@ -16,7 +16,7 @@ def _reset(tmp_path, monkeypatch): clear_registry() clear_target_cache() clear_target_names() - # Toolchain `.cabal` glob reads disk for *.cabal files — give it an + # Toolchain `.cabal` glob reads disk for *.cabal files -- give it an # empty workspace so the test is hermetic. monkeypatch.chdir(tmp_path) (tmp_path / "api").mkdir() @@ -28,6 +28,10 @@ def _reset(tmp_path, monkeypatch): clear_target_names() +def _graph_nodes(definition): + return definition["graph"]["nodes"] + + def test_har_28_example_renders(): @hm.target() def apt_base(): @@ -60,17 +64,17 @@ def ci(): out = json.loads(hm.dump_registry_json()) p = out["pipelines"][0] - steps = p["definition"]["steps"] + nodes = _graph_nodes(p["definition"]) - cmds = [s.get("cmd") for s in steps if s.get("type") == "command"] + cmds = [n["step"].get("cmd") for n in nodes] # Each leaf landed in the IR. assert any("pytest -v" in (c or "") for c in cmds) assert any("cabal build all" in (c or "") for c in cmds) assert any("elm make src/Main.elm" in (c or "") for c in cmds) # apt-base used by the venv chain appears exactly once (memoized). 
- apt_update_steps = [s for s in steps if s.get("cmd") == "apt-get update"] - assert len(apt_update_steps) == 1 + apt_update_nodes = [n for n in nodes if n["step"].get("cmd") == "apt-get update"] + assert len(apt_update_nodes) == 1 def test_har_28_cwd_kwarg_renders_to_cd_prefix(): @@ -79,6 +83,6 @@ def ci(): return hm.sh("pytest -v", cwd="cidsl/py") out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - cmds = [s["cmd"] for s in steps if s.get("type") == "command"] + nodes = _graph_nodes(out["pipelines"][0]["definition"]) + cmds = [n["step"]["cmd"] for n in nodes] assert "cd cidsl/py && pytest -v" in cmds diff --git a/dsls/harmont-py/tests/test_haskell.py b/dsls/harmont-py/tests/test_haskell.py index c7020d2..7833b1c 100644 --- a/dsls/harmont-py/tests/test_haskell.py +++ b/dsls/harmont-py/tests/test_haskell.py @@ -12,13 +12,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] msg = f"no command step containing {needle!r}" raise AssertionError(msg) diff --git a/dsls/harmont-py/tests/test_json_emit.py b/dsls/harmont-py/tests/test_json_emit.py index efcc4e9..56bf3e7 100644 --- a/dsls/harmont-py/tests/test_json_emit.py +++ b/dsls/harmont-py/tests/test_json_emit.py @@ -1,9 +1,7 @@ -"""JSON emitter — v0 IR output shape goldens. +"""JSON emitter -- v0 IR output shape goldens. -The wire format mirrors harmont-pipeline/src/Harmont/Pipeline/Schema.hs. -Optional fields are omitted (not null); `builds_in: null` only when -the step has no parent (scratch). Cache keys are resolved at render -time and embedded in cache.key.""" +The wire format uses petgraph-serde graph encoding. 
Cache keys are +resolved at render time and embedded in cache.key.""" from __future__ import annotations @@ -27,46 +25,87 @@ def _emit(p, **kw): return json.loads(pipeline_to_json(p, now=0, base_path=Path("/tmp"), **kw)) # noqa: S108 +def _nodes(out): + return out["graph"]["nodes"] + + +def _edges(out): + return out["graph"]["edges"] + + +def _step_by_key(out, key): + for n in _nodes(out): + if n["step"]["key"] == key: + return n["step"] + msg = f"no node with key {key!r}" + raise AssertionError(msg) + + +def _node_by_key(out, key): + for n in _nodes(out): + if n["step"]["key"] == key: + return n + msg = f"no node with key {key!r}" + raise AssertionError(msg) + + +def _builds_in_parent_key(out, child_key): + """Return the parent key for a child_key via builds_in edges, or None.""" + key_by_idx = {i: n["step"]["key"] for i, n in enumerate(_nodes(out))} + idx_by_key = {v: k for k, v in key_by_idx.items()} + child_idx = idx_by_key[child_key] + for src, dst, kind in _edges(out): + if kind == "builds_in" and dst == child_idx: + return key_by_idx[src] + return None + + def test_minimal_command(): p = pipeline(scratch().sh("echo hi", label="hello")) out = _emit(p) - assert out == { - "version": "0", - "steps": [ - { - "type": "command", - "key": "hello", - "label": "hello", - "cmd": "echo hi", - "builds_in": None, - }, - ], - } - - -def test_chain_parent_key_in_builds_in(): + assert out["version"] == "0" + assert len(_nodes(out)) == 1 + step = _nodes(out)[0]["step"] + assert step["key"] == "hello" + assert step["label"] == "hello" + assert step["cmd"] == "echo hi" + # No "type" or "builds_in" field on step dicts. + assert "type" not in step + assert "builds_in" not in step + # No builds_in edges for a root step. 
+ assert _builds_in_parent_key(out, "hello") is None + + +def test_chain_parent_key_in_builds_in_edge(): a = scratch().sh("install", label="install") b = a.sh("build", label="build") out = _emit(pipeline(b)) - by_key = {s["key"]: s for s in out["steps"]} - assert by_key["install"]["builds_in"] is None - assert by_key["build"]["builds_in"] == "install" + assert _builds_in_parent_key(out, "install") is None + assert _builds_in_parent_key(out, "build") == "install" -def test_wait_step(): +def test_wait_step_becomes_depends_on_edges(): out = _emit(pipeline(scratch().sh("a", label="a"), wait())) - types = [s["type"] for s in out["steps"]] - assert types == ["command", "wait"] + # Wait produces no nodes; only the command step "a" is present. + # (No post-wait steps in this case, so no depends_on edges either.) + assert len(_nodes(out)) == 1 + assert _nodes(out)[0]["step"]["key"] == "a" -def test_wait_continue_on_failure_emitted(): - out = _emit(pipeline(scratch().sh("a", label="a"), wait(continue_on_failure=True))) - assert out["steps"][-1] == {"type": "wait", "continue_on_failure": True} +def test_wait_emits_depends_on_edges(): + a = scratch().sh("a", label="a") + b = scratch().sh("b", label="b") + out = _emit(pipeline(a, wait(), b)) + keys = [n["step"]["key"] for n in _nodes(out)] + idx_a = keys.index("a") + idx_b = keys.index("b") + depends_on = [(s, d) for s, d, k in _edges(out) if k == "depends_on"] + assert (idx_a, idx_b) in depends_on -def test_pipeline_env_emitted_as_object(): +def test_pipeline_env_merged_into_node_env(): out = _emit(pipeline(scratch().sh("a", label="a"), env={"CI": "true"})) - assert out["env"] == {"CI": "true"} + assert _nodes(out)[0]["env"] == {"CI": "true"} def test_default_image_emitted_when_set(): @@ -79,7 +118,7 @@ def test_cache_ttl_resolves_key(): scratch().sh("apt-get install -y curl", label="apt", cache=ttl(timedelta(days=1))) ) out = _emit(p) - s = out["steps"][0] + s = _nodes(out)[0]["step"] assert s["cache"]["policy"] == "ttl" 
assert s["cache"]["duration_seconds"] == 86400 assert isinstance(s["cache"]["key"], str) @@ -91,7 +130,7 @@ def test_cache_forever_with_env_keys_emitted(): pipeline(scratch().sh("x", label="x", cache=forever(env_keys=("FOO", "BAR")))), env={"FOO": "1", "BAR": "2"}, ) - s = out["steps"][0] + s = _nodes(out)[0]["step"] assert s["cache"]["policy"] == "forever" assert s["cache"]["env_keys"] == ["FOO", "BAR"] assert "key" in s["cache"] @@ -108,7 +147,7 @@ def test_cache_on_change_paths_round_trip(tmp_path): env={}, ) ) - s = out["steps"][0] + s = _nodes(out)[0]["step"] assert s["cache"]["policy"] == "on_change" assert s["cache"]["paths"] == ["a.txt", "b.txt"] assert "key" in s["cache"] @@ -116,21 +155,20 @@ def test_cache_on_change_paths_round_trip(tmp_path): def test_no_optional_fields_when_not_set(): out = _emit(pipeline(scratch().sh("x", label="x"))) - s = out["steps"][0] + s = _nodes(out)[0]["step"] assert "image" not in s - assert "env" not in s assert "timeout_seconds" not in s assert "cache" not in s def test_timeout_seconds_emitted_when_set(): out = _emit(pipeline(scratch().sh("x", label="x", timeout_seconds=300))) - assert out["steps"][0]["timeout_seconds"] == 300 + assert _nodes(out)[0]["step"]["timeout_seconds"] == 300 def test_image_emitted_when_set(): out = _emit(pipeline(scratch().sh("x", label="x", image="alpine:3.19"))) - assert out["steps"][0]["image"] == "alpine:3.19" + assert _nodes(out)[0]["step"]["image"] == "alpine:3.19" def test_command_emits_runner_and_runner_args(): @@ -145,23 +183,23 @@ def test_command_emits_runner_and_runner_args(): ) ) ) - cmd = next(s for s in out["steps"] if s["type"] == "command") - assert cmd["runner"] == "freestyle" - assert cmd["runner_args"] == {"region": "us"} + step = _nodes(out)[0]["step"] + assert step["runner"] == "freestyle" + assert step["runner_args"] == {"region": "us"} def test_command_omits_runner_when_unset(): out = _emit(pipeline(scratch().sh("echo hi", label="hi"))) - cmd = next(s for s in out["steps"] 
if s["type"] == "command") - assert "runner" not in cmd - assert "runner_args" not in cmd + step = _nodes(out)[0]["step"] + assert "runner" not in step + assert "runner_args" not in step def test_multi_leaf_pipeline_emits_all_command_steps(): a = scratch().sh("a", label="a") b = scratch().sh("b", label="b") out = _emit(pipeline(a, b)) - keys = sorted(s["key"] for s in out["steps"] if s["type"] == "command") + keys = sorted(n["step"]["key"] for n in _nodes(out)) assert keys == ["a", "b"] @@ -178,7 +216,7 @@ def test_pipeline_org_and_slug_threaded_through_to_cache_key(): pipeline_org="acme", pipeline_slug="api", ) - )["steps"][0]["cache"]["key"] + )["graph"]["nodes"][0]["step"]["cache"]["key"] k2 = json.loads( pipeline_to_json( p, @@ -188,5 +226,5 @@ def test_pipeline_org_and_slug_threaded_through_to_cache_key(): pipeline_org="acme", pipeline_slug="web", ) - )["steps"][0]["cache"]["key"] + )["graph"]["nodes"][0]["step"]["cache"]["key"] assert k1 != k2 diff --git a/dsls/harmont-py/tests/test_keygen.py b/dsls/harmont-py/tests/test_keygen.py index 88457e5..4b56693 100644 --- a/dsls/harmont-py/tests/test_keygen.py +++ b/dsls/harmont-py/tests/test_keygen.py @@ -1,4 +1,4 @@ -"""Cache-key resolver — direct ports of the Scheme algorithm in +"""Cache-key resolver -- direct ports of the Scheme algorithm in harmont_macros.scm. 
Keys must be byte-identical to what harmont-eval produced pre-removal, so existing cached snapshots remain reachable.""" @@ -20,39 +20,47 @@ def _sha256_hex(s: str) -> str: NUL = "\x00" +def _make_graph(nodes, edges=None): + """Build a minimal graph dict for keygen tests.""" + return { + "nodes": nodes, + "node_holes": [], + "edge_property": "directed", + "edges": edges or [], + } + + def test_none_policy_emits_no_key(): - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "a", - "cmd": "echo", - "builds_in": None, - "cache": {"policy": "none"}, + "step": {"key": "a", "cmd": "echo", "cache": {"policy": "none"}}, + "env": {}, }, - ] + ]) out = resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, base_path=Path("/tmp"), # noqa: S108 env={}, ) - assert "key" not in out[0]["cache"] + assert "key" not in out["nodes"][0]["step"]["cache"] def test_forever_policy_key_matches_scheme_formula(): - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "a", - "cmd": "echo hi", - "builds_in": None, - "cache": {"policy": "forever", "env_keys": []}, + "step": { + "key": "a", + "cmd": "echo hi", + "cache": {"policy": "forever", "env_keys": []}, + }, + "env": {}, }, - ] + ]) out = resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, @@ -64,21 +72,22 @@ def test_forever_policy_key_matches_scheme_formula(): expected = _sha256_hex( "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res ) - assert out[0]["cache"]["key"] == expected + assert out["nodes"][0]["step"]["cache"]["key"] == expected def test_ttl_policy_key_includes_bucket(): - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "a", - "cmd": "x", - "builds_in": None, - "cache": {"policy": "ttl", "duration_seconds": 3600, "env_keys": []}, + "step": { + "key": "a", + "cmd": "x", + "cache": {"policy": "ttl", "duration_seconds": 3600, "env_keys": []}, + }, + "env": {}, }, - ] + ]) 
out = resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=7200, @@ -90,24 +99,25 @@ def test_ttl_policy_key_includes_bucket(): expected = _sha256_hex( "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res ) - assert out[0]["cache"]["key"] == expected + assert out["nodes"][0]["step"]["cache"]["key"] == expected def test_on_change_reads_file_contents(): with tempfile.TemporaryDirectory() as d: f = Path(d) / "file.txt" f.write_bytes(b"hello") - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "a", - "cmd": "make", - "builds_in": None, - "cache": {"policy": "on_change", "paths": ["file.txt"]}, + "step": { + "key": "a", + "cmd": "make", + "cache": {"policy": "on_change", "paths": ["file.txt"]}, + }, + "env": {}, }, - ] + ]) out = resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, @@ -120,7 +130,7 @@ def test_on_change_reads_file_contents(): expected = _sha256_hex( "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res ) - assert out[0]["cache"]["key"] == expected + assert out["nodes"][0]["step"]["cache"]["key"] == expected def test_on_change_handles_directory_paths(): @@ -135,63 +145,85 @@ def test_on_change_handles_directory_paths(): (sub / "a.txt").write_bytes(b"alpha") (sub / "b.txt").write_bytes(b"beta") - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "s", - "cmd": "make", - "builds_in": None, - "cache": {"policy": "on_change", "paths": ["dir/"]}, + "step": { + "key": "s", + "cmd": "make", + "cache": {"policy": "on_change", "paths": ["dir/"]}, + }, + "env": {}, }, - ] + ]) out1 = resolve_pipeline_keys( - list(steps), + graph, pipeline_org="default", pipeline_slug="default", now=0, base_path=root, env={}, ) - key1 = out1[0]["cache"]["key"] + key1 = out1["nodes"][0]["step"]["cache"]["key"] - # Same tree → same key. + # Same tree -> same key. 
+ graph2 = _make_graph([ + { + "step": { + "key": "s", + "cmd": "make", + "cache": {"policy": "on_change", "paths": ["dir/"]}, + }, + "env": {}, + }, + ]) out_again = resolve_pipeline_keys( - [dict(s, cache=dict(s["cache"])) for s in steps], + graph2, pipeline_org="default", pipeline_slug="default", now=0, base_path=root, env={}, ) - assert out_again[0]["cache"]["key"] == key1 + assert out_again["nodes"][0]["step"]["cache"]["key"] == key1 - # Modify a file → key changes. + # Modify a file -> key changes. (sub / "a.txt").write_bytes(b"alpha2") + graph3 = _make_graph([ + { + "step": { + "key": "s", + "cmd": "make", + "cache": {"policy": "on_change", "paths": ["dir/"]}, + }, + "env": {}, + }, + ]) out2 = resolve_pipeline_keys( - [dict(s, cache=dict(s["cache"])) for s in steps], + graph3, pipeline_org="default", pipeline_slug="default", now=0, base_path=root, env={}, ) - assert out2[0]["cache"]["key"] != key1 + assert out2["nodes"][0]["step"]["cache"]["key"] != key1 def test_on_change_missing_path_raises(): with tempfile.TemporaryDirectory() as d: - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "s", - "cmd": "make", - "builds_in": None, - "cache": {"policy": "on_change", "paths": ["nope/"]}, + "step": { + "key": "s", + "cmd": "make", + "cache": {"policy": "on_change", "paths": ["nope/"]}, + }, + "env": {}, }, - ] + ]) with pytest.raises(FileNotFoundError, match="on_change path does not exist"): resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, @@ -201,17 +233,18 @@ def test_on_change_missing_path_raises(): def test_env_keys_are_sorted_and_picked_up(): - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "a", - "cmd": "echo", - "builds_in": None, - "cache": {"policy": "forever", "env_keys": ["BAR", "FOO"]}, + "step": { + "key": "a", + "cmd": "echo", + "cache": {"policy": "forever", "env_keys": ["BAR", "FOO"]}, + }, + "env": {}, }, - ] + ]) out = resolve_pipeline_keys( - steps, + graph, 
pipeline_org="default", pipeline_slug="default", now=0, @@ -224,61 +257,67 @@ def test_env_keys_are_sorted_and_picked_up(): expected = _sha256_hex( "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res ) - assert out[0]["cache"]["key"] == expected + assert out["nodes"][0]["step"]["cache"]["key"] == expected def test_parent_key_chains_through_resolved_cache_keys(): - steps = [ - { - "type": "command", - "key": "a", - "cmd": "x", - "builds_in": None, - "cache": {"policy": "forever", "env_keys": []}, - }, - { - "type": "command", - "key": "b", - "cmd": "y", - "builds_in": "a", - "cache": {"policy": "forever", "env_keys": []}, - }, - ] + graph = _make_graph( + [ + { + "step": { + "key": "a", + "cmd": "x", + "cache": {"policy": "forever", "env_keys": []}, + }, + "env": {}, + }, + { + "step": { + "key": "b", + "cmd": "y", + "cache": {"policy": "forever", "env_keys": []}, + }, + "env": {}, + }, + ], + edges=[[0, 1, "builds_in"]], + ) out = resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, base_path=Path("/tmp"), # noqa: S108 env={}, ) - parent_key = out[0]["cache"]["key"] + parent_key = out["nodes"][0]["step"]["cache"]["key"] inner_b = _sha256_hex("y" + NUL + "") policy_res = "forever-" + inner_b expected_b = _sha256_hex( "default" + NUL + "default" + NUL + "b" + NUL + parent_key + NUL + policy_res ) - assert out[1]["cache"]["key"] == expected_b + assert out["nodes"][1]["step"]["cache"]["key"] == expected_b def test_compose_concatenates_subpolicies(): - steps = [ + graph = _make_graph([ { - "type": "command", - "key": "a", - "cmd": "z", - "builds_in": None, - "cache": { - "policy": "compose", - "sub_policies": [ - {"policy": "forever", "env_keys": []}, - {"policy": "none"}, - ], + "step": { + "key": "a", + "cmd": "z", + "cache": { + "policy": "compose", + "sub_policies": [ + {"policy": "forever", "env_keys": []}, + {"policy": "none"}, + ], + }, }, + "env": {}, }, - ] + ]) out = resolve_pipeline_keys( - 
steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, @@ -293,23 +332,30 @@ def test_compose_concatenates_subpolicies(): expected = _sha256_hex( "default" + NUL + "default" + NUL + "a" + NUL + "scratch" + NUL + policy_res ) - assert out[0]["cache"]["key"] == expected + assert out["nodes"][0]["step"]["cache"]["key"] == expected def test_parent_without_cache_is_planerror(): - steps = [ - {"type": "command", "key": "a", "cmd": "x", "builds_in": None}, - { - "type": "command", - "key": "b", - "cmd": "y", - "builds_in": "a", - "cache": {"policy": "forever", "env_keys": []}, - }, - ] + graph = _make_graph( + [ + { + "step": {"key": "a", "cmd": "x"}, + "env": {}, + }, + { + "step": { + "key": "b", + "cmd": "y", + "cache": {"policy": "forever", "env_keys": []}, + }, + "env": {}, + }, + ], + edges=[[0, 1, "builds_in"]], + ) with pytest.raises(ValueError, match="builds_in 'a' which has no cached key"): resolve_pipeline_keys( - steps, + graph, pipeline_org="default", pipeline_slug="default", now=0, diff --git a/dsls/harmont-py/tests/test_npm.py b/dsls/harmont-py/tests/test_npm.py index 4d4a361..ccf4aed 100644 --- a/dsls/harmont-py/tests/test_npm.py +++ b/dsls/harmont-py/tests/test_npm.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] msg = f"no command step containing {needle!r}" raise AssertionError(msg) diff --git a/dsls/harmont-py/tests/test_ocaml.py b/dsls/harmont-py/tests/test_ocaml.py index b9f379c..63661be 100644 --- a/dsls/harmont-py/tests/test_ocaml.py +++ b/dsls/harmont-py/tests/test_ocaml.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in 
p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_perl.py b/dsls/harmont-py/tests/test_perl.py index 82ab596..50f9a5c 100644 --- a/dsls/harmont-py/tests/test_perl.py +++ b/dsls/harmont-py/tests/test_perl.py @@ -5,13 +5,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_pipeline.py b/dsls/harmont-py/tests/test_pipeline.py index ed7be7c..e57bb6a 100644 --- a/dsls/harmont-py/tests/test_pipeline.py +++ b/dsls/harmont-py/tests/test_pipeline.py @@ -11,8 +11,8 @@ def test_pipeline_returns_v2_dict(): p = pipeline(scratch().sh("echo", label="echo")) assert p["version"] == "0" - assert isinstance(p["steps"], list) - assert len(p["steps"]) == 1 + assert isinstance(p["graph"], dict) + assert len(p["graph"]["nodes"]) == 1 def test_pipeline_factory_rejects_no_leaves(): @@ -31,6 +31,6 @@ def test_pipeline_default_image_lowers_to_dict(): default_image="alpine:3.20", ) assert p["default_image"] == "alpine:3.20" - step = p["steps"][0] + step = p["graph"]["nodes"][0]["step"] assert step["image"] == "ubuntu:24.04" assert step["label"] == "a" diff --git a/dsls/harmont-py/tests/test_pipeline_fixtures.py b/dsls/harmont-py/tests/test_pipeline_fixtures.py index 8d915ab..ac0faf8 100644 --- 
a/dsls/harmont-py/tests/test_pipeline_fixtures.py +++ b/dsls/harmont-py/tests/test_pipeline_fixtures.py @@ -19,14 +19,18 @@ def _reset(): clear_target_cache() +def _graph_nodes(definition): + return definition["graph"]["nodes"] + + def test_zero_param_pipeline_still_works(): @hm.pipeline("ci") def ci() -> hm.Step: return hm.sh("echo hi") out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - assert any(s.get("cmd") == "echo hi" for s in steps) + nodes = _graph_nodes(out["pipelines"][0]["definition"]) + assert any(n["step"].get("cmd") == "echo hi" for n in nodes) def test_pipeline_receives_target_as_param(): @@ -39,8 +43,8 @@ def ci(apt_base: hm.Target[hm.Step]) -> hm.Step: return apt_base.sh("smoke") out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - cmds = [s.get("cmd") for s in steps] + nodes = _graph_nodes(out["pipelines"][0]["definition"]) + cmds = [n["step"].get("cmd") for n in nodes] assert "apt-get update" in cmds assert "smoke" in cmds @@ -66,10 +70,10 @@ def ci( return (api, py_test) out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - apt = [s for s in steps if s.get("cmd") == "apt-get update"] + nodes = _graph_nodes(out["pipelines"][0]["definition"]) + apt = [n for n in nodes if n["step"].get("cmd") == "apt-get update"] assert len(apt) == 1 # apt_base deduped via target memoization - cmds = sorted(s.get("cmd") for s in steps if s.get("type") == "command") + cmds = sorted(n["step"].get("cmd") for n in nodes) assert "cabal build" in cmds assert "pytest" in cmds diff --git a/dsls/harmont-py/tests/test_pipeline_lowering.py b/dsls/harmont-py/tests/test_pipeline_lowering.py index 585b650..25bee78 100644 --- a/dsls/harmont-py/tests/test_pipeline_lowering.py +++ b/dsls/harmont-py/tests/test_pipeline_lowering.py @@ -1,9 +1,8 @@ -"""Lowering: walk leaves back to scratch, topo-sort, emit JSON-shaped dicts. 
+"""Lowering: walk leaves back to scratch, topo-sort, emit graph-format dicts. -The lowering pass returns intermediate Python dicts (the same shape -the JSON IR will have, before the codegen pass produces Scheme). This -test asserts on that intermediate, not on Scheme strings — Scheme -output is covered by test_codegen.py. +The lowering pass returns an intermediate Python dict (the petgraph-serde +graph shape the JSON IR will have). This test asserts on that +intermediate graph structure. """ from __future__ import annotations @@ -11,28 +10,61 @@ import pytest from harmont._step import scratch, wait -from harmont.pipeline import _lower_to_dicts, pipeline +from harmont.pipeline import _lower_to_graph, pipeline -def test_single_chain_emits_three_command_dicts_in_parent_order(): +def _nodes(graph: dict) -> list[dict]: + return graph["nodes"] + + +def _edges(graph: dict) -> list[list]: + return graph["edges"] + + +def _step_keys(graph: dict) -> list[str]: + return [n["step"]["key"] for n in graph["nodes"]] + + +def _builds_in_edges(graph: dict) -> list[tuple[int, int]]: + return [(src, dst) for src, dst, kind in graph["edges"] if kind == "builds_in"] + + +def _depends_on_edges(graph: dict) -> list[tuple[int, int]]: + return [(src, dst) for src, dst, kind in graph["edges"] if kind == "depends_on"] + + +def _parent_key_map(graph: dict) -> dict[str, str | None]: + """Return {child_key: parent_key} for builds_in edges.""" + key_by_idx = {i: n["step"]["key"] for i, n in enumerate(graph["nodes"])} + result: dict[str, str | None] = {} + # Start with all keys having no parent. 
+ for n in graph["nodes"]: + result[n["step"]["key"]] = None + for src, dst, kind in graph["edges"]: + if kind == "builds_in": + result[key_by_idx[dst]] = key_by_idx[src] + return result + + +def test_single_chain_emits_three_command_nodes_in_parent_order(): a = scratch().sh("step a", label="a") b = a.sh("step b", label="b") c = b.sh("step c", label="c") - dicts = _lower_to_dicts([c]) - assert [d["type"] for d in dicts] == ["command", "command", "command"] - assert [d["key"] for d in dicts] == ["a", "b", "c"] - assert dicts[0]["builds_in"] is None - assert dicts[1]["builds_in"] == "a" - assert dicts[2]["builds_in"] == "b" + graph = _lower_to_graph([c]) + assert _step_keys(graph) == ["a", "b", "c"] + parents = _parent_key_map(graph) + assert parents["a"] is None + assert parents["b"] == "a" + assert parents["c"] == "b" def test_fork_node_is_not_emitted_children_inherit_grandparent(): base = scratch().sh("install", label="install") branch = base.fork(label="branch-a") leaf = branch.sh("test", label="test") - dicts = _lower_to_dicts([leaf]) - keys = [d["key"] for d in dicts] - parents = {d["key"]: d["builds_in"] for d in dicts} + graph = _lower_to_graph([leaf]) + keys = _step_keys(graph) + parents = _parent_key_map(graph) assert keys == ["install", "test"] assert parents["install"] is None assert parents["test"] == "install" @@ -42,32 +74,28 @@ def test_two_branches_share_parent_key(): base = scratch().sh("install", label="install") a = base.fork(label="a").sh("test-a", label="test-a") b = base.fork(label="b").sh("test-b", label="test-b") - dicts = _lower_to_dicts([a, b]) - parents = {d["key"]: d["builds_in"] for d in dicts} + graph = _lower_to_graph([a, b]) + parents = _parent_key_map(graph) assert parents["test-a"] == "install" assert parents["test-b"] == "install" -def test_wait_step_emitted_in_position(): +def test_wait_step_emitted_as_depends_on_edges(): a = scratch().sh("a", label="a") b = scratch().sh("b", label="b") c = scratch().sh("c", label="c") - dicts = 
_lower_to_dicts([a, b, wait(), c]) - types = [d["type"] for d in dicts] - assert "wait" in types - wait_idx = types.index("wait") - keys_before = [d["key"] for d in dicts[:wait_idx]] - keys_after = [d["key"] for d in dicts[wait_idx + 1 :]] - assert "a" in keys_before - assert "b" in keys_before - assert "c" in keys_after - - -def test_wait_continue_on_failure_carried_through(): - a = scratch().sh("a", label="a") - dicts = _lower_to_dicts([a, wait(continue_on_failure=True)]) - wait_dict = next(d for d in dicts if d["type"] == "wait") - assert wait_dict["continue_on_failure"] is True + graph = _lower_to_graph([a, b, wait(), c]) + keys = _step_keys(graph) + assert "a" in keys + assert "b" in keys + assert "c" in keys + # c should have depends_on edges from a and b. + depends_on = _depends_on_edges(graph) + idx_a = keys.index("a") + idx_b = keys.index("b") + idx_c = keys.index("c") + assert (idx_a, idx_c) in depends_on + assert (idx_b, idx_c) in depends_on def test_command_includes_label_env_timeout_when_set(): @@ -77,25 +105,27 @@ def test_command_includes_label_env_timeout_when_set(): env={"CI": "true"}, timeout_seconds=600, ) - dicts = _lower_to_dicts([s]) - assert dicts[0]["label"] == "build" - assert dicts[0]["env"] == {"CI": "true"} - assert dicts[0]["timeout_seconds"] == 600 + graph = _lower_to_graph([s]) + node = graph["nodes"][0] + assert node["step"]["label"] == "build" + assert node["env"] == {"CI": "true"} + assert node["step"]["timeout_seconds"] == 600 def test_command_omits_optional_fields_when_unset(): s = scratch().sh("make") - d = _lower_to_dicts([s])[0] + graph = _lower_to_graph([s]) + step = graph["nodes"][0]["step"] # Required fields present. - assert d["type"] == "command" - assert "key" in d - assert "cmd" in d - assert "builds_in" in d + assert "key" in step + assert "cmd" in step + # No "type" or "builds_in" fields in the new format. + assert "type" not in step + assert "builds_in" not in step # Optional fields omitted (not None) when unset. 
- assert "label" not in d - assert "env" not in d - assert "timeout_seconds" not in d - assert "cache" not in d + assert "label" not in step + assert "timeout_seconds" not in step + assert "cache" not in step def test_pipeline_factory_collects_reachable_via_parent(): @@ -103,9 +133,11 @@ def test_pipeline_factory_collects_reachable_via_parent(): leaf_a = base.fork(label="a").sh("test-a", label="test-a") leaf_b = base.fork(label="b").sh("test-b", label="test-b") p = pipeline(leaf_a, leaf_b, env={"CI": "true"}) - keys = [s["key"] for s in p["steps"]] + keys = _step_keys(p["graph"]) assert set(keys) == {"install", "test-a", "test-b"} - assert p["env"] == {"CI": "true"} + # Pipeline-level env is merged into every node. + for node in p["graph"]["nodes"]: + assert "CI" in node["env"] assert p["version"] == "0" @@ -119,6 +151,6 @@ def test_dedup_when_step_reachable_from_multiple_leaves(): a = base.sh("a", label="a") b = base.sh("b", label="b") p = pipeline(a, b) - keys = [s["key"] for s in p["steps"]] + keys = _step_keys(p["graph"]) # `install` appears once even though it's reachable from both leaves. 
assert keys.count("install") == 1 diff --git a/dsls/harmont-py/tests/test_python.py b/dsls/harmont-py/tests/test_python.py index 0a86c63..38e9c24 100644 --- a/dsls/harmont-py/tests/test_python.py +++ b/dsls/harmont-py/tests/test_python.py @@ -8,13 +8,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] msg = f"no command step containing {needle!r}" raise AssertionError(msg) diff --git a/dsls/harmont-py/tests/test_ruby.py b/dsls/harmont-py/tests/test_ruby.py index 8f63c92..98700e3 100644 --- a/dsls/harmont-py/tests/test_ruby.py +++ b/dsls/harmont-py/tests/test_ruby.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_rust.py b/dsls/harmont-py/tests/test_rust.py index 0883cfd..b831aaa 100644 --- a/dsls/harmont-py/tests/test_rust.py +++ b/dsls/harmont-py/tests/test_rust.py @@ -8,13 +8,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in (n["step"].get("cmd") or ""): + 
return n["step"] msg = f"no command step containing {needle!r}" raise AssertionError(msg) diff --git a/dsls/harmont-py/tests/test_target_cross_module.py b/dsls/harmont-py/tests/test_target_cross_module.py index ee7975c..96c9230 100644 --- a/dsls/harmont-py/tests/test_target_cross_module.py +++ b/dsls/harmont-py/tests/test_target_cross_module.py @@ -39,8 +39,8 @@ def ci(py_test: hm.Target[hm.Step]) -> hm.Step: return py_test out = json.loads(hm.dump_registry_json()) - steps = out["pipelines"][0]["definition"]["steps"] - cmds = sorted(s.get("cmd") for s in steps if s.get("type") == "command") + nodes = out["pipelines"][0]["definition"]["graph"]["nodes"] + cmds = sorted(n["step"].get("cmd") for n in nodes) assert "apt-get update" in cmds assert "cd cidsl/py && pytest -v" in cmds diff --git a/dsls/harmont-py/tests/test_toolchain_compose.py b/dsls/harmont-py/tests/test_toolchain_compose.py index 2c69ee1..ecc1bdb 100644 --- a/dsls/harmont-py/tests/test_toolchain_compose.py +++ b/dsls/harmont-py/tests/test_toolchain_compose.py @@ -10,7 +10,7 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def test_stack_npm_on_spec_step(): @@ -79,4 +79,4 @@ def test_mixed_pipeline_compiles(): default_image="ubuntu:24.04", ) assert p["version"] == "0" - assert len(p["steps"]) > 0 + assert len(p["graph"]["nodes"]) > 0 diff --git a/dsls/harmont-py/tests/test_zig.py b/dsls/harmont-py/tests/test_zig.py index 2d40b53..c944654 100644 --- a/dsls/harmont-py/tests/test_zig.py +++ b/dsls/harmont-py/tests/test_zig.py @@ -7,13 +7,13 @@ def _cmds(p: dict) -> list[str]: - return [s["cmd"] for s in p["steps"] if s["type"] == "command"] + return [n["step"]["cmd"] for n in p["graph"]["nodes"]] def _step_by_substring(p: dict, needle: str) -> dict: - for s in p["steps"]: - if s.get("type") == "command" and needle in (s.get("cmd") or ""): - return s + for n in p["graph"]["nodes"]: + if needle in 
(n["step"].get("cmd") or ""): + return n["step"] raise AssertionError(needle) diff --git a/dsls/harmont-py/tests/test_zig_toolchain.py b/dsls/harmont-py/tests/test_zig_toolchain.py index 0b86937..313e419 100644 --- a/dsls/harmont-py/tests/test_zig_toolchain.py +++ b/dsls/harmont-py/tests/test_zig_toolchain.py @@ -8,7 +8,7 @@ def test_zig_no_path_returns_toolchain() -> None: - """hm.zig() (without path=) returns a ZigToolchain — not a ZigProject.""" + """hm.zig() (without path=) returns a ZigToolchain -- not a ZigProject.""" tc = hm.zig() assert isinstance(tc, ZigToolchain) @@ -64,18 +64,31 @@ def ci( return (lib_a.build(), lib_b.build()) envelope = json.loads(hm.dump_registry_json()) - steps = envelope["pipelines"][0]["definition"]["steps"] - zig_installs = [s for s in steps if s.get("label") == ":zig: install"] + definition = envelope["pipelines"][0]["definition"] + nodes = definition["graph"]["nodes"] + edges = definition["graph"]["edges"] + + zig_installs = [n for n in nodes if n["step"].get("label") == ":zig: install"] assert len(zig_installs) == 1, ( - f"expected exactly one :zig: install step, got " - f"{[s['key'] for s in zig_installs]}" + f"expected exactly one :zig: install node, got " + f"{[n['step']['key'] for n in zig_installs]}" ) - install_key = zig_installs[0]["key"] - lib_a_build = next(s for s in steps if "lib-a" in (s.get("label") or "")) - lib_b_build = next(s for s in steps if "lib-b" in (s.get("label") or "")) - assert lib_a_build["builds_in"] == install_key - assert lib_b_build["builds_in"] == install_key + install_key = zig_installs[0]["step"]["key"] + lib_a_build = next(n for n in nodes if "lib-a" in (n["step"].get("label") or "")) + lib_b_build = next(n for n in nodes if "lib-b" in (n["step"].get("label") or "")) + + # Verify builds_in edges connect install to both builds. 
+ key_by_idx = {i: n["step"]["key"] for i, n in enumerate(nodes)} + idx_by_key = {v: k for k, v in key_by_idx.items()} + + install_idx = idx_by_key[install_key] + lib_a_idx = idx_by_key[lib_a_build["step"]["key"]] + lib_b_idx = idx_by_key[lib_b_build["step"]["key"]] + + builds_in_edges = [(s, d) for s, d, k in edges if k == "builds_in"] + assert (install_idx, lib_a_idx) in builds_in_edges + assert (install_idx, lib_b_idx) in builds_in_edges reg.clear_registry() targets.clear_target_cache() From df07331584f34e5d94207c84cc92dbd8aab36c2d Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:01:13 -0700 Subject: [PATCH 24/40] refactor: move CommandStep and Cache into graph module, flatten exports lib.rs now only re-exports from the private graph module. All downstream imports use hm_pipeline_ir::{PipelineGraph, ...} directly. --- crates/hm-pipeline-ir/src/graph.rs | 28 ++++++++++++++++- crates/hm-pipeline-ir/src/lib.rs | 36 ++-------------------- crates/hm-pipeline-ir/tests/graph_build.rs | 2 +- crates/hm-pipeline-ir/tests/graph_serde.rs | 5 ++- crates/hm/src/commands/run/local.rs | 2 +- crates/hm/src/orchestrator/graph.rs | 2 +- 6 files changed, 35 insertions(+), 40 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index 225830d..381384b 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -3,9 +3,35 @@ use std::collections::BTreeMap; use daggy::petgraph::visit::IntoNodeReferences; use daggy::{Dag, NodeIndex, Walker}; +use schemars::JsonSchema as DeriveJsonSchema; use serde::{Deserialize, Serialize}; -use crate::CommandStep; +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +pub struct CommandStep { + pub key: String, + #[serde(default)] + pub label: Option, + pub cmd: String, + #[serde(default)] + pub image: Option, + #[serde(default)] + pub env: Option>, + #[serde(default)] + pub timeout_seconds: Option, + #[serde(default)] + pub 
cache: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub runner: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub runner_args: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] +pub struct Cache { + pub policy: String, + #[serde(default)] + pub key: Option, +} #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NodeWeight { diff --git a/crates/hm-pipeline-ir/src/lib.rs b/crates/hm-pipeline-ir/src/lib.rs index 8560326..df48be7 100644 --- a/crates/hm-pipeline-ir/src/lib.rs +++ b/crates/hm-pipeline-ir/src/lib.rs @@ -2,41 +2,11 @@ //! //! The wire format is a petgraph-serde graph. Nodes carry //! `CommandStep` + resolved env; edges are `EdgeKind` (`BuildsIn` or -//! `DependsOn`). See `graph::PipelineGraph` for the top-level type. +//! `DependsOn`). `PipelineGraph` is the top-level type. #![forbid(unsafe_code)] #![allow(clippy::multiple_crate_versions, clippy::cargo_common_metadata)] -use std::collections::BTreeMap; +mod graph; -use schemars::JsonSchema as DeriveJsonSchema; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct CommandStep { - pub key: String, - #[serde(default)] - pub label: Option, - pub cmd: String, - #[serde(default)] - pub image: Option, - #[serde(default)] - pub env: Option>, - #[serde(default)] - pub timeout_seconds: Option, - #[serde(default)] - pub cache: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub runner: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub runner_args: Option, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] -pub struct Cache { - pub policy: String, - #[serde(default)] - pub key: Option, -} - -pub mod graph; +pub use graph::{Cache, CommandStep, EdgeKind, NodeWeight, PipelineGraph}; diff --git a/crates/hm-pipeline-ir/tests/graph_build.rs 
b/crates/hm-pipeline-ir/tests/graph_build.rs index fae10a5..7bf6ffb 100644 --- a/crates/hm-pipeline-ir/tests/graph_build.rs +++ b/crates/hm-pipeline-ir/tests/graph_build.rs @@ -6,7 +6,7 @@ clippy::panic )] -use hm_pipeline_ir::graph::PipelineGraph; +use hm_pipeline_ir::PipelineGraph; fn graph(json: &[u8]) -> PipelineGraph { serde_json::from_slice(json).unwrap() diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index a2c181a..799bfc9 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -8,8 +8,7 @@ use std::collections::BTreeMap; -use hm_pipeline_ir::graph::{EdgeKind, NodeWeight}; -use hm_pipeline_ir::CommandStep; +use hm_pipeline_ir::{CommandStep, EdgeKind, NodeWeight}; #[test] fn node_weight_round_trips() { @@ -47,7 +46,7 @@ fn edge_kind_round_trips() { assert_eq!(dep, EdgeKind::DependsOn); } -use hm_pipeline_ir::graph::PipelineGraph; +use hm_pipeline_ir::PipelineGraph; fn build_test_graph() -> PipelineGraph { serde_json::from_value(serde_json::json!({ diff --git a/crates/hm/src/commands/run/local.rs b/crates/hm/src/commands/run/local.rs index 88e76e8..5828d04 100644 --- a/crates/hm/src/commands/run/local.rs +++ b/crates/hm/src/commands/run/local.rs @@ -46,7 +46,7 @@ pub async fn run_pipeline_v0_one_shot( )) } -fn decode_plan_to_wire(bytes: &[u8]) -> anyhow::Result { +fn decode_plan_to_wire(bytes: &[u8]) -> anyhow::Result { serde_json::from_slice(bytes).map_err(|e| anyhow::anyhow!("decode pipeline JSON: {e}")) } diff --git a/crates/hm/src/orchestrator/graph.rs b/crates/hm/src/orchestrator/graph.rs index dcad175..eaedcef 100644 --- a/crates/hm/src/orchestrator/graph.rs +++ b/crates/hm/src/orchestrator/graph.rs @@ -1 +1 @@ -pub use hm_pipeline_ir::graph::{EdgeKind, NodeWeight, PipelineGraph as Graph}; +pub use hm_pipeline_ir::{EdgeKind, NodeWeight, PipelineGraph as Graph}; From 9dc470ff574378aeea5a90c5ab473a70764eae16 Mon Sep 17 00:00:00 2001 From: Marko 
Vejnovic Date: Sat, 23 May 2026 19:01:59 -0700 Subject: [PATCH 25/40] refactor: remove unused PipelineGraph::node_indices method --- crates/hm-pipeline-ir/src/graph.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index 381384b..c7250a4 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -179,8 +179,4 @@ impl PipelineGraph { } out } - - pub fn node_indices(&self) -> impl Iterator + '_ { - self.dag.graph().node_indices() - } } From 7d8b3c3c9060a7f3dab6f24ffe998cddf5bb1e98 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:10:31 -0700 Subject: [PATCH 26/40] refactor: rename NodeWeight to Transition, add get_transition accessor, rewrite chains() functionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - NodeWeight → Transition (public), accessed via PipelineGraph::get_transition() - Replaces separate command_step()/step_env() accessors - chains() rewritten using std::iter::successors — no mutable placed map - Removed unused node_indices() method --- crates/hm-pipeline-ir/src/graph.rs | 39 ++++++++------------ crates/hm-pipeline-ir/src/lib.rs | 2 +- crates/hm-pipeline-ir/tests/graph_build.rs | 4 +- crates/hm-pipeline-ir/tests/graph_serde.rs | 10 ++--- crates/hm/src/orchestrator/graph.rs | 2 +- crates/hm/src/orchestrator/scheduler.rs | 8 ++-- crates/hm/tests/default_image_inheritance.rs | 8 ++-- crates/hm/tests/runner_dispatch.rs | 4 +- 8 files changed, 33 insertions(+), 44 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index c7250a4..e2af6e0 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -34,7 +34,7 @@ pub struct Cache { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NodeWeight { +pub struct Transition { pub step: CommandStep, pub env: BTreeMap, } @@ -53,7 +53,7 @@ pub struct 
PipelineGraph { #[serde(default, skip_serializing_if = "Option::is_none")] default_image: Option, #[serde(rename = "graph")] - dag: Dag, + dag: Dag, } fn default_version() -> String { @@ -72,7 +72,7 @@ impl PipelineGraph { } #[must_use] - pub fn node_weight(&self, idx: NodeIndex) -> &NodeWeight { + pub fn get_transition(&self, idx: NodeIndex) -> &Transition { &self.dag[idx] } @@ -131,29 +131,20 @@ impl PipelineGraph { #[must_use] pub fn chains(&self) -> Vec> { - let mut placed: BTreeMap = BTreeMap::new(); - let mut out: Vec> = Vec::new(); let mut indices: Vec = self.dag.graph().node_indices().collect(); indices.sort(); - for root in &indices { - if *placed.get(root).unwrap_or(&false) || self.is_chain_step(*root) { - continue; - } - let mut chain = vec![*root]; - placed.insert(*root, true); - let mut cur = *root; - while let Some(next) = self - .builds_in_children(cur) - .into_iter() - .find(|&c| self.is_chain_step(c)) - { - chain.push(next); - placed.insert(next, true); - cur = next; - } - out.push(chain); - } - out + indices + .into_iter() + .filter(|&n| !self.is_chain_step(n)) + .map(|root| { + std::iter::successors(Some(root), |&cur| { + self.builds_in_children(cur) + .into_iter() + .find(|&c| self.is_chain_step(c)) + }) + .collect() + }) + .collect() } #[must_use] diff --git a/crates/hm-pipeline-ir/src/lib.rs b/crates/hm-pipeline-ir/src/lib.rs index df48be7..cd55c2a 100644 --- a/crates/hm-pipeline-ir/src/lib.rs +++ b/crates/hm-pipeline-ir/src/lib.rs @@ -9,4 +9,4 @@ mod graph; -pub use graph::{Cache, CommandStep, EdgeKind, NodeWeight, PipelineGraph}; +pub use graph::{Cache, CommandStep, EdgeKind, PipelineGraph, Transition}; diff --git a/crates/hm-pipeline-ir/tests/graph_build.rs b/crates/hm-pipeline-ir/tests/graph_build.rs index 7bf6ffb..0d89b37 100644 --- a/crates/hm-pipeline-ir/tests/graph_build.rs +++ b/crates/hm-pipeline-ir/tests/graph_build.rs @@ -47,7 +47,7 @@ fn root_inherits_default_image() { "edges": [] } }"#); - let node = 
g.node_weight(g.node_index_by_key("a").unwrap()); + let node = g.get_transition(g.node_index_by_key("a").unwrap()); assert_eq!(node.step.image.as_deref(), Some("ubuntu:24.04")); } @@ -67,7 +67,7 @@ fn child_does_not_inherit_default_image() { ] } }"#); - let b = g.node_weight(g.node_index_by_key("b").unwrap()); + let b = g.get_transition(g.node_index_by_key("b").unwrap()); assert!(b.step.image.is_none()); } diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index 799bfc9..0bc6310 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -8,11 +8,11 @@ use std::collections::BTreeMap; -use hm_pipeline_ir::{CommandStep, EdgeKind, NodeWeight}; +use hm_pipeline_ir::{CommandStep, EdgeKind, Transition}; #[test] -fn node_weight_round_trips() { - let nw = NodeWeight { +fn transition_round_trips() { + let nw = Transition { step: CommandStep { key: "a".into(), label: Some("step A".into()), @@ -27,7 +27,7 @@ fn node_weight_round_trips() { env: BTreeMap::from([("FOO".into(), "bar".into())]), }; let json = serde_json::to_string(&nw).unwrap(); - let back: NodeWeight = serde_json::from_str(&json).unwrap(); + let back: Transition = serde_json::from_str(&json).unwrap(); assert_eq!(back.step.key, "a"); assert_eq!(back.env.get("FOO").unwrap(), "bar"); } @@ -75,7 +75,7 @@ fn pipeline_graph_round_trips_through_json() { assert_eq!(back.node_count(), 3); assert_eq!(back.default_image(), Some("ubuntu:24.04")); let a = back.node_index_by_key("a").unwrap(); - assert_eq!(back.node_weight(a).step.image.as_deref(), Some("ubuntu:24.04")); + assert_eq!(back.get_transition(a).step.image.as_deref(), Some("ubuntu:24.04")); let b = back.node_index_by_key("b").unwrap(); assert!(back.builds_in_parent(b).is_some()); } diff --git a/crates/hm/src/orchestrator/graph.rs b/crates/hm/src/orchestrator/graph.rs index eaedcef..1c81fa6 100644 --- a/crates/hm/src/orchestrator/graph.rs +++ 
b/crates/hm/src/orchestrator/graph.rs @@ -1 +1 @@ -pub use hm_pipeline_ir::{EdgeKind, NodeWeight, PipelineGraph as Graph}; +pub use hm_pipeline_ir::{EdgeKind, PipelineGraph as Graph, Transition}; diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index 1f7c7a1..5fd426d 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -310,12 +310,10 @@ async fn run_chain( if cancel.is_cancelled() { return Ok(0); } - let step_wire = graph.node_weight(i).step.clone(); - // Keep a copy of the step key for diagnostics — `step_wire` is - // moved into `ExecutorInput` below. + let t = graph.get_transition(i); + let step_wire = t.step.clone(); let step_key = step_wire.key.clone(); - let env_map: std::collections::BTreeMap<String, String> = - graph.node_weight(i).env.clone(); + let env_map: std::collections::BTreeMap<String, String> = t.env.clone(); let step_id = Uuid::new_v4(); bus.emit(BuildEvent::StepQueued { diff --git a/crates/hm/tests/default_image_inheritance.rs b/crates/hm/tests/default_image_inheritance.rs index ba7438f..0e21680 100644 --- a/crates/hm/tests/default_image_inheritance.rs +++ b/crates/hm/tests/default_image_inheritance.rs @@ -34,7 +34,7 @@ fn root_step_inherits_default_image() { }"#); let idx = g.node_index_by_key("apt-base").unwrap(); assert_eq!( - g.node_weight(idx).step.image.as_deref(), + g.get_transition(idx).step.image.as_deref(), Some("ubuntu:24.04"), "root step must inherit pipeline default_image" ); @@ -55,7 +55,7 @@ fn root_step_explicit_image_wins() { }"#); let idx = g.node_index_by_key("rust").unwrap(); assert_eq!( - g.node_weight(idx).step.image.as_deref(), + g.get_transition(idx).step.image.as_deref(), Some("rust:1.82"), "explicit per-step image must override default_image" ); @@ -82,7 +82,7 @@ fn child_step_unchanged_by_default_image() { }"#); let idx = g.node_index_by_key("child").unwrap(); assert!( - g.node_weight(idx).step.image.is_none(), + g.get_transition(idx).step.image.is_none(),
"child step must not inherit default_image — chain steps boot from parent snapshot", ); } @@ -101,7 +101,7 @@ fn no_default_image_leaves_root_alone() { }"#); let idx = g.node_index_by_key("k").unwrap(); assert!( - g.node_weight(idx).step.image.is_none(), + g.get_transition(idx).step.image.is_none(), "absent default_image must not synthesize an image" ); } diff --git a/crates/hm/tests/runner_dispatch.rs b/crates/hm/tests/runner_dispatch.rs index 99401cc..e7fd849 100644 --- a/crates/hm/tests/runner_dispatch.rs +++ b/crates/hm/tests/runner_dispatch.rs @@ -90,7 +90,7 @@ async fn runner_field_dispatches_to_named_plugin() { // load-bearing one. let first = graph.node_index_by_key("fs-step").unwrap(); assert_eq!( - graph.node_weight(first).step.runner.as_deref(), + graph.get_transition(first).step.runner.as_deref(), Some("freestyle"), "graph dropped `runner` field — A3's wire-type fix has regressed" ); @@ -98,7 +98,7 @@ async fn runner_field_dispatches_to_named_plugin() { // 3. Build the executor input exactly as the scheduler does // (orchestrator/scheduler.rs::run_chain). Cloning the wire // step preserves `runner` and `runner_args` verbatim. 
- let step_wire = graph.node_weight(first).step.clone(); + let step_wire = graph.get_transition(first).step.clone(); let input = ExecutorInput { step: step_wire, workspace_archive_id: ArchiveId(Uuid::nil()), From 1cfa580e0fff0fedca4e36bc4f30c517a6eb94bc Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:15:42 -0700 Subject: [PATCH 27/40] cleanup --- dsls/harmont-py/.github/workflows/ci.yml | 40 ------------- dsls/harmont-py/.github/workflows/release.yml | 60 ------------------- 2 files changed, 100 deletions(-) delete mode 100644 dsls/harmont-py/.github/workflows/ci.yml delete mode 100644 dsls/harmont-py/.github/workflows/release.yml diff --git a/dsls/harmont-py/.github/workflows/ci.yml b/dsls/harmont-py/.github/workflows/ci.yml deleted file mode 100644 index b2b777c..0000000 --- a/dsls/harmont-py/.github/workflows/ci.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: CI - -on: - pull_request: - push: - branches: [main] - -permissions: - contents: read - -jobs: - test: - name: pytest + ruff + mypy - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.11", "3.12"] - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: pip - - - name: Install harmont + dev extras - run: pip install -e '.[dev]' - - - name: ruff check - run: ruff check . - - - name: mypy - run: mypy harmont - - - name: pytest - run: | - pytest -v \ - --deselect tests/test_gradle.py \ - --deselect tests/test_haskell.py diff --git a/dsls/harmont-py/.github/workflows/release.yml b/dsls/harmont-py/.github/workflows/release.yml deleted file mode 100644 index 2be6deb..0000000 --- a/dsls/harmont-py/.github/workflows/release.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Release - -on: - push: - tags: - - "v*" - -permissions: - contents: read - -jobs: - pypi-publish: - name: Publish to PyPI - runs-on: ubuntu-latest - environment: - # PyPI Trusted Publisher is scoped to this environment. 
Configure - # the matching publisher on https://pypi.org/manage/account/publishing/ - # before the first tag push (see RELEASING.md). - name: release - url: https://pypi.org/project/harmont/ - permissions: - # `id-token: write` is the OIDC switch that pypa/gh-action-pypi-publish - # uses to mint a short-lived token PyPI accepts in lieu of an API token. - id-token: write - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Set version from tag - run: | - VERSION="${GITHUB_REF_NAME#v}" - echo "VERSION=$VERSION" >> "$GITHUB_ENV" - # Sed only the first match so this is a no-op if pyproject is - # already at the tagged version (a re-run with a corrected tag, - # for instance, shouldn't double-edit). - sed -i '0,/version = "0.0.0-dev"/s//version = "'"$VERSION"'"/' pyproject.toml - grep -n "^version" pyproject.toml - - - name: Install build - run: python -m pip install --upgrade build - - - name: Build sdist and wheel - run: python -m build - - - name: Inspect dist - run: | - ls -la dist/ - # Fail fast if either artifact is missing. - test -f dist/harmont-${VERSION}.tar.gz - test -f dist/harmont-${VERSION}-py3-none-any.whl - - - name: Publish to PyPI via Trusted Publishing - uses: pypa/gh-action-pypi-publish@release/v1 - # No `with:` block needed — the action defaults to using OIDC - # against the project's configured Trusted Publisher when - # `id-token: write` is granted (above). It picks up dist/* by - # default. 
From fc11aec8c1b08ee1c2e4c020abedbe381e411e4b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:16:02 -0700 Subject: [PATCH 28/40] cleanup --- dsls/harmont-py/CLAUDE.md | 266 ----------------------------------- dsls/harmont-py/README.md | 168 ---------------------- dsls/harmont-py/RELEASING.md | 37 ----- 3 files changed, 471 deletions(-) delete mode 100644 dsls/harmont-py/CLAUDE.md delete mode 100644 dsls/harmont-py/README.md delete mode 100644 dsls/harmont-py/RELEASING.md diff --git a/dsls/harmont-py/CLAUDE.md b/dsls/harmont-py/CLAUDE.md deleted file mode 100644 index 36848b8..0000000 --- a/dsls/harmont-py/CLAUDE.md +++ /dev/null @@ -1,266 +0,0 @@ -# cidsl/py — Python chain DSL for Harmont pipelines - -> Read `PRINCIPLES.md` at the repo root before editing. Validation -> errors raised in `__post_init__` and from the lowering pass are -> user-facing to pipeline authors — keep them precise and fix-directed -> per § 5. - -A Python package that emits the v0 IR JSON for Harmont CI pipelines. -Runtime deps: `croniter` (HAR-9 schedule trigger validation). - -## How It Works - -`Step` is a frozen dataclass. `scratch()` returns a root `Step`; -`Step.sh(cmd, **kw)` returns a child carrying one shell command (use -`cwd="path"` to prepend `cd && ` to the command); -`Step.fork(label=None)` returns a passthrough used to brand a branch. -`hm.sh(cmd, **kw)` is shorthand for `scratch().sh(cmd, **kw)` — start a -chain in one call. The `pipeline(*leaves, env=None)` factory walks back -from each leaf via `parent`, topo-sorts, and emits the v0 IR as a -Python dict. `pipeline_to_json(p)` serializes that dict (resolving -cache keys first via `harmont.keygen`) to the wire-format JSON string. - -## Build & Test - -```bash -python3 -m venv .venv && source .venv/bin/activate -pip install -e '.[dev]' - -pytest # all tests -pytest -v --tb=short -``` - -## Public surface - -`hm.pipeline` is **polymorphic**. 
When called with positional -`Step` arguments it is the factory — returns the v0 IR dict. When -called with no positionals (or a string slug) it is the HAR-9 -**decorator**: it registers the wrapped function as a CI pipeline. - -Decorator form: - -```python -import harmont as hm - - -@hm.pipeline("default") -def default() -> hm.Step: - return hm.sh("echo hi", label="hi") -``` - -Factory form (used inside a pipeline definition that builds the dict -imperatively, and in unit tests): - -```python -hm.pipeline(hm.sh("echo hi"), default_image="alpine:3.20") -``` - -Stage 1 of rendering (`hm.dump_registry_json`) walks every -`.harmont/*.py`, imports each (which has the side effect of running -the decorators), assembles each registered pipeline via the factory, -and emits a `schema_version="1"` envelope keyed by slug, with each -pipeline's resolved v0 IR carried in the `definition` field. - -The full surface (all reachable through `hm.`): - -```python -pipeline(slug=None, *, name=None, triggers=(), allow_manual=True, - env=None, default_image=None) # decorator -pipeline(*leaves, env=None, default_image=None) # factory (v0 IR dict) -pipeline_to_json(p, **kw) # -> str (wire JSON) -dump_registry_json() # -> str (envelope JSON) - -target() # decorator: memoized building block - -sh(cmd, *, cwd=None, label=None, ...) # -> Step (= scratch().sh(cmd, ...)) -scratch() # -> Step (root) -Step.sh(cmd, *, cwd=None, ...) # -> Step -Step.fork(label=None) # -> Step -wait(*, continue_on_failure=False) # -> Step - -# trigger constructors (passed via `triggers=` on the decorator) -push(branch=..., tag=...) -pull_request(branches=..., types=...) -schedule(cron=...) 
- -# cache helpers -ttl(duration) | on_change(*paths) | forever(env_keys=()) | compose(*policies) - -# language toolchains (call to construct; bare-form actions also work) -haskell(ghc=..., cabal="latest") # -> HaskellToolchain (cabal package via .package(path)) -rust(path=..., version="stable") # -> RustToolchain -npm(path=..., version="20") # -> NpmProject -elm(path=..., elm_version="0.19.1") # -> ElmProject -python(path=..., uv_version="latest") # -> PythonToolchain (uv-based) -go(path=..., version="1.23.2") # -> GoToolchain -gradle(path=..., jdk="21", kotlin=False) # -> GradleProject (Java + Kotlin) -cmake(path=..., lang="c"|"cpp") # -> CMakeProject -dotnet(path=..., channel="8.0") # -> DotnetProject -ruby(path=..., version="default") # -> RubyProject -ocaml(path=..., compiler="5.1.1") # -> OCamlProject -zig(version="0.13.0") # -> ZigToolchain (zig project via .project(path)) -zig(path=..., version="0.13.0") # -> ZigProject (one-shot) -perl(path=...) # -> PerlProject -composer(path=..., laravel=False) # -> ComposerProject (PHP + Laravel) -``` - -`Step` is opaque — pipeline authors do not read its attributes. - -### Reusable targets (HAR-28) - -`@hm.target()` decorates a parameterless function and memoizes its -return value per envelope render. Targets are the composition unit: - -```python -@hm.target() -def apt_base() -> hm.Step: - return hm.sh("apt-get update").sh("apt-get install -y python3 python3-venv") - -@hm.target() -def api(): - return hm.haskell(ghc="9.6.7").cabal(path="api") - -@hm.pipeline("ci") -def ci() -> tuple[hm.Step, ...]: - return (apt_base().sh("./run-smoke"), api()) -``` - -`@hm.target()` functions may return `Step`, `tuple[Step, ...]`, -`HaskellPackage`, `ElmProject`, `NpmProject`, or `RustToolchain`. 
-When such a value reaches the pipeline assembler it is unwrapped to -its default leaf: - -| Type | Default leaf | -|------|--------------| -| `HaskellPackage` | `.build()` | -| `RustToolchain` | `.build()` | -| `NpmProject` | `.install()` (npm-ci leaf) | -| `ElmProject` | `.make("src/Main.elm")` | - -Authors who want a different default call the explicit action -(`.test()`, `.lint()`, etc.) themselves. - -#### Fixture-style dependencies (typed markers) - -A target's parameters are typed annotations that tell the decorator -how to inject the value. Two markers are public: - -**`Target[T]`** — declares a dependency on another `@hm.target` by -parameter name. Static type-checkers see the parameter as `T`. - -**`Annotated[Step, BaseImage("X")]`** — declares a scratch-rooted -`Step` in image `"X"`. The first `.sh()` call on the parameter -inherits `image="X"`, so the first emitted IR step carries it. - -```python -from typing import Annotated - -import harmont as hm -from harmont.haskell import HaskellPackage, HaskellToolchain - -@hm.target() -def apt_base(base: Annotated[hm.Step, hm.BaseImage("ubuntu-24.04")]) -> hm.Step: - return base.sh("apt-get update").sh("apt-get install -y python3") - -@hm.target() -def api(ghc: hm.Target[HaskellToolchain]) -> HaskellPackage: - return ghc.cabal(path="api") - -@hm.pipeline("ci") -def ci( - apt_base: hm.Target[hm.Step], - api: hm.Target[HaskellPackage], -) -> tuple[hm.Step, ...]: - return (apt_base.sh("./run-smoke"), api) -``` - -Rules: - -- Every fixture parameter **must** carry a marker (`Target[T]` or - `Annotated[Step, BaseImage("...")]`) OR a default value. Unmarked - params raise at decoration time. -- `*args` / `**kwargs` / positional-only parameters are rejected. -- Duplicate target names raise at decoration time. Use - `@hm.target(name="...")` to disambiguate. -- Cycles raise `RuntimeError` listing the path. 
- -Both markers unwrap cleanly under mypy and pyright via PEP 593 -(`Annotated`); `assert_type(apt_base, Step)` and the like pass -without suppressions. - -Memoization scope is one `dump_registry_json` render. Two targets -that both depend on `apt_base` share the same `Step`, so the v0 IR -contains one apt-base step with N children — not N copies. - -## Deployments — `@hm.deploy` and `hm.dev` - -`@hm.deploy` is a driver-agnostic decorator that registers a function -as a long-lived service. The function returns a `Deployment` value -produced by a driver-specific factory; v1 ships only the local Docker -driver via `hm.dev.deploy(...)`. Future cloud drivers (`hm.aws.deploy`, -`hm.fly.deploy`) plug in without touching the top-level decorator. - -```python -import harmont as hm - -@hm.deploy("hello") -def hello() -> hm.Deployment: - return hm.dev.deploy( - image="python:3.12-alpine", - cmd=["python", "-m", "http.server", "5678"], - port_mapping={5678: hm.dev.port()}, - ) - -@hm.deploy("greeter") -def greeter(hello: hm.Dep[hm.Deployment]) -> hm.Deployment: - return hm.dev.deploy( - image="python:3.12-alpine", - cmd=["python", "-m", "http.server", "5678"], - port_mapping={5678: hm.dev.port()}, - env={"HELLO_HOST": hello.name}, - ) -``` - -Public surface: - -```python -hm.deploy(slug=None, *, name=None) # decorator -hm.Dep[T] # PEP-593 fixture marker -hm.Deployment # abstract dataclass - -hm.dev.deploy(*, image=None, from_=None, cmd=None, - port_mapping=None, env=None, - volumes=None, workdir=None) # -> LocalDeployment -hm.dev.port() # OS-assigned host port sentinel -hm.dev.LocalDeployment # concrete subclass -hm.dev.dump_registry_json(*, worktree_root) # -> v0 JSON -``` - -`hm.dev.port()` is only valid as a value in `port_mapping`. The host -port is assigned by Docker (via `-p :`) at `hm dev up` -time; query it from another terminal with `hm dev port-of -`. Ports are fresh on every `hm dev up`. 
- -The Rust CLI (`hm dev up`) shells out to `python -m harmont.dev ---dump-registry` to obtain the registry JSON. Schema is at -`docs/superpowers/specs/2026-05-21-hm-dev-deploy-design.md` § 1. - -## Cache keys - -`harmont.keygen.resolve_pipeline_keys` ports the algorithm previously -implemented in Scheme. `pipeline_to_json` calls it before -serialization, so every step whose policy is not `none` has a -deterministic `cache.key` baked into the wire-format JSON. - -## Snapshot lineage: `builds_in`, `image`, `default_image` - -A chain edge — `parent.sh(cmd, ...)` — emits `builds_in: ""` in the v0 IR JSON. The edge encodes both synchronisation (the -planner waits for the parent) and state inheritance (the local -executor reuses the parent's container; the cloud planner boots from -its snapshot). - -A step rooted at `scratch()` has `builds_in: null`. It boots from -`image="..."` locally (or the pipeline's `default_image`). The cloud -planner ignores `image`/`default_image` (it always boots from the -Freestyle base image). diff --git a/dsls/harmont-py/README.md b/dsls/harmont-py/README.md deleted file mode 100644 index 56f0ab9..0000000 --- a/dsls/harmont-py/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# harmont-py - -[![license](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) - -Python DSL for defining [Harmont](https://harmont.dev) CI pipelines. - -Pipelines are chains of shell commands, branched with `.fork()`, synchronized with `hm.wait()`, registered with a decorator, and rendered to a JSON IR. The companion [`harmont-cli`](https://github.com/harmont-dev/harmont-cli) consumes that IR and runs the pipeline locally in Docker or on the hosted Harmont cloud. - -The package installs as `harmont` and you import it as `harmont`: - -```python -import harmont as hm -``` - -## Quick start - -### 1. 
Write a pipeline - -A pipeline file lives at `.harmont/.py` in your repo: - -```python -import harmont as hm - - -@hm.pipeline("hello") -def hello() -> hm.Step: - return ( - hm.sh("echo 'hello from harmont'", label="hello") - .sh("uname -a", label="env") - ) -``` - -### 2. Install - -Not yet on PyPI. Install from source (Python 3.11+): - -```sh -git clone https://github.com/harmont-dev/harmont-py -cd harmont-py -pip install -e . -``` - -If you arrived here from the [`harmont-cli`](https://github.com/harmont-dev/harmont-cli) Quick start, you already did this — skip to Step 3. - -Development extras (pytest, mypy, ruff): - -```sh -pip install -e '.[dev]' -``` - -### 3. Run - -Use the [Harmont CLI](https://github.com/harmont-dev/harmont-cli): - -```sh -hm run hello -``` - -`hm run` walks `.harmont/*.py`, imports each file (triggering the decorators), renders the registered pipeline to JSON, and executes it (locally in Docker by default, or against the cloud via `hm cloud run`). - -## DSL surface - -| Primitive | Returns | What it does | -|---|---|---| -| `hm.sh(cmd, cwd=..., label=...)` | `Step` | Start a chain in one call (= `hm.scratch().sh(cmd, ...)`) | -| `hm.scratch()` | `Step` | Empty root; chain with `.sh(...)` for an explicit start | -| `Step.sh(cmd, cwd=..., ...)` | `Step` | Run a shell command; chained `.sh` shares container state | -| `Step.fork(label=...)` | `Step` | Branch a shared base into parallel work | -| `hm.wait()` | `Step` | Explicit synchronization barrier | -| `@hm.target()` | decorator | Reusable, memoized building block | -| `@hm.pipeline("slug")` | decorator | Register a pipeline (multiple per file are fine) | -| `hm.pipeline(*leaves, env=..., default_image=...)` | `dict` | Factory form — build the v0 IR dict directly (used in tests) | - -Cache policies (`hm.ttl`, `hm.on_change`, `hm.forever`, `hm.compose`), triggers (`hm.push`, `hm.pull_request`, `hm.schedule`), and matrix axes are documented in the module docstrings; start at 
`harmont/__init__.py`. - -## Language toolchains - -`harmont` ships first-class wrappers for the common toolchains. Each exposes the actions that make sense for that ecosystem (e.g. `.build()`, `.test()`, `.clippy()`, `.fmt()` for Rust; `.test()`, `.lint()`, `.fmt()`, `.typecheck()` for Python): - -| Call | Project type | -|---|---| -| `hm.rust(path=..., version="stable")` | cargo + clippy + rustfmt | -| `hm.haskell(ghc="9.6.7", cabal="latest")` | cabal (call `.cabal(path)` to build a package) | -| `hm.python(path=..., uv_version="latest")` | uv-based Python project | -| `hm.go(path=..., version="1.23.2")` | go build/test/vet/fmt | -| `hm.npm(path=..., version="20")` | npm + arbitrary scripts | -| `hm.gradle(path=..., jdk="21", kotlin=False)` | Java or Kotlin via Gradle | -| `hm.cmake(path=..., lang="c"\|"cpp")` | C/C++ via CMake + CTest | -| `hm.dotnet(path=..., channel="8.0")` | .NET via dotnet CLI | -| `hm.ruby(path=..., version="default")` | Bundler + Rake | -| `hm.ocaml(path=..., compiler="5.1.1")` | opam + Dune | -| `hm.zig(path=..., version="0.13.0")` | zig build/test/fmt | -| `hm.perl(path=...)` | cpanm + prove | -| `hm.composer(path=..., laravel=False)` | PHP / Laravel via Composer | -| `hm.elm(path=..., elm_version="0.19.1")` | Elm | - -Working examples for each toolchain live in [`harmont-cli/examples/`](https://github.com/harmont-dev/harmont-cli/tree/main/examples). - -## Composing with targets - -For larger pipelines, factor toolchain setup into `@hm.target()` and let pipelines depend on them by parameter name. `Target[T]` and `Annotated[Step, BaseImage("...")]` are typed markers that unwrap cleanly under mypy and pyright. 
- -```python -from typing import Annotated - -import harmont as hm -from harmont.haskell import HaskellPackage, HaskellToolchain - - -@hm.target() -def apt_base(base: Annotated[hm.Step, hm.BaseImage("ubuntu-24.04")]) -> hm.Step: - return base.sh("apt-get update").sh("apt-get install -y python3") - - -@hm.target() -def ghc() -> HaskellToolchain: - return hm.haskell(ghc="9.6.7") - - -@hm.target() -def api(ghc: hm.Target[HaskellToolchain]) -> HaskellPackage: - return ghc.cabal(path="api") - - -@hm.pipeline("ci") -def ci( - apt_base: hm.Target[hm.Step], - api: hm.Target[HaskellPackage], -) -> tuple[hm.Step, ...]: - return (apt_base.sh("./run-smoke"), api) -``` - -Every fixture parameter must carry a marker or default value; unmarked parameters raise at decoration time. Memoization scope is one `dump_registry_json` render, so two targets that depend on the same `apt_base` share a single step. - -
-How rendering works - -`hm.sh(...).sh(...)` builds a chain of frozen `Step` dataclasses. Each `.sh()` returns a new `Step` carrying the parent reference. The `hm.pipeline()` factory walks back from each leaf, topo-sorts, and emits a `version: "0"` IR dict matching the schema in `harmont-pipeline` (Haskell side). - -When used as a decorator, `@hm.pipeline("slug")` registers the wrapped function with a module-level registry. `hm.dump_registry_json()` walks every `.harmont/*.py`, imports each (which triggers the decorators), and returns the full envelope. - -A chain edge — `parent.sh(cmd, ...)` — emits `builds_in: ""` in the v0 IR JSON. The edge encodes synchronisation and state inheritance: the local executor reuses the parent's container; the cloud planner boots from its snapshot. A step rooted at `scratch()` has `builds_in: null` and boots from `image="..."` (or the pipeline's `default_image`) locally; the cloud planner ignores `image` (it always boots from the Freestyle base). - -The JSON wire format and cache-key algorithm are stable; see module docstrings under `harmont/` for the contract. - -
- -## Build & test - -```sh -python3 -m venv .venv && source .venv/bin/activate -pip install -e '.[dev]' - -pytest # all tests -pytest -v --tb=short -mypy --strict harmont -ruff check . -``` - -`pytest` is configured to treat warnings as errors (`filterwarnings = ["error"]`). - -## See also - -- [`harmont-cli`](https://github.com/harmont-dev/harmont-cli) — the CLI that runs pipelines defined with this package (`hm run`). - -## License - -MIT. See [`LICENSE`](LICENSE). diff --git a/dsls/harmont-py/RELEASING.md b/dsls/harmont-py/RELEASING.md deleted file mode 100644 index 1c3ded7..0000000 --- a/dsls/harmont-py/RELEASING.md +++ /dev/null @@ -1,37 +0,0 @@ -# Releasing harmont (Python DSL) - -The `harmont` Python package lives at `dsls/harmont-py/` in the -harmont-cli monorepo. It is published to PyPI alongside the Rust -crates when a version tag is pushed. - -## Cutting a release - -Releases are driven by git tags on this repo. The release workflow -(`.github/workflows/release.yml`) triggers on any tag matching `v*`, -seds the version into `dsls/harmont-py/pyproject.toml`, builds the -sdist and wheel, and publishes to PyPI via Trusted Publishing (OIDC). - -1. Tag from the repo root: - - ```sh - git tag v<version> - git push origin v<version> - ``` - -2. Watch the run: - - ```sh - gh run watch \ - "$(gh run list --workflow release.yml --limit 1 --json databaseId --jq '.[0].databaseId')" \ - --exit-status - ``` - -3. Confirm on <https://pypi.org/project/harmont/>.
- -## PyPI Trusted Publisher setup - -Configure on <https://pypi.org/manage/account/publishing/>: -- Owner: `harmont-dev` -- Repository: `harmont-cli` (this repo, not the archived harmont-py) -- Workflow filename: `release.yml` -- Environment: `release` From 1f11da68aa9a9e24b432494c949c426966d4db1d Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:34:05 -0700 Subject: [PATCH 29/40] feat: add dag() accessor to PipelineGraph --- crates/hm-pipeline-ir/src/graph.rs | 37 ++++++++++++---------- crates/hm-pipeline-ir/tests/graph_serde.rs | 6 ++++ 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index e2af6e0..be3e30c 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -53,7 +53,7 @@ pub struct PipelineGraph { #[serde(default, skip_serializing_if = "Option::is_none")] default_image: Option<String>, #[serde(rename = "graph")] - dag: Dag<Transition, EdgeKind>, + inner: Dag<Transition, EdgeKind>, } fn default_version() -> String { @@ -61,9 +61,14 @@ fn default_version() -> String { } impl PipelineGraph { + #[must_use] + pub fn dag(&self) -> &Dag<Transition, EdgeKind> { + &self.inner + } + #[must_use] pub fn node_count(&self) -> usize { - self.dag.node_count() + self.inner.node_count() } #[must_use] @@ -73,12 +78,12 @@ impl PipelineGraph { #[must_use] pub fn get_transition(&self, idx: NodeIndex) -> &Transition { - &self.dag[idx] + &self.inner[idx] } #[must_use] pub fn node_index_by_key(&self, key: &str) -> Option<NodeIndex> { - self.dag + self.inner .graph() .node_references() .find(|(_, w)| w.step.key == key) @@ -87,37 +92,37 @@ impl PipelineGraph { #[must_use] pub fn parent_keys(&self, idx: NodeIndex) -> Vec<String> { - self.dag + self.inner .parents(idx) - .iter(&self.dag) - .map(|(_, parent_idx)| self.dag[parent_idx].step.key.clone()) + .iter(&self.inner) + .map(|(_, parent_idx)| self.inner[parent_idx].step.key.clone()) .collect() } #[must_use] pub fn builds_in_parent(&self, idx: NodeIndex) -> Option<NodeIndex> { - self.dag + self.inner .parents(idx) - .iter(&self.dag) -
.find(|(e, _)| self.dag.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) + .iter(&self.inner) + .find(|(e, _)| self.inner.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) .map(|(_, parent_idx)| parent_idx) } #[must_use] pub fn builds_in_children(&self, idx: NodeIndex) -> Vec<NodeIndex> { - self.dag + self.inner .children(idx) - .iter(&self.dag) - .filter(|(e, _)| self.dag.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) + .iter(&self.inner) + .filter(|(e, _)| self.inner.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) .map(|(_, child_idx)| child_idx) .collect() } #[must_use] pub fn all_parents(&self, idx: NodeIndex) -> Vec<NodeIndex> { - self.dag + self.inner .parents(idx) - .iter(&self.dag) + .iter(&self.inner) .map(|(_, parent_idx)| parent_idx) .collect() } @@ -131,7 +136,7 @@ impl PipelineGraph { #[must_use] pub fn chains(&self) -> Vec<Vec<NodeIndex>> { - let mut indices: Vec<NodeIndex> = self.dag.graph().node_indices().collect(); + let mut indices: Vec<NodeIndex> = self.inner.graph().node_indices().collect(); indices.sort(); indices .into_iter() diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index 0bc6310..55e03f2 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -80,6 +80,12 @@ fn pipeline_graph_round_trips_through_json() { assert!(back.builds_in_parent(b).is_some()); } +#[test] +fn dag_accessor_exposes_node_count() { + let g = build_test_graph(); + assert_eq!(g.dag().node_count(), 3); +} + #[test] fn pipeline_graph_snapshot() { let g = build_test_graph(); From 4c7b61eacec3fe9f557bdd3010df0ed9d67acf2b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:35:56 -0700 Subject: [PATCH 30/40] feat: add StepOutcome type and compute_chain_info helper --- crates/hm/src/orchestrator/scheduler.rs | 89 ++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index
5fd426d..df55dc5 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -29,7 +29,7 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; -use daggy::NodeIndex; +use daggy::{Dag, NodeIndex, Walker}; use anyhow::{Context, Result}; use hm_plugin_protocol::{ @@ -40,7 +40,7 @@ use uuid::Uuid; use crate::error::HmError; use crate::orchestrator::docker_client::DockerClient; -use crate::orchestrator::graph::Graph; +use crate::orchestrator::graph::{EdgeKind, Graph, Transition}; use crate::orchestrator::source::build_archive_bytes; use crate::plugin::{PluginRegistry, RegistryConfig}; @@ -50,6 +50,15 @@ use tokio_util::sync::CancellationToken; use super::events::EventBus; use super::state::{self, OrchestratorState}; +/// Outcome of a single step execution, used by the upcoming dataflow +/// scheduler to propagate exit codes and snapshot lineage. +#[derive(Clone)] +#[allow(dead_code)] +struct StepOutcome { + exit_code: i32, + snapshot: Option, +} + /// Entry point: run a parsed pipeline locally end-to-end. Returns /// the overall exit code (0 = success, [`crate::error::EXIT_BUILD_FAILED`] /// when any step exited non-zero). @@ -436,3 +445,79 @@ async fn run_chain( } Ok(0) } + +/// Per-node chain membership used for event enrichment. Maps every +/// node in the DAG to (chain_id, position_within_chain). +#[allow(dead_code)] +struct ChainInfo { + chain_count: usize, + node_chain_id: HashMap<NodeIndex, usize>, + node_chain_pos: HashMap<NodeIndex, usize>, +} + +/// Walk the DAG and assign each node to a linear chain. A chain starts +/// at any node not yet assigned and extends forward through single +/// `BuildsIn` children where the child has exactly one parent total. +/// This mirrors `PipelineGraph::chains()` but lives as a free function +/// operating on the raw `Dag`.
+#[allow(dead_code)] +fn compute_chain_info(dag: &Dag) -> ChainInfo { + let mut node_chain_id: HashMap = HashMap::new(); + let mut node_chain_pos: HashMap = HashMap::new(); + let mut chain_count: usize = 0; + + // Walk nodes in index order. + let mut indices: Vec = dag.graph().node_indices().collect(); + indices.sort(); + + for idx in indices { + if node_chain_id.contains_key(&idx) { + continue; + } + + // Start a new chain rooted at this unvisited node. + let chain_id = chain_count; + chain_count += 1; + + let mut cur = idx; + let mut pos: usize = 0; + loop { + node_chain_id.insert(cur, chain_id); + node_chain_pos.insert(cur, pos); + pos += 1; + + // Collect BuildsIn children of `cur`. + let builds_in_children: Vec = dag + .children(cur) + .iter(dag) + .filter(|(e, _)| dag.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) + .map(|(_, child)| child) + .collect(); + + // Follow the chain only if there's exactly one BuildsIn child... + if builds_in_children.len() != 1 { + break; + } + let child = builds_in_children[0]; + + // ...that hasn't been assigned yet... + if node_chain_id.contains_key(&child) { + break; + } + + // ...and that child has exactly one parent total. 
+ let parent_count = dag.parents(child).iter(dag).count(); + if parent_count != 1 { + break; + } + + cur = child; + } + } + + ChainInfo { + chain_count, + node_chain_id, + node_chain_pos, + } +} From 84792dc25db84213a6cf9c10589416420a39f177 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:37:56 -0700 Subject: [PATCH 31/40] feat: extract execute_step from run_chain --- crates/hm/src/orchestrator/scheduler.rs | 147 ++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index df55dc5..8e2f995 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -446,6 +446,153 @@ async fn run_chain( Ok(0) } +/// Execute a single step, returning its outcome (exit code + snapshot). +/// +/// This is the single-step analogue of the per-iteration body inside +/// [`run_chain`]. The upcoming dataflow scheduler (which dispatches +/// nodes in topological order rather than chain order) will call this +/// once per node. +/// +/// On cache hit the function returns early with exit code 0 and the +/// cached snapshot so downstream nodes receive the correct +/// `parent_snapshot` without running the plugin at all. +/// +/// On non-zero exit the cancellation token is cancelled so sibling +/// tasks observe the failure promptly. +#[allow(dead_code, clippy::too_many_arguments)] +async fn execute_step( + node_idx: NodeIndex, + transition: Transition, + parent_snapshot: Option, + chain_id: usize, + chain_pos: usize, + archive_id: ArchiveId, + run_id: Uuid, + registry: Arc>, + bus: Arc, + cancel: CancellationToken, +) -> Result { + let step_wire = transition.step; + let step_key = step_wire.key.clone(); + let env_map = transition.env; + let step_id = Uuid::new_v4(); + + bus.emit(BuildEvent::StepQueued { + step_id, + key: step_key.clone(), + chain_idx: chain_pos, + }); + + // Decide cache outcome host-side. 
+ let decision = { + let s = state::current().context("no orchestrator state")?; + cache::decide(&s.docker, &step_wire).await? + }; + if let hm_plugin_protocol::CacheDecision::Hit { tag } = &decision { + bus.emit(BuildEvent::StepCacheHit { + step_id, + key: step_wire + .cache + .as_ref() + .and_then(|c| c.key.clone()) + .unwrap_or_default(), + tag: tag.0.clone(), + }); + // Short-circuit: the cached image already exists locally, so + // there is nothing for the executor plugin to do. Return the + // snapshot so downstream nodes can use it as their parent. + return Ok(StepOutcome { + exit_code: 0, + snapshot: Some(tag.clone()), + }); + } + + let input = ExecutorInput { + step: step_wire, + workspace_archive_id: archive_id, + env: env_map, + workdir: "/workspace".to_string(), + run_id, + step_id, + cache_lookup: decision, + parent_snapshot, + }; + + // Resolve the runner plugin name. Steps that didn't declare a + // runner fall back to whichever plugin registered as + // `default: true` (docker, in the embedded binary). + let runner = if let Some(name) = input.step.runner.clone() { + name + } else { + let reg = registry.lock().await; + reg.default_runner_name() + .map_or_else(|| "docker".into(), str::to_string) + }; + let started = Instant::now(); + bus.emit(BuildEvent::StepStart { + step_id, + runner: runner.clone(), + image: input.step.image.clone(), + }); + + // Dispatch to the runner-named plugin. Look up the Arc under the + // registry lock, drop the lock BEFORE awaiting so other tasks can + // dispatch concurrently. + let plugin = { + let reg = registry.lock().await; + let idx = reg + .runner_index + .get(&runner) + .copied() + .or(reg.default_runner) + .ok_or_else(|| HmError::UnknownRunner { + step_key: input.step.key.clone(), + runner: runner.clone(), + available: reg.runner_index.keys().cloned().collect(), + })?; + reg.get(idx).context("plugin moved away under us")? 
+ }; + crate::plugin::host_fns::set_current_step_id(step_id); + let result: Result = plugin.call_capability("hm_executor_run", &input).await; + crate::plugin::host_fns::clear_current_step_id(); + + let dur_ms = started.elapsed().as_millis() as u64; + match result { + Ok(sr) => { + bus.emit(BuildEvent::StepEnd { + step_id, + exit_code: sr.exit_code, + duration_ms: dur_ms, + snapshot: sr.committed_snapshot.clone(), + }); + if sr.exit_code != 0 { + bus.emit(BuildEvent::ChainFailed { + chain_idx: chain_id, + failed_step_id: step_id, + failed_step_key: step_key.clone(), + exit_code: sr.exit_code, + message: format!("step '{}' exited with code {}", step_key, sr.exit_code), + ts: chrono::Utc::now(), + }); + cancel.cancel(); + } + Ok(StepOutcome { + exit_code: sr.exit_code, + snapshot: sr.committed_snapshot, + }) + } + Err(e) => { + bus.emit(BuildEvent::StepEnd { + step_id, + exit_code: 1, + duration_ms: dur_ms, + snapshot: None, + }); + Err(e) + } + } +} + /// Per-node chain membership used for event enrichment. Maps every /// node in the DAG to (chain_id, position_within_chain). 
#[allow(dead_code)] From 06a88be657a565a57fd6d9b0dbd974617af1e86b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:44:40 -0700 Subject: [PATCH 32/40] refactor: replace chain-based scheduling with topological dataflow --- crates/hm/src/orchestrator/scheduler.rs | 351 ++++++------------------ 1 file changed, 87 insertions(+), 264 deletions(-) diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index 8e2f995..3aca52c 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -24,12 +24,14 @@ clippy::significant_drop_tightening )] -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; use daggy::{Dag, NodeIndex, Walker}; +use daggy::petgraph::algo::toposort; +use futures::future::{BoxFuture, FutureExt, join_all}; use anyhow::{Context, Result}; use hm_plugin_protocol::{ @@ -40,7 +42,7 @@ use uuid::Uuid; use crate::error::HmError; use crate::orchestrator::docker_client::DockerClient; -use crate::orchestrator::graph::{EdgeKind, Graph, Transition}; +use crate::orchestrator::graph::{EdgeKind, Transition}; use crate::orchestrator::source::build_archive_bytes; use crate::plugin::{PluginRegistry, RegistryConfig}; @@ -53,7 +55,6 @@ use super::state::{self, OrchestratorState}; /// Outcome of a single step execution, used by the upcoming dataflow /// scheduler to propagate exit codes and snapshot lineage. #[derive(Clone)] -#[allow(dead_code)] struct StepOutcome { exit_code: i32, snapshot: Option, @@ -74,9 +75,6 @@ pub async fn run( parallelism: usize, format_name: String, ) -> Result { - let chains = graph.chains(); - let chain_deps = graph.chain_deps(&chains); - // Set up per-run state. 
let bus = EventBus::new(); let archives = ArchiveStore::new(); @@ -161,118 +159,111 @@ pub async fn run( let semaphore = Arc::new(tokio::sync::Semaphore::new(parallelism)); - // Cross-chain snapshot lineage. When a step completes, we stash - // its `committed_snapshot` under its node index. A fork-child - // chain looks up its `builds_in` parent here to know what base - // image to boot from. Mirrors legacy `SharedState::node_image`. - let node_image: Arc>> = Arc::new(Mutex::new(HashMap::new())); - // Spawn the output subscriber. Dispatches every BuildEvent to the // selected output-formatter plugin (default: `human`). let sink_handle = super::output_subscriber::spawn(bus.clone(), registry.clone(), format_name.clone()); - // Announce build start. + // ── dataflow scheduling ────────────────────────────────────── + + let dag = graph.dag(); + let chain_info = compute_chain_info(dag); + + let order = toposort(dag.graph(), None) + .map_err(|c| anyhow::anyhow!("pipeline graph has a cycle at {:?}", c.node_id()))?; + let started_at = chrono::Utc::now(); - let plan_summary = PlanSummary { - step_count: graph.node_count(), - chain_count: chains.len(), - default_runner: "docker".into(), - }; bus.emit(BuildEvent::BuildStart { run_id, - plan: plan_summary, + plan: PlanSummary { + step_count: graph.node_count(), + chain_count: chain_info.chain_count, + default_runner: "docker".into(), + }, started_at, }); - // Schedule chains. Each chain runs sequentially internally; chains - // run concurrently subject to the semaphore and the chain_deps DAG. let started_total = Instant::now(); - let mut overall = 0i32; - let mut completed: HashSet = HashSet::new(); - let mut pending: Vec = (0..chains.len()).collect(); - let mut in_flight: tokio::task::JoinSet<(usize, Result)> = tokio::task::JoinSet::new(); - - loop { - // Spawn ready chains. 
- let mut still_pending = Vec::with_capacity(pending.len()); - for ci in std::mem::take(&mut pending) { - let ready = chain_deps[ci].iter().all(|d| completed.contains(d)); - if !ready { - still_pending.push(ci); - continue; - } - let semaphore = semaphore.clone(); - let registry = registry.clone(); - let graph = graph.clone(); - let cancel = cancel.clone(); - let chain_nodes = chains[ci].clone(); - let bus = bus.clone(); - let node_image = node_image.clone(); - in_flight.spawn(async move { - let _permit = semaphore.acquire_owned().await.expect("semaphore"); - if cancel.is_cancelled() { - return (ci, Ok(0)); - } - let rc = run_chain( - ci, - &graph, - &chain_nodes, - archive_id, - run_id, - ®istry, - &bus, - &cancel, - &node_image, - ) - .await; - (ci, rc) - }); - } - pending = still_pending; - - if in_flight.is_empty() { - break; - } - match in_flight.join_next().await { - Some(Ok((ci, Ok(0)))) => { - completed.insert(ci); + type StepFuture = futures::future::Shared>; + let mut done: HashMap = HashMap::new(); + + for &n in &order { + let preds: Vec<(EdgeKind, StepFuture)> = dag + .parents(n) + .iter(dag) + .map(|(e, p)| (*dag.edge_weight(e).unwrap(), done[&p].clone())) + .collect(); + + let transition = dag[n].clone(); + let chain_id = chain_info.node_chain_id[&n]; + let chain_pos = chain_info.node_chain_pos[&n]; + let sem = semaphore.clone(); + let reg = registry.clone(); + let bus = bus.clone(); + let cancel = cancel.clone(); + + let fut: StepFuture = async move { + // Await all predecessors. + let pred_outcomes: Vec = + join_all(preds.iter().map(|(_, f)| f.clone())).await; + + // Early exit on cancellation or predecessor failure. + if cancel.is_cancelled() { + return StepOutcome { exit_code: 0, snapshot: None }; } - Some(Ok((ci, Ok(_rc)))) => { - overall = crate::error::EXIT_BUILD_FAILED; - cancel.cancel(); - completed.insert(ci); - // ChainFailed already emitted by run_chain; no stderr write here. 
- } - Some(Ok((_, Err(e)))) => { - cancel.cancel(); - bus.emit(BuildEvent::BuildEnd { - exit_code: crate::error::EXIT_BUILD_FAILED, - duration_ms: started_total.elapsed().as_millis() as u64, - }); - return Err(e); - } - Some(Err(je)) => { - cancel.cancel(); - bus.emit(BuildEvent::BuildEnd { - exit_code: crate::error::EXIT_BUILD_FAILED, - duration_ms: started_total.elapsed().as_millis() as u64, - }); - return Err(anyhow::anyhow!("chain task panicked: {je}")); + + // Acquire parallelism permit. + let _permit = sem + .acquire_owned() + .await + .expect("semaphore closed unexpectedly"); + + // Find the BuildsIn parent's snapshot for container lineage. + let parent_snapshot = preds + .iter() + .zip(&pred_outcomes) + .find(|((ek, _), _)| *ek == EdgeKind::BuildsIn) + .and_then(|(_, outcome)| outcome.snapshot.clone()); + + match execute_step( + n, + transition, + parent_snapshot, + chain_id, + chain_pos, + archive_id, + run_id, + reg, + bus, + cancel, + ) + .await + { + Ok(outcome) => outcome, + Err(_) => StepOutcome { exit_code: 1, snapshot: None }, } - None => break, } + .boxed() + .shared(); + + tokio::spawn(fut.clone()); + done.insert(n, fut); } + let outcomes: Vec = join_all(done.into_values()).await; + let overall = if outcomes.iter().any(|o| o.exit_code != 0) { + crate::error::EXIT_BUILD_FAILED + } else { + 0 + }; + let dur = started_total.elapsed().as_millis() as u64; bus.emit(BuildEvent::BuildEnd { exit_code: overall, duration_ms: dur, }); - // Wait briefly for the sink to drain the BuildEnd event. It exits - // when it sees BuildEnd, so this completes quickly. let _ = tokio::time::timeout(std::time::Duration::from_secs(2), sink_handle).await; state::clear(); @@ -280,172 +271,6 @@ pub async fn run( Ok(overall) } -/// Drive one chain end-to-end. Each step within a chain runs -/// sequentially, with the previous step's snapshot becoming the next -/// step's `parent_snapshot` input. 
-/// -/// `node_image` is the cross-chain lineage map: when this chain's -/// root is a fork-child (its `builds_in` parent lives in another -/// chain), we look up the parent's committed snapshot there to seed -/// our initial `parent_snapshot`. Each step we run records its -/// committed snapshot back so downstream fork-children can find it. -#[allow( - clippy::too_many_arguments, - reason = "tightly-coupled per-run state — splitting into a struct would just rename the bag" -)] -async fn run_chain( - chain_idx: usize, - graph: &Graph, - chain_nodes: &[NodeIndex], - archive_id: ArchiveId, - run_id: Uuid, - registry: &Arc>, - bus: &Arc, - cancel: &CancellationToken, - node_image: &Arc>>, -) -> Result { - // Seed from the cross-chain lineage map: if this chain's root has - // a `builds_in` parent that already committed a snapshot, boot - // from it. Otherwise this is a chain-root proper and starts from - // the step's image. - let chain_root = chain_nodes[0]; - let mut parent_snapshot: Option = { - let g = node_image.lock().await; - graph.builds_in_parent(chain_root) - .and_then(|p| g.get(&p).cloned()) - }; - - for (pos, &i) in chain_nodes.iter().enumerate() { - if cancel.is_cancelled() { - return Ok(0); - } - let t = graph.get_transition(i); - let step_wire = t.step.clone(); - let step_key = step_wire.key.clone(); - let env_map: std::collections::BTreeMap = t.env.clone(); - let step_id = Uuid::new_v4(); - - bus.emit(BuildEvent::StepQueued { - step_id, - key: step_key.clone(), - chain_idx: pos, - }); - - // Decide cache outcome host-side. - let decision = { - let s = state::current().context("no orchestrator state")?; - cache::decide(&s.docker, &step_wire).await? 
- }; - if let hm_plugin_protocol::CacheDecision::Hit { tag } = &decision { - bus.emit(BuildEvent::StepCacheHit { - step_id, - key: step_wire - .cache - .as_ref() - .and_then(|c| c.key.clone()) - .unwrap_or_default(), - tag: tag.0.clone(), - }); - } - - let input = ExecutorInput { - step: step_wire, - workspace_archive_id: archive_id, - env: env_map, - workdir: "/workspace".to_string(), - run_id, - step_id, - cache_lookup: decision, - parent_snapshot: parent_snapshot.clone(), - }; - - // `input.step.runner` is the IR field as-declared. Steps that - // didn't declare a runner fall back to whichever plugin - // registered as `default: true` (docker, in the embedded - // binary). The hardcoded `"docker"` is only a last-resort - // fallback when no plugin claims default — practically - // unreachable, but cheap to keep so the dispatch lookup below - // still has a string to look up. - let runner = if let Some(name) = input.step.runner.clone() { - name - } else { - let reg = registry.lock().await; - reg.default_runner_name() - .map_or_else(|| "docker".into(), str::to_string) - }; - let started = Instant::now(); - bus.emit(BuildEvent::StepStart { - step_id, - runner: runner.clone(), - image: input.step.image.clone(), - }); - - // Dispatch to the runner-named plugin. Look up the Arc under - // the registry lock, drop the lock BEFORE awaiting so other - // chains can dispatch concurrently — the per-plugin pool - // serialises (or parallelises, up to its capacity) calls - // internally. - let plugin = { - let reg = registry.lock().await; - let idx = reg - .runner_index - .get(&runner) - .copied() - .or(reg.default_runner) - .ok_or_else(|| HmError::UnknownRunner { - step_key: input.step.key.clone(), - runner: runner.clone(), - available: reg.runner_index.keys().cloned().collect(), - })?; - reg.get(idx).context("plugin moved away under us")? 
- }; - crate::plugin::host_fns::set_current_step_id(step_id); - let result: Result = plugin.call_capability("hm_executor_run", &input).await; - crate::plugin::host_fns::clear_current_step_id(); - - let dur_ms = started.elapsed().as_millis() as u64; - match result { - Ok(sr) => { - bus.emit(BuildEvent::StepEnd { - step_id, - exit_code: sr.exit_code, - duration_ms: dur_ms, - snapshot: sr.committed_snapshot.clone(), - }); - // Publish this step's committed snapshot to the - // cross-chain map so fork-children rooted at this - // node can boot from it. - if let Some(snap) = sr.committed_snapshot.clone() { - let mut g = node_image.lock().await; - g.insert(i, snap); - } - parent_snapshot = sr.committed_snapshot; - if sr.exit_code != 0 { - bus.emit(BuildEvent::ChainFailed { - chain_idx, - failed_step_id: step_id, - failed_step_key: step_key.clone(), - exit_code: sr.exit_code, - message: format!("step '{}' exited with code {}", step_key, sr.exit_code), - ts: chrono::Utc::now(), - }); - return Ok(sr.exit_code); - } - } - Err(e) => { - bus.emit(BuildEvent::StepEnd { - step_id, - exit_code: 1, - duration_ms: dur_ms, - snapshot: None, - }); - return Err(e); - } - } - } - Ok(0) -} - /// Execute a single step, returning its outcome (exit code + snapshot). /// /// This is the single-step analogue of the per-iteration body inside @@ -459,9 +284,9 @@ async fn run_chain( /// /// On non-zero exit the cancellation token is cancelled so sibling /// tasks observe the failure promptly. -#[allow(dead_code, clippy::too_many_arguments)] +#[allow(clippy::too_many_arguments)] async fn execute_step( - node_idx: NodeIndex, + _node_idx: NodeIndex, transition: Transition, parent_snapshot: Option, chain_id: usize, @@ -595,7 +420,6 @@ async fn execute_step( /// Per-node chain membership used for event enrichment. Maps every /// node in the DAG to (chain_id, position_within_chain). 
-#[allow(dead_code)] struct ChainInfo { chain_count: usize, node_chain_id: HashMap, @@ -607,7 +431,6 @@ struct ChainInfo { /// `BuildsIn` children where the child has exactly one parent total. /// This mirrors `PipelineGraph::chains()` but lives as a free function /// operating on the raw `Dag`. -#[allow(dead_code)] fn compute_chain_info(dag: &Dag) -> ChainInfo { let mut node_chain_id: HashMap = HashMap::new(); let mut node_chain_pos: HashMap = HashMap::new(); From 0028aa60e4352ac8d1d68ac2e15b2f29a7444bbc Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:47:07 -0700 Subject: [PATCH 33/40] refactor: replace chain-based scheduling with topological dataflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Walk the DAG in topological order, spawning a Shared per node. Each future awaits its predecessors, acquires a parallelism permit, then dispatches to the executor plugin. Snapshot lineage flows through future results — no cross-chain map needed. Deletes run_chain(); the entire scheduling model is now ~50 lines in the dataflow loop. --- crates/hm/src/orchestrator/scheduler.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index 3aca52c..95881e5 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -1,5 +1,7 @@ -//! Chain-bounded scheduler. Dispatches each step to its registered -//! step-executor plugin (Docker by default) via the plugin host. +//! Dataflow scheduler. Walks the pipeline DAG in topological order, +//! spawning a shared future per step. Each future awaits its +//! predecessors, acquires a parallelism permit, and dispatches the +//! step to its registered executor plugin (Docker by default). 
// Pedantic-bucket nags accepted at module scope: // - `cast_possible_truncation`: every `as u64` here is a millisecond @@ -208,8 +210,10 @@ pub async fn run( let pred_outcomes: Vec = join_all(preds.iter().map(|(_, f)| f.clone())).await; - // Early exit on cancellation or predecessor failure. - if cancel.is_cancelled() { + // Early exit if any predecessor failed or the build was cancelled. + if cancel.is_cancelled() + || pred_outcomes.iter().any(|o| o.exit_code != 0) + { return StepOutcome { exit_code: 0, snapshot: None }; } @@ -241,7 +245,10 @@ pub async fn run( .await { Ok(outcome) => outcome, - Err(_) => StepOutcome { exit_code: 1, snapshot: None }, + Err(e) => { + tracing::error!(%e, "step execution failed"); + StepOutcome { exit_code: 1, snapshot: None } + } } } .boxed() @@ -273,11 +280,6 @@ pub async fn run( /// Execute a single step, returning its outcome (exit code + snapshot). /// -/// This is the single-step analogue of the per-iteration body inside -/// [`run_chain`]. The upcoming dataflow scheduler (which dispatches -/// nodes in topological order rather than chain order) will call this -/// once per node. -/// /// On cache hit the function returns early with exit code 0 and the /// cached snapshot so downstream nodes receive the correct /// `parent_snapshot` without running the plugin at all. 
From bdc5c3d3e50f1c78622299fd2888cd8b137c843e Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:48:10 -0700 Subject: [PATCH 34/40] refactor: strip PipelineGraph to dag(), node_count(), default_image() --- crates/hm-pipeline-ir/src/graph.rs | 107 +---------------------------- 1 file changed, 3 insertions(+), 104 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index be3e30c..ffe2c7b 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -1,7 +1,6 @@ use std::collections::BTreeMap; -use daggy::petgraph::visit::IntoNodeReferences; -use daggy::{Dag, NodeIndex, Walker}; +use daggy::Dag; use schemars::JsonSchema as DeriveJsonSchema; use serde::{Deserialize, Serialize}; @@ -61,11 +60,6 @@ fn default_version() -> String { } impl PipelineGraph { - #[must_use] - pub fn dag(&self) -> &Dag { - &self.inner - } - #[must_use] pub fn node_count(&self) -> usize { self.inner.node_count() @@ -77,102 +71,7 @@ impl PipelineGraph { } #[must_use] - pub fn get_transition(&self, idx: NodeIndex) -> &Transition { - &self.inner[idx] - } - - #[must_use] - pub fn node_index_by_key(&self, key: &str) -> Option { - self.inner - .graph() - .node_references() - .find(|(_, w)| w.step.key == key) - .map(|(idx, _)| idx) - } - - #[must_use] - pub fn parent_keys(&self, idx: NodeIndex) -> Vec { - self.inner - .parents(idx) - .iter(&self.inner) - .map(|(_, parent_idx)| self.inner[parent_idx].step.key.clone()) - .collect() - } - - #[must_use] - pub fn builds_in_parent(&self, idx: NodeIndex) -> Option { - self.inner - .parents(idx) - .iter(&self.inner) - .find(|(e, _)| self.inner.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) - .map(|(_, parent_idx)| parent_idx) - } - - #[must_use] - pub fn builds_in_children(&self, idx: NodeIndex) -> Vec { - self.inner - .children(idx) - .iter(&self.inner) - .filter(|(e, _)| self.inner.edge_weight(*e).copied() == Some(EdgeKind::BuildsIn)) - .map(|(_, 
child_idx)| child_idx) - .collect() - } - - #[must_use] - pub fn all_parents(&self, idx: NodeIndex) -> Vec { - self.inner - .parents(idx) - .iter(&self.inner) - .map(|(_, parent_idx)| parent_idx) - .collect() - } - - #[must_use] - pub fn is_chain_step(&self, idx: NodeIndex) -> bool { - self.builds_in_parent(idx).is_some_and(|parent| { - self.builds_in_children(parent).len() == 1 && self.all_parents(idx).len() == 1 - }) - } - - #[must_use] - pub fn chains(&self) -> Vec> { - let mut indices: Vec = self.inner.graph().node_indices().collect(); - indices.sort(); - indices - .into_iter() - .filter(|&n| !self.is_chain_step(n)) - .map(|root| { - std::iter::successors(Some(root), |&cur| { - self.builds_in_children(cur) - .into_iter() - .find(|&c| self.is_chain_step(c)) - }) - .collect() - }) - .collect() - } - - #[must_use] - pub fn chain_deps(&self, chains: &[Vec]) -> Vec> { - let mut chain_index: BTreeMap = BTreeMap::new(); - for (ci, ch) in chains.iter().enumerate() { - for &n in ch { - chain_index.insert(n, ci); - } - } - let mut out: Vec> = vec![Vec::new(); chains.len()]; - for (ci, ch) in chains.iter().enumerate() { - let mut seen = std::collections::BTreeSet::new(); - for &n in ch { - for parent in self.all_parents(n) { - let dep_ci = chain_index[&parent]; - if dep_ci != ci { - seen.insert(dep_ci); - } - } - } - out[ci] = seen.into_iter().collect(); - } - out + pub fn dag(&self) -> &Dag { + &self.inner } } From 96eedf1e0b3f195f2bb146647792668ce412770a Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:52:04 -0700 Subject: [PATCH 35/40] test: update all tests for stripped PipelineGraph API --- crates/hm-pipeline-ir/tests/graph_build.rs | 185 +++---------------- crates/hm-pipeline-ir/tests/graph_serde.rs | 20 +- crates/hm/tests/default_image_inheritance.rs | 25 ++- crates/hm/tests/runner_dispatch.rs | 9 +- 4 files changed, 64 insertions(+), 175 deletions(-) diff --git a/crates/hm-pipeline-ir/tests/graph_build.rs 
b/crates/hm-pipeline-ir/tests/graph_build.rs index 0d89b37..3a1ba0f 100644 --- a/crates/hm-pipeline-ir/tests/graph_build.rs +++ b/crates/hm-pipeline-ir/tests/graph_build.rs @@ -6,12 +6,24 @@ clippy::panic )] +use daggy::Walker; +use daggy::petgraph::visit::IntoNodeReferences; use hm_pipeline_ir::PipelineGraph; fn graph(json: &[u8]) -> PipelineGraph { serde_json::from_slice(json).unwrap() } +fn find_by_key<'a>(g: &'a PipelineGraph, key: &str) -> &'a hm_pipeline_ir::Transition { + let dag = g.dag(); + let (_, t) = dag + .graph() + .node_references() + .find(|(_, t)| t.step.key == key) + .unwrap(); + t +} + #[test] fn builds_simple_chain() { let g = graph(br#"{ @@ -47,8 +59,8 @@ fn root_inherits_default_image() { "edges": [] } }"#); - let node = g.get_transition(g.node_index_by_key("a").unwrap()); - assert_eq!(node.step.image.as_deref(), Some("ubuntu:24.04")); + let t = find_by_key(&g, "a"); + assert_eq!(t.step.image.as_deref(), Some("ubuntu:24.04")); } #[test] @@ -67,7 +79,7 @@ fn child_does_not_inherit_default_image() { ] } }"#); - let b = g.get_transition(g.node_index_by_key("b").unwrap()); + let b = find_by_key(&g, "b"); assert!(b.step.image.is_none()); } @@ -88,161 +100,14 @@ fn wait_inserts_implicit_deps() { ] } }"#); - let c = g.node_index_by_key("c").unwrap(); - let parents = g.parent_keys(c); - assert!(parents.contains(&"a".to_string())); - assert!(parents.contains(&"b".to_string())); -} - -#[test] -fn chain_detection() { - let g = graph(br#"{ - "version": "0", - "default_image": "ubuntu:24.04", - "graph": { - "nodes": [ - {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, - {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, - {"step": {"key": "c", "cmd": "echo c"}, "env": {}} - ], - "edge_property": "directed", - "edges": [ - [0, 1, "builds_in"], - [1, 2, "builds_in"] - ] - } - }"#); - let a = g.node_index_by_key("a").unwrap(); - let b = g.node_index_by_key("b").unwrap(); - let c = g.node_index_by_key("c").unwrap(); - 
assert!(!g.is_chain_step(a)); - assert!(g.is_chain_step(b)); - assert!(g.is_chain_step(c)); -} - -#[test] -fn fork_breaks_chain() { - let g = graph(br#"{ - "version": "0", - "default_image": "ubuntu:24.04", - "graph": { - "nodes": [ - {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, - {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, - {"step": {"key": "c", "cmd": "echo c"}, "env": {}} - ], - "edge_property": "directed", - "edges": [ - [0, 1, "builds_in"], - [0, 2, "builds_in"] - ] - } - }"#); - let b = g.node_index_by_key("b").unwrap(); - let c = g.node_index_by_key("c").unwrap(); - assert!(!g.is_chain_step(b)); - assert!(!g.is_chain_step(c)); -} - -#[test] -fn chains_partition_includes_every_node_once() { - let g = graph(br#"{ - "version": "0", - "default_image": "ubuntu:24.04", - "graph": { - "nodes": [ - {"step": {"key": "a", "cmd": "echo a", "image": "ubuntu:24.04"}, "env": {}}, - {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, - {"step": {"key": "c", "cmd": "echo c"}, "env": {}}, - {"step": {"key": "d", "cmd": "echo d"}, "env": {}}, - {"step": {"key": "e", "cmd": "echo e", "image": "ubuntu:24.04"}, "env": {}} - ], - "edge_property": "directed", - "edges": [ - [0, 1, "builds_in"], - [1, 2, "builds_in"], - [0, 3, "builds_in"] - ] - } - }"#); - let chains = g.chains(); - let mut all_nodes: Vec<_> = chains.iter().flatten().copied().collect(); - all_nodes.sort(); - assert_eq!(all_nodes.len(), 5, "every node in exactly one chain"); - - let b = g.node_index_by_key("b").unwrap(); - let c = g.node_index_by_key("c").unwrap(); - let bc_chain = chains.iter().find(|ch| ch.contains(&b)).unwrap(); - assert_eq!(*bc_chain, vec![b, c]); -} - -#[test] -fn chain_deps_cross_chain() { - let g = graph(br#"{ - "version": "0", - "graph": { - "nodes": [ - {"step": {"key": "a", "cmd": "echo a"}, "env": {}}, - {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, - {"step": {"key": "c", "cmd": "echo c"}, "env": {}}, - {"step": {"key": "d", "cmd": "echo 
d"}, "env": {}}, - {"step": {"key": "e", "cmd": "echo e"}, "env": {}} - ], - "edge_property": "directed", - "edges": [ - [0, 1, "builds_in"], - [1, 2, "builds_in"], - [0, 3, "builds_in"] - ] - } - }"#); - let chains = g.chains(); - let deps = g.chain_deps(&chains); - - let find_chain = |key: &str| -> usize { - let idx = g.node_index_by_key(key).unwrap(); - chains.iter().position(|ch| ch.contains(&idx)).unwrap() - }; - let a_ci = find_chain("a"); - let bc_ci = find_chain("b"); - let d_ci = find_chain("d"); - let e_ci = find_chain("e"); - - assert!(deps[a_ci].is_empty()); - assert_eq!(deps[bc_ci], vec![a_ci]); - assert_eq!(deps[d_ci], vec![a_ci]); - assert!(deps[e_ci].is_empty()); -} - -#[test] -fn chain_deps_subsumes_wait_barriers() { - let g = graph(br#"{ - "version": "0", - "graph": { - "nodes": [ - {"step": {"key": "a", "cmd": "echo a"}, "env": {}}, - {"step": {"key": "b", "cmd": "echo b"}, "env": {}}, - {"step": {"key": "c", "cmd": "echo c"}, "env": {}} - ], - "edge_property": "directed", - "edges": [ - [0, 2, "depends_on"], - [1, 2, "depends_on"] - ] - } - }"#); - let chains = g.chains(); - let deps = g.chain_deps(&chains); - let find_chain = |key: &str| -> usize { - let idx = g.node_index_by_key(key).unwrap(); - chains.iter().position(|ch| ch.contains(&idx)).unwrap() - }; - let a_ci = find_chain("a"); - let b_ci = find_chain("b"); - let c_ci = find_chain("c"); - let mut c_deps = deps[c_ci].clone(); - c_deps.sort_unstable(); - let mut want = vec![a_ci, b_ci]; - want.sort_unstable(); - assert_eq!(c_deps, want); + let dag = g.dag(); + let c_idx = dag.graph().node_references() + .find(|(_, t)| t.step.key == "c") + .map(|(idx, _)| idx) + .unwrap(); + let parent_keys: Vec = dag.parents(c_idx).iter(dag) + .map(|(_, p)| dag[p].step.key.clone()) + .collect(); + assert!(parent_keys.contains(&"a".to_string())); + assert!(parent_keys.contains(&"b".to_string())); } diff --git a/crates/hm-pipeline-ir/tests/graph_serde.rs b/crates/hm-pipeline-ir/tests/graph_serde.rs index 
55e03f2..52ab54a 100644 --- a/crates/hm-pipeline-ir/tests/graph_serde.rs +++ b/crates/hm-pipeline-ir/tests/graph_serde.rs @@ -74,10 +74,22 @@ fn pipeline_graph_round_trips_through_json() { let back: PipelineGraph = serde_json::from_str(&json).unwrap(); assert_eq!(back.node_count(), 3); assert_eq!(back.default_image(), Some("ubuntu:24.04")); - let a = back.node_index_by_key("a").unwrap(); - assert_eq!(back.get_transition(a).step.image.as_deref(), Some("ubuntu:24.04")); - let b = back.node_index_by_key("b").unwrap(); - assert!(back.builds_in_parent(b).is_some()); + use daggy::Walker; + use daggy::petgraph::visit::IntoNodeReferences; + + let a_idx = back.dag().graph().node_references() + .find(|(_, t)| t.step.key == "a") + .map(|(idx, _)| idx) + .unwrap(); + assert_eq!(back.dag()[a_idx].step.image.as_deref(), Some("ubuntu:24.04")); + + let b_idx = back.dag().graph().node_references() + .find(|(_, t)| t.step.key == "b") + .map(|(idx, _)| idx) + .unwrap(); + let has_builds_in_parent = back.dag().parents(b_idx).iter(back.dag()) + .any(|(e, _)| *back.dag().edge_weight(e).unwrap() == EdgeKind::BuildsIn); + assert!(has_builds_in_parent); } #[test] diff --git a/crates/hm/tests/default_image_inheritance.rs b/crates/hm/tests/default_image_inheritance.rs index 0e21680..dca93dc 100644 --- a/crates/hm/tests/default_image_inheritance.rs +++ b/crates/hm/tests/default_image_inheritance.rs @@ -13,12 +13,21 @@ reason = "integration test pinning a tiny invariant" )] +use daggy::petgraph::visit::IntoNodeReferences; use harmont_cli::orchestrator::graph::Graph; fn decode(json: &[u8]) -> Graph { serde_json::from_slice::(json).unwrap() } +fn find_step<'a>(g: &'a Graph, key: &str) -> &'a hm_pipeline_ir::CommandStep { + let dag = g.dag(); + let (_, t) = dag.graph().node_references() + .find(|(_, t)| t.step.key == key) + .unwrap(); + &t.step +} + #[test] fn root_step_inherits_default_image() { let g = decode(br#"{ @@ -32,9 +41,9 @@ fn root_step_inherits_default_image() { "edges": [] } }"#); - 
let idx = g.node_index_by_key("apt-base").unwrap(); + let step = find_step(&g, "apt-base"); assert_eq!( - g.get_transition(idx).step.image.as_deref(), + step.image.as_deref(), Some("ubuntu:24.04"), "root step must inherit pipeline default_image" ); @@ -53,9 +62,9 @@ fn root_step_explicit_image_wins() { "edges": [] } }"#); - let idx = g.node_index_by_key("rust").unwrap(); + let step = find_step(&g, "rust"); assert_eq!( - g.get_transition(idx).step.image.as_deref(), + step.image.as_deref(), Some("rust:1.82"), "explicit per-step image must override default_image" ); @@ -80,9 +89,9 @@ fn child_step_unchanged_by_default_image() { ] } }"#); - let idx = g.node_index_by_key("child").unwrap(); + let step = find_step(&g, "child"); assert!( - g.get_transition(idx).step.image.is_none(), + step.image.is_none(), "child step must not inherit default_image — chain steps boot from parent snapshot", ); } @@ -99,9 +108,9 @@ fn no_default_image_leaves_root_alone() { "edges": [] } }"#); - let idx = g.node_index_by_key("k").unwrap(); + let step = find_step(&g, "k"); assert!( - g.get_transition(idx).step.image.is_none(), + step.image.is_none(), "absent default_image must not synthesize an image" ); } diff --git a/crates/hm/tests/runner_dispatch.rs b/crates/hm/tests/runner_dispatch.rs index e7fd849..060db27 100644 --- a/crates/hm/tests/runner_dispatch.rs +++ b/crates/hm/tests/runner_dispatch.rs @@ -88,9 +88,12 @@ async fn runner_field_dispatches_to_named_plugin() { // Sanity check: the graph must preserve `runner` from the IR. // This is the cheap fast-fail; the dispatch check below is the // load-bearing one. 
- let first = graph.node_index_by_key("fs-step").unwrap(); + use daggy::petgraph::visit::IntoNodeReferences; + let (_, first_transition) = graph.dag().graph().node_references() + .find(|(_, t)| t.step.key == "fs-step") + .unwrap(); assert_eq!( - graph.get_transition(first).step.runner.as_deref(), + first_transition.step.runner.as_deref(), Some("freestyle"), "graph dropped `runner` field — A3's wire-type fix has regressed" ); @@ -98,7 +101,7 @@ async fn runner_field_dispatches_to_named_plugin() { // 3. Build the executor input exactly as the scheduler does // (orchestrator/scheduler.rs::run_chain). Cloning the wire // step preserves `runner` and `runner_args` verbatim. - let step_wire = graph.get_transition(first).step.clone(); + let step_wire = first_transition.step.clone(); let input = ExecutorInput { step: step_wire, workspace_archive_id: ArchiveId(Uuid::nil()), From a04bc1516606a54bb3e5827286635a8669af6d57 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 19:56:28 -0700 Subject: [PATCH 36/40] =?UTF-8?q?chore:=20post-refactor=20cleanup=20?= =?UTF-8?q?=E2=80=94=20clippy,=20stale=20docs,=20const=20fn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/hm-pipeline-ir/src/graph.rs | 2 +- crates/hm/src/orchestrator/scheduler.rs | 32 ++++++++++++------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index ffe2c7b..19d546b 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -71,7 +71,7 @@ impl PipelineGraph { } #[must_use] - pub fn dag(&self) -> &Dag { + pub const fn dag(&self) -> &Dag { &self.inner } } diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index 95881e5..068fd16 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -1,19 +1,20 @@ -//! Dataflow scheduler. 
Walks the pipeline DAG in topological order, -//! spawning a shared future per step. Each future awaits its -//! predecessors, acquires a parallelism permit, and dispatches the -//! step to its registered executor plugin (Docker by default). +//! Dataflow scheduler. +//! +//! Walks the pipeline DAG in topological order, spawning a shared +//! future per step. Each future awaits its predecessors, acquires a +//! parallelism permit, and dispatches the step to its registered +//! executor plugin (Docker by default). // Pedantic-bucket nags accepted at module scope: // - `cast_possible_truncation`: every `as u64` here is a millisecond // wall-clock duration; `u128 -> u64` cannot overflow for any // conceivable build runtime (584 million years). -// - `expect_used` on the semaphore: `acquire_owned` only errors if the -// semaphore is closed, which we never close. -// - `too_many_lines` on `run`: the scheduler body is one cohesive -// loop; splitting it would obscure the spawn/join symmetry. -// - `missing_panics_doc`: the only panic path is the semaphore expect -// described above; the function docstring already explains its -// error surface. +// - `expect_used`: semaphore acquire and DAG edge-weight lookups on +// edges that are guaranteed to exist by construction. +// - `too_many_lines` on `run`: setup + dataflow loop form one +// cohesive unit; splitting would obscure the spawn/join symmetry. +// - `missing_panics_doc`: the only panic paths are the semaphore and +// edge-weight expects described above. #![allow( clippy::cast_possible_truncation, clippy::expect_used, @@ -54,14 +55,14 @@ use tokio_util::sync::CancellationToken; use super::events::EventBus; use super::state::{self, OrchestratorState}; -/// Outcome of a single step execution, used by the upcoming dataflow -/// scheduler to propagate exit codes and snapshot lineage. 
#[derive(Clone)] struct StepOutcome { exit_code: i32, snapshot: Option, } +type StepFuture = futures::future::Shared>; + /// Entry point: run a parsed pipeline locally end-to-end. Returns /// the overall exit code (0 = success, [`crate::error::EXIT_BUILD_FAILED`] /// when any step exited non-zero). @@ -187,14 +188,13 @@ pub async fn run( let started_total = Instant::now(); - type StepFuture = futures::future::Shared>; let mut done: HashMap = HashMap::new(); for &n in &order { let preds: Vec<(EdgeKind, StepFuture)> = dag .parents(n) .iter(dag) - .map(|(e, p)| (*dag.edge_weight(e).unwrap(), done[&p].clone())) + .map(|(e, p)| (*dag.edge_weight(e).expect("edge in DAG"), done[&p].clone())) .collect(); let transition = dag[n].clone(); @@ -421,7 +421,7 @@ async fn execute_step( } /// Per-node chain membership used for event enrichment. Maps every -/// node in the DAG to (chain_id, position_within_chain). +/// node in the DAG to (`chain_id`, `position_within_chain`). struct ChainInfo { chain_count: usize, node_chain_id: HashMap, From 779de07a0d353f8f78791daf4612b98c7e90e27a Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 20:16:56 -0700 Subject: [PATCH 37/40] cleanup --- crates/hm-pipeline-ir/src/graph.rs | 38 +++++++++++++++++++++++++ crates/hm/src/orchestrator/scheduler.rs | 2 -- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/crates/hm-pipeline-ir/src/graph.rs b/crates/hm-pipeline-ir/src/graph.rs index 19d546b..5d9c5f2 100644 --- a/crates/hm-pipeline-ir/src/graph.rs +++ b/crates/hm-pipeline-ir/src/graph.rs @@ -5,46 +5,81 @@ use daggy::Dag; use schemars::JsonSchema as DeriveJsonSchema; use serde::{Deserialize, Serialize}; +/// A single build command within a pipeline. +/// +/// Serialized as a JSON object inside each graph node's `step` field. +/// The `key` is the unique identifier used to reference this step in +/// edges and log output. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] pub struct CommandStep { + /// Unique identifier for this step within the pipeline. pub key: String, + /// Human-readable label shown in build output. #[serde(default)] pub label: Option, + /// Shell command to execute inside the container. pub cmd: String, + /// Docker image to boot from. Root steps without an image inherit + /// `PipelineGraph::default_image`; child steps boot from their + /// parent's committed snapshot. #[serde(default)] pub image: Option, + /// Per-step environment variables merged on top of the pipeline env. #[serde(default)] pub env: Option>, + /// Maximum wall-clock seconds before the step is killed. #[serde(default)] pub timeout_seconds: Option, + /// Cache configuration for this step's committed snapshot. #[serde(default)] pub cache: Option, + /// Step-executor plugin name. `None` falls back to the default + /// runner (Docker in the shipped configuration). #[serde(default, skip_serializing_if = "Option::is_none")] pub runner: Option, + /// Plugin-specific extra fields passed verbatim to the runner. #[serde(default, skip_serializing_if = "Option::is_none")] pub runner_args: Option, } +/// Snapshot cache configuration for a step. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, DeriveJsonSchema)] pub struct Cache { + /// Cache policy name (e.g. `"content-hash"`). pub policy: String, + /// Explicit cache key override; derived from the step if absent. #[serde(default)] pub key: Option, } +/// A graph node: a [`CommandStep`] paired with its resolved environment. +/// +/// The `env` map is the final merged result of pipeline-level defaults +/// and per-step overrides — ready to hand to the executor as-is. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Transition { pub step: CommandStep, pub env: BTreeMap, } +/// Edge label in the pipeline DAG. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum EdgeKind { + /// Container lineage: the child boots from the parent's committed + /// snapshot rather than from a fresh image. BuildsIn, + /// Ordering-only dependency (emitted by `wait` barriers). The + /// child waits for the parent to finish but does not inherit its + /// snapshot. DependsOn, } +/// Top-level pipeline graph, deserialized directly from the v0 wire +/// format (petgraph-serde JSON). +/// +/// Callers access the underlying [`Dag`] via [`dag()`](Self::dag) and +/// traverse it with petgraph's standard visitor traits. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PipelineGraph { #[serde(default = "default_version")] @@ -60,16 +95,19 @@ fn default_version() -> String { } impl PipelineGraph { + /// Number of steps (nodes) in the graph. #[must_use] pub fn node_count(&self) -> usize { self.inner.node_count() } + /// Pipeline-wide fallback image for root steps that don't declare one. #[must_use] pub fn default_image(&self) -> Option<&str> { self.default_image.as_deref() } + /// The underlying DAG for direct traversal. 
#[must_use] pub const fn dag(&self) -> &Dag { &self.inner diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index 068fd16..ff96d7b 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -167,8 +167,6 @@ pub async fn run( let sink_handle = super::output_subscriber::spawn(bus.clone(), registry.clone(), format_name.clone()); - // ── dataflow scheduling ────────────────────────────────────── - let dag = graph.dag(); let chain_info = compute_chain_info(dag); From 2b6a005d37c10968e043fc34df8a34766c7b817a Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 20:18:55 -0700 Subject: [PATCH 38/40] fix: resolve ruff PERF401 and F841 in harmont-py --- dsls/harmont-py/harmont/pipeline.py | 3 +-- dsls/harmont-py/tests/test_envelope.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/dsls/harmont-py/harmont/pipeline.py b/dsls/harmont-py/harmont/pipeline.py index cd41702..92f419e 100644 --- a/dsls/harmont-py/harmont/pipeline.py +++ b/dsls/harmont-py/harmont/pipeline.py @@ -138,8 +138,7 @@ def _lower_to_graph( has_builds_in_parent.add(node_idx) # depends_on edges from pre-wait steps. 
- for dep_idx in pending_depends_on: - edges.append([dep_idx, node_idx, "depends_on"]) + edges.extend([dep_idx, node_idx, "depends_on"] for dep_idx in pending_depends_on) pre_wait_indices.append(node_idx) diff --git a/dsls/harmont-py/tests/test_envelope.py b/dsls/harmont-py/tests/test_envelope.py index 269e486..451080f 100644 --- a/dsls/harmont-py/tests/test_envelope.py +++ b/dsls/harmont-py/tests/test_envelope.py @@ -36,7 +36,6 @@ def _step_cmds(definition): def _builds_in_children(definition, parent_key): """Return nodes whose builds_in parent is parent_key.""" nodes = _graph_nodes(definition) - key_by_idx = {i: n["step"]["key"] for i, n in enumerate(nodes)} parent_idx = None for i, n in enumerate(nodes): if n["step"]["key"] == parent_key: From 8225be5c99c03b4df97c8e482cbb0828d491bf6e Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 20:23:19 -0700 Subject: [PATCH 39/40] fix: move inline use to module scope (clippy items_after_statements) --- crates/hm/tests/runner_dispatch.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/hm/tests/runner_dispatch.rs b/crates/hm/tests/runner_dispatch.rs index 060db27..2912474 100644 --- a/crates/hm/tests/runner_dispatch.rs +++ b/crates/hm/tests/runner_dispatch.rs @@ -36,6 +36,8 @@ pub mod common; use std::collections::BTreeMap; +use daggy::petgraph::visit::IntoNodeReferences; + use common::fixtures; use harmont_cli::orchestrator::graph::Graph; use harmont_cli::plugin::{PluginRegistry, RegistryConfig}; @@ -88,7 +90,6 @@ async fn runner_field_dispatches_to_named_plugin() { // Sanity check: the graph must preserve `runner` from the IR. // This is the cheap fast-fail; the dispatch check below is the // load-bearing one. 
- use daggy::petgraph::visit::IntoNodeReferences; let (_, first_transition) = graph.dag().graph().node_references() .find(|(_, t)| t.step.key == "fs-step") .unwrap(); From 4bc377b166990891c78897d9c6e848d9127a84dd Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 23 May 2026 20:39:11 -0700 Subject: [PATCH 40/40] refactor: remove orchestrator/graph.rs re-export shim Import hm_pipeline_ir types directly instead of going through a one-line re-export module. --- crates/hm/src/orchestrator/graph.rs | 1 - crates/hm/src/orchestrator/mod.rs | 1 - crates/hm/src/orchestrator/scheduler.rs | 5 +++-- crates/hm/tests/default_image_inheritance.rs | 8 ++++---- crates/hm/tests/runner_dispatch.rs | 4 ++-- 5 files changed, 9 insertions(+), 10 deletions(-) delete mode 100644 crates/hm/src/orchestrator/graph.rs diff --git a/crates/hm/src/orchestrator/graph.rs b/crates/hm/src/orchestrator/graph.rs deleted file mode 100644 index 1c81fa6..0000000 --- a/crates/hm/src/orchestrator/graph.rs +++ /dev/null @@ -1 +0,0 @@ -pub use hm_pipeline_ir::{EdgeKind, PipelineGraph as Graph, Transition}; diff --git a/crates/hm/src/orchestrator/mod.rs b/crates/hm/src/orchestrator/mod.rs index a7e856c..2f9fe93 100644 --- a/crates/hm/src/orchestrator/mod.rs +++ b/crates/hm/src/orchestrator/mod.rs @@ -11,7 +11,6 @@ pub mod cache; pub mod docker_client; pub mod docker_host_fns; pub mod events; -pub mod graph; pub mod output_subscriber; pub mod scheduler; pub mod source; diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs index ff96d7b..fab51cd 100644 --- a/crates/hm/src/orchestrator/scheduler.rs +++ b/crates/hm/src/orchestrator/scheduler.rs @@ -43,9 +43,10 @@ use hm_plugin_protocol::{ use tokio::sync::Mutex; use uuid::Uuid; +use hm_pipeline_ir::{EdgeKind, PipelineGraph, Transition}; + use crate::error::HmError; use crate::orchestrator::docker_client::DockerClient; -use crate::orchestrator::graph::{EdgeKind, Transition}; use 
crate::orchestrator::source::build_archive_bytes; use crate::plugin::{PluginRegistry, RegistryConfig}; @@ -73,7 +74,7 @@ type StepFuture = futures::future::Shared>; /// scheduler-level failure occurs. Non-zero step exit codes are /// surfaced via the returned `i32`, not as an Err. pub async fn run( - graph: crate::orchestrator::graph::Graph, + graph: PipelineGraph, repo_root: PathBuf, parallelism: usize, format_name: String, diff --git a/crates/hm/tests/default_image_inheritance.rs b/crates/hm/tests/default_image_inheritance.rs index dca93dc..cc5ca19 100644 --- a/crates/hm/tests/default_image_inheritance.rs +++ b/crates/hm/tests/default_image_inheritance.rs @@ -14,13 +14,13 @@ )] use daggy::petgraph::visit::IntoNodeReferences; -use harmont_cli::orchestrator::graph::Graph; +use hm_pipeline_ir::PipelineGraph; -fn decode(json: &[u8]) -> Graph { - serde_json::from_slice::(json).unwrap() +fn decode(json: &[u8]) -> PipelineGraph { + serde_json::from_slice::(json).unwrap() } -fn find_step<'a>(g: &'a Graph, key: &str) -> &'a hm_pipeline_ir::CommandStep { +fn find_step<'a>(g: &'a PipelineGraph, key: &str) -> &'a hm_pipeline_ir::CommandStep { let dag = g.dag(); let (_, t) = dag.graph().node_references() .find(|(_, t)| t.step.key == key) diff --git a/crates/hm/tests/runner_dispatch.rs b/crates/hm/tests/runner_dispatch.rs index 2912474..d66c24d 100644 --- a/crates/hm/tests/runner_dispatch.rs +++ b/crates/hm/tests/runner_dispatch.rs @@ -39,7 +39,7 @@ use std::collections::BTreeMap; use daggy::petgraph::visit::IntoNodeReferences; use common::fixtures; -use harmont_cli::orchestrator::graph::Graph; +use hm_pipeline_ir::PipelineGraph; use harmont_cli::plugin::{PluginRegistry, RegistryConfig}; use hm_plugin_protocol::{ArchiveId, CacheDecision, ExecutorInput, StepResult}; use uuid::Uuid; @@ -85,7 +85,7 @@ async fn runner_field_dispatches_to_named_plugin() { .expect("load registry"); // 2. Deserialize the graph directly from JSON — the new wire format. 
- let graph: Graph = serde_json::from_slice(PIPELINE_JSON).expect("parse graph"); + let graph: PipelineGraph = serde_json::from_slice(PIPELINE_JSON).expect("parse graph"); // Sanity check: the graph must preserve `runner` from the IR. // This is the cheap fast-fail; the dispatch check below is the