diff --git a/.agents/CONVENTIONS.md b/.agents/CONVENTIONS.md new file mode 100644 index 0000000..a71a302 --- /dev/null +++ b/.agents/CONVENTIONS.md @@ -0,0 +1,12 @@ +# Notation conventions + +Agent-facing material in this repo distinguishes three shapes so an agent never has to guess whether a token is a cmdlet parameter, an API field, or a transport-agnostic idea. + +- **PowerShell** — backtick the literal as it appears in `Get-Help -Full`. Switches stand alone; valued parameters use a space before the value. + - `` `-ExtendedLogging` `` (switch), `` `-TaskId <guid>` `` (valued), `` `Connect-Safeguard -DeviceCode` `` (cmdlet + switch). +- **API / JSON** — backtick a PascalCase field as `Field: value`, mirroring the transfer-object shape SPP emits and accepts. + - `` `ExtendedLogs: true` ``, `` `OperationType: CheckPassword` ``. +- **Concept (transport-agnostic)** — plain English, no backticks. Use this in orchestration prose where the agent should not yet be biased toward PS or API. + - "with extended logging enabled", "trigger the affected operation". + +Rule of thumb: `AGENTS.md` speaks **concept**. The skills speak **PowerShell** (e.g., `safeguard-ps-operations`) or **API** with backticks. When a skill bridges them, it shows both forms side by side. diff --git a/.agents/schemas/evidence.schema.json b/.agents/schemas/evidence.schema.json new file mode 100644 index 0000000..ac257dc --- /dev/null +++ b/.agents/schemas/evidence.schema.json @@ -0,0 +1,259 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://raw.githubusercontent.com/petrsnd/SafeguardCustomPlatform/main/.agents/schemas/evidence.schema.json", + "title": "Custom Platform Probing Evidence", + "description": "Internal agent contract. The evidence artifact produced by the target-probing skill and consumed by strategy-selection and script-authoring. Distinct from the human-facing platform-script schema in schema/. 
This is v0 — fields marked TODO require a real probing run to lock down their shape; do not invent values.", + "type": "object", + "required": [ + "schemaVersion", + "protocol", + "target", + "serviceAccount", + "probeRun" + ], + "additionalProperties": false, + "properties": { + "schemaVersion": { + "type": "string", + "description": "Version of this evidence schema. Bumped when the contract changes.", + "const": "0.1" + }, + "protocol": { + "type": "string", + "description": "Transport protocol of the target. SSH and HTTP only — telnet is out of scope for the agent skill system.", + "enum": ["ssh", "http"] + }, + "target": { + "type": "object", + "description": "Identification of the target system the probes ran against.", + "required": ["host"], + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "Hostname or IP of the target. No credentials." + }, + "port": { + "type": "integer", + "description": "TCP port. Optional; default depends on protocol." + }, + "nonProductionAffirmed": { + "type": "boolean", + "description": "Whether the operator has affirmed the target is non-production. The probe-safety contract requires this to be true before any probe runs." + } + } + }, + "serviceAccount": { + "type": "object", + "description": "Service-account identification used by probes to authenticate to the target. Secrets MUST NOT appear here — name and credential kind only.", + "required": ["accountName"], + "additionalProperties": false, + "properties": { + "accountName": { + "type": "string", + "description": "Username/account identifier used for probing. The secret itself is never recorded in evidence." + }, + "credentialKind": { + "type": "string", + "description": "What kind of credential the service account uses. 
Sourced from operator declaration; not inferred.", + "enum": ["password", "ssh-key", "api-key", "bearer-token", "unknown"] + } + } + }, + "probeRun": { + "type": "object", + "description": "Metadata about the probing session itself.", + "required": ["startedAt", "probes"], + "additionalProperties": false, + "properties": { + "startedAt": { + "type": "string", + "format": "date-time", + "description": "ISO-8601 UTC timestamp when probing began." + }, + "endedAt": { + "type": "string", + "format": "date-time", + "description": "ISO-8601 UTC timestamp when probing concluded. Optional while probing is in progress." + }, + "operatorTool": { + "type": "string", + "description": "Identifier of the agent runtime that performed the probes (e.g., 'github-copilot-cli', 'claude-code'). For audit only." + }, + "probes": { + "type": "array", + "description": "Ordered record of probes executed. Each entry is a single probe step. Read-only probes record observations; destructive probes additionally record the explicit per-probe consent given by the operator (see the probe-safety contract in target-probing/SKILL.md).", + "items": { + "$ref": "#/definitions/probeRecord" + } + }, + "haltedReason": { + "type": "string", + "description": "If probing stopped before completion, why. Examples: 'lockout-signal', 'throttle-signal', 'mfa-challenge', 'operator-stop', 'rate-limit-exceeded'.", + "enum": [ + "completed", + "lockout-signal", + "throttle-signal", + "mfa-challenge", + "operator-stop", + "rate-limit-exceeded", + "operator-denied-destructive", + "error" + ] + } + } + }, + "sshFindings": { + "$ref": "#/definitions/sshFindings", + "description": "Findings produced by SSH probe playbooks. Present only when protocol == 'ssh'." + }, + "httpFindings": { + "$ref": "#/definitions/httpFindings", + "description": "Findings produced by HTTP probe playbooks. Present only when protocol == 'http'." 
+ }, + "strategyHints": { + "type": "object", + "description": "Optional hints from the probing skill that strategy-selection may consider. Not authoritative — strategy-selection makes the final call.", + "additionalProperties": false, + "properties": { + "preferredPattern": { + "type": "string", + "description": "If probing strongly suggests one of the four authoring patterns, name it here. Otherwise omit.", + "enum": [ + "ssh-interactive", + "ssh-batch", + "http-api", + "http-form-fill" + ] + }, + "rationale": { + "type": "string", + "description": "Short, citation-style reason for the hint. Must reference a specific probe record, not be generic." + } + } + } + }, + "definitions": { + "probeRecord": { + "type": "object", + "description": "A single probe step. Field shapes for 'observation' and 'destructiveDetails' are placeholders pending a real probing run; populated structures land in Phase 3 / Phase 5.", + "required": ["id", "kind", "command", "result"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "description": "Stable ID of the probe within this run. Used for back-references." + }, + "kind": { + "type": "string", + "description": "Probe classification. Read-only probes never mutate target state.", + "enum": ["read-only", "destructive"] + }, + "category": { + "type": "string", + "description": "What the probe is investigating. SSH categories: prompt, batch-mode, sudo, password-change. HTTP categories: auth-scheme, login-form, cookie, api-discovery.", + "enum": [ + "prompt", + "batch-mode", + "sudo", + "password-change", + "auth-scheme", + "login-form", + "cookie", + "api-discovery", + "other" + ] + }, + "command": { + "type": "string", + "description": "The exact command or HTTP request line that was issued. No secrets — substitute placeholders for credentials." 
+ }, + "consent": { + "type": "object", + "description": "Required for destructive probes; absent for read-only probes.", + "additionalProperties": false, + "required": ["grantedAt"], + "properties": { + "grantedAt": { + "type": "string", + "format": "date-time", + "description": "Timestamp the operator granted explicit per-probe consent." + }, + "summaryShown": { + "type": "string", + "description": "The one-line 'what this will do, what could go wrong' summary that the operator approved." + } + } + }, + "result": { + "type": "string", + "description": "Outcome of the probe.", + "enum": ["ok", "failed", "skipped", "halted"] + }, + "observation": { + "description": "TODO: structured observation payload. Shape locked down in Phase 3 once the SSH and HTTP playbooks are authored against real targets. Free-form for now.", + "type": "object", + "additionalProperties": true + }, + "errorSignature": { + "type": "string", + "description": "If result is 'failed', a short stable signature suitable for matching against docs/agent-reference/failure-patterns.md. Optional." + } + } + }, + "sshFindings": { + "type": "object", + "description": "SSH-specific finding shapes. Fields below are the categories the playbook MUST cover; their internal shape is intentionally permissive in v0 and will be tightened once more SSH targets have been probed.", + "additionalProperties": false, + "properties": { + "shellPrompt": { + "type": "object", + "additionalProperties": true, + "description": "What the shell prompt looks like, banner contents, motd presence, etc." + }, + "batchModeSupported": { + "type": "object", + "additionalProperties": true, + "description": "Whether non-interactive ExecuteCommand-style probes succeeded vs needed a PTY." + }, + "sudoBehavior": { + "type": "object", + "additionalProperties": true, + "description": "Whether sudo prompts for a password, has NOPASSWD, or is not present." 
+ }, + "passwordChangeCommand": { + "type": "object", + "additionalProperties": true, + "description": "Which password-change command path the target supports (passwd, chpasswd, vendor-specific CLI, etc.) — observed, not assumed." + } + } + }, + "httpFindings": { + "type": "object", + "description": "HTTP-specific finding shapes. Fields below are the categories the playbook MUST cover; their internal shape is intentionally permissive in v0 and will be tightened once more HTTP targets have been probed.", + "additionalProperties": false, + "properties": { + "authScheme": { + "type": "object", + "additionalProperties": true, + "description": "Which authentication scheme(s) the target accepts (basic, bearer, api-key header, form-fill, cookie). Observed via WWW-Authenticate, login form inspection, etc." + }, + "loginForm": { + "type": "object", + "additionalProperties": true, + "description": "If form-fill is in play: form action URL, field names, hidden tokens (CSRF), redirect chain." + }, + "cookieBehavior": { + "type": "object", + "additionalProperties": true, + "description": "Session cookie names, flags, lifetimes; whether session cookies suffice for subsequent calls." + }, + "apiDiscovery": { + "type": "object", + "additionalProperties": true, + "description": "Discovered API endpoints relevant to the planned operations (e.g., user lookup, password change, key rotation). Sourced from vendor docs or probe responses; never invented." + } + } + } + } +} diff --git a/.agents/skills/safeguard-ps-operations/SKILL.md b/.agents/skills/safeguard-ps-operations/SKILL.md new file mode 100644 index 0000000..31139a3 --- /dev/null +++ b/.agents/skills/safeguard-ps-operations/SKILL.md @@ -0,0 +1,242 @@ +--- +name: safeguard-ps-operations +description: >- + Use when the agent must drive a live SPP appliance through safeguard-ps + to validate, import, trigger, and inspect a custom platform script. 
+ Covers Connect-Safeguard -DeviceCode (preferred) / -Browser (fallback) + auth, the cmdlet menu (Test- / Import- / Export- / asset / account / + trigger), idempotency, extended-logging triggers, task-log JSON + retrieval, and how to call tools/Invoke-PlatformDevLoop.ps1 instead of + re-implementing the loop. All cmdlet syntax must be sourced from + Get-Help -Full against the installed module, never paraphrased + from memory. +--- + +# safeguard-ps-operations + +## Pre-flight + +Before invoking any cmdlet covered by this skill, consult [`AGENTS.md`](../../../AGENTS.md) for the active workflow algorithm (new-platform vs enhance-platform) and the iterative debug-loop budget. Single-skill entry points (e.g., "just import this script") still run inside one of those workflows; do not bypass the orchestration layer. + +## Scope + +This skill is the `safeguard-ps` wrapper. It owns: + +- Authenticating to a Safeguard for Privileged Passwords (SPP) appliance. +- Validating, importing, and exporting custom platform scripts. +- Creating or updating the asset and account used for testing. +- Triggering operations (`CheckPassword`, `ChangePassword`, …) with extended logging. +- Fetching the resulting task-log JSON for [`task-log-analysis`](../task-log-analysis/SKILL.md). + +It is the **only** skill that directly calls `safeguard-ps`. Other skills request operations through this one. + +## Modes + +- **full-loop** — every operation in this skill is in scope. +- **author-only** — only `Test-SafeguardCustomPlatformScript` and `Export-SafeguardCustomPlatformScript` (when applied to a local file via `-OutFile`) are usable; everything else requires an appliance and **fails closed** with a clear message. +- **probe-only** — fails closed. Use [`target-probing`](../target-probing/SKILL.md) instead. + +## Grounding rule (mandatory) + +Every cmdlet, parameter name, and parameter-set described to the operator MUST come from `Get-Help -Full` against the **installed** `safeguard-ps` module. 
Do not paraphrase from memory, vendor docs, or prior conversations. + +Before invoking any cmdlet you have not used in the current voyage, the agent runs `Get-Help` itself — do not ask the operator to run it and paste output back: + +```powershell +Get-Help <CmdletName> -Full | Out-String -Width 200 +``` + +`Out-String -Width 200` matters: in narrow shells the parameter table wraps and `Required?` / `Position?` columns shift, making it easy to misread a switch as a valued parameter. Pin the width. + +If a cmdlet's parameter is a `[switch]` and the value comes from a variable, use the colon form (`-Insecure:$ins`, not `-Insecure $ins`). With the latter, PowerShell treats the bare `-Insecure` as present (`$true`) no matter what `$ins` holds, and `$ins` is left over as a stray positional argument that is either bound to some other parameter or rejected — which is rarely what the agent wanted. The value bound to the colon must be a `[bool]`. + +Sibling cmdlets are not symmetric. `Test-` and `Invoke-` pairs diverge in parameter names; `New-SafeguardCustomPlatform` accepts `-ScriptFile` directly with no separate `Import-` step; `Get-SafeguardTaskLog` with no arguments returns a flat GUID array and only returns `{Recorded, Level, Event}` records when given `-TaskId <guid>`. Run `Get-Help` on every cmdlet's first use in the voyage, even when a sibling was just used. + +## Authentication + +### PowerShell version: prefer 7+, warn and continue otherwise + +safeguard-ps targets PS 7. Several cmdlets behave better there (cleaner error records, no Windows-PowerShell-only quirks). Before calling any cmdlet: + +```powershell +if ($PSVersionTable.PSVersion.Major -lt 7) { + Write-Warning "Running on PowerShell $($PSVersionTable.PSVersion). PowerShell 7+ is recommended for safeguard-ps; continuing anyway." +} +``` + +Do not block on the version. If only PS 5.1 is available, the agent emits the warning once and proceeds. 
+ +### Module presence: check, don't ask + +Before invoking any cmdlet, verify `safeguard-ps` is installed **and at least version 8.4.3**: + +```powershell +Get-Module -ListAvailable -Name safeguard-ps | + Sort-Object Version -Descending | + Select-Object -First 1 -ExpandProperty Version +``` + +8.4.3 is the floor because earlier versions lack `-ExtendedLogging` on `Invoke-SafeguardAssetSshHostKeyDiscovery` — without it, host-key-discovery failures emit only the surface 60307 error and persist no task log, leaving the agent with nothing to diagnose. `tools/Invoke-PlatformDevLoop.ps1` enforces this floor at startup and refuses to run against older modules. + +If the module is missing or below 8.4.3, ask the operator **once** for permission to install or upgrade: + +```powershell +Install-Module -Name safeguard-ps -Scope CurrentUser -Force +``` + +Latest stable from PowerShell Gallery is the default. Do not ask "is `safeguard-ps` available" or "which version do you have" first — check, then proceed or ask once. + +### Connect: prefer `-DeviceCode`, fall back to `-Browser`, then `-Insecure` on TLS error + +Connect with PKCE only — no password-in-script recipes; no `-Username`/`-Password` parameters in agent flows. Prefer **`-DeviceCode`**: it prints a verification URL and short code instead of launching a local browser, which is the lower-friction default for terminal sessions and works in headless / SSH / CI contexts. Fall back to `-Browser` only if the appliance does not have the Device Code grant enabled (firmware < 7.4 or grant disabled in Appliance Management): + +```powershell +Connect-Safeguard -Appliance <appliance> -DeviceCode +# or, if Device Code is not enabled on this appliance: +Connect-Safeguard -Appliance <appliance> -Browser +``` + +The verification URL and short code only appear on the cmdlet's stdout and the code expires in minutes. The agent must surface both to the operator when calling `Connect-Safeguard -DeviceCode` — the operator cannot complete the login otherwise. 
+ +Both forms block until the PKCE callback completes; await the cmdlet's own success/failure rather than asking the operator "are you logged in yet?". On a TLS/cert error (self-signed cert, mismatched CN — common on lab appliances), ask **once** for permission to retry with `-Insecure`: + +```powershell +Connect-Safeguard -Appliance <appliance> -Insecure -DeviceCode +``` + +Do not pre-ask whether the appliance has a valid certificate. Try secure; the error message tells both the operator and the agent unambiguously when `-Insecure` is needed. + +### Persist the session across iterations — serialize the token, never keep a long-running shell + +**Login budget = 1 per voyage.** Each `Connect-Safeguard -DeviceCode` (or `-Browser`) costs the operator real time and attention. Connect exactly once. + +**Long-running interactive PowerShell sessions are banned in agent flows.** They wedge on PSReadLine prediction, swallow `$ConfirmPreference` prompts, return stale back-buffer through `read_powershell`, and routinely cost a re-login when the agent has to kill them. Do not start a persistent shell to hold `$Global:SafeguardSession`. Do not invoke `Connect-Safeguard` as an async command kept alive across iterations. + +The only correct shape is short-lived sync `powershell -Command { ... }` calls. `$Global:SafeguardSession` holds **a short-lived bearer token, not a permanent credential** — valid for the rest of the voyage (typically several hours), safe to serialize to the gitignored per-session state directory, expires on its own. 
+ +**Step 1 — connect once and serialize.** Sync call; the shell exits when `Connect-Safeguard` returns (no `-NoExit`, no async): + +```powershell +Connect-Safeguard -Appliance <appliance> -Insecure -DeviceCode | Out-Null +$Global:SafeguardSession | + ConvertTo-Json -Depth 5 | + Set-Content "$env:USERPROFILE\.copilot\session-state\<session-id>\files\sg-session.json" -Encoding utf8 +``` + +**Step 2 — every subsequent cmdlet is its own fresh sync call.** Re-hydrate the saved session, normalize `Insecure` to a `[bool]` once, then thread `Appliance`, `AccessToken`, and the bool through every call: + +```powershell +$s = Get-Content "<session-state-dir>\sg-session.json" | ConvertFrom-Json +$ins = [bool]$s.Insecure.IsPresent # required: $s.Insecure does not bind to -Insecure directly after JSON roundtrip +Get-SafeguardCustomPlatform -Appliance $s.Appliance -Insecure:$ins -AccessToken $s.AccessToken +Invoke-SafeguardAssetAccountPasswordChange -Appliance $s.Appliance -Insecure:$ins -AccessToken $s.AccessToken ` + -AssetToUse <asset> -AccountToUse <account> -ExtendedLogging +Get-SafeguardTaskLog -Appliance $s.Appliance -Insecure:$ins -AccessToken $s.AccessToken -TaskId <task-guid> +``` + +Every safeguard-ps cmdlet that takes `-Appliance` also accepts `-AccessToken`. Threading those three through every call eliminates the dependency on `$Global:SafeguardSession` entirely — output returns cleanly via stdout, no PSReadLine wedging, no confirmation prompts swallowing inputs. Pulling `Insecure` from the session (rather than hardcoding `-Insecure`) keeps the agent honest: if the operator connected with a valid cert, the saved value is `$false` and every call validates. + +#### Why explicit threading and not "just re-hydrate the session global" + +Two superficially-simpler shortcuts do **not** work. 
Documented here so the next agent does not re-discover them the hard way: + +- **`Connect-Safeguard` has no parameter set that accepts an existing access token.** `Get-Help Connect-Safeguard -Full` lists seven parameter sets (Resource Owner, Credential, PKCE, Browser, Certificate, Gui, DeviceCode); every one performs a fresh login. `-NoSessionVariable` is the inverse direction (return the token instead of caching it); it does not consume one. +- **Assigning to `$Global:SafeguardSession` directly does not re-hydrate the session.** Writing `$Global:SafeguardSession = Get-Content sg-session.json | ConvertFrom-Json` populates the global with the right shape, but cmdlets that use the session variable still emit `No current Safeguard login session.` and prompt for `-Appliance`, hanging non-interactive runs. The cmdlets evidently consult a module-private variable that only `Connect-Safeguard` itself can set. + +If a future cmdlet is found that **only** reads the session variable and refuses `-AccessToken`, the correct response is to file a defect against safeguard-ps to add the parameter set — not to spin up a long-running shell to host the cmdlet. + +Treat `sg-session.json` like any other secret: write it only under the per-session state directory, never commit it, never paste it into chat or task-log output. Delete it at the end of the voyage. The bearer token redacts itself naturally on expiry; a stale file cannot be used to attack the appliance later. Never log `$Global:SafeguardSession`, the access token, or any password parameter to operator-visible output. + +If the agent finds itself about to call `Connect-Safeguard` a second time in the same voyage, **stop**. The token in `sg-session.json` is still good unless the operator rebooted the appliance or several hours have passed; re-read it. A second login is a defect, not a workaround. + +This pattern is verified in [`tools/README.md`](../../../tools/README.md) ("Authentication" section). 
`tools/Invoke-PlatformDevLoop.ps1` itself does not call `Connect-Safeguard`; the operator connects once and the wrapper picks up `$Global:SafeguardSession` (when invoked from a session that has it cached) or `-AccessToken` plumbed through. + +## Cmdlet menu + +The cmdlets this skill calls, all sourced from `Get-Help`. The shapes below are recap; consult `Get-Help` for parameter details. + +| Cmdlet | Purpose | Used by | +| --- | --- | --- | +| `Connect-Safeguard -DeviceCode` (or `-Browser`) | PKCE login. Caches `$Global:SafeguardSession`. `-DeviceCode` prints a verification URL and short code; `-Browser` launches a local browser. | Skill bootstrap. | +| `Test-SafeguardCustomPlatformScript` | Server-side dry-run of a script. POSTs to `Core/Platforms/ValidateScript/Raw`; returns the platform-preview object the script would produce. | `Invoke-PlatformDevLoop.ps1` validate phase. | +| `Import-SafeguardCustomPlatformScript` | PUTs the script to `Core/Platforms/{Id}/Script/Raw`, then re-reads the platform via `Get-SafeguardCustomPlatform`. | `Invoke-PlatformDevLoop.ps1` import phase. | +| `Export-SafeguardCustomPlatformScript` | Pulls the deployed JSON back. **Source of truth for the enhance-platform workflow** — on-disk samples are starting points and may have drifted. | Manual call before authoring an enhancement. | +| `Get-SafeguardCustomPlatform` | Looks up a platform by name or ID. Used to confirm the platform exists before import. | Idempotency checks. | +| `Test-SafeguardAssetAccountPassword` | Triggers `CheckPassword` (`POST Core/v4/AssetAccounts/{id}/CheckPassword?extendedLogging=true`). | `Invoke-PlatformDevLoop.ps1` trigger phase. | +| `Invoke-SafeguardAssetAccountPasswordChange` | Triggers `ChangePassword` (`POST Core/v4/AssetAccounts/{id}/ChangePassword?extendedLogging=true`). | `Invoke-PlatformDevLoop.ps1` trigger phase. | +| `Get-SafeguardTaskLog` | Pulls the extended task log. 
Without `-LogName`, iterates available logs and emits a synthetic `--- <LogName> ---` separator entry between sections. | `Invoke-PlatformDevLoop.ps1` log phase. | + +Endpoint paths and the separator/redaction behavior are documented with appliance-source citations in [`tools/README.md`](../../../tools/README.md) ("Cmdlet citations" and `phases[3] data` sections). + +Asset and account create/update cmdlets (`New-SafeguardAsset`, `Edit-SafeguardAsset`, `New-SafeguardAssetAccount`, `Edit-SafeguardAssetAccount`) are out of this skill's mandatory loop — operators usually create those once, by hand, against the test appliance. If the workflow needs them, source their syntax from `Get-Help` at use time and treat create-or-update as **idempotent**: look up first, edit if it exists, create if it does not. Do not re-create on every iteration. + +### Discovery setup is a prerequisite, not a cmdlet call + +A freshly-onboarded asset will reject `Invoke-SafeguardAssetAccountDiscovery` with error 60392 until an Account Discovery Schedule and Rule are wired to it. The recipe — schedule + rule + asset-attach, plus the `-DiscoveryType` ValidateSet and the `Add-SafeguardAccountDiscoveryRule` param-set trap — is in [`docs/agent-reference/failure-patterns.md`](../../../docs/agent-reference/failure-patterns.md) under *Discovery-trigger errors*. Read it before the first discovery trigger on a new platform; do not wait for 60392 to fire. + +## Always trigger with extended logging + +Every trigger cmdlet must pass `-ExtendedLogging`. The `See extended logs: Get-SafeguardTaskLog <TaskId>` line that the dev-loop script regex-matches to extract the task ID is **only emitted when `-ExtendedLogging` is set** (see `tools/Invoke-PlatformDevLoop.ps1` lines 178–196 for the extraction logic and lines 282–298 of [`tools/README.md`](../../../tools/README.md) for the appliance-side rationale). 
+ +If the operator triggered an operation without `-ExtendedLogging`, the task ID cannot be reliably recovered; ask them to re-trigger. + +## Cmdlet quirks + +Five cmdlet quirks: + +- **`-TaskId` requires a `[guid]` cast.** `Get-SafeguardTaskLog -TaskId "<guid-string>"` rejects a bare string. Cast at the call site: `Get-SafeguardTaskLog -TaskId ([guid]$id)`. +- **Task-log GUID lists are lexicographic, not chronological.** v1 GUIDs from the appliance are not time-ordered; sorting and taking the "last" item finds the wrong task. To identify the task a trigger just produced, **diff** the GUID set before and after the trigger and pick the new entry. +- **`SshCommunication` sub-log is empty for non-script-engine paths.** Built-in operations such as host-key discovery (`Invoke-SafeguardAssetSshHostKeyDiscovery`) run through a different runtime than scripted custom-platform operations; an empty `SshCommunication` array on those tasks is normal, not a failure signal. Read the `Operation` log for those. +- **Platform Tasks are async — wait for terminal state before reading downstream data.** Every operation triggered against the appliance is a Platform Task. Some `safeguard-ps` cmdlets (`Test-SafeguardAsset`, `Test-SafeguardAssetAccountPassword`, `Invoke-SafeguardAssetAccountPasswordChange`) wrap the trigger with internal polling and appear synchronous to the caller — the returned object reflects a terminal state. Others (`Invoke-SafeguardAssetAccountDiscovery` is the canonical case) return immediately with `RequestStatus.State = "Accepted"` and `PercentComplete = 0` because only the queueing step has completed. Inspect the return value: if `RequestStatus.State` is `Accepted` or `Running`, the task is not done. Querying `Get-…` cmdlets that consume results (`Get-SafeguardDiscoveredAccount`, etc.) before that returns zero rows that look like a script failure but are just an unfinished task. 
Either poll `RequestStatus.State` until it is no longer `Accepted`/`Running`, or pull the task log (using the GUID-diff approach above) and confirm a `Success` / failure record before consuming downstream data. +- **Hung non-auth cmdlet = skipped `Get-Help` on a cmdlet inferred from a sibling.** A `safeguard-ps` cmdlet that produces no output and never returns is almost always interactively prompting for a required parameter the agent did not supply — because the agent inferred parameter names from a sibling cmdlet instead of running `Get-Help`. Example: calling `Test-SafeguardAssetAccountPassword -AssetToTest <asset>` when the real parameter is `-AssetToUse`, inferred from `Invoke-SafeguardAssetAccountPasswordChange`. Kill the call, run `Get-Help <CmdletName> -Full | Out-String -Width 200`, fix the invocation. The grounding rule above is not optional for cmdlets that "look like" ones the agent just used. `Connect-Safeguard -DeviceCode` is the one explicit exception — it blocks by design until PKCE completes. + +## Use `Invoke-PlatformDevLoop.ps1` instead of re-implementing the loop + +The standard validate → import → trigger → log path is implemented once, in [`tools/Invoke-PlatformDevLoop.ps1`](../../../tools/Invoke-PlatformDevLoop.ps1). This skill calls that script rather than chaining cmdlets in prose. + +| Sub-phase needed | Switch | Appliance contact | +| --- | --- | --- | +| Local schema check only (fast inner loop) | `-SchemaOnly` | none | +| Schema + appliance dry-run (no writes) | `-ValidateOnly` | yes | +| Schema + appliance dry-run + import (no trigger) | `-NoTrigger` | yes | +| Full loop incl. trigger and task-log fetch | _(default)_ | yes | + +Output contract (one JSON document on stdout, phase-indexed exit code, programmer errors throw without JSON) is documented in [`tools/README.md`](../../../tools/README.md) ("Output JSON shape", "Exit-code contract"). 
The exit-code semantics are: `0` full success, `1` validate, `2` import, `3` trigger, `4` log fetch (script header lines 38–43, body block at lines 269–407). + +When a hand-rolled cmdlet sequence is unavoidable (e.g., a one-off `Export-…` to capture deployed JSON), still emit progress to stderr and any structured result to stdout so callers can parse cleanly. + +## Mandatory sequencing: validate before import, every time + +Schema check → `Test-SafeguardCustomPlatformScript` → `Import-SafeguardCustomPlatformScript`. Always in that order. Never: + +- Import a draft that has not been schema-validated locally (the fast inner loop catches malformed JSON cheaply). +- Import a draft that has not passed `Test-SafeguardCustomPlatformScript` against the appliance. The server-side dry-run catches imported-function arity errors, undeclared-variable references, and other things schema validation cannot. Skipping it leaks bad scripts onto the appliance and makes the iteration loop slower, not faster. +- "Just re-import to see if it works" after editing a draft that was last validated several edits ago. + +The dev-loop wrapper enforces this sequence: `-SchemaOnly` is the schema check alone, `-ValidateOnly` runs schema + `Test-`, `-NoTrigger` runs schema + `Test-` + `Import-` (no trigger), and the default runs the full chain. Use the wrapper rather than chaining cmdlets in prose. + +## Idempotency conventions + +- **Platform lookups before import.** `Import-SafeguardCustomPlatformScript -PlatformToEdit <name>` errors with `Unable to find custom platform matching '<name>'` if the platform does not exist; the dev-loop wrapper surfaces this verbatim as the `import` phase error (see real failure example in [`tools/README.md`](../../../tools/README.md), exit-2 block). Confirm the platform exists once at the start of a session, then re-use the name. +- **Asset / account on new-platform workflow.** They cannot exist yet — the platform is new. 
Create directly without a pre-check; the new-platform workflow in [`AGENTS.md`](../../../AGENTS.md) is explicit about this. Do not waste a turn asking the operator "does this asset exist already?". +- **Asset / account on enhance-platform workflow.** Look up by name or ID; edit if found; create otherwise. Do not delete-then-create. +- **Triggers** are not idempotent in the strict sense (each run produces a new task log), but re-triggering after a failed run is safe — the prior task log persists. + +## Error semantics this skill recognises + +The dev-loop wrapper distinguishes three failure shapes: + +1. **Programmer error** — the script throws and writes nothing to stdout. Examples: missing required parameter for the chosen mode, no active session, schema file not found. The agent treats these as bugs in its own invocation and fixes the call rather than re-running. +2. **Operational failure** — the script writes its JSON document and exits with a phase index (1–4). The agent reads the JSON, identifies the failed phase, and routes the result to the right next step (validate → script-authoring; import → check the platform name; trigger / log → [`task-log-analysis`](../task-log-analysis/SKILL.md)). +3. **Trigger failure with task log** — `safeguard-ps` raises `Ex.SafeguardLongRunningTaskException` carrying a typed `TaskLog` array. The wrapper surfaces this as `phases[2].status = "failed"` plus a structured `taskLog` field. A real failure example is captured in [`tools/README.md`](../../../tools/README.md) (`phases[2] data`, "Real failure-path output"). Even on trigger failure the wrapper still attempts the log fetch when a task GUID was extractable, so `phases[3]` is usually populated and the exit code stays `3`. + +## Secret handling + +- Do not write secret parameter values into evidence, status messages, or operator-visible output. 
+- SPP server-side already redacts known credential parameters as the literal string `**secret**` in returned task logs (constant `Hercules\Source\Hercules.DevKit\Constants\ParameterConstants.cs:5`, cited in [`tools/README.md`](../../../tools/README.md), "Secret handling"). Do not attempt to recover real values from these markers. +- Custom-script authors who add new secret parameters must declare them with `Type: "Secret"` so the same redaction applies — see [`script-authoring`](../script-authoring/SKILL.md). + +## Failing closed + +This skill refuses to run any operation that requires appliance contact when the active mode is `author-only` or `probe-only`, or when there is no `Connect-Safeguard` session and no `-AccessToken` was supplied. The dev-loop wrapper enforces the same check at lines 235–250 of `tools/Invoke-PlatformDevLoop.ps1`. Surface the missing prerequisite to the operator; do not attempt a workaround. + diff --git a/.agents/skills/script-authoring/SKILL.md b/.agents/skills/script-authoring/SKILL.md new file mode 100644 index 0000000..263cf6d --- /dev/null +++ b/.agents/skills/script-authoring/SKILL.md @@ -0,0 +1,248 @@ +--- +name: script-authoring +description: >- + Use when drafting or revising the custom-platform JSON itself. Four + pattern recipes (ssh-interactive, ssh-batch, http-api, http-form-fill) + cite schema, samples, and templates and cover Do blocks, status + messages, custom parameters, and reserved variables. The http-api + recipe spans every auth shape the API documents — Basic/Digest via + HttpAuth, or Bearer / custom Authorization scheme / custom-header API + key via script-built Headers — plus one-step vs two-step token fetch. + Mandates the fast inner loop: local schema validation against schema/ + before any appliance round-trip. SchemaOnly green is necessary but not + sufficient — cross-reference samples for analogous patterns before + declaring ready. 
+--- + +# script-authoring + +## Pre-flight + +Before drafting or revising any platform JSON, consult [`AGENTS.md`](../../../AGENTS.md) for the active workflow algorithm (new-platform vs enhance-platform) and the iterative debug-loop budget. If the operator skipped target probing or strategy selection and went straight to "write me the JSON", surface that — the wrong pattern compiles cleanly but fails on the appliance. + +## Scope + +Four pattern sub-recipes cover the supported transports: + +- [`ssh-interactive`](#ssh-interactive) +- [`ssh-batch`](#ssh-batch) +- [`http-api`](#http-api) +- [`http-form-fill`](#http-form-fill) + +Telnet/TN3270 is out of scope for the agent skill system. The recipes below are starting points; pick one based on [`strategy-selection`](../strategy-selection/SKILL.md) output and adapt it. + +## Modes + +`author-only`, `probe-only`, `full-loop`. The skill never directly contacts the appliance — it produces JSON and hands off to [`safeguard-ps-operations`](../safeguard-ps-operations/SKILL.md). + +## Authoritative inputs + +- The platform-script JSON Schema: [`schema/custom-platform-script.schema.json`](../../../schema/custom-platform-script.schema.json). The schema is intentionally permissive ("Provides autocomplete and hover help for editors while remaining permissive enough to allow valid edge-case scripts" — schema description, line 5). +- The samples and templates index: [`docs/agent-reference/samples-index.md`](../../../docs/agent-reference/samples-index.md). Look up a starting point by `(protocol, auth-scheme, operations)` — do not walk `samples/` from scratch. +- Reference for individual scripting commands lives under [`docs/reference/commands/`](../../../docs/reference/commands/). Prefer those pages over guessing command shapes. 
+- Reserved variables and custom parameter conventions: [`docs/reference/reserved-parameters.md`](../../../docs/reference/reserved-parameters.md), [`docs/reference/variables.md`](../../../docs/reference/variables.md), [`docs/reference/custom-parameters.md`](../../../docs/reference/custom-parameters.md). +- Status message taxonomy: [`docs/reference/status-messages.md`](../../../docs/reference/status-messages.md). + +## Mandatory: fast inner loop first + +Local JSON Schema validation runs **before** any appliance round-trip: + +```powershell +./tools/Invoke-PlatformDevLoop.ps1 -ScriptFile <path-to-draft.json> -SchemaOnly +``` + +The check is sub-second, makes no appliance contact, and exits `0` on pass and `1` on schema rejection. Only after this passes does the agent move to `-ValidateOnly` (server dry-run) and then to import + trigger. + +### `SchemaOnly` is necessary, not sufficient + +A green local schema check proves the JSON parses and conforms to the schema. It does **not** catch: + +- Undefined variables referenced inside `Do` blocks (`%FuncUserName%` vs `%FuncUsername%`, etc. — the schema does not parse `%…%` substitutions). +- Regex in `ExpectRegex` / `Condition.If` that compiles but does not match real target output. +- `Send` / `Receive` ordering that drifts out of sync with the actual prompt. +- Status messages emitted in the wrong order or at the wrong phase. + +Before declaring a draft "ready to import," cross-reference an analogous sample from `samples-index.md`. If a sample uses a construct your draft does not (e.g., a `Try`/`Catch` around `Disconnect`, a `Receive` flush of the login banner, a `Headers` block before `HttpAuth`), surface that divergence to the operator rather than silently omitting it. + +## Conventions all four patterns share + +- **Top-level shape.** `Id`, `BackEnd: "Scriptable"`, optional `Meta`, optional `Imports`, then one object per operation (`CheckSystem`, `CheckPassword`, `ChangePassword`, …). 
Operation objects contain `Parameters` (array of single-key objects) and `Do` (array of command objects). See [`schema/custom-platform-script.schema.json`](../../../schema/custom-platform-script.schema.json) lines 14–80 for the top-level fields, and [`docs/reference/script-structure.md`](../../../docs/reference/script-structure.md) for prose. +- **Reserved parameters** are not declared by the script — SPP injects them. Custom parameters are declared in `Parameters` and addressed as `%Name%`. See [`docs/reference/reserved-parameters.md`](../../../docs/reference/reserved-parameters.md) and [`docs/reference/custom-parameters.md`](../../../docs/reference/custom-parameters.md). +- **Secrets.** Any parameter that holds a credential MUST be `Type: "Secret"` so SPP redacts it in task logs (see the redaction note in [`safeguard-ps-operations`](../safeguard-ps-operations/SKILL.md) and [`tools/README.md`](../../../tools/README.md), "Secret handling"). Use the `::$` modifier (`%FuncPassword::$%`) where the templates and samples do; do not invent a different escape. +- **`Try` / `Catch`.** Wrap fallible operations (network calls, command execution, parses) so a clean `Disconnect` still runs and a structured `Return`/`Throw` is produced. Both [`templates/TemplateSshMinimal.json`](../../../templates/TemplateSshMinimal.json) and [`templates/TemplateHttpMinimal.json`](../../../templates/TemplateHttpMinimal.json) demonstrate this shape end-to-end. +- **Return values.** End each operation with `Return` (typically `%CheckResult%` or a discovery payload). Never let an operation fall off the end of `Do` without a return. +- **Status messages.** Emit them via the supported logging commands (see [`docs/reference/status-messages.md`](../../../docs/reference/status-messages.md)) — they end up in the task log and are how [`task-log-analysis`](../task-log-analysis/SKILL.md) knows how far the script got. 
+ +If a `Do`-block construct does not appear in any sample or template, **stop and ask** before adding it. The grounding rule applies inside the JSON, not just around it. + +### Bake diagnostics in on the first try + +Every appliance round-trip (validate → import → trigger → fetch task log) costs the operator real time. Treat it as the most expensive resource in the loop. Before a trigger, mentally walk through every failure branch the script can take and ask: *if this fails, will the task log tell me **why**, or will I need another iteration to find out?* If the answer is "another iteration", instrument the script before triggering — not after the first failure surprises you. + +Concrete rules for any `Send`/`ExecuteCommand` block whose output is parsed: + +- **Capture stderr.** Use `2>&1` (combine streams) for shell pipelines whose output you parse. Never `2>/dev/null`. Never bare stdout-only on a command that can fail. The actual diagnostic almost always comes out on stderr. +- **Capture exit codes explicitly.** Prefer `cmd 2>&1; echo MARKER_RC_$?\n` over `cmd && echo OK || echo FAIL`. The numeric code distinguishes auth failure from permission failure from syntax failure without another round trip; the binary OK/FAIL form throws that information away. +- **Suppress sudo's password prompt** with `-S -p ''` when piping a password into sudo. Without `-p ''`, sudo's prompt text leaks into the captured buffer and pollutes the regex / parse logic of whatever command follows. +- **Terminate `Send` buffers with `\n`.** A PTY shell will not execute a typed line until it sees a newline. A `Send` without `\n` causes the next `Receive` to time out (or match echo) — a silent class of bug that costs an entire iteration to diagnose. +- **Echo the parsed buffer back** via `WriteResponseObject` (or the equivalent diagnostic command) so it lands in the task log. 
Without this, parse-condition failures in `Condition` blocks produce a `Returning false` with no visible reason — another wasted iteration. + +### When two iterations fail with the same signature, stop drafting and grep + +If iteration N+1 fails with the same classified phase and substantively the same signature as iteration N (same `Status` enum value, same parse failure in the same `Receive`, same regex that did not fire), switch from drafting to sample-mining: + +1. Run `grep -rn "<construct>" samples/<protocol>/` for the construct that is failing — `passwd`, `chpasswd`, `Bearer`, `HttpAuth`, `ExtractJsonObject` against a similar response shape, etc. +2. **Read the matching sample's full operation in context**, not just the line that grep returned. The shape around the line — what `Receive` precedes it, which buffer is marked `ContainsSecret`, whether the surrounding command has quotes — is usually what makes the sample work. +3. Port the working shape into the draft as a single change. Trigger. If the new failure is in a different phase, the port worked; iterate from there. + +### Function-call signatures: copy from samples, do not infer + +When emitting a `Function` call — whether to a locally-defined function, an imported library function, or anything else with a name and `Parameters` array — the agent **must** find at least one working call site for that function in `samples/` and copy the `Parameters` array shape verbatim. + +- The call's `Parameters` field is a positional array; calls do not name their arguments. Order matters; arity matters. +- Public prose docs (e.g., [`docs/reference/imports.md`](../../../docs/reference/imports.md)) list library and function names but **deliberately do not document call signatures**. That is not an oversight — the deployed appliance's view of an imported function's arity can drift from any external reference, including the upstream source it was built from. 
Samples are the only source of call shapes that round-trip through CI against shipped appliances. +- Search the whole `samples/` tree, not just the closest production sample for the active pattern. A function may be imported by several samples in different sub-trees. +- If no sample exercises the call you need, **stop and ask** the operator. The fallback is empirical probing via `Test-SafeguardCustomPlatformScript`: submit a call with a deliberate-arity guess and read the appliance's `expects N parameters` error literally. The appliance is authoritative for its own deployed signature. That probe is a [`safeguard-ps-operations`](../safeguard-ps-operations/SKILL.md) action, not this skill's. +- Do not pad with `""` to match a guessed arity, do not reorder a sample's call to "look more logical," and do not infer a parameter from a function name. + +If a sample's call site uses 3 args and another uses 4, that is a real signal: either the function is overloaded, or one of those samples shadows the import with a locally-defined function of the same name. Read the sample's `Imports` and `Functions` blocks before copying — the right call site is the one whose enclosing script imports the same library yours does. + +## Pattern recipes + +### SSH operations checklist (applies to both ssh-interactive and ssh-batch) + +Every SSH platform meant for asset onboarding **must** include a `DiscoverSshHostKey` operation. The appliance classifies a platform as SSH-capable by inspecting its operation set (Hercules runtime check, not the schema) and refuses host-key flows on platforms that lack it — surfaces as `60306: Platform does not support SSH authentication` from `New-SafeguardCustomPlatformAsset`. Copy the shape from [`samples/ssh/generic-linux/GenericLinux.json`](../../../samples/ssh/generic-linux/GenericLinux.json); do not set `SoftwareVersionVariableName` on the command (no on-disk sample does, and the runtime silently fails on it via 60307). 
+ +### ssh-interactive + +**Use when** the target presents a shell prompt, banner, or appliance CLI; password change goes through interactive prompts (`passwd`); sudo may prompt. + +**Starter:** [`templates/TemplateSshMinimal.json`](../../../templates/TemplateSshMinimal.json) — minimum viable `CheckSystem` using `Connect` + `Send` + `Receive`. Wraps the work in `Try`/`Catch` and unconditionally `Disconnect`s. + +**Closest production sample:** [`samples/ssh/generic-linux/GenericLinux.json`](../../../samples/ssh/generic-linux/GenericLinux.json) — full `CheckSystem`, `CheckPassword`, `ChangePassword`, `DiscoverSshHostKey`. Mid-complexity sample with prompt flushing and unique success markers (e.g., `INIT_CHECK=$?` style). + +**Key shapes (verified in the sample/template above):** + +- `Connect`: `Type: "Ssh"`, `RequestTerminal: true` (default), `NetworkAddress`, `Port`, `Login`, `Password`/`UserKey`, `CheckHostKey`/`HostKey`, `Timeout`. The connection is named via `ConnectionObjectName` (e.g., `"Global:ConnectSsh"`); subsequent `Send`/`Receive` reference the unscoped name (`"ConnectSsh"`). +- `Send` writes a single line; pair with `Receive` using `ExpectRegex` to anchor on the prompt or a unique marker. +- `Disconnect` always goes inside its own `Try`/`Catch` so a hung session does not mask the operation result. + +**Common pitfalls:** unflushed banners (the first `Receive` after `Connect` is often the banner, not the prompt); over-broad `ExpectRegex` that matches `passwd:` inside an error sentence; putting `Disconnect` after `Return`. + +Reference: [`docs/guides/ssh-platforms.md`](../../../docs/guides/ssh-platforms.md), [`docs/reference/commands/connect.md`](../../../docs/reference/commands/connect.md), [`docs/reference/commands/send-receive.md`](../../../docs/reference/commands/send-receive.md). + +### ssh-batch + +**Use when** the target accepts `ssh user@host '<command>'` cleanly: stdout/stderr/exit-code returned without a PTY. 
+ +**Closest production sample:** [`samples/ssh/linux-ssh-batch-mode/LinuxSshBatchModeExample.json`](../../../samples/ssh/linux-ssh-batch-mode/LinuxSshBatchModeExample.json). The `Connect` block sets `RequestTerminal: false` (line 162) and the loop uses `ExecuteCommand` with `BufferName`, `StderrBufferName`, and `ExitStatusBufferName` (lines 205–211, 235–241). + +**Key shapes:** + +- `Connect`: same as `ssh-interactive` but `RequestTerminal: false`. +- `ExecuteCommand`: `ConnectionObjectName`, `Command`, `Stdin` (optional), `BufferName` for stdout, `StderrBufferName`, `ExitStatusBufferName`. Inspect the exit-status variable in a `Condition` block, not by parsing stderr. +- `CommandContainsSecret` / `InputContainsSecret` mark whether the `Command`/`Stdin` carries a secret so SPP can redact in task logs. + +**Common pitfalls:** assuming PTY-style behavior (interactive `passwd` does not work over batch mode — use `chpasswd` or vendor-specific batch commands); forgetting to check the exit-status buffer. + +#### `CheckPassword` on Linux: pass the whole shadow line to `CompareShadowHash` + +The authoritative pattern (matches the Hercules `LinuxSshFunctions.json` import that ships with the appliance) is in [`samples/ssh/generic-linux/GenericLinux.json`](../../../samples/ssh/generic-linux/GenericLinux.json) lines 220–245: + +1. `ExecuteCommand`: `sudo -S /usr/bin/getent shadow %AccountUserName%` (batch mode has no PTY, so `-S` is required). +2. Capture the whole stdout buffer into `%AccountEntry%`. +3. `CompareShadowHash` with `SaltedHash: "%AccountEntry%"` — **pass the whole shadow line, not a pre-extracted field**. The component handler splits on `:` and pulls field[1] internally (verified in Hercules `Source/Hercules.WebService/Common/Crypt/PasswordHash.cs` `CheckPasswordAgainstShadowEntry`). +4. `Condition` on `PasswordHashMatched == true` → `Return true`; else `Return false`. +5. 
Wrap the whole sequence in `Try`/`Catch`; the `Catch` is the **fallback** for environments where `getent` is unavailable (locked-down sudo, no shadow read), not a hash-format workaround. + +**Do not pre-split the shadow line in a `SetItem` expression** (`ShadowLine.Split(':')[1]`). Two compounding reasons: + +- It is unnecessary — `CompareShadowHash` does the split itself. +- It triggers a Z.Expressions overload-ambiguity error on `string.Split(char)` (catalogued in [`docs/agent-reference/failure-patterns.md`](../../../docs/agent-reference/failure-patterns.md)), and the resulting `Try`/`Catch` fallback emits a sentinel verdict that looks like a target-state mismatch but is really a script bug. + +`CompareShadowHash` understands yescrypt (`$y$j9T$…`, default on Ubuntu 22.04+ / Debian 12+), bcrypt, SHA-512, SHA-256, MD5, and AIX SSHA. There is no hash-format reason to abandon it for an auth-by-login primary; auth-by-login is the documented `Catch` fallback only. + +Reference: [`docs/guides/ssh-platforms.md`](../../../docs/guides/ssh-platforms.md) ("Batch mode" section), [`docs/reference/commands/execute-command.md`](../../../docs/reference/commands/execute-command.md). + +#### Catch blocks must log before falling back + +Any `Try`/`Catch` whose `Catch` produces a verdict (rather than re-raising) **must log the caught exception** via `WriteResponseObject` (or a `Status` message that includes the exception text) before emitting the fallback value. Otherwise the next agent reads a clean verdict — `PasswordMismatch`, `false`, `Error` — and attributes it to target state when the actual cause was a script-side bug the catch swallowed. A Z.Expressions overload error in a pre-split `SetItem`, for example, will surface as a bare `PasswordMismatch` unless the catch emits the inner exception text. 
+ +### http-api + +**Use when** the target exposes a documented HTTP/REST API and the script presents a credential the operator already holds (token, password, API key, anything else). + +The script shape is the same regardless of auth scheme: `BaseAddress` → `NewHttpRequest` → (auth setup) → `Request` → `ExtractJsonObject` → `Status`. What varies is two orthogonal choices the recipe makes you spell out: **auth shape** and **one-step vs two-step**. + +#### Auth shape — pick a bucket, then a specific scheme + +The first decision is *who handles the auth dance*: + +| Bucket | What the script does | Auth schemes | +| --- | --- | --- | +| **HttpAuth-managed** | Hand SPP a username/password and an auth `Type`; the runtime builds the header. | `Basic`, `Digest` | +| **Script-managed header** | Build the header value yourself and attach it via `Headers`/`AddHeaders`. | `Authorization: Bearer <token>`, custom `Authorization` schemes (`PVEAPIToken=…`, `Token …`, vendor-specific), custom-header API keys (`X-API-Key`, `X-Vault-Token`, `X-Auth-Token`, …) | + +**HttpAuth-managed (Basic, Digest).** Set per-request, not once globally; this matches the existing samples and avoids leaking the service-account credential into requests that should target the managed account. + +```jsonc +{ "HttpAuth": { + "RequestObjectName": "SystemRequest", + "Type": "Basic", + "Credentials": { "Login": "%FuncUsername%", "Password": "%FuncPassword%" } } } +``` + +Closest production sample for Basic: [`samples/http/wordpress/WordPressHttp.json`](../../../samples/http/wordpress/WordPressHttp.json) (lines 33–40, 78–82, 128–132). Starter template: [`templates/Pattern-GenericRestApiBasicAuth.json`](../../../templates/Pattern-GenericRestApiBasicAuth.json). For Digest the shape is identical with `Type: "Digest"`; clean self-hostable Digest targets are rare in 2025, so verify the runtime supports the scheme against the deployed appliance version before committing. 
+ +**Script-managed header (Bearer, custom Authorization scheme, custom-header API key).** Use `Headers`/`AddHeaders`, not `HttpAuth`. There is no `HttpAuth` `Type` for arbitrary header shapes. + +```jsonc +{ "Headers": { + "RequestObjectName": "CheckRequest", + "AddHeaders": { + "Accept": "application/json", + "Authorization": "Bearer %AccessToken%" } } } +``` + +Swap `Authorization: Bearer <token>` for whatever the vendor actually uses. Two common variants: + +- **Bearer or custom `Authorization` scheme** (`Authorization: Bearer <token>`, `Authorization: PVEAPIToken=user@realm!tokenid=UUID`, `Authorization: Token …`). Closest production sample: [`samples/http/onelogin-jit/OneLogin_GRC_JIT_addon.json`](../../../samples/http/onelogin-jit/OneLogin_GRC_JIT_addon.json) (Bearer header at lines 1228, 1361, 1510, 1672, 1834, 2002, 2135). Starter template: [`templates/Pattern-GenericRestApiBearerToken.json`](../../../templates/Pattern-GenericRestApiBearerToken.json). +- **Custom-header API key** (`X-API-Key: %ApiKey%`, `X-Vault-Token: %Token%`, `X-Auth-Token: …`). See lines 184–190 of [`templates/Pattern-GenericRestApiKeyRotation.json`](../../../templates/Pattern-GenericRestApiKeyRotation.json). That template also covers the `CheckApiKey` / `ChangeApiKey` operation pair for when the script must rotate the key itself; pair with [`docs/guides/api-key-management.md`](../../../docs/guides/api-key-management.md). + +Whichever bucket you're in, declare the credential as `Type: "Secret"` in `Parameters` so SPP redacts it in task logs. + +#### One-step vs two-step + +Orthogonal to the bucket above: + +- **One-step** — the operator already holds the credential the script presents on every operation call. HttpAuth-managed shapes are almost always one-step. Script-managed-header shapes are one-step when the credential is a long-lived API key or PAT. 
+- **Two-step** — the script POSTs credentials (often HttpAuth-managed `Basic` with client id/secret, sometimes form-encoded) to a token endpoint, parses the response with `ExtractJsonObject` to capture an access token, then attaches that token via script-managed `Headers` on every subsequent operation call. The `samples/http/onelogin-jit/` sample is the canonical example, interleaving `HttpAuth Basic` on the token call (lines 2275–2278) with `Authorization: Bearer %AccessToken%` on the operation calls. + +Two-step gotcha: do **not** reuse the same `RequestObjectName` for the token-fetch and the operation calls. The two have different `HttpAuth`/`Headers` configurations and crossing them is a common source of 401s. Build a fresh `NewHttpRequest` for each. + +Reference: [`docs/guides/http-platforms.md`](../../../docs/guides/http-platforms.md), [`docs/reference/commands/http-auth.md`](../../../docs/reference/commands/http-auth.md), [`docs/reference/commands/http-setup.md`](../../../docs/reference/commands/http-setup.md), [`docs/reference/commands/request.md`](../../../docs/reference/commands/request.md), [`docs/reference/commands/json.md`](../../../docs/reference/commands/json.md), [`docs/guides/api-key-management.md`](../../../docs/guides/api-key-management.md). + +### http-form-fill + +**Use when** the target only has an HTML login form (no API). + +**Closest production sample:** [`samples/http/facebook/CustomFacebook.json`](../../../samples/http/facebook/CustomFacebook.json). The pattern uses `ExtractFormData` to walk the rendered form (lines 112, 195, 250) and `Request` with `ContentType: "application/x-www-form-urlencoded"` to submit it (lines 137, 222, 277). Cookies persist by default across requests on the same `RequestObjectName`. + +**Key shapes:** + +- GET the login page; `ExtractFormData` to capture hidden fields (CSRF tokens, lifecycle cookies). +- Mutate the extracted form object (set username/password fields), POST it back with the right `ContentType`. 
+- Handle multi-step flows (login → password-change page → submit) as separate `Request` + `ExtractFormData` cycles. Do not assume a single round-trip works. +- Watch for redirects; some forms set the session cookie on a 30x response, so do not abort on redirect. + +**Common pitfalls:** matching field names that the vendor changes between releases (treat the form structure as observed, not assumed); skipping CSRF tokens; reusing a `RequestObjectName` across login domains and losing cookies. + +Reference: [`docs/guides/http-platforms.md`](../../../docs/guides/http-platforms.md) ("Form-fill" section), [`docs/reference/commands/forms.md`](../../../docs/reference/commands/forms.md), [`docs/reference/commands/cookies.md`](../../../docs/reference/commands/cookies.md), [`docs/quick-start/http-form-fill.md`](../../../docs/quick-start/http-form-fill.md). + +## After authoring + +1. Run `Invoke-PlatformDevLoop.ps1 -SchemaOnly` against the draft. Iterate on schema errors until clean. +2. Cross-reference the chosen pattern's analogous sample. Note any structural divergences and surface them. +3. Hand off to [`safeguard-ps-operations`](../safeguard-ps-operations/SKILL.md) for `-ValidateOnly` and onward. Do not call the appliance from this skill. +4. When the trigger fails, route the task log to [`task-log-analysis`](../task-log-analysis/SKILL.md) — do not jump straight back into editing the JSON without classifying the failure. + diff --git a/.agents/skills/strategy-selection/SKILL.md b/.agents/skills/strategy-selection/SKILL.md new file mode 100644 index 0000000..af81788 --- /dev/null +++ b/.agents/skills/strategy-selection/SKILL.md @@ -0,0 +1,104 @@ +--- +name: strategy-selection +description: >- + Use to decide the implementation approach for a custom platform from + protocol, vendor documentation, and probe evidence. Maps inputs to a + recommendation across SSH (interactive vs batch) and HTTP (form-fill + vs api). 
For http-api, also picks the auth shape — HttpAuth-managed + (Basic, Digest) vs script-managed header (Bearer, custom Authorization + scheme, custom-header API key) — and one-step vs two-step token fetch. + Plus credential intent (password / SSH key / API key / bearer token) + and self-managed vs service-account. Accepts both fetched URLs and + vendor-doc excerpts the user pasted into the conversation. +--- + +# strategy-selection + +## Pre-flight + +Before recommending a pattern, consult [`AGENTS.md`](../../../AGENTS.md) for the active workflow algorithm. Strategy selection is the bridge between [`target-probing`](../target-probing/SKILL.md) and [`script-authoring`](../script-authoring/SKILL.md): it turns vendor docs plus probe evidence into a concrete recommendation, then hands off. It does not author JSON. + +## Scope + +Map `(protocol, vendor docs, probe evidence)` to one of the four authoring patterns covered by [`script-authoring`](../script-authoring/SKILL.md): + +- `ssh-interactive` +- `ssh-batch` +- `http-api` +- `http-form-fill` + +When `http-api` is the recommendation, also pick: + +- **Auth shape bucket.** *HttpAuth-managed* (Basic, Digest) vs *script-managed header* (Bearer, custom `Authorization` scheme, custom-header API key). +- **Specific scheme** within the bucket (e.g., `Bearer` vs `PVEAPIToken=…` vs `X-API-Key`). +- **One-step vs two-step** — does the operator already hold the credential the script presents on every call (one-step), or must the script POST credentials to a token endpoint first (two-step)? + +Plus two orthogonal dimensions that apply to every pattern: **credential intent** (password / SSH key / API key / bearer token) and **self-managed vs service-account**. + +## Modes + +`author-only`, `probe-only`, `full-loop`. The skill never contacts the appliance or the target. It reads inputs and produces a recommendation. + +## Inputs + +1. 
**Protocol.** From the operator's stated requirement and confirmed by `target-probing` (`evidence.protocol`). +2. **Vendor documentation.** Either a URL the agent has fetched, or content the operator pasted into the conversation. **Both are first-class.** Use [`docs/agent-reference/vendor-doc-search-recipes.md`](../../../docs/agent-reference/vendor-doc-search-recipes.md) for query templates and the normalization recipe. +3. **Probe evidence.** The artifact produced by [`target-probing`](../target-probing/SKILL.md), conforming to [`.agents/schemas/evidence.schema.json`](../../../.agents/schemas/evidence.schema.json). +4. **Operator-declared credential intent.** `credentialKind` from the evidence artifact (`password | ssh-key | api-key | bearer-token | unknown`). This is sourced from the operator, not inferred — that is a deliberate choice in the schema (lines 56–60). + +## The decision tree + +This skill **does not duplicate** the decision tree. The tree lives in [`docs/agent-reference/strategy-decision-tree.md`](../../../docs/agent-reference/strategy-decision-tree.md). This skill wraps it with prompting rules: when to ask the operator, when to decide autonomously, how to phrase the trade-off when both branches look viable. + +When a recommendation is made, cite the row that drove it. 
*"Recommended `ssh-batch` because the [SSH branch row matching `ssh user@host '<command>' returns stdout/stderr/exit-code cleanly`](../../../docs/agent-reference/strategy-decision-tree.md#ssh-branch) matches probe record `<probeRecord.id>`."* + +## Vendor-doc handling + +Whether vendor docs arrive via URL fetch or paste, normalize them into the structured record shown in [`docs/agent-reference/vendor-doc-search-recipes.md`](../../../docs/agent-reference/vendor-doc-search-recipes.md) ("Normalization recipe") **before** running the decision tree: + +``` +Vendor: <vendor / product name> +Version: <product or API version> +Source: <URL, or "operator paste"> +Captured: <date> +Authentication: scheme / endpoint / notes +Operations: method / endpoint / payload / notes +Pagination: shape / parameters +Quirks: one or two notes +``` + +This serves three purposes: it forces the agent to read the docs (not skim), it strips secrets, and it gives `script-authoring` a single citable artifact. + +If the agent has no web search **and** the operator has not pasted vendor docs, fall back to: + +1. Whatever `target-probing` captured under `httpFindings.apiDiscovery` or `sshFindings`. +2. Asking the operator to paste the relevant pages. + +Do not invent vendor-doc content. The grounding rule applies. + +## When to ask vs decide + +The detailed rules per branch are in the decision tree. The skill-level meta-rules: + +- **Ask** when the choice has security implications and probe evidence does not conclusively favour one option (e.g., Basic vs Bearer when both are documented; service-account vs self-managed when the deployment context is unclear). +- **Ask** when the operator has not stated `credentialKind` and probe evidence cannot infer it (a Bearer token in the operator's hand and an API key look similar in `httpFindings.authScheme`). +- **Decide** when probe evidence and vendor docs corroborate one option directly. State the decision and cite the corroborating evidence in one sentence — the operator can correct course before authoring begins. 
+- **Surface the trade-off** when both an SSH and an HTTP path are viable. Per the decision tree's top-level guidance, prefer HTTP when the API covers the required operations end-to-end without shell access; APIs tend to produce stabler scripts than shell-prompt scraping. Make the trade-off visible rather than picking silently. + +## Self-managed vs service-account + +Orthogonal to the four patterns. Decide based on vendor docs and probe evidence, not assumption (see the "Self-managed vs service-account" section of the decision tree). When in doubt, ask the operator which mode the deployment will use — the answer changes which operations the script must implement and may bring `service-account` parameters (like a separate `FuncUsername`/`FuncPassword`) into scope. + +## Output + +The skill emits a short structured recommendation to whichever caller asked, typically `script-authoring` next: + +- **Recommended pattern** — one of the four. +- **For `http-api`:** auth shape bucket + specific scheme + one-step vs two-step. +- **Credential intent** — one of the schema-defined kinds. +- **Self-managed vs service-account** — pick or "ask operator". +- **Citations** — the decision-tree row that drove the choice, the vendor-doc record, and the relevant `probeRecord.id`s from the evidence artifact. +- **Open questions** — anything the agent could not decide with the available evidence. Surface these to the operator before authoring rather than after. + +When evidence is incomplete, the recommendation is allowed to be conditional ("`http-api` with two-step Bearer if vendor confirms the token endpoint at `/oauth/token`; otherwise re-probe and revisit"). A conditional recommendation is preferable to a confident guess. 
+ diff --git a/.agents/skills/target-probing/SKILL.md b/.agents/skills/target-probing/SKILL.md new file mode 100644 index 0000000..47ce059 --- /dev/null +++ b/.agents/skills/target-probing/SKILL.md @@ -0,0 +1,190 @@ +--- +name: target-probing +description: >- + Use when the agent must learn how a live target system actually behaves + before authoring or revising a custom platform script. Covers per-protocol + recon recipes (SSH and HTTP) run from the operator's local shell with a + service-account credential, the probe-safety contract (read-only by + default, per-probe consent for destructive probes, rate limits, + no-production guard), and the structured evidence artifact consumed by + strategy-selection and script-authoring. +--- + +# target-probing + +## Pre-flight + +Before running any probe, consult [`AGENTS.md`](../../../AGENTS.md) for the active workflow algorithm and the iterative debug-loop budget. Probing happens once per workflow at the start (or when prior assumptions have been invalidated). It is not a free retry mechanism. + +## Scope + +Local-shell recon recipes against a live target with a service-account credential. SSH and HTTP only — telnet/TN3270 is out of scope for the agent skill system. Probing produces a structured **evidence artifact** that conforms to [`.agents/schemas/evidence.schema.json`](../../../.agents/schemas/evidence.schema.json) and is consumed by [`strategy-selection`](../strategy-selection/SKILL.md) and [`script-authoring`](../script-authoring/SKILL.md). + +This skill calls `ssh`, `curl`/`Invoke-WebRequest`, etc. directly from the operator's machine. It does **not** mediate probes through SPP. + +## Modes + +- **probe-only**, **full-loop** — operational. +- **author-only** — fails closed. There is no offline form of probing. + +## Probe-safety contract (mandatory) + +All six items below are non-negotiable. The agent enforces them at execution time, not just at planning time. + +1. 
**Read-only by default.** Probes that only observe — banner grab, `WWW-Authenticate` header inspection, `whoami`, `id`, `uname`, GET on a documented API endpoint, login-form HTML inspection — run without per-probe confirmation. +2. **Destructive probes that go beyond the service account require explicit per-probe operator opt-in.** Key install, account create/delete, sudo-that-mutates non-service-account state, POST/PUT/DELETE against undocumented endpoints — each is presented to the operator with a one-line *"what this will do, what could go wrong"* summary and proceeds only on explicit consent. Consent is **per probe, not per session**. Record the consent timestamp and the summary that was shown in the evidence artifact (`probeRecord.consent.grantedAt`, `probeRecord.consent.summaryShown` — see [`.agents/schemas/evidence.schema.json`](../../../.agents/schemas/evidence.schema.json) lines 173–188). + + **Exception: the service account password on the target under test.** Once `nonProductionAffirmed=true` is set, the service account *is* a test account, and rotating its password as part of validating the workflow under test is an announced operation, not a per-probe consent gate. The agent announces the intent up front (e.g., *"I'll rotate the service account password during this iteration to exercise `ChangePassword`. Capture the new value if you need it for re-auth."*) and proceeds. Record the announcement once on `probeRun` (free-text in the run summary) rather than as a destructive `probeRecord`. This exception is scoped narrowly: it covers the service account on the target identified in `target.host`, and only operations the workflow itself is testing. +3. **Rate limits.** Hard cap of 3 authentication attempts per minute per target. Back off on any auth failure rather than retrying. The cap exists to avoid tripping account-lockout policies and IDS, not as a guideline to be ignored when "just one more try" looks productive. +4. 
**No production targets.** This skill refuses to run if the operator has not affirmed the target is non-production. The affirmation is captured as `target.nonProductionAffirmed: true` in the evidence artifact (schema line 40). The affirmation is a soft control: it places responsibility on the operator. The agent does not (and cannot) independently verify environment classification. +5. **Pre-flight echo.** Before the first probe of a session, print the planned probe sequence, the service account name (not the secret), and the target host, and wait for an explicit "go" from the operator. The service-account credential should already have been captured during requirements gathering (per *Question discipline* in `AGENTS.md`); if for some reason it has not, ask for it in the same turn as the echo block, with a one-line reminder that the operator can rotate it later. +6. **Fail-closed on lockout / throttle / MFA signals.** If any probe response indicates lockout, throttling, or MFA challenge, stop probing immediately and surface to the operator. Do not continue down the playbook. Record `probeRun.haltedReason` accordingly (`lockout-signal | throttle-signal | mfa-challenge | rate-limit-exceeded | operator-stop | operator-denied-destructive | error`; enum at schema lines 92–103). + +If the agent cannot satisfy any of the six items, it stops and asks. Bypassing the contract is never acceptable, even when the operator nominally consents to skip it — the contract exists precisely to catch the consequences of "this will be fine" decisions. + +## Evidence artifact + +Every probing session produces one evidence artifact, conforming to [`.agents/schemas/evidence.schema.json`](../../../.agents/schemas/evidence.schema.json). 
Required top-level fields: `schemaVersion` (`"0.1"`), `protocol` (`ssh` or `http`), `target`, `serviceAccount`, and `probeRun` (with `startedAt` and an ordered `probes` array). Within `target` the schema itself requires only `host`, and within `serviceAccount` only `accountName`; `nonProductionAffirmed` and `credentialKind` are schema-optional, but this skill always records them — `nonProductionAffirmed` must be `true` per contract item 4, and `credentialKind` names the kind of credential, never the secret. + +**Secrets never appear in evidence.** The `serviceAccount.accountName` field is required; there is no field for the secret. `probeRecord.command` substitutes a placeholder for any credential. This is enforced by the schema's `additionalProperties: false` at the top level and on the `serviceAccount` object — invented secret-bearing fields fail validation. + +Protocol-specific findings live under `sshFindings` or `httpFindings`. The v0 schema marks the internal shapes of these as TODO and intentionally permissive; this skill is the first consumer to populate them. When the playbooks below settle on a final shape, propose a schema bump as a follow-up — do not silently invent fields the schema rejects. + +`strategyHints` is optional. Use it sparingly: it signals to `strategy-selection` that probing strongly favours one of the four authoring patterns. The `rationale` must cite a specific `probeRecord.id`, not a generic statement. + +## Pre-flight echo template + +Before the first probe of a session, print and wait for "go": + +``` +Target: <host>[:<port>] +Protocol: <ssh|http> +Service account: <accountName> (secret not echoed) +Credential kind: <credentialKind> +Non-production: <true|false> +Planned probes (in order): + 1. <category>: <command> + 2. ... +``` + +If `nonProductionAffirmed` is not yet true, the echo block stops at that line and asks the operator to affirm. Do not proceed without the affirmation. + +## Surface blockers immediately + +When a probe reveals that a prerequisite the operator named is missing or wrong — the managed account doesn't exist, the target hostname doesn't resolve, the documented API endpoint returns 404, the service account lacks the privilege the workflow assumes — **stop the playbook on that finding and ask**.
Do not bundle it with later findings or carry on with probes that depend on the missing thing. + +When asking, give the operator something to act on rather than just the negative result: + +- Echo the exact command run and the output that revealed the gap. +- Where it is cheap to gather, list the closest matches the agent already saw — e.g., for a missing managed account, run `getent passwd | awk -F: '$3>=1000 && $3<60000 {print $1}'` and show non-system users; for a missing API endpoint, list the endpoints that did return 200. +- Ask one focused question with sensible choices: *"`mcptest1` doesn't exist on the target. Other non-system users I see are: `testmcp1`, `alice`, `bob`. Pick one, give me a different name, or have me create `mcptest1`?"* + +This rule complements `AGENTS.md` *Question discipline*: the default posture is still act-then-ask, but a missing prerequisite is an immediate blocker, not a question deferred to the end of the playbook. + +## Probe shell hygiene (mandatory for every probe) + +Probes run non-interactively under the agent. A probe that works fine for a human can wedge or silently drop diagnostic output here. Round trips are paid in operator time, so each probe must return a complete, parseable result on the first try. + +- **Capture stderr explicitly with `2>&1`.** Auth errors, permission denied, "command not found", DNS failures land on stderr; a probe that captures only stdout reports "no output" for a command that actually failed. Never bare stdout-only; never `2>/dev/null`. +- **Disable interactive prompts up front.** Add `-o BatchMode=yes` to every `ssh` (refuses to ask for a password — fail fast instead of hanging). The prompt probe also wants `-o StrictHostKeyChecking=accept-new` so a brand-new host key is accepted once but a *changed* key still aborts. +- **Cap every probe with a hard timeout.** `ssh -o ConnectTimeout=10`; for commands run on the target via a working ssh, prefix with `timeout 10`. 
For curl: `--max-time 15 --connect-timeout 5`. A wedged probe is worse than a failed one — a failure at least returns a signal. +- **For HTTP probes,** add `--fail-with-body` and `-w '%{http_code}\n'` so the response code lands next to the body. A 401 with no body is a useful signal; a hung curl is a wasted round trip. +- **If a probe surfaces a prompt the playbook did not plan for, kill it and re-run with the suppressing flag** (`-o BatchMode=yes`, `sudo -n`, `-o StrictHostKeyChecking=accept-new`). Do not write to it. Interacting with an unplanned prompt is how a read-only probe accidentally becomes destructive. + +## SSH playbook + +Categories the playbook covers, all `read-only` by default. Each maps to a `probeRecord.category` value (schema lines 156–167) and contributes to `sshFindings` (schema lines 206–231). + +### SSH client: Python paramiko + +Paramiko is the default SSH probe client on every operator host. One path, not a per-OS matrix: + +- Cross-platform; Python is already on the agent's tool belt. +- Returns clean `(stdout, stderr, exit_code)` without PTY allocation. +- Accepts the credential as a Python variable, never as a process argument (so the secret stays out of `ps`/argv/shell history). Substitute a placeholder in `probeRecord.command` regardless. +- Works for both key-based and password auth via the same API. + +Install once with `pip install paramiko` if missing. + +Two SSH-client traps to avoid: + +- **Native `ssh` cannot pass a password non-interactively** — it prompts, and `-o BatchMode=yes` refuses password auth outright. Use native `ssh` only for key-based auth, or skip it and use paramiko. +- **Banned: `wsl sshpass …` chained from PowerShell.** Cross-shell I/O between WSL and the parent PowerShell wedges silently — the command produces no output and never returns. If WSL is the right environment for some reason, run from inside a WSL shell, not PowerShell calling into WSL.
+ +Every SSH probe opens with `paramiko.SSHClient.connect()`; that call is itself the auth probe. If it raises `AuthenticationException`, stop the playbook on that finding (per *Surface blockers immediately* above) — do not run downstream probes against a credential the agent already knows fails. `prompt` and `batch-mode` below presuppose auth has succeeded; their results are about the target's *shell*, not the credential. A batch-mode failure with auth-failure noise in the log is an auth finding, not a batch-mode finding. + +### `prompt` — what does the shell look like? + +`ssh -o StrictHostKeyChecking=accept-new <user>@<host>` and observe: + +- The login banner / motd (free text before the first prompt). Captured into `sshFindings.shellPrompt`. +- The shell prompt format (`$`, `#`, vendor menu, custom PS1). The prompt format dictates the `Receive` regex shape in `ssh-interactive`. +- Whether a banner runs *between* connect and the prompt — that affects whether the script needs an initial banner-flushing `Receive`. + +### `batch-mode` — does `ExecuteCommand`-style work? + +`ssh <user>@<host> 'echo OK; id'` and check whether stdout returns cleanly without a PTY. If yes, `ssh-batch` is viable; if the target rejects it (`PTY allocation request failed`, vendor CLI that requires a terminal), only `ssh-interactive` is viable. Captured into `sshFindings.batchModeSupported`. + +### `sudo` — escalation behavior + +Read-only probes only: `sudo -n true` (does not prompt) and `sudo -n -l` (lists permissions, also without prompting). Captured into `sshFindings.sudoBehavior`. + +A probe that runs a privileged command (`sudo something-that-mutates`) is **destructive** and requires per-probe consent. + +### `password-change` — which command path? + +Read-only: which password-change tooling is available (`which passwd chpasswd`), and whether the account is self-managed vs service-managed. Identifying the tool is read-only.
+ +Actually rotating the service account's password to validate the workflow under test is covered by the contract item 2 exception: announce the intent up front, proceed without per-probe consent. *Other* destructive password operations — rotating a non-service account's password, changing a password on a different host — remain destructive probes requiring per-probe consent. + +When announcing the service-account rotation, present the one-line summary explicitly: *"I'll rotate the service account password during this iteration to exercise the workflow; capture the new value if you need it for re-auth."* + +Captured into `sshFindings.passwordChangeCommand`. + +## HTTP playbook + +Categories, all `read-only` by default. Each maps to a `probeRecord.category` value (schema lines 156–167) and contributes to `httpFindings` (schema lines 233–258). + +### `auth-scheme` — what does the server want? + +`curl -i <base-url>/` (without credentials) and inspect: + +- Status code (typically 401 for API endpoints, 30x with `Location: /login` for form-fill targets). +- `WWW-Authenticate` header — distinguishes Basic from Bearer/OAuth-style challenges. +- Body content type (JSON error vs HTML login page). + +Captured into `httpFindings.authScheme`. + +### `login-form` — when there is no API + +GET the login page and read the rendered HTML. Extract: form `action` URL, field names (username/password and any hidden fields), CSRF tokens, redirect chain on submission. Read-only; does not require credentials. Captured into `httpFindings.loginForm`. + +### `cookie` — session shape + +Perform a single GET → POST round trip with the service-account credential (still subject to the rate limit), then inspect the `Set-Cookie` headers and whether subsequent calls succeed without re-auth. The `POST` to a login endpoint is read-only by intent — it does not mutate target state in the sense the contract guards against — but it counts toward the auth-attempt rate cap. Captured into `httpFindings.cookieBehavior`.
+ +### `api-discovery` — what endpoints exist? + +GET against documented endpoints from vendor docs (see [`docs/agent-reference/vendor-doc-search-recipes.md`](../../../docs/agent-reference/vendor-doc-search-recipes.md)) for user lookup, password change, key rotation. Confirm the operations the script will need actually exist and what they require. + +Do **not** speculatively POST/PUT/DELETE against undocumented endpoints — that is destructive (see contract item 2). Captured into `httpFindings.apiDiscovery`. + +## Halt signals + +The skill stops probing and sets `probeRun.haltedReason` (schema lines 92–103) when any of these occur: + +- HTTP `429 Too Many Requests`, vendor-specific throttle headers (`Retry-After`, `X-RateLimit-Remaining: 0`). +- Lockout indicators: HTTP `423 Locked`, body text matching `account locked`/`account disabled`, SSH connection close immediately after username. +- MFA challenges: HTTP body containing a one-time-password prompt, SSH server prompting for `Verification code:` after the password. +- Operator says stop. + +After a halt, the agent does not retry the same probe. It surfaces the halt to the operator and waits for guidance. + +## Output handoff + +When probing concludes: + +1. Validate the evidence artifact against [`.agents/schemas/evidence.schema.json`](../../../.agents/schemas/evidence.schema.json) before handing it off (any JSON Schema validator works; the schema is draft-07). +2. Pass the artifact to [`strategy-selection`](../strategy-selection/SKILL.md). That skill is the next stop, not `script-authoring` — pattern selection happens with vendor docs + evidence in one place, not piecemeal. +3. Save the artifact alongside the workflow's other working files so a future iteration can re-read it without re-probing. 
+ diff --git a/.agents/skills/task-log-analysis/SKILL.md b/.agents/skills/task-log-analysis/SKILL.md new file mode 100644 index 0000000..39db692 --- /dev/null +++ b/.agents/skills/task-log-analysis/SKILL.md @@ -0,0 +1,112 @@ +--- +name: task-log-analysis +description: >- + Use when an operation has run and produced an extended task log that + must be classified and turned into a next step. Pulls or accepts the + extended task-log JSON, classifies the failure phase + (connect / auth / parse / operation / unknown), extracts actionable + signals, and recommends the next iteration. Backed by the + failure-pattern catalog at docs/agent-reference/failure-patterns.md, + which ships empty and is grown only from real runs. +--- + +# task-log-analysis + +## Pre-flight + +Before turning a task log into a fix, consult [`AGENTS.md`](../../../AGENTS.md) for the iterative debug-loop budget (3 same-signature failures or 10 total iterations, whichever first). Each iteration must produce a changed draft; if this skill cannot articulate what changed since the prior log, escalate early instead of grinding. + +## Scope + +This skill takes an extended task log produced by SPP and turns it into a structured next step: + +1. Pull the log (full-loop) or accept a saved JSON file (author-only). +2. Classify the failure phase: `connect | auth | parse | operation | unknown`. +3. Extract the actionable signal (the first mismatch-class entry, the offending `Send`/`Receive` pair, the failing HTTP status code, etc.). +4. Recommend the next iteration: which skill to re-engage, which assumption to test, which probe to re-run. + +It is the only skill that is allowed to read raw task-log JSON and surface conclusions; other skills request analysis through this one. + +## Modes + +- **full-loop** — fetches the log live via [`safeguard-ps-operations`](../safeguard-ps-operations/SKILL.md) (`Get-SafeguardTaskLog -TaskId ` under the hood; the dev-loop wrapper already does this in its `log` phase). 
+- **author-only** — accepts a task-log JSON file the operator saved earlier and reads it from disk. Useful for retrospective analysis with no appliance available. +- **probe-only** — fails closed. + +## Inputs + +The skill consumes the JSON document produced by [`tools/Invoke-PlatformDevLoop.ps1`](../../../tools/Invoke-PlatformDevLoop.ps1), or any `Get-SafeguardTaskLog` result the operator saved as JSON. The relevant fields are documented with real-output examples in [`tools/README.md`](../../../tools/README.md) ("phases[2] (trigger) data" and "phases[3] (log) data"). + +Two layers of evidence matter: + +- **`phases[2].data.taskLog`** — the structured array (`Timestamp`, `Status`, `Message`) that `safeguard-ps` attaches to `Ex.SafeguardLongRunningTaskException` on a trigger failure. The first non-`Queued`/`Running` `Status` value usually pins the failure phase; the last entry is the user-visible summary. +- **`phases[3].data.log`** — the per-named-log entries (`Recorded`, `Level`, `Event`) from `Get-SafeguardTaskLog`. Sections are separated by synthetic `--- ---` entries inserted by safeguard-ps. The two log names produced by SPP for platform tasks are stable string constants `Operation` and `SshCommunication` (cited in [`tools/README.md`](../../../tools/README.md) "phases[3] data"; defined in `Hercules\Source\Rsms.Public\Constants\Logging.cs:14-15`). + +Read both. The `Operation` log shows what the platform script intended; `SshCommunication` (when present) shows the raw frames so a `Send`/`Receive` mismatch becomes diagnosable. + +### Fetching a task log directly: `Get-SafeguardTaskLog` parameter shape + +Two non-obvious facts about the cmdlet: + +- **No-args vs `-TaskId` return entirely different shapes.** With no arguments, `Get-SafeguardTaskLog` returns a flat array of recent task-ID GUID strings across **all** tasks the session can see — a discovery call, not a log-fetching call. 
With `-TaskId ` it returns the actual `{Recorded, Level, Event}` records for that task. +- **Section headers come through with empty `Level`.** The synthetic `--- ---` separator entries SPP inserts between named logs (`Operation`, `SshCommunication`) carry empty `Level`. Treat any record whose `Level` is empty as a section delimiter, not as a real log event, and use it to know which named log the surrounding records belong to. + +When the operator only has an asset/account and no task GUID, do not enumerate every recent task ID and parse JSON looking for the account name — ask the operator to re-trigger with `-ExtendedLogging`. The trigger output emits the new GUID directly. + +## Classification flow + +| Phase | What it means | Where it shows up | +| --- | --- | --- | +| **connect** | Could not establish the underlying transport. SSH: TCP reset, host-key mismatch, timeout. HTTP: DNS/TLS/connect-refused. | Earliest entries in `Operation` or `phases[2].data.taskLog`. SSH-side host-key issues surface as `SshHostKeyMismatch` `Status`. | +| **auth** | Transport up, credentials rejected. SSH: `Permission denied`, `passwd`-prompt-after-banner. HTTP: 401/403, login-form re-presentation. | Look for `PasswordMismatch`, `SshKeyMismatch`, `ApiKeyMismatch` `Status` values (from the stable `TaskStatus` enum, see [`tools/README.md`](../../../tools/README.md) "phases[2] data" closing paragraph), or HTTP status codes in `Operation` events. | +| **parse** | Connection and auth succeeded but a `Receive`/`ExtractJsonObject`/`ExtractFormData`/regex did not match what came back. | `Operation` shows the script proceeding past auth; the failure event is a parse/regex error, often with the buffer contents inline. | +| **operation** | The script ran, the target accepted it, but the action did not produce the desired state (password not actually changed, account not found by discovery, etc.). | The trigger may even report success; the follow-up `CheckPassword` then mismatches. 
Compare across two task logs in this case. | +| **unknown** | Nothing above fits. | Stop and ask the operator. Do not invent a category. | + +The first three buckets are mutually exclusive; `operation` can co-occur (e.g., auth succeeded but operation failed because the account does not exist). + +## Signal extraction + +For each failure, surface to the operator (and to whichever skill is taking the next step): + +- The classified phase. +- The exact `Status` and `Message` of the first mismatch-class entry. +- The exact `Event` text of the last `Operation` entry before the failure. +- For SSH parse failures: the corresponding `SshCommunication` `Send` and `Receive` pair (look for adjacent `Send :` / `Receive :` events bracketing the failure timestamp). +- For HTTP failures: the `Verb`, `Url`, response status code, and (when present) response body excerpt. +- The script changes, if any, that distinguish this iteration from the previous one. If the answer is "no change", that is itself the signal — the loop is stuck and should escalate. + +Do not paraphrase the messages; quote them verbatim. The catalog below matches on substrings. + +**Catch-block masking rule.** When the operation returns a clean verdict (e.g., `PasswordMismatch`, `CheckResult: false`, a sentinel error string) and the log shows a `Try`/`Catch` fired earlier in the operation, the verdict is the catch's fallback value, **not** the target's answer. Read the caught exception text from the inner `Operation` events (typically `Exception evaluating expression …`, `Command X failed with an error …`, or `An error was thrown in the try block …`) before drawing target-side conclusions. Verdict-shaped script bugs are the failure mode this rule guards against. + +## Failure-pattern catalog + +The catalog is [`docs/agent-reference/failure-patterns.md`](../../../docs/agent-reference/failure-patterns.md). It **ships empty by design**. 
Rows are added only from real extended task logs; rows mined from prose guides or invented from memory are explicitly not acceptable. + +When the catalog is empty, this skill falls back to the classification flow above and asks the operator for guidance on signatures it has not seen before. When it has rows, this skill matches the extracted signature substring against the `signature` column and surfaces the recommended fix; an exact-or-near match shortens the next iteration to a targeted change. + +If a real failure is observed that the catalog does not cover, the agent **proposes a new row** at the end of the loop — signature, phase, likely cause, recommended fix, first-observed date and target type — and asks the operator to confirm before adding it. Confirmation lives outside this skill (the row is added to `failure-patterns.md` by hand or in a follow-up commit). + +## Recommendation routing + +Hand the result back to the right skill: + +- `connect` failures with host-key / SSL mismatches → re-run probes via [`target-probing`](../target-probing/SKILL.md) to capture the new fingerprint, then update the script. +- `connect` / `auth` failures with credential issues → confirm the service-account credential with the operator before any further appliance call. Do not retry blindly (credential lockout risk; the probe-safety contract in [`target-probing`](../target-probing/SKILL.md) applies even outside probing). +- `auth` failures with HTTP 401 after a token exchange → revisit [`script-authoring`](../script-authoring/SKILL.md) for the `http-api` two-step token-handling shape; common cause is reusing a single `RequestObjectName` across token-fetch and operation calls. +- `parse` failures → revisit `script-authoring`; cross-reference the failing `Receive`/regex against the analogous sample. +- `operation` failures (target accepted but state did not change) → revisit [`strategy-selection`](../strategy-selection/SKILL.md). 
The wrong pattern may have been chosen (e.g., interactive `passwd` over batch SSH succeeds visibly but does not actually rotate). +- `unknown` → stop and ask. + +## Escalation + +This skill never silently retries. When the loop budget is exhausted or two consecutive iterations produce the same signature without a meaningful script change, surface to the operator: + +- The full classification result. +- Every signature seen in this loop. +- The set of changes attempted between iterations. +- A specific, narrow next question (e.g., *"Run `passwd -S ` on the target and paste the output — the task log shows the change command exited 0 but `CheckPassword` still mismatches, which is consistent with the account being locked"*). + +This is the loop-budget backstop from [`AGENTS.md`](../../../AGENTS.md): the desktop operator is the final arbiter; do not fabricate progress. + diff --git a/.github/workflows/agent-corpus.yml b/.github/workflows/agent-corpus.yml new file mode 100644 index 0000000..2ee7e82 --- /dev/null +++ b/.github/workflows/agent-corpus.yml @@ -0,0 +1,46 @@ +name: Agent Reference Corpus + +on: + push: + branches: [main, feature/agent-skills] + paths: + - 'AGENTS.md' + - '.agents/**' + - 'docs/agent-reference/**' + - 'samples/**' + - 'templates/**' + - 'tools/Build-SamplesIndex.ps1' + - 'tools/Test-AgentLinks.ps1' + - '.github/workflows/agent-corpus.yml' + pull_request: + branches: [main] + paths: + - 'AGENTS.md' + - '.agents/**' + - 'docs/agent-reference/**' + - 'samples/**' + - 'templates/**' + - 'tools/Build-SamplesIndex.ps1' + - 'tools/Test-AgentLinks.ps1' + - '.github/workflows/agent-corpus.yml' + +jobs: + link-validity: + name: Agent link validity + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Validate agent-facing relative links + shell: pwsh + run: ./tools/Test-AgentLinks.ps1 + + samples-index-freshness: + name: Samples index freshness + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Verify 
samples-index.md is up to date + shell: pwsh + run: ./tools/Build-SamplesIndex.ps1 -CheckOnly diff --git a/.gitignore b/.gitignore index e0d3abb..b9a0534 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ # Plan file (session artifact, not part of the repo) plan.md +# Agent-skills authoring scaffolding (working notes, not deliverables) +agent-skills-plan.md +.agents/prompts/ + # OS artifacts .DS_Store Thumbs.db diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..a1e935e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,139 @@ +# AGENTS.md — SafeguardCustomPlatform + +Repository for Safeguard for Privileged Passwords (SPP) custom platform scripts. The repo holds the JSON schema (`schema/`), tested reference samples (`samples/`), pattern templates (`templates/`), human-facing documentation (`docs/`), and tooling (`tools/`) for authoring custom platforms. + +This file is the agent orchestrator. Read it first. + +## Project structure + +- `AGENTS.md` (this file) — orchestrator: workflows + routing table. +- `.agents/` — `skills/` (one subdirectory per capability skill, each with a `SKILL.md`), `schemas/evidence.schema.json` (probing-evidence contract), `prompts/` (per-phase implementation prompts; local authoring scaffolding that is gitignored, so it may be absent from a checkout), `CONVENTIONS.md`. +- `schema/custom-platform-script.schema.json` — authoritative platform-script schema. +- `samples/` — tested production-grade samples (ssh, http, telnet). +- `templates/` — pattern templates and minimal starters. +- `docs/agent-reference/` — machine-first reference (samples-index, strategy-decision-tree, failure-patterns, vendor-doc-search-recipes). `docs/concepts|guides|reference|tutorials|quick-start/` are human-facing — keep untouched. +- `tools/` — `Invoke-PlatformDevLoop.ps1` (validate→import→trigger→log wrapper), `Build-SamplesIndex.ps1`, `Test-AgentLinks.ps1`, `TestTool.ps1` (human-facing).
+ +## Custom platform mental model + +A Safeguard custom platform script is a JSON document that teaches SPP how to manage credentials on a target system that SPP does not natively support. The script declares operations (`CheckSystem`, `CheckPassword`, `ChangePassword`, `DiscoverAccounts`, …), each defined as a list of scripting-engine commands (`Connect`, `Send`, `Receive`, `ExecuteCommand`, `Request`, `HttpAuth`, `ExtractJsonObject`, …). + +SPP runs the script against an asset and an account: the asset supplies network address and a service-account credential; the account is the managed identity whose credential is being checked or rotated. The script returns a status (success, failure, error) and, in extended-logging mode, a structured task log describing each command's input and output. + +Authoring a custom platform is iterative: draft the JSON, validate it against the schema, import it into a test appliance, trigger an operation with extended logging, read the task log, fix the script, repeat. The agent skill system is built around making this loop fast and grounded in real evidence. + +## Operating modes + +The agent declares the active mode at the start of every session. Each skill declares the modes it supports and **fails closed** when invoked outside them. + +| Mode | What is available | What works | +| --- | --- | --- | +| `author-only` | Repo only. No SPP appliance, no live target. | Schema validation, sample/template lookup, JSON drafting, strategy selection, log analysis from saved files. | +| `probe-only` | Operator's shell can reach the target with a service-account credential, but no SPP appliance. | Authoring + target probing. Live import/trigger does not run. | +| `full-loop` | Appliance + target both available. | The full validate → import → trigger → log-analyze loop. | + +If the agent is unsure which mode applies, it asks the operator before proceeding. 
+ +## Question discipline + +The agent's default posture is **act, then ask only when blocked**. Every up-front question costs operator time; the iterative debug loop already expects course-correction, so a wrong-but-recoverable choice usually beats a question. Concretely: + +- **Ask only what is required to take the next action.** Do not pre-collect facts the next step doesn't yet need. Probing rarely needs the operator's deployment topology; authoring rarely needs port numbers. +- **Try, then ask on failure.** When two paths exist (secure vs `-Insecure`, module-installed vs not, asset-exists vs not), pick the safer/more-common path and try it. Ask only when the attempt errors out and the next step depends on the answer. +- **Ask for the service-account credential up front, with the credential kind first.** Gather what kind of credential the operator wants the platform to use — password, SSH key, API key, bearer token — *before* asking for the secret value, because the kind shapes everything else (probing technique, strategy selection, script auth shape). Ask for both in the same turn as the rest of requirements gathering, with a one-line note that the operator can rotate the secret after the workflow is done. Don't trade multiple turns for "do you have a credential / how would you like to provide it / …". +- **Treat `nonProductionAffirmed=true` as license to exercise the operations under test.** Once the operator affirms non-prod, the service account *is* a test account. The agent may run `CheckPassword`/`ChangePassword` against it as part of the workflow, with an up-front announcement that the password will be rotated as part of exercising `ChangePassword`. This is an announcement, not a per-probe consent gate. The probe-safety contract's destructive-probe rule still applies to operations beyond the service-account-on-this-target's password (key installs, account creation, sudo-that-mutates, etc.). 
+- **Do not ask "is this tool installed?".** Check first; if missing, ask once whether to install (e.g., `Install-Module safeguard-ps -Scope CurrentUser` from PowerShell Gallery, latest version). +- **Do not ask "does this asset exist?" on the new-platform workflow.** It cannot — the platform is new. Asset/account lookup is part of the enhance-platform workflow only. +- **Do not ask "are you logged in yet?".** `Connect-Safeguard -DeviceCode` (or `-Browser`) blocks until login completes (or fails); await the cmdlet's own signal rather than polling the operator. The token is long-lived — persist it once to the per-session state directory and rehydrate it for subsequent cmdlets, so re-login is rare. See `safeguard-ps-operations` for the persistence shape. +- **When yielding to the operator, surface what they need to act on.** The flip side of *act, then ask only when blocked*: when the next step requires the operator — device-code login, browser SSO, secret entry, a manual probe the agent cannot run — say what they need to do, where to look, and any time window, before yielding. A blocked operator who doesn't know they are blocked is the canonical silent iteration burn. + +If the agent finds itself drafting a third clarifying question before any tool has run, that is the signal to stop, pick a default, and try. + +## Notation: PowerShell vs API vs concept + +See [`.agents/CONVENTIONS.md`](.agents/CONVENTIONS.md). Short version: `AGENTS.md` speaks concept (plain English); skills speak PowerShell (backticked cmdlets/switches) or API (backticked PascalCase fields). + +## Authentication and safety + +- **Use PowerShell 7+ when available; warn and continue otherwise.** safeguard-ps targets PS 7, and several cmdlets behave better there (cleaner error records, no Windows-PowerShell-only quirks). If only PS 5.1 is available, the agent emits a one-line warning and proceeds — do not block on the version. 
+- **Connect with `-DeviceCode` (preferred) or `-Browser` (fallback).** All `safeguard-ps` connections in agent flows use interactive PKCE — no password-in-script recipes. `-DeviceCode` prints a verification URL and short code rather than launching a local browser, which is the lower-friction default for terminal sessions. Fall back to `-Browser` only if the appliance does not have the Device Code grant enabled. +- **Never operate against a production target.** The operator must affirm the target is non-production before any probe or trigger runs. The affirmation is a soft control — responsibility rests with the operator. The agent does not (and cannot) verify environment classification independently. +- **Never log session tokens or secrets.** `$SafeguardSession`, target passwords, API keys, and private keys must not appear in evidence files, status messages, or operator-visible output. +- **Probe-safety contract.** The `target-probing` skill enforces a strict contract (read-only by default, per-probe consent for destructive probes beyond the service account, auth-attempt rate limit, fail-closed on lockout/throttle/MFA). See [`.agents/skills/target-probing/SKILL.md`](.agents/skills/target-probing/SKILL.md) for the full contract. Rotating the service account password as part of the workflow under test is announced up front but does not require per-probe consent (see *Question discipline*). +- **`SchemaOnly` is not a correctness signal.** Local schema validation only proves the JSON is well-formed and conformant. It does not catch undefined variables in `Do` blocks, regex that does not match in practice, or status-message ordering. Cross-reference samples for analogous patterns before treating green as ready-to-import. + +## Sample and template index + +The agent-facing index of every sample and template lives at [`docs/agent-reference/samples-index.md`](docs/agent-reference/samples-index.md) (generated by `tools/Build-SamplesIndex.ps1`; CI fails if stale). 
Use it to find a starting point by `(protocol, auth-scheme, operations)`. Telnet samples are excluded — telnet is out of scope for the agent skill system. Other agent-reference material lives alongside it in [`docs/agent-reference/`](docs/agent-reference/) (decision tree, failure-pattern catalog, vendor-doc search recipes). + +## Workflow: new platform + +Use this workflow when the operator's request is to build a custom platform that does not yet exist in the appliance. + +1. **Gather requirements.** Classify intent (new vs enhance — this workflow is *new*), then collect what is missing: + - **Platform display name** the operator wants the new platform to use on the appliance (e.g., `My Custom Linux`). + - Target system (vendor, product, version) and protocol (SSH or HTTP — telnet is out of scope). + - Operations needed (`CheckSystem`, `CheckPassword`, `ChangePassword`, optionally `DiscoverAccounts`). + - **Credential intent** — self-managed (the managed account rotates its own password) vs service-account (a separate account rotates the managed one). + - **Service-account credential** — kind first (`password` / `ssh-key` / `api-key` / `bearer-token`), then the secret value or path. Per *Question discipline*, ask in this turn so probing isn't blocked later. + - Any vendor documentation the operator can share (URL the agent fetches, or an excerpt pasted into the conversation — both first-class). + Ask only what is missing. Do not re-ask for facts the operator already provided. +2. **Search samples-index + vendor docs.** Look up a starting point in [`docs/agent-reference/samples-index.md`](docs/agent-reference/samples-index.md) by `(protocol, auth-scheme, operations)`. If vendor docs are needed, use [`docs/agent-reference/vendor-doc-search-recipes.md`](docs/agent-reference/vendor-doc-search-recipes.md). +3. **Probe the target.** Hand off to [`target-probing`](.agents/skills/target-probing/SKILL.md). 
The skill enforces its own probe-safety contract and produces an evidence artifact conforming to [`.agents/schemas/evidence.schema.json`](.agents/schemas/evidence.schema.json). In `author-only` mode this step is skipped and the workflow proceeds with whatever the operator can supply by hand. +4. **Select a strategy.** Hand off to [`strategy-selection`](.agents/skills/strategy-selection/SKILL.md) with the probe evidence (or the operator-supplied substitute) and any vendor docs. Output: one of the four authoring patterns plus credential-intent and self-managed-vs-service-account. +5. **Author the JSON.** Hand off to [`script-authoring`](.agents/skills/script-authoring/SKILL.md). The skill mandates the fast inner loop: local schema validation against [`schema/custom-platform-script.schema.json`](schema/custom-platform-script.schema.json) before any appliance round-trip. +6. **Validate, import, and trigger.** Hand off to [`safeguard-ps-operations`](.agents/skills/safeguard-ps-operations/SKILL.md), which prefers [`tools/Invoke-PlatformDevLoop.ps1`](tools/Invoke-PlatformDevLoop.ps1) over re-implementing the loop. Create the asset and account directly without a pre-check (the platform is new). Trigger with extended logging enabled. Requires `full-loop` mode. +7. **Analyze the task log.** Hand off to [`task-log-analysis`](.agents/skills/task-log-analysis/SKILL.md). +8. **Enter the iterative debug loop** (below) until green or the loop budget triggers escalation. + +## Workflow: enhance platform + +Use this workflow when the operator wants to change a platform that is already deployed on the appliance. + +1. **Gather requirements.** What operation is changing, what new behavior is expected, what existing behavior must not regress. Ask only what is missing. +2. **Source the current JSON via export.** Run `Export-SafeguardCustomPlatformScript` against the appliance (via [`safeguard-ps-operations`](.agents/skills/safeguard-ps-operations/SKILL.md)). 
**The deployed copy is authoritative for the diff.** On-disk samples in `samples/` are starting points — drift between the deployed JSON and any sample is expected and benign. +3. **Diff-aware authoring.** Hand off to [`script-authoring`](.agents/skills/script-authoring/SKILL.md) with the exported JSON as the base. Limit the change set to what the requirement demands. The fast inner loop (local schema validation) still runs before any appliance round-trip. +4. **Validate, import, and trigger only operations affected by the change.** A `ChangePassword` edit does not require re-testing `DiscoverAccounts`. +5. **Analyze the task log** for each affected operation via [`task-log-analysis`](.agents/skills/task-log-analysis/SKILL.md). +6. **Enter the iterative debug loop** (below) until green or the loop budget triggers escalation. + +## Iterative debug loop + +Both workflows enter this loop after the first trigger. The loop is the same in both cases. + +1. **Try manually first** (when probe-only or full-loop is available). Reproduce the operation against the target with the service-account credential before changing the JSON. If the manual attempt fails, the JSON is not the right thing to fix yet — re-probe. +2. **Draft or revise the JSON** via [`script-authoring`](.agents/skills/script-authoring/SKILL.md). +3. **Fast inner loop:** local schema validation (`Invoke-PlatformDevLoop.ps1 -SchemaOnly`). Sub-second; no appliance contact. Iterate here until clean before paying for a round-trip. +4. **`Test-SafeguardCustomPlatformScript`** against the appliance via [`safeguard-ps-operations`](.agents/skills/safeguard-ps-operations/SKILL.md). This catches things local schema validation cannot. +5. **Import** the script. +6. **Trigger** the affected operation with extended logging enabled. +7. **Analyze the task log** via [`task-log-analysis`](.agents/skills/task-log-analysis/SKILL.md). Decide: green, revise, or escalate. 
+ +### Loop budget (best-effort) + +Stop and escalate to the operator when **either** of these is true, whichever comes first: + +- **3 failures share the same error signature.** Repeated identical failures mean the current hypothesis is wrong, not that one more tweak will work. The classification produced by [`task-log-analysis`](.agents/skills/task-log-analysis/SKILL.md) is what defines "same signature." +- **10 total iterations** have run. + +Three reinforcing rules: + +- **Each iteration must produce a changed draft.** If the agent cannot articulate what changed since the prior iteration in one sentence, escalate early — looping with no real change is the most expensive failure mode. +- **On operator correction, stop the current tactic.** Before responding or retrying, restate the active skill and the specific rule the correction invoked (e.g., "script-authoring — fast inner loop runs before any appliance round-trip"). Then resume. Arguing the correction or pivoting to a new tactic without re-grounding is how a single misread compounds into three wasted iterations. +- **The counter is not persisted.** Context compaction or shell restart resets it. The desktop operator is the backstop: if the operator notices the loop has restarted twice on the same problem, that is the signal to escalate regardless of the in-memory counter. + +## Routing table + +The five capability skills. Each `SKILL.md` opens with a pre-flight pointer back to this file; that is convention, not enforcement. + +| Skill | When to load | Modes | File | +| --- | --- | --- | --- | +| `target-probing` | The agent must learn how a live target actually behaves before authoring or revising — banner grab, auth-scheme detection, prompt shape, sudo behavior, login-form/API discovery. Produces the structured evidence artifact consumed by `strategy-selection` and `script-authoring`.
| `probe-only`, `full-loop` | [`.agents/skills/target-probing/SKILL.md`](.agents/skills/target-probing/SKILL.md) | +| `strategy-selection` | A pattern decision is needed: `ssh-interactive` vs `ssh-batch`; `http-form-fill` vs `http-api`; within `http-api`, the auth shape (HttpAuth-managed Basic/Digest vs script-managed-header Bearer/custom-scheme/custom-header) and one-step vs two-step token fetch; password vs SSH key vs API key vs bearer token; self-managed vs service-account. Accepts probe evidence, fetched URLs, and pasted vendor-doc excerpts. | `author-only`, `probe-only`, `full-loop` | [`.agents/skills/strategy-selection/SKILL.md`](.agents/skills/strategy-selection/SKILL.md) | +| `script-authoring` | Drafting or revising the platform JSON. Four pattern recipes (`ssh-interactive`, `ssh-batch`, `http-api`, `http-form-fill`). The `http-api` recipe covers any auth shape the API documents — Basic/Digest via `HttpAuth`, or Bearer / custom `Authorization` scheme / custom-header API key via script-built `Headers`. Mandates the fast inner loop (local schema validation) before any appliance round-trip. | `author-only`, `probe-only`, `full-loop` | [`.agents/skills/script-authoring/SKILL.md`](.agents/skills/script-authoring/SKILL.md) | +| `safeguard-ps-operations` | Driving a live SPP appliance through `safeguard-ps`: `Connect-Safeguard -DeviceCode` (or `-Browser`), `Test-` / `Import-` / `Export-SafeguardCustomPlatformScript`, asset/account create-or-update, triggering operations with extended logging, fetching task-log JSON. Wraps [`tools/Invoke-PlatformDevLoop.ps1`](tools/Invoke-PlatformDevLoop.ps1). All cmdlet syntax must come from `Get-Help -Full`. 
| `full-loop` (most operations); `author-only` for `Test-` and `Export-` against a local file | [`.agents/skills/safeguard-ps-operations/SKILL.md`](.agents/skills/safeguard-ps-operations/SKILL.md) | +| `task-log-analysis` | An operation has run and produced an extended task log that must be classified (`connect` / `auth` / `parse` / `operation` / `unknown`) and turned into a next step. Backed by [`docs/agent-reference/failure-patterns.md`](docs/agent-reference/failure-patterns.md), which ships empty and grows only from real runs. | `full-loop` (live log); `author-only` (saved JSON file) | [`.agents/skills/task-log-analysis/SKILL.md`](.agents/skills/task-log-analysis/SKILL.md) | + +## Keeping this file current + +When skills, workflows, modes, the loop budget, or `docs/agent-reference/` paths change, update this file in the same change. Do not let the routing table drift from the skills it points at. diff --git a/docs/agent-reference/README.md b/docs/agent-reference/README.md new file mode 100644 index 0000000..dca7032 --- /dev/null +++ b/docs/agent-reference/README.md @@ -0,0 +1,20 @@ +[← Repository README](../../README.md) + +# Agent reference corpus + +This directory holds machine-first reference material for the agent skill system. It is cited from `AGENTS.md` and from individual `SKILL.md` files under `.agents/skills/`. + +Human-facing documentation lives in `docs/concepts/`, `docs/guides/`, `docs/tutorials/`, `docs/reference/`, and `docs/quick-start/` and is not duplicated here. Agent-only content stays out of human-facing locations. + +## Files + +| File | Purpose | How it is maintained | +| --- | --- | --- | +| [`samples-index.md`](samples-index.md) | Normalized index of every sample and template (protocol, auth-scheme, operations, OS-family, file-path, README). | **Generated** by `tools/Build-SamplesIndex.ps1`. CI runs the same script with `-CheckOnly` and fails if the committed copy is stale. 
| +| [`strategy-decision-tree.md`](strategy-decision-tree.md) | Decision table that backs the `strategy-selection` skill (SSH and HTTP only). | Hand-maintained from `docs/guides/`. SSH and HTTP only. | +| [`failure-patterns.md`](failure-patterns.md) | Error-signature → likely cause → fix catalog used by `task-log-analysis`. | **Empty in Phase 1.** Rows are populated from real extended task logs in Phase 5/F. Invented rows are not acceptable. | +| [`vendor-doc-search-recipes.md`](vendor-doc-search-recipes.md) | Query templates for fetching vendor docs and a normalization recipe for pasted vendor-doc excerpts. | Hand-maintained. | + +## Related contracts + +- `.agents/schemas/evidence.schema.json` — JSON Schema for the probing evidence artifact produced by the `target-probing` skill and consumed by `strategy-selection` and `script-authoring`. This is an internal agent contract and is deliberately separate from `schema/custom-platform-script.schema.json`. diff --git a/docs/agent-reference/failure-patterns.md b/docs/agent-reference/failure-patterns.md new file mode 100644 index 0000000..f919b23 --- /dev/null +++ b/docs/agent-reference/failure-patterns.md @@ -0,0 +1,66 @@ +[← Agent reference](README.md) + +# Failure patterns + +This catalog maps real error signatures observed in extended task logs to likely causes and concrete next-step fixes. It backs the `task-log-analysis` skill. + +## Provenance rule + +Every row is grounded in (a) a captured extended task-log JSON or cmdlet response, **reproduced at least once to rule out transient artifacts like polling-timing races against unfinished async tasks**, and (b) a concrete fix that was applied and verified against the same target. Rows mined from prose guides, inferred from source, or invented from memory are explicitly not acceptable — they undermine the skill they support. 
+ +When `task-log-analysis` encounters a signature this catalog does not cover, it falls back to the classification flow (connect / auth / parse / operation / unknown) and proposes a candidate row at the end of the loop for the operator to confirm. + +## Validate-phase errors + +Errors raised by `Test-SafeguardCustomPlatformScript` (and equivalent server-side import validation) are **not** extended task-log failures — they are caught before the script ever runs. They are catalogued separately so the strict provenance rule above is preserved for the trigger-time table. + +Each row is grounded in a real `Test-SafeguardCustomPlatformScript` response captured during authoring. + +| signature | likely cause | recommended fix | +| --- | --- | --- | +| `Function 'X' expects N parameters, but is being called with M` | Caller is passing the wrong number of positional args to an imported (or local) function. The public docs at `docs/reference/imports.md` deliberately do not list signatures because the deployed appliance's view can drift from any external reference. | Grep `samples/` for `"Name": "X"` and copy the `Parameters` array from a working call site that imports the same library. If no sample exercises the call, attempt the call with a guess and read the appliance's `expects N` error literally — the appliance is authoritative for its own deployed signature. Order in the array is positional; calls do not name their parameters. | +| `60020: Invalid custom platform script. Reason: Platform definition is not valid JSON: Specified argument was out of the range of valid values.` (returned by `Test-SafeguardCustomPlatformScript` and `Import-SafeguardCustomPlatformScript` when the script body parses cleanly as JSON locally) | The appliance's script deserializer rejects a top-level `"$schema"` field even though the JSON Schema at `schema/custom-platform-script.schema.json` lists `$schema` as a permitted property. 
Adding `"$schema": "..."` for editor IntelliSense breaks server-side validation; local schema validation does not catch it because the field is schema-legal. | Remove the top-level `"$schema"` property before invoking `Test-` or `Import-`. Keep IntelliSense via the editor's JSON-schema mapping (`json.schemas` in VS Code settings, or a sibling `*.schema.json` reference) instead of an inline declaration. | + +## Asset-onboarding errors + +Errors raised by `New-SafeguardCustomPlatformAsset` (and equivalent appliance APIs that create or onboard a custom-platform asset) — these happen **before** any task runs, so the script itself never executes. The appliance is inspecting the platform's *shape* to decide what protocol it speaks and which onboarding steps to wire up. + +Each row is grounded in a real cmdlet response captured during authoring or onboarding. + +| signature | likely cause | recommended fix | +| --- | --- | --- | +| `60306: Platform does not support SSH authentication.` (returned by `New-SafeguardCustomPlatformAsset` when `-AcceptSshHostKey` or `-NoSshHostKeyDiscovery` is supplied for a custom platform whose script does not expose any SSH-recognized operation) | The appliance classifies a custom platform as "SSH-capable" by inspecting its operation set. If the script defines no operation that is recognized as SSH (notably `DiscoverSshHostKey`), the appliance refuses to onboard the asset under the SSH host-key flow and rejects with HTTP 400 / error 60306. Schema validation passes because the schema does not require `DiscoverSshHostKey` — this is a runtime classification rule, not a static one. | Add a `DiscoverSshHostKey` operation to the platform script (every SSH sample under `samples/ssh/` has one). Re-import the script, then retry asset creation. | +| `60307: Could not discover SSH host key: ... 
Failed to discover SSH host key for asset Custom Asset.` (returned by `New-SafeguardAsset -AcceptSshHostKey` and `Invoke-SafeguardAssetSshHostKeyDiscovery -AcceptSshHostKey`) **with no task log persisted on `safeguard-ps` < 8.4.3** (TaskLogs count unchanged) | Two compounding issues. (a) Before `safeguard-ps` 8.4.3, the cmdlet `Invoke-SafeguardAssetSshHostKeyDiscovery` did not expose an `-ExtendedLogging` switch even though the underlying `POST /v4/Assets/{id}/DiscoverSshHostKey` endpoint accepts `?extendedLogging=true`. Without it, host-key-discovery failures emitted only the surface 60307 error and persisted no task log, leaving the operator with nothing to diagnose. As of 8.4.3 the cmdlet has `-ExtendedLogging` natively and `tools/Invoke-PlatformDevLoop.ps1` enforces 8.4.3 as the floor. (b) Once extended logging is captured, the same 60307 surfaces a real Operation log that dies at `Executing WriteResponseObject component` if the script's `DiscoverSshHostKey` command sets a parameter the runtime does not honor on this code path (e.g., `SoftwareVersionVariableName: "GLOBAL:ServerSoftwareName"` — present in some custom drafts, absent from every on-disk SSH sample). The script-engine error is silently swallowed and re-emerges as the generic 60307. | For (a): upgrade with `Install-Module safeguard-ps -Scope CurrentUser -Force` to pick up 8.4.3+, then call `Invoke-SafeguardAssetSshHostKeyDiscovery -Asset <asset> -ExtendedLogging -AcceptSshHostKey`. Confirm a new entry appears in `Get-SafeguardTaskLog` (diff before/after counts; v1 GUIDs are not lexicographically time-ordered). For (b): drop `SoftwareVersionVariableName` from the `DiscoverSshHostKey` command and wrap it in a `Try`/`Catch` that returns `true` from the operation. Cross-reference `samples/ssh/*/...` — none of the on-disk SSH samples include `SoftwareVersionVariableName` on `DiscoverSshHostKey`.
| + +## Discovery-trigger errors + +Errors raised by `Invoke-SafeguardAssetAccountDiscovery` (and equivalent appliance APIs that trigger account discovery on an already-onboarded asset) — these happen **before** the discovery script runs because the appliance has no schedule/rule wired to the asset to execute against. + +Each row is grounded in a real cmdlet response captured during authoring or onboarding. + +| signature | likely cause | recommended fix | +| --- | --- | --- | +| `60392: Account discovery is not configured for this asset.` (returned by `Invoke-SafeguardAssetAccountDiscovery` as HTTP 400 against a freshly-onboarded asset whose platform script defines `DiscoverAccounts`) | The trigger endpoint requires an **Account Discovery Schedule** with at least one discovery **Rule** to be attached to the asset before it will dispatch the script. `New-SafeguardCustomPlatformAsset` does not attach one by default, so the script never runs even though it is correct. | Create a schedule with `New-SafeguardAccountDiscoverySchedule -Name <schedule-name> -DiscoveryType <type>`. Valid `-DiscoveryType` values: `Unix`, `Windows`, `Directory`, `SPS`, `StarlingConnect`, `RoleBased` — pick the one that matches how your platform enumerates accounts. Build a rule object locally with the matching `New-SafeguardAccountDiscoveryRule<Type>` cmdlet (e.g. `New-SafeguardAccountDiscoveryRuleUnix -Name <rule-name> -FindAll`); these rule cmdlets take no appliance parameters and return an in-memory object. Attach the rule with `Add-SafeguardAccountDiscoveryRule -Schedule <schedule> -RuleObject <rule>` — use the `Object` parameter set only; passing `-RuleName` alongside `-RuleObject` selects the conflicting `Attributes` set and fails with "Parameter set cannot be resolved". Attach the asset with `Add-SafeguardAccountDiscoveryScheduleAsset -Schedule <schedule> -AssetsToAdd <asset>` and re-trigger.
The trigger is asynchronous and returns `RequestStatus.State = "Accepted"` immediately; poll until the task completes before reading results with `Get-SafeguardDiscoveredAccount` — querying too early returns zero rows that look like a script failure but are just an unfinished task. Only the `Unix` path is verified end-to-end by this catalog; the other discovery types are documented from cmdlet introspection. | + +## Trigger-time errors (from extended task logs) + +| signature | phase | likely cause | recommended fix | +| --- | --- | --- | --- | +| `An error was thrown in the try block: "An interactive session is required"` (immediately after first `Send` on a successfully-opened SSH connection) | operation | The `Connect` command opened the SSH session without requesting a PTY, but the very first `Send` (or any subsequent `sudo` invocation against a target with `Defaults use_pty` in sudoers) requires an interactive shell. The Ubuntu 24.04 default sudoers sets `Defaults use_pty`, so any `sudo` will fail with this signature even if the initial bash session would have tolerated `Send` without a PTY. | Add `"RequestTerminal": "%RequestTerminal%"` (or `"RequestTerminal": true`) to every `Connect` command in the script, and declare a `RequestTerminal` parameter (`Type: Boolean`, `DefaultValue: true`) on every operation that opens a connection. The PTY allocation is a per-Connect setting, not a per-platform one. | +| `Sorry, try again.\r\nsudo: no password was provided\r\nsudo: 2 incorrect password attempts` (in a `Receive` buffer after a `(printf ... ) \| sudo -S ...` pipeline; the `Send` completed cleanly and `Connect` succeeded) | operation | Piping a password through a bash one-liner into `sudo -S` is brittle inside a PTY-allocated shell: the parent shell may strip or echo the password line in ways that defeat sudo's stdin read, and the only diagnostic ever printed is a generic "Sorry, try again". 
The pattern is also fragile to passwords containing shell metacharacters. The proven SSH password-rotation pattern in the repo never pipes the password — it sends `sudo passwd <account>` as a normal command, then walks through sudo's password prompt, the new-password prompt, and the retype-new-password prompt **as separate `Send`/`Receive` pairs**, with the password buffers marked `"ContainsSecret": true`. | Replace any `printf ... \| sudo -S ...` construct with the prompt-driven pattern from [`samples/ssh/generic-linux/GenericLinux.json`](../../samples/ssh/generic-linux/GenericLinux.json) lines 281–340 (`ChangeUserPassword`): `Send "sudo passwd <account>; echo CHGPASS=$?\n"`, then `Receive` the sudo prompt, `Send` `%FuncPassword%` (`ContainsSecret: true`, no surrounding quotes, no trailing `\n` — the appliance terminates secret sends itself), `Receive` `[Nn]ew.*[Pp]assword:`, `Send` `%NewPassword%`, `Receive` retype/new prompt, `Send` `%NewPassword%` again, `Receive` `CHGPASS=[0-9]+`. Capture the final buffer with `WriteResponseObject` so the task log shows the success/failure marker on the very next iteration. | +| `Error in component 'SetItem': invalid expression: Ambiguous match found for: 'Split'` (raised by Z.Expressions when evaluating any `%{ ... .Split(':') ... }%` interpolation) | parse | The script-engine's expression evaluator (Z.Expressions) is bound against a modern .NET base class library where `string.Split(...)` has multiple overloads (`Split(char)`, `Split(char[])`, `Split(string)`, etc.). When the operand is a single-character literal like `':'`, the evaluator cannot pick between `Split(char)` and `Split(char[])` and throws `Ambiguous match found for: 'Split'`. The result is that **any** ssh-batch script using `someString.Split(':')[N]` to slice a colon-delimited shadow line, `passwd` entry, or fstab field will fail at the first `SetItem` that interpolates it — even though the same expression is legal C# in isolation.
| **First ask whether you need to split at all.** If the value is going into `CompareShadowHash.SaltedHash`, do **not** pre-split — the component handler splits the shadow line on `:` internally (verified in Hercules `Source/Hercules.WebService/Common/Crypt/PasswordHash.cs` `CheckPasswordAgainstShadowEntry`, and demonstrated by [`samples/ssh/generic-linux/GenericLinux.json`](../../samples/ssh/generic-linux/GenericLinux.json) line 236 which passes `%AccountEntry%` whole). When a split is genuinely required, replace `<value>.Split('<delim>')[<index>]` with a `Regex.Match`-based extraction: `%{ Regex.Match(<value>, "^[^:]*:([^:]*):").Groups[1].Value }%` for the second colon-delimited field, parameterizing the pattern for the field index needed. `Regex.Match` has a single overload at the appliance and is unambiguous. The same applies to any other ambiguous `string` method (`Replace`, `IndexOf`) when called with a single-character literal. | +| Operation returns a clean verdict (e.g., `CheckResult: false`, `PasswordMismatch`) against a yescrypt-format `/etc/shadow` entry (`$y$j9T$...`) **after** a `Try`/`Catch` fired earlier in the operation. The catch's exception text — typically a Z.Expressions `Ambiguous match found for: 'Split'` from a `SetItem` that pre-extracted a hash field — is logged but the verdict surfaces unannotated. | operation (misdiagnosed) | The verdict is the **catch's fallback value, not the target's answer**. `CompareShadowHash` supports yescrypt (`$y$` prefix) via the `Yescrypt.IsYescrypt`/`Yescrypt.CheckPassword` branch in `PasswordHash.CheckPasswordAgainstHash`; the upstream `LinuxSshFunctions.json` import uses it on yescrypt-default Ubuntu/Debian systems. The actual root cause is a script-side bug in the path that runs **before** `CompareShadowHash` — most often pre-splitting the shadow line in a `SetItem` expression that hits the Split-overload-ambiguity row above, then a `Try`/`Catch` falls back to `auth-by-login` or returns a sentinel mismatch.
Without reading the caught exception, the verdict looks like a target-side hash incompatibility. | Read the **caught exception text** from the operation log before drawing target-side conclusions — `script-authoring`'s "Catch blocks must log before falling back" rule applies. Then fix the real bug: pass `%AccountEntry%` (the whole shadow line, captured via `getent shadow <account>`) directly to `CompareShadowHash.SaltedHash`. Do not pre-split. Mirror [`samples/ssh/generic-linux/GenericLinux.json`](../../samples/ssh/generic-linux/GenericLinux.json) lines 220–245: capture, compare whole, condition on `PasswordHashMatched`, and only fall back to a login-as-test pattern in a `Catch` block (and only when `getent` is genuinely unavailable, e.g., locked-down sudo). yescrypt is not the problem. | + + diff --git a/docs/agent-reference/samples-index.md b/docs/agent-reference/samples-index.md new file mode 100644 index 0000000..d963853 --- /dev/null +++ b/docs/agent-reference/samples-index.md @@ -0,0 +1,57 @@ +[← Agent reference](README.md) + +# Samples and templates index + +**Generated file. Do not edit by hand.** Regenerate with: + +```powershell +./tools/Build-SamplesIndex.ps1 +``` + +CI runs the same script with `-CheckOnly` and fails the build if the committed copy differs. + +## Conventions + +- **protocol** — derived from the directory (`samples/ssh/`, `samples/http/`) or the template filename (`Pattern-GenericLinux*` / `Pattern-WindowsSsh*` / `TemplateSsh*` → ssh; `Pattern-GenericHttp*` / `Pattern-GenericRestApi*` / `TemplateHttp*` → http). +- **auth-scheme** — best-effort from JSON content. HTTP: from `HttpAuth.Type`, an `Authorization: Bearer` header, an API-key-shaped custom header, or `ExtractFormData`. SSH: `Interactive` (Send/Receive), `Batch` (ExecuteCommand), or `Mixed`. Blank when undetermined. +- **operations** — intersection of top-level keys with the canonical operation list from `schema/custom-platform-script.schema.json`. 
Imports and user-defined functions never appear here. +- **OS-family** — intentionally blank. Phase 1 prefers blank over guessed values; revisit in a later phase if needed. +- **file-path** and **README** — filesystem facts. `—` means the field could not be determined. +- `samples/telnet/` is excluded — telnet is out of scope for the agent skill system. The samples remain in the repo for human reference. + +## Samples + +| protocol | auth-scheme | operations | OS-family | file-path | README | +| --- | --- | --- | --- | --- | --- | +| http | Form | CheckPassword, ChangePassword | — | [`samples/http/facebook/CustomFacebook.json`](../../samples/http/facebook/CustomFacebook.json) | [README](../../samples/http/facebook/README.md) | +| http | — | CheckSystem, CheckPassword, ChangePassword | — | [`samples/http/forgerock-openam/Forgerock_OpenAM.json`](../../samples/http/forgerock-openam/Forgerock_OpenAM.json) | [README](../../samples/http/forgerock-openam/README.md) | +| http | — | CheckSystem, CheckPassword, ChangePassword, EnableAccount, DisableAccount, DiscoverAccounts | — | [`samples/http/okta-discovery/Okta_WithDiscoveryAndGroupMembershipRestore.json`](../../samples/http/okta-discovery/Okta_WithDiscoveryAndGroupMembershipRestore.json) | [README](../../samples/http/okta-discovery/README.md) | +| http | Basic+Bearer | CheckSystem, ChangePassword, EnableAccount, DisableAccount, ElevateAccount, DemoteAccount | — | [`samples/http/onelogin-jit/OneLogin_GRC_JIT_addon.json`](../../samples/http/onelogin-jit/OneLogin_GRC_JIT_addon.json) | [README](../../samples/http/onelogin-jit/README.md) | +| http | Form | CheckPassword, ChangePassword | — | [`samples/http/twitter/CustomTwitter.json`](../../samples/http/twitter/CustomTwitter.json) | [README](../../samples/http/twitter/README.md) | +| http | Basic | CheckSystem, CheckPassword, ChangePassword | — | [`samples/http/wordpress/WordPressHttp.json`](../../samples/http/wordpress/WordPressHttp.json) | 
[README](../../samples/http/wordpress/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverSshHostKey, ChangeSshKey, CheckSshKey, DiscoverAuthorizedKeys | — | [`samples/ssh/generic-linux-ssh-keys/GenericLinuxWithSSHKeySupport.json`](../../samples/ssh/generic-linux-ssh-keys/GenericLinuxWithSSHKeySupport.json) | [README](../../samples/ssh/generic-linux-ssh-keys/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverSshHostKey | — | [`samples/ssh/generic-linux-with-ad/GenericLinuxWithAD.json`](../../samples/ssh/generic-linux-with-ad/GenericLinuxWithAD.json) | [README](../../samples/ssh/generic-linux-with-ad/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverSshHostKey, DiscoverAccounts | — | [`samples/ssh/generic-linux-with-discovery/GenericLinuxWithDiscovery.json`](../../samples/ssh/generic-linux-with-discovery/GenericLinuxWithDiscovery.json) | [README](../../samples/ssh/generic-linux-with-discovery/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverSshHostKey | — | [`samples/ssh/generic-linux/GenericLinux.json`](../../samples/ssh/generic-linux/GenericLinux.json) | [README](../../samples/ssh/generic-linux/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverSshHostKey | — | [`samples/ssh/linux-app-text-config/LinuxApplicationTextConfig.json`](../../samples/ssh/linux-app-text-config/LinuxApplicationTextConfig.json) | [README](../../samples/ssh/linux-app-text-config/README.md) | +| ssh | Batch | DiscoverSshHostKey, CheckSystem, CheckPassword, ChangePassword | — | [`samples/ssh/linux-ssh-batch-mode/LinuxSshBatchModeExample.json`](../../samples/ssh/linux-ssh-batch-mode/LinuxSshBatchModeExample.json) | [README](../../samples/ssh/linux-ssh-batch-mode/README.md) | +| ssh | Batch | DiscoverSshHostKey, CheckSystem, CheckPassword, ChangePassword | — | 
[`samples/ssh/restricted-authorized-key/RestrictedAuthorizedKeyExample.json`](../../samples/ssh/restricted-authorized-key/RestrictedAuthorizedKeyExample.json) | [README](../../samples/ssh/restricted-authorized-key/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverSshHostKey, DiscoverAccounts | — | [`samples/ssh/vcenter-appliance/vCenterServerAppliance.json`](../../samples/ssh/vcenter-appliance/vCenterServerAppliance.json) | [README](../../samples/ssh/vcenter-appliance/README.md) | + +## Templates + +| protocol | auth-scheme | operations | OS-family | file-path | README | +| --- | --- | --- | --- | --- | --- | +| — | — | CheckSystem, DiscoverSshHostKey, CheckPassword, ChangePassword | — | [`templates/Pattern-SshBatchShadowCompare.json`](../../templates/Pattern-SshBatchShadowCompare.json) | [README](../../templates/README.md) | +| http | — | CheckSystem, DiscoverAccounts | — | [`templates/Pattern-GenericHttpAccountDiscovery.json`](../../templates/Pattern-GenericHttpAccountDiscovery.json) | [README](../../templates/README.md) | +| http | Basic | CheckSystem, ElevateAccount, DemoteAccount | — | [`templates/Pattern-GenericHttpJitElevation.json`](../../templates/Pattern-GenericHttpJitElevation.json) | [README](../../templates/README.md) | +| http | Basic | CheckSystem, CheckPassword, ChangePassword, DiscoverAccounts | — | [`templates/Pattern-GenericRestApiBasicAuth.json`](../../templates/Pattern-GenericRestApiBasicAuth.json) | [README](../../templates/README.md) | +| http | Bearer | CheckSystem, CheckPassword, ChangePassword | — | [`templates/Pattern-GenericRestApiBearerToken.json`](../../templates/Pattern-GenericRestApiBearerToken.json) | [README](../../templates/README.md) | +| http | Bearer | CheckSystem, CheckApiKey, ChangeApiKey | — | [`templates/Pattern-GenericRestApiKeyRotation.json`](../../templates/Pattern-GenericRestApiKeyRotation.json) | [README](../../templates/README.md) | +| http | Bearer | CheckSystem | — | 
[`templates/TemplateHttpMinimal.json`](../../templates/TemplateHttpMinimal.json) | [README](../../templates/README.md) | +| ssh | Batch | CheckSystem, UpdateDependentSystem | — | [`templates/Pattern-GenericLinuxDependentSystem.json`](../../templates/Pattern-GenericLinuxDependentSystem.json) | [README](../../templates/README.md) | +| ssh | Batch | CheckSystem, CheckFile, ChangeFile | — | [`templates/Pattern-GenericLinuxFileManagement.json`](../../templates/Pattern-GenericLinuxFileManagement.json) | [README](../../templates/README.md) | +| ssh | Interactive | CheckSystem, CheckPassword, ChangePassword, DiscoverAccounts, DiscoverSshHostKey, CheckSshKey, ChangeSshKey, EnableAccount, DisableAccount | — | [`templates/Pattern-GenericLinuxFull.json`](../../templates/Pattern-GenericLinuxFull.json) | [README](../../templates/README.md) | +| ssh | Batch | CheckSystem, DiscoverServices | — | [`templates/Pattern-GenericLinuxServiceDiscovery.json`](../../templates/Pattern-GenericLinuxServiceDiscovery.json) | [README](../../templates/README.md) | +| ssh | — | CheckSystem, ChangePassword, CheckPassword | — | [`templates/Pattern-WindowsSshBasic.json`](../../templates/Pattern-WindowsSshBasic.json) | [README](../../templates/README.md) | +| ssh | Interactive | CheckSystem | — | [`templates/TemplateSshMinimal.json`](../../templates/TemplateSshMinimal.json) | [README](../../templates/README.md) | diff --git a/docs/agent-reference/strategy-decision-tree.md b/docs/agent-reference/strategy-decision-tree.md new file mode 100644 index 0000000..6730a24 --- /dev/null +++ b/docs/agent-reference/strategy-decision-tree.md @@ -0,0 +1,117 @@ +[← Agent reference](README.md) + +# Strategy decision tree (SSH and HTTP) + +Backs the `strategy-selection` skill. 
Maps `(protocol, vendor docs, probe evidence)` to a recommended authoring pattern from the four covered by the `script-authoring` skill: + +- `ssh-interactive` +- `ssh-batch` +- `http-api` +- `http-form-fill` + +Telnet/TN3270 are out of scope for the agent skill system. The repository's human-facing telnet material remains under `samples/telnet/` and `docs/`. + +## Source documents + +This table is built from the following repo files. When in doubt, read the source rather than relying on this summary. + +| Source | Why it matters | +| --- | --- | +| [`docs/guides/ssh-platforms.md`](../guides/ssh-platforms.md) | Authoritative on the SSH interactive-vs-batch decision and the connection/login patterns. | +| [`docs/guides/http-platforms.md`](../guides/http-platforms.md) | Authoritative on HTTP authentication patterns: Basic, Bearer/OAuth2, API keys in headers, cookie/form-fill. | +| [`docs/guides/ssh-key-management.md`](../guides/ssh-key-management.md) | Disambiguates password vs SSH-key flows when the service-account credential is a key. | +| [`docs/guides/api-key-management.md`](../guides/api-key-management.md) | Disambiguates password vs API-key flows on HTTP targets. | +| [`docs/guides/jit-elevation.md`](../guides/jit-elevation.md) | Operation-shape guidance when the target supports Elevate/Demote rather than (or in addition to) password rotation. | +| [`docs/guides/account-discovery.md`](../guides/account-discovery.md) | Operation-shape guidance when discovery is required. | +| [`docs/reference/imports.md`](../reference/imports.md) | Lists the system import libraries Safeguard provides. The decision below influences which imports the script will pull in. | +| [`samples-index.md`](samples-index.md) | Concrete starting points keyed by protocol and auth-scheme. | + +## Top-level decision + +| Question | If yes | If no | +| --- | --- | --- | +| Does the target system expose a documented HTTP/REST API for the operations the agent must implement? | Use an HTTP pattern. 
Continue at [HTTP branch](#http-branch). | Use an SSH pattern. Continue at [SSH branch](#ssh-branch). | + +If the target exposes both a CLI/SSH path and an HTTP API, **prefer HTTP** when the API covers the required operations end-to-end without shell access. APIs tend to produce more stable scripts than shell-prompt scraping. Surface this trade-off to the user before committing if both paths look viable. + +## SSH branch + +Reference: [`docs/guides/ssh-platforms.md` § Choosing an SSH pattern](../guides/ssh-platforms.md#choosing-an-ssh-pattern). + +| Probe / vendor evidence | Recommended pattern | Notes | +| --- | --- | --- | +| The target accepts `ssh user@host '<command>'`-style remote command execution and returns stdout/stderr/exit-code cleanly; no PTY needed. | `ssh-batch` | Set `RequestTerminal: false`. Capture `BufferName`, `StderrBufferName`, `ExitStatusBufferName`. See `samples/ssh/linux-ssh-batch-mode/` and `samples/ssh/restricted-authorized-key/`. | +| The target presents a shell prompt, banner, or an appliance CLI menu; password change goes through `passwd` with interactive prompts; sudo prompts may appear. | `ssh-interactive` | Use `Connect` + `Send` + `Receive` + `Disconnect`, `RequestTerminal: true`. Flush banners; use unique markers (e.g., `INIT_CHECK=$?`). See `samples/ssh/generic-linux/`. | +| Mixed: the agent needs interactive flow for password change but plain `ExecuteCommand` for discovery. | `ssh-interactive` for the interactive operations, `ssh-batch` shape inside discovery operations. | Mixed scripts exist (see operations column in `samples-index.md`). Keep `Connect`/`Disconnect` consistent per operation. | + +### SSH credential intent + +| Service-account credential kind (declared by operator) | Influence on pattern | +| --- | --- | +| Password | Either `ssh-interactive` or `ssh-batch` is fine; choose by prompt behavior above. | +| SSH key | Pass `UserKey` on `Connect`. Often pairs with `ssh-batch`. 
See `samples/ssh/restricted-authorized-key/` and [`docs/guides/ssh-key-management.md`](../guides/ssh-key-management.md). | + +### When to ask vs decide (SSH) + +- **Ask** if the target's prompt style cannot be observed (probe-only mode without console output, or operator hasn't run a probe yet). +- **Ask** if the operator has not stated whether the seed is password or SSH key. +- **Decide** if probe evidence directly shows interactive prompts (→ `ssh-interactive`) or clean `ExecuteCommand` behavior (→ `ssh-batch`). + +## HTTP branch + +Reference: [`docs/guides/http-platforms.md` § Authentication patterns](../guides/http-platforms.md#authentication-patterns). + +| Probe / vendor evidence | Recommended pattern | Notes | +| --- | --- | --- | +| Vendor docs describe a documented HTTP/REST API for the operations the script must implement, regardless of auth scheme. | `http-api` | Pick auth shape from the sub-table below. See `samples/http/` and templates `Pattern-GenericRestApiBasicAuth.json`, `Pattern-GenericRestApiBearerToken.json`, `Pattern-GenericRestApiKeyRotation.json`. | +| The target only has an HTML login form (no API), the operator can provide credentials, and operations work by submitting forms. | `http-form-fill` | Use `ExtractFormData` to walk the form, `Request` with `application/x-www-form-urlencoded`, and rely on default cookie persistence. See `samples/http/facebook/` and `samples/http/twitter/`. | + +### `http-api` auth shape + +Pick a bucket, then a specific scheme. The bucket determines whether the script uses `HttpAuth` or builds the header itself via `Headers`/`AddHeaders`. + +| Probe / vendor evidence | Bucket | Scheme | Notes | +| --- | --- | --- | --- | +| `WWW-Authenticate: Basic …` response, or vendor docs describe HTTP Basic on every call. | HttpAuth-managed | `Basic` | `HttpAuth` `Type: "Basic"` per request. See `samples/http/wordpress/`. | +| Vendor docs describe HTTP Digest. 
| HttpAuth-managed | `Digest` | Same shape as Basic with `Type: "Digest"`. Rare in modern self-hosted products; verify the runtime supports the scheme. | +| Vendor docs or probe evidence show `Authorization: Bearer …` on operation calls. | Script-managed header | `Bearer` | `Headers.AddHeaders` with `Authorization: Bearer %Token%`. See `samples/http/onelogin-jit/`. | +| Vendor docs show a custom `Authorization` scheme such as `PVEAPIToken=…` (Proxmox), `Token …`, or other vendor-specific prefix. | Script-managed header | Custom `Authorization` scheme | Same as Bearer but with the vendor's scheme name. Treat as Bearer-shaped for authoring purposes. | +| Vendor docs describe a static API key passed in a custom header (e.g., `X-API-Key`, `X-Vault-Token`, `X-Auth-Token`). | Script-managed header | Custom-header API key | `Headers.AddHeaders` with the named header. Pair with [`docs/guides/api-key-management.md`](../guides/api-key-management.md) when the same script must rotate the key. | + +### `http-api` one-step vs two-step + +Orthogonal to the bucket above: + +| Probe / vendor evidence | One-step or two-step | Notes | +| --- | --- | --- | +| The operator already holds the credential the script presents on every call (long-lived API key, static PAT, root API token). | One-step | Apply auth shape directly on the operation request. | +| Vendor docs describe a token endpoint that exchanges credentials for a short-lived access token (OAuth2 client_credentials, login + token endpoints). | Two-step | POST to the token endpoint (often HttpAuth-managed `Basic` with client id/secret, sometimes form-encoded), `ExtractJsonObject` to capture the token, then attach via script-managed `Headers` on operation calls. Do **not** reuse the same `RequestObjectName` for token-fetch and operation calls. 
| + +### HTTP credential intent + +| Service-account credential kind | Influence on pattern | +| --- | --- | +| Password | Likely `http-api` with HttpAuth-managed `Basic`/`Digest`, or `http-form-fill` if there's no API. If the API documents a token endpoint that exchanges a password for a token, that is `http-api` two-step Bearer. | +| API key | Likely `http-api` with script-managed-header — the specific scheme depends on whether the vendor uses `Authorization` (Bearer-shaped) or a custom header. Unless the API explicitly accepts the key as a Bearer token, pick by what vendor docs say verbatim. | +| Bearer token (operator already holds) | `http-api` one-step Bearer. Consider whether the script should refresh; if not, document the assumption. | + +### When to ask vs decide (HTTP) + +- **Ask** if no vendor documentation has been supplied (URL or pasted excerpt) and probe evidence is ambiguous. +- **Ask** if the API supports multiple schemes and the choice has security implications (e.g., Basic vs Bearer when both are documented). +- **Decide** when vendor docs explicitly call out one scheme and probe evidence (auth-scheme detection, redirect chain) corroborates it. + +## Self-managed vs service-account + +This dimension is orthogonal to the patterns above and influences which operations the script must implement, not which transport pattern to use. + +| Mode | Symptom | Implication | +| --- | --- | --- | +| Self-managed | The managed account can change its own credential (e.g., `passwd` for the same user, `PATCH /me`). | `ChangePassword` uses `%FuncUserName%`/`%FuncPassword%` directly; no separate service account. | +| Service-account | A privileged service account changes the managed account's credential (e.g., `chpasswd` as root, `PATCH /users/{id}` as admin). | The script needs both the managed account context and the service-account context; `LoginSsh`/`LogoutSsh` may run as the service account. | + +Decide based on vendor docs and probe evidence, not assumption. 
When in doubt, ask the operator which mode the deployment will use. + +## Vendor documentation inputs + +`strategy-selection` accepts both fetched URLs and vendor-doc excerpts pasted into the conversation. See [`vendor-doc-search-recipes.md`](vendor-doc-search-recipes.md) for query templates and the normalization recipe. diff --git a/docs/agent-reference/vendor-doc-search-recipes.md b/docs/agent-reference/vendor-doc-search-recipes.md new file mode 100644 index 0000000..39dcd92 --- /dev/null +++ b/docs/agent-reference/vendor-doc-search-recipes.md @@ -0,0 +1,114 @@ +[← Agent reference](README.md) + +# Vendor doc search recipes + +Used by the `strategy-selection` skill (and indirectly by `script-authoring`) to obtain authoritative vendor documentation about a target system. Two input paths are equally first-class: + +1. **The agent has web search.** Use the query templates below, then normalize the result. +2. **The user pasted vendor-doc content into the conversation.** Use the normalization recipe directly. + +Either way, the goal is a small, structured "vendor evidence" record that strategy-selection can reason over without re-reading raw vendor pages. + +## Query templates + +Replace `<vendor>` with the target product name (e.g., "Okta", "WordPress", "vCenter"). Replace `<version>` with the actual deployed version when known; omit if not. Prefer official vendor domains in the result list. 
+ +### HTTP / REST API + +- Authentication scheme(s): + - `<vendor> REST API authentication` + - `<vendor> API basic auth OR bearer token OR api key` + - `<vendor> OAuth2 client credentials token endpoint` +- Password change / credential rotation endpoint: + - `<vendor> API change password endpoint` + - `<vendor> API reset user password REST` + - `<vendor> API rotate api key` +- Account discovery / enumeration: + - `<vendor> API list users pagination` + - `<vendor> API search users filter` +- Pagination shape (when the API documents one): + - `<vendor> API pagination cursor OR offset OR link header` +- Form-fill fallback (when no public API exists): + - `<vendor> change password form action url` + - `<vendor> login form CSRF token field name` + +### SSH + +- Login and shell behavior: + - `<vendor> SSH login banner` + - `<vendor> default shell <version>` + - `<vendor> CLI prompt format` +- Password change command: + - `<vendor> change password CLI command` + - `<vendor> service account password rotation` +- Privilege escalation: + - `<vendor> sudo NOPASSWD service account` + - `<vendor> root password change command` +- Discovery: + - `<vendor> list local accounts command` + - `<vendor> /etc/passwd format` +- SSH key management: + - `<vendor> authorized_keys path location` + - `<vendor> sshd_config AuthorizedKeysFile` + +## Choosing among results + +Prefer, in order: + +1. The vendor's own documentation site (e.g., `developer.<vendor>.com`, `docs.<vendor>.com`). +2. The vendor's API reference (Swagger/OpenAPI page if published). +3. The vendor's release notes for the deployed `<version>` — these often describe behavioral changes the agent must account for. +4. Vendor-published code samples in their official SDK repos. + +Do not treat third-party blog posts as authoritative. They are useful for orientation but the script must be grounded in a vendor-controlled source before it is shipped. + +## Normalization recipe + +Whether content arrived from web search or from a paste, distill it into the same shape before handing it to `strategy-selection`. Cite the source URL or paste so the user can audit. 
+ +``` +Vendor: +Version: +Source: +Captured: + +Authentication: + - scheme: + - endpoint: + - notes: + +Operations: + - : + method: + endpoint: + payload: + notes: + +Pagination: + - shape: + - parameters: + +Quirks: + - +``` + +Rules: + +- **No secrets.** Strip any tokens, passwords, or sample API keys from the captured content before saving. +- **One source per record.** If multiple vendor pages were consulted, produce one record per page and let strategy-selection consider them as siblings. +- **Verbatim quotes for surprising claims.** When the vendor doc says something counterintuitive (e.g., "401 has an empty body"), include the exact sentence and the URL. + +## When the user pasted content + +Treat the paste as the source. Do not re-fetch unless the user asks — they may be working in an environment where the agent has no web access, and silent re-fetching changes the grounding. + +If the paste appears truncated (unbalanced JSON, cut-off URLs, mid-sentence), surface this to the user and ask whether to proceed with what's available or wait for a fuller paste. + +## When the agent has no web search + +Web search is not available in every agent runtime. If the agent cannot fetch and the user has not pasted vendor docs, `strategy-selection` falls back to: + +1. Probe evidence from `target-probing` (auth-scheme detection, login-form inspection, etc.). +2. Asking the operator to paste the relevant vendor pages. + +Do not invent vendor-doc content. If neither probes nor a paste are available, stop and ask. diff --git a/docs/reference/imports.md b/docs/reference/imports.md index 9f2aa3d..1207e38 100644 --- a/docs/reference/imports.md +++ b/docs/reference/imports.md @@ -46,6 +46,8 @@ At runtime, the scripting engine expands the `Imports` list and merges the impor These are the 17 generic SSH-oriented system libraries currently suitable for custom platform scripts. 
Function names below are the most useful entry points visible in the built-in libraries; many libraries also include lower-level helper functions. +> **For real call shapes, look at `samples/`.** This page lists library names and key entry points only — it intentionally does not document parameter lists. The deployed appliance's view of an imported function's arity and parameter order can drift from any external reference, so the trustworthy source is a working call site in a sample. Search the repo for `"Name": "<FunctionName>"` and copy the `Parameters` array from a sample that imports the same library. + | Library | Purpose | Key functions provided | Use when | | --- | --- | --- | --- | | `LinuxSshLogin` | Standard Linux SSH login/logout handling, shell setup, and shared validation. | `LoginSsh`, `LogoutSsh`, `SetUpEnvironment`, `VerifyDelegationPrefix` | Most Linux SSH platforms that need the built-in prompt handling, shell detection, and logout flow. | @@ -139,6 +141,7 @@ If you need more than login/logout, you can import multiple libraries in the sam ## See also +- The `samples/` directory — every imported-function call site in the repo is a working call shape against a shipped appliance. Grep for `"Name": "<FunctionName>"` to find example calls. - [SSH Platforms Guide](../guides/ssh-platforms.md) - [Script Structure Reference](script-structure.md) - [Your First SSH Script](../tutorials/your-first-ssh-script.md) diff --git a/templates/Pattern-SshBatchShadowCompare.json b/templates/Pattern-SshBatchShadowCompare.json new file mode 100644 index 0000000..b45dc6e --- /dev/null +++ b/templates/Pattern-SshBatchShadowCompare.json @@ -0,0 +1,987 @@ +{ + "Id": "SshBatchShadowCompare", + "BackEnd": "Scriptable", + "Meta": { + "Filename": "Pattern-SshBatchShadowCompare.json", + "Description": "PATTERN TEMPLATE - SSH batch-mode (no PTY) password management for Linux. CheckPassword via 'sudo -S getent shadow' + CompareShadowHash on the whole shadow line (handles yescrypt/sha512crypt natively). 
ChangePassword via 'sudo -S chpasswd'. Service-account intent; sudo-with-password (no NOPASSWD required). Adapt FuncUserName/AccountUserName parameters and DelegationPrefix to your environment." + }, + "CheckSystem": { + "Parameters": [ + { + "AssetName": { + "Type": "String", + "Required": false, + "DefaultValue": "" + } + }, + { + "Address": { + "Type": "String", + "Required": true + } + }, + { + "Port": { + "Type": "Integer", + "Required": false, + "DefaultValue": 22 + } + }, + { + "Timeout": { + "Type": "Integer", + "Required": false, + "DefaultValue": 20 + } + }, + { + "FuncUserName": { + "Type": "String", + "Required": true + } + }, + { + "FuncPassword": { + "Type": "Secret", + "Required": false + } + }, + { + "UserKey": { + "Type": "Secret", + "Required": false, + "DefaultValue": "" + } + }, + { + "DelegationPrefix": { + "Type": "String", + "Required": false, + "DefaultValue": "sudo" + } + }, + { + "CheckHostKey": { + "Type": "Boolean", + "Required": false, + "DefaultValue": true + } + }, + { + "HostKey": { + "Type": "String", + "Required": false + } + } + ], + "Do": [ + { + "Try": { + "Do": [ + { + "Function": { + "Name": "ConnectToAsset" + } + }, + { + "Function": { + "Name": "RunCommand", + "Parameters": [ + "/usr/bin/id %FuncUserName%", + [], + false, + false, + false + ], + "ResultVariable": "Result" + } + }, + { + "Function": { + "Name": "DisconnectFromAsset" + } + }, + { + "Condition": { + "If": "Result.rc == 0", + "Then": { + "Do": [ + { + "Return": { + "Value": true + } + } + ] + } + } + }, + { + "Status": { + "Type": "Checking", + "Percent": 90, + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "Output: %{ Result.Stdout }% Error: %{ Result.Stderr }%" + ] + } + } + }, + { + "Return": { + "Value": false + } + } + ], + "Catch": [ + { + "Function": { + "Name": "DisconnectFromAsset" + } + }, + { + "Return": { + "Value": "Error" + } + } + ] + } + } + ] + }, + "DiscoverSshHostKey": { + "Parameters": [ + { + "Address": { + "Type": 
"String", + "Required": true + } + }, + { + "Port": { + "Type": "Integer", + "Required": false, + "DefaultValue": 22 + } + }, + { + "Timeout": { + "Type": "Integer", + "Required": false, + "DefaultValue": 20 + } + } + ], + "Do": [ + { + "Try": { + "Do": [ + { + "DiscoverSshHostKey": { + "HostKeyVariableName": "HostKey", + "Port": "%Port%", + "NetworkAddress": "%Address%", + "Timeout": "%Timeout%" + } + }, + { + "WriteResponseObject": { + "Value": "%HostKey::$%" + } + } + ], + "Catch": [ + { + "Throw": { + "Value": "%Exception%" + } + } + ] + } + }, + { + "Return": { + "Value": true + } + } + ] + }, + "CheckPassword": { + "Parameters": [ + { + "AssetName": { + "Type": "String", + "Required": false, + "DefaultValue": "" + } + }, + { + "Address": { + "Type": "String", + "Required": true + } + }, + { + "Port": { + "Type": "Integer", + "Required": false, + "DefaultValue": 22 + } + }, + { + "Timeout": { + "Type": "Integer", + "Required": false, + "DefaultValue": 20 + } + }, + { + "FuncUserName": { + "Type": "String", + "Required": true + } + }, + { + "FuncPassword": { + "Type": "Secret", + "Required": false + } + }, + { + "UserKey": { + "Type": "Secret", + "Required": false, + "DefaultValue": "" + } + }, + { + "DelegationPrefix": { + "Type": "String", + "Required": false, + "DefaultValue": "sudo" + } + }, + { + "CheckHostKey": { + "Type": "Boolean", + "Required": false, + "DefaultValue": true + } + }, + { + "HostKey": { + "Type": "String", + "Required": false + } + }, + { + "AccountUserName": { + "Type": "String", + "Required": true + } + }, + { + "AccountPassword": { + "Type": "Secret", + "Required": true + } + } + ], + "Do": [ + { + "SetItem": { + "Name": "CheckResult", + "Value": false + } + }, + { + "Try": { + "Do": [ + { + "Function": { + "Name": "ConnectToAsset" + } + }, + { + "Function": { + "Name": "RunCommand", + "Parameters": [ + "/usr/bin/getent shadow %AccountUserName%", + [], + false, + false, + true + ], + "ResultVariable": "Result" + } + }, + { + 
"Function": { + "Name": "DisconnectFromAsset" + } + }, + { + "Condition": { + "If": "Result.rc != 0", + "Then": { + "Do": [ + { + "Status": { + "Type": "Checking", + "Percent": 80, + "Message": { + "Name": "AccountNotFound", + "Parameters": [ + "%AccountUserName%" + ] + } + } + }, + { + "Return": { + "Value": false + } + } + ] + } + } + }, + { + "SetItem": { + "Name": "AccountEntry", + "Value": "%{ Result.Stdout.TrimEnd() }%" + } + }, + { + "Condition": { + "If": "!Regex.IsMatch(AccountEntry, \"^\" + AccountUserName + \":[^:]+:\")", + "Then": { + "Do": [ + { + "Status": { + "Type": "Checking", + "Percent": 85, + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "getent shadow returned an unexpected shape for %AccountUserName%" + ] + } + } + }, + { + "Return": { + "Value": false + } + } + ] + } + } + }, + { + "Status": { + "Type": "Checking", + "Percent": 75, + "Message": { + "Name": "VerifyingPassword" + } + } + }, + { + "CompareShadowHash": { + "Password": "%AccountPassword%", + "SaltedHash": "%AccountEntry%", + "ResultVariable": "PasswordHashMatched" + } + }, + { + "Return": { + "Value": "%{ PasswordHashMatched }%" + } + } + ], + "Catch": [ + { + "Function": { + "Name": "DisconnectFromAsset" + } + }, + { + "Status": { + "Type": "Checking", + "Percent": 90, + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "getent+CompareShadowHash path failed: %Exception::$%. Falling back to auth-by-login." 
+ ] + } + } + }, + { + "SetItem": { + "Name": "GLOBAL:CheckSsh", + "Value": null + } + }, + { + "Try": { + "Do": [ + { + "Connect": { + "ConnectionObjectName": "Global:CheckSsh", + "Type": "Ssh", + "Port": "%Port%", + "NetworkAddress": "%Address%", + "Login": "%AccountUserName%", + "RequestTerminal": false, + "Password": "%AccountPassword::$%", + "CheckHostKey": "%CheckHostKey%", + "HostKey": "%HostKey::$%", + "Timeout": "%Timeout%" + } + }, + { + "SetItem": { + "Name": "CheckResult", + "Value": true + } + }, + { + "Condition": { + "If": "CheckSsh != null", + "Then": { + "Do": [ + { + "Disconnect": { + "ConnectionObjectName": "CheckSsh" + } + } + ] + } + } + } + ], + "Catch": [ + { + "Condition": { + "If": "CheckSsh != null", + "Then": { + "Do": [ + { + "Disconnect": { + "ConnectionObjectName": "CheckSsh" + } + } + ] + } + } + }, + { + "SetItem": { + "Name": "CheckResult", + "Value": false + } + } + ] + } + } + ] + } + }, + { + "Return": { + "Value": "%{ CheckResult }%" + } + } + ] + }, + "ChangePassword": { + "Parameters": [ + { + "AssetName": { + "Type": "String", + "Required": false, + "DefaultValue": "" + } + }, + { + "Address": { + "Type": "String", + "Required": true + } + }, + { + "Port": { + "Type": "Integer", + "Required": false, + "DefaultValue": 22 + } + }, + { + "Timeout": { + "Type": "Integer", + "Required": false, + "DefaultValue": 20 + } + }, + { + "FuncUserName": { + "Type": "String", + "Required": true + } + }, + { + "FuncPassword": { + "Type": "Secret", + "Required": false + } + }, + { + "UserKey": { + "Type": "Secret", + "Required": false, + "DefaultValue": "" + } + }, + { + "DelegationPrefix": { + "Type": "String", + "Required": false, + "DefaultValue": "sudo" + } + }, + { + "CheckHostKey": { + "Type": "Boolean", + "Required": false, + "DefaultValue": true + } + }, + { + "HostKey": { + "Type": "String", + "Required": false + } + }, + { + "AccountUserName": { + "Type": "String", + "Required": true + } + }, + { + "AccountPassword": { + "Type": 
"Secret", + "Required": false + } + }, + { + "NewPassword": { + "Type": "Secret", + "Required": true + } + } + ], + "Do": [ + { + "Try": { + "Do": [ + { + "Function": { + "Name": "ConnectToAsset" + } + }, + { + "Status": { + "Type": "Changing", + "Percent": 50, + "Message": { + "Name": "ChangingPassword", + "Parameters": [ + "%AccountUserName%" + ] + } + } + }, + { + "Function": { + "Name": "RunCommand", + "Parameters": [ + "/usr/sbin/chpasswd", + [ + "%AccountUserName%:%NewPassword%" + ], + false, + true, + false + ], + "ResultVariable": "Result" + } + }, + { + "Function": { + "Name": "DisconnectFromAsset" + } + }, + { + "Condition": { + "If": "Result.rc != 0", + "Then": { + "Do": [ + { + "Status": { + "Type": "Changing", + "Percent": 90, + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "chpasswd failed rc=%{ Result.rc }% stderr=%{ Result.Stderr }%" + ] + } + } + }, + { + "Throw": { + "Value": "chpasswd failed" + } + } + ] + } + } + }, + { + "Return": { + "Value": true + } + } + ], + "Catch": [ + { + "Function": { + "Name": "DisconnectFromAsset" + } + }, + { + "Status": { + "Type": "Changing", + "Percent": 95, + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "%Exception::$%" + ] + } + } + }, + { + "Throw": { + "Value": "%Exception%" + } + } + ] + } + } + ] + }, + "Functions": [ + { + "Name": "ConnectToAsset", + "Do": [ + { + "SetItem": { + "Name": "GLOBAL:ConnectSsh", + "Value": null + } + }, + { + "SetItem": { + "Name": "GLOBAL:ServerSoftwareName", + "Value": "" + } + }, + { + "Status": { + "Type": "Connecting", + "Percent": 30, + "Message": { + "Name": "AssetConnectingWithAddress", + "Parameters": [ + "%AssetName%", + "%Address%" + ] + } + } + }, + { + "Try": { + "Do": [ + { + "Connect": { + "ConnectionObjectName": "Global:ConnectSsh", + "Type": "Ssh", + "Port": "%Port%", + "NetworkAddress": "%Address%", + "Login": "%FuncUserName%", + "RequestTerminal": false, + "Password": "%FuncPassword::$%", + "UserKey": 
"%UserKey::$%", + "CheckHostKey": "%CheckHostKey%", + "HostKey": "%HostKey::$%", + "Timeout": "%Timeout%", + "SoftwareVersionVariableName": "GLOBAL:ServerSoftwareName" + } + }, + { + "Status": { + "Type": "Checking", + "Percent": 40, + "Message": { + "Name": "SystemLoginCheck", + "Parameters": [ + "%Address%" + ] + } + } + } + ], + "Catch": [ + { + "Status": { + "Type": "Connecting", + "Percent": 95, + "Message": { + "Name": "AssetConnectFailedWithReasonAndAddress", + "Parameters": [ + "%AssetName%", + "%Address%", + "%Exception%" + ] + } + } + }, + { + "Throw": { + "Value": "Failed to connect" + } + } + ] + } + } + ] + }, + { + "Name": "DisconnectFromAsset", + "Do": [ + { + "Condition": { + "If": "ConnectSsh != null", + "Then": { + "Do": [ + { + "Disconnect": { + "ConnectionObjectName": "ConnectSsh" + } + } + ] + } + } + }, + { + "Return": { + "Value": true + } + } + ] + }, + { + "Name": "RunCommand", + "Parameters": [ + { + "Cmd": { + "Type": "String" + } + }, + { + "StdinArgs": { + "Type": "Array" + } + }, + { + "CommandContainsSecret": { + "Type": "Boolean" + } + }, + { + "InputContainsSecret": { + "Type": "Boolean" + } + }, + { + "OutputContainsSecret": { + "Type": "Boolean" + } + } + ], + "Do": [ + { + "SetItem": { + "Name": "rc", + "Value": 1 + } + }, + { + "SetItem": { + "Name": "Stdout", + "Value": "" + } + }, + { + "SetItem": { + "Name": "Stderr", + "Value": "" + } + }, + { + "SetItem": { + "Name": "runcmd", + "Value": "%{ Cmd }%" + } + }, + { + "Condition": { + "If": "!string.IsNullOrEmpty(DelegationPrefix)", + "Then": { + "Do": [ + { + "SetItem": { + "Name": "runcmd", + "Value": "%DelegationPrefix% %{ Cmd }%" + } + } + ] + } + } + }, + { + "Try": { + "Do": [ + { + "ExecuteCommand": { + "ConnectionObjectName": "ConnectSsh", + "Command": "%runcmd%", + "Stdin": "%{ StdinArgs }%", + "BufferName": "Stdout", + "StderrBufferName": "Stderr", + "ExitStatusBufferName": "rc", + "CommandContainsSecret": "%{ CommandContainsSecret }%", + "InputContainsSecret": "%{ 
InputContainsSecret }%", + "OutputContainsSecret": "%{ OutputContainsSecret }%" + } + }, + { + "Condition": { + "If": "(rc != 0) && Regex.IsMatch(Stderr, \"(a terminal is required to read the password)\")", + "Then": { + "Do": [ + { + "Condition": { + "If": "string.IsNullOrEmpty(FuncPassword)", + "Then": { + "Do": [ + { + "Status": { + "Type": "Checking", + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "Account %FuncUserName% requires a password to run sudo; provide FuncPassword or configure NOPASSWD." + ] + }, + "Percent": 90 + } + }, + { + "Throw": { + "Value": "Sudo credentials are required" + } + } + ] + } + } + }, + { + "SetItem": { + "Name": "runcmd", + "Value": "%DelegationPrefix% -S %{ Cmd }%" + } + }, + { + "SetItem": { + "Name": "Stdin", + "Value": "%{ StdinArgs.ToList() }%" + } + }, + { + "Eval": { + "Expression": "Stdin.Insert(0, FuncPassword)" + } + }, + { + "SetItem": { + "Name": "StdinArray", + "Value": "%{ Stdin.ToArray() }%" + } + }, + { + "ExecuteCommand": { + "ConnectionObjectName": "ConnectSsh", + "Command": "%runcmd%", + "Stdin": "%{ StdinArray }%", + "BufferName": "Stdout", + "StderrBufferName": "Stderr", + "ExitStatusBufferName": "rc", + "CommandContainsSecret": "%{ CommandContainsSecret }%", + "InputContainsSecret": true, + "OutputContainsSecret": "%{ OutputContainsSecret }%" + } + } + ] + } + } + } + ], + "Catch": [ + { + "Status": { + "Type": "Checking", + "Message": { + "Name": "UnexpectedDataReceived", + "Parameters": [ + "Command failed with error %Exception::$%" + ] + }, + "Percent": 90 + } + }, + { + "Throw": { + "Value": "Command failed" + } + } + ] + } + }, + { + "Return": { + "Value": { + "rc": "%{ rc }%", + "Stdout": "%{ Stdout }%", + "Stderr": "%{ Stderr }%" + } + } + } + ] + } + ] +} diff --git a/tools/Build-SamplesIndex.ps1 b/tools/Build-SamplesIndex.ps1 new file mode 100644 index 0000000..ce56067 --- /dev/null +++ b/tools/Build-SamplesIndex.ps1 @@ -0,0 +1,344 @@ +#requires -Version 5.1 +<# +.SYNOPSIS + 
Generates docs/agent-reference/samples-index.md from the contents of + samples/ and templates/. + +.DESCRIPTION + Walks samples/ssh, samples/http, and templates/ (samples/telnet is + excluded from the agent-facing index — telnet is out of scope for + the agent skill system). For every JSON + file found, emits one row in the index with the columns: + + protocol | auth-scheme | operations | OS-family | file-path | README + + Grounding rules: + - protocol is derived from the directory path (ssh|http) for samples, + and from filename prefix (Pattern-GenericLinux* / Pattern-WindowsSsh* / TemplateSsh* are + ssh; Pattern-GenericHttp* / Pattern-GenericRestApi* / TemplateHttp* + are http) for templates. Files that don't classify cleanly are + emitted with protocol = "" rather than guessed. + - operations is the intersection of top-level keys with the canonical + operation list from schema/custom-platform-script.schema.json. + Imports and user-defined functions never appear here. + - auth-scheme is best-effort from the JSON content: + HTTP: HttpAuth.Type (Basic/Digest), or "Bearer" if an + "Authorization": "Bearer ..." header is set, or "ApiKey" + if a non-Authorization header carries the secret, or + "Form" if ExtractFormData appears, else blank. + SSH: "Interactive" if Send and Receive both appear, "Batch" if + ExecuteCommand appears without Send/Receive, "Mixed" if + both styles appear, else blank. + - OS-family is left blank by the build script. Phase 1 prefers + "blank" over "guess"; OS family is hard to ground from JSON alone + and the sample README's heading text isn't structured. Future + phases may revisit. + - file-path and README are filesystem facts. + + The script writes the file deterministically (sorted rows, LF line + endings) so the CI freshness check can do a byte-for-byte compare. + +.PARAMETER RepoRoot + Path to the repository root. Defaults to the parent of this script's directory. + +.PARAMETER OutputPath + Path to write the index. Defaults to + docs/agent-reference/samples-index.md under -RepoRoot. 
+ +.PARAMETER CheckOnly + If set, build the index in memory and compare it against the committed copy; nothing is written. + Exits 0 if identical, 1 if they differ or the committed copy is missing. Used by CI. + +.EXAMPLE + ./tools/Build-SamplesIndex.ps1 + Regenerates the index in place. + +.EXAMPLE + ./tools/Build-SamplesIndex.ps1 -CheckOnly + Used by CI to fail the build if the committed index is stale. +#> +[CmdletBinding()] +param( + [string] $RepoRoot, + [string] $OutputPath, + [switch] $CheckOnly +) + +$ErrorActionPreference = 'Stop' + +if (-not $RepoRoot) { + $RepoRoot = Split-Path -Parent $PSScriptRoot + if (-not $RepoRoot) { + $RepoRoot = (Resolve-Path (Join-Path $PSScriptRoot '..')).Path + } +} +$RepoRoot = (Resolve-Path $RepoRoot).Path + +if (-not $OutputPath) { + $OutputPath = Join-Path $RepoRoot 'docs/agent-reference/samples-index.md' +} + +$schemaPath = Join-Path $RepoRoot 'schema/custom-platform-script.schema.json' +if (-not (Test-Path $schemaPath)) { + throw "Schema not found at $schemaPath. Cannot ground the operation list." +} + +# Canonical operation list -- intersect-only against this set so user-defined +# functions and imported helpers never leak into the operations column. 
+$schema = Get-Content $schemaPath -Raw | ConvertFrom-Json +$nonOperationProps = @('$schema', 'Id', 'BackEnd', 'Meta', 'Imports', 'Import', 'Functions') +$canonicalOperations = @($schema.properties.PSObject.Properties.Name | + Where-Object { $_ -notin $nonOperationProps }) + +function Get-Protocol { + param([string] $RelativePath) + + $parts = $RelativePath -split '[\\/]' + + if ($parts[0] -eq 'samples') { + switch ($parts[1]) { + 'ssh' { return 'ssh' } + 'http' { return 'http' } + 'telnet' { return 'telnet' } # filtered out before emission + default { return '' } + } + } + + if ($parts[0] -eq 'templates') { + $name = [System.IO.Path]::GetFileName($RelativePath) + if ($name -match '^(Pattern-GenericLinux|Pattern-WindowsSsh|TemplateSsh)') { + return 'ssh' + } + if ($name -match '^(Pattern-GenericHttp|Pattern-GenericRestApi|TemplateHttp)') { + return 'http' + } + return '' + } + + return '' +} + +function Get-OperationsList { + param($Json) + + if ($null -eq $Json) { return @() } + $names = $Json.PSObject.Properties.Name + $ops = @($names | Where-Object { $canonicalOperations -contains $_ }) + return ,$ops +} + +function Get-AuthScheme { + param( + [string] $Protocol, + [string] $RawJson + ) + + if ($Protocol -eq 'http') { + $schemes = @() + + $basicMatch = [regex]::Match($RawJson, '"HttpAuth"\s*:\s*\{[^}]*"Type"\s*:\s*"([^"]+)"') + if ($basicMatch.Success) { + $schemes += $basicMatch.Groups[1].Value + } + + if ($RawJson -match '"Authorization"\s*:\s*"Bearer\b') { + $schemes += 'Bearer' + } + + # Custom header carrying a secret (e.g., x-api-key, X-Auth-Token). + # We only flag this when no Authorization header was already classified, + # to avoid double-labeling Bearer flows that also set other headers. 
+ if ($schemes.Count -eq 0 -and + $RawJson -match '"AddHeaders"\s*:\s*\{[^}]*(?i)(api[-_ ]?key|x-auth-token|api-token)') { + $schemes += 'ApiKey' + } + + if ($RawJson -match '\bExtractFormData\b') { + $schemes += 'Form' + } + + $schemes = @($schemes | Select-Object -Unique) + return ($schemes -join '+') + } + + if ($Protocol -eq 'ssh') { + $hasInteractive = ($RawJson -match '"Send"\s*:') -and ($RawJson -match '"Receive"\s*:') + $hasBatch = $RawJson -match '"ExecuteCommand"\s*:' + + if ($hasInteractive -and $hasBatch) { return 'Mixed' } + if ($hasInteractive) { return 'Interactive' } + if ($hasBatch) { return 'Batch' } + return '' + } + + return '' +} + +function Find-ReadmePath { + param( + [string] $JsonAbsolutePath, + [string] $RepoRootPath + ) + + $dir = Split-Path -Parent $JsonAbsolutePath + $candidate = Join-Path $dir 'README.md' + if (Test-Path $candidate) { + $rel = (Resolve-Path $candidate).Path.Substring($RepoRootPath.Length).TrimStart('\','/') + return ($rel -replace '\\', '/') + } + return '' +} + +function ConvertTo-RelativePath { + param( + [string] $AbsolutePath, + [string] $RepoRootPath + ) + $rel = $AbsolutePath.Substring($RepoRootPath.Length).TrimStart('\','/') + return ($rel -replace '\\', '/') +} + +# Collect every JSON under samples/ and templates/. +$samplesRoot = Join-Path $RepoRoot 'samples' +$templatesRoot = Join-Path $RepoRoot 'templates' + +$jsonFiles = @() +if (Test-Path $samplesRoot) { $jsonFiles += Get-ChildItem -Path $samplesRoot -Filter '*.json' -Recurse -File } +if (Test-Path $templatesRoot) { $jsonFiles += Get-ChildItem -Path $templatesRoot -Filter '*.json' -Recurse -File } + +$rows = @() +foreach ($file in $jsonFiles) { + $rel = ConvertTo-RelativePath -AbsolutePath $file.FullName -RepoRootPath $RepoRoot + $protocol = Get-Protocol -RelativePath $rel + + # Telnet samples are out of scope for the agent-facing index. 
+ if ($protocol -eq 'telnet') { continue } + + $raw = Get-Content $file.FullName -Raw + try { + $json = $raw | ConvertFrom-Json + } catch { + Write-Warning "Skipping $rel — invalid JSON: $_" + continue + } + + $ops = Get-OperationsList -Json $json + $opsCell = if ($ops.Count -gt 0) { ($ops -join ', ') } else { '' } + + $auth = Get-AuthScheme -Protocol $protocol -RawJson $raw + $readme = Find-ReadmePath -JsonAbsolutePath $file.FullName -RepoRootPath $RepoRoot + + $rows += [pscustomobject]@{ + Kind = if ($rel.StartsWith('samples/')) { 'samples' } else { 'templates' } + Protocol = $protocol + AuthScheme = $auth + Operations = $opsCell + OsFamily = '' # intentionally blank — see header note + FilePath = $rel + Readme = $readme + } +} + +$rows = $rows | Sort-Object Kind, Protocol, FilePath + +function Format-Cell { + param([string] $Value) + if ([string]::IsNullOrEmpty($Value)) { return '—' } + return $Value +} + +function Format-Row { + param([pscustomobject] $Row) + # Use repo-relative links from docs/agent-reference/ so they resolve on + # GitHub and on disk. From docs/agent-reference/, "../../" reaches repo root. + $fileLink = if ($Row.FilePath) { "[``$($Row.FilePath)``](../../$($Row.FilePath))" } else { '—' } + $readmeLink = if ($Row.Readme) { "[README](../../$($Row.Readme))" } else { '—' } + + return "| {0} | {1} | {2} | {3} | {4} | {5} |" -f ` + (Format-Cell $Row.Protocol), + (Format-Cell $Row.AuthScheme), + (Format-Cell $Row.Operations), + (Format-Cell $Row.OsFamily), + $fileLink, + $readmeLink +} + +$sampleRows = $rows | Where-Object { $_.Kind -eq 'samples' } +$templateRows = $rows | Where-Object { $_.Kind -eq 'templates' } + +$header = @' +[← Agent reference](README.md) + +# Samples and templates index + +**Generated file. Do not edit by hand.** Regenerate with: + +```powershell +./tools/Build-SamplesIndex.ps1 +``` + +CI runs the same script with `-CheckOnly` and fails the build if the committed copy differs. 
+ +## Conventions + +- **protocol** — derived from the directory (`samples/ssh/`, `samples/http/`) or the template filename (`Pattern-GenericLinux*` / `Pattern-WindowsSsh*` / `TemplateSsh*` → ssh; `Pattern-GenericHttp*` / `Pattern-GenericRestApi*` / `TemplateHttp*` → http). +- **auth-scheme** — best-effort from JSON content. HTTP: from `HttpAuth.Type`, an `Authorization: Bearer` header, an API-key-shaped custom header, or `ExtractFormData`. SSH: `Interactive` (Send/Receive), `Batch` (ExecuteCommand), or `Mixed`. Blank when undetermined. +- **operations** — intersection of top-level keys with the canonical operation list from `schema/custom-platform-script.schema.json`. Imports and user-defined functions never appear here. +- **OS-family** — intentionally blank. Phase 1 prefers blank over guessed values; revisit in a later phase if needed. +- **file-path** and **README** — filesystem facts. `—` means the field could not be determined. +- `samples/telnet/` is excluded — telnet is out of scope for the agent skill system. The samples remain in the repo for human reference. + +## Samples + +| protocol | auth-scheme | operations | OS-family | file-path | README | +| --- | --- | --- | --- | --- | --- | +'@ + +$middle = @' + +## Templates + +| protocol | auth-scheme | operations | OS-family | file-path | README | +| --- | --- | --- | --- | --- | --- | +'@ + +$sb = [System.Text.StringBuilder]::new() +$null = $sb.AppendLine($header.TrimEnd()) +foreach ($r in $sampleRows) { + $null = $sb.AppendLine((Format-Row -Row $r)) +} +$null = $sb.AppendLine($middle.TrimEnd()) +foreach ($r in $templateRows) { + $null = $sb.AppendLine((Format-Row -Row $r)) +} + +$content = $sb.ToString() + +# Normalize to LF line endings for deterministic CI compare. 
+$content = $content -replace "`r`n", "`n" + +if ($CheckOnly) { + if (-not (Test-Path $OutputPath)) { + Write-Host "samples-index.md does not exist; regenerate with ./tools/Build-SamplesIndex.ps1" -ForegroundColor Red + exit 1 + } + $existing = (Get-Content $OutputPath -Raw -Encoding UTF8) -replace "`r`n", "`n" # explicit UTF-8: Windows PowerShell 5.1 would otherwise decode the BOM-less file as ANSI + if (-not [string]::Equals($existing, $content, [System.StringComparison]::Ordinal)) { # ordinal compare: -ne is case-insensitive and would miss case-only drift + Write-Host "samples-index.md is stale; regenerate with ./tools/Build-SamplesIndex.ps1" -ForegroundColor Red + # Surface a small diff hint by comparing line counts. + $existingLines = ($existing -split "`n").Count + $newLines = ($content -split "`n").Count + Write-Host " committed: $existingLines lines; regenerated: $newLines lines" + exit 1 + } + Write-Host "samples-index.md is up to date." + exit 0 +} + +$OutputPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputPath) # absolute path: .NET file APIs resolve relative paths against the process directory, not the PowerShell location +$outDir = Split-Path -Parent $OutputPath +if (-not (Test-Path $outDir)) { + New-Item -ItemType Directory -Path $outDir -Force | Out-Null +} +[System.IO.File]::WriteAllText($OutputPath, $content) +Write-Host "Wrote $OutputPath" diff --git a/tools/Invoke-PlatformDevLoop.ps1 b/tools/Invoke-PlatformDevLoop.ps1 new file mode 100644 index 0000000..fd1e70c --- /dev/null +++ b/tools/Invoke-PlatformDevLoop.ps1 @@ -0,0 +1,450 @@ +<# +.SYNOPSIS +Run the custom-platform dev loop: validate -> import -> trigger -> fetch task log. + +.DESCRIPTION +Wraps the agent-facing iterative loop into a single call that emits a single +structured JSON document on stdout and exits with a phase-indexed exit code. +Designed to be called by the safeguard-ps-operations agent skill (Phase 3) and +by humans during day-to-day authoring. + +Four modes, selected by mutually-exclusive switches: + + -SchemaOnly local Test-Json against schema/. No appliance contact. + -ValidateOnly schema + Test-SafeguardCustomPlatformScript on the appliance. + -NoTrigger validate + Import-SafeguardCustomPlatformScript. + (default) full loop: validate + import + trigger + fetch task log. 
+ +Output contract (always one JSON object on stdout): + + { + "mode": "SchemaOnly|ValidateOnly|NoTrigger|FullLoop", + "scriptFile": "...", "schemaFile": "...", + "platform": "..." | null, + "operation": "CheckPassword|ChangePassword" | null, + "account": "..." | null, + "phases": [ + { "name": "validate"|"import"|"trigger"|"log", + "status": "success"|"failed"|"skipped", + "durationMs": <int>, + "error": "..." | null, + "data": <object> | null } + ], + "exitCode": <int>, + "startedAt": "<ISO-8601 UTC>", + "endedAt": "<ISO-8601 UTC>" + } + +Exit codes: + 0 full success (a run in which every phase is skipped is a programmer error, not exit 0) + 1 validate phase failed + 2 import phase failed + 3 trigger phase failed + 4 log fetch failed + +The script throws (no JSON written) only for programmer errors: + - required parameter missing for the chosen mode + - input script file not readable + - schema file not found + - no Connect-Safeguard session and no -AccessToken when appliance contact + is required + +Programmer-error throws produce a non-zero PowerShell exit but no JSON +on stdout, so callers can distinguish "tool misuse" from "loop failure". + +.PARAMETER ScriptFile +Path to the custom platform JSON script. Required for every mode. + +.PARAMETER SchemaOnly +Run only local JSON Schema validation against -SchemaFile. No appliance contact. + +.PARAMETER ValidateOnly +Run schema validation and Test-SafeguardCustomPlatformScript only. No import, +no trigger. + +.PARAMETER NoTrigger +Run schema + Test + Import. Skip trigger and log fetch. + +.PARAMETER PlatformToEdit +Custom platform name or numeric ID to import the script into. Required for +-NoTrigger and full-loop modes. + +.PARAMETER Operation +Trigger to run in full-loop mode. CheckPassword maps to +Test-SafeguardAssetAccountPassword; ChangePassword maps to +Invoke-SafeguardAssetAccountPasswordChange. Both pass -ExtendedLogging so a +task log is produced. + +.PARAMETER AccountToUse +Account ID or name to run the trigger against. Required for full-loop. 
+ +.PARAMETER AssetToUse +Optional asset name or ID disambiguator. Passed straight through to the +underlying safeguard-ps cmdlet. + +.PARAMETER AssetPartition +Optional asset-partition name or ID. Passed through. + +.PARAMETER AssetPartitionId +Optional asset-partition numeric ID (overrides -AssetPartition). Passed through. + +.PARAMETER SchemaFile +Override path to the JSON Schema. Defaults to +../schema/custom-platform-script.schema.json relative to this script's directory. + +.PARAMETER Appliance +Pass-through for safeguard-ps cmdlets. Usually unset; the cached +$Global:SafeguardSession from Connect-Safeguard -DeviceCode (or -Browser) is used instead. + +.PARAMETER AccessToken +Pass-through bearer token for safeguard-ps cmdlets. Usually unset. + +.PARAMETER Insecure +Pass-through. Skip SSL verification on the appliance. + +.EXAMPLE +PS> Invoke-PlatformDevLoop.ps1 -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json -SchemaOnly + +.EXAMPLE +PS> Invoke-PlatformDevLoop.ps1 -ScriptFile .\my.json -ValidateOnly -Insecure + +.EXAMPLE +PS> Invoke-PlatformDevLoop.ps1 -ScriptFile .\my.json -PlatformToEdit "My Custom Linux" ` + -Operation CheckPassword -AccountToUse oracle -Insecure +#> +[CmdletBinding()] +param( + [Parameter(Mandatory=$true, Position=0)] + [string]$ScriptFile, + + [switch]$SchemaOnly, + [switch]$ValidateOnly, + [switch]$NoTrigger, + + [object]$PlatformToEdit, + + [ValidateSet('CheckPassword','ChangePassword')] + [string]$Operation, + + [object]$AccountToUse, + [object]$AssetToUse, + [object]$AssetPartition, + [int]$AssetPartitionId, + + [string]$SchemaFile, + + [string]$Appliance, + [object]$AccessToken, + [switch]$Insecure +) + +$ErrorActionPreference = 'Stop' +Set-StrictMode -Version Latest + +# ---- PowerShell version preference ---------------------------------------- +# safeguard-ps targets PS 7. Several cmdlets emit cleaner error records there +# and avoid Windows-PowerShell-only quirks. Warn (don't block) on PS 5.1. 
+if ($PSVersionTable.PSVersion.Major -lt 7) { + Write-Warning "Running on PowerShell $($PSVersionTable.PSVersion). PowerShell 7+ is recommended for safeguard-ps; continuing anyway." +} + +# ---- minimum module version ------------------------------------------------ +# safeguard-ps 8.4.3 added -ExtendedLogging to Invoke-SafeguardAssetSshHostKeyDiscovery, +# which the new-platform workflow relies on to capture a persistent task log when +# host-key discovery fails (see docs/agent-reference/failure-patterns.md). Earlier +# versions emit only the surface 60307 error with no task log. +$script:MinSafeguardPsVersion = [Version]'8.4.3' +$installedVersions = @(Get-Module -ListAvailable -Name safeguard-ps | + Sort-Object Version -Descending | + Select-Object -ExpandProperty Version) +if (-not $SchemaOnly -and ($installedVersions.Count -eq 0 -or $installedVersions[0] -lt $script:MinSafeguardPsVersion)) { # -SchemaOnly runs only local Test-Json, so it does not need the module + $have = if ($installedVersions.Count -gt 0) { $installedVersions[0] } else { '<none>' } + throw "Invoke-PlatformDevLoop.ps1 requires safeguard-ps >= $($script:MinSafeguardPsVersion); found $have. Run: Install-Module safeguard-ps -Scope CurrentUser -Force" +} + +# ---- helpers --------------------------------------------------------------- + +function Write-StatusLine { + param([string]$Message) + # All progress text goes to stderr so stdout stays a single JSON document. 
+ [Console]::Error.WriteLine($Message) +} + +function New-PhaseResult { + param([string]$Name) + [ordered]@{ + name = $Name + status = 'skipped' + durationMs = 0 + error = $null + data = $null + } +} + +function Set-PhaseSuccess { + param($Phase, [int]$DurationMs, $Data) + $Phase.status = 'success' + $Phase.durationMs = $DurationMs + $Phase.data = $Data +} + +function Set-PhaseFailure { + param($Phase, [int]$DurationMs, [string]$ErrorMessage, $Data = $null) + $Phase.status = 'failed' + $Phase.durationMs = $DurationMs + $Phase.error = $ErrorMessage + $Phase.data = $Data +} + +function Get-TaskIdFromInformationMessages { + param([string[]]$Messages) + # safeguard-ps's Wait-LongRunningTask emits this exact line via Write-Host + # (Information stream) when a triggered task with extendedLogging completes + # or fails: + # "See extended logs: Get-SafeguardTaskLog <task-guid>" + # Source: safeguard-ps Wait-LongRunningTask (the two extendedLogging + # branches that emit "See extended logs:"); verified against v8.4.3. + # The GUID is not exposed on the cmdlet's return value (which is a + # human-readable multi-line string), so capturing Information messages and + # regex-matching this line is the grounded extraction path. + if (-not $Messages) { return $null } + foreach ($msg in $Messages) { + if ($msg -match 'Get-SafeguardTaskLog\s+([0-9a-fA-F-]{36})') { + return $matches[1] + } + } + return $null +} + +function Test-ApplianceConnection { + param([string]$Token) + if ($Token) { return $true } + try { + $session = Get-Variable -Name SafeguardSession -Scope Global -ErrorAction Stop -ValueOnly + return [bool]$session + } catch { + return $false + } +} + +# ---- mode resolution and parameter validation ------------------------------ + +# Mutually exclusive switches. +$flagCount = @($SchemaOnly, $ValidateOnly, $NoTrigger | Where-Object { $_ }).Count +if ($flagCount -gt 1) { + throw "Specify at most one of -SchemaOnly, -ValidateOnly, -NoTrigger." 
+} + +$mode = if ($SchemaOnly) { 'SchemaOnly' } + elseif ($ValidateOnly) { 'ValidateOnly' } + elseif ($NoTrigger) { 'NoTrigger' } + else { 'FullLoop' } + +if (-not (Test-Path -LiteralPath $ScriptFile -PathType Leaf)) { + throw "ScriptFile not found or not readable: $ScriptFile" +} +$ScriptFile = (Resolve-Path -LiteralPath $ScriptFile).ProviderPath + +if (-not $SchemaFile) { + $SchemaFile = Join-Path $PSScriptRoot '..\schema\custom-platform-script.schema.json' +} +if (-not (Test-Path -LiteralPath $SchemaFile -PathType Leaf)) { + throw "SchemaFile not found: $SchemaFile" +} +$SchemaFile = (Resolve-Path -LiteralPath $SchemaFile).ProviderPath + +$needsAppliance = $mode -in @('ValidateOnly','NoTrigger','FullLoop') +$needsPlatform = $mode -in @('NoTrigger','FullLoop') +$needsTrigger = $mode -eq 'FullLoop' + +if ($needsAppliance -and -not (Test-ApplianceConnection -Token $AccessToken)) { + throw "Mode '$mode' requires an active Safeguard session. Run Connect-Safeguard -DeviceCode (or -Browser) first, or pass -AccessToken." +} +if ($needsPlatform -and -not $PlatformToEdit) { + throw "Mode '$mode' requires -PlatformToEdit (custom platform name or ID)." +} +if ($needsTrigger -and -not $Operation) { + throw "Mode 'FullLoop' requires -Operation (CheckPassword or ChangePassword)." +} +if ($needsTrigger -and -not $AccountToUse) { + throw "Mode 'FullLoop' requires -AccountToUse." 
+} + +# ---- pass-through builder for safeguard-ps cmdlets ------------------------- + +$applianceArgs = @{} +if ($Appliance) { $applianceArgs.Appliance = $Appliance } +if ($AccessToken) { $applianceArgs.AccessToken = $AccessToken } +if ($Insecure) { $applianceArgs.Insecure = $true } + +# ---- phase records --------------------------------------------------------- + +$validatePhase = New-PhaseResult -Name 'validate' +$importPhase = New-PhaseResult -Name 'import' +$triggerPhase = New-PhaseResult -Name 'trigger' +$logPhase = New-PhaseResult -Name 'log' + +$startedAt = Get-Date +$exitCode = 0 + +# ---- phase 1: validate ----------------------------------------------------- + +Write-StatusLine "[validate] schema=$SchemaFile script=$ScriptFile" +$sw = [System.Diagnostics.Stopwatch]::StartNew() +try { + $scriptText = Get-Content -LiteralPath $ScriptFile -Raw + # Test-Json throws with a useful message; capture it. + $null = $scriptText | Test-Json -SchemaFile $SchemaFile -ErrorAction Stop + $localPreview = $null + if ($mode -eq 'SchemaOnly') { + Set-PhaseSuccess -Phase $validatePhase -DurationMs $sw.ElapsedMilliseconds -Data @{ schemaOnly = $true } + } + else { + Write-StatusLine "[validate] calling Test-SafeguardCustomPlatformScript" + $apiPreview = Test-SafeguardCustomPlatformScript @applianceArgs -ScriptFile $ScriptFile + Set-PhaseSuccess -Phase $validatePhase -DurationMs $sw.ElapsedMilliseconds -Data $apiPreview + } +} +catch { + Set-PhaseFailure -Phase $validatePhase -DurationMs $sw.ElapsedMilliseconds -ErrorMessage $_.Exception.Message + $exitCode = 1 +} +finally { + $sw.Stop() +} + +# ---- phase 2: import ------------------------------------------------------- + +if ($exitCode -eq 0 -and $mode -in @('NoTrigger','FullLoop')) { + Write-StatusLine "[import] platform=$PlatformToEdit" + $sw = [System.Diagnostics.Stopwatch]::StartNew() + try { + $imported = Import-SafeguardCustomPlatformScript @applianceArgs -PlatformToEdit $PlatformToEdit -ScriptFile $ScriptFile + 
Set-PhaseSuccess -Phase $importPhase -DurationMs $sw.ElapsedMilliseconds -Data $imported + } + catch { + Set-PhaseFailure -Phase $importPhase -DurationMs $sw.ElapsedMilliseconds -ErrorMessage $_.Exception.Message + $exitCode = 2 + } + finally { + $sw.Stop() + } +} + +# ---- phase 3: trigger ------------------------------------------------------ + +$triggerInfo = $null # Information-stream messages captured from the trigger cmdlet +$triggerText = $null # Cmdlet return value (multi-line human-readable status string) +$triggerTaskId = $null +$triggerFailureLog = $null # Structured log array attached to SafeguardLongRunningTaskException +if ($exitCode -eq 0 -and $mode -eq 'FullLoop') { + Write-StatusLine "[trigger] op=$Operation account=$AccountToUse" + $sw = [System.Diagnostics.Stopwatch]::StartNew() + try { + $triggerArgs = @{} + $applianceArgs + $triggerArgs.AccountToUse = $AccountToUse + $triggerArgs.ExtendedLogging = $true + if ($PSBoundParameters.ContainsKey('AssetToUse')) { $triggerArgs.AssetToUse = $AssetToUse } + if ($PSBoundParameters.ContainsKey('AssetPartition')) { $triggerArgs.AssetPartition = $AssetPartition } + if ($PSBoundParameters.ContainsKey('AssetPartitionId')) { $triggerArgs.AssetPartitionId = $AssetPartitionId } + + # Wait-LongRunningTask in safeguard-ps writes the "See extended logs: ..." + # line via Write-Host (Information stream, 6). -InformationVariable captures + # those records without polluting stdout. 
+ $invokeArgs = @{} + $triggerArgs + $invokeArgs.InformationVariable = 'devloopTriggerInfo' + $invokeArgs.InformationAction = 'SilentlyContinue' + + switch ($Operation) { + 'CheckPassword' { $triggerText = Test-SafeguardAssetAccountPassword @invokeArgs } + 'ChangePassword' { $triggerText = Invoke-SafeguardAssetAccountPasswordChange @invokeArgs } + } + + $triggerInfo = @($devloopTriggerInfo | ForEach-Object { $_.MessageData.ToString() }) + $triggerTaskId = Get-TaskIdFromInformationMessages -Messages $triggerInfo + + Set-PhaseSuccess -Phase $triggerPhase -DurationMs $sw.ElapsedMilliseconds -Data ([ordered]@{ + taskId = $triggerTaskId + outputText = $triggerText + informationStream = $triggerInfo + }) + } + catch { + # Capture whatever Information records were emitted before the throw (the + # "See extended logs: ..." line is emitted before the exception in + # Wait-LongRunningTask's failure path). + try { $triggerInfo = @($devloopTriggerInfo | ForEach-Object { $_.MessageData.ToString() }) } catch { $triggerInfo = @() } + $triggerTaskId = Get-TaskIdFromInformationMessages -Messages $triggerInfo + + # SafeguardLongRunningTaskException carries the structured log array. + $exData = $null + $ex = $_.Exception + try { + if ($ex.PSObject.Properties['TaskLog'] -and $ex.TaskLog) { + $exData = @($ex.TaskLog | ForEach-Object { + [ordered]@{ Timestamp = $_.Timestamp; Status = $_.Status; Message = $_.Message } + }) + $triggerFailureLog = $exData + } + } catch { } + + Set-PhaseFailure -Phase $triggerPhase -DurationMs $sw.ElapsedMilliseconds -ErrorMessage $ex.Message -Data ([ordered]@{ + taskId = $triggerTaskId + informationStream = $triggerInfo + taskLog = $triggerFailureLog + }) + $exitCode = 3 + } + finally { + $sw.Stop() + } +} + +# ---- phase 4: log fetch ---------------------------------------------------- + +# In the trigger-failure path we still try the log fetch if we got a task ID, +# because the extended log is the most useful artifact for an agent to analyze. 
+# Exit code reflects the trigger failure (3), not the log fetch. +if ($mode -eq 'FullLoop' -and ($exitCode -eq 0 -or ($exitCode -eq 3 -and $triggerTaskId))) { + $sw = [System.Diagnostics.Stopwatch]::StartNew() + try { + if (-not $triggerTaskId) { + throw "Could not extract a task ID from the trigger Information stream. Expected a 'See extended logs: Get-SafeguardTaskLog ' message but none was captured." + } + Write-StatusLine "[log] taskId=$triggerTaskId" + $log = Get-SafeguardTaskLog @applianceArgs -TaskId $triggerTaskId + Set-PhaseSuccess -Phase $logPhase -DurationMs $sw.ElapsedMilliseconds -Data ([ordered]@{ + taskId = $triggerTaskId + log = $log + }) + } + catch { + Set-PhaseFailure -Phase $logPhase -DurationMs $sw.ElapsedMilliseconds -ErrorMessage $_.Exception.Message + if ($exitCode -eq 0) { $exitCode = 4 } + } + finally { + $sw.Stop() + } +} + +# ---- emit result ----------------------------------------------------------- + +$endedAt = Get-Date + +$result = [ordered]@{ + mode = $mode + scriptFile = $ScriptFile + schemaFile = $SchemaFile + platform = if ($PlatformToEdit) { $PlatformToEdit } else { $null } + operation = if ($Operation) { $Operation } else { $null } + account = if ($AccountToUse) { $AccountToUse } else { $null } + phases = @($validatePhase, $importPhase, $triggerPhase, $logPhase) + exitCode = $exitCode + startedAt = $startedAt.ToUniversalTime().ToString('o') + endedAt = $endedAt.ToUniversalTime().ToString('o') +} + +# Depth 100 because task-log JSON nests deeply (per-command Input/Output records). +$result | ConvertTo-Json -Depth 100 + +exit $exitCode diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..878d897 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,395 @@ +# tools/ + +Tooling for the SafeguardCustomPlatform repo. + +| Script | Purpose | Audience | +|---|---|---| +| `TestTool.ps1` | Original human-facing upload + trigger script. Edit-in-place script with hard-coded variables. 
| Humans | +| `Build-SamplesIndex.ps1` | Regenerates `docs/agent-reference/samples-index.md` from `samples/` and `templates/`. | CI + agents | +| `Test-AgentLinks.ps1` | Validates relative links in `AGENTS.md` and `.agents/skills/*/SKILL.md` against `docs/agent-reference/`. | CI | +| `Invoke-PlatformDevLoop.ps1` | Structured dev-loop wrapper: validate → import → trigger → fetch task log. JSON output, phase-indexed exit codes. | Agents (Phase 3 `safeguard-ps-operations` skill) and humans | + +The remainder of this document covers `Invoke-PlatformDevLoop.ps1`. + +--- + +## Invoke-PlatformDevLoop.ps1 + +Wraps the iterative custom-platform dev loop into a single call. Always emits +one JSON document on stdout describing each phase, and exits with a +phase-indexed exit code. Designed to be cited line-for-line by the +`safeguard-ps-operations` agent skill. + +### Modes + +Selected by mutually-exclusive switches. Specifying more than one is a +programmer error and throws. + +| Mode | Switch | What runs | Appliance contact | +|---|---|---|---| +| **SchemaOnly** | `-SchemaOnly` | local `Test-Json` against `schema/custom-platform-script.schema.json` | none | +| **ValidateOnly** | `-ValidateOnly` | schema + `Test-SafeguardCustomPlatformScript` | yes (no writes) | +| **NoTrigger** | `-NoTrigger` | schema + `Test-…` + `Import-SafeguardCustomPlatformScript` | yes (writes platform script) | +| **FullLoop** | _(default)_ | schema + `Test-…` + `Import-…` + trigger + fetch task log | yes (writes platform script + runs trigger) | + +### Parameters + +| Parameter | Required for | Notes | +|---|---|---| +| `-ScriptFile` | every mode | Path to the custom-platform JSON. | +| `-PlatformToEdit` | NoTrigger, FullLoop | Custom platform name or numeric ID to import the script into. | +| `-Operation` | FullLoop | `CheckPassword` (uses `Test-SafeguardAssetAccountPassword`) or `ChangePassword` (uses `Invoke-SafeguardAssetAccountPasswordChange`). Both pass `-ExtendedLogging`. 
| +| `-AccountToUse` | FullLoop | Account name or ID. Pass-through to the trigger cmdlet. | +| `-AssetToUse` | optional | Asset disambiguator. Pass-through. | +| `-AssetPartition` / `-AssetPartitionId` | optional | Asset-partition disambiguators. Pass-through. | +| `-SchemaFile` | optional | Override JSON Schema path. Defaults to `<repo-root>/schema/custom-platform-script.schema.json`, resolved relative to this script. | +| `-Appliance`, `-AccessToken`, `-Insecure` | optional | Pass-through to safeguard-ps cmdlets. Usually you connect once via `Connect-Safeguard -DeviceCode` (or `-Browser`) and let the cached `$Global:SafeguardSession` carry through. | + +### Authentication + +`Invoke-PlatformDevLoop.ps1` does **not** call `Connect-Safeguard` itself. +Connect once before invoking, preferring `-DeviceCode` (PKCE; prints a +verification URL and short code rather than launching a local browser), +falling back to `-Browser` if the appliance does not have the Device Code +grant enabled: + +```powershell +Connect-Safeguard -Appliance <appliance> -Insecure -DeviceCode +``` + +If you bypass the cached session by passing `-AccessToken`, that token is +forwarded to every safeguard-ps cmdlet the script calls. + +### Output JSON shape + +One JSON document on stdout per invocation. Verbose progress (the +`[validate] …`, `[import] …`, `[trigger] …`, `[log] …` lines) goes to +stderr so stdout stays parseable. Top-level shape: + +```jsonc +{ + "mode": "SchemaOnly|ValidateOnly|NoTrigger|FullLoop", + "scriptFile": "<ScriptFile path>", + "schemaFile": "<SchemaFile path>", + "platform": "<PlatformToEdit value>|null", + "operation": "CheckPassword|ChangePassword|null", + "account": "<AccountToUse value>|null", + "phases": [ <validate>, <import>, <trigger>, <log> ], + "exitCode": <0-4>, + "startedAt": "<ISO 8601 UTC timestamp>", + "endedAt": "<ISO 8601 UTC timestamp>" +} +``` + +Every phase has the same skeleton: + +```jsonc +{ + "name": "validate|import|trigger|log", + "status": "success|failed|skipped", + "durationMs": <integer>, + "error": "<message>" | null, + "data": <phase-specific object> | null +} +``` + +#### `phases[0]` (validate) `data` + +* SchemaOnly: `{ "schemaOnly": true }`. 
+* ValidateOnly / NoTrigger / FullLoop: the platform-preview object returned + by `Test-SafeguardCustomPlatformScript`. Notable fields include + `SupportedOperations` (array of operation names), `ConnectionProperties` + (boolean capability flags), `PasswordFeatureProperties`, + `CustomScriptProperties.Parameters` (flat list of `{Name, DefaultValue, + Type, TaskName}`). + + Excerpt from a real run against `samples/ssh/generic-linux/GenericLinux.json`: + + ```jsonc + { + "Id": 0, + "PlatformType": "Custom", + "Name": "ExampleLinuxScript", + "SupportedOperations": [ + "TestConnection", "CheckPassword", "ChangePassword", "DiscoverSshHostKey" + ], + "CustomScriptProperties": { + "HasScript": true, + "Parameters": [ + { "Name": "Port", "DefaultValue": "22", "Type": "Integer", "TaskName": "TestConnection" }, + { "Name": "FuncUserName", "DefaultValue": "", "Type": "String", "TaskName": "TestConnection" }, + // ... + ] + } + } + ``` + +#### `phases[1]` (import) `data` + +The updated custom-platform object as returned by +`Import-SafeguardCustomPlatformScript` (which internally re-reads the +platform after the PUT). Same shape as the validate preview but with +real `Id` and the platform's configured display name. + +Failure example (real output, exit 2): + +```jsonc +{ + "name": "import", + "status": "failed", + "durationMs": 38, + "error": "Unable to find custom platform matching 'NoSuchPlatform_DevLoopTest_123'", + "data": null +} +``` + +#### `phases[2]` (trigger) `data` + +```jsonc +{ + "taskId": "", // extracted from the Information stream + "outputText": "", + "informationStream": [ "" ] +} +``` + +The **task GUID is not on the cmdlet's return value.** safeguard-ps's +`Wait-LongRunningTask` emits the line +`See extended logs: Get-SafeguardTaskLog ` via `Write-Host` +(Information stream, 6). The script captures that stream with +`-InformationVariable` and regex-matches `Get-SafeguardTaskLog\s+` to +extract the ID. 
The cmdlet's return value (`outputText`) is the +human-readable status summary — useful for display, but not parseable. + +Real success excerpt (CheckPassword against an Ubuntu 24.04 asset): + +```text +Task completed successfully. + 6/2/2026 11:55:35 PM Queued Queuing task. + 6/2/2026 11:55:35 PM Running Starting task. + 6/2/2026 11:55:35 PM Checking Verifying Password. + 6/2/2026 11:55:35 PM Connecting Connecting with asset ubtu2404-1.dan.test (...) + ... + 6/2/2026 11:55:38 PM Finalizing The password for account root matches the password on the asset. + 6/2/2026 11:55:38 PM Success Task completed successfully. +``` + +On task **failure**, safeguard-ps throws `Ex.SafeguardLongRunningTaskException` +(constructed by `New-LongRunningTaskException`) which carries a typed +`TaskLog` array. The dev-loop +script catches that, surfaces `error = exception message`, and adds a +`taskLog` field with the structured entries. + +Real failure-path output (CheckPassword against an account whose stored +password was deliberately wrong): + +```jsonc +{ + "name": "trigger", + "status": "failed", + "durationMs": 4590, + "error": "The current account password does not match the password on the asset.", + "data": { + "taskId": "3e5c7705-5eea-11f1-bfb2-df700470d6bc", + "informationStream": [ + " 6/3/2026 1:19:18 AM Queued Queuing task.", + " 6/3/2026 1:19:19 AM Connecting Connecting with asset ubtu2404-1.dan.test (...)", + " 6/3/2026 1:19:21 AM PasswordMismatch The password for account root does not match the password on the asset.", + " 6/3/2026 1:19:21 AM PasswordMismatch The current account password does not match the password on the asset.", + "See extended logs: Get-SafeguardTaskLog 3e5c7705-5eea-11f1-bfb2-df700470d6bc" + ], + "taskLog": [ + { "Timestamp": "6/3/2026 1:19:18 AM", "Status": "Queued", "Message": "Queuing task." 
}, + { "Timestamp": "6/3/2026 1:19:19 AM", "Status": "Connecting", "Message": "Connecting with asset ubtu2404-1.dan.test (...)" }, + { "Timestamp": "6/3/2026 1:19:21 AM", "Status": "PasswordMismatch", "Message": "The password for account root does not match the password on the asset." }, + { "Timestamp": "6/3/2026 1:19:21 AM", "Status": "PasswordMismatch", "Message": "The current account password does not match the password on the asset." } + ] + } +} +``` + +The `taskLog` element shape is fixed: `{ Timestamp, Status, Message }`, +defined by +`PangaeaAppliance\src\Data\Transfer\V2\PlatformTasks\TaskLog.cs`. `Status` +is an enum (`PangaeaAppliance\src\Data\Transfer\V2\PlatformTasks\TaskStatus.cs`) +with 25 stable values including `Queued`, `Running`, `Checking`, +`Connecting`, `Changing`, `Saving`, `Finalizing`, `Success`, `Failure`, +`Cancelled`, `Skipped`, `PasswordMismatch`, `SshHostKeyMismatch`, +`SshKeyMismatch`, `ApiKeyMismatch`, `FileMismatch`, `Discovering`, +`Submitted`, and assorted `Service*`/`Task*` outcomes. The first +mismatch-class entry typically pins the failure to a specific +account/asset; the last entry is the summary message that is also +surfaced as the phase `error`. + +#### `phases[3]` (log) `data` + +```jsonc +{ + "taskId": "", + "log": [ , , ... ] +} +``` + +`log` is the array returned by `Get-SafeguardTaskLog -TaskId `. Each +entry has shape `{Recorded, Level, Event}`. The appliance exposes named +logs via two endpoints (`GET /Core/v4/TaskLogs/{taskId}` lists the available +log names; `GET /Core/v4/TaskLogs/{taskId}/{logName}` returns the events +for that named log). 
When called without a +`-LogName`, safeguard-ps's `Get-SafeguardTaskLog` iterates the listed +logs and emits a synthetic separator entry between each: + +```jsonc +{ "Recorded": "", "Level": "", "Event": "--- <LogName> ---" } +``` + +The two log names produced by SPP for platform tasks are stable string +constants: + +* `Operation` — high-level platform-script execution log +* `SshCommunication` — raw SSH transport-level frames (when applicable) + +Both are defined in +`Hercules\Source\Rsms.Public\Constants\Logging.cs:14-15`. + +Real entry shapes: + +```jsonc +// Section header (synthesised by safeguard-ps, not the appliance) +{ "Recorded": "", "Level": "", "Event": "--- Operation ---" } +// Operation entry +{ "Recorded": "2026-06-02T23:55:35.4880454Z", + "Level": "Information", + "Event": "Initializing CheckPassword platform task 8c1e2bd4-…\r\n" } +// SshCommunication entry +{ "Recorded": "2026-06-02T23:55:36.6461523Z", + "Level": "Debug", + "Event": "Send : grep -q '^root:' /etc/passwd; echo \"CHECKUSER=$?\"\r\n" } +``` + +**Secret handling.** SPP redacts known credential parameters server-side, replacing them with +the literal string `**secret**` before returning the log. The redaction +constant is defined in +`Hercules\Source\Hercules.DevKit\Constants\ParameterConstants.cs:5` +(`public const string Secret = "**secret**"`). +Agents should NOT attempt to recover real values from these markers. +Custom-script authors who introduce new secret parameters should declare +them with `Type: "Secret"` so SPP applies the same redaction. + +The log fetch is best-effort even when the trigger fails: if the trigger +phase fails (status = failed, exit 3) **and** a task GUID was extracted, +the log phase still runs to capture the extended log. The exit code +remains 3 (the trigger failure), not 4. 
+ +#### Exit-4 error shape + +`Get-SafeguardTaskLog` raises a terminating error when the task ID is not +recognised by the appliance: + +``` +OperationStopped: 404: Not Found -- 0: +``` + +The script catches that, sets `phases[3].status = "failed"` with that +message in `phases[3].error`, and exits 4. Exit 4 also covers the case +where no task ID can be extracted from the trigger's Information stream: +safeguard-ps's `Wait-LongRunningTask` emits the +`See extended logs: …` line only when extended logging is on. +That is why the dev-loop script always passes +`-ExtendedLogging` to the trigger cmdlet — making this exit code primarily +a guard against transient appliance issues (revoked session, log-archive +churn) rather than a normal authoring failure. + +### Exit-code contract + +The script exits with the **1-based position of the first failed phase** (`phases[0]` → 1, …, `phases[3]` → 4), or 0 on +full success. A skipped phase does not affect the exit code. + +| Exit | Meaning | Verified | +|---|---|---| +| 0 | All non-skipped phases succeeded. | ✓ live (SchemaOnly, ValidateOnly, NoTrigger, FullLoop) | +| 1 | Validate phase failed (local schema OR appliance `Test-SafeguardCustomPlatformScript`). | ✓ live (local schema reject) | +| 2 | Import phase failed. | ✓ live (`Unable to find custom platform matching '<name>'`) | +| 3 | Trigger phase failed. | ✓ live (`PasswordMismatch` against ubtu2404-1.dan.test) | +| 4 | Log fetch phase failed. | ✓ via cmdlet probe (see "Exit-4 error shape" above) | + +The script emits its JSON on stdout **even on phase failure** so callers +can read structured details. 
The script throws (no JSON, non-zero PS exit +code) only on programmer error: + +* mutually-exclusive mode switches both set +* `-ScriptFile` not readable +* `-SchemaFile` not found +* mode requires appliance contact and there is no `Connect-Safeguard` + session and no `-AccessToken` +* mode requires `-PlatformToEdit`, `-Operation`, or `-AccountToUse` and + the parameter is missing + +### Examples (verified real output) + +```powershell +# 1. Local schema check only — no appliance. +.\tools\Invoke-PlatformDevLoop.ps1 ` + -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json ` + -SchemaOnly +# Exit 0; phases[0].status = success; phases 1..3 skipped. + +# 2. Appliance dry-run (validate only). +Connect-Safeguard -Appliance 192.168.117.15 -Insecure -DeviceCode +.\tools\Invoke-PlatformDevLoop.ps1 ` + -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json ` + -ValidateOnly -Insecure +# Exit 0; phases[0].data carries the full platform preview. + +# 3. Validate + import, no trigger. +.\tools\Invoke-PlatformDevLoop.ps1 ` + -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json ` + -PlatformToEdit "DELETELINUX" -NoTrigger -Insecure +# Exit 0; phases[1].data carries the updated platform object. + +# 4. Full loop: CheckPassword with extended logging. +.\tools\Invoke-PlatformDevLoop.ps1 ` + -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json ` + -PlatformToEdit "DELETELINUX" ` + -Operation CheckPassword -AccountToUse 10 -Insecure +# Exit 0; ~5s end-to-end against an SSH target; +# phases[2].data.taskId == phases[3].data.taskId; phases[3].data.log +# contains both SshCommunication and Operation sections. + +# 5. Failure example: import into a non-existent platform. 
+.\tools\Invoke-PlatformDevLoop.ps1 ` + -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json ` + -PlatformToEdit "NoSuchPlatform_DevLoopTest_123" -NoTrigger -Insecure +# Exit 2; phases[0]=success, phases[1]=failed +# error: "Unable to find custom platform matching 'NoSuchPlatform_DevLoopTest_123'" + +# 6. Failure example: CheckPassword with a wrong stored credential. +.\tools\Invoke-PlatformDevLoop.ps1 ` + -ScriptFile .\samples\ssh\generic-linux\GenericLinux.json ` + -PlatformToEdit "DELETELINUX" ` + -Operation CheckPassword -AccountToUse 10 -Insecure +# Exit 3; phases 0..1 = success, phase 2 (trigger) = failed with structured +# taskLog, phase 3 (log) = success (best-effort fetch still ran because +# the taskId was extractable from the Information stream). +``` + +### Versions verified + +* PowerShell 7.6.2 +* `safeguard-ps` 8.4.3 (minimum — enforced at script start; earlier versions lack `-ExtendedLogging` on `Invoke-SafeguardAssetSshHostKeyDiscovery`) +* SPP appliance reachable at the time of authoring. + +### Cmdlet citations + +Cmdlets the script calls. 
Syntax sourced from `Get-Help -Full`
+against the installed module — not paraphrased from memory:
+
+* `Test-Json` (`Microsoft.PowerShell.Utility`, PS 7+) — local schema check
+* `Test-SafeguardCustomPlatformScript` — POSTs the script to `Core/Platforms/ValidateScript/Raw`; returns the platform-preview object the script would produce
+* `Import-SafeguardCustomPlatformScript` — PUTs the script to `Core/Platforms/{Id}/Script/Raw`, then re-reads the platform via `Get-SafeguardCustomPlatform` and returns it
+* `Test-SafeguardAssetAccountPassword` — CheckPassword trigger (calls `POST Core/v4/AssetAccounts/{id}/CheckPassword?extendedLogging=true`; appliance handler `AssetAccountsController_Tasks.cs::CheckPasswordAsync`)
+* `Invoke-SafeguardAssetAccountPasswordChange` — ChangePassword trigger (calls `POST Core/v4/AssetAccounts/{id}/ChangePassword?extendedLogging=true`; appliance handler `AssetAccountsController_Tasks.cs::ChangePasswordAsync`)
+* `Get-SafeguardTaskLog` — when no `-LogName` is given, calls `GET Core/TaskLogs/{taskId}` to list available logs, then iterates each via `GET Core/TaskLogs/{taskId}/{logName}`; emits a synthetic `--- <LogName> ---` separator entry between sections
+
+The trigger cmdlets call `Invoke-SafeguardMethod -LongRunningTask` under
+the hood, which polls until `RequestStatus.PercentComplete == 100` and
+emits the extended-log hint via `Write-Host` from `Wait-LongRunningTask`.
diff --git a/tools/Test-AgentLinks.ps1 b/tools/Test-AgentLinks.ps1
new file mode 100644
index 0000000..a5f7b0b
--- /dev/null
+++ b/tools/Test-AgentLinks.ps1
@@ -0,0 +1,117 @@
+#requires -Version 5.1
+<#
+.SYNOPSIS
+    Validates that every relative markdown link in AGENTS.md, every
+    .agents/skills/<name>/SKILL.md, and every docs/agent-reference/*.md
+    file resolves to a real file on disk.
+
+.DESCRIPTION
+    The agent skill system depends on agents being able to follow
+    citations from AGENTS.md and SKILL.md files. A broken citation makes
+    the affected skill silently unreliable. CI runs this script on every
+    pull request and fails the build on any unresolved relative link.
+
+    Rules:
+      - Absolute URLs (http:, https:, mailto:, ftp:, tel:) are skipped.
+      - Anchors (#fragment) and query strings (?...) are stripped before
+        resolving.
+      - Paths are resolved relative to the markdown file that contains
+        the link.
+      - In-page anchors (links starting with '#') are skipped — anchor
+        validity is not in scope for Phase 1.
+
+    The script exits 0 on success, 1 on any broken link.
+
+.PARAMETER RepoRoot
+    Path to the repository root. Defaults to the parent of this script's directory.
+#>
+[CmdletBinding()]
+param(
+    [string] $RepoRoot
+)
+
+$ErrorActionPreference = 'Stop'
+
+# Default the repo root to the directory above the one holding this script.
+if (-not $RepoRoot) {
+    $RepoRoot = Split-Path -Parent $PSScriptRoot
+    if (-not $RepoRoot) {
+        $RepoRoot = (Resolve-Path (Join-Path $PSScriptRoot '..')).Path
+    }
+}
+# Normalise to a resolved path; its length is used below to build repo-relative names.
+$RepoRoot = (Resolve-Path $RepoRoot).Path
+
+# Gather the files this check covers.
+$targets = @()
+$agentsMd = Join-Path $RepoRoot 'AGENTS.md'
+if (Test-Path $agentsMd) { $targets += Get-Item $agentsMd }
+
+$skillsRoot = Join-Path $RepoRoot '.agents/skills'
+if (Test-Path $skillsRoot) {
+    $targets += Get-ChildItem -Path $skillsRoot -Filter 'SKILL.md' -Recurse -File
+}
+
+$refRoot = Join-Path $RepoRoot 'docs/agent-reference'
+if (Test-Path $refRoot) {
+    $targets += Get-ChildItem -Path $refRoot -Filter '*.md' -Recurse -File
+}
+
+if ($targets.Count -eq 0) {
+    Write-Host 'No agent-facing markdown files found; nothing to check.'
+    exit 0
+}
+
+# Markdown inline links: [text](target), optionally followed by a quoted
+# title. Image links (![alt](target)) share the same target shape, so this
+# regex matches both. The named groups are required: the loop below reads
+# Groups['target'].
+$linkPattern = '\[(?<text>[^\]]*)\]\((?<target>[^)\s]+)(?:\s+"[^"]*")?\)'
+$broken = @()
+
+foreach ($file in $targets) {
+    $content = Get-Content $file.FullName -Raw
+    $fileDir = Split-Path -Parent $file.FullName
+    $relFile = $file.FullName.Substring($RepoRoot.Length).TrimStart('\','/') -replace '\\','/'
+
+    # Deliberately not named $matches: that is the automatic variable the
+    # -match operator below overwrites.
+    $linkMatches = [regex]::Matches($content, $linkPattern)
+    foreach ($m in $linkMatches) {
+        $target = $m.Groups['target'].Value.Trim()
+
+        # Skip absolute URLs and mailto.
+        if ($target -match '^(https?:|mailto:|ftp:|tel:)') { continue }
+
+        # Skip in-page anchors.
+        if ($target.StartsWith('#')) { continue }
+
+        # Strip anchor and query for filesystem resolution.
+        $path = $target -replace '#.*$', '' -replace '\?.*$', ''
+        if (-not $path) { continue }
+
+        # Resolve relative to the file that contains the link. Any path that
+        # does not exist (or cannot be evaluated) counts as broken.
+        $candidate = Join-Path $fileDir $path
+        $exists = $false
+        try {
+            $exists = Test-Path -LiteralPath $candidate
+        } catch {
+            $exists = $false
+        }
+        if (-not $exists) {
+            $broken += [pscustomobject]@{ File = $relFile; Link = $target }
+        }
+    }
+}
+
+if ($broken.Count -gt 0) {
+    Write-Host "Broken links found:" -ForegroundColor Red
+    foreach ($b in $broken) {
+        Write-Host (" {0} -> {1}" -f $b.File, $b.Link) -ForegroundColor Red
+    }
+    exit 1
+}
+
+Write-Host ("Checked {0} agent-facing files; all relative links resolve." -f $targets.Count)
+exit 0