diff --git a/capabilities/web-security/agents/web-security.md b/capabilities/web-security/agents/web-security.md index 2e4f217..41c248e 100644 --- a/capabilities/web-security/agents/web-security.md +++ b/capabilities/web-security/agents/web-security.md @@ -81,7 +81,7 @@ Use tools proactively when they reduce uncertainty or verify a finding. Match th - Use `execute_http` for standard HTTP work: reconnaissance, payload delivery, session-based testing, and response analysis. `reset_http_session` clears cookies/state; `get_http_cookies` inspects the jar. - For fuzzing, wordlist-based attacks, complex encoding chains, multi-request scripting, or any task requiring shell pipelines — use `bash` with `curl`, `python`, `ffuf`, or other CLI tools directly. `execute_http` is not suited for high-volume or programmatic testing. -- Use browser automation only when a real browser is required: DOM behavior, client-side execution, login flows, clickjacking, screenshots, or JavaScript-driven state changes. Prefer the `agent-browser` CLI when it is available on the current `PATH`; use the `agent_browser_*` MCP tools as the fallback. +- Use browser automation only when a real browser is required: DOM behavior, client-side execution, login flows, clickjacking, screenshots, or JavaScript-driven state changes. Prefer the `agent-browser` CLI when it is available on the current `PATH`; use the `agent_browser_*` MCP tools as the fallback. For XSS sink confirmation, use `agent_browser_xss_verifier_start` before triggering the payload and `agent_browser_xss_verifier_check` after rendering to prove controlled JavaScript execution with a token. - Use Protoscope when inspecting or crafting protobuf payloads. Prefer the local `protoscope` CLI when it is available on the current `PATH`; use the `protoscope_*` MCP tools as the fallback. - Use `store_credential` and `get_credential` to preserve auth state instead of manually re-entering secrets or tokens. 
Also supports TOTP/MFA via `add_totp_credential` and `generate_mfa_code`. - Use `assess_confidence` before claiming a vulnerability so your report is grounded in demonstrated evidence rather than a lead or hypothesis. @@ -102,7 +102,7 @@ You may also have tools from MCP servers. Check your tool schema for what's avai - **Proxy tools (Caido, Burp):** Check health first. If it fails, fall back to built-in tools and do not retry. Replay tools (e.g. `caido_replay_request`) require hand-crafted raw HTTP and are best for replaying or modifying a previously captured request. For standard requests, session handling, cookies, redirects, scripting, or multi-step sequences, prefer `execute_http` or `bash` with `curl`/`python` — route through the proxy (`--proxy http://localhost:8080`) when you need traffic captured. - **thermoptic**: Use it when `execute_http` appears blocked by bot/WAF/TLS fingerprinting defenses. Check health first; if unavailable, fall back immediately. - **jxscout**: Finds **gadgets**, not vulnerabilities. Always trace data flow and confirm exploitability before reporting. Load the `jxscout-security-research` skill for the full workflow guide. -- **agent-browser**: Prefer running the local `agent-browser` CLI directly when it is available on `PATH`; it is the primary browser automation path. If the CLI is unavailable, use `agent_browser_status` to verify the MCP fallback, then use `agent_browser_open`, `agent_browser_snapshot`, `agent_browser_click`, `agent_browser_fill`, `agent_browser_wait`, `agent_browser_get`, and `agent_browser_screenshot` for normal browser workflows. Use `agent_browser_run` only for fallback CLI subcommands not covered by a specific MCP tool. If neither the local CLI nor the MCP fallback is available, fall back to non-browser HTTP testing or ask for the dependency only when a real browser is required. 
+- **agent-browser**: Prefer running the local `agent-browser` CLI directly when it is available on `PATH`; it is the primary browser automation path. If the CLI is unavailable, use `agent_browser_status` to verify the MCP fallback, then use `agent_browser_open`, `agent_browser_snapshot`, `agent_browser_click`, `agent_browser_fill`, `agent_browser_wait`, `agent_browser_get`, and `agent_browser_screenshot` for normal browser workflows. Use `agent_browser_xss_verifier_start`, `agent_browser_xss_verifier_check`, and `agent_browser_xss_verifier_reset` for XSS execution proof; adapt one of the returned token payloads to the sink, then check for `CONFIRMED`. Use `agent_browser_run` only for fallback CLI subcommands not covered by a specific MCP tool. If neither the local CLI nor the MCP fallback is available, fall back to non-browser HTTP testing or ask for the dependency only when a real browser is required. - **protoscope**: Prefer running the local `protoscope` CLI directly when it is available on `PATH`; it is the primary protobuf inspection and assembly path. If the CLI is unavailable, use `protoscope_status` to verify the MCP fallback. Use `protoscope_inspect_file` or `protoscope_inspect_hex` to decode binary protobuf payloads, and `protoscope_assemble_text` or `protoscope_assemble_file` to build binary protobuf bytes from Protoscope text. Use descriptor-set and message-type options when available to improve field names and enum output. - **hackerone**: Query HackerOne programs, scopes, reports, and hacktivity. Run `hackerone_health` first to verify credentials. Use `hackerone_get_program_scope` to enumerate in-scope assets before testing. Use `hackerone_search_hacktivity` to study previously disclosed vulnerabilities in a program. Use `hackerone_submit_report` only after the full reporting pipeline completes (assess_confidence → report-preflight → exploit-verifier → report-writer). Requires `H1_USERNAME` and `H1_API_TOKEN` env vars. 
def _drop_empty(value: object) -> object:
    """Return a copy of *value* with empty entries pruned recursively.

    Dicts and lists are rebuilt bottom-up: each child is cleaned first and is
    then discarded when the cleaned result equals ``None``, ``""``, ``[]`` or
    ``{}``. Containers that become empty after cleaning are therefore dropped
    as well. Falsy-but-meaningful values such as ``0`` and ``False`` are kept.
    Any value that is neither a dict nor a list is returned unchanged.
    """

    def _is_blank(candidate: object) -> bool:
        # Equality (not truthiness) so that 0 and False survive the pruning.
        return candidate in (None, "", [], {})

    if isinstance(value, dict):
        kept: dict = {}
        for name, raw in value.items():
            pruned = _drop_empty(raw)
            if not _is_blank(pruned):
                kept[name] = pruned
        return kept

    if isinstance(value, list):
        survivors: list = []
        for raw in value:
            pruned = _drop_empty(raw)
            if not _is_blank(pruned):
                survivors.append(pruned)
        return survivors

    return value
def _build_xss_canary_script(token: str) -> str:
    """Return the JavaScript that arms the XSS verifier canary in the page.

    The script is an immediately-invoked function that:

    * returns ``{status: "already_armed"}`` when a canary with the same token
      is already armed, otherwise restores any previous canary first;
    * installs ``window.__dreadnodeXssProof`` and wraps ``alert``, ``confirm``,
      ``prompt`` and ``console.log/info/warn/error`` so that values passing
      through them are recorded (console and ``postMessage`` traffic is only
      recorded when it contains the token);
    * listens for ``message`` and ``securitypolicyviolation`` events and
      watches for ``<script>`` elements added to the DOM;
    * stores everything on ``window.__dreadnodeXssVerifier`` and returns a
      JSON string describing the armed state.

    Args:
        token: Proof token to look for. It is embedded as a JSON string
            literal, so quotes and backslashes in it cannot break out of the
            generated script.

    Returns:
        JavaScript source suitable for ``agent-browser eval``.
    """
    token_js = json.dumps(token)
    # NOTE: braces are doubled because this is an f-string; only {token_js}
    # is interpolated.
    return f"""
(function() {{
  var token = {token_js};
  var previous = window.__dreadnodeXssVerifier;
  if (previous && previous.token === token && previous.armed) {{
    return JSON.stringify({{status: "already_armed", token: token, url: location.href}});
  }}
  if (previous && typeof previous.restore === "function") {{
    previous.restore();
  }}

  var state = {{
    token: token,
    armed: true,
    armedAt: new Date().toISOString(),
    url: location.href,
    events: [],
    csp: []
  }};

  // Always returns a string. JSON.stringify yields undefined (not a string)
  // for undefined, functions and symbols, which previously made callers throw
  // on .slice/.indexOf, e.g. for prompt(token) without a default value.
  function toText(value) {{
    if (typeof value === "string") return value;
    var text;
    try {{
      text = JSON.stringify(value);
    }} catch (e) {{
      text = undefined;
    }}
    if (typeof text === "string") return text;
    try {{
      return String(value);
    }} catch (e) {{
      return "";
    }}
  }}

  function record(channel, value, detail) {{
    var text = toText(value);
    state.events.push({{
      channel: channel,
      value: text.slice(0, 500),
      matched: text.indexOf(token) !== -1,
      detail: detail || {{}},
      url: location.href,
      at: new Date().toISOString()
    }});
    if (state.events.length > 50) state.events.shift();
  }}

  window.__dreadnodeXssProof = function(value, detail) {{
    record("proof-function", value, detail || {{}});
    return "recorded";
  }};

  var originalAlert = window.alert;
  var originalConfirm = window.confirm;
  var originalPrompt = window.prompt;
  var originalConsole = {{}};
  window.alert = function(message) {{ record("alert", message); return undefined; }};
  window.confirm = function(message) {{ record("confirm", message); return false; }};
  window.prompt = function(message, defaultValue) {{
    record("prompt", message, {{defaultValue: toText(defaultValue).slice(0, 100)}});
    return null;
  }};

  ["log", "info", "warn", "error"].forEach(function(level) {{
    var original = console[level];
    originalConsole[level] = original;
    console[level] = function() {{
      var args = Array.prototype.slice.call(arguments);
      var text = args.map(toText).join(" ");
      if (text.indexOf(token) !== -1) record("console." + level, text);
      if (typeof original === "function") return original.apply(console, args);
    }};
  }});

  function onMessage(event) {{
    var text = toText(event.data);
    if (text.indexOf(token) !== -1) {{
      record("postMessage", text, {{origin: event.origin || ""}});
    }}
  }}

  function onCspViolation(event) {{
    state.csp.push({{
      violatedDirective: event.violatedDirective || "",
      blockedURI: event.blockedURI || "",
      sample: event.sample || "",
      at: new Date().toISOString()
    }});
    if (state.csp.length > 20) state.csp.shift();
  }}

  window.addEventListener("message", onMessage);
  window.addEventListener("securitypolicyviolation", onCspViolation);

  var observer = null;
  try {{
    observer = new MutationObserver(function(mutations) {{
      mutations.forEach(function(mutation) {{
        mutation.addedNodes.forEach(function(node) {{
          if (!node || node.nodeType !== 1) return;
          var tag = String(node.tagName || "").toLowerCase();
          if (tag === "script") {{
            record("script-node", (node.src || "") + " " + (node.textContent || "").slice(0, 500));
          }}
        }});
      }});
    }});
    observer.observe(document.documentElement, {{childList: true, subtree: true}});
  }} catch (e) {{
    record("observer-error", String(e));
  }}

  // Undo every hook this canary installed, including the event listeners and
  // the DOM observer, so re-arming or resetting does not leak them.
  state.restore = function() {{
    window.alert = originalAlert;
    window.confirm = originalConfirm;
    window.prompt = originalPrompt;
    Object.keys(originalConsole).forEach(function(level) {{
      console[level] = originalConsole[level];
    }});
    window.removeEventListener("message", onMessage);
    window.removeEventListener("securitypolicyviolation", onCspViolation);
    if (observer) observer.disconnect();
  }};

  window.__dreadnodeXssVerifier = state;
  return JSON.stringify({{status: "armed", token: token, url: location.href, armedAt: state.armedAt}});
}})()
"""
Re-arm it after navigation on the page where the payload renders.", + "url": state.get("url"), + } + + observed_token = state.get("token") + if observed_token != expected_token: + return { + "verified": False, + "verdict": "TOKEN_MISMATCH", + "confidence": "none", + "reason": "The page contains a different verifier token than the token being checked.", + "expected_token": expected_token, + "observed_token": observed_token, + "url": state.get("url"), + } + + events = state.get("events", []) + matched = [event for event in events if event.get("matched")] + proof_events = [ + event + for event in matched + if event.get("channel") + in { + "proof-function", + "alert", + "confirm", + "prompt", + "console.log", + "console.info", + "console.warn", + "console.error", + "postMessage", + } + ] + script_events = [ + event for event in matched if event.get("channel") == "script-node" + ] + + if proof_events: + return { + "verified": True, + "verdict": "CONFIRMED", + "confidence": "high", + "reason": "A payload-controlled proof token was observed from JavaScript running in the browser page context.", + "url": state.get("url"), + "evidence": proof_events[:10], + "event_count": len(events), + "csp": state.get("csp", []), + } + + if script_events: + return { + "verified": False, + "verdict": "PARTIAL", + "confidence": "medium", + "reason": "A script element containing the proof token was added to the DOM, but JavaScript execution did not return the token through an instrumented proof channel.", + "url": state.get("url"), + "evidence": script_events[:10], + "event_count": len(events), + "csp": state.get("csp", []), + } + + return { + "verified": False, + "verdict": "NOT_DETECTED", + "confidence": "none", + "reason": "No payload-controlled proof token was observed. 
@mcp.tool
async def agent_browser_xss_verifier_start(
    label: Annotated[
        str,
        "Local verifier label for tracking multiple browser sessions",
    ] = "default",
    global_args: Annotated[
        list[str] | None,
        "Optional agent-browser global CLI flags for the target session",
    ] = None,
    timeout: Annotated[int, "Command timeout in seconds"] = DEFAULT_TIMEOUT,
) -> dict:
    """Arm a token-based XSS execution verifier in the current browser page.

    The verifier proves controlled JavaScript execution, not mere reflection.
    It returns an unguessable token and payload templates that send that token
    back through instrumented browser channels.
    """
    # Fresh random token per arm call, so a stale payload cannot satisfy a
    # later check.
    proof_token = secrets.token_urlsafe(16)
    arm_script = _build_xss_canary_script(proof_token)
    page_state = await _eval_browser_json(
        arm_script,
        global_args=global_args,
        timeout=timeout,
    )

    # Only reached when arming succeeded: remember the token and the CLI flags
    # (serialised as JSON text) so check/reset can target the same session.
    remembered_args = json.dumps(global_args or [])
    _XSS_VERIFIER_SESSIONS[label] = {
        "token": proof_token,
        "global_args": remembered_args,
    }

    caveats = [
        "Re-arm after page navigation because JavaScript context is page-scoped.",
        "Blind XSS requires an out-of-band callback URL instead of this browser-local verifier.",
        "A CONFIRMED verdict requires the payload to return the proof token through a proof function, dialog, console, or postMessage channel.",
    ]
    summary = {
        "status": page_state.get("status", "armed"),
        "label": label,
        "token": proof_token,
        "url": page_state.get("url"),
        "payloads": _xss_payload_examples(proof_token),
        "next_step": "Inject or adapt one payload in the suspected XSS sink, trigger rendering in this same browser session, then call agent_browser_xss_verifier_check.",
        "limitations": caveats,
    }
    # Strip empty fields (e.g. a missing URL) from the response.
    return _drop_empty(summary)
@mcp.tool
async def agent_browser_xss_verifier_reset(
    label: Annotated[
        str,
        "Verifier label to clear",
    ] = "default",
    global_args: Annotated[
        list[str] | None,
        "Optional agent-browser global CLI flags for the target session",
    ] = None,
    timeout: Annotated[int, "Command timeout in seconds"] = DEFAULT_TIMEOUT,
) -> dict:
    """Remove the XSS verifier canary from the current browser page.

    Runs the reset script in the browser (using the explicit ``global_args``
    when given, otherwise the flags stored for ``label``) and then forgets the
    local session for ``label``.

    The local session is only discarded after the browser-side reset has
    succeeded. If the browser call raises, the stored token and CLI flags are
    kept so the reset can simply be retried against the same session.

    Returns:
        A dict with ``status``, ``label`` and ``url`` (empty fields omitted).

    Raises:
        RuntimeError: Propagated from the browser eval helper when the reset
            script could not be executed or returned unusable output.
    """
    # Look the session up without removing it yet; see docstring.
    session = _XSS_VERIFIER_SESSIONS.get(label)
    stored = session if session is not None else {}
    session_args = json.loads(stored.get("global_args", "[]"))
    args = global_args if global_args is not None else session_args
    state = await _eval_browser_json(
        _reset_xss_canary_script(),
        global_args=args,
        timeout=timeout,
    )
    # Drop the entry only if it is still the one we looked up: the verifier may
    # have been re-armed under the same label while the reset was in flight.
    if session is not None and _XSS_VERIFIER_SESSIONS.get(label) is session:
        del _XSS_VERIFIER_SESSIONS[label]
    return _drop_empty(
        {
            "status": state.get("status", "reset"),
            "label": label,
            "url": state.get("url"),
        }
    )
+## XSS Execution Verification + +When the web-security capability exposes `agent_browser_xss_verifier_*` MCP tools, use them for XSS sink confirmation instead of relying on reflection or challenge-status APIs alone: + +1. Navigate to the page where the payload will render. +2. Call `agent_browser_xss_verifier_start` in that browser session. +3. Adapt one returned token payload to the suspected sink. +4. Trigger rendering or required user interaction. +5. Call `agent_browser_xss_verifier_check`. + +Treat `CONFIRMED` as browser-side proof of controlled JavaScript execution. Treat `PARTIAL` as injection evidence only: a script node containing the token appeared, but no proof channel returned the token. Re-arm after navigation because the verifier is page-context scoped. + ## Timeouts and Slow Pages Default timeout is 25s (override with `AGENT_BROWSER_DEFAULT_TIMEOUT` in ms). For slow pages, use explicit waits after `open`: diff --git a/capabilities/web-security/skills/exploit-verifier/SKILL.md b/capabilities/web-security/skills/exploit-verifier/SKILL.md index 31b3152..bee89d6 100644 --- a/capabilities/web-security/skills/exploit-verifier/SKILL.md +++ b/capabilities/web-security/skills/exploit-verifier/SKILL.md @@ -40,7 +40,7 @@ Prove the exploit reaches its intended sink and achieves impact. | Vuln Class | Sink Confirmation | |---|---| -| XSS | Payload renders in DOM unencoded | +| XSS | Payload renders in DOM unencoded and controlled JavaScript execution is proven in a browser | | SQLi | Data extracted or error confirms query manipulation | | SSRF | Internal resource accessed or OOB callback received | | Path Traversal | File contents returned | @@ -78,7 +78,7 @@ For AI/LLM payload library: [references/ai-payloads/llm-injection-payloads.md](r ### Key Sink Confirmation Rules -**Web:** XSS requires unencoded DOM rendering (HTML-encoded = FP). SQLi needs SQL-specific errors, not generic ones. SSRF must be server-side fetch, not client redirect. 
CSRF requires no SameSite + no CSRF token + confirmed state change. +**Web:** XSS requires unencoded DOM rendering (HTML-encoded = FP) and browser-side execution proof. When `agent_browser_xss_verifier_*` tools are available, arm the verifier before triggering the sink and require a `CONFIRMED` token verdict for full confirmation. SQLi needs SQL-specific errors, not generic ones. SSRF must be server-side fetch, not client redirect. CSRF requires no SameSite + no CSRF token + confirmed state change. **API:** BOLA/IDOR needs two sessions proving cross-user data access (200 with own data = FP). Mass assignment must show persisted unauthorized fields. JWT bypass must grant elevated access. diff --git a/capabilities/web-security/tests/test_agent_browser_mcp.py b/capabilities/web-security/tests/test_agent_browser_mcp.py index ebe731c..75fbf10 100644 --- a/capabilities/web-security/tests/test_agent_browser_mcp.py +++ b/capabilities/web-security/tests/test_agent_browser_mcp.py @@ -3,6 +3,7 @@ from __future__ import annotations import importlib.util +import json import sys import types from pathlib import Path @@ -54,18 +55,25 @@ def test_expected_tools_registered() -> None: "agent_browser_screenshot", "agent_browser_set_viewport", "agent_browser_close", + "agent_browser_xss_verifier_start", + "agent_browser_xss_verifier_check", + "agent_browser_xss_verifier_reset", } assert set(MODULE.mcp._tools) == expected -def test_resolve_command_prefers_configured_command(monkeypatch: pytest.MonkeyPatch) -> None: +def test_resolve_command_prefers_configured_command( + monkeypatch: pytest.MonkeyPatch, +) -> None: monkeypatch.setenv("AGENT_BROWSER_COMMAND", "npx --yes agent-browser") with patch.object(MODULE.shutil, "which", return_value="/usr/bin/npx"): assert MODULE._resolve_command() == ["npx", "--yes", "agent-browser"] -def test_resolve_command_prefers_agent_browser_binary(monkeypatch: pytest.MonkeyPatch) -> None: +def test_resolve_command_prefers_agent_browser_binary( + 
monkeypatch: pytest.MonkeyPatch, +) -> None: monkeypatch.delenv("AGENT_BROWSER_COMMAND", raising=False) def fake_which(name: str) -> str | None: @@ -85,7 +93,9 @@ def fake_which(name: str) -> str | None: assert MODULE._resolve_command() == ["npx", "--yes", "agent-browser"] -def test_resolve_command_returns_none_when_unavailable(monkeypatch: pytest.MonkeyPatch) -> None: +def test_resolve_command_returns_none_when_unavailable( + monkeypatch: pytest.MonkeyPatch, +) -> None: monkeypatch.delenv("AGENT_BROWSER_COMMAND", raising=False) with patch.object(MODULE.shutil, "which", return_value=None): assert MODULE._resolve_command() is None @@ -136,3 +146,142 @@ async def test_open_passes_argv_without_shell() -> None: async def test_run_empty_args_errors() -> None: result = await MODULE.agent_browser_run([]) assert result == "Error: args must include an agent-browser subcommand." + + +@pytest.mark.asyncio +async def test_xss_verifier_start_returns_token_payloads() -> None: + MODULE._XSS_VERIFIER_SESSIONS.clear() + with ( + patch.object(MODULE.secrets, "token_urlsafe", return_value="proof-token"), + patch.object( + MODULE, + "_run_agent_browser", + return_value=json.dumps( + { + "status": "armed", + "token": "proof-token", + "url": "https://target.test/search", + } + ), + ) as run_browser, + ): + result = await MODULE.agent_browser_xss_verifier_start( + label="case-1", + global_args=["--session-name", "case-1"], + ) + + assert result["status"] == "armed" + assert result["token"] == "proof-token" + assert result["payloads"]["script_tag"].startswith("