diff --git a/.github/actions/setup-python-poetry/action.yml b/.github/actions/setup-python-poetry/action.yml index dd5be4b3a28..fd6c082c7b8 100644 --- a/.github/actions/setup-python-poetry/action.yml +++ b/.github/actions/setup-python-poetry/action.yml @@ -22,6 +22,10 @@ inputs: description: 'Run `poetry lock` during setup. Only enable when a prior step mutates pyproject.toml (e.g. API `@master` VCS rewrite). Default: false.' required: false default: 'false' + enable-cache: + description: 'Whether to enable Poetry dependency caching via actions/setup-python' + required: false + default: 'true' runs: using: 'composite' @@ -74,8 +78,10 @@ runs: uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: python-version: ${{ inputs.python-version }} - cache: 'poetry' - cache-dependency-path: ${{ inputs.working-directory }}/poetry.lock + # Disable cache when callers skip dependency install: Poetry 2.3.4 creates + # the venv in a path setup-python can't hash, breaking the post-step save-cache. 
+ cache: ${{ inputs.enable-cache == 'true' && 'poetry' || '' }} + cache-dependency-path: ${{ inputs.enable-cache == 'true' && format('{0}/poetry.lock', inputs.working-directory) || '' }} - name: Install Python dependencies if: inputs.install-dependencies == 'true' diff --git a/.github/workflows/api-security.yml b/.github/workflows/api-security.yml index 9323df97c34..7b8dc72cb1f 100644 --- a/.github/workflows/api-security.yml +++ b/.github/workflows/api-security.yml @@ -60,6 +60,7 @@ jobs: files: | api/** .github/workflows/api-security.yml + .safety-policy.yml files_ignore: | api/docs/** api/README.md @@ -80,10 +81,8 @@ jobs: - name: Safety if: steps.check-changes.outputs.any_changed == 'true' - run: poetry run safety check --ignore 79023,79027,86217,71600 - # TODO: 79023 & 79027 knack ReDoS until `azure-cli-core` (via `cartography`) allows `knack` >=0.13.0 - # TODO: 86217 because `alibabacloud-tea-openapi == 0.4.3` don't let us upgrade `cryptography >= 46.0.0` - # TODO: 71600 CVE-2024-1135 false positive - fixed in gunicorn 22.0.0, project uses 23.0.0 + # Accepted CVEs, severity threshold, and ignore expirations live in ../.safety-policy.yml + run: poetry run safety check --policy-file ../.safety-policy.yml - name: Vulture if: steps.check-changes.outputs.any_changed == 'true' diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml index 5895188634f..ff467496417 100644 --- a/.github/workflows/backport.yml +++ b/.github/workflows/backport.yml @@ -35,6 +35,7 @@ jobs: egress-policy: block allowed-endpoints: > api.github.com:443 + github.com:443 - name: Check labels id: label_check diff --git a/.github/workflows/docs-bump-version.yml b/.github/workflows/docs-bump-version.yml index c1e3f6afb49..bacc1fa1ecb 100644 --- a/.github/workflows/docs-bump-version.yml +++ b/.github/workflows/docs-bump-version.yml @@ -130,7 +130,7 @@ jobs: with: author: prowler-bot <179230569+prowler-bot@users.noreply.github.com> token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }} - 
base: master + base: ${{ env.BASE_BRANCH }} commit-message: 'docs: Update version to v${{ env.PROWLER_VERSION }}' branch: docs-version-update-to-v${{ env.PROWLER_VERSION }} title: 'docs: Update version to v${{ env.PROWLER_VERSION }}' @@ -221,11 +221,6 @@ jobs: with: egress-policy: audit - - name: Checkout repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - name: Calculate next patch version run: | MAJOR_VERSION=${NEEDS_DETECT_RELEASE_TYPE_OUTPUTS_MAJOR_VERSION} @@ -250,7 +245,13 @@ jobs: NEEDS_DETECT_RELEASE_TYPE_OUTPUTS_PATCH_VERSION: ${{ needs.detect-release-type.outputs.patch_version }} NEEDS_DETECT_RELEASE_TYPE_OUTPUTS_CURRENT_DOCS_VERSION: ${{ needs.detect-release-type.outputs.current_docs_version }} - - name: Bump versions in documentation for patch version + - name: Checkout master branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ env.BASE_BRANCH }} + persist-credentials: false + + - name: Bump versions in documentation for master run: | set -e @@ -261,12 +262,12 @@ jobs: echo "Files modified:" git --no-pager diff - - name: Create PR for documentation update to version branch + - name: Create PR for documentation update to master uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0 with: author: prowler-bot <179230569+prowler-bot@users.noreply.github.com> token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }} - base: ${{ env.VERSION_BRANCH }} + base: ${{ env.BASE_BRANCH }} commit-message: 'docs: Update version to v${{ env.PROWLER_VERSION }}' branch: docs-version-update-to-v${{ env.PROWLER_VERSION }} title: 'docs: Update version to v${{ env.PROWLER_VERSION }}' @@ -282,3 +283,42 @@ jobs: ### License By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
+ + - name: Checkout version branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ env.VERSION_BRANCH }} + persist-credentials: false + + - name: Bump versions in documentation for version branch + run: | + set -e + + # Update prowler-app.mdx with current release version + sed -i "s|PROWLER_UI_VERSION=\"${CURRENT_DOCS_VERSION}\"|PROWLER_UI_VERSION=\"${PROWLER_VERSION}\"|" docs/getting-started/installation/prowler-app.mdx + sed -i "s|PROWLER_API_VERSION=\"${CURRENT_DOCS_VERSION}\"|PROWLER_API_VERSION=\"${PROWLER_VERSION}\"|" docs/getting-started/installation/prowler-app.mdx + + echo "Files modified:" + git --no-pager diff + + - name: Create PR for documentation update to version branch + uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0 + with: + author: prowler-bot <179230569+prowler-bot@users.noreply.github.com> + token: ${{ secrets.PROWLER_BOT_ACCESS_TOKEN }} + base: ${{ env.VERSION_BRANCH }} + commit-message: 'docs: Update version to v${{ env.PROWLER_VERSION }}' + branch: docs-version-update-to-v${{ env.PROWLER_VERSION }}-branch + title: 'docs: Update version to v${{ env.PROWLER_VERSION }}' + labels: no-changelog,skip-sync + body: | + ### Description + + Update Prowler documentation version references to v${{ env.PROWLER_VERSION }} in version branch after releasing Prowler v${{ env.PROWLER_VERSION }}. + + ### Files Updated + - `docs/getting-started/installation/prowler-app.mdx`: `PROWLER_UI_VERSION` and `PROWLER_API_VERSION` + + ### License + + By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
diff --git a/.github/workflows/pr-check-compliance-mapping.yml b/.github/workflows/pr-check-compliance-mapping.yml index 1ed78995bb2..be934d59836 100644 --- a/.github/workflows/pr-check-compliance-mapping.yml +++ b/.github/workflows/pr-check-compliance-mapping.yml @@ -20,7 +20,13 @@ permissions: {} jobs: check-compliance-mapping: - if: contains(github.event.pull_request.labels.*.name, 'no-compliance-check') == false + if: >- + github.event.pull_request.state == 'open' && + contains(github.event.pull_request.labels.*.name, 'no-compliance-check') == false && + ( + (github.event.action != 'labeled' && github.event.action != 'unlabeled') + || github.event.label.name == 'no-compliance-check' + ) runs-on: ubuntu-latest timeout-minutes: 15 permissions: diff --git a/.github/workflows/prepare-release.yml b/.github/workflows/prepare-release.yml index 4ff8fc049cc..07caea72557 100644 --- a/.github/workflows/prepare-release.yml +++ b/.github/workflows/prepare-release.yml @@ -45,6 +45,7 @@ jobs: with: python-version: '3.12' install-dependencies: 'false' + enable-cache: 'false' - name: Configure Git run: | diff --git a/.github/workflows/sdk-container-build-push.yml b/.github/workflows/sdk-container-build-push.yml index 6d606f21be6..8a2bba691c5 100644 --- a/.github/workflows/sdk-container-build-push.yml +++ b/.github/workflows/sdk-container-build-push.yml @@ -81,6 +81,7 @@ jobs: with: python-version: ${{ env.PYTHON_VERSION }} install-dependencies: 'false' + enable-cache: 'false' - name: Inject poetry-bumpversion plugin run: pipx inject poetry poetry-bumpversion diff --git a/.github/workflows/sdk-pypi-release.yml b/.github/workflows/sdk-pypi-release.yml index 1932e84ed7d..7916431dec5 100644 --- a/.github/workflows/sdk-pypi-release.yml +++ b/.github/workflows/sdk-pypi-release.yml @@ -80,6 +80,7 @@ jobs: with: python-version: ${{ env.PYTHON_VERSION }} install-dependencies: 'false' + enable-cache: 'false' - name: Build Prowler package run: poetry build @@ -116,6 +117,7 @@ jobs: with: 
python-version: ${{ env.PYTHON_VERSION }} install-dependencies: 'false' + enable-cache: 'false' - name: Install toml package run: pip install toml diff --git a/.github/workflows/sdk-security.yml b/.github/workflows/sdk-security.yml index c13061ff1b9..ceb6b1db1c7 100644 --- a/.github/workflows/sdk-security.yml +++ b/.github/workflows/sdk-security.yml @@ -83,7 +83,8 @@ jobs: - name: Security scan with Safety if: steps.check-changes.outputs.any_changed == 'true' - run: poetry run safety check -r pyproject.toml + # Accepted CVEs, severity threshold, and ignore expirations live in .safety-policy.yml + run: poetry run safety check -r pyproject.toml --policy-file .safety-policy.yml - name: Dead code detection with Vulture if: steps.check-changes.outputs.any_changed == 'true' diff --git a/.gitignore b/.gitignore index d959c0e524f..9e0e4da8491 100644 --- a/.gitignore +++ b/.gitignore @@ -151,6 +151,8 @@ node_modules # Persistent data _data/ +/openspec/ +/.gitmodules # AI Instructions (generated by skills/setup.sh from AGENTS.md) CLAUDE.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d2503af604..b980e3a6c24 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -152,17 +152,19 @@ repos: - id: safety name: safety description: "Safety is a tool that checks your installed dependencies for known security vulnerabilities" - # TODO: Botocore needs urllib3 1.X so we need to ignore these vulnerabilities 77744,77745. 
Remove this once we upgrade to urllib3 2.X - # TODO: 79023 & 79027 knack ReDoS until `azure-cli-core` (via `cartography`) allows `knack` >=0.13.0 - # TODO: 86217 because `alibabacloud-tea-openapi == 0.4.3` don't let us upgrade `cryptography >= 46.0.0` - # TODO: 71600 CVE-2024-1135 false positive - fixed in gunicorn 22.0.0, project uses 23.0.0 - entry: safety check --ignore 70612,66963,74429,76352,76353,77744,77745,79023,79027,86217,71600 + # Accepted CVEs, severity threshold, and ignore expirations live in .safety-policy.yml + entry: safety check --policy-file .safety-policy.yml language: system pass_filenames: false files: { glob: - ["**/pyproject.toml", "**/poetry.lock", "**/requirements*.txt"], + [ + "**/pyproject.toml", + "**/poetry.lock", + "**/requirements*.txt", + ".safety-policy.yml", + ], } - id: vulture diff --git a/.safety-policy.yml b/.safety-policy.yml new file mode 100644 index 00000000000..fec97e2fb9d --- /dev/null +++ b/.safety-policy.yml @@ -0,0 +1,58 @@ +# Safety policy for `safety check` (Safety CLI 3.x, v2 schema). +# Applied in: .pre-commit-config.yaml, .github/workflows/api-security.yml, +# .github/workflows/sdk-security.yml via `--policy-file`. +# +# Validate: poetry run safety validate policy_file --path .safety-policy.yml + +security: + # Scan unpinned requirements too. Prowler pins via poetry.lock, so this is + # defensive against accidental unpinned entries. + ignore-unpinned-requirements: False + + # CVSS severity filter. 7 = report only HIGH (7.0–8.9) and CRITICAL (9.0–10.0). + # Reference: 9=CRITICAL only, 7=CRITICAL+HIGH, 4=CRITICAL+HIGH+MEDIUM. + ignore-cvss-severity-below: 7 + + # Unknown severity is unrated, not safe. Keep False so unrated CVEs still fail + # the build and get a human eye. Flip to True only if noise is unmanageable. + ignore-cvss-unknown-severity: False + + # Fail the build when a non-ignored vulnerability is found. + continue-on-vulnerability-error: False + + # Explicit accepted vulnerabilities. 
Each entry MUST have a reason and an + # expiry. Expired entries fail the scan, forcing re-audit. + ignore-vulnerabilities: + 77744: + reason: "Botocore requires urllib3 1.X. Remove once upgraded to urllib3 2.X." + expires: '2026-10-22' + 77745: + reason: "Botocore requires urllib3 1.X. Remove once upgraded to urllib3 2.X." + expires: '2026-10-22' + 79023: + reason: "knack ReDoS; blocked until azure-cli-core (via cartography) allows knack >=0.13.0." + expires: '2026-10-22' + 79027: + reason: "knack ReDoS; blocked until azure-cli-core (via cartography) allows knack >=0.13.0." + expires: '2026-10-22' + 86217: + reason: "alibabacloud-tea-openapi==0.4.3 blocks upgrade to cryptography >=46.0.0." + expires: '2026-10-22' + 71600: + reason: "CVE-2024-1135 false positive. Fixed in gunicorn 22.0.0; project uses 23.0.0." + expires: '2026-10-22' + 70612: + reason: "TBD - audit required. Reason not documented in prior --ignore list." + expires: '2026-07-22' + 66963: + reason: "TBD - audit required. Reason not documented in prior --ignore list." + expires: '2026-07-22' + 74429: + reason: "TBD - audit required. Reason not documented in prior --ignore list." + expires: '2026-07-22' + 76352: + reason: "TBD - audit required. Reason not documented in prior --ignore list." + expires: '2026-07-22' + 76353: + reason: "TBD - audit required. Reason not documented in prior --ignore list." + expires: '2026-07-22' diff --git a/README.md b/README.md index dbf836eec05..e2961b20b48 100644 --- a/README.md +++ b/README.md @@ -300,6 +300,36 @@ python prowler-cli.py -v > If your Poetry version is below v2.0.0, continue using `poetry shell` to activate your environment. > For further guidance, refer to the Poetry Environment Activation Guide https://python-poetry.org/docs/managing-environments/#activating-the-environment. 
+# 🛡️ GitHub Action + +The official **Prowler GitHub Action** runs Prowler scans in your GitHub workflows using the official [`prowlercloud/prowler`](https://hub.docker.com/r/prowlercloud/prowler) Docker image. Scans run on any [supported provider](https://docs.prowler.com/user-guide/providers/), with optional [`--push-to-cloud`](https://docs.prowler.com/user-guide/tutorials/prowler-app-import-findings) to send findings to Prowler Cloud and optional SARIF upload so findings show up in the repo's **Security → Code scanning** tab and as inline PR annotations. + +```yaml +name: Prowler IaC Scan +on: + pull_request: + +permissions: + contents: read + security-events: write + actions: read + +jobs: + prowler: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: prowler-cloud/prowler@5.25 + with: + provider: iac + output-formats: sarif json-ocsf + upload-sarif: true + flags: --severity critical high +``` + +Full configuration, per-provider authentication, and SARIF examples: [Prowler GitHub Action tutorial](docs/user-guide/tutorials/prowler-app-github-action.mdx). Marketplace listing: [Prowler Security Scan](https://github.com/marketplace/actions/prowler-security-scan). + # ✏️ High level architecture ## Prowler App diff --git a/action.yml b/action.yml new file mode 100644 index 00000000000..3b5b8d8bdfc --- /dev/null +++ b/action.yml @@ -0,0 +1,307 @@ +name: Prowler Security Scan +description: Run Prowler cloud security scanner using the official Docker image +branding: + icon: cloud + color: green + +inputs: + provider: + description: Cloud provider to scan (e.g. aws, azure, gcp, github, kubernetes, iac). See https://docs.prowler.com for supported providers. + required: true + image-tag: + description: > + Docker image tag for prowlercloud/prowler. + Default is "stable" (latest release). Available tags: + "stable" (latest release), "latest" (master branch, not stable), + "<x.y.z>" (pinned release version). 
+ See all tags at https://hub.docker.com/r/prowlercloud/prowler/tags + required: false + default: stable + output-formats: + description: Output format(s) for scan results (e.g. "json-ocsf", "sarif json-ocsf") + required: false + default: json-ocsf + push-to-cloud: + description: Push scan findings to Prowler Cloud. Requires the PROWLER_CLOUD_API_KEY environment variable. See https://docs.prowler.com/user-guide/tutorials/prowler-app-import-findings#using-the-cli + required: false + default: "false" + flags: + description: 'Additional CLI flags passed to the Prowler scan (e.g. "--severity critical high --compliance cis_aws"). Values containing spaces can be quoted, e.g. "--resource-tag ''Environment=My Server''".' + required: false + default: "" + extra-env: + description: > + Space-, newline-, or comma-separated list of host environment variable NAMES to forward to the Prowler container + (e.g. "AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN" for AWS, + "GITHUB_PERSONAL_ACCESS_TOKEN" for GitHub, "CLOUDFLARE_API_TOKEN" for Cloudflare). + List names only; set the values via `env:` at the workflow or job level (typically from `secrets.*`). + See the README for per-provider examples. + required: false + default: "" + upload-sarif: + description: 'Upload SARIF results to GitHub Code Scanning (requires "sarif" in output-formats and both `security-events: write` and `actions: read` permissions)' + required: false + default: "false" + sarif-file: + description: Path to the SARIF file to upload (auto-detected from output/ if not set) + required: false + default: "" + sarif-category: + description: Category for the SARIF upload (used to distinguish multiple analyses) + required: false + default: prowler + fail-on-findings: + description: Fail the workflow step when Prowler detects findings (exit code 3). By default the action tolerates findings and succeeds. 
+ required: false + default: "false" + +runs: + using: composite + steps: + - name: Validate inputs + shell: bash + env: + INPUT_IMAGE_TAG: ${{ inputs.image-tag }} + INPUT_UPLOAD_SARIF: ${{ inputs.upload-sarif }} + INPUT_OUTPUT_FORMATS: ${{ inputs.output-formats }} + run: | + # Validate image tag format (alphanumeric, dots, hyphens, underscores only) + if [[ ! "$INPUT_IMAGE_TAG" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "::error::Invalid image-tag '${INPUT_IMAGE_TAG}'. Must contain only alphanumeric characters, dots, hyphens, and underscores." + exit 1 + fi + + # Warn if upload-sarif is enabled but sarif not in output-formats + if [ "$INPUT_UPLOAD_SARIF" = "true" ]; then + if [[ ! "$INPUT_OUTPUT_FORMATS" =~ (^|[[:space:]])sarif($|[[:space:]]) ]]; then + echo "::warning::upload-sarif is enabled but 'sarif' is not included in output-formats ('${INPUT_OUTPUT_FORMATS}'). SARIF upload will fail unless you add 'sarif' to output-formats." + fi + fi + + - name: Run Prowler scan + shell: bash + env: + INPUT_PROVIDER: ${{ inputs.provider }} + INPUT_IMAGE_TAG: ${{ inputs.image-tag }} + INPUT_OUTPUT_FORMATS: ${{ inputs.output-formats }} + INPUT_PUSH_TO_CLOUD: ${{ inputs.push-to-cloud }} + INPUT_FLAGS: ${{ inputs.flags }} + INPUT_EXTRA_ENV: ${{ inputs.extra-env }} + INPUT_FAIL_ON_FINDINGS: ${{ inputs.fail-on-findings }} + run: | + set -e + + # Parse space-separated inputs with shlex so values with spaces can be quoted + # (e.g. `--resource-tag 'Environment=My Server'`). + mapfile -t OUTPUT_FORMATS < <(python3 -c 'import shlex, os; [print(t) for t in shlex.split(os.environ.get("INPUT_OUTPUT_FORMATS", ""))]') + mapfile -t EXTRA_FLAGS < <(python3 -c 'import shlex, os; [print(t) for t in shlex.split(os.environ.get("INPUT_FLAGS", ""))]') + mapfile -t EXTRA_ENV_NAMES < <(python3 -c 'import shlex, os; [print(t) for t in shlex.split(os.environ.get("INPUT_EXTRA_ENV", "").replace(",", " "))]') + + env_args=() + for var in "${EXTRA_ENV_NAMES[@]}"; do + [ -z "$var" ] && continue + if [[ ! 
"$var" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then + echo "::error::Invalid env var name '${var}' in extra-env. Names must match ^[A-Za-z_][A-Za-z0-9_]*$." + exit 1 + fi + env_args+=("-e" "$var") + done + + push_args=() + if [ "$INPUT_PUSH_TO_CLOUD" = "true" ]; then + push_args=("--push-to-cloud") + env_args+=("-e" "PROWLER_CLOUD_API_KEY") + fi + + mkdir -p "$GITHUB_WORKSPACE/output" + chmod 777 "$GITHUB_WORKSPACE/output" + + set +e + docker run --rm \ + "${env_args[@]}" \ + -v "$GITHUB_WORKSPACE:/home/prowler/workspace" \ + -v "$GITHUB_WORKSPACE/output:/home/prowler/workspace/output" \ + -w /home/prowler/workspace \ + "prowlercloud/prowler:${INPUT_IMAGE_TAG}" \ + "$INPUT_PROVIDER" \ + --output-formats "${OUTPUT_FORMATS[@]}" \ + "${push_args[@]}" \ + "${EXTRA_FLAGS[@]}" + exit_code=$? + set -e + + # Exit code 3 = findings detected + if [ "$exit_code" -eq 3 ] && [ "$INPUT_FAIL_ON_FINDINGS" != "true" ]; then + echo "::notice::Prowler detected findings (exit code 3). Set fail-on-findings to 'true' to fail the workflow on findings." + exit 0 + fi + exit $exit_code + + - name: Upload scan results + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: prowler-${{ inputs.provider }} + path: output/ + retention-days: 30 + if-no-files-found: warn + + - name: Find SARIF file + if: always() && inputs.upload-sarif == 'true' + id: find-sarif + shell: bash + env: + INPUT_SARIF_FILE: ${{ inputs.sarif-file }} + run: | + if [ -n "$INPUT_SARIF_FILE" ]; then + echo "sarif_path=$INPUT_SARIF_FILE" >> "$GITHUB_OUTPUT" + else + sarif_file=$(find output/ -name '*.sarif' -type f | head -1) + if [ -z "$sarif_file" ]; then + echo "::warning::No .sarif file found in output/. Ensure 'sarif' is included in output-formats." 
+ echo "sarif_path=" >> "$GITHUB_OUTPUT" + else + echo "sarif_path=$sarif_file" >> "$GITHUB_OUTPUT" + fi + fi + + - name: Upload SARIF to GitHub Code Scanning + if: always() && inputs.upload-sarif == 'true' && steps.find-sarif.outputs.sarif_path != '' + uses: github/codeql-action/upload-sarif@d4b3ca9fa7f69d38bfcd667bdc45bc373d16277e # v4 + with: + sarif_file: ${{ steps.find-sarif.outputs.sarif_path }} + category: ${{ inputs.sarif-category }} + + - name: Write scan summary + if: always() + shell: bash + env: + INPUT_PROVIDER: ${{ inputs.provider }} + INPUT_UPLOAD_SARIF: ${{ inputs.upload-sarif }} + INPUT_PUSH_TO_CLOUD: ${{ inputs.push-to-cloud }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + REPO_URL: ${{ github.server_url }}/${{ github.repository }} + BRANCH: ${{ github.head_ref || github.ref_name }} + GH_TOKEN: ${{ github.token }} + run: | + set +e + + # Build a link to the scan step in the workflow logs. Requires `actions: read` + # on the caller's GITHUB_TOKEN; silently skips the link if unavailable. + scan_step_url="" + if [ -n "${GH_TOKEN:-}" ] && command -v gh >/dev/null 2>&1; then + job_info=$(gh api \ + "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT:-1}/jobs" \ + --jq ".jobs[] | select(.runner_name == \"${RUNNER_NAME:-}\")" 2>/dev/null) + if [ -n "$job_info" ]; then + job_id=$(jq -r '.id // empty' <<<"$job_info") + step_number=$(jq -r '[.steps[]? | select((.name // "") | test("Run Prowler scan"; "i")) | .number] | first // empty' <<<"$job_info") + if [ -z "$step_number" ]; then + step_number=$(jq -r '[.steps[]? 
| select(.status == "in_progress") | .number] | first // empty' <<<"$job_info") + fi + if [ -n "$job_id" ] && [ -n "$step_number" ]; then + scan_step_url="${REPO_URL}/actions/runs/${GITHUB_RUN_ID}/job/${job_id}#step:${step_number}:1" + elif [ -n "$job_id" ]; then + scan_step_url="${REPO_URL}/actions/runs/${GITHUB_RUN_ID}/job/${job_id}" + fi + fi + fi + + # Map provider code to a properly-cased display name. + case "$INPUT_PROVIDER" in + alibabacloud) provider_name="Alibaba Cloud" ;; + aws) provider_name="AWS" ;; + azure) provider_name="Azure" ;; + cloudflare) provider_name="Cloudflare" ;; + gcp) provider_name="GCP" ;; + github) provider_name="GitHub" ;; + googleworkspace) provider_name="Google Workspace" ;; + iac) provider_name="IaC" ;; + image) provider_name="Container Image" ;; + kubernetes) provider_name="Kubernetes" ;; + llm) provider_name="LLM" ;; + m365) provider_name="Microsoft 365" ;; + mongodbatlas) provider_name="MongoDB Atlas" ;; + nhn) provider_name="NHN" ;; + openstack) provider_name="OpenStack" ;; + oraclecloud) provider_name="Oracle Cloud" ;; + vercel) provider_name="Vercel" ;; + *) provider_name="${INPUT_PROVIDER^}" ;; + esac + + ocsf_file=$(find output/ -name '*.ocsf.json' -type f 2>/dev/null | head -1) + + { + echo "## Prowler ${provider_name} Scan Summary" + echo "" + + counts="" + if [ -n "$ocsf_file" ] && [ -s "$ocsf_file" ]; then + counts=$(jq -r '[ + length, + ([.[] | select(.status_code == "FAIL")] | length), + ([.[] | select(.status_code == "PASS")] | length), + ([.[] | select(.status_code == "MUTED")] | length), + ([.[] | select(.status_code == "FAIL" and .severity == "Critical")] | length), + ([.[] | select(.status_code == "FAIL" and .severity == "High")] | length), + ([.[] | select(.status_code == "FAIL" and .severity == "Medium")] | length), + ([.[] | select(.status_code == "FAIL" and .severity == "Low")] | length), + ([.[] | select(.status_code == "FAIL" and .severity == "Informational")] | length) + ] | @tsv' "$ocsf_file" 2>/dev/null) 
+ fi + + if [ -n "$counts" ]; then + read -r total fail pass muted critical high medium low info <<<"$counts" + + line="**${fail:-0} failing** · ${pass:-0} passing" + [ "${muted:-0}" -gt 0 ] && line="${line} · ${muted} muted" + echo "${line} — ${total:-0} checks total" + echo "" + echo "| Severity | Failing |" + echo "|----------|---------|" + echo "| ‼️ Critical | ${critical:-0} |" + echo "| 🔴 High | ${high:-0} |" + echo "| 🟠 Medium | ${medium:-0} |" + echo "| 🔵 Low | ${low:-0} |" + echo "| ⚪ Informational | ${info:-0} |" + echo "" + else + echo "_No findings report was produced. Check the scan logs above._" + echo "" + fi + + if [ -n "$scan_step_url" ]; then + echo "**Scan logs:** [view in workflow run](${scan_step_url})" + echo "" + fi + + echo "**Get the full report:** [\`prowler-${INPUT_PROVIDER}\` artifact](${RUN_URL}#artifacts)" + + if [ "$INPUT_UPLOAD_SARIF" = "true" ] && [ -n "$BRANCH" ]; then + encoded_branch=$(jq -nr --arg b "$BRANCH" '$b|@uri') + echo "" + echo "**See results in GitHub Code Security:** [open alerts on \`${BRANCH}\`](${REPO_URL}/security/code-scanning?query=is%3Aopen+branch%3A${encoded_branch})" + fi + + if [ "$INPUT_PUSH_TO_CLOUD" != "true" ]; then + echo "" + echo "---" + echo "" + echo "### Scale ${provider_name} security with Prowler Cloud ☁️" + echo "" + echo "Send this scan's findings to **[Prowler Cloud](https://cloud.prowler.com)** and get:" + echo "" + echo "- **Unified findings** across every cloud, SaaS provider (M365, Google Workspace, GitHub, MongoDB Atlas), IaC repo, Kubernetes cluster, and container image" + echo "- **Posture over time** with alerts, and notifications" + echo "- **Prowler Lighthouse AI**: agentic assistant that triages findings, explains root cause and helps with remediation" + echo "- **50+ Compliance frameworks** mapped automatically" + echo "- **Enterprise-ready platform**: SOC 2 Type 2, SSO/SAML, AWS Security Hub, S3 and Jira integrations" + echo "" + echo "**Get started in 3 steps:**" + 
echo "1. Create an account at [cloud.prowler.com](https://cloud.prowler.com)" + echo "2. Generate a Prowler Cloud API key ([docs](https://docs.prowler.com/user-guide/tutorials/prowler-app-import-findings#using-the-cli))" + echo "3. Add \`PROWLER_CLOUD_API_KEY\` to your GitHub secrets and set \`push-to-cloud: true\` on this action" + echo "" + echo "See [prowler.com/pricing](https://prowler.com/pricing) for plan details." + fi + } >> "$GITHUB_STEP_SUMMARY" diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index 729325ca380..d6ac07f9566 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -4,12 +4,28 @@ All notable changes to the **Prowler API** are documented in this file. ## [1.26.0] (Prowler UNRELEASED) +### 🚀 Added + +- CIS Benchmark PDF report generation for scans, exposing the latest CIS version per provider via `GET /scans/{id}/cis/{name}/` and picking the variant dynamically via `_pick_latest_cis_variant` (no hard-coded provider → version mapping) [(#10650)](https://github.com/prowler-cloud/prowler/pull/10650) + ### 🔄 Changed - Allows tenant owners to expel users from their organizations [(#10787)](https://github.com/prowler-cloud/prowler/pull/10787) --- +## [1.25.4] (Prowler v5.24.4) + +### 🚀 Added + +- `DJANGO_SENTRY_TRACES_SAMPLE_RATE` env var (default `0.02`) enables Sentry performance tracing for the API [(#10873)](https://github.com/prowler-cloud/prowler/pull/10873) + +### 🔄 Changed + +- Attack Paths: Neo4j driver `connection_acquisition_timeout` is now configurable via `NEO4J_CONN_ACQUISITION_TIMEOUT` (default lowered from 120 s to 15 s) [(#10873)](https://github.com/prowler-cloud/prowler/pull/10873) + +--- + ## [1.25.3] (Prowler v5.24.3) ### 🚀 Added @@ -20,6 +36,7 @@ All notable changes to the **Prowler API** are documented in this file. - Finding groups aggregated `status` now treats muted findings as resolved: a group is `FAIL` only while at least one non-muted FAIL remains, otherwise it is `PASS` (including fully-muted groups). 
The `filter[status]` filter and the `sort=status` ordering share the same semantics, keeping `status` consistent with `fail_count` and the orthogonal `muted` flag [(#10825)](https://github.com/prowler-cloud/prowler/pull/10825) - `aggregate_findings` is now idempotent: it deletes the scan's existing `ScanSummary` rows before `bulk_create`, so re-runs (such as the post-mute reaggregation pipeline) no longer violate the `unique_scan_summary` constraint and no longer abort the downstream `DailySeveritySummary` / `FindingGroupDailySummary` recomputation for the affected scan [(#10827)](https://github.com/prowler-cloud/prowler/pull/10827) +- Attack Paths: Findings on AWS were silently dropped during the Neo4j merge for resources whose Cartography node is keyed by a short identifier (e.g. EC2 instances) rather than the full ARN [(#10839)](https://github.com/prowler-cloud/prowler/pull/10839) --- diff --git a/api/Dockerfile b/api/Dockerfile index 07f69d0b0f2..1bcffc479ec 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -22,6 +22,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libtool \ libxslt1-dev \ python3-dev \ + git \ && rm -rf /var/lib/apt/lists/* # Install PowerShell diff --git a/api/poetry.lock b/api/poetry.lock index b74417c745e..f93e0d21e66 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.4 and should not be changed by hand. 
[[package]] name = "about-time" @@ -2974,7 +2974,7 @@ files = [ [package.dependencies] autopep8 = "*" Django = ">=4.2" -gprof2dot = ">=2017.09.19" +gprof2dot = ">=2017.9.19" sqlparse = "*" [[package]] @@ -4582,7 +4582,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -4790,7 +4790,7 @@ librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] mongodb = ["pymongo (==4.15.3)"] msgpack = ["msgpack (==1.1.2)"] pyro = ["pyro4 (==4.82)"] -qpid = ["qpid-python (==1.36.0-1)", "qpid-tools (==1.36.0-1)"] +qpid = ["qpid-python (==1.36.0.post1)", "qpid-tools (==1.36.0.post1)"] redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2,<6.5)"] slmq = ["softlayer_messaging (>=1.0.3)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] @@ -4811,7 +4811,7 @@ files = [ ] [package.dependencies] -certifi = ">=14.05.14" +certifi = ">=14.5.14" durationpy = ">=0.7" google-auth = ">=1.0.1" oauthlib = ">=3.2.2" @@ -6964,11 +6964,11 @@ description = "C parser in Python" optional = false python-versions = ">=3.10" groups = ["main", "dev"] +markers = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\"" files = [ {file = "pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992"}, {file = "pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29"}, ] -markers = {main = "implementation_name != \"PyPy\" and platform_python_implementation != \"PyPy\"", dev = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\""} [[package]] name = "pydantic" @@ -7194,7 +7194,7 @@ files = [ ] [package.dependencies] -astroid = ">=3.2.2,<=3.3.0-dev0" +astroid = ">=3.2.2,<=3.3.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, @@ -7216,7 
+7216,7 @@ description = "The MSALRuntime Python Interop Package" optional = false python-versions = ">=3.6" groups = ["main"] -markers = "(platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\") and sys_platform == \"win32\"" +markers = "sys_platform == \"win32\" and (platform_system == \"Windows\" or platform_system == \"Darwin\" or platform_system == \"Linux\")" files = [ {file = "pymsalruntime-0.18.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:0c22e2e83faa10de422bbfaacc1bb2887c9025ee8a53f0fc2e4f7db01c4a7b66"}, {file = "pymsalruntime-0.18.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:8ce2944a0f944833d047bb121396091e00287e2b6373716106da86ea99abf379"}, @@ -8209,10 +8209,10 @@ files = [ ] [package.dependencies] -botocore = ">=1.37.4,<2.0a.0" +botocore = ">=1.37.4,<2.0a0" [package.extras] -crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] +crt = ["botocore[crt] (>=1.37.4,<2.0a0)"] [[package]] name = "safety" diff --git a/api/src/backend/api/attack_paths/database.py b/api/src/backend/api/attack_paths/database.py index f8e20b659e3..f5fddd06131 100644 --- a/api/src/backend/api/attack_paths/database.py +++ b/api/src/backend/api/attack_paths/database.py @@ -28,6 +28,7 @@ "ATTACK_PATHS_READ_QUERY_TIMEOUT_SECONDS", default=30 ) MAX_CUSTOM_QUERY_NODES = env.int("ATTACK_PATHS_MAX_CUSTOM_QUERY_NODES", default=250) +CONN_ACQUISITION_TIMEOUT = env.int("NEO4J_CONN_ACQUISITION_TIMEOUT", default=15) READ_EXCEPTION_CODES = [ "Neo.ClientError.Statement.AccessMode", "Neo.ClientError.Procedure.ProcedureNotFound", @@ -62,7 +63,7 @@ def init_driver() -> neo4j.Driver: auth=(config["USER"], config["PASSWORD"]), keep_alive=True, max_connection_lifetime=7200, - connection_acquisition_timeout=120, + connection_acquisition_timeout=CONN_ACQUISITION_TIMEOUT, max_connection_pool_size=50, ) _driver.verify_connectivity() diff --git a/api/src/backend/api/tests/test_attack_paths_database.py b/api/src/backend/api/tests/test_attack_paths_database.py 
index 7e07792a69e..8828d239111 100644 --- a/api/src/backend/api/tests/test_attack_paths_database.py +++ b/api/src/backend/api/tests/test_attack_paths_database.py @@ -12,6 +12,8 @@ import neo4j import pytest +import api.attack_paths.database as db_module + class TestLazyInitialization: """Test that Neo4j driver is initialized lazily on first use.""" @@ -19,8 +21,6 @@ class TestLazyInitialization: @pytest.fixture(autouse=True) def reset_module_state(self): """Reset module-level singleton state before each test.""" - import api.attack_paths.database as db_module - original_driver = db_module._driver db_module._driver = None @@ -31,8 +31,6 @@ def reset_module_state(self): def test_driver_not_initialized_at_import(self): """Driver should be None after module import (no eager connection).""" - import api.attack_paths.database as db_module - assert db_module._driver is None @patch("api.attack_paths.database.settings") @@ -41,8 +39,6 @@ def test_init_driver_creates_connection_on_first_call( self, mock_driver_factory, mock_settings ): """init_driver() should create connection only when called.""" - import api.attack_paths.database as db_module - mock_driver = MagicMock() mock_driver_factory.return_value = mock_driver mock_settings.DATABASES = { @@ -69,8 +65,6 @@ def test_init_driver_returns_cached_driver_on_subsequent_calls( self, mock_driver_factory, mock_settings ): """Subsequent calls should return cached driver without reconnecting.""" - import api.attack_paths.database as db_module - mock_driver = MagicMock() mock_driver_factory.return_value = mock_driver mock_settings.DATABASES = { @@ -99,8 +93,6 @@ def test_get_driver_delegates_to_init_driver( self, mock_driver_factory, mock_settings ): """get_driver() should use init_driver() for lazy initialization.""" - import api.attack_paths.database as db_module - mock_driver = MagicMock() mock_driver_factory.return_value = mock_driver mock_settings.DATABASES = { @@ -118,14 +110,50 @@ def 
test_get_driver_delegates_to_init_driver( mock_driver_factory.assert_called_once() +class TestConnectionAcquisitionTimeout: + """Test that the connection acquisition timeout is configurable.""" + + @pytest.fixture(autouse=True) + def reset_module_state(self): + original_driver = db_module._driver + original_timeout = db_module.CONN_ACQUISITION_TIMEOUT + + db_module._driver = None + + yield + + db_module._driver = original_driver + db_module.CONN_ACQUISITION_TIMEOUT = original_timeout + + @patch("api.attack_paths.database.settings") + @patch("api.attack_paths.database.neo4j.GraphDatabase.driver") + def test_driver_receives_configured_timeout( + self, mock_driver_factory, mock_settings + ): + """init_driver() should pass CONN_ACQUISITION_TIMEOUT to the neo4j driver.""" + mock_driver_factory.return_value = MagicMock() + mock_settings.DATABASES = { + "neo4j": { + "HOST": "localhost", + "PORT": 7687, + "USER": "neo4j", + "PASSWORD": "password", + } + } + db_module.CONN_ACQUISITION_TIMEOUT = 42 + + db_module.init_driver() + + _, kwargs = mock_driver_factory.call_args + assert kwargs["connection_acquisition_timeout"] == 42 + + class TestAtexitRegistration: """Test that atexit cleanup handler is registered correctly.""" @pytest.fixture(autouse=True) def reset_module_state(self): """Reset module-level singleton state before each test.""" - import api.attack_paths.database as db_module - original_driver = db_module._driver db_module._driver = None @@ -141,8 +169,6 @@ def test_atexit_registered_on_first_init( self, mock_driver_factory, mock_atexit_register, mock_settings ): """atexit.register should be called on first initialization.""" - import api.attack_paths.database as db_module - mock_driver_factory.return_value = MagicMock() mock_settings.DATABASES = { "neo4j": { @@ -168,8 +194,6 @@ def test_atexit_registered_only_once( The double-checked locking on _driver ensures the atexit registration block only executes once (when _driver is first created). 
""" - import api.attack_paths.database as db_module - mock_driver_factory.return_value = MagicMock() mock_settings.DATABASES = { "neo4j": { @@ -194,8 +218,6 @@ class TestCloseDriver: @pytest.fixture(autouse=True) def reset_module_state(self): """Reset module-level singleton state before each test.""" - import api.attack_paths.database as db_module - original_driver = db_module._driver db_module._driver = None @@ -206,8 +228,6 @@ def reset_module_state(self): def test_close_driver_closes_and_clears_driver(self): """close_driver() should close the driver and set it to None.""" - import api.attack_paths.database as db_module - mock_driver = MagicMock() db_module._driver = mock_driver @@ -218,8 +238,6 @@ def test_close_driver_closes_and_clears_driver(self): def test_close_driver_handles_none_driver(self): """close_driver() should handle case where driver is None.""" - import api.attack_paths.database as db_module - db_module._driver = None # Should not raise @@ -229,8 +247,6 @@ def test_close_driver_handles_none_driver(self): def test_close_driver_clears_driver_even_on_close_error(self): """Driver should be cleared even if close() raises an exception.""" - import api.attack_paths.database as db_module - mock_driver = MagicMock() mock_driver.close.side_effect = Exception("Connection error") db_module._driver = mock_driver @@ -246,8 +262,6 @@ class TestExecuteReadQuery: """Test read query execution helper.""" def test_execute_read_query_calls_read_session_and_returns_result(self): - import api.attack_paths.database as db_module - tx = MagicMock() expected_graph = MagicMock() run_result = MagicMock() @@ -289,8 +303,6 @@ def execute_read_side_effect(fn): assert result is expected_graph def test_execute_read_query_defaults_parameters_to_empty_dict(self): - import api.attack_paths.database as db_module - tx = MagicMock() run_result = MagicMock() run_result.graph.return_value = MagicMock() @@ -325,8 +337,6 @@ class TestGetSessionReadOnly: @pytest.fixture(autouse=True) def 
reset_module_state(self): - import api.attack_paths.database as db_module - original_driver = db_module._driver db_module._driver = None yield @@ -341,8 +351,6 @@ def reset_module_state(self): ) def test_get_session_raises_write_query_not_allowed(self, neo4j_code): """Read-mode Neo4j errors should raise `WriteQueryNotAllowedException`.""" - import api.attack_paths.database as db_module - mock_session = MagicMock() neo4j_error = neo4j.exceptions.Neo4jError._hydrate_neo4j( code=neo4j_code, @@ -362,8 +370,6 @@ def test_get_session_raises_write_query_not_allowed(self, neo4j_code): def test_get_session_raises_generic_exception_for_other_errors(self): """Non-read-mode Neo4j errors should raise GraphDatabaseQueryException.""" - import api.attack_paths.database as db_module - mock_session = MagicMock() neo4j_error = neo4j.exceptions.Neo4jError._hydrate_neo4j( code="Neo.ClientError.Statement.SyntaxError", @@ -388,8 +394,6 @@ class TestThreadSafety: @pytest.fixture(autouse=True) def reset_module_state(self): """Reset module-level singleton state before each test.""" - import api.attack_paths.database as db_module - original_driver = db_module._driver db_module._driver = None @@ -404,8 +408,6 @@ def test_concurrent_init_creates_single_driver( self, mock_driver_factory, mock_settings ): """Multiple threads calling init_driver() should create only one driver.""" - import api.attack_paths.database as db_module - mock_driver = MagicMock() mock_driver_factory.return_value = mock_driver mock_settings.DATABASES = { @@ -448,8 +450,6 @@ class TestHasProviderData: """Test has_provider_data helper for checking provider nodes in Neo4j.""" def test_returns_true_when_nodes_exist(self): - import api.attack_paths.database as db_module - mock_session = MagicMock() mock_result = MagicMock() mock_result.single.return_value = MagicMock() # non-None record @@ -468,8 +468,6 @@ def test_returns_true_when_nodes_exist(self): mock_session.run.assert_called_once() def 
test_returns_false_when_no_nodes(self): - import api.attack_paths.database as db_module - mock_session = MagicMock() mock_result = MagicMock() mock_result.single.return_value = None @@ -486,8 +484,6 @@ def test_returns_false_when_no_nodes(self): assert db_module.has_provider_data("db-tenant-abc", "provider-123") is False def test_returns_false_when_database_not_found(self): - import api.attack_paths.database as db_module - session_ctx = MagicMock() session_ctx.__enter__.side_effect = db_module.GraphDatabaseQueryException( message="Database does not exist", @@ -503,8 +499,6 @@ def test_returns_false_when_database_not_found(self): ) def test_raises_on_other_errors(self): - import api.attack_paths.database as db_module - session_ctx = MagicMock() session_ctx.__enter__.side_effect = db_module.GraphDatabaseQueryException( message="Connection refused", diff --git a/api/src/backend/api/tests/test_views.py b/api/src/backend/api/tests/test_views.py index b81ae0c677f..736cbbd56ec 100644 --- a/api/src/backend/api/tests/test_views.py +++ b/api/src/backend/api/tests/test_views.py @@ -4113,6 +4113,51 @@ def test_compliance_local_file( assert cd.startswith('attachment; filename="') assert cd.endswith(f'filename="{fname.name}"') + def test_cis_no_output(self, authenticated_client, scans_fixture): + """CIS PDF endpoint must 404 when the scan has no output_location.""" + scan = scans_fixture[0] + scan.state = StateChoices.COMPLETED + scan.output_location = "" + scan.save() + + url = reverse("scan-cis", kwargs={"pk": scan.id}) + resp = authenticated_client.get(url) + assert resp.status_code == status.HTTP_404_NOT_FOUND + assert ( + resp.json()["errors"]["detail"] + == "The scan has no reports, or the CIS report generation task has not started yet." 
+ ) + + def test_cis_local_file(self, authenticated_client, scans_fixture, monkeypatch): + """CIS PDF endpoint must serve the latest generated PDF.""" + scan = scans_fixture[0] + scan.state = StateChoices.COMPLETED + + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + base = tmp_path / "reports" + cis_dir = base / "cis" + cis_dir.mkdir(parents=True, exist_ok=True) + fname = cis_dir / "prowler-output-aws-20260101000000_cis_report.pdf" + fname.write_bytes(b"%PDF-1.4 fake pdf") + + scan.output_location = str(base / "scan.zip") + scan.save() + + monkeypatch.setattr( + glob, + "glob", + lambda p: [str(fname)] if p.endswith("*_cis_report.pdf") else [], + ) + + url = reverse("scan-cis", kwargs={"pk": scan.id}) + resp = authenticated_client.get(url) + assert resp.status_code == status.HTTP_200_OK + assert resp["Content-Type"] == "application/pdf" + cd = resp["Content-Disposition"] + assert cd.startswith('attachment; filename="') + assert cd.endswith(f'filename="{fname.name}"') + @patch("api.v1.views.Task.objects.get") @patch("api.v1.views.TaskSerializer") def test__get_task_status_returns_none_if_task_not_executing( diff --git a/api/src/backend/api/v1/views.py b/api/src/backend/api/v1/views.py index 28b5fd49f04..99813576f5d 100644 --- a/api/src/backend/api/v1/views.py +++ b/api/src/backend/api/v1/views.py @@ -1926,6 +1926,27 @@ def destroy(self, request, *args, pk=None, **kwargs): ), }, ), + cis=extend_schema( + tags=["Scan"], + summary="Retrieve CIS Benchmark compliance report", + description="Download the CIS Benchmark compliance report as a PDF file. 
" + "When a provider ships multiple CIS versions, the report is generated " + "for the highest available version.", + request=None, + responses={ + 200: OpenApiResponse( + description="PDF file containing the CIS compliance report" + ), + 202: OpenApiResponse(description="The task is in progress"), + 401: OpenApiResponse( + description="API key missing or user not Authenticated" + ), + 403: OpenApiResponse(description="There is a problem with credentials"), + 404: OpenApiResponse( + description="The scan has no CIS reports, or the CIS report generation task has not started yet" + ), + }, + ), ) @method_decorator(CACHE_DECORATOR, name="list") @method_decorator(CACHE_DECORATOR, name="retrieve") @@ -1994,6 +2015,9 @@ def get_serializer_class(self): elif self.action == "csa": if hasattr(self, "response_serializer_class"): return self.response_serializer_class + elif self.action == "cis": + if hasattr(self, "response_serializer_class"): + return self.response_serializer_class return super().get_serializer_class() def partial_update(self, request, *args, **kwargs): @@ -2236,6 +2260,45 @@ def compliance(self, request, pk=None, name=None): content, filename = loader return self._serve_file(content, filename, "text/csv") + @action( + detail=True, + methods=["get"], + url_name="cis", + ) + def cis(self, request, pk=None): + scan = self.get_object() + running_resp = self._get_task_status(scan) + if running_resp: + return running_resp + + if not scan.output_location: + return Response( + { + "detail": "The scan has no reports, or the CIS report generation task has not started yet." 
+ }, + status=status.HTTP_404_NOT_FOUND, + ) + + if scan.output_location.startswith("s3://"): + bucket = env.str("DJANGO_OUTPUT_S3_AWS_OUTPUT_BUCKET", "") + key_prefix = scan.output_location.removeprefix(f"s3://{bucket}/") + prefix = os.path.join( + os.path.dirname(key_prefix), + "cis", + "*_cis_report.pdf", + ) + loader = self._load_file(prefix, s3=True, bucket=bucket, list_objects=True) + else: + base = os.path.dirname(scan.output_location) + pattern = os.path.join(base, "cis", "*_cis_report.pdf") + loader = self._load_file(pattern, s3=False) + + if isinstance(loader, Response): + return loader + + content, filename = loader + return self._serve_file(content, filename, "application/pdf") + @action( detail=True, methods=["get"], diff --git a/api/src/backend/config/settings/sentry.py b/api/src/backend/config/settings/sentry.py index 65c6277817c..580821f7b23 100644 --- a/api/src/backend/config/settings/sentry.py +++ b/api/src/backend/config/settings/sentry.py @@ -120,6 +120,7 @@ def before_send(event, hint): # see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info before_send=before_send, send_default_pii=True, + traces_sample_rate=env.float("DJANGO_SENTRY_TRACES_SAMPLE_RATE", default=0.02), _experiments={ # Set continuous_profiling_auto_start to True # to automatically start the profiler on when diff --git a/api/src/backend/tasks/assets/img/cis_logo.png b/api/src/backend/tasks/assets/img/cis_logo.png new file mode 100644 index 00000000000..7c1568da5ec Binary files /dev/null and b/api/src/backend/tasks/assets/img/cis_logo.png differ diff --git a/api/src/backend/tasks/jobs/attack_paths/aws.py b/api/src/backend/tasks/jobs/attack_paths/aws.py index 7248cc39e8c..4acb37d6418 100644 --- a/api/src/backend/tasks/jobs/attack_paths/aws.py +++ b/api/src/backend/tasks/jobs/attack_paths/aws.py @@ -313,3 +313,16 @@ def sync_aws_account( ) return failed_syncs + + +def extract_short_uid(uid: str) -> str: + """Return the short identifier from an 
AWS ARN or resource ID. + + Supported inputs end in one of: + - `<type>/<id>` (e.g. `instance/i-xxx`) + - `<type>:<id>` (e.g. `function:name`) + - `<id>` (e.g. `bucket-name` or `i-xxx`) + + If `uid` is already a short resource ID, it is returned unchanged. + """ + return uid.rsplit("/", 1)[-1].rsplit(":", 1)[-1] diff --git a/api/src/backend/tasks/jobs/attack_paths/config.py b/api/src/backend/tasks/jobs/attack_paths/config.py index 5f5c523ceb5..0816626b67d 100644 --- a/api/src/backend/tasks/jobs/attack_paths/config.py +++ b/api/src/backend/tasks/jobs/attack_paths/config.py @@ -37,6 +37,8 @@ class ProviderConfig: # Label for resources connected to the account node, enabling indexed finding lookups. resource_label: str # e.g., "_AWSResource" ingestion_function: Callable + # Maps a Postgres resource UID (e.g. full ARN) to the short-id form Cartography stores on some node types (e.g. `i-xxx` for EC2Instance). + short_uid_extractor: Callable[[str], str] # Provider Configurations @@ -48,6 +50,7 @@ class ProviderConfig: uid_field="arn", resource_label="_AWSResource", ingestion_function=aws.start_aws_ingestion, + short_uid_extractor=aws.extract_short_uid, ) PROVIDER_CONFIGS: dict[str, ProviderConfig] = { @@ -116,6 +119,21 @@ def get_provider_resource_label(provider_type: str) -> str: return config.resource_label if config else "_UnknownProviderResource" +def _identity_short_uid(uid: str) -> str: + """Fallback short-uid extractor for providers without a custom mapping.""" + return uid + + +def get_short_uid_extractor(provider_type: str) -> Callable[[str], str]: + """Get the short-uid extractor for a provider type. + + Returns an identity function when the provider is unknown, so callers can + rely on a callable always being returned.
+ """ + config = PROVIDER_CONFIGS.get(provider_type) + return config.short_uid_extractor if config else _identity_short_uid + + # Dynamic Isolation Label Helpers # -------------------------------- diff --git a/api/src/backend/tasks/jobs/attack_paths/findings.py b/api/src/backend/tasks/jobs/attack_paths/findings.py index 0b2ecb4c450..3581f0ca0f0 100644 --- a/api/src/backend/tasks/jobs/attack_paths/findings.py +++ b/api/src/backend/tasks/jobs/attack_paths/findings.py @@ -8,7 +8,7 @@ """ from collections import defaultdict -from typing import Any, Generator +from typing import Any, Callable, Generator from uuid import UUID import neo4j @@ -21,6 +21,7 @@ get_node_uid_field, get_provider_resource_label, get_root_node_label, + get_short_uid_extractor, ) from tasks.jobs.attack_paths.queries import ( ADD_RESOURCE_LABEL_TEMPLATE, @@ -57,7 +58,9 @@ ] -def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]: +def _to_neo4j_dict( + record: dict[str, Any], resource_uid: str, resource_short_uid: str +) -> dict[str, Any]: """Transform a Django `.values()` record into a `dict` ready for Neo4j ingestion.""" return { "id": str(record["id"]), @@ -75,6 +78,7 @@ def _to_neo4j_dict(record: dict[str, Any], resource_uid: str) -> dict[str, Any]: "muted": record["muted"], "muted_reason": record["muted_reason"], "resource_uid": resource_uid, + "resource_short_uid": resource_short_uid, } @@ -170,6 +174,8 @@ def load_findings( batch_num = 0 total_records = 0 + edges_merged = 0 + edges_dropped = 0 for batch in findings_batches: batch_num += 1 batch_size = len(batch) @@ -178,9 +184,15 @@ def load_findings( parameters["findings_data"] = batch logger.info(f"Loading findings batch {batch_num} ({batch_size} records)") - neo4j_session.run(query, parameters) + summary = neo4j_session.run(query, parameters).single() + if summary is not None: + edges_merged += summary.get("merged_count", 0) + edges_dropped += summary.get("dropped_count", 0) - logger.info(f"Finished loading 
{total_records} records in {batch_num} batches") + logger.info( + f"Finished loading {total_records} records in {batch_num} batches " + f"(edges_merged={edges_merged}, edges_dropped={edges_dropped})" + ) return total_records @@ -205,8 +217,9 @@ def stream_findings_with_resources( ) tenant_id = prowler_api_provider.tenant_id + short_uid_extractor = get_short_uid_extractor(prowler_api_provider.provider) for batch in _paginate_findings(tenant_id, scan_id): - enriched = _enrich_batch_with_resources(batch, tenant_id) + enriched = _enrich_batch_with_resources(batch, tenant_id, short_uid_extractor) if enriched: yield enriched @@ -269,6 +282,7 @@ def _fetch_findings_batch( def _enrich_batch_with_resources( findings_batch: list[dict[str, Any]], tenant_id: str, + short_uid_extractor: Callable[[str], str], ) -> list[dict[str, Any]]: """ Enrich findings with their resource UIDs. @@ -280,7 +294,7 @@ def _enrich_batch_with_resources( resource_map = _build_finding_resource_map(finding_ids, tenant_id) return [ - _to_neo4j_dict(finding, resource_uid) + _to_neo4j_dict(finding, resource_uid, short_uid_extractor(resource_uid)) for finding in findings_batch for resource_uid in resource_map.get(finding["id"], []) ] diff --git a/api/src/backend/tasks/jobs/attack_paths/queries.py b/api/src/backend/tasks/jobs/attack_paths/queries.py index 26ffa32f92b..eb1d82a96ee 100644 --- a/api/src/backend/tasks/jobs/attack_paths/queries.py +++ b/api/src/backend/tasks/jobs/attack_paths/queries.py @@ -35,46 +35,56 @@ def render_cypher_template(template: str, replacements: dict[str, str]) -> str: UNWIND $findings_data AS finding_data OPTIONAL MATCH (resource_by_uid:__RESOURCE_LABEL__ {{__NODE_UID_FIELD__: finding_data.resource_uid}}) - WITH finding_data, resource_by_uid - OPTIONAL MATCH (resource_by_id:__RESOURCE_LABEL__ {{id: finding_data.resource_uid}}) WHERE resource_by_uid IS NULL - WITH finding_data, COALESCE(resource_by_uid, resource_by_id) AS resource - WHERE resource IS NOT NULL - - MERGE 
(finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}}) - ON CREATE SET - finding.id = finding_data.id, - finding.uid = finding_data.uid, - finding.inserted_at = finding_data.inserted_at, - finding.updated_at = finding_data.updated_at, - finding.first_seen_at = finding_data.first_seen_at, - finding.scan_id = finding_data.scan_id, - finding.delta = finding_data.delta, - finding.status = finding_data.status, - finding.status_extended = finding_data.status_extended, - finding.severity = finding_data.severity, - finding.check_id = finding_data.check_id, - finding.check_title = finding_data.check_title, - finding.muted = finding_data.muted, - finding.muted_reason = finding_data.muted_reason, - finding.firstseen = timestamp(), - finding.lastupdated = $last_updated, - finding._module_name = 'cartography:prowler', - finding._module_version = $prowler_version - ON MATCH SET - finding.status = finding_data.status, - finding.status_extended = finding_data.status_extended, - finding.lastupdated = $last_updated - - MERGE (resource)-[rel:HAS_FINDING]->(finding) - ON CREATE SET - rel.firstseen = timestamp(), - rel.lastupdated = $last_updated, - rel._module_name = 'cartography:prowler', - rel._module_version = $prowler_version - ON MATCH SET - rel.lastupdated = $last_updated + OPTIONAL MATCH (resource_by_short:__RESOURCE_LABEL__ {{id: finding_data.resource_short_uid}}) + WHERE resource_by_uid IS NULL AND resource_by_id IS NULL + WITH finding_data, + resource_by_uid, + resource_by_id, + head(collect(resource_by_short)) AS resource_by_short + WITH finding_data, + COALESCE(resource_by_uid, resource_by_id, resource_by_short) AS resource + + FOREACH (_ IN CASE WHEN resource IS NOT NULL THEN [1] ELSE [] END | + MERGE (finding:{PROWLER_FINDING_LABEL} {{id: finding_data.id}}) + ON CREATE SET + finding.id = finding_data.id, + finding.uid = finding_data.uid, + finding.inserted_at = finding_data.inserted_at, + finding.updated_at = finding_data.updated_at, + finding.first_seen_at = 
finding_data.first_seen_at, + finding.scan_id = finding_data.scan_id, + finding.delta = finding_data.delta, + finding.status = finding_data.status, + finding.status_extended = finding_data.status_extended, + finding.severity = finding_data.severity, + finding.check_id = finding_data.check_id, + finding.check_title = finding_data.check_title, + finding.muted = finding_data.muted, + finding.muted_reason = finding_data.muted_reason, + finding.firstseen = timestamp(), + finding.lastupdated = $last_updated, + finding._module_name = 'cartography:prowler', + finding._module_version = $prowler_version + ON MATCH SET + finding.status = finding_data.status, + finding.status_extended = finding_data.status_extended, + finding.lastupdated = $last_updated + MERGE (resource)-[rel:HAS_FINDING]->(finding) + ON CREATE SET + rel.firstseen = timestamp(), + rel.lastupdated = $last_updated, + rel._module_name = 'cartography:prowler', + rel._module_version = $prowler_version + ON MATCH SET + rel.lastupdated = $last_updated + ) + + WITH sum(CASE WHEN resource IS NOT NULL THEN 1 ELSE 0 END) AS merged_count, + sum(CASE WHEN resource IS NULL THEN 1 ELSE 0 END) AS dropped_count + + RETURN merged_count, dropped_count """ # Internet queries (used by internet.py) diff --git a/api/src/backend/tasks/jobs/report.py b/api/src/backend/tasks/jobs/report.py index a41a8d62927..005df623f22 100644 --- a/api/src/backend/tasks/jobs/report.py +++ b/api/src/backend/tasks/jobs/report.py @@ -1,3 +1,6 @@ +import gc +import re +from collections.abc import Iterable from pathlib import Path from shutil import rmtree @@ -6,6 +9,7 @@ from tasks.jobs.export import _generate_compliance_output_directory, _upload_to_s3 from tasks.jobs.reports import ( FRAMEWORK_REGISTRY, + CISReportGenerator, CSAReportGenerator, ENSReportGenerator, NIS2ReportGenerator, @@ -17,10 +21,53 @@ from api.db_router import READ_REPLICA_ALIAS from api.db_utils import rls_transaction from api.models import Provider, ScanSummary, ThreatScoreSnapshot 
+from prowler.lib.check.compliance_models import Compliance from prowler.lib.outputs.finding import Finding as FindingOutput logger = get_task_logger(__name__) +# Matches CIS compliance_ids like "cis_1.4_aws", "cis_5.0_azure", +# "cis_1.10_kubernetes", "cis_3.0.1_aws". Requires at least one dotted +# component so malformed inputs like "cis_._aws" or "cis_5._aws" are rejected +# at the regex stage, rather than by a later ValueError fallback. +_CIS_VARIANT_RE = re.compile(r"^cis_(?P<version>\d+(?:\.\d+)+)_(?P<provider>.+)$") + + +def _pick_latest_cis_variant(compliance_ids: Iterable[str]) -> str | None: + """Return the CIS compliance_id with the highest semantic version. + + CIS ships many variants per provider (e.g. cis_1.4_aws, ..., cis_6.0_aws). + A lexicographic sort is incorrect for version strings like ``1.10`` vs + ``1.2``; this helper parses the version into a tuple of ints so ``1.10`` + is correctly ordered after ``1.2``. Malformed names are skipped so a + broken JSON cannot crash the whole CIS pipeline. + + Args: + compliance_ids: Iterable of CIS compliance identifiers. Expected to + belong to a single provider (callers should pass the already + filtered keys from ``Compliance.get_bulk(provider_type)``). + + Returns: + The compliance_id with the highest parsed version, or ``None`` if no + well-formed CIS identifier was found. + """ + best_key: tuple[int, ...] | None = None + best_name: str | None = None + for name in compliance_ids: + match = _CIS_VARIANT_RE.match(name) + if not match: + continue + try: + key = tuple(int(part) for part in match.group("version").split(".")) + except ValueError: + # Defensive: the regex already guarantees numeric chunks, but we + # keep the guard so a future regex change cannot crash callers.
+ continue + if best_key is None or key > best_key: + best_key = key + best_name = name + return best_name + def generate_threatscore_report( tenant_id: str, @@ -191,6 +238,53 @@ def generate_csa_report( ) +def generate_cis_report( + tenant_id: str, + scan_id: str, + compliance_id: str, + output_path: str, + provider_id: str, + only_failed: bool = True, + include_manual: bool = False, + provider_obj: Provider | None = None, + requirement_statistics: dict[str, dict[str, int]] | None = None, + findings_cache: dict[str, list[FindingOutput]] | None = None, +) -> None: + """ + Generate a PDF compliance report for a specific CIS Benchmark variant. + + Unlike single-version frameworks (ENS, NIS2, CSA), CIS has multiple + variants per provider (e.g., cis_1.4_aws, cis_5.0_aws, cis_6.0_aws). This + wrapper is called once per variant, receiving the specific compliance_id. + + Args: + tenant_id: The tenant ID for Row-Level Security context. + scan_id: ID of the scan executed by Prowler. + compliance_id: ID of the specific CIS variant (e.g., "cis_5.0_aws"). + output_path: Output PDF file path. + provider_id: Provider ID for the scan. + only_failed: If True, only include failed requirements in detailed section. + include_manual: If True, include manual requirements in detailed section. + provider_obj: Pre-fetched Provider object to avoid duplicate queries. + requirement_statistics: Pre-aggregated requirement statistics. + findings_cache: Cache of already loaded findings to avoid duplicate queries. 
+ """ + generator = CISReportGenerator(FRAMEWORK_REGISTRY["cis"]) + + generator.generate( + tenant_id=tenant_id, + scan_id=scan_id, + compliance_id=compliance_id, + output_path=output_path, + provider_id=provider_id, + provider_obj=provider_obj, + requirement_statistics=requirement_statistics, + findings_cache=findings_cache, + only_failed=only_failed, + include_manual=include_manual, + ) + + def generate_compliance_reports( tenant_id: str, scan_id: str, @@ -199,6 +293,7 @@ def generate_compliance_reports( generate_ens: bool = True, generate_nis2: bool = True, generate_csa: bool = True, + generate_cis: bool = True, only_failed_threatscore: bool = True, min_risk_level_threatscore: int = 4, include_manual_ens: bool = True, @@ -206,6 +301,8 @@ def generate_compliance_reports( only_failed_nis2: bool = True, only_failed_csa: bool = True, include_manual_csa: bool = False, + only_failed_cis: bool = True, + include_manual_cis: bool = False, ) -> dict[str, dict[str, bool | str]]: """ Generate multiple compliance reports with shared database queries. @@ -215,6 +312,13 @@ def generate_compliance_reports( - Aggregating requirement statistics once (shared across all reports) - Reusing compliance framework data when possible + For CIS a single PDF is produced per run: the one matching the highest + available CIS version for the scan's provider (picked dynamically from + ``Compliance.get_bulk`` via :func:`_pick_latest_cis_variant`). The + returned ``results["cis"]`` entry has the same flat shape as the other + single-version frameworks — the picked variant is an internal detail, + not surfaced in the result. + Args: tenant_id: The tenant ID for Row-Level Security context. scan_id: The ID of the scan to generate reports for. @@ -223,6 +327,8 @@ def generate_compliance_reports( generate_ens: Whether to generate ENS report. generate_nis2: Whether to generate NIS2 report. generate_csa: Whether to generate CSA CCM report.
+ generate_cis: Whether to generate a CIS Benchmark report for the + latest CIS version available for the provider. only_failed_threatscore: For ThreatScore, only include failed requirements. min_risk_level_threatscore: Minimum risk level for ThreatScore critical requirements. include_manual_ens: For ENS, include manual requirements. @@ -230,22 +336,26 @@ def generate_compliance_reports( only_failed_nis2: For NIS2, only include failed requirements. only_failed_csa: For CSA CCM, only include failed requirements. include_manual_csa: For CSA CCM, include manual requirements. + only_failed_cis: For CIS, only include failed requirements in detailed section. + include_manual_cis: For CIS, include manual requirements in detailed section. Returns: - Dictionary with results for each report type. + Dictionary with results for each report type. Every value has the + same flat shape: ``{"upload": bool, "path": str, "error"?: str}``. """ logger.info( "Generating compliance reports for scan %s with provider %s" - " (ThreatScore: %s, ENS: %s, NIS2: %s, CSA: %s)", + " (ThreatScore: %s, ENS: %s, NIS2: %s, CSA: %s, CIS: %s)", scan_id, provider_id, generate_threatscore, generate_ens, generate_nis2, generate_csa, + generate_cis, ) - results = {} + results: dict = {} # Validate that the scan has findings and get provider info with rls_transaction(tenant_id, using=READ_REPLICA_ALIAS): @@ -259,6 +369,8 @@ def generate_compliance_reports( results["nis2"] = {"upload": False, "path": ""} if generate_csa: results["csa"] = {"upload": False, "path": ""} + if generate_cis: + results["cis"] = {"upload": False, "path": ""} return results provider_obj = Provider.objects.get(id=provider_id) @@ -299,11 +411,18 @@ def generate_compliance_reports( results["csa"] = {"upload": False, "path": ""} generate_csa = False + # For CIS we do NOT pre-check the provider against a hard-coded whitelist + # (that list drifts the moment a new CIS JSON ships). 
Instead, we let + # `_pick_latest_cis_variant` over `Compliance.get_bulk(provider_type)` + # return None for providers that lack CIS, and treat that as "nothing to + # do" below. + if ( not generate_threatscore and not generate_ens and not generate_nis2 and not generate_csa + and not generate_cis ): return results @@ -350,6 +469,13 @@ def generate_compliance_reports( scan_id, compliance_framework="csa", ) + cis_path = _generate_compliance_output_directory( + DJANGO_TMP_OUTPUT_DIRECTORY, + provider_uid, + tenant_id, + scan_id, + compliance_framework="cis", + ) out_dir = str(Path(threatscore_path).parent.parent) except Exception as e: logger.error("Error generating output directory: %s", e) @@ -362,6 +488,8 @@ def generate_compliance_reports( results["nis2"] = error_dict.copy() if generate_csa: results["csa"] = error_dict.copy() + if generate_cis: + results["cis"] = error_dict.copy() return results # Generate ThreatScore report @@ -569,12 +697,92 @@ def generate_compliance_reports( logger.error("Error generating CSA CCM report: %s", e) results["csa"] = {"upload": False, "path": "", "error": str(e)} - # Clean up temporary files if all reports were uploaded successfully - all_uploaded = all( - result.get("upload", False) - for result in results.values() - if result.get("upload") is not None - ) + # Generate CIS Benchmark report for the latest available version only. + # CIS ships multiple versions per provider (e.g. cis_1.4_aws, cis_5.0_aws, + # cis_6.0_aws); we dynamically pick the highest semantic version at run + # time rather than hard-coding a per-provider mapping. `Compliance.get_bulk` + # is the single source of truth for which providers have CIS. 
+ if generate_cis: + latest_cis: str | None = None + try: + frameworks_bulk = Compliance.get_bulk(provider_type) + latest_cis = _pick_latest_cis_variant( + name for name in frameworks_bulk.keys() if name.startswith("cis_") + ) + except Exception as e: + logger.error("Error discovering CIS variants for %s: %s", provider_type, e) + results["cis"] = {"upload": False, "path": "", "error": str(e)} + + if "cis" not in results: + if latest_cis is None: + logger.info("No CIS variants available for provider %s", provider_type) + results["cis"] = {"upload": False, "path": ""} + else: + logger.info( + "Selected latest CIS variant for provider %s: %s", + provider_type, + latest_cis, + ) + pdf_path_cis = f"{cis_path}_cis_report.pdf" + try: + generate_cis_report( + tenant_id=tenant_id, + scan_id=scan_id, + compliance_id=latest_cis, + output_path=pdf_path_cis, + provider_id=provider_id, + only_failed=only_failed_cis, + include_manual=include_manual_cis, + provider_obj=provider_obj, + requirement_statistics=requirement_statistics, + findings_cache=findings_cache, + ) + + upload_uri_cis = _upload_to_s3( + tenant_id, + scan_id, + pdf_path_cis, + f"cis/{Path(pdf_path_cis).name}", + ) + + if upload_uri_cis: + results["cis"] = { + "upload": True, + "path": upload_uri_cis, + } + logger.info( + "CIS report %s uploaded to %s", + latest_cis, + upload_uri_cis, + ) + else: + results["cis"] = {"upload": False, "path": out_dir} + logger.warning( + "CIS report %s saved locally at %s", + latest_cis, + out_dir, + ) + + except Exception as e: + logger.error("Error generating CIS report %s: %s", latest_cis, e) + results["cis"] = { + "upload": False, + "path": "", + "error": str(e), + } + finally: + # Free ReportLab/matplotlib memory before moving on. + gc.collect() + + # Clean up temporary files only if every requested report has been + # successfully uploaded. All result entries now share the same flat + # shape, so the check is a single comprehension. 
+ upload_flags = [ + bool(entry.get("upload", False)) + for entry in results.values() + if isinstance(entry, dict) and entry.get("upload") is not None + ] + all_uploaded = bool(upload_flags) and all(upload_flags) if all_uploaded: try: @@ -595,6 +803,7 @@ def generate_compliance_reports_job( generate_ens: bool = True, generate_nis2: bool = True, generate_csa: bool = True, + generate_cis: bool = True, ) -> dict[str, dict[str, bool | str]]: """ Celery task wrapper for generate_compliance_reports. @@ -607,9 +816,12 @@ def generate_compliance_reports_job( generate_ens: Whether to generate ENS report. generate_nis2: Whether to generate NIS2 report. generate_csa: Whether to generate CSA CCM report. + generate_cis: Whether to generate the CIS Benchmark report for the + latest CIS version available for the provider. Returns: - Dictionary with results for each report type. + Dictionary with results for each report type. Every entry shares the + same flat ``{"upload", "path", "error"?}`` shape. """ return generate_compliance_reports( tenant_id=tenant_id, @@ -619,4 +831,5 @@ def generate_compliance_reports_job( generate_ens=generate_ens, generate_nis2=generate_nis2, generate_csa=generate_csa, + generate_cis=generate_cis, ) diff --git a/api/src/backend/tasks/jobs/reports/__init__.py b/api/src/backend/tasks/jobs/reports/__init__.py index 1fc475a4679..a538416f59d 100644 --- a/api/src/backend/tasks/jobs/reports/__init__.py +++ b/api/src/backend/tasks/jobs/reports/__init__.py @@ -17,6 +17,9 @@ get_chart_color_for_percentage, ) +# Framework-specific generators +from .cis import CISReportGenerator + # Reusable components # Reusable components: Color helpers, Badge components, Risk component, # Table components, Section components @@ -31,10 +34,12 @@ create_section_header, create_status_badge, create_summary_table, + escape_html, get_color_for_compliance, get_color_for_risk_level, get_color_for_weight, get_status_color, + truncate_text, ) # Framework configuration: Main configuration, 
Color constants, ENS colors, @@ -90,8 +95,6 @@ FrameworkConfig, get_framework_config, ) - -# Framework-specific generators from .csa import CSAReportGenerator from .ens import ENSReportGenerator from .nis2 import NIS2ReportGenerator @@ -109,6 +112,7 @@ "ENSReportGenerator", "NIS2ReportGenerator", "CSAReportGenerator", + "CISReportGenerator", # Configuration "FrameworkConfig", "FRAMEWORK_REGISTRY", @@ -182,6 +186,9 @@ # Section components "create_section_header", "create_summary_table", + # Text helpers + "truncate_text", + "escape_html", # Chart functions "get_chart_color_for_percentage", "create_vertical_bar_chart", diff --git a/api/src/backend/tasks/jobs/reports/cis.py b/api/src/backend/tasks/jobs/reports/cis.py new file mode 100644 index 00000000000..0fbb416a171 --- /dev/null +++ b/api/src/backend/tasks/jobs/reports/cis.py @@ -0,0 +1,755 @@ +import os +import re +from collections import defaultdict +from typing import Any + +from reportlab.lib.units import inch +from reportlab.platypus import Image, PageBreak, Paragraph, Spacer, Table, TableStyle + +from api.models import StatusChoices + +from .base import ( + BaseComplianceReportGenerator, + ComplianceData, + RequirementData, + get_requirement_metadata, +) +from .charts import ( + create_horizontal_bar_chart, + create_pie_chart, + create_stacked_bar_chart, + get_chart_color_for_percentage, +) +from .components import ColumnConfig, create_data_table, escape_html, truncate_text +from .config import ( + CHART_COLOR_GREEN_1, + CHART_COLOR_RED, + CHART_COLOR_YELLOW, + COLOR_BG_BLUE, + COLOR_BLUE, + COLOR_BORDER_GRAY, + COLOR_DARK_GRAY, + COLOR_GRAY, + COLOR_GRID_GRAY, + COLOR_HIGH_RISK, + COLOR_LIGHT_BLUE, + COLOR_SAFE, + COLOR_WHITE, +) + +# Ordered buckets used both in the executive summary tables and the charts +# section. Exposed as module constants so the two call sites never drift. +_PROFILE_BUCKET_ORDER: tuple[str, ...] = ("L1", "L2", "Other") +_ASSESSMENT_BUCKET_ORDER: tuple[str, ...] 
def _normalize_profile(profile: Any) -> str:
    """Collapse a CIS profile value into the ``L1``, ``L2`` or ``Other`` bucket.

    CIS profile values come in several spellings (``"Level 1"``,
    ``"Level 2"``, ``"E3 Level 1"``, ``"E5 Level 2"``). They are reduced to
    three buckets so that charts and badges stay readable across CIS
    variants. Matching is delegated to the module-level ``_LEVEL_2_RE`` and
    ``_LEVEL_1_RE`` patterns.

    Args:
        profile: The profile value (enum member, string, or ``None``).

    Returns:
        One of ``"L1"``, ``"L2"``, ``"Other"``.
    """
    if profile is None:
        return "Other"

    # Prefer the enum's ``.value``; fall back to the string form when the
    # attribute is missing or falsy.
    raw_value = getattr(profile, "value", None)
    text = raw_value if raw_value else str(profile)

    # Level 2 is probed first, so a value matching both patterns is reported
    # as L2.
    for bucket, pattern in (("L2", _LEVEL_2_RE), ("L1", _LEVEL_1_RE)):
        if pattern.search(text):
            return bucket
    return "Other"
+ - Requirements carry Profile (Level 1/Level 2) and AssessmentStatus + (Automated/Manual) attributes that drive the executive summary and + charts. + + This generator produces: + - Cover page with Prowler logo and dynamic CIS version/provider metadata + - Executive summary with overall compliance score, counts, and breakdowns + by Profile and AssessmentStatus + - Charts: overall status pie, pass rate by section (horizontal bar), + Level 1 vs Level 2 pass/fail distribution (stacked bar) + - Requirements index grouped by dynamic section + - Detailed findings for FAIL requirements with CIS-specific audit / + remediation / rationale details + """ + + # Per-run memoization cache for ``_compute_statistics``. ``generate()`` + # is the public entry point and is called once per PDF, so scoping the + # cache to the last seen ComplianceData instance is enough to avoid the + # double computation between executive summary and charts section. + _stats_cache_key: int | None = None + _stats_cache_value: dict | None = None + + # Body section ordering β€” ensure every top-level section starts on its + # own clean page. The base class only puts a PageBreak AFTER Charts and + # Requirements Index, so Executive Summary and Charts end up sharing a + # page. This override prepends a PageBreak so Compliance Analysis always + # begins on a fresh page. 
+ def _build_body_sections(self, data: ComplianceData) -> list: + return [PageBreak(), *super()._build_body_sections(data)] + + # ------------------------------------------------------------------------- + # Cover page override β€” shows dynamic CIS version + provider in the title + # ------------------------------------------------------------------------- + + def create_cover_page(self, data: ComplianceData) -> list: + """Create the CIS report cover page with Prowler + CIS logos side by side.""" + elements = [] + + # Create logos side by side (same pattern as NIS2 / ENS) + prowler_logo_path = os.path.join( + os.path.dirname(__file__), "../../assets/img/prowler_logo.png" + ) + cis_logo_path = os.path.join( + os.path.dirname(__file__), "../../assets/img/cis_logo.png" + ) + + if os.path.exists(cis_logo_path): + prowler_logo = Image(prowler_logo_path, width=3.5 * inch, height=0.7 * inch) + cis_logo = Image(cis_logo_path, width=2.3 * inch, height=1.1 * inch) + logos_table = Table( + [[prowler_logo, cis_logo]], colWidths=[4 * inch, 2.5 * inch] + ) + logos_table.setStyle( + TableStyle( + [ + ("ALIGN", (0, 0), (0, 0), "LEFT"), + ("ALIGN", (1, 0), (1, 0), "RIGHT"), + ("VALIGN", (0, 0), (0, 0), "MIDDLE"), + ("VALIGN", (1, 0), (1, 0), "MIDDLE"), + ] + ) + ) + elements.append(logos_table) + elif os.path.exists(prowler_logo_path): + # Fallback: only the Prowler logo if the CIS asset is missing + elements.append(Image(prowler_logo_path, width=5 * inch, height=1 * inch)) + + elements.append(Spacer(1, 0.5 * inch)) + + # Dynamic title: "CIS Benchmark v5.0 β€” AWS Compliance Report" + provider_label = "" + if data.provider_obj: + provider_label = f" β€” {data.provider_obj.provider.upper()}" + title_text = ( + f"CIS Benchmark v{data.version}{provider_label}
Compliance Report" + ) + elements.append(Paragraph(title_text, self.styles["title"])) + elements.append(Spacer(1, 0.5 * inch)) + + # Metadata table via base class helper + info_rows = self._build_info_rows(data, language=self.config.language) + metadata_data = [] + for label, value in info_rows: + if label in ("Name:", "Description:") and value: + metadata_data.append( + [label, Paragraph(str(value), self.styles["normal_center"])] + ) + else: + metadata_data.append([label, value]) + + metadata_table = Table(metadata_data, colWidths=[2 * inch, 4 * inch]) + metadata_table.setStyle( + TableStyle( + [ + ("BACKGROUND", (0, 0), (0, -1), COLOR_BLUE), + ("TEXTCOLOR", (0, 0), (0, -1), COLOR_WHITE), + ("FONTNAME", (0, 0), (0, -1), "FiraCode"), + ("BACKGROUND", (1, 0), (1, -1), COLOR_BG_BLUE), + ("TEXTCOLOR", (1, 0), (1, -1), COLOR_GRAY), + ("FONTNAME", (1, 0), (1, -1), "PlusJakartaSans"), + ("ALIGN", (0, 0), (-1, -1), "LEFT"), + ("VALIGN", (0, 0), (-1, -1), "TOP"), + ("FONTSIZE", (0, 0), (-1, -1), 11), + ("GRID", (0, 0), (-1, -1), 1, COLOR_BORDER_GRAY), + ("LEFTPADDING", (0, 0), (-1, -1), 10), + ("RIGHTPADDING", (0, 0), (-1, -1), 10), + ("TOPPADDING", (0, 0), (-1, -1), 8), + ("BOTTOMPADDING", (0, 0), (-1, -1), 8), + ] + ) + ) + elements.append(metadata_table) + + return elements + + # ------------------------------------------------------------------------- + # Executive Summary + # ------------------------------------------------------------------------- + + def create_executive_summary(self, data: ComplianceData) -> list: + """Create the CIS executive summary section.""" + elements = [] + + elements.append(Paragraph("Executive Summary", self.styles["h1"])) + elements.append(Spacer(1, 0.1 * inch)) + + stats = self._compute_statistics(data) + + # --- Summary metrics table --- + summary_data = [ + ["Metric", "Value"], + ["Total Requirements", str(stats["total"])], + ["Passed", str(stats["passed"])], + ["Failed", str(stats["failed"])], + ["Manual", str(stats["manual"])], + 
["Overall Compliance", f"{stats['overall_compliance']:.1f}%"], + ] + summary_table = Table(summary_data, colWidths=[3 * inch, 2 * inch]) + summary_table.setStyle( + TableStyle( + [ + ("BACKGROUND", (0, 0), (-1, 0), COLOR_BLUE), + ("TEXTCOLOR", (0, 0), (-1, 0), COLOR_WHITE), + ("BACKGROUND", (0, 2), (0, 2), COLOR_SAFE), + ("TEXTCOLOR", (0, 2), (0, 2), COLOR_WHITE), + ("BACKGROUND", (0, 3), (0, 3), COLOR_HIGH_RISK), + ("TEXTCOLOR", (0, 3), (0, 3), COLOR_WHITE), + ("BACKGROUND", (0, 4), (0, 4), COLOR_DARK_GRAY), + ("TEXTCOLOR", (0, 4), (0, 4), COLOR_WHITE), + ("FONTNAME", (0, 0), (-1, 0), "PlusJakartaSans"), + ("FONTSIZE", (0, 0), (-1, 0), 12), + ("FONTSIZE", (0, 1), (-1, -1), 10), + ("ALIGN", (0, 0), (-1, -1), "CENTER"), + ("VALIGN", (0, 0), (-1, -1), "MIDDLE"), + ("GRID", (0, 0), (-1, -1), 0.5, COLOR_BORDER_GRAY), + ("BOTTOMPADDING", (0, 0), (-1, 0), 10), + ( + "ROWBACKGROUNDS", + (1, 1), + (1, -1), + [COLOR_WHITE, COLOR_BG_BLUE], + ), + ] + ) + ) + elements.append(summary_table) + elements.append(Spacer(1, 0.25 * inch)) + + # --- Profile breakdown table --- + elements.append(Paragraph("Breakdown by Profile", self.styles["h2"])) + elements.append(Spacer(1, 0.1 * inch)) + profile_counts = stats["profile_counts"] + profile_table_data = [["Profile", "Passed", "Failed", "Manual", "Total"]] + for bucket in _PROFILE_BUCKET_ORDER: + counts = profile_counts.get(bucket, {"passed": 0, "failed": 0, "manual": 0}) + total = counts["passed"] + counts["failed"] + counts["manual"] + if total == 0: + continue + profile_table_data.append( + [ + _profile_badge_text(bucket), + str(counts["passed"]), + str(counts["failed"]), + str(counts["manual"]), + str(total), + ] + ) + profile_table = Table( + profile_table_data, + colWidths=[1.5 * inch, 1 * inch, 1 * inch, 1 * inch, 1 * inch], + ) + profile_table.setStyle( + TableStyle( + [ + ("BACKGROUND", (0, 0), (-1, 0), COLOR_BLUE), + ("TEXTCOLOR", (0, 0), (-1, 0), COLOR_WHITE), + ("FONTNAME", (0, 0), (-1, 0), "FiraCode"), + ("FONTSIZE", (0, 
0), (-1, 0), 10), + ("ALIGN", (0, 0), (-1, -1), "CENTER"), + ("VALIGN", (0, 0), (-1, -1), "MIDDLE"), + ("FONTSIZE", (0, 1), (-1, -1), 9), + ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRID_GRAY), + ( + "ROWBACKGROUNDS", + (0, 1), + (-1, -1), + [COLOR_WHITE, COLOR_BG_BLUE], + ), + ] + ) + ) + elements.append(profile_table) + elements.append(Spacer(1, 0.25 * inch)) + + # --- Assessment status breakdown --- + elements.append(Paragraph("Breakdown by Assessment Status", self.styles["h2"])) + elements.append(Spacer(1, 0.1 * inch)) + assessment_counts = stats["assessment_counts"] + assessment_table_data = [["Assessment", "Passed", "Failed", "Manual", "Total"]] + for bucket in _ASSESSMENT_BUCKET_ORDER: + counts = assessment_counts.get( + bucket, {"passed": 0, "failed": 0, "manual": 0} + ) + total = counts["passed"] + counts["failed"] + counts["manual"] + if total == 0: + continue + assessment_table_data.append( + [ + bucket, + str(counts["passed"]), + str(counts["failed"]), + str(counts["manual"]), + str(total), + ] + ) + assessment_table = Table( + assessment_table_data, + colWidths=[1.5 * inch, 1 * inch, 1 * inch, 1 * inch, 1 * inch], + ) + assessment_table.setStyle( + TableStyle( + [ + ("BACKGROUND", (0, 0), (-1, 0), COLOR_LIGHT_BLUE), + ("TEXTCOLOR", (0, 0), (-1, 0), COLOR_WHITE), + ("FONTNAME", (0, 0), (-1, 0), "FiraCode"), + ("FONTSIZE", (0, 0), (-1, 0), 10), + ("ALIGN", (0, 0), (-1, -1), "CENTER"), + ("VALIGN", (0, 0), (-1, -1), "MIDDLE"), + ("FONTSIZE", (0, 1), (-1, -1), 9), + ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRID_GRAY), + ( + "ROWBACKGROUNDS", + (0, 1), + (-1, -1), + [COLOR_WHITE, COLOR_BG_BLUE], + ), + ] + ) + ) + elements.append(assessment_table) + elements.append(Spacer(1, 0.25 * inch)) + + # --- Top 5 failing sections --- + top_failing = stats["top_failing_sections"] + if top_failing: + elements.append( + Paragraph("Top Sections with Lowest Compliance", self.styles["h2"]) + ) + elements.append(Spacer(1, 0.1 * inch)) + top_table_data = [["Section", "Passed", 
"Failed", "Compliance"]] + for section_label, section_stats in top_failing: + passed = section_stats["passed"] + failed = section_stats["failed"] + total = passed + failed + pct = (passed / total * 100) if total > 0 else 100 + top_table_data.append( + [ + truncate_text(section_label, 55), + str(passed), + str(failed), + f"{pct:.1f}%", + ] + ) + top_table = Table( + top_table_data, + colWidths=[3.5 * inch, 0.9 * inch, 0.9 * inch, 1.2 * inch], + ) + top_table.setStyle( + TableStyle( + [ + ("BACKGROUND", (0, 0), (-1, 0), COLOR_HIGH_RISK), + ("TEXTCOLOR", (0, 0), (-1, 0), COLOR_WHITE), + ("FONTNAME", (0, 0), (-1, 0), "FiraCode"), + ("FONTSIZE", (0, 0), (-1, 0), 10), + ("ALIGN", (0, 0), (-1, -1), "CENTER"), + ("VALIGN", (0, 0), (-1, -1), "MIDDLE"), + ("FONTSIZE", (0, 1), (-1, -1), 9), + ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRID_GRAY), + ( + "ROWBACKGROUNDS", + (0, 1), + (-1, -1), + [COLOR_WHITE, COLOR_BG_BLUE], + ), + ] + ) + ) + elements.append(top_table) + + return elements + + # ------------------------------------------------------------------------- + # Charts section + # ------------------------------------------------------------------------- + + def create_charts_section(self, data: ComplianceData) -> list: + """Create the CIS charts section.""" + elements = [] + + elements.append(Paragraph("Compliance Analysis", self.styles["h1"])) + elements.append(Spacer(1, 0.1 * inch)) + + # --- Pie chart: overall Pass / Fail / Manual --- + stats = self._compute_statistics(data) + pie_labels = [] + pie_values = [] + pie_colors = [] + if stats["passed"] > 0: + pie_labels.append(f"Pass ({stats['passed']})") + pie_values.append(stats["passed"]) + pie_colors.append(CHART_COLOR_GREEN_1) + if stats["failed"] > 0: + pie_labels.append(f"Fail ({stats['failed']})") + pie_values.append(stats["failed"]) + pie_colors.append(CHART_COLOR_RED) + if stats["manual"] > 0: + pie_labels.append(f"Manual ({stats['manual']})") + pie_values.append(stats["manual"]) + 
pie_colors.append(CHART_COLOR_YELLOW) + + if pie_values: + elements.append(Paragraph("Overall Status Distribution", self.styles["h2"])) + elements.append(Spacer(1, 0.1 * inch)) + pie_buffer = create_pie_chart( + labels=pie_labels, + values=pie_values, + colors=pie_colors, + ) + pie_buffer.seek(0) + elements.append(Image(pie_buffer, width=4.5 * inch, height=4.5 * inch)) + elements.append(Spacer(1, 0.2 * inch)) + + # --- Horizontal bar: pass rate by section --- + section_stats = stats["section_stats"] + if section_stats: + elements.append(PageBreak()) + elements.append(Paragraph("Compliance by Section", self.styles["h1"])) + elements.append(Spacer(1, 0.1 * inch)) + elements.append( + Paragraph( + "The following chart shows compliance percentage for each CIS " + "section based on automated checks:", + self.styles["normal_center"], + ) + ) + elements.append(Spacer(1, 0.1 * inch)) + + # Sort sections by pass rate descending for readability + sorted_sections = sorted( + section_stats.items(), + key=lambda item: ( + (item[1]["passed"] / (item[1]["passed"] + item[1]["failed"]) * 100) + if (item[1]["passed"] + item[1]["failed"]) > 0 + else 100 + ), + reverse=True, + ) + bar_labels = [] + bar_values = [] + for section_label, section_data in sorted_sections: + total = section_data["passed"] + section_data["failed"] + if total == 0: + continue + pct = (section_data["passed"] / total) * 100 + bar_labels.append(truncate_text(section_label, 60)) + bar_values.append(pct) + + if bar_values: + bar_buffer = create_horizontal_bar_chart( + labels=bar_labels, + values=bar_values, + xlabel="Compliance (%)", + color_func=get_chart_color_for_percentage, + label_fontsize=9, + ) + bar_buffer.seek(0) + elements.append(Image(bar_buffer, width=6.5 * inch, height=5 * inch)) + + # --- Stacked bar: Level 1 vs Level 2 pass/fail --- + profile_counts = stats["profile_counts"] + has_profile_data = any( + (counts["passed"] + counts["failed"]) > 0 + for counts in profile_counts.values() + ) + if 
has_profile_data: + elements.append(PageBreak()) + elements.append(Paragraph("Profile Breakdown", self.styles["h1"])) + elements.append(Spacer(1, 0.1 * inch)) + elements.append( + Paragraph( + "Distribution of Pass / Fail / Manual across CIS profile levels.", + self.styles["normal_center"], + ) + ) + elements.append(Spacer(1, 0.1 * inch)) + + profile_labels = [] + pass_series = [] + fail_series = [] + manual_series = [] + for bucket in _PROFILE_BUCKET_ORDER: + counts = profile_counts.get(bucket) + if not counts: + continue + total = counts["passed"] + counts["failed"] + counts["manual"] + if total == 0: + continue + profile_labels.append(_profile_badge_text(bucket)) + pass_series.append(counts["passed"]) + fail_series.append(counts["failed"]) + manual_series.append(counts["manual"]) + + if profile_labels: + stacked_buffer = create_stacked_bar_chart( + labels=profile_labels, + data_series={ + "Pass": pass_series, + "Fail": fail_series, + "Manual": manual_series, + }, + xlabel="Profile", + ylabel="Requirements", + ) + stacked_buffer.seek(0) + elements.append(Image(stacked_buffer, width=6 * inch, height=4 * inch)) + + return elements + + # ------------------------------------------------------------------------- + # Requirements Index + # ------------------------------------------------------------------------- + + def create_requirements_index(self, data: ComplianceData) -> list: + """Create the CIS requirements index grouped by dynamic section.""" + elements = [] + + elements.append(Paragraph("Requirements Index", self.styles["h1"])) + elements.append(Spacer(1, 0.1 * inch)) + + sections = self._derive_sections(data) + by_section: dict[str, list[dict]] = defaultdict(list) + for req in data.requirements: + meta = get_requirement_metadata(req.id, data.attributes_by_requirement_id) + section = "Other" + profile_bucket = "Other" + assessment = "" + if meta: + section = getattr(meta, "Section", "Other") or "Other" + profile_bucket = _normalize_profile(getattr(meta, 
"Profile", None)) + assessment_enum = getattr(meta, "AssessmentStatus", None) + assessment = getattr(assessment_enum, "value", None) or str( + assessment_enum or "" + ) + by_section[section].append( + { + "id": req.id, + "description": truncate_text(req.description, 80), + "profile": _profile_badge_text(profile_bucket), + "assessment": assessment or "-", + "status": (req.status or "").upper(), + } + ) + + columns = [ + ColumnConfig("ID", 0.9 * inch, "id", align="LEFT"), + ColumnConfig("Description", 3.0 * inch, "description", align="LEFT"), + ColumnConfig("Profile", 0.9 * inch, "profile"), + ColumnConfig("Assessment", 1 * inch, "assessment"), + ColumnConfig("Status", 0.9 * inch, "status"), + ] + + for section in sections: + rows = by_section.get(section, []) + if not rows: + continue + elements.append(Paragraph(truncate_text(section, 90), self.styles["h2"])) + elements.append(Spacer(1, 0.05 * inch)) + table = create_data_table( + data=rows, + columns=columns, + header_color=self.config.primary_color, + normal_style=self.styles["normal_center"], + ) + elements.append(table) + elements.append(Spacer(1, 0.15 * inch)) + + return elements + + # ------------------------------------------------------------------------- + # Detailed findings hook β€” inject CIS-specific rationale / audit content + # ------------------------------------------------------------------------- + + def _render_requirement_detail_extras( + self, req: RequirementData, data: ComplianceData + ) -> list: + """Render CIS rationale, impact, audit, remediation and references.""" + extras = [] + meta = get_requirement_metadata(req.id, data.attributes_by_requirement_id) + if meta is None: + return extras + + field_map = [ + ("Rationale", "RationaleStatement"), + ("Impact", "ImpactStatement"), + ("Audit Procedure", "AuditProcedure"), + ("Remediation", "RemediationProcedure"), + ("References", "References"), + ] + + for label, attr_name in field_map: + value = getattr(meta, attr_name, None) + if not value: 
+ continue + text = str(value).strip() + if not text: + continue + extras.append(Paragraph(f"{label}:", self.styles["h3"])) + extras.append(Paragraph(escape_html(text), self.styles["normal"])) + extras.append(Spacer(1, 0.08 * inch)) + + return extras + + # ------------------------------------------------------------------------- + # Private helpers + # ------------------------------------------------------------------------- + + def _derive_sections(self, data: ComplianceData) -> list[str]: + """Extract ordered unique Section names from loaded compliance data.""" + seen: dict[str, bool] = {} + for req in data.requirements: + meta = get_requirement_metadata(req.id, data.attributes_by_requirement_id) + if meta is None: + continue + section = getattr(meta, "Section", None) or "Other" + if section not in seen: + seen[section] = True + return list(seen.keys()) + + def _compute_statistics(self, data: ComplianceData) -> dict: + """Aggregate all statistics needed for summary and charts. + + Memoized per-``ComplianceData`` instance via ``_stats_cache_*``: the + executive summary and the charts section both need the same numbers, + so they would otherwise re-iterate the requirements twice. We key on + ``id(data)`` because ``ComplianceData`` is a dataclass and its + instances are not hashable. 
+ + Returns a dict with: + - total, passed, failed, manual: int + - overall_compliance: float (percentage) + - profile_counts: {"L1": {"passed", "failed", "manual"}, ...} + - assessment_counts: {"Automated": {...}, "Manual": {...}} + - section_stats: {section_name: {"passed", "failed", "manual"}, ...} + - top_failing_sections: list[(section_name, stats)] (up to 5) + """ + cache_key = id(data) + if self._stats_cache_key == cache_key and self._stats_cache_value is not None: + return self._stats_cache_value + stats = self._compute_statistics_uncached(data) + self._stats_cache_key = cache_key + self._stats_cache_value = stats + return stats + + def _compute_statistics_uncached(self, data: ComplianceData) -> dict: + """Actual aggregation kernel; call ``_compute_statistics`` instead.""" + total = len(data.requirements) + passed = sum(1 for r in data.requirements if r.status == StatusChoices.PASS) + failed = sum(1 for r in data.requirements if r.status == StatusChoices.FAIL) + manual = sum(1 for r in data.requirements if r.status == StatusChoices.MANUAL) + + evaluated = passed + failed + overall_compliance = (passed / evaluated * 100) if evaluated > 0 else 100.0 + + profile_counts: dict[str, dict[str, int]] = { + "L1": {"passed": 0, "failed": 0, "manual": 0}, + "L2": {"passed": 0, "failed": 0, "manual": 0}, + "Other": {"passed": 0, "failed": 0, "manual": 0}, + } + assessment_counts: dict[str, dict[str, int]] = { + "Automated": {"passed": 0, "failed": 0, "manual": 0}, + "Manual": {"passed": 0, "failed": 0, "manual": 0}, + } + section_stats: dict[str, dict[str, int]] = defaultdict( + lambda: {"passed": 0, "failed": 0, "manual": 0} + ) + + for req in data.requirements: + meta = get_requirement_metadata(req.id, data.attributes_by_requirement_id) + if meta is None: + continue + + profile_bucket = _normalize_profile(getattr(meta, "Profile", None)) + assessment_enum = getattr(meta, "AssessmentStatus", None) + assessment_value = getattr(assessment_enum, "value", None) or str( + 
assessment_enum or "" + ) + assessment_bucket = ( + "Automated" if assessment_value == "Automated" else "Manual" + ) + section = getattr(meta, "Section", None) or "Other" + + status_key = { + StatusChoices.PASS: "passed", + StatusChoices.FAIL: "failed", + StatusChoices.MANUAL: "manual", + }.get(req.status) + if status_key is None: + continue + + profile_counts[profile_bucket][status_key] += 1 + assessment_counts[assessment_bucket][status_key] += 1 + section_stats[section][status_key] += 1 + + # Top 5 sections with lowest pass rate (only sections with evaluated reqs) + def _section_rate(item): + _, stats_ = item + evaluated_ = stats_["passed"] + stats_["failed"] + if evaluated_ == 0: + return 101 # sort evaluated=0 to the bottom + return stats_["passed"] / evaluated_ * 100 + + top_failing_sections = sorted( + ( + item + for item in section_stats.items() + if (item[1]["passed"] + item[1]["failed"]) > 0 + ), + key=_section_rate, + )[:5] + + return { + "total": total, + "passed": passed, + "failed": failed, + "manual": manual, + "overall_compliance": overall_compliance, + "profile_counts": profile_counts, + "assessment_counts": assessment_counts, + "section_stats": dict(section_stats), + "top_failing_sections": top_failing_sections, + } diff --git a/api/src/backend/tasks/jobs/reports/components.py b/api/src/backend/tasks/jobs/reports/components.py index 323c4547e64..049cc043d3c 100644 --- a/api/src/backend/tasks/jobs/reports/components.py +++ b/api/src/backend/tasks/jobs/reports/components.py @@ -26,6 +26,52 @@ ) +def truncate_text(text: str, max_len: int) -> str: + """Truncate ``text`` to ``max_len`` characters, appending an ellipsis if cut. + + Used by report generators that need to squeeze long descriptions, section + titles or finding titles into a fixed-width table cell. + + Args: + text: Source string. ``None`` and non-string values are treated as empty. + max_len: Maximum output length including the ellipsis. 
Values < 4 are + clamped so the result never grows beyond ``max_len``. + + Returns: + The original string if short enough, otherwise ``text[: max_len - 3] + "..."``. + When ``max_len < 4`` a plain substring of length ``max_len`` is returned + so callers never get a string longer than they asked for. + """ + if not text: + return "" + text = str(text) + if len(text) <= max_len: + return text + if max_len < 4: + return text[:max_len] + return text[: max_len - 3] + "..." + + +def escape_html(text: str) -> str: + """Escape the minimal HTML entities required for safe ReportLab Paragraph rendering. + + ReportLab's ``Paragraph`` parses a small HTML subset, so raw ``<``, ``>`` + and ``&`` in user-provided content (rationale, remediation, etc.) would + break layout or be interpreted as tags. This helper mirrors + ``html.escape`` but avoids pulling in the stdlib dependency and keeps the + output deterministic. + + Args: + text: Untrusted source string. + + Returns: + A string safe to embed inside a ReportLab Paragraph. + """ + return ( + str(text or "").replace("&", "&").replace("<", "<").replace(">", ">") + ) + + def get_color_for_risk_level(risk_level: int) -> colors.Color: """ Get color based on risk level. 
diff --git a/api/src/backend/tasks/jobs/reports/config.py b/api/src/backend/tasks/jobs/reports/config.py index fe0326980d4..669f31c2871 100644 --- a/api/src/backend/tasks/jobs/reports/config.py +++ b/api/src/backend/tasks/jobs/reports/config.py @@ -313,6 +313,32 @@ class FrameworkConfig: has_niveles=False, has_weight=False, ), + "cis": FrameworkConfig( + name="cis", + display_name="CIS Benchmark", + logo_filename=None, + primary_color=COLOR_BLUE, + secondary_color=COLOR_LIGHT_BLUE, + bg_color=COLOR_BG_BLUE, + attribute_fields=[ + "Section", + "SubSection", + "Profile", + "AssessmentStatus", + "Description", + "RationaleStatement", + "ImpactStatement", + "RemediationProcedure", + "AuditProcedure", + "References", + ], + sections=None, # Derived dynamically per CIS variant (section names differ across versions/providers) + language="en", + has_risk_levels=False, + has_dimensions=False, + has_niveles=False, + has_weight=False, + ), } @@ -336,5 +362,7 @@ def get_framework_config(compliance_id: str) -> FrameworkConfig | None: return FRAMEWORK_REGISTRY["nis2"] if "csa" in compliance_lower or "ccm" in compliance_lower: return FRAMEWORK_REGISTRY["csa_ccm"] + if compliance_lower.startswith("cis_") or "cis" in compliance_lower: + return FRAMEWORK_REGISTRY["cis"] return None diff --git a/api/src/backend/tasks/tasks.py b/api/src/backend/tasks/tasks.py index b4a8f91668f..e22eb2d14de 100644 --- a/api/src/backend/tasks/tasks.py +++ b/api/src/backend/tasks/tasks.py @@ -1000,13 +1000,17 @@ def jira_integration_task( @handle_provider_deletion def generate_compliance_reports_task(tenant_id: str, scan_id: str, provider_id: str): """ - Optimized task to generate ThreatScore, ENS, NIS2, and CSA CCM reports with shared queries. + Optimized task to generate ThreatScore, ENS, NIS2, CSA CCM and CIS reports with shared queries. 
This task is more efficient than running separate report tasks because it reuses database queries: - Provider object fetched once (instead of multiple times) - Requirement statistics aggregated once (instead of multiple times) - Can reduce database load by up to 50-70% + CIS emits a single PDF per run: the one matching the highest CIS version + available for the scan's provider, picked dynamically from + ``Compliance.get_bulk`` (no hard-coded provider → version mapping). + Args: tenant_id (str): The tenant identifier. scan_id (str): The scan identifier. @@ -1023,6 +1027,7 @@ def generate_compliance_reports_task(tenant_id: str, scan_id: str, provider_id: generate_ens=True, generate_nis2=True, generate_csa=True, + generate_cis=True, ) diff --git a/api/src/backend/tasks/tests/test_attack_paths_scan.py b/api/src/backend/tasks/tests/test_attack_paths_scan.py index 283c0650e1d..986a2f5b2c5 100644 --- a/api/src/backend/tasks/tests/test_attack_paths_scan.py +++ b/api/src/backend/tasks/tests/test_attack_paths_scan.py @@ -1285,6 +1285,12 @@ def findings_generator(): config = SimpleNamespace(update_tag=12345) mock_session = MagicMock() + first_result = MagicMock() + first_result.single.return_value = {"merged_count": 1, "dropped_count": 0} + second_result = MagicMock() + second_result.single.return_value = {"merged_count": 0, "dropped_count": 1} + mock_session.run.side_effect = [first_result, second_result] + with ( patch( "tasks.jobs.attack_paths.findings.get_node_uid_field", @@ -1294,6 +1300,7 @@ def findings_generator(): "tasks.jobs.attack_paths.findings.get_provider_resource_label", return_value="_AWSResource", ), + patch("tasks.jobs.attack_paths.findings.logger") as mock_logger, ): findings_module.load_findings( mock_session, findings_generator(), provider, config @@ -1305,6 +1312,14 @@ def findings_generator(): assert params["last_updated"] == config.update_tag assert "findings_data" in params + summary_log = next( + call_args.args[0] + for call_args in
mock_logger.info.call_args_list + if call_args.args and "Finished loading" in call_args.args[0] + ) + assert "edges_merged=1" in summary_log + assert "edges_dropped=1" in summary_log + def test_stream_findings_with_resources_returns_latest_scan_data( self, tenants_fixture, @@ -1484,11 +1499,12 @@ def test_enrich_batch_with_resources_single_resource( "default", ): result = findings_module._enrich_batch_with_resources( - [finding_dict], str(tenant.id) + [finding_dict], str(tenant.id), lambda uid: f"short:{uid}" ) assert len(result) == 1 assert result[0]["resource_uid"] == resource.uid + assert result[0]["resource_short_uid"] == f"short:{resource.uid}" assert result[0]["id"] == str(finding.id) assert result[0]["status"] == "FAIL" @@ -1572,7 +1588,7 @@ def test_enrich_batch_with_resources_multiple_resources( "default", ): result = findings_module._enrich_batch_with_resources( - [finding_dict], str(tenant.id) + [finding_dict], str(tenant.id), lambda uid: uid ) assert len(result) == 3 @@ -1646,7 +1662,7 @@ def test_enrich_batch_with_resources_no_resources_skips( patch("tasks.jobs.attack_paths.findings.logger") as mock_logger, ): result = findings_module._enrich_batch_with_resources( - [finding_dict], str(tenant.id) + [finding_dict], str(tenant.id), lambda uid: uid ) assert len(result) == 0 @@ -1693,6 +1709,63 @@ def empty_gen(): mock_session.run.assert_not_called() + @pytest.mark.parametrize( + "uid, expected", + [ + ( + "arn:aws:ec2:us-east-1:552455647653:instance/i-05075b63eb51baacb", + "i-05075b63eb51baacb", + ), + ( + "arn:aws:ec2:us-east-1:123456789012:volume/vol-0abcd1234ef567890", + "vol-0abcd1234ef567890", + ), + ( + "arn:aws:ec2:us-east-1:123456789012:security-group/sg-0123abcd", + "sg-0123abcd", + ), + ("arn:aws:s3:::my-bucket-name", "my-bucket-name"), + ("arn:aws:iam::123456789012:role/MyRole", "MyRole"), + ( + "arn:aws:lambda:us-east-1:123456789012:function:my-function", + "my-function", + ), + ("i-05075b63eb51baacb", "i-05075b63eb51baacb"), + ], + ) + def 
test_extract_short_uid_aws_variants(self, uid, expected): + from tasks.jobs.attack_paths.aws import extract_short_uid + + assert extract_short_uid(uid) == expected + + def test_insert_finding_template_has_short_id_fallback(self): + from tasks.jobs.attack_paths.queries import ( + INSERT_FINDING_TEMPLATE, + render_cypher_template, + ) + + rendered = render_cypher_template( + INSERT_FINDING_TEMPLATE, + { + "__NODE_UID_FIELD__": "arn", + "__RESOURCE_LABEL__": "_AWSResource", + }, + ) + + assert ( + "resource_by_uid:_AWSResource {arn: finding_data.resource_uid}" in rendered + ) + assert "resource_by_id:_AWSResource {id: finding_data.resource_uid}" in rendered + assert ( + "resource_by_short:_AWSResource {id: finding_data.resource_short_uid}" + in rendered + ) + assert "head(collect(resource_by_short)) AS resource_by_short" in rendered + assert ( + "COALESCE(resource_by_uid, resource_by_id, resource_by_short)" in rendered + ) + assert "RETURN merged_count, dropped_count" in rendered + class TestAddResourceLabel: def test_add_resource_label_applies_private_label(self): diff --git a/api/src/backend/tasks/tests/test_reports.py b/api/src/backend/tasks/tests/test_reports.py index 858f4c06ca0..a25df876f3b 100644 --- a/api/src/backend/tasks/tests/test_reports.py +++ b/api/src/backend/tasks/tests/test_reports.py @@ -4,7 +4,11 @@ import matplotlib import pytest from reportlab.lib import colors -from tasks.jobs.report import generate_compliance_reports, generate_threatscore_report +from tasks.jobs.report import ( + _pick_latest_cis_variant, + generate_compliance_reports, + generate_threatscore_report, +) from tasks.jobs.reports import ( CHART_COLOR_GREEN_1, CHART_COLOR_GREEN_2, @@ -422,6 +426,266 @@ def test_no_findings_returns_early_for_both_reports( mock_ens.assert_not_called() mock_nis2.assert_not_called() + @patch("tasks.jobs.report._upload_to_s3") + @patch("tasks.jobs.report.generate_cis_report") + def test_no_findings_returns_flat_cis_entry( + self, + mock_cis, + 
mock_upload, + tenants_fixture, + scans_fixture, + providers_fixture, + ): + """Scan with no findings and ``generate_cis=True`` must yield a flat + ``{"upload": False, "path": ""}`` entry, consistent with the other + frameworks (no nested dict, no sentinel keys).""" + tenant = tenants_fixture[0] + scan = scans_fixture[0] + provider = providers_fixture[0] + + result = generate_compliance_reports( + tenant_id=str(tenant.id), + scan_id=str(scan.id), + provider_id=str(provider.id), + generate_threatscore=False, + generate_ens=False, + generate_nis2=False, + generate_csa=False, + generate_cis=True, + ) + + assert result["cis"] == {"upload": False, "path": ""} + mock_cis.assert_not_called() + + +@pytest.mark.django_db +class TestGenerateComplianceReportsCIS: + """Test suite covering the CIS branch of generate_compliance_reports.""" + + def _force_scan_has_findings(self, monkeypatch): + """Bypass the ScanSummary.exists() early-return guard.""" + + class _FakeManager: + def filter(self, **kwargs): + class _Q: + def exists(self): + return True + + return _Q() + + monkeypatch.setattr("tasks.jobs.report.ScanSummary.objects", _FakeManager()) + + @patch("tasks.jobs.report._aggregate_requirement_statistics_from_database") + @patch("tasks.jobs.report._upload_to_s3") + @patch("tasks.jobs.report.generate_cis_report") + @patch("tasks.jobs.report.Compliance.get_bulk") + def test_cis_picks_latest_version( + self, + mock_get_bulk, + mock_cis, + mock_upload, + mock_stats, + monkeypatch, + tenants_fixture, + scans_fixture, + providers_fixture, + ): + """CIS branch should generate a single PDF for the highest version. + + The returned ``results["cis"]`` must have the same flat shape as the + other single-version frameworks (``{"upload", "path"}``) — the picked + variant is an internal detail and is not exposed in the result.
+ """ + tenant = tenants_fixture[0] + scan = scans_fixture[0] + provider = providers_fixture[0] + + self._force_scan_has_findings(monkeypatch) + + mock_stats.return_value = {} + # Multiple CIS variants + a non-CIS framework that must be ignored. + # Includes 1.10 to verify the selection is not lexicographic. + mock_get_bulk.return_value = { + "cis_1.4_aws": Mock(), + "cis_1.10_aws": Mock(), + "cis_2.0_aws": Mock(), + "cis_5.0_aws": Mock(), + "ens_rd2022_aws": Mock(), + } + mock_upload.return_value = "s3://bucket/path" + + result = generate_compliance_reports( + tenant_id=str(tenant.id), + scan_id=str(scan.id), + provider_id=str(provider.id), + generate_threatscore=False, + generate_ens=False, + generate_nis2=False, + generate_csa=False, + generate_cis=True, + ) + + # Exactly one call for the latest version, never for older variants + # or non-CIS frameworks. + assert mock_cis.call_count == 1 + assert mock_cis.call_args.kwargs["compliance_id"] == "cis_5.0_aws" + + assert result["cis"]["upload"] is True + assert result["cis"]["path"] == "s3://bucket/path" + assert "compliance_id" not in result["cis"] + + @patch("tasks.jobs.report._aggregate_requirement_statistics_from_database") + @patch("tasks.jobs.report._upload_to_s3") + @patch("tasks.jobs.report.generate_cis_report") + @patch("tasks.jobs.report.Compliance.get_bulk") + def test_cis_latest_variant_failure_captured_in_results( + self, + mock_get_bulk, + mock_cis, + mock_upload, + mock_stats, + monkeypatch, + tenants_fixture, + scans_fixture, + providers_fixture, + ): + """A failure in the latest CIS variant must be surfaced in the flat results entry.""" + tenant = tenants_fixture[0] + scan = scans_fixture[0] + provider = providers_fixture[0] + + self._force_scan_has_findings(monkeypatch) + + mock_stats.return_value = {} + mock_get_bulk.return_value = { + "cis_1.4_aws": Mock(), + "cis_5.0_aws": Mock(), + } + mock_cis.side_effect = RuntimeError("boom") + + result = generate_compliance_reports( + 
tenant_id=str(tenant.id), + scan_id=str(scan.id), + provider_id=str(provider.id), + generate_threatscore=False, + generate_ens=False, + generate_nis2=False, + generate_csa=False, + generate_cis=True, + ) + + # Only the latest variant is attempted; its failure lands in a flat + # entry keyed under "cis" with the same shape as sibling frameworks. + assert mock_cis.call_count == 1 + assert result["cis"]["upload"] is False + assert result["cis"]["error"] == "boom" + assert "compliance_id" not in result["cis"] + + @patch("tasks.jobs.report._aggregate_requirement_statistics_from_database") + @patch("tasks.jobs.report._upload_to_s3") + @patch("tasks.jobs.report.generate_cis_report") + @patch("tasks.jobs.report.Compliance.get_bulk") + def test_cis_provider_without_cis_skipped_cleanly( + self, + mock_get_bulk, + mock_cis, + mock_upload, + mock_stats, + monkeypatch, + tenants_fixture, + scans_fixture, + providers_fixture, + ): + """When ``Compliance.get_bulk`` returns no CIS entry the CIS branch + must skip cleanly and record a flat ``{"upload": False, "path": ""}`` + entry — no hard-coded provider whitelist is consulted.""" + tenant = tenants_fixture[0] + scan = scans_fixture[0] + provider = providers_fixture[0] + + self._force_scan_has_findings(monkeypatch) + mock_stats.return_value = {} + # No ``cis_*`` keys in the bulk → no variant picked.
+ mock_get_bulk.return_value = {"ens_rd2022_aws": Mock()} + + result = generate_compliance_reports( + tenant_id=str(tenant.id), + scan_id=str(scan.id), + provider_id=str(provider.id), + generate_threatscore=False, + generate_ens=False, + generate_nis2=False, + generate_csa=False, + generate_cis=True, + ) + + assert result["cis"] == {"upload": False, "path": ""} + mock_cis.assert_not_called() + + +class TestPickLatestCisVariant: + """Unit tests for `_pick_latest_cis_variant` helper.""" + + def test_empty_returns_none(self): + assert _pick_latest_cis_variant([]) is None + + def test_single_variant(self): + assert _pick_latest_cis_variant(["cis_5.0_aws"]) == "cis_5.0_aws" + + def test_numeric_not_lexicographic(self): + """1.10 must beat 1.2 (lex sort would pick 1.2).""" + variants = ["cis_1.2_kubernetes", "cis_1.10_kubernetes"] + assert _pick_latest_cis_variant(variants) == "cis_1.10_kubernetes" + + def test_major_version_wins(self): + variants = ["cis_1.4_aws", "cis_2.0_aws", "cis_5.0_aws", "cis_6.0_aws"] + assert _pick_latest_cis_variant(variants) == "cis_6.0_aws" + + def test_minor_version_breaks_tie(self): + variants = ["cis_3.0_aws", "cis_3.1_aws", "cis_2.9_aws"] + assert _pick_latest_cis_variant(variants) == "cis_3.1_aws" + + def test_three_part_version(self): + """Versions like 3.0.1 must win over 3.0.""" + variants = ["cis_3.0_aws", "cis_3.0.1_aws"] + assert _pick_latest_cis_variant(variants) == "cis_3.0.1_aws" + + def test_malformed_names_ignored(self): + variants = ["notcis_1.0_aws", "cis_abc_aws", "cis_5.0_aws"] + assert _pick_latest_cis_variant(variants) == "cis_5.0_aws" + + def test_only_malformed_returns_none(self): + variants = ["notcis_1.0_aws", "cis_abc_aws"] + assert _pick_latest_cis_variant(variants) is None + + def test_multidigit_provider_name(self): + """Provider name with underscores (e.g. 
googleworkspace) must parse.""" + variants = ["cis_1.3_googleworkspace"] + assert _pick_latest_cis_variant(variants) == "cis_1.3_googleworkspace" + + def test_accepts_iterator(self): + """The helper must accept any iterable, not just lists.""" + + def _gen(): + yield "cis_1.4_aws" + yield "cis_5.0_aws" + + assert _pick_latest_cis_variant(_gen()) == "cis_5.0_aws" + + def test_rejects_single_integer_version(self): + """The regex requires at least one dotted component. ``cis_5_aws`` + without a minor version is malformed per the backend contract.""" + assert _pick_latest_cis_variant(["cis_5_aws"]) is None + + def test_rejects_trailing_dot(self): + """Inputs like ``cis_5._aws`` must be rejected at the regex stage + instead of silently normalising to ``(5, 0)``.""" + assert _pick_latest_cis_variant(["cis_5._aws", "cis_1.0_aws"]) == "cis_1.0_aws" + + def test_rejects_lone_dot_version(self): + """``cis_._aws`` has no numeric component and must be skipped.""" + assert _pick_latest_cis_variant(["cis_._aws", "cis_1.0_aws"]) == "cis_1.0_aws" + class TestOptimizationImprovements: """Test suite for optimization-related functionality.""" diff --git a/api/src/backend/tasks/tests/test_reports_cis.py b/api/src/backend/tasks/tests/test_reports_cis.py new file mode 100644 index 00000000000..2d4528c82d0 --- /dev/null +++ b/api/src/backend/tasks/tests/test_reports_cis.py @@ -0,0 +1,532 @@ +from unittest.mock import Mock, patch + +import pytest +from reportlab.platypus import Image, LongTable, Paragraph, Table +from tasks.jobs.reports import FRAMEWORK_REGISTRY, ComplianceData, RequirementData +from tasks.jobs.reports.cis import ( + CISReportGenerator, + _normalize_profile, + _profile_badge_text, +) + +from api.models import StatusChoices + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def cis_generator(): + """Create a CISReportGenerator 
instance for testing.""" + config = FRAMEWORK_REGISTRY["cis"] + return CISReportGenerator(config) + + +def _make_attr( + section: str, + profile_value: str = "Level 1", + assessment_value: str = "Automated", + sub_section: str = "", + **extras, +) -> Mock: + """Build a mock CIS_Requirement_Attribute with duck-typed fields.""" + attr = Mock() + attr.Section = section + attr.SubSection = sub_section + # CIS enums have `.value`. Use a simple Mock that exposes `.value`. + attr.Profile = Mock(value=profile_value) + attr.AssessmentStatus = Mock(value=assessment_value) + attr.Description = extras.get("description", "desc") + attr.RationaleStatement = extras.get("rationale", "the rationale") + attr.ImpactStatement = extras.get("impact", "the impact") + attr.RemediationProcedure = extras.get("remediation", "the remediation") + attr.AuditProcedure = extras.get("audit", "the audit") + attr.AdditionalInformation = "" + attr.DefaultValue = "" + attr.References = extras.get("references", "https://example.com") + return attr + + +@pytest.fixture +def basic_cis_compliance_data(): + """Create basic ComplianceData for CIS testing (no requirements).""" + return ComplianceData( + tenant_id="tenant-123", + scan_id="scan-456", + provider_id="provider-789", + compliance_id="cis_5.0_aws", + framework="CIS", + name="CIS Amazon Web Services Foundations Benchmark v5.0.0", + version="5.0", + description="Center for Internet Security AWS Foundations Benchmark", + ) + + +@pytest.fixture +def populated_cis_compliance_data(basic_cis_compliance_data): + """CIS data with mixed requirements across 2 sections, Profile L1/L2, Pass/Fail/Manual.""" + data = basic_cis_compliance_data + data.requirements = [ + RequirementData( + id="1.1", + description="Maintain current contact details", + status=StatusChoices.PASS, + passed_findings=5, + failed_findings=0, + total_findings=5, + checks=["aws_check_1"], + ), + RequirementData( + id="1.2", + description="Ensure root account has no access keys", + 
status=StatusChoices.FAIL, + passed_findings=0, + failed_findings=3, + total_findings=3, + checks=["aws_check_2"], + ), + RequirementData( + id="1.3", + description="Ensure MFA is enabled for all IAM users", + status=StatusChoices.MANUAL, + checks=[], + ), + RequirementData( + id="2.1", + description="Ensure S3 Buckets are logging", + status=StatusChoices.PASS, + passed_findings=2, + failed_findings=0, + total_findings=2, + checks=["aws_check_3"], + ), + RequirementData( + id="2.2", + description="Ensure encryption at rest is enabled", + status=StatusChoices.FAIL, + passed_findings=0, + failed_findings=4, + total_findings=4, + checks=["aws_check_4"], + ), + ] + data.attributes_by_requirement_id = { + "1.1": { + "attributes": { + "req_attributes": [ + _make_attr( + "1 Identity and Access Management", + profile_value="Level 1", + assessment_value="Automated", + ) + ], + "checks": ["aws_check_1"], + } + }, + "1.2": { + "attributes": { + "req_attributes": [ + _make_attr( + "1 Identity and Access Management", + profile_value="Level 1", + assessment_value="Automated", + ) + ], + "checks": ["aws_check_2"], + } + }, + "1.3": { + "attributes": { + "req_attributes": [ + _make_attr( + "1 Identity and Access Management", + profile_value="Level 2", + assessment_value="Manual", + ) + ], + "checks": [], + } + }, + "2.1": { + "attributes": { + "req_attributes": [ + _make_attr( + "2 Storage", + profile_value="Level 2", + assessment_value="Automated", + ) + ], + "checks": ["aws_check_3"], + } + }, + "2.2": { + "attributes": { + "req_attributes": [ + _make_attr( + "2 Storage", + profile_value="Level 1", + assessment_value="Automated", + ) + ], + "checks": ["aws_check_4"], + } + }, + } + return data + + +# ============================================================================= +# Helper function tests +# ============================================================================= + + +class TestNormalizeProfile: + """Test suite for _normalize_profile helper.""" + + def 
test_level_1_string(self): + assert _normalize_profile(Mock(value="Level 1")) == "L1" + + def test_level_2_string(self): + assert _normalize_profile(Mock(value="Level 2")) == "L2" + + def test_e3_level_1(self): + assert _normalize_profile(Mock(value="E3 Level 1")) == "L1" + + def test_e5_level_2(self): + assert _normalize_profile(Mock(value="E5 Level 2")) == "L2" + + def test_none_returns_other(self): + assert _normalize_profile(None) == "Other" + + def test_substring_trap_rejected(self): + """Unrelated tokens containing the literal ``L2`` must NOT map to L2.""" + # A future enum value like "CL2 Kubernetes Worker" would be silently + # misclassified by a naive substring check. + assert _normalize_profile(Mock(value="CL2 Worker")) == "Other" + assert _normalize_profile(Mock(value="HL2 Legacy")) == "Other" + + def test_raw_string_level_1(self): + # Mock without .value falls back to str(profile); use a real string + class NoValue: + def __str__(self): + return "Level 1" + + assert _normalize_profile(NoValue()) == "L1" + + def test_unknown_profile_returns_other(self): + assert _normalize_profile(Mock(value="Custom Profile")) == "Other" + + +class TestProfileBadgeText: + def test_l1_label(self): + assert _profile_badge_text("L1") == "Level 1" + + def test_l2_label(self): + assert _profile_badge_text("L2") == "Level 2" + + def test_other_label(self): + assert _profile_badge_text("Other") == "Other" + + +# ============================================================================= +# Generator initialization +# ============================================================================= + + +class TestCISGeneratorInitialization: + def test_generator_created(self, cis_generator): + assert cis_generator is not None + assert cis_generator.config.name == "cis" + + def test_generator_language(self, cis_generator): + assert cis_generator.config.language == "en" + + def test_generator_sections_dynamic(self, cis_generator): + # CIS sections differ per variant so 
config.sections MUST be None + assert cis_generator.config.sections is None + + def test_attribute_fields_contain_cis_specific(self, cis_generator): + for field in ("Profile", "AssessmentStatus", "RationaleStatement"): + assert field in cis_generator.config.attribute_fields + + +# ============================================================================= +# _derive_sections +# ============================================================================= + + +class TestDeriveSections: + def test_preserves_first_seen_order( + self, cis_generator, populated_cis_compliance_data + ): + sections = cis_generator._derive_sections(populated_cis_compliance_data) + assert sections == [ + "1 Identity and Access Management", + "2 Storage", + ] + + def test_deduplicates_sections(self, cis_generator, basic_cis_compliance_data): + basic_cis_compliance_data.requirements = [ + RequirementData(id="1.1", description="a", status=StatusChoices.PASS), + RequirementData(id="1.2", description="b", status=StatusChoices.PASS), + ] + attr = _make_attr("1 IAM") + basic_cis_compliance_data.attributes_by_requirement_id = { + "1.1": {"attributes": {"req_attributes": [attr], "checks": []}}, + "1.2": {"attributes": {"req_attributes": [attr], "checks": []}}, + } + assert cis_generator._derive_sections(basic_cis_compliance_data) == ["1 IAM"] + + def test_empty_data_returns_empty(self, cis_generator, basic_cis_compliance_data): + basic_cis_compliance_data.requirements = [] + basic_cis_compliance_data.attributes_by_requirement_id = {} + assert cis_generator._derive_sections(basic_cis_compliance_data) == [] + + +# ============================================================================= +# _compute_statistics +# ============================================================================= + + +class TestComputeStatistics: + def test_totals(self, cis_generator, populated_cis_compliance_data): + stats = cis_generator._compute_statistics(populated_cis_compliance_data) + assert stats["total"] == 5 + 
assert stats["passed"] == 2 + assert stats["failed"] == 2 + assert stats["manual"] == 1 + + def test_overall_compliance_excludes_manual( + self, cis_generator, populated_cis_compliance_data + ): + stats = cis_generator._compute_statistics(populated_cis_compliance_data) + # 2 passed / 4 evaluated (pass + fail) = 50% + assert stats["overall_compliance"] == pytest.approx(50.0) + + def test_overall_compliance_all_manual( + self, cis_generator, basic_cis_compliance_data + ): + basic_cis_compliance_data.requirements = [ + RequirementData(id="x", description="d", status=StatusChoices.MANUAL), + ] + attr = _make_attr("1 IAM", profile_value="Level 1", assessment_value="Manual") + basic_cis_compliance_data.attributes_by_requirement_id = { + "x": {"attributes": {"req_attributes": [attr], "checks": []}}, + } + stats = cis_generator._compute_statistics(basic_cis_compliance_data) + # No evaluated → defaults to 100% + assert stats["overall_compliance"] == 100.0 + + def test_profile_counts(self, cis_generator, populated_cis_compliance_data): + stats = cis_generator._compute_statistics(populated_cis_compliance_data) + profile = stats["profile_counts"] + # From fixture: + # L1: 1.1 (PASS, Auto), 1.2 (FAIL, Auto), 2.2 (FAIL, Auto) → pass=1, fail=2, manual=0 + # L2: 1.3 (MANUAL, Manual), 2.1 (PASS, Auto) → pass=1, fail=0, manual=1 + assert profile["L1"] == {"passed": 1, "failed": 2, "manual": 0} + assert profile["L2"] == {"passed": 1, "failed": 0, "manual": 1} + + def test_assessment_counts(self, cis_generator, populated_cis_compliance_data): + stats = cis_generator._compute_statistics(populated_cis_compliance_data) + assessment = stats["assessment_counts"] + # Automated: 1.1 PASS, 1.2 FAIL, 2.1 PASS, 2.2 FAIL → pass=2, fail=2, manual=0 + # Manual: 1.3 MANUAL → pass=0, fail=0, manual=1 + assert assessment["Automated"] == {"passed": 2, "failed": 2, "manual": 0} + assert assessment["Manual"] == {"passed": 0, "failed": 0, "manual": 1} + + def
test_top_failing_sections_includes_all_evaluated( + self, cis_generator, populated_cis_compliance_data + ): + stats = cis_generator._compute_statistics(populated_cis_compliance_data) + top = stats["top_failing_sections"] + # Both sections have 1 PASS + 1 FAIL evaluated → tied at 50%. The + # sort is stable, so both must appear and both must be capped at + # 5 entries. + assert len(top) == 2 + section_names = {name for name, _ in top} + assert section_names == { + "1 Identity and Access Management", + "2 Storage", + } + + def test_compute_statistics_is_memoized( + self, cis_generator, populated_cis_compliance_data + ): + """Calling ``_compute_statistics`` twice with the same data must + reuse the cached value and not re-run the uncached kernel.""" + with patch.object( + CISReportGenerator, + "_compute_statistics_uncached", + wraps=cis_generator._compute_statistics_uncached, + ) as spy: + cis_generator._compute_statistics(populated_cis_compliance_data) + cis_generator._compute_statistics(populated_cis_compliance_data) + assert spy.call_count == 1 + + +# ============================================================================= +# Executive summary +# ============================================================================= + + +class TestCISExecutiveSummary: + def test_title_present(self, cis_generator, populated_cis_compliance_data): + elements = cis_generator.create_executive_summary(populated_cis_compliance_data) + paragraphs = [e for e in elements if isinstance(e, Paragraph)] + text = " ".join(str(p.text) for p in paragraphs) + assert "Executive Summary" in text + + def test_tables_rendered(self, cis_generator, populated_cis_compliance_data): + elements = cis_generator.create_executive_summary(populated_cis_compliance_data) + tables = [e for e in elements if isinstance(e, Table)] + # Exact count: Summary, Profile, Assessment, Top Failing Sections = 4.
+ assert len(tables) == 4 + + def test_no_requirements(self, cis_generator, basic_cis_compliance_data): + basic_cis_compliance_data.requirements = [] + basic_cis_compliance_data.attributes_by_requirement_id = {} + elements = cis_generator.create_executive_summary(basic_cis_compliance_data) + # With no requirements: Summary table always renders, and both Profile + # and Assessment breakdown tables render with a 0-filled default row, + # but Top Failing Sections is suppressed → exactly 3 tables. + tables = [e for e in elements if isinstance(e, Table)] + assert len(tables) == 3 + + +# ============================================================================= +# Charts section +# ============================================================================= + + +class TestCISChartsSection: + def test_charts_rendered(self, cis_generator, populated_cis_compliance_data): + elements = cis_generator.create_charts_section(populated_cis_compliance_data) + # At least 1 image for the pie + 1 for section bar + 1 for stacked + images = [e for e in elements if isinstance(e, Image)] + assert len(images) >= 1 + + def test_charts_no_data_no_crash(self, cis_generator, basic_cis_compliance_data): + basic_cis_compliance_data.requirements = [] + basic_cis_compliance_data.attributes_by_requirement_id = {} + elements = cis_generator.create_charts_section(basic_cis_compliance_data) + # Must not raise; may or may not have any Image + assert isinstance(elements, list) + + +# ============================================================================= +# Requirements index +# ============================================================================= + + +class TestCISRequirementsIndex: + def test_title_present(self, cis_generator, populated_cis_compliance_data): + elements = cis_generator.create_requirements_index( + populated_cis_compliance_data + ) + paragraphs = [e for e in elements if isinstance(e, Paragraph)] + text = " ".join(str(p.text) for p in paragraphs) + assert
"Requirements Index" in text + + def test_groups_by_section(self, cis_generator, populated_cis_compliance_data): + elements = cis_generator.create_requirements_index( + populated_cis_compliance_data + ) + paragraphs = [e for e in elements if isinstance(e, Paragraph)] + text = " ".join(str(p.text) for p in paragraphs) + assert "1 Identity and Access Management" in text + assert "2 Storage" in text + + def test_renders_tables_per_section( + self, cis_generator, populated_cis_compliance_data + ): + elements = cis_generator.create_requirements_index( + populated_cis_compliance_data + ) + # One table per section with requirements. ``create_data_table`` + # returns a LongTable when the row count exceeds its threshold and a + # plain Table otherwise — both are valid. + tables = [e for e in elements if isinstance(e, (Table, LongTable))] + assert len(tables) == 2 + + +# ============================================================================= +# Detailed findings extras hook +# ============================================================================= + + +class TestRenderRequirementDetailExtras: + def test_inserts_all_fields(self, cis_generator, populated_cis_compliance_data): + req = populated_cis_compliance_data.requirements[1] # 1.2 FAIL + extras = cis_generator._render_requirement_detail_extras( + req, populated_cis_compliance_data + ) + text = " ".join(str(p.text) for p in extras if isinstance(p, Paragraph)) + assert "Rationale" in text + assert "Impact" in text + assert "Audit Procedure" in text + assert "Remediation" in text + assert "References" in text + + def test_missing_metadata_returns_empty( + self, cis_generator, basic_cis_compliance_data + ): + basic_cis_compliance_data.attributes_by_requirement_id = {} + req = RequirementData(id="99", description="unknown", status=StatusChoices.FAIL) + extras = cis_generator._render_requirement_detail_extras( + req, basic_cis_compliance_data + ) + assert extras == [] + + def test_escapes_html_chars(self,
cis_generator, basic_cis_compliance_data): + attr = _make_attr( + "1 IAM", + rationale="", + ) + basic_cis_compliance_data.attributes_by_requirement_id = { + "1.1": {"attributes": {"req_attributes": [attr], "checks": []}} + } + req = RequirementData(id="1.1", description="d", status=StatusChoices.FAIL) + extras = cis_generator._render_requirement_detail_extras( + req, basic_cis_compliance_data + ) + text = " ".join(str(p.text) for p in extras if isinstance(p, Paragraph)) + assert "