Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/skillspector/llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ class LLMFinding(BaseModel):
start_line: int = Field(description="Starting line number (>= 1)")
end_line: int | None = Field(default=None, description="Ending line number (optional)")
confidence: float = Field(default=0.5, description="Confidence score between 0.0 and 1.0")
explanation: str = Field(default="", description="Why this is a finding (2-3 sentences)")
remediation: str = Field(default="", description="Actionable steps to fix the issue")

@field_validator("start_line")
@classmethod
Expand Down Expand Up @@ -103,8 +101,6 @@ def to_finding(self, file: str) -> Finding:
file=file,
start_line=self.start_line,
end_line=self.end_line,
explanation=self.explanation,
remediation=self.remediation,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@
Skill manifest context:
{manifest_section}

Use the rule IDs exactly as listed. Reference the L-prefixed line numbers
when reporting findings.
Use the rule IDs exactly as listed.

| Rule ID | Detection |
|---------|-----------|
Expand Down
3 changes: 1 addition & 2 deletions src/skillspector/nodes/analyzers/semantic_quality_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@
file-type scope matches the current file. If a category says "markdown and
manifest files only", do NOT report those findings for .py or .sh files.

Use the rule IDs exactly as listed. Reference the L-prefixed line numbers
when reporting findings.
Use the rule IDs exactly as listed.

| Rule ID | Category | Applies to |
|---------|----------|------------|
Expand Down
30 changes: 1 addition & 29 deletions src/skillspector/nodes/meta_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import asyncio
import json
from typing import Literal

from pydantic import BaseModel, Field, field_validator

Expand Down Expand Up @@ -77,28 +76,14 @@ def _normalize_confidence(cls, v: object) -> float:
v = v / 100.0
return min(1.0, max(0.0, v))

intent: Literal["malicious", "negligent", "benign"] = Field(
description="Likely intent behind the finding"
)
impact: Literal["critical", "high", "medium", "low"] = Field(
description="Potential impact if exploited"
)
explanation: str = Field(default="", description="Why this is dangerous (2-3 sentences)")
remediation: str = Field(default="", description="How to fix the issue (actionable steps)")


class OverallAssessment(BaseModel):
"""Overall risk assessment for the analyzed file."""

risk_level: str = Field(description="Overall risk level: LOW, MEDIUM, HIGH, or CRITICAL")
summary: str = Field(description="Brief summary of findings")


class MetaAnalyzerResult(BaseModel):
"""Top-level structured response from the meta-analyzer LLM."""

findings: list[MetaAnalyzerFinding] = Field(default_factory=list)
overall_assessment: OverallAssessment | None = None

@field_validator("findings", mode="before")
@classmethod
Expand All @@ -112,17 +97,6 @@ def _parse_stringified_findings(cls, v: object) -> object:
return parsed if isinstance(parsed, list) else []
return v

@field_validator("overall_assessment", mode="before")
@classmethod
def _parse_stringified_assessment(cls, v: object) -> object:
"""LLMs sometimes return nested objects as JSON strings."""
if isinstance(v, str):
try:
return json.loads(v)
except (json.JSONDecodeError, TypeError):
return None
return v


# ---------------------------------------------------------------------------
# Prompt (no JSON format instructions — schema handles the structure)
Expand Down Expand Up @@ -163,9 +137,7 @@ def _parse_stringified_assessment(cls, v: object) -> object:

For each static analysis finding, evaluate:
1. Is this a true vulnerability or a false positive?
2. What is the likely intent (malicious, negligent, or benign)?
3. What is the potential impact if exploited?
4. Does the skill context make this more or less dangerous?
2. Does the skill context make this more or less dangerous?
(e.g., "cyanide" in a cooking skill = CRITICAL, in a chemistry education skill = maybe OK)

IMPORTANT: Include the start_line from each finding's Location field (the number
Expand Down
44 changes: 3 additions & 41 deletions tests/nodes/test_llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,8 +712,6 @@ def test_to_finding(self) -> None:
start_line=10,
end_line=12,
confidence=0.95,
explanation="Contains API key",
remediation="Use env vars",
)
finding = f.to_finding("config.py")
assert isinstance(finding, Finding)
Expand All @@ -722,8 +720,6 @@ def test_to_finding(self) -> None:
assert finding.start_line == 10
assert finding.end_line == 12
assert finding.confidence == 0.95
assert finding.explanation == "Contains API key"
assert finding.remediation == "Use env vars"

def test_model_dump(self) -> None:
f = LLMFinding(
Expand All @@ -736,7 +732,7 @@ def test_model_dump(self) -> None:
d = f.model_dump()
assert d["rule_id"] == "SEC-002"
assert d["severity"] == "MEDIUM"
assert d["explanation"] == ""
assert "explanation" not in d
assert d["end_line"] is None


Expand All @@ -748,8 +744,6 @@ def test_valid_finding(self) -> None:
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
explanation="Dangerous",
remediation="Fix it",
)
Expand All @@ -764,15 +758,11 @@ def test_confidence_is_clamped(self) -> None:
pattern_id="E1",
is_vulnerability=True,
confidence=1.5,
intent="malicious",
impact="high",
)
low = MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=-0.2,
intent="malicious",
impact="high",
)
assert high.confidence == 1.0
assert low.confidence == 0.0
Expand All @@ -783,20 +773,18 @@ def test_confidence_100_scale_normalized(self) -> None:
pattern_id="E1",
is_vulnerability=True,
confidence=100,
intent="malicious",
impact="high",
)
assert f.confidence == pytest.approx(1.0)

def test_confidence_75_scale_normalized(self) -> None:
f = MetaAnalyzerFinding(
pattern_id="E1", is_vulnerability=True, confidence=75, intent="malicious", impact="high"
pattern_id="E1", is_vulnerability=True, confidence=75
)
assert f.confidence == pytest.approx(0.75)

def test_confidence_negative_clamped(self) -> None:
f = MetaAnalyzerFinding(
pattern_id="E1", is_vulnerability=True, confidence=-5, intent="malicious", impact="high"
pattern_id="E1", is_vulnerability=True, confidence=-5
)
assert f.confidence == pytest.approx(0.0)

Expand All @@ -806,18 +794,6 @@ def test_confidence_validation(self) -> None:
pattern_id="E1",
is_vulnerability=True,
confidence="bad",
intent="malicious",
impact="high",
)

def test_intent_validation(self) -> None:
with pytest.raises(ValueError):
MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=0.5,
intent="unknown",
impact="high",
)

def test_empty_findings(self) -> None:
Expand All @@ -829,8 +805,6 @@ def test_start_line_optional(self) -> None:
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
)
assert f_no_line.start_line is None

Expand All @@ -839,8 +813,6 @@ def test_start_line_optional(self) -> None:
start_line=42,
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
)
assert f_with_line.start_line == 42

Expand All @@ -849,8 +821,6 @@ def test_model_dump(self) -> None:
pattern_id="E2",
is_vulnerability=True,
confidence=0.8,
intent="negligent",
impact="medium",
)
d = f.model_dump()
assert d["pattern_id"] == "E2"
Expand Down Expand Up @@ -1044,8 +1014,6 @@ def test_converts_pydantic_to_dicts(self) -> None:
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
explanation="Bad stuff",
),
]
Expand Down Expand Up @@ -1586,8 +1554,6 @@ def test_run_batches_calls_structured_llm_per_batch(self, mock_get_model: MagicM
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
)
],
)
Expand Down Expand Up @@ -1631,8 +1597,6 @@ async def test_arun_batches_calls_ainvoke_per_batch(self, mock_get_model: MagicM
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
)
],
)
Expand Down Expand Up @@ -1665,8 +1629,6 @@ async def test_arun_batches_results_compatible_with_apply_filter(
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
explanation="Dangerous",
remediation="Fix it",
)
Expand Down
7 changes: 5 additions & 2 deletions tests/nodes/test_semantic_quality_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,11 @@ def _patched_init(self_inner, *args, **kwargs):
assert cred_finding.file == "scripts/helper.py"
assert cred_finding.start_line == 5
assert cred_finding.confidence == 0.95
assert cred_finding.explanation is not None
assert cred_finding.remediation is not None
# Discovery findings no longer carry explanation/remediation — the
# meta-analyzer is the authoritative enrichment stage, so these are
# populated downstream (or from pattern_defaults), not here.
assert cred_finding.explanation is None
assert cred_finding.remediation is None


class TestFixtureSafeSkill:
Expand Down
Loading