Skip to content

Commit 302c57a

Browse files
committed
refactor(gen-tests): replace hand-rolled Gherkin parser and fix stub indentation
- Replace 6 regexes + 3 brittle parsing functions with gherkin-official canonical parser (Cucumber project, actively maintained, single dep)
- Fix textwrap.dedent bug causing Given/When/Then docstring lines to land at column 0 instead of 4-space indented in generated test stubs
- Add gherkin-official>=39.0.0 to dev dependency group
1 parent ef9664c commit 302c57a

File tree

3 files changed

+119
-91
lines changed

3 files changed

+119
-91
lines changed

.opencode/skills/tdd/scripts/gen_test_stubs.py

Lines changed: 101 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818

1919
import re
2020
import sys
21-
import textwrap
2221
from dataclasses import dataclass
2322
from pathlib import Path
23+
from typing import Any
24+
25+
from gherkin import Parser as GherkinParser
2426

2527
PROJECT_ROOT = Path(__file__).resolve().parents[4]
2628
FEATURES_DIR = PROJECT_ROOT / "docs" / "features"
@@ -29,12 +31,6 @@
2931
FEATURE_STAGES = ("backlog", "in-progress", "completed")
3032

3133
ID_TAG_RE = re.compile(r"@id:([a-f0-9]{8})")
32-
DEPRECATED_TAG_RE = re.compile(r"@deprecated")
33-
EXAMPLE_RE = re.compile(r"^\s*Example:\s*(.+)$")
34-
GIVEN_RE = re.compile(r"^\s*Given\s+(.+)$")
35-
WHEN_RE = re.compile(r"^\s*When\s+(.+)$")
36-
THEN_RE = re.compile(r"^\s*Then\s+(.+)$")
37-
FEATURE_RE = re.compile(r"^\s*Feature:\s*(.+)$")
3834

3935
TEST_FUNC_RE = re.compile(r"^def (test_\w+)\(.*\)")
4036
TEST_ID_RE = re.compile(r"test_\w+_([a-f0-9]{8})\b")
@@ -90,97 +86,112 @@ def parse_feature_file(path: Path) -> FeatureFile | None:
9086
FeatureFile if valid, None if no Feature: line found.
9187
"""
9288
text = path.read_text(encoding="utf-8")
93-
lines = text.splitlines()
94-
95-
feature_name = ""
96-
for line in lines:
97-
m = FEATURE_RE.match(line)
98-
if m:
99-
feature_name = m.group(1).strip()
100-
break
101-
102-
if not feature_name:
89+
doc = GherkinParser().parse(text)
90+
feature: dict[str, Any] | None = doc.get("feature")
91+
if not feature or not feature.get("name"):
10392
return None
10493

10594
story_slug = path.stem
106-
examples = _parse_examples(lines, str(path))
95+
examples = _extract_examples(feature, str(path))
10796
return FeatureFile(
10897
path=path,
109-
feature_name=feature_name,
98+
feature_name=feature["name"],
11099
story_slug=story_slug,
111100
examples=examples,
112101
)
113102

114103

115-
def _parse_examples(lines: list[str], source_file: str) -> list[GherkinExample]:
116-
"""Extract all Example blocks from feature file lines.
104+
def _extract_examples(
105+
feature: dict[str, Any], source_file: str
106+
) -> list[GherkinExample]:
107+
"""Extract all Example blocks from a parsed Gherkin feature AST.
117108
118109
Args:
119-
lines: Lines of the .feature file.
120-
source_file: Path string for error reporting.
110+
feature: The 'feature' dict from gherkin-official Parser output.
111+
source_file: Path string for provenance tracking.
121112
122113
Returns:
123114
List of parsed GherkinExample objects.
124115
"""
125116
examples: list[GherkinExample] = []
126-
i = 0
127-
while i < len(lines):
128-
line = lines[i]
129-
id_match = ID_TAG_RE.search(line)
130-
if id_match:
131-
id_hex = id_match.group(1)
132-
deprecated = bool(DEPRECATED_TAG_RE.search(line))
133-
title, given, when, then, i = _parse_example_block(lines, i + 1)
134-
examples.append(
135-
GherkinExample(
136-
id_hex=id_hex,
137-
title=title,
138-
given=given,
139-
when=when,
140-
then=then,
141-
deprecated=deprecated,
142-
source_file=source_file,
143-
)
144-
)
145-
else:
146-
i += 1
117+
for child in feature.get("children", []):
118+
scenario: dict[str, Any] | None = child.get("scenario")
119+
if scenario is None:
120+
continue
121+
example = _scenario_to_example(scenario, source_file)
122+
if example is not None:
123+
examples.append(example)
147124
return examples
148125

149126

150-
def _parse_example_block(
151-
lines: list[str], start: int
152-
) -> tuple[str, str, str, str, int]:
153-
"""Parse the Example/Given/When/Then lines after an @id tag.
127+
def _scenario_to_example(
128+
scenario: dict[str, Any], source_file: str
129+
) -> GherkinExample | None:
130+
"""Convert a single parsed scenario dict to a GherkinExample.
131+
132+
Skips scenarios without an @id tag.
133+
134+
Args:
135+
scenario: A scenario dict from the Gherkin AST.
136+
source_file: Path string for provenance tracking.
137+
138+
Returns:
139+
GherkinExample if the scenario has an @id tag, None otherwise.
140+
"""
141+
tags = scenario.get("tags", [])
142+
id_hex = _extract_id_tag(tags)
143+
if id_hex is None:
144+
return None
145+
146+
deprecated = any(t["name"] == "@deprecated" for t in tags)
147+
given, when, then = _extract_steps(scenario.get("steps", []))
148+
return GherkinExample(
149+
id_hex=id_hex,
150+
title=scenario.get("name", ""),
151+
given=given,
152+
when=when,
153+
then=then,
154+
deprecated=deprecated,
155+
source_file=source_file,
156+
)
157+
158+
159+
def _extract_id_tag(tags: list[dict[str, Any]]) -> str | None:
160+
"""Find the @id:<hex> tag value from a list of AST tags.
161+
162+
Args:
163+
tags: List of tag dicts from the Gherkin AST.
164+
165+
Returns:
166+
The 8-char hex ID, or None if no @id tag is present.
167+
"""
168+
for tag in tags:
169+
m = ID_TAG_RE.search(tag.get("name", ""))
170+
if m:
171+
return m.group(1)
172+
return None
173+
174+
175+
def _extract_steps(steps: list[dict[str, Any]]) -> tuple[str, str, str]:
176+
"""Extract Given/When/Then text from parsed Gherkin steps.
154177
155178
Args:
156-
lines: All lines of the file.
157-
start: Line index to start parsing from.
179+
steps: List of step dicts from the Gherkin AST.
158180
159181
Returns:
160-
Tuple of (title, given, when, then, next_line_index).
182+
Tuple of (given, when, then) step text strings.
161183
"""
162-
title = given = when = then = ""
163-
i = start
164-
while i < len(lines):
165-
line = lines[i]
166-
if ID_TAG_RE.search(line):
167-
break
168-
m_example = EXAMPLE_RE.match(line)
169-
m_given = GIVEN_RE.match(line)
170-
m_when = WHEN_RE.match(line)
171-
m_then = THEN_RE.match(line)
172-
if m_example:
173-
title = m_example.group(1).strip()
174-
elif m_given:
175-
given = m_given.group(1).strip()
176-
elif m_when:
177-
when = m_when.group(1).strip()
178-
elif m_then:
179-
then = m_then.group(1).strip()
180-
i += 1
181-
break
182-
i += 1
183-
return title, given, when, then, i
184+
given = when = then = ""
185+
for step in steps:
186+
keyword_type = step.get("keywordType", "")
187+
text = step.get("text", "")
188+
if keyword_type == "Context":
189+
given = text
190+
elif keyword_type == "Action":
191+
when = text
192+
elif keyword_type == "Outcome":
193+
then = text
194+
return given, when, then
184195

185196

186197
def generate_stub(feature_slug: str, example: GherkinExample) -> str:
@@ -198,39 +209,39 @@ def generate_stub(feature_slug: str, example: GherkinExample) -> str:
198209
if example.deprecated:
199210
markers.append("@pytest.mark.deprecated")
200211

201-
docstring = _build_docstring(example)
202212
marker_lines = "\n".join(markers)
213+
docstring = _build_docstring(example)
203214

204-
return textwrap.dedent(f"""\
205-
{marker_lines}
206-
def {func_name}() -> None:
207-
{docstring}
208-
# Given
209-
210-
# When
211-
212-
# Then
213-
raise NotImplementedError
214-
""")
215+
lines = [
216+
marker_lines,
217+
f"def {func_name}() -> None:",
218+
*docstring,
219+
" # Given",
220+
"",
221+
" # When",
222+
"",
223+
" # Then",
224+
" raise NotImplementedError",
225+
]
226+
return "\n".join(lines) + "\n"
215227

216228

217-
def _build_docstring(example: GherkinExample) -> str:
218-
"""Build a properly indented docstring for a test stub.
229+
def _build_docstring(example: GherkinExample) -> list[str]:
230+
"""Build properly indented docstring lines for a test stub.
219231
220232
Args:
221233
example: The parsed Gherkin example.
222234
223235
Returns:
224-
Indented docstring block including triple quotes.
236+
List of indented lines (each with 4-space prefix) including triple quotes.
225237
"""
226-
lines = [
238+
return [
227239
' """',
228240
f" Given: {example.given}",
229241
f" When: {example.when}",
230242
f" Then: {example.then}",
231243
' """',
232244
]
233-
return "\n".join(lines)
234245

235246

236247
def generate_test_file(

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,5 +147,6 @@ gen-tests = "python .opencode/skills/tdd/scripts/gen_test_stubs.py"
147147

148148
[dependency-groups]
149149
dev = [
150+
"gherkin-official>=39.0.0",
150151
"safety>=3.7.0",
151152
]

uv.lock

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)