1818
1919import re
2020import sys
21- import textwrap
2221from dataclasses import dataclass
2322from pathlib import Path
23+ from typing import Any
24+
25+ from gherkin import Parser as GherkinParser
2426
2527PROJECT_ROOT = Path (__file__ ).resolve ().parents [4 ]
2628FEATURES_DIR = PROJECT_ROOT / "docs" / "features"
2931FEATURE_STAGES = ("backlog" , "in-progress" , "completed" )
3032
3133ID_TAG_RE = re .compile (r"@id:([a-f0-9]{8})" )
32- DEPRECATED_TAG_RE = re .compile (r"@deprecated" )
33- EXAMPLE_RE = re .compile (r"^\s*Example:\s*(.+)$" )
34- GIVEN_RE = re .compile (r"^\s*Given\s+(.+)$" )
35- WHEN_RE = re .compile (r"^\s*When\s+(.+)$" )
36- THEN_RE = re .compile (r"^\s*Then\s+(.+)$" )
37- FEATURE_RE = re .compile (r"^\s*Feature:\s*(.+)$" )
3834
3935TEST_FUNC_RE = re .compile (r"^def (test_\w+)\(.*\)" )
4036TEST_ID_RE = re .compile (r"test_\w+_([a-f0-9]{8})\b" )
@@ -90,97 +86,112 @@ def parse_feature_file(path: Path) -> FeatureFile | None:
9086 FeatureFile if valid, None if no Feature: line found.
9187 """
9288 text = path .read_text (encoding = "utf-8" )
93- lines = text .splitlines ()
94-
95- feature_name = ""
96- for line in lines :
97- m = FEATURE_RE .match (line )
98- if m :
99- feature_name = m .group (1 ).strip ()
100- break
101-
102- if not feature_name :
89+ doc = GherkinParser ().parse (text )
90+ feature : dict [str , Any ] | None = doc .get ("feature" )
91+ if not feature or not feature .get ("name" ):
10392 return None
10493
10594 story_slug = path .stem
106- examples = _parse_examples ( lines , str (path ))
95+ examples = _extract_examples ( feature , str (path ))
10796 return FeatureFile (
10897 path = path ,
109- feature_name = feature_name ,
98+ feature_name = feature [ "name" ] ,
11099 story_slug = story_slug ,
111100 examples = examples ,
112101 )
113102
114103
def _extract_examples(
    feature: dict[str, Any], source_file: str
) -> list[GherkinExample]:
    """Extract all Example blocks from a parsed Gherkin feature AST.

    Scenarios are collected both from the feature's direct children and
    from children nested inside ``rule`` entries, so examples grouped
    under a ``Rule:`` block are not lost.

    Args:
        feature: The 'feature' dict from gherkin-official Parser output.
        source_file: Path string for provenance tracking.

    Returns:
        List of parsed GherkinExample objects, in document order.
        Scenarios for which ``_scenario_to_example`` returns None are
        omitted.
    """
    examples: list[GherkinExample] = []
    for scenario in _collect_scenarios(feature.get("children", [])):
        example = _scenario_to_example(scenario, source_file)
        if example is not None:
            examples.append(example)
    return examples


def _collect_scenarios(children: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the scenario dicts under ``children``, descending into rules.

    Args:
        children: A 'children' list taken from a feature or rule dict.

    Returns:
        Scenario dicts in document order. Children that carry neither a
        'scenario' nor a 'rule' entry (e.g. backgrounds) are skipped.
    """
    scenarios: list[dict[str, Any]] = []
    for child in children:
        scenario = child.get("scenario")
        if scenario is not None:
            scenarios.append(scenario)
            continue
        # A Rule groups its own scenarios one level down; without this
        # descent every example under a Rule would be silently ignored.
        rule = child.get("rule")
        if rule is not None:
            scenarios.extend(_collect_scenarios(rule.get("children", [])))
    return scenarios
148125
149126
def _scenario_to_example(
    scenario: dict[str, Any], source_file: str
) -> GherkinExample | None:
    """Convert a single parsed scenario dict to a GherkinExample.

    Skips scenarios without an @id tag.

    Args:
        scenario: A scenario dict from the Gherkin AST.
        source_file: Path string for provenance tracking.

    Returns:
        GherkinExample if the scenario has an @id tag, None otherwise.
    """
    tags = scenario.get("tags", [])
    id_hex = _extract_id_tag(tags)
    if id_hex is None:
        return None

    # Read tag names with .get(), as _extract_id_tag does, so a tag dict
    # lacking a "name" key cannot raise KeyError after the ID lookup
    # has already accepted the same list.
    deprecated = any(tag.get("name") == "@deprecated" for tag in tags)
    given, when, then = _extract_steps(scenario.get("steps", []))
    return GherkinExample(
        id_hex=id_hex,
        title=scenario.get("name", ""),
        given=given,
        when=when,
        then=then,
        deprecated=deprecated,
        source_file=source_file,
    )
157+
158+
def _extract_id_tag(tags: list[dict[str, Any]]) -> str | None:
    """Return the hex ID carried by the first @id:<hex> tag, if any.

    Args:
        tags: List of tag dicts from the Gherkin AST; each tag's "name"
            entry is searched with ID_TAG_RE (a missing name counts as
            an empty string).

    Returns:
        The captured ID from the first matching tag, or None if no tag
        matches.
    """
    # Lazy generators keep the original short-circuit: scanning stops at
    # the first tag whose name matches.
    candidates = (ID_TAG_RE.search(tag.get("name", "")) for tag in tags)
    return next((found.group(1) for found in candidates if found), None)
173+
174+
175+ def _extract_steps (steps : list [dict [str , Any ]]) -> tuple [str , str , str ]:
176+ """Extract Given/When/Then text from parsed Gherkin steps.
154177
155178 Args:
156- lines: All lines of the file.
157- start: Line index to start parsing from.
179+ steps: List of step dicts from the Gherkin AST.
158180
159181 Returns:
160- Tuple of (title, given, when, then, next_line_index) .
182+ Tuple of (given, when, then) step text strings .
161183 """
162- title = given = when = then = ""
163- i = start
164- while i < len (lines ):
165- line = lines [i ]
166- if ID_TAG_RE .search (line ):
167- break
168- m_example = EXAMPLE_RE .match (line )
169- m_given = GIVEN_RE .match (line )
170- m_when = WHEN_RE .match (line )
171- m_then = THEN_RE .match (line )
172- if m_example :
173- title = m_example .group (1 ).strip ()
174- elif m_given :
175- given = m_given .group (1 ).strip ()
176- elif m_when :
177- when = m_when .group (1 ).strip ()
178- elif m_then :
179- then = m_then .group (1 ).strip ()
180- i += 1
181- break
182- i += 1
183- return title , given , when , then , i
184+ given = when = then = ""
185+ for step in steps :
186+ keyword_type = step .get ("keywordType" , "" )
187+ text = step .get ("text" , "" )
188+ if keyword_type == "Context" :
189+ given = text
190+ elif keyword_type == "Action" :
191+ when = text
192+ elif keyword_type == "Outcome" :
193+ then = text
194+ return given , when , then
184195
185196
186197def generate_stub (feature_slug : str , example : GherkinExample ) -> str :
@@ -198,39 +209,39 @@ def generate_stub(feature_slug: str, example: GherkinExample) -> str:
198209 if example .deprecated :
199210 markers .append ("@pytest.mark.deprecated" )
200211
201- docstring = _build_docstring (example )
202212 marker_lines = "\n " .join (markers )
213+ docstring = _build_docstring (example )
203214
204- return textwrap .dedent (f"""\
205- { marker_lines }
206- def { func_name } () -> None:
207- { docstring }
208- # Given
209-
210- # When
211-
212- # Then
213- raise NotImplementedError
214- """ )
215+ lines = [
216+ marker_lines ,
217+ f"def { func_name } () -> None:" ,
218+ * docstring ,
219+ " # Given" ,
220+ "" ,
221+ " # When" ,
222+ "" ,
223+ " # Then" ,
224+ " raise NotImplementedError" ,
225+ ]
226+ return "\n " .join (lines ) + "\n "
215227
216228
217- def _build_docstring (example : GherkinExample ) -> str :
218- """Build a properly indented docstring for a test stub.
229+ def _build_docstring (example : GherkinExample ) -> list [ str ] :
230+ """Build properly indented docstring lines for a test stub.
219231
220232 Args:
221233 example: The parsed Gherkin example.
222234
223235 Returns:
224- Indented docstring block including triple quotes.
236+ List of indented lines (each with 4-space prefix) including triple quotes.
225237 """
226- lines = [
238+ return [
227239 ' """' ,
228240 f" Given: { example .given } " ,
229241 f" When: { example .when } " ,
230242 f" Then: { example .then } " ,
231243 ' """' ,
232244 ]
233- return "\n " .join (lines )
234245
235246
236247def generate_test_file (
0 commit comments