From af82daeb2a6edecd5383386b7bef42312d2298a5 Mon Sep 17 00:00:00 2001 From: Mikhail Golikov Date: Thu, 2 Jul 2026 23:20:57 +0100 Subject: [PATCH 1/2] Fix over-quoting of backslashes in table cells The gherkin library already resolves cell escaping per the Gherkin spec, so the extra backslash-doubling pass in Cell.from_dict double-quoted every backslash: a cell holding a single backslash reached the step as two. Drop the redundant _to_raw_string pass so cell values are delivered exactly as gherkin resolves them. Update the outline escaped-pipe expectations that encoded the old doubled output and add a datatable regression test. Fixes #769 --- CHANGES.rst | 1 + src/pytest_bdd/gherkin_parser.py | 6 +--- tests/datatable/test_datatable.py | 55 +++++++++++++++++++++++++++++++ tests/feature/test_outline.py | 5 +-- 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 442ba367c..860473996 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -30,6 +30,7 @@ Removed Fixed +++++ +* Backslashes in datatable and examples table cells are no longer over-quoted. A cell containing a single backslash now reaches the step as a single backslash, matching the Gherkin escaping rules. `#769 `_ * Made type annotations stronger and removed most of the ``typing.Any`` usages and ``# type: ignore`` annotations. 
`#658 `_ Security diff --git a/src/pytest_bdd/gherkin_parser.py b/src/pytest_bdd/gherkin_parser.py index 8a6e4abdd..3ea5b8f8b 100644 --- a/src/pytest_bdd/gherkin_parser.py +++ b/src/pytest_bdd/gherkin_parser.py @@ -83,7 +83,7 @@ class Cell: @classmethod def from_dict(cls, data: dict[str, Any]) -> Self: - return cls(location=Location.from_dict(data["location"]), value=_to_raw_string(data["value"])) + return cls(location=Location.from_dict(data["location"]), value=data["value"]) @dataclass @@ -304,10 +304,6 @@ def from_dict(cls, data: Mapping[str, Any]) -> Self: ) -def _to_raw_string(normal_string: str) -> str: - return normal_string.replace("\\", "\\\\") - - def get_gherkin_document(abs_filename: str, encoding: str = "utf-8") -> GherkinDocument: with open(abs_filename, encoding=encoding) as f: feature_file_text = f.read() diff --git a/tests/datatable/test_datatable.py b/tests/datatable/test_datatable.py index 606ec32ad..d1ad12002 100644 --- a/tests/datatable/test_datatable.py +++ b/tests/datatable/test_datatable.py @@ -214,6 +214,61 @@ def test_datatable(): result.assert_outcomes(passed=1) +def test_datatable_preserves_backslashes(pytester): + """Backslashes in a datatable cell must not be over-quoted (see issue #769). + + Gherkin already resolves cell escaping: "\\\\" is a single backslash, "\\|" is a + literal pipe, and a lone backslash stays a single backslash. The cell value must + reach the step exactly as Gherkin resolves it, with no extra escaping.
+ """ + pytester.makefile( + ".feature", + backslash_datatable=textwrap.dedent( + r"""Feature: Backslashes in datatables + + Scenario: Backslashes are not over-quoted + Given a datatable with backslashes: + | single | double | escaped_pipe | path | + | \ | \\ | \| | C:\Users\John | + """ + ), + ) + pytester.makeconftest( + textwrap.dedent( + """\ + from pytest_bdd import given + from pytest_bdd.utils import dump_obj + + + @given("a datatable with backslashes:") + def _(datatable): + dump_obj(datatable) + + """ + ) + ) + pytester.makepyfile( + textwrap.dedent( + """\ + from pytest_bdd import scenario + + @scenario("backslash_datatable.feature", "Backslashes are not over-quoted") + def test_backslash_datatable(): + pass + """ + ) + ) + + result = pytester.runpytest("-s") + result.assert_outcomes(passed=1) + + datatable = collect_dumped_objects(result)[0] + assert datatable == [ + ["single", "double", "escaped_pipe", "path"], + ["\\", "\\", "|", r"C:\Users\John"], + ] + + def test_datatable_step_argument_is_reserved_and_cannot_be_used(pytester): pytester.makefile( ".feature", diff --git a/tests/feature/test_outline.py b/tests/feature/test_outline.py index ddeb5d562..e31937e4a 100644 --- a/tests/feature/test_outline.py +++ b/tests/feature/test_outline.py @@ -272,8 +272,9 @@ def _(string): r"bork |", r"bork||bork", r"|", - r"bork \\", - r"bork \\|", + # An escaped backslash "\\" in a cell is a single backslash, per the Gherkin spec. + "bork \\", + r"bork \|", ] From f62674d1cdc94fdc7a9dc6d1b81acc0dfc0007c3 Mon Sep 17 00:00:00 2001 From: Mikhail Golikov Date: Fri, 3 Jul 2026 10:33:46 +0100 Subject: [PATCH 2/2] Note the backslash migration in the changelog entry --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index ed52ea325..7ca202acc 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -30,7 +30,7 @@ Removed Fixed +++++ -* Backslashes in datatable and examples table cells are no longer over-quoted. 
A cell containing a single backslash now reaches the step as a single backslash, matching the Gherkin escaping rules. `#769 `_ +* Backslashes in datatable and examples table cells are no longer over-quoted. A cell containing a single backslash now reaches the step as a single backslash, matching the Gherkin escaping rules. If you worked around the old doubling in feature files or step definitions, remove that workaround. `#769 `_ * Made type annotations stronger and removed most of the ``typing.Any`` usages and ``# type: ignore`` annotations. `#658 `_ * Empty docstrings are now correctly forwarded to step functions as an empty string instead of being silently dropped, which previously caused pytest to report a missing ``docstring`` fixture. `#809 `_