Skip to content

Commit 27a63b9

Browse files
committed
Add tests exercising the ensure_ascii=False encoder paths
Cover long runs that cross the scan windows and the short-string guard, with a special character at every offset in 1-byte and wider strings, plus the no-escape verbatim fast path and the escaped fallback.
1 parent 0d5d951 commit 27a63b9

1 file changed

Lines changed: 25 additions & 0 deletions

File tree

Lib/test/test_json/test_unicode.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,31 @@ def test_ascii_non_printable_encode(self):
3939
self.assertEqual(self.dumps(u, ensure_ascii=False),
4040
'"\\b\\t\\n\\f\\r\\u0000\\u001f\x7f"')
4141

42+
def test_ensure_ascii_false_long_string_paths(self):
    # Exercise the encoder's escape-size scan for ensure_ascii=False over
    # long runs that cross the 8-byte scan windows and the short-string
    # guard: a special character at every offset, in 1-byte (ASCII and
    # Latin-1) and wider (BMP, astral) strings.
    dumps, loads = self.dumps, self.loads
    tails = ('"', "\\", "\n", "\x00", "\x1f", "\x7f", "\xe9",
             "中", "\U0001f600")
    for n in range(40):
        run = "a" * n
        for tail in tails:
            s = run + tail + "tail"
            # subTest labels a failure with the offset and the character
            # that triggered it, and lets the remaining combinations run
            # instead of stopping at the first mismatch.
            with self.subTest(n=n, tail=tail):
                self.assertEqual(loads(dumps(s, ensure_ascii=False)), s)
    # The no-escape fast path returns the string verbatim between quotes,
    # including kept-as-is Latin-1 and 0x7f.
    for s in ("x" * 20, "\xe9" * 20, "kept latin1 \xe9\xff \x7f text " * 3):
        with self.subTest(s=s):
            self.assertEqual(dumps(s, ensure_ascii=False), '"' + s + '"')
    # The structural escapes and control chars are still escaped after a
    # long no-escape run.
    self.assertEqual(dumps("a" * 20 + '"', ensure_ascii=False),
                     '"' + "a" * 20 + '\\""')
    self.assertEqual(dumps("a" * 20 + "\\", ensure_ascii=False),
                     '"' + "a" * 20 + '\\\\"')
    self.assertEqual(dumps("a" * 20 + "\x01", ensure_ascii=False),
                     '"' + "a" * 20 + '\\u0001"')
66+
4267
def test_ascii_non_printable_decode(self):
4368
self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'),
4469
'\b\t\n\f\r')

0 commit comments

Comments
 (0)