Skip to content

Commit 30d1025

Browse files
committed
Add tests exercising the ensure_ascii=False encoder paths
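Cover long runs that cross the scan windows and the short-string guard, with a special character at every offset in 1-byte and wider strings, plus the no-escape verbatim fast path and the escaped fallback. This commit also changes Lib/copy.py: _deepcopy_list, _deepcopy_tuple and _deepcopy_dict now inline the atomic-type check (type(a) in _atomic_types) so that atomic elements, keys and values skip the per-item deepcopy() call.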
1 parent 0d5d951 commit 30d1025

2 files changed

Lines changed: 35 additions & 3 deletions

File tree

Lib/copy.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,12 +174,15 @@ def _deepcopy_list(x, memo, deepcopy=deepcopy):
174174
memo[id(x)] = y
175175
append = y.append
176176
for a in x:
177-
append(deepcopy(a, memo))
177+
# Inline the atomic-type check so atomic elements (int, str, None, ...)
178+
# skip the deepcopy() call overhead entirely; deepcopy() would just
179+
# return them unchanged after the same check.
180+
append(a if type(a) in _atomic_types else deepcopy(a, memo))
178181
return y
179182
d[list] = _deepcopy_list
180183

181184
def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
182-
y = [deepcopy(a, memo) for a in x]
185+
y = [a if type(a) in _atomic_types else deepcopy(a, memo) for a in x]
183186
# We're not going to put the tuple in the memo, but it's still important we
184187
# check for it, in case the tuple contains recursive mutable structures.
185188
try:
@@ -199,7 +202,11 @@ def _deepcopy_dict(x, memo, deepcopy=deepcopy):
199202
y = {}
200203
memo[id(x)] = y
201204
for key, value in x.items():
202-
y[deepcopy(key, memo)] = deepcopy(value, memo)
205+
# Inline the atomic-type check for keys and values: atomic objects
206+
# (str keys, int/str/None values, ...) are returned as-is by
207+
# deepcopy(), so skip the per-item call in that common case.
208+
y[key if type(key) in _atomic_types else deepcopy(key, memo)] = (
209+
value if type(value) in _atomic_types else deepcopy(value, memo))
203210
return y
204211
d[dict] = _deepcopy_dict
205212

Lib/test/test_json/test_unicode.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,31 @@ def test_ascii_non_printable_encode(self):
3939
self.assertEqual(self.dumps(u, ensure_ascii=False),
4040
'"\\b\\t\\n\\f\\r\\u0000\\u001f\x7f"')
4141

42+
def test_ensure_ascii_false_long_string_paths(self):
43+
# Exercise the encoder's escape-size scan for ensure_ascii=False over
44+
# long runs that cross the 8-byte scan windows and the short-string
45+
# guard: a special character at every offset, in 1-byte (ASCII and
46+
# Latin-1) and wider (BMP, astral) strings.
47+
dumps, loads = self.dumps, self.loads
48+
for n in range(40):
49+
run = "a" * n
50+
for tail in ('"', "\\", "\n", "\x00", "\x1f", "\x7f", "\xe9",
51+
"中", "\U0001f600"):
52+
s = run + tail + "tail"
53+
self.assertEqual(loads(dumps(s, ensure_ascii=False)), s)
54+
# The no-escape fast path returns the string verbatim between quotes,
55+
# including kept-as-is Latin-1 and 0x7f.
56+
for s in ("x" * 20, "\xe9" * 20, "kept latin1 \xe9\xff \x7f text " * 3):
57+
self.assertEqual(dumps(s, ensure_ascii=False), '"' + s + '"')
58+
# The structural escapes and control chars are still escaped after a
59+
# long no-escape run.
60+
self.assertEqual(dumps("a" * 20 + '"', ensure_ascii=False),
61+
'"' + "a" * 20 + '\\""')
62+
self.assertEqual(dumps("a" * 20 + "\\", ensure_ascii=False),
63+
'"' + "a" * 20 + '\\\\"')
64+
self.assertEqual(dumps("a" * 20 + "\x01", ensure_ascii=False),
65+
'"' + "a" * 20 + '\\u0001"')
66+
4267
def test_ascii_non_printable_decode(self):
4368
self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'),
4469
'\b\t\n\f\r')

0 commit comments

Comments
 (0)