From 414199e3e053844d2577c51b89184f97844da301 Mon Sep 17 00:00:00 2001 From: Thomas Foutrein Date: Tue, 16 Jun 2026 18:16:49 +0200 Subject: [PATCH] Render inline tables in O(n) instead of O(n^2) InlineTable.as_string() recomputed, on every separator comma, two any(body[i + 1:]) tail scans (is there a following Null / a following key?) to decide whether to drop a dangling separator. On an inline table with many keys that is O(n^2). Precompute the needed facts in a single pass instead -- whether any explicit-comma whitespace is present, the index of the last real key, and the index of the last Null (deleted) element -- and turn the two per-comma scans into O(1) index comparisons. The render is now O(n). No behaviour change: the precomputed predicates are exactly equivalent to the tail scans. Full suite incl. toml-test conformance passes; round-trip output is byte-identical -- including edited inline tables (keys added/removed, trailing/leading commas, comments, dotted/nested) -- verified by a differential over ~7k generated inline tables with del/setitem mutations. ~4x faster rendering a 50-key inline table, ~40x at 800 keys. --- CHANGELOG.md | 1 + tests/test_items.py | 22 ++++++++++++++++++++++ tomlkit/items.py | 38 +++++++++++++++++++------------------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26584a1c..3b6606dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - Speed up parsing of arrays that close right after a value (e.g. the `files = [...]` blocks that dominate lock files): the parser no longer attempts to read a value while sitting on the closing `]`, which previously built an `UnexpectedCharError` just to discard it — and constructing that exception eagerly computes a line/column by scanning the whole document, making it O(document size) per such array. ([#517](https://github.com/python-poetry/tomlkit/pull/517)) - Speed up parsing of multiline strings by bulk-appending the run of ordinary characters — across raw line feeds and tabs — up to the next delimiter, backslash, carriage return or control character, instead of one character at a time. This extends to `"""`/`'''` bodies the single-line fast path added in [#491](https://github.com/python-poetry/tomlkit/pull/491); a `\r` still stops the scan so `\r\n` stays validated and byte-for-byte preserved. ([#518](https://github.com/python-poetry/tomlkit/pull/518)) - Speed up `unwrap()` (converting a parsed document to a plain `dict`) by resolving each key directly from the container's key map instead of iterating the inherited `MutableMapping` view, which rebuilt a `SingleKey` from the bare string for every key just to re-look-up the value. Out-of-order tables still resolve through their proxy, so their validation is unchanged. ([#521](https://github.com/python-poetry/tomlkit/pull/521)) +- Speed up rendering (`as_string()` / `dumps()`) of inline tables with many keys by precomputing the last-key and last-deleted-element indices in a single pass, instead of rescanning the remaining body on every separator comma — turning an O(n²) render into O(n). ([#525](https://github.com/python-poetry/tomlkit/pull/525)) ### Fixed diff --git a/tests/test_items.py b/tests/test_items.py index b4653e72..6256fbd7 100644 --- a/tests/test_items.py +++ b/tests/test_items.py @@ -958,6 +958,28 @@ def test_deleting_inline_table_middle_element_does_not_leave_double_separator() assert parse(rendered).as_string() == rendered +def test_inline_table_render_after_edits() -> None: + # InlineTable.as_string() precomputes the last-key / last-Null indices in a + # single pass instead of rescanning the tail on every separator comma. + # Deleting keys (which leaves Null placeholders and dangling separators) is + # the path that exercises those lookups, so pin the exact rendered output. + def edited(src: str, *dels: str) -> str: + doc = parse(src) + for key in dels: + del doc["t"][key] + out = doc.as_string() + # whatever the spacing, the result must be valid and round-trip + assert ",," not in out and ", ," not in out + assert parse(out).as_string() == out + return out + + assert edited("t = {a = 1, b = 2, c = 3}", "c") == "t = {a = 1, b = 2 }" + assert edited("t = {a = 1, b = 2, c = 3}", "b") == "t = {a = 1, c = 3}" + assert edited("t = {a = 1, b = 2}", "b") == "t = {a = 1 }" + assert edited("t = {a = 1, b = 2}", "a") == "t = { b = 2}" + assert edited("t = {a = 1, b = 2, c = 3}", "b", "c") == "t = {a = 1 }" + + def test_adding_to_dotted_key_inside_inline_table() -> None: doc = parse("a = {b.c = 1}\n") doc["a"]["b"]["d"] = 2 diff --git a/tomlkit/items.py b/tomlkit/items.py index 6cbb29a9..50001c27 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -2055,18 +2055,21 @@ def as_string(self) -> str: buf = "{" emitted_key = False needs_separator = False - has_explicit_commas = any( - k is None and isinstance(v, Whitespace) and "," in v.s - for k, v in self._value.body - ) - last_item_idx = next( - ( - i - for i in range(len(self._value.body) - 1, -1, -1) - if self._value.body[i][0] is not None - ), - None, - ) + # Single pass over the body to precompute everything the render loop + # needs, instead of rescanning the tail on every separator comma (which + # was O(n^2) on large inline tables): whether any explicit-comma + # whitespace is present, the index of the last real key, and the index + # of the last Null (deleted) element. + has_explicit_commas = False + last_item_idx = None + last_null_idx = -1 + for _i, (_k, _v) in enumerate(self._value.body): + if _k is not None: + last_item_idx = _i + elif isinstance(_v, Whitespace) and "," in _v.s: + has_explicit_commas = True + if isinstance(_v, Null): + last_null_idx = _i pending_separator = False # Buffer position right after the last rendered value, used to place a # deferred separator comma after the value rather than after a trailing @@ -2079,13 +2082,10 @@ def as_string(self) -> str: buf += v.as_string().replace(",", "", 1) continue - has_following_null = any( - isinstance(next_v, Null) - for _, next_v in self._value.body[i + 1 :] - ) - has_following_key = any( - next_k is not None for next_k, _ in self._value.body[i + 1 :] - ) + # Equivalent to scanning body[i + 1 :] for a Null / a real + # key, but O(1) using the indices precomputed above. + has_following_null = last_null_idx > i + has_following_key = last_item_idx is not None and last_item_idx > i if has_following_null and not has_following_key: buf += v.as_string().replace(",", "", 1) continue