Skip to content

key insertion is linear in size of document #540

Description

@dimbleby

So inserting lots of keys is quadratic in the number of keys

#!/usr/bin/env python3

import gc
import time

import tomlkit


def insert_keys(n: int) -> float:
    doc = tomlkit.parse("[t]\n")
    table = doc["t"]

    gc.collect()
    gc.disable()
    try:
        start = time.perf_counter()
        for i in range(n):
            table[f"k{i}"] = i
        elapsed = time.perf_counter() - start
    finally:
        gc.enable()

    return elapsed


def main() -> None:
    print()
    print(f"{'N':>7} {'total (ms)':>12} {'us/key':>10} {'ratio':>7}")

    prev = None
    for n in (500, 1000, 2000, 4000, 8000):
        elapsed = insert_keys(n)
        total_ms = elapsed * 1e3
        us_per_key = elapsed / n * 1e6
        ratio = "" if prev is None else f"{elapsed / prev:6.2f}x"
        print(f"{n:>7} {total_ms:12.2f} {us_per_key:10.2f} {ratio:>7}")
        prev = elapsed


if __name__ == "__main__":
    main()

results

      N   total (ms)     us/key   ratio
    500        59.58     119.16
   1000       225.36     225.36   3.78x
   2000       983.92     491.96   4.37x
   4000      3569.23     892.31   3.63x
   8000     16247.27    2030.91   4.55x

in which each doubling of the number of keys causes the benchmark to take four times as long.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions