|
| 1 | +"""Pin: the async cursor's NULL-rescue type-code resolution for |
| 2 | +``cursor.description`` yields cooperatively on large result sets |
| 3 | +instead of walking every row of an all-NULL column synchronously |
| 4 | +on the user loop. |
| 5 | +
|
| 6 | +PEP 249 §6.1.2 requires a real Type Object per column. When a |
| 7 | +column's row-0 tag is ``ValueType.NULL`` the resolver scans |
| 8 | +subsequent rows for the first non-NULL tag, falling back to the |
| 9 | +``UNKNOWN`` sentinel only when EVERY row at that column is NULL. |
| 10 | +A column that is NULL across the whole page (a LEFT JOIN |
| 11 | +right-side with no match, an always-NULL projection) is scanned |
| 12 | +end-to-end — O(n_null_cols × n_rows) of pure-Python iteration |
| 13 | +that ran BEFORE the first ``await`` in the result path, outside |
| 14 | +the cooperative-yield chain (wire read / drain / convert-rows). |
| 15 | +
|
| 16 | +The fix extracts the scan into an ``async def`` helper gated by |
| 17 | +``_LARGE_RESULT_ROW_THRESHOLD`` (small fetches keep the |
| 18 | +synchronous path, zero scheduler overhead) that yields |
| 19 | +``await asyncio.sleep(0)`` every ``_CONVERT_ROWS_YIELD_EVERY`` |
| 20 | +scanned inner-row steps. The resolved type-code list is |
| 21 | +byte-identical to the prior inline logic; only the all-NULL |
| 22 | +"every row is NULL" fallback walks the full count, and only that |
| 23 | +path benefits from the yields. The sync cursor surface is |
| 24 | +unchanged. |
| 25 | +""" |
| 26 | + |
| 27 | +from __future__ import annotations |
| 28 | + |
| 29 | +import asyncio |
| 30 | +import contextlib |
| 31 | +import time |
| 32 | + |
| 33 | +import pytest |
| 34 | + |
| 35 | +from dqlitedbapi.aio.cursor import _resolve_null_rescue_type_codes |
| 36 | +from dqlitedbapi.types import UNKNOWN as _UNKNOWN_TYPE |
| 37 | +from dqlitewire import ValueType |
| 38 | + |
| 39 | +pytestmark = pytest.mark.asyncio |
| 40 | + |
| 41 | + |
| 42 | +def _sync_reference( |
| 43 | + column_types: list[ValueType], |
| 44 | + row_types: list[list[ValueType]], |
| 45 | +) -> list[int | object]: |
| 46 | + """Byte-identical reference: the prior inline scan shape.""" |
| 47 | + type_codes: list[int | object] = [] |
| 48 | + for col_idx, c in enumerate(column_types): |
| 49 | + if c != ValueType.NULL: |
| 50 | + type_codes.append(int(c)) |
| 51 | + continue |
| 52 | + resolved: int | object = _UNKNOWN_TYPE |
| 53 | + for j in range(1, len(row_types)): |
| 54 | + if col_idx < len(row_types[j]): |
| 55 | + candidate = row_types[j][col_idx] |
| 56 | + if candidate != ValueType.NULL: |
| 57 | + resolved = int(candidate) |
| 58 | + break |
| 59 | + type_codes.append(resolved) |
| 60 | + return type_codes |
| 61 | + |
| 62 | + |
| 63 | +async def test_typed_row0_columns_resolve_directly() -> None: |
| 64 | + column_types = [ValueType.INTEGER, ValueType.TEXT] |
| 65 | + row_types = [[ValueType.INTEGER, ValueType.TEXT]] |
| 66 | + result = await _resolve_null_rescue_type_codes(column_types, row_types) |
| 67 | + assert result == [int(ValueType.INTEGER), int(ValueType.TEXT)] |
| 68 | + assert result == _sync_reference(column_types, row_types) |
| 69 | + |
| 70 | + |
| 71 | +async def test_null_first_row_rescued_from_later_row() -> None: |
| 72 | + # Column 0 is NULL in row 0 but INTEGER in row 2. |
| 73 | + column_types = [ValueType.NULL, ValueType.TEXT] |
| 74 | + row_types = [ |
| 75 | + [ValueType.NULL, ValueType.TEXT], |
| 76 | + [ValueType.NULL, ValueType.TEXT], |
| 77 | + [ValueType.INTEGER, ValueType.TEXT], |
| 78 | + ] |
| 79 | + result = await _resolve_null_rescue_type_codes(column_types, row_types) |
| 80 | + assert result[0] == int(ValueType.INTEGER) |
| 81 | + assert result == _sync_reference(column_types, row_types) |
| 82 | + |
| 83 | + |
| 84 | +async def test_all_null_column_falls_back_to_unknown() -> None: |
| 85 | + # Column 0 is NULL in EVERY row → UNKNOWN sentinel. |
| 86 | + column_types = [ValueType.NULL, ValueType.INTEGER] |
| 87 | + row_types = [[ValueType.NULL, ValueType.INTEGER] for _ in range(50)] |
| 88 | + result = await _resolve_null_rescue_type_codes(column_types, row_types) |
| 89 | + assert result[0] is _UNKNOWN_TYPE |
| 90 | + assert result[1] == int(ValueType.INTEGER) |
| 91 | + assert result == _sync_reference(column_types, row_types) |
| 92 | + |
| 93 | + |
| 94 | +async def test_ragged_rows_do_not_short_circuit() -> None: |
| 95 | + # Some rows are shorter than the column count; the scan must |
| 96 | + # skip them without breaking the all-row contract. |
| 97 | + column_types = [ValueType.NULL, ValueType.NULL] |
| 98 | + row_types = [ |
| 99 | + [ValueType.NULL], # ragged: only 1 col |
| 100 | + [ValueType.NULL, ValueType.NULL], |
| 101 | + [ValueType.NULL, ValueType.FLOAT], |
| 102 | + ] |
| 103 | + result = await _resolve_null_rescue_type_codes(column_types, row_types) |
| 104 | + assert result[0] is _UNKNOWN_TYPE # col 0 NULL everywhere |
| 105 | + assert result[1] == int(ValueType.FLOAT) # rescued from row 2 |
| 106 | + assert result == _sync_reference(column_types, row_types) |
| 107 | + |
| 108 | + |
| 109 | +async def test_large_all_null_column_yields_cooperatively() -> None: |
| 110 | + """A wide all-NULL fixture (200k rows, several all-NULL cols) |
| 111 | + must let a sibling coroutine make progress during the scan — |
| 112 | + the max inter-tick gap stays small. Pre-fix the scan ran |
| 113 | + synchronously with no yield, so the sibling never ticked |
| 114 | + until the entire O(cols × rows) walk finished. |
| 115 | + """ |
| 116 | + n_rows = 200_000 |
| 117 | + # 3 columns, all NULL in row 0 AND every subsequent row → each |
| 118 | + # triggers a full-row scan. |
| 119 | + column_types = [ValueType.NULL, ValueType.NULL, ValueType.NULL] |
| 120 | + row_types = [[ValueType.NULL, ValueType.NULL, ValueType.NULL] for _ in range(n_rows)] |
| 121 | + |
| 122 | + inter_tick_gaps: list[float] = [] |
| 123 | + stop = False |
| 124 | + last = time.perf_counter() |
| 125 | + |
| 126 | + async def _ticker() -> None: |
| 127 | + nonlocal last |
| 128 | + while not stop: |
| 129 | + await asyncio.sleep(0) |
| 130 | + now = time.perf_counter() |
| 131 | + inter_tick_gaps.append(now - last) |
| 132 | + last = now |
| 133 | + |
| 134 | + ticker = asyncio.create_task(_ticker()) |
| 135 | + try: |
| 136 | + last = time.perf_counter() |
| 137 | + result = await _resolve_null_rescue_type_codes(column_types, row_types) |
| 138 | + finally: |
| 139 | + stop = True |
| 140 | + await asyncio.sleep(0) |
| 141 | + ticker.cancel() |
| 142 | + with contextlib.suppress(asyncio.CancelledError): |
| 143 | + await ticker |
| 144 | + |
| 145 | + assert all(rc is _UNKNOWN_TYPE for rc in result) |
| 146 | + # Drop first/last samples (startup/shutdown); assert no gap |
| 147 | + # exceeds 200 ms. Pre-fix the single synchronous scan of |
| 148 | + # 600k cells blocked the ticker for the whole walk. |
| 149 | + if len(inter_tick_gaps) > 2: |
| 150 | + worst = max(inter_tick_gaps[1:-1]) |
| 151 | + assert worst < 0.200, ( |
| 152 | + f"description NULL-rescue scan pinned the loop for " |
| 153 | + f"{worst * 1000:.1f} ms; the per-step yield should cap " |
| 154 | + f"inter-tick gaps well under 200 ms." |
| 155 | + ) |
| 156 | + |
| 157 | + |
| 158 | +async def test_small_result_takes_synchronous_fast_path() -> None: |
| 159 | + """Below ``_LARGE_RESULT_ROW_THRESHOLD`` the resolver must NOT |
| 160 | + await ``asyncio.sleep(0)`` — small fetches pay no scheduler |
| 161 | + overhead. Instrument ``asyncio.sleep`` to confirm zero calls. |
| 162 | + """ |
| 163 | + from unittest.mock import patch |
| 164 | + |
| 165 | + column_types = [ValueType.NULL] |
| 166 | + row_types = [[ValueType.NULL] for _ in range(100)] # < threshold |
| 167 | + |
| 168 | + sleep_calls: list[float] = [] |
| 169 | + real_sleep = asyncio.sleep |
| 170 | + |
| 171 | + async def _tracking_sleep(delay: float, *a: object, **k: object) -> None: |
| 172 | + sleep_calls.append(delay) |
| 173 | + await real_sleep(delay, *a, **k) |
| 174 | + |
| 175 | + with patch.object(asyncio, "sleep", _tracking_sleep): |
| 176 | + result = await _resolve_null_rescue_type_codes(column_types, row_types) |
| 177 | + |
| 178 | + assert result[0] is _UNKNOWN_TYPE |
| 179 | + assert sleep_calls == [], f"small result should not yield; saw sleep calls: {sleep_calls!r}" |
0 commit comments