From 2d146013698eae3ccdb2fed45a8a149bba65117f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 16 Apr 2026 11:31:32 +0000 Subject: [PATCH] fix: eliminate quadratic regex backtracking in SGR pattern, add AGENTS.md Restructure the SGR regex from `(;*(sgr)?(;+sgr)*)?;*m` to `;*(sgr(;+sgr)*;*)?m` so semicolons can only be consumed in one place, preventing the engine from trying O(n^2) partitions on pathological input (e.g. many semicolons with no `m` terminator). Benchmark: 1600 semicolons, 0.395s -> 0.0006s. Add AGENTS.md with stdisplay security audit documenting verified non-issues from Unicode bypass analysis. https://claude.ai/code/session_01QhhwK5uJq7Rv2ekDCt5aZ9 --- .github/workflows/lint.yml | 1 + AGENTS.md | 11 +++++++++++ agents/stdisplay-security.md | 9 +++++++++ usr/lib/python3/dist-packages/stdisplay/stdisplay.py | 2 +- usr/lib/python3/dist-packages/stdisplay/sttee.py | 1 + .../dist-packages/stdisplay/tests/stdisplay.py | 1 - 6 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 AGENTS.md create mode 100644 agents/stdisplay-security.md diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1d0635ef..52417653 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -21,6 +21,7 @@ on: jobs: lint: runs-on: ubuntu-24.04 + timeout-minutes: 10 strategy: fail-fast: false diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..0c4b7309 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,11 @@ +# Agents + +Detailed guidance for AI agents working on this codebase. + +## Policy + +- No unicode. All files must be ASCII-only. 
+ +## Reference + +- [stdisplay](agents/stdisplay-security.md) diff --git a/agents/stdisplay-security.md b/agents/stdisplay-security.md new file mode 100644 index 00000000..5851601c --- /dev/null +++ b/agents/stdisplay-security.md @@ -0,0 +1,9 @@ +# stdisplay + +## Line-based vs whole-input processing + +`stcat`/`stcatn`/`sttee` sanitize line-by-line (streaming, like their Unix +counterparts). `stsponge` sanitizes the whole input at once (sponge semantics). +These are equivalent because no allowed escape sequence can contain `\n` -- SGR +is composed solely of digits, semicolons, colons, and the `m` terminator. This +is inherent to the SGR spec, documented in the man page (`man/stdisplay.1.ronn`). diff --git a/usr/lib/python3/dist-packages/stdisplay/stdisplay.py b/usr/lib/python3/dist-packages/stdisplay/stdisplay.py index 1446a084..3896858e 100644 --- a/usr/lib/python3/dist-packages/stdisplay/stdisplay.py +++ b/usr/lib/python3/dist-packages/stdisplay/stdisplay.py @@ -229,7 +229,7 @@ def get_sgr_pattern( sgr_combo = rf"({sgr_combo})" if exclude_sgr: sgr_combo = exclude_pattern(sgr_combo, exclude_sgr) - sgr_re = rf"(;*({sgr_combo})?(;+{sgr_combo})*)?;*m" + sgr_re = rf";*({sgr_combo}(;+{sgr_combo})*;*)?m" return str(sgr_re) diff --git a/usr/lib/python3/dist-packages/stdisplay/sttee.py b/usr/lib/python3/dist-packages/stdisplay/sttee.py index f5f402c8..83502709 100644 --- a/usr/lib/python3/dist-packages/stdisplay/sttee.py +++ b/usr/lib/python3/dist-packages/stdisplay/sttee.py @@ -5,6 +5,7 @@ ## See the file COPYING for copying conditions. 
"""Safely print stdin to stdout and file.""" + from sys import argv, stdin, stdout from typing import TextIO from stdisplay.stdisplay import stdisplay diff --git a/usr/lib/python3/dist-packages/stdisplay/tests/stdisplay.py b/usr/lib/python3/dist-packages/stdisplay/tests/stdisplay.py index ae986e9d..fabb5bbe 100644 --- a/usr/lib/python3/dist-packages/stdisplay/tests/stdisplay.py +++ b/usr/lib/python3/dist-packages/stdisplay/tests/stdisplay.py @@ -17,7 +17,6 @@ stdisplay, ) - ## This is split into a global so it can be used by sanitize_string.py's tests. simple_escape_cases: list[tuple[str, str]] = [ ("\a", "_"),