Skip to content

Commit 4a29545

Browse files
StanFromIreland, picnixz, encukou, serhiy-storchaka, and maurycy
authored and committed
Apply all review suggestions
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Co-authored-by: Maurycy Pawłowski-Wieroński <maurycy@maurycy.com>
1 parent c8b3a94 commit 4a29545

3 files changed

Lines changed: 14 additions & 22 deletions

File tree

Lib/test/test_unicodedata.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -617,7 +617,7 @@ def test_issue10254(self):
617617
self.assertEqual(self.db.normalize('NFC', a), b)
618618

619619
def test_long_combining_mark_run(self):
620-
# GH-XXXXX: avoid quadratic canonical ordering.
620+
# gh-149079: avoid quadratic canonical ordering.
621621
payload = "a" + ("\u0300\u0327" * 32)
622622
nfd = "a" + ("\u0327" * 32) + ("\u0300" * 32)
623623
nfc = "\u00e0" + ("\u0327" * 32) + ("\u0300" * 31)
@@ -628,7 +628,7 @@ def test_long_combining_mark_run(self):
628628
self.assertEqual(self.db.normalize("NFKC", payload), nfc)
629629

630630
def test_combining_mark_run_fast_paths(self):
631-
# GH-XXXXX: cover short runs and already-sorted long runs.
631+
# gh-149079: cover short runs and already-sorted long runs.
632632
short_payload = "a" + ("\u0300\u0327" * 9) + "\u0300"
633633
short_nfd = "a" + ("\u0327" * 9) + ("\u0300" * 10)
634634
short_nfc = "\u00e0" + ("\u0327" * 9) + ("\u0300" * 9)
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
Fix a potential denial of service in :func:`unicodedata.normalize`. The
2-
canonical ordering step of Unicode normalization used an O(n²) insertion
2+
canonical ordering step of Unicode normalization used a quadratic-time insertion
33
sort for reordering combining characters, which could be exploited with
44
crafted input containing many combining characters in non-canonical order.
55
Replaced with a linear-time counting sort for long runs.

Modules/unicodedata.c

Lines changed: 11 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -586,22 +586,14 @@ canonical_ordering_sort_counting(int kind, void *data,
586586
Py_ssize_t counts[256] = {0};
587587
Py_ssize_t run_length = end - start;
588588
Py_ssize_t total = 0;
589-
unsigned char min_combining = 255;
590-
unsigned char max_combining = 0;
591589

592590
for (Py_ssize_t i = start; i < end; i++) {
593591
Py_UCS4 code = PyUnicode_READ(kind, data, i);
594592
unsigned char combining = _getrecord_ex(code)->combining;
595593
counts[combining]++;
596-
if (combining < min_combining) {
597-
min_combining = combining;
598-
}
599-
if (combining > max_combining) {
600-
max_combining = combining;
601-
}
602594
}
603595

604-
for (Py_ssize_t i = min_combining; i <= max_combining; i++) {
596+
for (size_t i = 0; i < Py_ARRAY_LENGTH(counts); i++) {
605597
Py_ssize_t count = counts[i];
606598
counts[i] = total;
607599
total += count;
@@ -629,7 +621,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
629621
void *result_data;
630622
/* Longest decomposition in Unicode 3.2: U+FDFA */
631623
Py_UCS4 stack[20];
632-
Py_ssize_t space, isize, length;
624+
Py_ssize_t space, isize;
633625
int index, prefix, count, stackptr;
634626
unsigned char prev, cur;
635627
Py_UCS4 *sortbuf = NULL;
@@ -727,25 +719,24 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
727719

728720
result_kind = PyUnicode_KIND(result);
729721
result_data = PyUnicode_DATA(result);
730-
length = PyUnicode_GET_LENGTH(result);
731722

732723
/* Sort each consecutive combining-character run canonically. */
733724
i = 0;
734-
while (i < length) {
725+
while (i < o) {
735726
Py_ssize_t run_length, run_start;
736727
int needs_sort = 0;
737728

738-
prev = _getrecord_ex(
739-
PyUnicode_READ(result_kind, result_data, i))->combining;
729+
Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i);
730+
prev = _getrecord_ex(ch)->combining;
740731
if (prev == 0) {
741732
i++;
742733
continue;
743734
}
744735

745736
run_start = i++;
746-
while (i < length) {
747-
cur = _getrecord_ex(
748-
PyUnicode_READ(result_kind, result_data, i))->combining;
737+
while (i < o) {
738+
Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i);
739+
cur = _getrecord_ex(ch)->combining;
749740
if (cur == 0) {
750741
break;
751742
}
@@ -767,8 +758,9 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
767758
}
768759

769760
if (run_length > sortbuflen) {
770-
Py_UCS4 *new_sortbuf = PyMem_Realloc(sortbuf,
771-
run_length * sizeof(Py_UCS4));
761+
Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf,
762+
Py_UCS4,
763+
run_length);
772764
if (new_sortbuf == NULL) {
773765
PyErr_NoMemory();
774766
PyMem_Free(sortbuf);

0 commit comments

Comments
 (0)