@@ -508,19 +508,80 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
508508 (* index )++ ;
509509}
510510
511+ /* Small combining runs are usually cheaper with insertion sort. */
512+ #define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20
513+
514+ static void
515+ canonical_ordering_sort_insertion (int kind , void * data ,
516+ Py_ssize_t start , Py_ssize_t end )
517+ {
518+ for (Py_ssize_t i = start + 1 ; i < end ; i ++ ) {
519+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
520+ unsigned char combining = _getrecord_ex (code )-> combining ;
521+ Py_ssize_t j = i ;
522+
523+ while (j > start ) {
524+ Py_UCS4 previous = PyUnicode_READ (kind , data , j - 1 );
525+ if (_getrecord_ex (previous )-> combining <= combining ) {
526+ break ;
527+ }
528+ PyUnicode_WRITE (kind , data , j , previous );
529+ j -- ;
530+ }
531+ if (j != i ) {
532+ PyUnicode_WRITE (kind , data , j , code );
533+ }
534+ }
535+ }
536+
537+ static void
538+ canonical_ordering_sort_counting (int kind , void * data ,
539+ Py_ssize_t start , Py_ssize_t end ,
540+ Py_UCS4 * sortbuf )
541+ {
542+ Py_ssize_t counts [256 ] = {0 };
543+ Py_ssize_t run_length = end - start ;
544+ Py_ssize_t total = 0 ;
545+
546+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
547+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
548+ unsigned char combining = _getrecord_ex (code )-> combining ;
549+ counts [combining ]++ ;
550+ }
551+
552+ for (size_t i = 0 ; i < Py_ARRAY_LENGTH (counts ); i ++ ) {
553+ Py_ssize_t count = counts [i ];
554+ counts [i ] = total ;
555+ total += count ;
556+ }
557+
558+ /* Reuse counts[] as the next output slot for each CCC. */
559+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
560+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
561+ unsigned char combining = _getrecord_ex (code )-> combining ;
562+ sortbuf [counts [combining ]++ ] = code ;
563+ }
564+ for (Py_ssize_t i = 0 ; i < run_length ; i ++ ) {
565+ PyUnicode_WRITE (kind , data , start + i , sortbuf [i ]);
566+ }
567+ }
568+
511569static PyObject *
512570nfd_nfkd (PyObject * self , PyObject * input , int k )
513571{
514572 PyObject * result ;
515573 Py_UCS4 * output ;
516574 Py_ssize_t i , o , osize ;
517- int kind ;
518- const void * data ;
575+ int input_kind , result_kind ;
576+ const void * input_data ;
577+ void * result_data ;
519578 /* Longest decomposition in Unicode 3.2: U+FDFA */
520579 Py_UCS4 stack [20 ];
521580 Py_ssize_t space , isize ;
522581 int index , prefix , count , stackptr ;
523582 unsigned char prev , cur ;
583+ Py_UCS4 * sortbuf = NULL ;
584+ Py_ssize_t sortbuflen = 0 ;
524585
525586 stackptr = 0 ;
526587 isize = PyUnicode_GET_LENGTH (input );
@@ -540,11 +601,11 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
540601 return NULL ;
541602 }
542603 i = o = 0 ;
543- kind = PyUnicode_KIND (input );
544- data = PyUnicode_DATA (input );
604+ input_kind = PyUnicode_KIND (input );
605+ input_data = PyUnicode_DATA (input );
545606
546607 while (i < isize ) {
547- stack [stackptr ++ ] = PyUnicode_READ (kind , data , i ++ );
608+ stack [stackptr ++ ] = PyUnicode_READ (input_kind , input_data , i ++ );
548609 while (stackptr ) {
549610 Py_UCS4 code = stack [-- stackptr ];
550611 /* Hangul Decomposition adds three characters in
@@ -612,34 +673,64 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
612673 if (!result )
613674 return NULL ;
614675 /* result is guaranteed to be ready, as it is compact. */
615- kind = PyUnicode_KIND (result );
616- data = PyUnicode_DATA (result );
676+ result_kind = PyUnicode_KIND (result );
677+ result_data = PyUnicode_DATA (result );
617678
618- /* Sort canonically. */
679+ /* Sort each consecutive combining-character run canonically. */
619680 i = 0 ;
620- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
621- for (i ++ ; i < PyUnicode_GET_LENGTH (result ); i ++ ) {
622- cur = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
623- if (prev == 0 || cur == 0 || prev <= cur ) {
624- prev = cur ;
681+ while (i < o ) {
682+ Py_ssize_t run_length , run_start ;
683+ int needs_sort = 0 ;
684+
685+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
686+ prev = _getrecord_ex (ch )-> combining ;
687+ if (prev == 0 ) {
688+ i ++ ;
625689 continue ;
626690 }
627- /* Non-canonical order. Need to switch *i with previous. */
628- o = i - 1 ;
629- while (1 ) {
630- Py_UCS4 tmp = PyUnicode_READ (kind , data , o + 1 );
631- PyUnicode_WRITE (kind , data , o + 1 ,
632- PyUnicode_READ (kind , data , o ));
633- PyUnicode_WRITE (kind , data , o , tmp );
634- o -- ;
635- if (o < 0 )
636- break ;
637- prev = _getrecord_ex (PyUnicode_READ (kind , data , o ))-> combining ;
638- if (prev == 0 || prev <= cur )
691+
692+ run_start = i ++ ;
693+ while (i < o ) {
694+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
695+ cur = _getrecord_ex (ch )-> combining ;
696+ if (cur == 0 ) {
639697 break ;
698+ }
699+ if (prev > cur ) {
700+ needs_sort = 1 ;
701+ }
702+ prev = cur ;
703+ i ++ ;
704+ }
705+ if (!needs_sort ) {
706+ continue ;
707+ }
708+
709+ run_length = i - run_start ;
710+ if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD ) {
711+ canonical_ordering_sort_insertion (result_kind , result_data ,
712+ run_start , i );
713+ continue ;
640714 }
641- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
715+
716+ if (run_length > sortbuflen ) {
717+ Py_UCS4 * new_sortbuf = PyMem_Resize (sortbuf ,
718+ Py_UCS4 ,
719+ run_length );
720+ if (new_sortbuf == NULL ) {
721+ PyErr_NoMemory ();
722+ PyMem_Free (sortbuf );
723+ Py_DECREF (result );
724+ return NULL ;
725+ }
726+ sortbuf = new_sortbuf ;
727+ sortbuflen = run_length ;
728+ }
729+
730+ canonical_ordering_sort_counting (result_kind , result_data ,
731+ run_start , i , sortbuf );
642732 }
733+ PyMem_Free (sortbuf );
643734 return result ;
644735}
645736
0 commit comments