NVIDIA · shrpawar-alt · Jun 11, 2026 · Jun 11, 2026 · mgrafu · Jun 15, 2026
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -28,7 +28,7 @@ pipeline {
     MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
     KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0'
-    HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-04-26-0'
+    HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-11-26-0'
     DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

diff --git a/nemo_text_processing/text_normalization/hi/data/serial/chars.tsv b/nemo_text_processing/text_normalization/hi/data/serial/chars.tsv
@@ -0,0 +1,68 @@
+अ	अ
+आ	आ
+इ	इ
+ई	ई
+उ	उ
+ऊ	ऊ
+ऋ	ऋ
+ए	ए
+ऐ	ऐ
+ओ	ओ
+औ	औ
+ऑ	ऑ
+ा	ा
+ि	ि
+ी	ी
+ु	ु
+ू	ू
+ृ	ृ
+े	े
+ै	ै
+ो	ो
+ौ	ौ
+ॉ	ॉ
+ं	ं
+ः	ः
+ँ	ँ
+क	क
+ख	ख
+ग	ग
+घ	घ
+ङ	ङ
+च	च
+छ	छ
+ज	ज
+झ	झ
+ञ	ञ
+ट	ट
+ठ	ठ
+ड	ड
+ढ	ढ
+ण	ण
+त	त
+थ	थ
+द	द
+ध	ध
+न	न
+प	प
+फ	फ
+ब	ब
+भ	भ
+म	म
+य	य
+र	र
+ल	ल
+व	व
+श	श
+ष	ष
+स	स
+ह	ह
+क़	क़
+ख़	ख़
+ग़	ग़
+ज़	ज़
+ड़	ड़
+ढ़	ढ़
+फ़	फ़
+य़	य़
+्	्
diff --git a/nemo_text_processing/text_normalization/hi/data/serial/power_special.tsv b/nemo_text_processing/text_normalization/hi/data/serial/power_special.tsv
@@ -0,0 +1,4 @@
+^2	 स्क्वेर्ड
+^२	 स्क्वेर्ड
+^3	 क्यूब
+^३	 क्यूब
diff --git a/nemo_text_processing/text_normalization/hi/data/serial/special_symbols.tsv b/nemo_text_processing/text_normalization/hi/data/serial/special_symbols.tsv
@@ -0,0 +1,4 @@
+#	हैशटैग
+%	प्रतिशत
+&	एंड
+@	एट
diff --git a/nemo_text_processing/text_normalization/hi/taggers/serial.py b/nemo_text_processing/text_normalization/hi/taggers/serial.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.text_normalization.hi.graph_utils import (
+    NEMO_ALPHA,
+    NEMO_DIGIT,
+    NEMO_NOT_SPACE,
+    NEMO_SIGMA,
+    GraphFst,
+    convert_space,
+)
+from nemo_text_processing.text_normalization.hi.utils import get_abs_path
+
+
+class SerialFst(GraphFst):
+    """
+    Finite state transducer for classifying serial strings in Hindi: Devanagari/digit mixtures,
+    delimited number chains, symbols and powers, with ASCII (0-9) or Devanagari (०-९) digits.
+    Pure Latin-alpha + digit patterns (A12, B-60) are excluded so the electronic classifier gets them.
+        e.g. कोविड-19  -> tokens { name: "कोविड-उन्नीस" }
+        e.g. 5जी       -> tokens { name: "पाँच जी" }
+        e.g. ३जी       -> tokens { name: "तीन जी" }
+        e.g. 2^2       -> tokens { name: "दो स्क्वेर्ड" }
+        e.g. 2^4       -> tokens { name: "दो टु द पावर चार" }
+        e.g. 1-800-555 -> tokens { name: "एक-आठ सौ-पाँच सौ पचपन" }
+
+    Args:
+        cardinal: cardinal tagger; its `fst` verbalizes each run of one to three digits
+        deterministic: forwarded unchanged to GraphFst.__init__
+    """
+
+    def __init__(
+        self,
+        cardinal: GraphFst,
+        deterministic: bool = True,
+    ):
+        super().__init__(name="serial", kind="classify", deterministic=deterministic)
+        # The input side of digit.tsv/zero.tsv is used as the Devanagari digit set (presumably ०-९ - confirm).
+        digit_graph = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        zero_graph = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+
+        devanagari_digits = pynini.project(
+            pynini.union(digit_graph, zero_graph),
+            "input",
+        ).optimize()
+
+        any_digit = pynini.union(NEMO_DIGIT, devanagari_digits).optimize()
+        # Strip the 'cardinal { integer: "..." }' wrapper so only the text between the quotes remains.
+        not_quote = pynini.closure(pynini.difference(NEMO_SIGMA, pynini.accep('"')), 1)
+        strip_cardinal_tags = pynutil.delete('cardinal { integer: "') + not_quote + pynutil.delete('" }')
+        # NOTE(review): if NEMO_SIGMA is sigma-star, not_quote only excludes the lone string '"' - confirm.
+        pure_cardinal_words = pynini.compose(cardinal.fst, strip_cardinal_tags).optimize()
+        # Only runs of one to three digits are passed to the cardinal; longer digit runs do not match.
+        length_filter = pynini.closure(any_digit, 1, 3)
+        limited_cardinal = pynini.compose(length_filter, pure_cardinal_words).optimize()
+
+        num_graph = limited_cardinal
+
+        symbols_graph = pynini.string_file(get_abs_path("data/serial/special_symbols.tsv")).optimize()
+
+        devanagari_chars = pynini.project(
+            pynini.string_file(get_abs_path("data/serial/chars.tsv")),
+            "input",
+        ).optimize()
+        # Latin letters are mapped one by one through address/letters.tsv and joined with single spaces.
+        letter_graph = pynini.string_file(get_abs_path("data/address/letters.tsv"))
+        latin_letters = letter_graph + pynini.closure(pynutil.insert(" ") + letter_graph)
+        latin_letters = latin_letters.optimize()
+        # A Devanagari word is two or more entries from chars.tsv, accepted unchanged.
+        devanagari_word = pynini.closure(devanagari_chars, 2).optimize()
+
+        delimiter = (pynini.accep("-") | pynini.accep("/") | pynini.accep(" ")).optimize()
+
+        alphas = (latin_letters | devanagari_word).optimize()
+        segment = (alphas | num_graph | symbols_graph).optimize()
+        # A serial is at least two segments; delimiters are accepted as-is and so kept in the output.
+        serial_core = segment + pynini.closure(delimiter + segment, 1)
+        serial_core = serial_core.optimize()
+
+        serial_graph = serial_core
+        # Glued forms (e.g. 5जी): insert a space at each letter/digit boundary, then parse as a serial.
+        all_alphas = pynini.union(NEMO_ALPHA, devanagari_chars).optimize()
+
+        insert_space_alpha_digit = pynini.cdrewrite(pynutil.insert(" "), all_alphas, any_digit, NEMO_SIGMA)
+        insert_space_digit_alpha = pynini.cdrewrite(pynutil.insert(" "), any_digit, all_alphas, NEMO_SIGMA)
+        space_inserter = pynini.compose(insert_space_alpha_digit, insert_space_digit_alpha).optimize()
+
+        glued_serial = pynini.compose(space_inserter, serial_core).optimize()
+        serial_graph = pynini.union(serial_graph, glued_serial).optimize()
+        # Powers: power_special.tsv readings carry weight -1.0, the generic "^N" reading weight 1.0.
+        power_special = pynutil.add_weight(
+            pynini.string_file(get_abs_path("data/serial/power_special.tsv")), -1.0
+        ).optimize()
+
+        power_generic = pynutil.add_weight(
+            (pynutil.delete("^") + pynutil.insert(" टु द पावर ") + num_graph), 1.0
+        ).optimize()
+
+        power_suffix = pynini.union(power_special, power_generic).optimize()
+        power_graph = num_graph + power_suffix
+        serial_graph = pynini.union(serial_graph, power_graph).optimize()
+        # Input filter: the raw token must consist of two or more NEMO_NOT_SPACE symbols.
+        serial_graph = pynini.compose(pynini.closure(NEMO_NOT_SPACE, 2), serial_graph).optimize()
+        # The patterns built below are subtracted from the accepted inputs (see `exclusions`).
+        pure_word_slash = pynini.closure(NEMO_ALPHA, 1) + pynini.accep("/") + pynini.closure(NEMO_ALPHA, 1)
+
+        dimension_pattern = (
+            pynini.closure(any_digit, 1) + (pynini.accep("x") | pynini.accep("X")) + pynini.closure(any_digit, 1)
+        )
+
+        _opt_delim = pynini.closure(pynini.accep("-") | pynini.accep(" "), 0, 1)
+        latin_alphanum = (pynini.closure(NEMO_ALPHA, 1) + _opt_delim + pynini.closure(any_digit, 1)) | (
+            pynini.closure(any_digit, 1) + _opt_delim + pynini.closure(NEMO_ALPHA, 1)
+        )
+
+        ordinal_suffixes = pynini.project(
+            pynini.union(
+                pynini.string_file(get_abs_path("data/ordinal/suffixes.tsv")),
+                pynini.string_file(get_abs_path("data/ordinal/suffixes_map.tsv")),
+            ),
+            "input",
+        ).optimize()
+        ordinal_pattern = pynini.closure(any_digit, 1) + ordinal_suffixes
+
+        date_year_suffix = pynini.project(
+            pynini.string_file(get_abs_path("data/date/year_suffix.tsv")),
+            "input",
+        ).optimize()
+        date_suffixes = pynini.project(
+            pynini.string_file(get_abs_path("data/date/suffixes.tsv")),
+            "input",
+        ).optimize()
+        date_pattern = (
+            pynini.closure(any_digit, 1)
+            + pynini.closure(pynini.accep("-") + pynini.closure(any_digit, 1), 0)
+            + pynini.accep(" ")
+            + pynini.union(date_year_suffix, date_suffixes)
+        )
+        # NOTE(review): if NEMO_NOT_SPACE rejects " ", exclusions containing a space are unreachable - confirm.
+        exclusions = pure_word_slash | dimension_pattern | latin_alphanum | ordinal_pattern | date_pattern
+        accepted_inputs = pynini.difference(NEMO_SIGMA, exclusions).optimize()
+
+        serial_graph = pynini.compose(accepted_inputs, serial_graph).optimize()
+        # self.graph is the untagged transducer; self.fst wraps convert_space(self.graph) as name: "...".
+        self.graph = serial_graph.optimize()
+        graph = pynutil.insert('name: "') + convert_space(self.graph).optimize() + pynutil.insert('"')
+        self.fst = graph.optimize()
diff --git a/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
@@ -35,6 +35,7 @@
 from nemo_text_processing.text_normalization.hi.taggers.money import MoneyFst
 from nemo_text_processing.text_normalization.hi.taggers.ordinal import OrdinalFst
 from nemo_text_processing.text_normalization.hi.taggers.punctuation import PunctuationFst
+from nemo_text_processing.text_normalization.hi.taggers.serial import SerialFst
 from nemo_text_processing.text_normalization.hi.taggers.telephone import TelephoneFst
 from nemo_text_processing.text_normalization.hi.taggers.time import TimeFst
 from nemo_text_processing.text_normalization.hi.taggers.whitelist import WhiteListFst
@@ -111,12 +112,18 @@ def __init__(
             punctuation = PunctuationFst(deterministic=deterministic)
             punct_graph = punctuation.fst
 
+            word = WordFst(punctuation=punctuation, deterministic=deterministic)
+            word_graph = word.fst
+
             telephone = TelephoneFst()
             telephone_graph = telephone.fst
 
             electronic = ElectronicFst(deterministic=deterministic)
             electronic_graph = electronic.fst
 
+            serial = SerialFst(cardinal=cardinal, deterministic=deterministic)
+            serial_graph = serial.fst
+
             classify = (
                 pynutil.add_weight(whitelist_graph, 1.01)
                 | pynutil.add_weight(cardinal_graph, 1.1)
@@ -129,10 +136,9 @@ def __init__(
                 | pynutil.add_weight(telephone_graph, 1.1)
                 | pynutil.add_weight(ordinal_graph, 1.1)
                 | pynutil.add_weight(electronic_graph, 1.1)
+                | pynutil.add_weight(serial_graph, 1.11)
             )
 
-            word_graph = WordFst(punctuation=punctuation, deterministic=deterministic).fst
-
             punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=2.1) + pynutil.insert(" }")
             punct = pynini.closure(
                 pynini.union(

diff --git a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_serial.txt b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_serial.txt
@@ -0,0 +1,21 @@
+कोविड-19~कोविड-उन्नीस
+कोविड-१९~कोविड-उन्नीस
+5जी~पाँच जी
+५जी~पाँच जी
+2^2~दो स्क्वेर्ड
+२^२~दो स्क्वेर्ड
+1-800-555~एक-आठ सौ-पाँच सौ पचपन
+3जी~तीन जी
+4जी~चार जी
+कोरोना-2~कोरोना-दो
+अग्नि-5~अग्नि-पाँच
+ओमिक्रॉन-2~ओमिक्रॉन-दो
+3^2~तीन स्क्वेर्ड
+2^3~दो क्यूब
+5^3~पाँच क्यूब
+४^५~चार टु द पावर पाँच
+99-1~निन्यानबे-एक
+10-20-30~दस-बीस-तीस
+1-800-999~एक-आठ सौ-नौ सौ निन्यानबे
+पृथ्वी-4~पृथ्वी-चार
+ब्रह्मोस-1~ब्रह्मोस-एक
diff --git a/tests/nemo_text_processing/hi/test_serial.py b/tests/nemo_text_processing/hi/test_serial.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+from parameterized import parameterized
+
+from nemo_text_processing.text_normalization.normalize import Normalizer
+
+from ..utils import CACHE_DIR, parse_test_case_file
+
+
+class TestSerial:
+    # Class-level normalizer: constructed once at class definition and reused by every case.
+    normalizer = Normalizer(
+        lang='hi', input_case='cased', cache_dir=CACHE_DIR, post_process=True, overwrite_cache=False
+    )
+
+    @parameterized.expand(parse_test_case_file('hi/data_text_normalization/test_cases_serial.txt'))
+    @pytest.mark.run_only_on('CPU')
+    @pytest.mark.unit
+    def test_norm(self, test_input, expected):
+        assert self.normalizer.normalize(test_input, verbose=False, punct_post_process=True) == expected
diff --git a/tests/nemo_text_processing/hi/test_sparrowhawk_normalization.sh b/tests/nemo_text_processing/hi/test_sparrowhawk_normalization.sh
@@ -52,10 +52,10 @@ testTNDecimal() {
 #  runtest $input
 #}
 
-#testTNSerial() {
-#  input=$PROJECT_DIR/hi/data_text_normalization/test_cases_serial.txt
-#  runtest $input
-#}
+testTNSerial() {  # Hindi serial cases: hands the test-case file to runtest
+  input="$PROJECT_DIR/hi/data_text_normalization/test_cases_serial.txt"
+  runtest "$input"  # quoted so a PROJECT_DIR containing spaces stays a single argument
+}
 
 #testTNRoman() {
 #  input=$PROJECT_DIR/en/data_text_normalization/test_cases_roman.txt