diff --git a/nemo_text_processing/text_normalization/ja/taggers/word.py b/nemo_text_processing/text_normalization/ja/taggers/word.py index b1403221b..2d8290bea 100644 --- a/nemo_text_processing/text_normalization/ja/taggers/word.py +++ b/nemo_text_processing/text_normalization/ja/taggers/word.py @@ -15,7 +15,7 @@ from pynini.lib import pynutil -from nemo_text_processing.text_normalization.ja.graph_utils import NEMO_NOT_SPACE, GraphFst +from nemo_text_processing.text_normalization.ja.graph_utils import NEMO_NOT_QUOTE, GraphFst class WordFst(GraphFst): @@ -26,5 +26,5 @@ class WordFst(GraphFst): def __init__(self, deterministic: bool = True): super().__init__(name="word", kind="classify", deterministic=deterministic) - word = pynutil.insert("name: \"") + NEMO_NOT_SPACE + pynutil.insert("\"") - self.fst = word.optimize() + word = pynutil.insert("name: \"") + NEMO_NOT_QUOTE + pynutil.insert("\"") + self.fst = word.optimize() diff --git a/tests/nemo_text_processing/ja/data_text_normalization/test_cases_word.txt b/tests/nemo_text_processing/ja/data_text_normalization/test_cases_word.txt new file mode 100644 index 000000000..7fd308f62 --- /dev/null +++ b/tests/nemo_text_processing/ja/data_text_normalization/test_cases_word.txt @@ -0,0 +1 @@ +彼は“Hello World”というプログラムを実行した。~彼は“Hello World”というプログラムを実行した。 diff --git a/tests/nemo_text_processing/ja/test_word.py b/tests/nemo_text_processing/ja/test_word.py new file mode 100644 index 000000000..24cc28a5e --- /dev/null +++ b/tests/nemo_text_processing/ja/test_word.py @@ -0,0 +1,32 @@ +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import pytest
from parameterized import parameterized

from nemo_text_processing.text_normalization.normalize import Normalizer

from ..utils import CACHE_DIR, parse_test_case_file


class TestWord:
    """Run the Japanese normalizer against the cases listed in ``test_cases_word.txt``."""

    # Created once when the class body executes; every parameterized case
    # below reuses this same instance through ``self.normalizer_ja``.
    normalizer_ja = Normalizer(
        lang='ja',
        cache_dir=CACHE_DIR,
        overwrite_cache=False,
        input_case='cased',
    )

    @parameterized.expand(parse_test_case_file('ja/data_text_normalization/test_cases_word.txt'))
    @pytest.mark.run_only_on('CPU')
    @pytest.mark.unit
    def test_norm_word(self, test_input, expected):
        """Normalize ``test_input`` and require the output to equal ``expected``."""
        assert expected == self.normalizer_ja.normalize(test_input)