Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pipeline {
MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0'
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0'
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-28-26-0'
DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
}
stages {
Expand Down
47 changes: 32 additions & 15 deletions nemo_text_processing/text_normalization/hi/taggers/fraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,6 @@


class FractionFst(GraphFst):
"""
Finite state transducer for classifying fractions, e.g.
"२३ ४/६" ->
fraction { integer_part: "तेईस" numerator: "चार" denominator: "छः"}
"४/६" ->
fraction { numerator: "चार" denominator: "छः"}


Args:
cardinal: cardinal GraphFst
deterministic: if True will provide a single transduction option,
for False multiple transductions are generated (used for audio-based normalization)
"""

def __init__(self, cardinal, deterministic: bool = True):
super().__init__(name="fraction", kind="classify", deterministic=deterministic)

Expand All @@ -54,15 +40,20 @@ def __init__(self, cardinal, deterministic: bool = True):
self.optional_graph_negative = pynini.closure(
pynutil.insert("negative: ") + pynini.cross("-", "\"true\"") + pynutil.insert(NEMO_SPACE), 0, 1
)

self.integer = pynutil.insert("integer_part: \"") + cardinal_graph + pynutil.insert("\"")

self.numerator = (
pynutil.insert("numerator: \"")
+ cardinal_graph
+ pynini.cross(pynini.union("/", NEMO_SPACE + "/" + NEMO_SPACE), "\"")
+ pynutil.insert(NEMO_SPACE)
)

self.denominator = pynutil.insert("denominator: \"") + cardinal_graph + pynutil.insert("\"")

# ---------------- EXISTING SPECIAL FORMS ----------------

dedh_dhai_graph = pynini.string_map(
[("१" + NEMO_SPACE + HI_ONE_HALF, HI_DEDH), ("२" + NEMO_SPACE + HI_ONE_HALF, HI_DHAI)]
)
Expand All @@ -77,6 +68,27 @@ def __init__(self, cardinal, deterministic: bool = True):
paune_numbers = paune + pynini.cross(NEMO_SPACE + HI_THREE_QUARTERS, "")
paune_graph = pynutil.insert(HI_PAUNE) + pynutil.insert(NEMO_SPACE) + paune_numbers

# ---------------- COMMON FRACTION FORMS ----------------

common_fraction_map = pynini.string_map(
[
("१/२", "आधा"),
("१/३", "तिहाई"),
("२/३", "दो तिहाई"),
("१/४", "चौथाई"),
("३/४", "तीन चौथाई"),
]
)

graph_common_fraction = (
pynutil.insert("morphosyntactic_features: \"")
+ common_fraction_map
+ pynutil.insert("\"")
+ pynutil.insert(NEMO_SPACE)
)

# ---------------- WRAPPING GRAPHS ----------------

graph_dedh_dhai = (
pynutil.insert("morphosyntactic_features: \"")
+ dedh_dhai_graph
Expand Down Expand Up @@ -105,19 +117,24 @@ def __init__(self, cardinal, deterministic: bool = True):
+ pynutil.insert(NEMO_SPACE)
)

# ---------------- DEFAULT FRACTION ----------------

final_graph = (
self.optional_graph_negative
+ pynini.closure(self.integer + pynini.accep(NEMO_SPACE), 0, 1)
+ self.numerator
+ self.denominator
)

# ---------------- PRIORITY HANDLING ----------------

weighted_graph = (
final_graph
| pynutil.add_weight(graph_common_fraction, -0.3) # ensures override of "बटा"
| pynutil.add_weight(graph_dedh_dhai, -0.2)
| pynutil.add_weight(graph_paune, -0.2)
| pynutil.add_weight(graph_savva, -0.1)
| pynutil.add_weight(graph_sadhe, -0.1)
| pynutil.add_weight(graph_paune, -0.2)
)

self.graph = weighted_graph
Expand Down