From 234f3eb486b4d1cdd94c7871e2315f4a98352b2d Mon Sep 17 00:00:00 2001
From: Chit Lin Su <chitlinsu@gmail.com>
Date: Wed, 24 Jun 2026 11:47:15 +0800
Subject: [PATCH] feat: add IPA-to-word evaluation direction with homophone
 support and auto-detection

---
 evaluation_function/evaluation.py      | 177 ++++++++++++++++++++-----
 evaluation_function/evaluation_test.py |  19 +++
 2 files changed, 164 insertions(+), 32 deletions(-)

diff --git a/evaluation_function/evaluation.py b/evaluation_function/evaluation.py
index 8d1b554..a9c7b98 100755
--- a/evaluation_function/evaluation.py
+++ b/evaluation_function/evaluation.py
@@ -16,9 +16,19 @@
 # Accents checked by default; override per question via params["accents"].
 _DEFAULT_ACCENTS = ("en_US", "en_UK")
 
+# Display names for the supported accent codes, used in feedback messages.
+_ACCENT_NAMES = {
+    "en_US": "General American (en_US)",
+    "en_UK": "Received Pronunciation (en_UK)",
+}
+
 # Decoration that carries no phonemic content for the comparison.
 _STRIP_CHARS = ("/", "[", "]", "ˈ", "'", "ˌ", ".", " ", "\t", "\n", "\r")
 
+# Characters that occur in IPA but not in ordinary English spelling; their
+# presence auto-detects the direction when params["direction"] is not set.
+_IPA_MARKERS = set("ˈˌːəɪʊɛɔæɑɒʌɜɝɚɹɫŋʃʒθðˠʔɡɲʎɥʁɣχ̃")
+
 
 @lru_cache(maxsize=None)
 def _load_accent(accent: str) -> Dict[str, List[str]]:
@@ -43,6 +53,22 @@ def _load_accent(accent: str) -> Dict[str, List[str]]:
     return mapping
 
 
+@lru_cache(maxsize=None)
+def _reverse_index(accent: str) -> Dict[str, Set[str]]:
+    """Map each normalised IPA string of `accent` to the words transcribed that way.
+
+    Used for IPA->word lookups: words that land in the same set share a
+    normalised transcription (homophones), so any of them can be accepted.
+    """
+    words_by_ipa: Dict[str, Set[str]] = {}
+    for spelling, raw_forms in _load_accent(accent).items():
+        for key in map(_normalise, raw_forms):
+            if not key:
+                continue
+            words_by_ipa.setdefault(key, set()).add(spelling)
+    return words_by_ipa
+
+
 def _normalise(text: Any) -> str:
     """Reduce a transcription to bare phonemes for comparison.
 
@@ -56,11 +82,9 @@ def _normalise(text: Any) -> str:
     return text.replace("ɹ", "r").strip()
 
 
-# Human-readable names for the accents, used in feedback.
-_ACCENT_NAMES = {
-    "en_US": "General American (en_US)",
-    "en_UK": "Received Pronunciation (en_UK)",
-}
+def _normalise_word(text: Any) -> str:
+    """Lower-case and trim a spelled word for comparison; None (no input) gives ""."""
+    return "" if text is None else str(text).strip().lower()
 
 
 def _accepted_map(word: str, accents: Set[str]) -> Dict[str, List[tuple]]:
@@ -86,37 +110,35 @@ def _requested_accents(params: Params) -> List[str]:
     return [a for a in requested if a in _ACCENT_FILES] or list(_DEFAULT_ACCENTS)
 
 
-def evaluation_function(
-    response: Any,
-    answer: Any,
-    params: Params,
-) -> Result:
-    """
-    Evaluate a student's IPA transcription of a word against the
-    open-dict-data/ipa-dict dictionaries.
-
-    The word to transcribe is the teacher-configured `answer` (e.g. "battery"),
-    or params["word"] if given. Its IPA is looked up across the accepted accents,
-    which default to General American (en_US) and Received Pronunciation (en_UK)
-    and are overridable via params["accents"]. If the word is not in the
-    dictionaries, `answer` is treated as a literal IPA transcription instead, so a
-    teacher can supply one directly.
-
-    Stress marks, slashes/brackets, syllable dots and whitespace are ignored when
-    comparing, and the broad (r) and narrow (ɹ) rhotic symbols are treated as
-    equivalent.
+def _looks_like_ipa(value: Any) -> bool:
+    """Heuristic: does the configured answer look like IPA rather than a word?"""
+    candidate = str(value)
+    # Slash or bracket delimiters mark a transcription even if it is plain ASCII.
+    delimited = "/" in candidate or "[" in candidate
+    return delimited or not _IPA_MARKERS.isdisjoint(candidate)
+
+
+def _resolve_direction(answer: Any, params: Params) -> str:
+    """Choose the grading direction: "word_to_ipa" or "ipa_to_word".
+
+    A recognised params["direction"] wins; otherwise the direction is inferred
+    from whether the configured `answer` looks like IPA.
     """
+    explicit = params.get("direction")
+    if explicit == "word_to_ipa" or explicit == "ipa_to_word":
+        return explicit
+    return "word_to_ipa" if not _looks_like_ipa(answer) else "ipa_to_word"
 
+
+def _evaluate_word_to_ipa(response: Any, answer: Any, params: Params, accents: List[str]) -> Result:
+    """Grade a student's IPA transcription of a word (the forward direction)."""
     word = params.get("word") or answer
     if not word:
         return Result(
             is_correct=False,
-            feedback_items=[
-                ("no_word", "No word was configured to transcribe."),
-            ],
+            feedback_items=[("no_word", "No word was configured to transcribe.")],
         )
 
-    accents = _requested_accents(params)
     accepted = _accepted_map(str(word), set(accents))
 
     if not accepted:
@@ -157,10 +179,7 @@ def evaluation_function(
             )
         else:
             why = f"Correct! That matches the expected IPA transcription of \"{word}\"."
-        return Result(
-            is_correct=True,
-            feedback_items=[("correct", why)],
-        )
+        return Result(is_correct=True, feedback_items=[("correct", why)])
 
     return Result(
         is_correct=False,
@@ -172,3 +191,97 @@ def evaluation_function(
             ),
         ],
     )
+
+
+def _evaluate_ipa_to_word(response: Any, answer: Any, accents: List[str]) -> Result:
+    """Grade a student's spelled word for a given IPA transcription (the reverse).
+
+    `answer` is either the IPA itself (e.g. "/ˈbætɝi/") or the target word, whose
+    transcriptions in `accents` are looked up. Any homophone sharing a target
+    transcription is accepted; echoing back an IPA-looking answer is not.
+    """
+    answer_key = _normalise_word(answer)
+    targets: Set[str] = set()
+    display_ipa = None
+
+    # If the answer is a known word, use its transcriptions as the target IPA.
+    for accent in accents:
+        for raw in _load_accent(accent).get(answer_key, []):
+            normalised = _normalise(raw)
+            if normalised:
+                targets.add(normalised)
+                if display_ipa is None:
+                    display_ipa = raw
+
+    # Otherwise treat the answer as a literal IPA transcription.
+    if not targets:
+        literal = _normalise(answer)
+        if literal:
+            targets.add(literal)
+            display_ipa = str(answer)
+
+    if not targets:
+        return Result(
+            is_correct=False,
+            feedback_items=[("no_ipa", "No IPA transcription was configured for this question.")],
+        )
+
+    # Collect every word whose transcription matches the target IPA (homophones).
+    accepted_words: Set[str] = set()
+    for accent in accents:
+        index = _reverse_index(accent)
+        for target in targets:
+            accepted_words |= index.get(target, set())
+    # Only a spelled-word answer is accepted verbatim; echoing IPA back is not.
+    if not _looks_like_ipa(answer):
+        accepted_words.add(answer_key)
+
+    display = str(display_ipa).strip("/")
+    response_word = _normalise_word(response)
+
+    if not response_word:
+        return Result(
+            is_correct=False,
+            feedback_items=[("empty", f"No word was provided. Which word is transcribed /{display}/?")],
+        )
+
+    if response_word in accepted_words:
+        return Result(
+            is_correct=True,
+            feedback_items=[("correct", f"Correct! /{display}/ is the IPA transcription of \"{response_word}\".")],
+        )
+
+    return Result(
+        is_correct=False,
+        feedback_items=[("incorrect", f"That isn't the word transcribed as /{display}/. Sound out each symbol and try again.")],
+    )
+
+
+def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
+    """Grade an IPA question in either direction.
+
+    Lookups use the open-dict-data/ipa-dict dictionaries (en_US + en_UK unless
+    params["accents"] narrows them).
+
+    Direction comes from params["direction"] ("word_to_ipa" | "ipa_to_word");
+    when that is absent or unrecognised it is inferred from `answer`:
+
+    - word_to_ipa (forward): `answer` is a word (e.g. "battery") and the
+      student submits its IPA. Any listed pronunciation in the chosen accents
+      is accepted. params["word"] overrides the target word.
+    - ipa_to_word (reverse): `answer` is an IPA transcription (e.g.
+      "/ˈbætɝi/") or a word whose transcription is looked up, and the student
+      submits the spelled word. Any homophone sharing that transcription is
+      accepted.
+
+    When comparing IPA, stress marks, slashes/brackets, syllable dots and
+    whitespace are ignored, and the broad (r) and narrow (ɹ) rhotic symbols
+    are treated as equivalent.
+    """
+    # Accents are resolved first; both grading paths take the same list.
+    accents = _requested_accents(params)
+    direction = _resolve_direction(answer, params)
+
+    if direction != "ipa_to_word":
+        return _evaluate_word_to_ipa(response, answer, params, accents)
+    return _evaluate_ipa_to_word(response, answer, accents)
diff --git a/evaluation_function/evaluation_test.py b/evaluation_function/evaluation_test.py
index d6e3634..759c26b 100755
--- a/evaluation_function/evaluation_test.py
+++ b/evaluation_function/evaluation_test.py
@@ -40,3 +40,22 @@ def test_incorrect_transcription(self):
 
         self.assertEqual(result.get("is_correct"), False)
         self.assertTrue(result.get("feedback"))
+
+    def test_reverse_ipa_to_word(self):
+        # An IPA-looking answer makes the direction auto-detect as ipa_to_word.
+        graded = evaluation_function("battery", "/ˈbætɝi/", Params()).to_dict()
+
+        self.assertEqual(graded.get("is_correct"), True)
+        self.assertIn("battery", graded.get("feedback"))
+
+    def test_reverse_accepts_homophone(self):
+        # Past-tense "read" shares /ˈɹɛd/ with "red", so it should be accepted.
+        graded = evaluation_function("read", "/ˈɹɛd/", Params()).to_dict()
+
+        self.assertEqual(graded.get("is_correct"), True)
+
+    def test_reverse_wrong_word(self):
+        graded = evaluation_function("computer", "/ˈbætɝi/", Params()).to_dict()
+        # A non-matching word must be rejected, with some feedback still returned.
+        self.assertEqual(graded.get("is_correct"), False)
+        self.assertTrue(graded.get("feedback"))