From 90ca133b7bc58c18c208e138db60702730ca18d2 Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Mon, 4 May 2026 11:12:53 +0530
Subject: [PATCH 01/11] Batch language hints support for the next gen model

---
 sdk/batch/speechmatics/batch/_models.py |  4 ++
 tests/batch/test_models.py              | 61 +++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index c375d34..8f1246b 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -101,6 +101,8 @@ class TranscriptionConfig:
             defaults to None.
         audio_filtering_config: Configuration for limiting the transcription of quiet audio.
             Defaults to None.
+        language_hints: Configuration for language hinting, applicable only for the next gen model.
+        language_hints_strict: Configuration for strict language hinting, applicable only for the next gen model.
     """
 
     language: str = "en"
@@ -118,6 +120,8 @@ class TranscriptionConfig:
     max_delay_mode: Optional[str] = None
     transcript_filtering_config: Optional[TranscriptFilteringConfig] = None
     audio_filtering_config: Optional[AudioFilteringConfig] = None
+    language_hints: Optional[list[str]] = None
+    language_hints_strict: Optional[bool] = None
 
     def to_dict(self) -> dict[str, Any]:
         result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None}
diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py
index d262685..55b38a4 100644
--- a/tests/batch/test_models.py
+++ b/tests/batch/test_models.py
@@ -127,3 +127,64 @@ def test_absent_output_config_is_none(self):
         data = {"type": "transcription"}
         job_config = JobConfig.from_dict(data)
         assert job_config.output_config is None
+
+
+class TestLanguageHintsToDict:
+    def test_language_hints_serializes_correctly(self):
+        config = TranscriptionConfig(language_hints=["en", "fr"])
+        result = config.to_dict()
+        assert result["language_hints"] == ["en", "fr"]
+
+    def test_language_hints_strict_true_serializes_correctly(self):
+        config = TranscriptionConfig(language_hints=["en"], language_hints_strict=True)
+        result = config.to_dict()
+        assert result["language_hints_strict"] is True
+
+    def test_language_hints_strict_false_included_in_output(self):
+        config = TranscriptionConfig(language_hints=["en"], language_hints_strict=False)
+        result = config.to_dict()
+        assert "language_hints_strict" in result
+        assert result["language_hints_strict"] is False
+
+    def test_language_hints_absent_when_none(self):
+        config = TranscriptionConfig()
+        result = config.to_dict()
+        assert "language_hints" not in result
+        assert "language_hints_strict" not in result
+
+
+class TestLanguageHintsFromDict:
+    def test_language_hints_deserializes_correctly(self):
+        data = {
+            "type": "transcription",
+            "transcription_config": {
+                "language": "en",
+                "language_hints": ["en", "fr"],
+            },
+        }
+        job_config = JobConfig.from_dict(data)
+        assert job_config.transcription_config is not None
+        assert job_config.transcription_config.language_hints == ["en", "fr"]
+
+    def test_language_hints_strict_deserializes_correctly(self):
+        data = {
+            "type": "transcription",
+            "transcription_config": {
+                "language": "en",
+                "language_hints": ["en"],
+                "language_hints_strict": True,
+            },
+        }
+        job_config = JobConfig.from_dict(data)
+        assert job_config.transcription_config is not None
+        assert job_config.transcription_config.language_hints_strict is True
+
+    def test_absent_fields_are_none(self):
+        data = {
+            "type": "transcription",
+            "transcription_config": {"language": "en"},
+        }
+        job_config = JobConfig.from_dict(data)
+        assert job_config.transcription_config is not None
+        assert job_config.transcription_config.language_hints is None
+        assert job_config.transcription_config.language_hints_strict is None

From e5f5a748f781eef45a1c9e30be939358b376a660 Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Mon, 4 May 2026 12:09:27 +0530
Subject: [PATCH 02/11] language_pack_info update to support language hints in
 the transcript results

---
 sdk/batch/speechmatics/batch/_models.py | 32 +++++++++++++++----------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index 8f1246b..6143bc4 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -742,14 +742,20 @@ def transcript_text(self) -> str:
             return ""
 
         # Get language pack info for word delimiter
-        word_delimiter = " "  # Default
-        if self.metadata and self.metadata.language_pack_info and "word_delimiter" in self.metadata.language_pack_info:
-            word_delimiter = self.metadata.language_pack_info["word_delimiter"]
+        default_word_delimiter = " "  # Default
+        # Applicable only for the next gen models
+        per_lang_word_delimiter: dict = {}
+        if self.metadata and self.metadata.language_pack_info:
+            if "word_delimiter" in self.metadata.language_pack_info:
+                default_word_delimiter = self.metadata.language_pack_info["word_delimiter"]
+
+            if "per_language_word_delimiters" in self.metadata.language_pack_info:
+                per_lang_word_delimiter = self.metadata.language_pack_info["per_language_word_delimiters"]
 
         # Group results by speaker and process
         transcript_parts = []
         current_speaker = None
-        current_group: list[str] = []
+        current_group: list[tuple[str, str]] = []
 
         for result in self.results:
             if not result.alternatives:
@@ -758,12 +764,15 @@ def transcript_text(self) -> str:
             alternative = result.alternatives[0]
             content = alternative.content
             speaker = alternative.speaker
+            word_delimiter = default_word_delimiter
+            if alternative.language and alternative.language in per_lang_word_delimiter:
+                word_delimiter = per_lang_word_delimiter[alternative.language]
 
             # Handle speaker changes
             if speaker != current_speaker:
                 # Process accumulated group for previous speaker
                 if current_group:
-                    text = self._join_content_items(current_group, word_delimiter)
+                    text = self._join_content_items(current_group)
                     if current_speaker:
                         transcript_parts.append(f"SPEAKER {current_speaker}: {text}")  # type: ignore[unreachable]
                     else:
@@ -772,13 +781,13 @@ def transcript_text(self) -> str:
 
                 current_speaker = speaker
 
-            # Add content to current group
+            # Add content to current group with its word delimiter
             if content:
-                current_group.append(content)
+                current_group.append((content, word_delimiter))
 
         # Process final group
         if current_group:
-            text = self._join_content_items(current_group, word_delimiter)
+            text = self._join_content_items(current_group)
             if current_speaker:
                 transcript_parts.append(f"SPEAKER {current_speaker}: {text}")
             else:
@@ -786,13 +795,12 @@ def transcript_text(self) -> str:
 
         return "\n".join(transcript_parts)
 
-    def _join_content_items(self, content_items: list[str], word_delimiter: str) -> str:
+    def _join_content_items(self, content_items: list[tuple[str, str]]) -> str:
         """
         Join content items with appropriate spacing and punctuation handling.
 
         Args:
-            content_items: List of content strings to join.
-            word_delimiter: Delimiter to use between words.
+            content_items: List of (content, word_delimiter) pairs to join.
 
         Returns:
             Properly formatted text string.
@@ -802,7 +810,7 @@ def _join_content_items(self, content_items: list[str], word_delimiter: str) ->
 
         result: list[str] = []
 
-        for i, content in enumerate(content_items):
+        for i, (content, word_delimiter) in enumerate(content_items):
             if not content:
                 continue
 

From f5de88e9f674d89bcfc45586ded40223ecc4e402 Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 09:32:57 +0530
Subject: [PATCH 03/11] rename per_lang_word_delimiter to
 per_lang_word_delimiters

---
 sdk/batch/speechmatics/batch/_models.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index 6143bc4..f1691a7 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -744,13 +744,13 @@ def transcript_text(self) -> str:
         # Get language pack info for word delimiter
         default_word_delimiter = " "  # Default
         # Applicable only for the next gen models
-        per_lang_word_delimiter: dict = {}
+        per_lang_word_delimiters: dict = {}
         if self.metadata and self.metadata.language_pack_info:
             if "word_delimiter" in self.metadata.language_pack_info:
                 default_word_delimiter = self.metadata.language_pack_info["word_delimiter"]
 
             if "per_language_word_delimiters" in self.metadata.language_pack_info:
-                per_lang_word_delimiter = self.metadata.language_pack_info["per_language_word_delimiters"]
+                per_lang_word_delimiters = self.metadata.language_pack_info["per_language_word_delimiters"]
 
         # Group results by speaker and process
         transcript_parts = []
@@ -765,8 +765,8 @@ def transcript_text(self) -> str:
             content = alternative.content
             speaker = alternative.speaker
             word_delimiter = default_word_delimiter
-            if alternative.language and alternative.language in per_lang_word_delimiter:
-                word_delimiter = per_lang_word_delimiter[alternative.language]
+            if alternative.language and alternative.language in per_lang_word_delimiters:
+                word_delimiter = per_lang_word_delimiters[alternative.language]
 
             # Handle speaker changes
             if speaker != current_speaker:

From c6c06e253ab931f74109491700e07b67dbe66d8c Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 09:44:25 +0530
Subject: [PATCH 04/11] update docstring for language hints

---
 sdk/batch/speechmatics/batch/_models.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index f1691a7..9228bb8 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -101,8 +101,15 @@ class TranscriptionConfig:
             defaults to None.
         audio_filtering_config: Configuration for limiting the transcription of quiet audio.
             Defaults to None.
-        language_hints: Configuration for language hinting, applicable only for the next gen model.
-        language_hints_strict: Configuration for strict language hinting, applicable only for the next gen model.
+        language_hints: Configuration for the list of languages that are most likely to appear in your audio,
+            This improves accuracy by biasing recognition toward the specified languages.
+            Use ``language_hints_strict`` to control whether other languages can also be detected.
+            Applicable only for omni-v1 models (not yet available).
+        language_hints_strict: Configuration that controls how strictly language hints are applied.
+            When ``True``, the transcript will only contain languages specified in ``language_hints``.
+            When ``False``, recognition is biased toward the specified languages while still allowing other
+            languages to be detected if present.
+            Applicable only for omni-v1 models (not yet available).
     """
 
     language: str = "en"

From 5dfe6520387ea014f43c23baf751f918b0c30ab4 Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 10:01:32 +0530
Subject: [PATCH 05/11] add constants

---
 sdk/batch/speechmatics/batch/_models.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index 9228bb8..fd51e9a 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -721,6 +721,9 @@ class Transcript:
         audio_event_summary: Optional audio event statistics.
     """
 
+    _LANG_PACK_WORD_DELIMITER_KEY = "word_delimiter"
+    _LANG_PACK_PER_LANG_DELIMITERS_KEY = "per_language_word_delimiters"
+
     format: str
     job: JobInfo
     metadata: RecognitionMetadata
@@ -753,11 +756,11 @@ def transcript_text(self) -> str:
         # Applicable only for the next gen models
         per_lang_word_delimiters: dict = {}
         if self.metadata and self.metadata.language_pack_info:
-            if "word_delimiter" in self.metadata.language_pack_info:
-                default_word_delimiter = self.metadata.language_pack_info["word_delimiter"]
+            if self._LANG_PACK_WORD_DELIMITER_KEY in self.metadata.language_pack_info:
+                default_word_delimiter = self.metadata.language_pack_info[self._LANG_PACK_WORD_DELIMITER_KEY]
 
-            if "per_language_word_delimiters" in self.metadata.language_pack_info:
-                per_lang_word_delimiters = self.metadata.language_pack_info["per_language_word_delimiters"]
+            if self._LANG_PACK_PER_LANG_DELIMITERS_KEY in self.metadata.language_pack_info:
+                per_lang_word_delimiters = self.metadata.language_pack_info[self._LANG_PACK_PER_LANG_DELIMITERS_KEY]
 
         # Group results by speaker and process
         transcript_parts = []

From 7bd422b75959b3fd81db3efb9a79cda4ee78144e Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 10:01:40 +0530
Subject: [PATCH 06/11] update assertions

---
 tests/batch/test_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py
index 55b38a4..bc1b35c 100644
--- a/tests/batch/test_models.py
+++ b/tests/batch/test_models.py
@@ -185,6 +185,6 @@ def test_absent_fields_are_none(self):
             "transcription_config": {"language": "en"},
         }
         job_config = JobConfig.from_dict(data)
-        assert job_config.transcription_config is not None
+        assert job_config.transcription_config
         assert job_config.transcription_config.language_hints is None
         assert job_config.transcription_config.language_hints_strict is None

From dbd87f0e4c5680fcfcbce4a5a6fbd1b64e407f83 Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 10:07:31 +0530
Subject: [PATCH 07/11] update tests

---
 tests/batch/test_models.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py
index bc1b35c..86403f3 100644
--- a/tests/batch/test_models.py
+++ b/tests/batch/test_models.py
@@ -134,15 +134,18 @@ def test_language_hints_serializes_correctly(self):
         config = TranscriptionConfig(language_hints=["en", "fr"])
         result = config.to_dict()
         assert result["language_hints"] == ["en", "fr"]
+        assert "language_hints_strict" not in result
 
     def test_language_hints_strict_true_serializes_correctly(self):
         config = TranscriptionConfig(language_hints=["en"], language_hints_strict=True)
         result = config.to_dict()
+        assert result["language_hints"] == ["en"]
         assert result["language_hints_strict"] is True
 
-    def test_language_hints_strict_false_included_in_output(self):
+    def test_language_hints_strict_false_serializes_correctly(self):
         config = TranscriptionConfig(language_hints=["en"], language_hints_strict=False)
         result = config.to_dict()
+        assert result["language_hints"] == ["en"]
         assert "language_hints_strict" in result
         assert result["language_hints_strict"] is False
 

From be2deba173977a95264238d80536d850b54eec7e Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 10:13:31 +0530
Subject: [PATCH 08/11] comment on current_group structure

---
 sdk/batch/speechmatics/batch/_models.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index fd51e9a..8393b73 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -765,6 +765,8 @@ def transcript_text(self) -> str:
         # Group results by speaker and process
         transcript_parts = []
         current_speaker = None
+        # Each entry is (word, delimiter), where delimiter is looked up from per_language_word_delimiters
+        # using the word's language code, falling back to the default word delimiter.
         current_group: list[tuple[str, str]] = []
 
         for result in self.results:

From 0e1e57da72d51aa19631fa7c94470a74ece8e55f Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 10:15:40 +0530
Subject: [PATCH 09/11] add example to current_group structure comment

---
 sdk/batch/speechmatics/batch/_models.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index 8393b73..9d9e3c2 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -767,6 +767,7 @@ def transcript_text(self) -> str:
         current_speaker = None
         # Each entry is (word, delimiter), where delimiter is looked up from per_language_word_delimiters
         # using the word's language code, falling back to the default word delimiter.
+        # For example, [("hello", " "), ("world", " ")]
         current_group: list[tuple[str, str]] = []
 
         for result in self.results:

From 2216a204fb78fb73fb532a42787a6561ebcc5bfb Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 12:43:10 +0530
Subject: [PATCH 10/11] add model field which is an alias of operating point

---
 sdk/batch/speechmatics/batch/_models.py |  9 +++++++--
 tests/batch/test_models.py              | 15 ++++++++++++++-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index 9d9e3c2..1603019 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -44,6 +44,8 @@ class OperatingPoint(str, Enum):
 
     ENHANCED = "enhanced"
     STANDARD = "standard"
+    # Not yet available for general use. Support for omni-v1 models is coming soon.
+    OMNI = "omni-v1"
 
 
 class NotificationContents(str, Enum):
@@ -104,16 +106,17 @@ class TranscriptionConfig:
         language_hints: Configuration for the list of languages that are most likely to appear in your audio,
             This improves accuracy by biasing recognition toward the specified languages.
             Use ``language_hints_strict`` to control whether other languages can also be detected.
-            Applicable only for omni-v1 models (not yet available).
+            Applicable only for omni-v1 models. Support for omni-v1 models is coming soon.
         language_hints_strict: Configuration that controls how strictly language hints are applied.
             When ``True``, the transcript will only contain languages specified in ``language_hints``.
             When ``False``, recognition is biased toward the specified languages while still allowing other
             languages to be detected if present.
-            Applicable only for omni-v1 models (not yet available).
+            Applicable only for omni-v1 models. Support for omni-v1 models is coming soon.
     """
 
     language: str = "en"
     operating_point: OperatingPoint = OperatingPoint.ENHANCED
+    model: Optional[OperatingPoint] = None
     output_locale: Optional[str] = None
     diarization: Optional[str] = None
     additional_vocab: Optional[list[dict[str, Any]]] = None
@@ -132,6 +135,8 @@ class TranscriptionConfig:
 
     def to_dict(self) -> dict[str, Any]:
         result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None}
+        if self.model:
+            result["operating_point"] = self.model
         if self.transcript_filtering_config is not None:
             result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict()
         if self.audio_filtering_config is not None:
diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py
index 86403f3..6983d61 100644
--- a/tests/batch/test_models.py
+++ b/tests/batch/test_models.py
@@ -1,4 +1,4 @@
-from speechmatics.batch._models import JobConfig, TranscriptFilteringConfig, TranscriptionConfig
+from speechmatics.batch._models import JobConfig, OperatingPoint, TranscriptFilteringConfig, TranscriptionConfig
 
 
 class TestTranscriptFilteringConfigToDict:
@@ -129,6 +129,19 @@ def test_absent_output_config_is_none(self):
         assert job_config.output_config is None
 
 
+class TestModelToDict:
+    def test_model_serializes_as_operating_point(self):
+        config = TranscriptionConfig(model=OperatingPoint.OMNI)
+        result = config.to_dict()
+        assert result["operating_point"] == OperatingPoint.OMNI
+
+    def test_model_absent_leaves_operating_point_unchanged(self):
+        config = TranscriptionConfig(operating_point=OperatingPoint.ENHANCED)
+        result = config.to_dict()
+        assert result["operating_point"] == OperatingPoint.ENHANCED
+        assert "model" not in result
+
+
 class TestLanguageHintsToDict:
     def test_language_hints_serializes_correctly(self):
         config = TranscriptionConfig(language_hints=["en", "fr"])

From 5fa2d39f52e79a1067260ad2c5a3783e618107ba Mon Sep 17 00:00:00 2001
From: rakeshv <rakeshv@speechmatics.com>
Date: Wed, 6 May 2026 12:52:27 +0530
Subject: [PATCH 11/11] model and operating_point cannot coexist

---
 sdk/batch/speechmatics/batch/_models.py | 2 ++
 tests/batch/test_models.py              | 1 +
 2 files changed, 3 insertions(+)

diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py
index 1603019..1d14b32 100644
--- a/sdk/batch/speechmatics/batch/_models.py
+++ b/sdk/batch/speechmatics/batch/_models.py
@@ -136,7 +136,9 @@ class TranscriptionConfig:
     def to_dict(self) -> dict[str, Any]:
         result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None}
         if self.model:
+            # model is an alias for operating_point for omni-v1 models; they cannot coexist in the request.
             result["operating_point"] = self.model
+            result.pop("model")
         if self.transcript_filtering_config is not None:
             result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict()
         if self.audio_filtering_config is not None:
diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py
index 6983d61..743550b 100644
--- a/tests/batch/test_models.py
+++ b/tests/batch/test_models.py
@@ -134,6 +134,7 @@ def test_model_serializes_as_operating_point(self):
         config = TranscriptionConfig(model=OperatingPoint.OMNI)
         result = config.to_dict()
         assert result["operating_point"] == OperatingPoint.OMNI
+        assert "model" not in result
 
     def test_model_absent_leaves_operating_point_unchanged(self):
         config = TranscriptionConfig(operating_point=OperatingPoint.ENHANCED)