From 90ca133b7bc58c18c208e138db60702730ca18d2 Mon Sep 17 00:00:00 2001 From: rakeshv Date: Mon, 4 May 2026 11:12:53 +0530 Subject: [PATCH 01/11] Batch language hints support for the next gen model --- sdk/batch/speechmatics/batch/_models.py | 4 ++ tests/batch/test_models.py | 61 +++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index c375d34..8f1246b 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -101,6 +101,8 @@ class TranscriptionConfig: defaults to None. audio_filtering_config: Configuration for limiting the transcription of quiet audio. Defaults to None. + language_hints: Configuration for language hinting, applicable only for the next gen model. + language_hints_strict: Configuration for strict language hinting, applicable only for the next gen model. """ language: str = "en" @@ -118,6 +120,8 @@ class TranscriptionConfig: max_delay_mode: Optional[str] = None transcript_filtering_config: Optional[TranscriptFilteringConfig] = None audio_filtering_config: Optional[AudioFilteringConfig] = None + language_hints: Optional[list[str]] = None + language_hints_strict: Optional[bool] = None def to_dict(self) -> dict[str, Any]: result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None} diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index d262685..55b38a4 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -127,3 +127,64 @@ def test_absent_output_config_is_none(self): data = {"type": "transcription"} job_config = JobConfig.from_dict(data) assert job_config.output_config is None + + +class TestLanguageHintsToDict: + def test_language_hints_serializes_correctly(self): + config = TranscriptionConfig(language_hints=["en", "fr"]) + result = config.to_dict() + assert result["language_hints"] == ["en", "fr"] + + def test_language_hints_strict_true_serializes_correctly(self): + config = TranscriptionConfig(language_hints=["en"], language_hints_strict=True) + result = config.to_dict() + assert result["language_hints_strict"] is True + + def test_language_hints_strict_false_included_in_output(self): + config = TranscriptionConfig(language_hints=["en"], language_hints_strict=False) + result = config.to_dict() + assert "language_hints_strict" in result + assert result["language_hints_strict"] is False + + def test_language_hints_absent_when_none(self): + config = TranscriptionConfig() + result = config.to_dict() + assert "language_hints" not in result + assert "language_hints_strict" not in result + + +class TestLanguageHintsFromDict: + def test_language_hints_deserializes_correctly(self): + data = { + "type": "transcription", + "transcription_config": { + "language": "en", + "language_hints": ["en", "fr"], + }, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + assert job_config.transcription_config.language_hints == ["en", "fr"] + + def test_language_hints_strict_deserializes_correctly(self): + data = { + "type": "transcription", + "transcription_config": { + "language": "en", + "language_hints": ["en"], + "language_hints_strict": True, + }, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + assert job_config.transcription_config.language_hints_strict is True + + def test_absent_fields_are_none(self): + data = { + "type": "transcription", + "transcription_config": {"language": "en"}, + } + job_config = JobConfig.from_dict(data) + assert job_config.transcription_config is not None + assert job_config.transcription_config.language_hints is None + assert job_config.transcription_config.language_hints_strict is None From e5f5a748f781eef45a1c9e30be939358b376a660 Mon Sep 17 00:00:00 2001 From: rakeshv Date: Mon, 4 May 2026 12:09:27 +0530 Subject: [PATCH 02/11] language_pack_info update to support language hints in the transcript results --- sdk/batch/speechmatics/batch/_models.py | 32 +++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 8f1246b..6143bc4 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -742,14 +742,20 @@ def transcript_text(self) -> str: return "" # Get language pack info for word delimiter - word_delimiter = " " # Default - if self.metadata and self.metadata.language_pack_info and "word_delimiter" in self.metadata.language_pack_info: - word_delimiter = self.metadata.language_pack_info["word_delimiter"] + default_word_delimiter = " " # Default + # Applicable only for the next gen models + per_lang_word_delimiter: dict = {} + if self.metadata and self.metadata.language_pack_info: + if "word_delimiter" in self.metadata.language_pack_info: + default_word_delimiter = self.metadata.language_pack_info["word_delimiter"] + + if "per_language_word_delimiters" in self.metadata.language_pack_info: + per_lang_word_delimiter = self.metadata.language_pack_info["per_language_word_delimiters"] # Group results by speaker and process transcript_parts = [] current_speaker = None - current_group: list[str] = [] + current_group: list[tuple[str, str]] = [] for result in self.results: if not result.alternatives: @@ -758,12 +764,15 @@ def transcript_text(self) -> str: alternative = result.alternatives[0] content = alternative.content speaker = alternative.speaker + word_delimiter = default_word_delimiter + if alternative.language and alternative.language in per_lang_word_delimiter: + word_delimiter = per_lang_word_delimiter[alternative.language] # Handle speaker changes if speaker != current_speaker: # Process accumulated group for previous speaker if current_group: - text = self._join_content_items(current_group, word_delimiter) + text = self._join_content_items(current_group) if current_speaker: transcript_parts.append(f"SPEAKER {current_speaker}: {text}") # type: ignore[unreachable] else: @@ -772,13 +781,13 @@ def transcript_text(self) -> str: current_speaker = speaker - # Add content to current group + # Add content to current group with its word delimiter if content: - current_group.append(content) + current_group.append((content, word_delimiter)) # Process final group if current_group: - text = self._join_content_items(current_group, word_delimiter) + text = self._join_content_items(current_group) if current_speaker: transcript_parts.append(f"SPEAKER {current_speaker}: {text}") else: @@ -786,13 +795,12 @@ def transcript_text(self) -> str: return "\n".join(transcript_parts) - def _join_content_items(self, content_items: list[str], word_delimiter: str) -> str: + def _join_content_items(self, content_items: list[tuple[str, str]]) -> str: """ Join content items with appropriate spacing and punctuation handling. Args: - content_items: List of content strings to join. - word_delimiter: Delimiter to use between words. + content_items: List of (content, word_delimiter) pairs to join. Returns: Properly formatted text string. @@ -802,7 +810,7 @@ def _join_content_items(self, content_items: list[str], word_delimiter: str) -> result: list[str] = [] - for i, content in enumerate(content_items): + for i, (content, word_delimiter) in enumerate(content_items): if not content: continue From f5de88e9f674d89bcfc45586ded40223ecc4e402 Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 09:32:57 +0530 Subject: [PATCH 03/11] rename per_lang_word_delimiter to per_lang_word_delimiters --- sdk/batch/speechmatics/batch/_models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 6143bc4..f1691a7 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -744,13 +744,13 @@ def transcript_text(self) -> str: # Get language pack info for word delimiter default_word_delimiter = " " # Default # Applicable only for the next gen models - per_lang_word_delimiter: dict = {} + per_lang_word_delimiters: dict = {} if self.metadata and self.metadata.language_pack_info: if "word_delimiter" in self.metadata.language_pack_info: default_word_delimiter = self.metadata.language_pack_info["word_delimiter"] if "per_language_word_delimiters" in self.metadata.language_pack_info: - per_lang_word_delimiter = self.metadata.language_pack_info["per_language_word_delimiters"] + per_lang_word_delimiters = self.metadata.language_pack_info["per_language_word_delimiters"] # Group results by speaker and process transcript_parts = [] @@ -765,8 +765,8 @@ def transcript_text(self) -> str: content = alternative.content speaker = alternative.speaker word_delimiter = default_word_delimiter - if alternative.language and alternative.language in per_lang_word_delimiter: - word_delimiter = per_lang_word_delimiter[alternative.language] + if alternative.language and alternative.language in per_lang_word_delimiters: + word_delimiter = per_lang_word_delimiters[alternative.language] # Handle speaker changes if speaker != current_speaker: From c6c06e253ab931f74109491700e07b67dbe66d8c Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 09:44:25 +0530 Subject: [PATCH 04/11] update docs string for language hints --- sdk/batch/speechmatics/batch/_models.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index f1691a7..9228bb8 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -101,8 +101,15 @@ class TranscriptionConfig: defaults to None. audio_filtering_config: Configuration for limiting the transcription of quiet audio. Defaults to None. - language_hints: Configuration for language hinting, applicable only for the next gen model. - language_hints_strict: Configuration for strict language hinting, applicable only for the next gen model. + language_hints: Configuration for the list of languages that are most likely to appear in your audio, + This improves accuracy by biasing recognition toward the specified languages. + Use ``language_hints_strict`` to control whether other languages can also be detected. + Applicable only for omni-v1 models (not yet available). + language_hints_strict: Configuration that controls how strictly language hints are applied. + When ``True``, the transcript will only contain languages specified in ``language_hints``. + When ``False``, recognition is biased toward the specified languages while still allowing other + languages to be detected if present. + Applicable only for omni-v1 models (not yet available). """ language: str = "en" From 5dfe6520387ea014f43c23baf751f918b0c30ab4 Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 10:01:32 +0530 Subject: [PATCH 05/11] add constants --- sdk/batch/speechmatics/batch/_models.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 9228bb8..fd51e9a 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -721,6 +721,9 @@ class Transcript: audio_event_summary: Optional audio event statistics. """ + _LANG_PACK_WORD_DELIMITER_KEY = "word_delimiter" + _LANG_PACK_PER_LANG_DELIMITERS_KEY = "per_language_word_delimiters" + format: str job: JobInfo metadata: RecognitionMetadata @@ -753,11 +756,11 @@ def transcript_text(self) -> str: # Applicable only for the next gen models per_lang_word_delimiters: dict = {} if self.metadata and self.metadata.language_pack_info: - if "word_delimiter" in self.metadata.language_pack_info: - default_word_delimiter = self.metadata.language_pack_info["word_delimiter"] + if self._LANG_PACK_WORD_DELIMITER_KEY in self.metadata.language_pack_info: + default_word_delimiter = self.metadata.language_pack_info[self._LANG_PACK_WORD_DELIMITER_KEY] - if "per_language_word_delimiters" in self.metadata.language_pack_info: - per_lang_word_delimiters = self.metadata.language_pack_info["per_language_word_delimiters"] + if self._LANG_PACK_PER_LANG_DELIMITERS_KEY in self.metadata.language_pack_info: + per_lang_word_delimiters = self.metadata.language_pack_info[self._LANG_PACK_PER_LANG_DELIMITERS_KEY] # Group results by speaker and process transcript_parts = [] From 7bd422b75959b3fd81db3efb9a79cda4ee78144e Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 10:01:40 +0530 Subject: [PATCH 06/11] update assertions --- tests/batch/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index 55b38a4..bc1b35c 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -185,6 +185,6 @@ def test_absent_fields_are_none(self): "transcription_config": {"language": "en"}, } job_config = JobConfig.from_dict(data) - assert job_config.transcription_config is not None + assert job_config.transcription_config assert job_config.transcription_config.language_hints is None assert job_config.transcription_config.language_hints_strict is None From dbd87f0e4c5680fcfcbce4a5a6fbd1b64e407f83 Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 10:07:31 +0530 Subject: [PATCH 07/11] update tests --- tests/batch/test_models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index bc1b35c..86403f3 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -134,15 +134,18 @@ def test_language_hints_serializes_correctly(self): config = TranscriptionConfig(language_hints=["en", "fr"]) result = config.to_dict() assert result["language_hints"] == ["en", "fr"] + assert "language_hints_strict" not in result def test_language_hints_strict_true_serializes_correctly(self): config = TranscriptionConfig(language_hints=["en"], language_hints_strict=True) result = config.to_dict() + assert result["language_hints"] == ["en"] assert result["language_hints_strict"] is True - def test_language_hints_strict_false_included_in_output(self): + def test_language_hints_strict_false_serializes_correctly(self): config = TranscriptionConfig(language_hints=["en"], language_hints_strict=False) result = config.to_dict() + assert result["language_hints"] == ["en"] assert "language_hints_strict" in result assert result["language_hints_strict"] is False From be2deba173977a95264238d80536d850b54eec7e Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 10:13:31 +0530 Subject: [PATCH 08/11] comment on current_group structure --- sdk/batch/speechmatics/batch/_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index fd51e9a..8393b73 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -765,6 +765,8 @@ def transcript_text(self) -> str: # Group results by speaker and process transcript_parts = [] current_speaker = None + # Each entry is (word, delimiter), where delimiter is looked up from per_language_word_delimiters + # using the word's language code, falling back to the default word delimiter. current_group: list[tuple[str, str]] = [] for result in self.results: From 0e1e57da72d51aa19631fa7c94470a74ece8e55f Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 10:15:40 +0530 Subject: [PATCH 09/11] comment on current_group structure --- sdk/batch/speechmatics/batch/_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 8393b73..9d9e3c2 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -767,6 +767,7 @@ def transcript_text(self) -> str: current_speaker = None # Each entry is (word, delimiter), where delimiter is looked up from per_language_word_delimiters # using the word's language code, falling back to the default word delimiter. + # For example, [("hello", " "), ("world", " ")] current_group: list[tuple[str, str]] = [] for result in self.results: From 2216a204fb78fb73fb532a42787a6561ebcc5bfb Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 12:43:10 +0530 Subject: [PATCH 10/11] add model field which is an alias of operating point --- sdk/batch/speechmatics/batch/_models.py | 9 +++++++-- tests/batch/test_models.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 9d9e3c2..1603019 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -44,6 +44,8 @@ class OperatingPoint(str, Enum): ENHANCED = "enhanced" STANDARD = "standard" + # Not yet available for general use. Support for omni-v1 models is coming soon. + OMNI = "omni-v1" class NotificationContents(str, Enum): @@ -104,16 +106,17 @@ class TranscriptionConfig: language_hints: Configuration for the list of languages that are most likely to appear in your audio, This improves accuracy by biasing recognition toward the specified languages. Use ``language_hints_strict`` to control whether other languages can also be detected. - Applicable only for omni-v1 models (not yet available). + Applicable only for omni-v1 models. Support for omni-v1 models is coming soon. language_hints_strict: Configuration that controls how strictly language hints are applied. When ``True``, the transcript will only contain languages specified in ``language_hints``. When ``False``, recognition is biased toward the specified languages while still allowing other languages to be detected if present. - Applicable only for omni-v1 models (not yet available). + Applicable only for omni-v1 models. Support for omni-v1 models is coming soon. """ language: str = "en" operating_point: OperatingPoint = OperatingPoint.ENHANCED + model: Optional[OperatingPoint] = None output_locale: Optional[str] = None diarization: Optional[str] = None additional_vocab: Optional[list[dict[str, Any]]] = None @@ -132,6 +135,8 @@ class TranscriptionConfig: def to_dict(self) -> dict[str, Any]: result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None} + if self.model: + result["operating_point"] = self.model if self.transcript_filtering_config is not None: result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() if self.audio_filtering_config is not None: diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index 86403f3..6983d61 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -1,4 +1,4 @@ -from speechmatics.batch._models import JobConfig, TranscriptFilteringConfig, TranscriptionConfig +from speechmatics.batch._models import JobConfig, OperatingPoint, TranscriptFilteringConfig, TranscriptionConfig class TestTranscriptFilteringConfigToDict: @@ -129,6 +129,19 @@ def test_absent_output_config_is_none(self): assert job_config.output_config is None +class TestModelToDict: + def test_model_serializes_as_operating_point(self): + config = TranscriptionConfig(model=OperatingPoint.OMNI) + result = config.to_dict() + assert result["operating_point"] == OperatingPoint.OMNI + + def test_model_absent_leaves_operating_point_unchanged(self): + config = TranscriptionConfig(operating_point=OperatingPoint.ENHANCED) + result = config.to_dict() + assert result["operating_point"] == OperatingPoint.ENHANCED + assert "model" not in result + + class TestLanguageHintsToDict: def test_language_hints_serializes_correctly(self): config = TranscriptionConfig(language_hints=["en", "fr"]) From 5fa2d39f52e79a1067260ad2c5a3783e618107ba Mon Sep 17 00:00:00 2001 From: rakeshv Date: Wed, 6 May 2026 12:52:27 +0530 Subject: [PATCH 11/11] model and op cannot coexist --- sdk/batch/speechmatics/batch/_models.py | 2 ++ tests/batch/test_models.py | 1 + 2 files changed, 3 insertions(+) diff --git a/sdk/batch/speechmatics/batch/_models.py b/sdk/batch/speechmatics/batch/_models.py index 1603019..1d14b32 100644 --- a/sdk/batch/speechmatics/batch/_models.py +++ b/sdk/batch/speechmatics/batch/_models.py @@ -136,7 +136,9 @@ class TranscriptionConfig: def to_dict(self) -> dict[str, Any]: result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None} if self.model: + # model is an alias for operating_point for omni-v1 models; they cannot coexist in the request. result["operating_point"] = self.model + result.pop("model") if self.transcript_filtering_config is not None: result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict() if self.audio_filtering_config is not None: diff --git a/tests/batch/test_models.py b/tests/batch/test_models.py index 6983d61..743550b 100644 --- a/tests/batch/test_models.py +++ b/tests/batch/test_models.py @@ -134,6 +134,7 @@ def test_model_serializes_as_operating_point(self): config = TranscriptionConfig(model=OperatingPoint.OMNI) result = config.to_dict() assert result["operating_point"] == OperatingPoint.OMNI + assert "model" not in result def test_model_absent_leaves_operating_point_unchanged(self): config = TranscriptionConfig(operating_point=OperatingPoint.ENHANCED)