Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
bc8b401
feat: add optional headers support to WebSocket connection
sam-s10s Feb 11, 2026
33e693f
refactor: enhance end of turn penalty logic
sam-s10s Feb 23, 2026
301bcf4
refactor: enhance end of turn penalty logic
sam-s10s Feb 23, 2026
d9de589
Add Penalty when Smart Turn hasn't been run (#86)
LArmstrongDev Feb 25, 2026
3375c3d
Merge branch 'fix/smart-turn' of https://github.com/speechmatics/spee…
sam-s10s Feb 25, 2026
7a52b3f
test: add `test_no_feou_fix` for FEOU disabled
sam-s10s Mar 2, 2026
1443b33
feat: integrate config validation and improve presets
sam-s10s Mar 2, 2026
386f37b
fix: enforce use of forced end of utterance
sam-s10s Mar 3, 2026
155fceb
refactor: simplify EOU and VAD logic, improve readability
sam-s10s Mar 3, 2026
0b28473
refactor: remove forced end-of-utterance config from tests
sam-s10s Mar 3, 2026
31aa3ac
remove: Delete outdated conditional validation for 'use_forced_eou' i…
sam-s10s Mar 3, 2026
ca0f22f
fix: handle forced EOU more securely in turn management
sam-s10s Mar 3, 2026
ce88321
test: add tests for STT client header handling
sam-s10s Mar 3, 2026
95dda05
manually set FEOU to be disabled for the tests.
sam-s10s Mar 3, 2026
5ecc473
remove `ws_headers` as part of a different PR
sam-s10s Mar 3, 2026
e30cc5e
fix: correct logic for end of utterance handling
sam-s10s Mar 3, 2026
cd7de39
`refactor: extract config setup and ensure client disconnect`
sam-s10s Mar 3, 2026
7f03cc5
chore: pin speechmatics-rt dependency version for voice
sam-s10s Mar 5, 2026
81815ee
fix: support 8kHz audio in VAD and smart turn
sam-s10s Mar 10, 2026
0e56620
fix: only predict end of turn when speech ended
sam-s10s Mar 11, 2026
4182979
test: re-enable speaker focus test cases
sam-s10s Mar 11, 2026
5583174
test: use env var for RT URL and fix assertions
sam-s10s Mar 11, 2026
18b56f9
fix: remove unused turn extend delay and dead code
sam-s10s Mar 11, 2026
c27fcb1
Merge branch 'fix/websocket-headers' into feat/va-rel
sam-s10s Mar 11, 2026
58fa7d6
Merge branch 'fix/feou' into feat/va-rel
sam-s10s Mar 12, 2026
942d23c
chore: add uv source for speechmatics-rt dependency
sam-s10s Mar 12, 2026
d8ccb41
chore: remove uv source override for speechmatics-rt
sam-s10s Mar 12, 2026
5c7ab13
test: switch EOU/FEOU endpoint to eu production
sam-s10s Mar 13, 2026
8720ed4
Merge branch 'main' into fix/smart-turn
sam-s10s Mar 24, 2026
103cac4
Relax speechmatics-rt version pin to minimum
sam-s10s Mar 24, 2026
d365f7b
Merge branch 'main' into feat/va-rel
sam-s10s Apr 10, 2026
d94b34d
Update speechmatics-rt dependency to version 1.0.0 or higher
sam-s10s Apr 10, 2026
9d8e69c
Remove timestamp parameter from force_end_of_utterance call in smart …
sam-s10s Apr 10, 2026
04ab6ec
pad the timestamp
sam-s10s Jun 25, 2026
d52b42d
Merge branch 'main' into feat/va-rel
sam-s10s Jun 25, 2026
27acdca
feat(voice): add configurable padding for forced EOU timestamp
sam-s10s Jun 26, 2026
51f27c2
feat: add padding to FEOU timestamp
sam-s10s Jun 26, 2026
d47e4f3
use configured padding if not supplied
sam-s10s Jun 26, 2026
1932544
Support for `SPEECHMATICS_FEOU_PAD` override for uses where plugins h…
sam-s10s Jun 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions sdk/voice/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors = [{ name = "Speechmatics", email = "support@speechmatics.com" }]
license = "MIT"
requires-python = ">=3.9"
dependencies = [
"speechmatics-rt>=0.5.3",
"speechmatics-rt>=1.0.0",
"pydantic>=2.10.6,<3",
"numpy>=1.26.4,<3"
]
Expand Down Expand Up @@ -42,7 +42,7 @@ keywords = [
smart = [
"certifi>=2025.10.5",
"onnxruntime>=1.20.1,<2",
"transformers>=4.57.0,<5",
"transformers>=4.57.0,<6",
]
dev = [
"black",
Expand Down
37 changes: 28 additions & 9 deletions sdk/voice/speechmatics/voice/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,14 +717,14 @@ def update_diarization_config(self, config: SpeakerFocusConfig) -> None:
# PUBLIC UTTERANCE / TURN MANAGEMENT
# ============================================================================

def finalize(self, end_of_turn: bool = False) -> None:
def finalize(self, pad: float | None = None) -> None:
"""Finalize segments.

This function will emit segments in the buffer without any further checks
on the contents of the segments.

Args:
end_of_turn: Whether to emit an end of turn message.
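pad: Seconds of padding added to the forced end of utterance (FEOU) timestamp. When None, the SPEECHMATICS_FEOU_PAD environment variable is used if set, otherwise the configured forced_eou_padding.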
"""

# Clear smart turn cutoff
Expand All @@ -738,8 +738,8 @@ async def emit() -> None:
"""Wait for EndOfUtterance if needed, then emit segments."""

# Forced end of utterance message (only when no speaker is detected)
if self._config.end_of_turn_config.use_forced_eou:
await self._await_forced_eou()
if self._uses_forced_eou:
await self._await_forced_eou(pad=pad)

# Check if the turn has changed
if self._turn_handler.handler_id != _turn_id:
Expand Down Expand Up @@ -1667,7 +1667,7 @@ async def _smart_turn_prediction(
# Return the prediction
return prediction

async def _await_forced_eou(self, timeout: float = 1.0) -> None:
async def _await_forced_eou(self, timeout: float = 1.0, pad: float | None = None) -> None:
"""Await the forced end of utterance."""

# Received EOU
Expand All @@ -1676,17 +1676,36 @@ async def _await_forced_eou(self, timeout: float = 1.0) -> None:
# Add listener
self.once(AgentServerMessageType.END_OF_UTTERANCE, lambda message: eou_received.set())

# Trigger EOU message
self._emit_diagnostic_message("ForceEndOfUtterance sent - waiting for EndOfUtterance")

# Wait for EOU
try:
# Track the start time
start_time = time.time()
self._forced_eou_active = True

# Timings
audio_sent = self.audio_seconds_sent

# Padding precedence: explicit `pad` arg > SPEECHMATICS_FEOU_PAD env var > config
if pad is not None:
padding = pad
else:
env_pad = os.environ.get("SPEECHMATICS_FEOU_PAD")
if env_pad is not None:
padding = float(env_pad)
else:
padding = self._config.end_of_turn_config.forced_eou_padding

# Timestamp to send with ForceEndOfUtterance: audio sent so far plus padding, never negative
timestamp: float = max(audio_sent + padding, 0.0)

# Info
info = {"audio_sent": audio_sent, "padding": padding, "timestamp": timestamp}

# Send the force EOU and wait for the response
await self.force_end_of_utterance()
await self.force_end_of_utterance(timestamp=timestamp)
self._emit_diagnostic_message(f"ForceEndOfUtterance sent - waiting for EndOfUtterance ({info})")

# Wait for the response
await asyncio.wait_for(eou_received.wait(), timeout=timeout)

# Record the latency
Expand Down
6 changes: 4 additions & 2 deletions sdk/voice/speechmatics/voice/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ class SpeechSegmentConfig(BaseModel):

Parameters:
add_trailing_eos: Add trailing end of sentence to segments. When enabled, segments are
emitted with missing trailing end of sentence added. Defaults to False.
emitted with missing trailing end of sentence added. Defaults to True.

emit_sentences: Emit segments when a sentence has ended. A finalized segment is emitted
as soon as a finalized end of sentence is detected. If a speaker continues to speak during
Expand All @@ -384,7 +384,7 @@ class SpeechSegmentConfig(BaseModel):
Defaults to None.
"""

add_trailing_eos: bool = False
add_trailing_eos: bool = True
emit_sentences: bool = True
pause_mark: Optional[str] = None

Expand All @@ -411,6 +411,7 @@ class EndOfTurnConfig(BaseModel):
min_end_of_turn_delay: Minimum end of turn delay.
penalties: List of end of turn penalty items.
use_forced_eou: Whether to use forced end of utterance detection.
forced_eou_padding: Padding, in seconds, added to the timestamp sent with ForceEndOfUtterance. Defaults to 0.2.
"""

base_multiplier: float = 1.0
Expand Down Expand Up @@ -439,6 +440,7 @@ class EndOfTurnConfig(BaseModel):
]
)
use_forced_eou: bool = False
forced_eou_padding: float = 0.2


class VoiceActivityConfig(BaseModel):
Expand Down
Loading