Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .fern/metadata.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"cliVersion": "4.62.5",
"cliVersion": "4.65.2",
"generatorName": "fernapi/fern-python-sdk",
"generatorVersion": "4.61.0",
"generatorConfig": {
Expand Down Expand Up @@ -64,6 +64,6 @@
}
]
},
"originGitCommit": "a07a0deaa640e8b286f9c8e4e47426b37a083a67",
"sdkVersion": "0.13.11"
"originGitCommit": "4d3b0e268ae51b18618f73109de010b707efad88",
"sdkVersion": "0.13.12"
}
365 changes: 182 additions & 183 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ dynamic = ["version"]

[tool.poetry]
name = "hume"
version = "0.13.11"
version = "0.13.12"
description = "A Python SDK for Hume AI"
readme = "README.md"
authors = []
Expand Down
4 changes: 2 additions & 2 deletions src/hume/core/client_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ def get_headers(self) -> typing.Dict[str, str]:
import platform

headers: typing.Dict[str, str] = {
"User-Agent": "hume/0.13.11",
"User-Agent": "hume/0.13.12",
"X-Fern-Language": "Python",
"X-Fern-Runtime": f"python/{platform.python_version()}",
"X-Fern-Platform": f"{platform.system().lower()}/{platform.release()}",
"X-Fern-SDK-Name": "hume",
"X-Fern-SDK-Version": "0.13.11",
"X-Fern-SDK-Version": "0.13.12",
**(self.get_custom_headers() or {}),
}
if self.api_key is not None:
Expand Down
6 changes: 6 additions & 0 deletions src/hume/tts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .types import (
AudioEncoding,
AudioFormatType,
Context,
ErrorResponse,
Format,
FormatMp3,
Expand All @@ -21,6 +22,7 @@
PostedContextWithGenerationId,
PostedContextWithUtterances,
PostedTts,
PostedTtsStream,
PostedUtterance,
PostedUtteranceVoice,
PostedUtteranceVoiceWithId,
Expand All @@ -46,6 +48,7 @@
"AudioEncoding": ".types",
"AudioFormatType": ".types",
"BadRequestError": ".errors",
"Context": ".types",
"ErrorResponse": ".types",
"Format": ".types",
"FormatMp3": ".types",
Expand All @@ -58,6 +61,7 @@
"PostedContextWithGenerationId": ".types",
"PostedContextWithUtterances": ".types",
"PostedTts": ".types",
"PostedTtsStream": ".types",
"PostedUtterance": ".types",
"PostedUtteranceVoice": ".types",
"PostedUtteranceVoiceWithId": ".types",
Expand Down Expand Up @@ -107,6 +111,7 @@ def __dir__():
"AudioEncoding",
"AudioFormatType",
"BadRequestError",
"Context",
"ErrorResponse",
"Format",
"FormatMp3",
Expand All @@ -119,6 +124,7 @@ def __dir__():
"PostedContextWithGenerationId",
"PostedContextWithUtterances",
"PostedTts",
"PostedTtsStream",
"PostedUtterance",
"PostedUtteranceVoice",
"PostedUtteranceVoiceWithId",
Expand Down
153 changes: 109 additions & 44 deletions src/hume/tts/raw_client.py

Large diffs are not rendered by default.

34 changes: 24 additions & 10 deletions src/hume/tts/stream_input/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ def connect(
context_generation_id: typing.Optional[str] = None,
format_type: typing.Optional[AudioFormatType] = None,
include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
instant_mode: typing.Optional[bool] = None,
no_binary: typing.Optional[bool] = None,
strip_headers: typing.Optional[bool] = None,
temperature: typing.Optional[float] = None,
version: typing.Optional[OctaveVersion] = None,
api_key: typing.Optional[str] = None,
request_options: typing.Optional[RequestOptions] = None,
Expand All @@ -75,15 +75,22 @@ def connect(
include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
The set of timestamp types to include in the response. Only supported for Octave 2 requests.

instant_mode : typing.Optional[bool]
Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).

no_binary : typing.Optional[bool]
If enabled, no binary websocket messages will be sent to the client.

strip_headers : typing.Optional[bool]
If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).

temperature : typing.Optional[float]
Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.

**This is an experimental parameter.** It is recommended to use the default values for most use cases.

Defaults when omitted:
- Octave 1 voice creation (no voice specified): `0.9`
- Octave 1 text-to-speech: `0.8`
- Octave 2 text-to-speech: `0.75`

version : typing.Optional[OctaveVersion]
The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.

Expand All @@ -108,9 +115,9 @@ def connect(
"context_generation_id": context_generation_id,
"format_type": format_type,
"include_timestamp_types": include_timestamp_types,
"instant_mode": instant_mode,
"no_binary": no_binary,
"strip_headers": strip_headers,
"temperature": temperature,
"version": version,
"api_key": api_key,
**(
Expand Down Expand Up @@ -168,9 +175,9 @@ async def connect(
context_generation_id: typing.Optional[str] = None,
format_type: typing.Optional[AudioFormatType] = None,
include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
instant_mode: typing.Optional[bool] = None,
no_binary: typing.Optional[bool] = None,
strip_headers: typing.Optional[bool] = None,
temperature: typing.Optional[float] = None,
version: typing.Optional[OctaveVersion] = None,
api_key: typing.Optional[str] = None,
request_options: typing.Optional[RequestOptions] = None,
Expand All @@ -196,15 +203,22 @@ async def connect(
include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
The set of timestamp types to include in the response. Only supported for Octave 2 requests.

instant_mode : typing.Optional[bool]
Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).

no_binary : typing.Optional[bool]
If enabled, no binary websocket messages will be sent to the client.

strip_headers : typing.Optional[bool]
If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).

temperature : typing.Optional[float]
Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.

**This is an experimental parameter.** It is recommended to use the default values for most use cases.

Defaults when omitted:
- Octave 1 voice creation (no voice specified): `0.9`
- Octave 1 text-to-speech: `0.8`
- Octave 2 text-to-speech: `0.75`

version : typing.Optional[OctaveVersion]
The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.

Expand All @@ -229,9 +243,9 @@ async def connect(
"context_generation_id": context_generation_id,
"format_type": format_type,
"include_timestamp_types": include_timestamp_types,
"instant_mode": instant_mode,
"no_binary": no_binary,
"strip_headers": strip_headers,
"temperature": temperature,
"version": version,
"api_key": api_key,
**(
Expand Down
34 changes: 24 additions & 10 deletions src/hume/tts/stream_input/raw_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def connect(
context_generation_id: typing.Optional[str] = None,
format_type: typing.Optional[AudioFormatType] = None,
include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
instant_mode: typing.Optional[bool] = None,
no_binary: typing.Optional[bool] = None,
strip_headers: typing.Optional[bool] = None,
temperature: typing.Optional[float] = None,
version: typing.Optional[OctaveVersion] = None,
api_key: typing.Optional[str] = None,
request_options: typing.Optional[RequestOptions] = None,
Expand All @@ -63,15 +63,22 @@ def connect(
include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
The set of timestamp types to include in the response. Only supported for Octave 2 requests.

instant_mode : typing.Optional[bool]
Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).

no_binary : typing.Optional[bool]
If enabled, no binary websocket messages will be sent to the client.

strip_headers : typing.Optional[bool]
If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).

temperature : typing.Optional[float]
Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.

**This is an experimental parameter.** It is recommended to use the default values for most use cases.

Defaults when omitted:
- Octave 1 voice creation (no voice specified): `0.9`
- Octave 1 text-to-speech: `0.8`
- Octave 2 text-to-speech: `0.75`

version : typing.Optional[OctaveVersion]
The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.

Expand All @@ -96,9 +103,9 @@ def connect(
"context_generation_id": context_generation_id,
"format_type": format_type,
"include_timestamp_types": include_timestamp_types,
"instant_mode": instant_mode,
"no_binary": no_binary,
"strip_headers": strip_headers,
"temperature": temperature,
"version": version,
"api_key": api_key,
**(
Expand Down Expand Up @@ -145,9 +152,9 @@ async def connect(
context_generation_id: typing.Optional[str] = None,
format_type: typing.Optional[AudioFormatType] = None,
include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
instant_mode: typing.Optional[bool] = None,
no_binary: typing.Optional[bool] = None,
strip_headers: typing.Optional[bool] = None,
temperature: typing.Optional[float] = None,
version: typing.Optional[OctaveVersion] = None,
api_key: typing.Optional[str] = None,
request_options: typing.Optional[RequestOptions] = None,
Expand All @@ -173,15 +180,22 @@ async def connect(
include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
The set of timestamp types to include in the response. Only supported for Octave 2 requests.

instant_mode : typing.Optional[bool]
Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).

no_binary : typing.Optional[bool]
If enabled, no binary websocket messages will be sent to the client.

strip_headers : typing.Optional[bool]
If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).

temperature : typing.Optional[float]
Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.

**This is an experimental parameter.** It is recommended to use the default values for most use cases.

Defaults when omitted:
- Octave 1 voice creation (no voice specified): `0.9`
- Octave 1 text-to-speech: `0.8`
- Octave 2 text-to-speech: `0.75`

version : typing.Optional[OctaveVersion]
The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.

Expand All @@ -206,9 +220,9 @@ async def connect(
"context_generation_id": context_generation_id,
"format_type": format_type,
"include_timestamp_types": include_timestamp_types,
"instant_mode": instant_mode,
"no_binary": no_binary,
"strip_headers": strip_headers,
"temperature": temperature,
"version": version,
"api_key": api_key,
**(
Expand Down
6 changes: 6 additions & 0 deletions src/hume/tts/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
if typing.TYPE_CHECKING:
from .audio_encoding import AudioEncoding
from .audio_format_type import AudioFormatType
from .context import Context
from .error_response import ErrorResponse
from .format import Format
from .format_mp_3 import FormatMp3
Expand All @@ -20,6 +21,7 @@
from .posted_context_with_generation_id import PostedContextWithGenerationId
from .posted_context_with_utterances import PostedContextWithUtterances
from .posted_tts import PostedTts
from .posted_tts_stream import PostedTtsStream
from .posted_utterance import PostedUtterance
from .posted_utterance_voice import PostedUtteranceVoice
from .posted_utterance_voice_with_id import PostedUtteranceVoiceWithId
Expand All @@ -41,6 +43,7 @@
_dynamic_imports: typing.Dict[str, str] = {
"AudioEncoding": ".audio_encoding",
"AudioFormatType": ".audio_format_type",
"Context": ".context",
"ErrorResponse": ".error_response",
"Format": ".format",
"FormatMp3": ".format_mp_3",
Expand All @@ -53,6 +56,7 @@
"PostedContextWithGenerationId": ".posted_context_with_generation_id",
"PostedContextWithUtterances": ".posted_context_with_utterances",
"PostedTts": ".posted_tts",
"PostedTtsStream": ".posted_tts_stream",
"PostedUtterance": ".posted_utterance",
"PostedUtteranceVoice": ".posted_utterance_voice",
"PostedUtteranceVoiceWithId": ".posted_utterance_voice_with_id",
Expand Down Expand Up @@ -98,6 +102,7 @@ def __dir__():
__all__ = [
"AudioEncoding",
"AudioFormatType",
"Context",
"ErrorResponse",
"Format",
"FormatMp3",
Expand All @@ -110,6 +115,7 @@ def __dir__():
"PostedContextWithGenerationId",
"PostedContextWithUtterances",
"PostedTts",
"PostedTtsStream",
"PostedUtterance",
"PostedUtteranceVoice",
"PostedUtteranceVoiceWithId",
Expand Down
8 changes: 8 additions & 0 deletions src/hume/tts/types/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# This file was auto-generated by Fern from our API Definition.

import typing

from .posted_context_with_generation_id import PostedContextWithGenerationId
from .posted_context_with_utterances import PostedContextWithUtterances

# Union alias: a Context value is either a PostedContextWithGenerationId or a
# PostedContextWithUtterances (both imported above from sibling modules).
# NOTE(review): member order is kept exactly as generated; presumably it affects
# the order in which union members are tried during validation — confirm before
# reordering.
Context = typing.Union[PostedContextWithGenerationId, PostedContextWithUtterances]
14 changes: 12 additions & 2 deletions src/hume/tts/types/posted_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ class PostedTts(UniversalBaseModel):
If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
"""

temperature: typing.Optional[float] = pydantic.Field(default=None)
"""
Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.

**This is an experimental parameter.** It is recommended to use the default values for most use cases.

Defaults when omitted:
- Octave 1 voice creation (no voice specified): `0.9`
- Octave 1 text-to-speech: `0.8`
- Octave 2 text-to-speech: `0.75`
"""

utterances: typing.List[PostedUtterance] = pydantic.Field()
"""
A list of **Utterances** to be converted to speech output.
Expand All @@ -66,8 +78,6 @@ class PostedTts(UniversalBaseModel):
For a comparison of Octave versions, see the [Octave versions](/docs/text-to-speech-tts/overview#octave-versions) section in the TTS overview.
"""

instant_mode: typing.Optional[bool] = None

if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
else:
Expand Down
Loading
Loading