HumeAI · fern-api · May 18, 2026
diff --git a/.fern/metadata.json b/.fern/metadata.json
@@ -1,5 +1,5 @@
 {
-  "cliVersion": "4.62.5",
+  "cliVersion": "4.65.2",
   "generatorName": "fernapi/fern-python-sdk",
   "generatorVersion": "4.61.0",
   "generatorConfig": {
@@ -64,6 +64,6 @@
       }
     ]
   },
-  "originGitCommit": "a07a0deaa640e8b286f9c8e4e47426b37a083a67",
-  "sdkVersion": "0.13.11"
+  "originGitCommit": "4d3b0e268ae51b18618f73109de010b707efad88",
+  "sdkVersion": "0.13.12"
 }
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ dynamic = ["version"]
 
 [tool.poetry]
 name = "hume"
-version = "0.13.11"
+version = "0.13.12"
 description = "A Python SDK for Hume AI"
 readme = "README.md"
 authors = []

diff --git a/src/hume/core/client_wrapper.py b/src/hume/core/client_wrapper.py
@@ -28,12 +28,12 @@ def get_headers(self) -> typing.Dict[str, str]:
         import platform
 
         headers: typing.Dict[str, str] = {
-            "User-Agent": "hume/0.13.11",
+            "User-Agent": "hume/0.13.12",
             "X-Fern-Language": "Python",
             "X-Fern-Runtime": f"python/{platform.python_version()}",
             "X-Fern-Platform": f"{platform.system().lower()}/{platform.release()}",
             "X-Fern-SDK-Name": "hume",
-            "X-Fern-SDK-Version": "0.13.11",
+            "X-Fern-SDK-Version": "0.13.12",
             **(self.get_custom_headers() or {}),
         }
         if self.api_key is not None:

diff --git a/src/hume/tts/__init__.py b/src/hume/tts/__init__.py
@@ -9,6 +9,7 @@
     from .types import (
         AudioEncoding,
         AudioFormatType,
+        Context,
         ErrorResponse,
         Format,
         FormatMp3,
@@ -21,6 +22,7 @@
         PostedContextWithGenerationId,
         PostedContextWithUtterances,
         PostedTts,
+        PostedTtsStream,
         PostedUtterance,
         PostedUtteranceVoice,
         PostedUtteranceVoiceWithId,
@@ -46,6 +48,7 @@
     "AudioEncoding": ".types",
     "AudioFormatType": ".types",
     "BadRequestError": ".errors",
+    "Context": ".types",
     "ErrorResponse": ".types",
     "Format": ".types",
     "FormatMp3": ".types",
@@ -58,6 +61,7 @@
     "PostedContextWithGenerationId": ".types",
     "PostedContextWithUtterances": ".types",
     "PostedTts": ".types",
+    "PostedTtsStream": ".types",
     "PostedUtterance": ".types",
     "PostedUtteranceVoice": ".types",
     "PostedUtteranceVoiceWithId": ".types",
@@ -107,6 +111,7 @@ def __dir__():
     "AudioEncoding",
     "AudioFormatType",
     "BadRequestError",
+    "Context",
     "ErrorResponse",
     "Format",
     "FormatMp3",
@@ -119,6 +124,7 @@ def __dir__():
     "PostedContextWithGenerationId",
     "PostedContextWithUtterances",
     "PostedTts",
+    "PostedTtsStream",
     "PostedUtterance",
     "PostedUtteranceVoice",
     "PostedUtteranceVoiceWithId",

diff --git a/src/hume/tts/raw_client.py b/src/hume/tts/raw_client.py
diff --git a/src/hume/tts/stream_input/client.py b/src/hume/tts/stream_input/client.py
@@ -47,9 +47,9 @@ def connect(
         context_generation_id: typing.Optional[str] = None,
         format_type: typing.Optional[AudioFormatType] = None,
         include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
-        instant_mode: typing.Optional[bool] = None,
         no_binary: typing.Optional[bool] = None,
         strip_headers: typing.Optional[bool] = None,
+        temperature: typing.Optional[float] = None,
         version: typing.Optional[OctaveVersion] = None,
         api_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
@@ -75,15 +75,22 @@ def connect(
         include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
             The set of timestamp types to include in the response. Only supported for Octave 2 requests.
 
-        instant_mode : typing.Optional[bool]
-            Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
-
         no_binary : typing.Optional[bool]
             If enabled, no binary websocket messages will be sent to the client.
 
         strip_headers : typing.Optional[bool]
             If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
 
+        temperature : typing.Optional[float]
+            Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.
+
+            **This is an experimental parameter.** It is recommended to use the default values for most use cases.
+
+            Defaults when omitted:
+            - Octave 1 voice creation (no voice specified): `0.9`
+            - Octave 1 text-to-speech: `0.8`
+            - Octave 2 text-to-speech: `0.75`
+
         version : typing.Optional[OctaveVersion]
             The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.
 
@@ -108,9 +115,9 @@ def connect(
                         "context_generation_id": context_generation_id,
                         "format_type": format_type,
                         "include_timestamp_types": include_timestamp_types,
-                        "instant_mode": instant_mode,
                         "no_binary": no_binary,
                         "strip_headers": strip_headers,
+                        "temperature": temperature,
                         "version": version,
                         "api_key": api_key,
                         **(
@@ -168,9 +175,9 @@ async def connect(
         context_generation_id: typing.Optional[str] = None,
         format_type: typing.Optional[AudioFormatType] = None,
         include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
-        instant_mode: typing.Optional[bool] = None,
         no_binary: typing.Optional[bool] = None,
         strip_headers: typing.Optional[bool] = None,
+        temperature: typing.Optional[float] = None,
         version: typing.Optional[OctaveVersion] = None,
         api_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
@@ -196,15 +203,22 @@ async def connect(
         include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
             The set of timestamp types to include in the response. Only supported for Octave 2 requests.
 
-        instant_mode : typing.Optional[bool]
-            Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
-
         no_binary : typing.Optional[bool]
             If enabled, no binary websocket messages will be sent to the client.
 
         strip_headers : typing.Optional[bool]
             If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
 
+        temperature : typing.Optional[float]
+            Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.
+
+            **This is an experimental parameter.** It is recommended to use the default values for most use cases.
+
+            Defaults when omitted:
+            - Octave 1 voice creation (no voice specified): `0.9`
+            - Octave 1 text-to-speech: `0.8`
+            - Octave 2 text-to-speech: `0.75`
+
         version : typing.Optional[OctaveVersion]
             The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.
 
@@ -229,9 +243,9 @@ async def connect(
                         "context_generation_id": context_generation_id,
                         "format_type": format_type,
                         "include_timestamp_types": include_timestamp_types,
-                        "instant_mode": instant_mode,
                         "no_binary": no_binary,
                         "strip_headers": strip_headers,
+                        "temperature": temperature,
                         "version": version,
                         "api_key": api_key,
                         **(

diff --git a/src/hume/tts/stream_input/raw_client.py b/src/hume/tts/stream_input/raw_client.py
@@ -35,9 +35,9 @@ def connect(
         context_generation_id: typing.Optional[str] = None,
         format_type: typing.Optional[AudioFormatType] = None,
         include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
-        instant_mode: typing.Optional[bool] = None,
         no_binary: typing.Optional[bool] = None,
         strip_headers: typing.Optional[bool] = None,
+        temperature: typing.Optional[float] = None,
         version: typing.Optional[OctaveVersion] = None,
         api_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
@@ -63,15 +63,22 @@ def connect(
         include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
             The set of timestamp types to include in the response. Only supported for Octave 2 requests.
 
-        instant_mode : typing.Optional[bool]
-            Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
-
         no_binary : typing.Optional[bool]
             If enabled, no binary websocket messages will be sent to the client.
 
         strip_headers : typing.Optional[bool]
             If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
 
+        temperature : typing.Optional[float]
+            Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.
+
+            **This is an experimental parameter.** It is recommended to use the default values for most use cases.
+
+            Defaults when omitted:
+            - Octave 1 voice creation (no voice specified): `0.9`
+            - Octave 1 text-to-speech: `0.8`
+            - Octave 2 text-to-speech: `0.75`
+
         version : typing.Optional[OctaveVersion]
             The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.
 
@@ -96,9 +103,9 @@ def connect(
                         "context_generation_id": context_generation_id,
                         "format_type": format_type,
                         "include_timestamp_types": include_timestamp_types,
-                        "instant_mode": instant_mode,
                         "no_binary": no_binary,
                         "strip_headers": strip_headers,
+                        "temperature": temperature,
                         "version": version,
                         "api_key": api_key,
                         **(
@@ -145,9 +152,9 @@ async def connect(
         context_generation_id: typing.Optional[str] = None,
         format_type: typing.Optional[AudioFormatType] = None,
         include_timestamp_types: typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]] = None,
-        instant_mode: typing.Optional[bool] = None,
         no_binary: typing.Optional[bool] = None,
         strip_headers: typing.Optional[bool] = None,
+        temperature: typing.Optional[float] = None,
         version: typing.Optional[OctaveVersion] = None,
         api_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
@@ -173,15 +180,22 @@ async def connect(
         include_timestamp_types : typing.Optional[typing.Union[TimestampType, typing.Sequence[TimestampType]]]
             The set of timestamp types to include in the response. Only supported for Octave 2 requests.
 
-        instant_mode : typing.Optional[bool]
-            Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
-
         no_binary : typing.Optional[bool]
             If enabled, no binary websocket messages will be sent to the client.
 
         strip_headers : typing.Optional[bool]
             If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
 
+        temperature : typing.Optional[float]
+            Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.
+
+            **This is an experimental parameter.** It is recommended to use the default values for most use cases.
+
+            Defaults when omitted:
+            - Octave 1 voice creation (no voice specified): `0.9`
+            - Octave 1 text-to-speech: `0.8`
+            - Octave 2 text-to-speech: `0.75`
+
         version : typing.Optional[OctaveVersion]
             The version of the Octave Model to use. 1 for the legacy model, 2 for the new model.
 
@@ -206,9 +220,9 @@ async def connect(
                         "context_generation_id": context_generation_id,
                         "format_type": format_type,
                         "include_timestamp_types": include_timestamp_types,
-                        "instant_mode": instant_mode,
                         "no_binary": no_binary,
                         "strip_headers": strip_headers,
+                        "temperature": temperature,
                         "version": version,
                         "api_key": api_key,
                         **(

diff --git a/src/hume/tts/types/__init__.py b/src/hume/tts/types/__init__.py
@@ -8,6 +8,7 @@
 if typing.TYPE_CHECKING:
     from .audio_encoding import AudioEncoding
     from .audio_format_type import AudioFormatType
+    from .context import Context
     from .error_response import ErrorResponse
     from .format import Format
     from .format_mp_3 import FormatMp3
@@ -20,6 +21,7 @@
     from .posted_context_with_generation_id import PostedContextWithGenerationId
     from .posted_context_with_utterances import PostedContextWithUtterances
     from .posted_tts import PostedTts
+    from .posted_tts_stream import PostedTtsStream
     from .posted_utterance import PostedUtterance
     from .posted_utterance_voice import PostedUtteranceVoice
     from .posted_utterance_voice_with_id import PostedUtteranceVoiceWithId
@@ -41,6 +43,7 @@
 _dynamic_imports: typing.Dict[str, str] = {
     "AudioEncoding": ".audio_encoding",
     "AudioFormatType": ".audio_format_type",
+    "Context": ".context",
     "ErrorResponse": ".error_response",
     "Format": ".format",
     "FormatMp3": ".format_mp_3",
@@ -53,6 +56,7 @@
     "PostedContextWithGenerationId": ".posted_context_with_generation_id",
     "PostedContextWithUtterances": ".posted_context_with_utterances",
     "PostedTts": ".posted_tts",
+    "PostedTtsStream": ".posted_tts_stream",
     "PostedUtterance": ".posted_utterance",
     "PostedUtteranceVoice": ".posted_utterance_voice",
     "PostedUtteranceVoiceWithId": ".posted_utterance_voice_with_id",
@@ -98,6 +102,7 @@ def __dir__():
 __all__ = [
     "AudioEncoding",
     "AudioFormatType",
+    "Context",
     "ErrorResponse",
     "Format",
     "FormatMp3",
@@ -110,6 +115,7 @@ def __dir__():
     "PostedContextWithGenerationId",
     "PostedContextWithUtterances",
     "PostedTts",
+    "PostedTtsStream",
     "PostedUtterance",
     "PostedUtteranceVoice",
     "PostedUtteranceVoiceWithId",

diff --git a/src/hume/tts/types/context.py b/src/hume/tts/types/context.py
@@ -0,0 +1,8 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from .posted_context_with_generation_id import PostedContextWithGenerationId
+from .posted_context_with_utterances import PostedContextWithUtterances
+
+Context = typing.Union[PostedContextWithGenerationId, PostedContextWithUtterances]
diff --git a/src/hume/tts/types/posted_tts.py b/src/hume/tts/types/posted_tts.py
@@ -50,6 +50,18 @@ class PostedTts(UniversalBaseModel):
     If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. Otherwise, if disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
     """
 
+    temperature: typing.Optional[float] = pydantic.Field(default=None)
+    """
+    Sampling temperature for the speech generation model. Higher values increase variation; lower values increase consistency.
+
+    **This is an experimental parameter.** It is recommended to use the default values for most use cases.
+
+    Defaults when omitted:
+    - Octave 1 voice creation (no voice specified): `0.9`
+    - Octave 1 text-to-speech: `0.8`
+    - Octave 2 text-to-speech: `0.75`
+    """
+
     utterances: typing.List[PostedUtterance] = pydantic.Field()
     """
     A list of **Utterances** to be converted to speech output.
@@ -66,8 +78,6 @@ class PostedTts(UniversalBaseModel):
     For a comparison of Octave versions, see the [Octave versions](/docs/text-to-speech-tts/overview#octave-versions) section in the TTS overview.
     """
 
-    instant_mode: typing.Optional[bool] = None
-
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else: