
Commit fd586cb

feat(api): responses x eval api
1 parent f26c5fc · commit fd586cb

24 files changed: +645 −1097 lines

.stats.yml

+3 −3

```diff
@@ -1,4 +1,4 @@
 configured_endpoints: 101
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-794a6ed3c3d3d77887564755168056af8a426b17cf1ec721e3a300503dc22a41.yml
-openapi_spec_hash: 25a81c220713cd5b0bafc221d1dfa79a
-config_hash: 0b768ed1b56c6d82816f0fa40dc4aaf5
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-161ca7f1cfd7b33c1fc07d0ce25dfe4be5a7271c394f4cb526b7fb21b0729900.yml
+openapi_spec_hash: 602e14add4bee018c6774e320ce309b8
+config_hash: 7da27f7260075e8813ddcea542fba1bf
```

api.md

+2 −0

````diff
@@ -787,6 +787,7 @@ Types:
 ```python
 from openai.types import (
     EvalCustomDataSourceConfig,
+    EvalLogsDataSourceConfig,
     EvalStoredCompletionsDataSourceConfig,
     EvalCreateResponse,
     EvalRetrieveResponse,
@@ -812,6 +813,7 @@ Types:
 from openai.types.evals import (
     CreateEvalCompletionsRunDataSource,
     CreateEvalJSONLRunDataSource,
+    CreateEvalResponsesRunDataSource,
     EvalAPIError,
     RunCreateResponse,
     RunRetrieveResponse,
````

src/openai/resources/audio/transcriptions.py

+93 −1

```diff
@@ -57,6 +57,7 @@ def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
@@ -118,6 +119,7 @@ def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -152,6 +154,11 @@ def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -200,6 +207,7 @@ def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -234,6 +242,11 @@ def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -281,6 +294,7 @@ def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -299,6 +313,7 @@ def create(
                 {
                     "file": file,
                     "model": model,
+                    "chunking_strategy": chunking_strategy,
                     "include": include,
                     "language": language,
                     "prompt": prompt,
@@ -357,6 +372,8 @@ async def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -369,7 +386,68 @@ async def create(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Transcription: ...
+    ) -> TranscriptionCreateResponse:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word`, or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+        """
 
     @overload
     async def create(
@@ -418,6 +496,7 @@ async def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -452,6 +531,11 @@ async def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -500,6 +584,7 @@ async def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -534,6 +619,11 @@ async def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -581,6 +671,7 @@ async def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -599,6 +690,7 @@ async def create(
                 {
                     "file": file,
                     "model": model,
+                    "chunking_strategy": chunking_strategy,
                     "include": include,
                     "language": language,
                     "prompt": prompt,
```

src/openai/resources/embeddings.py

+8 −6

```diff
@@ -66,11 +66,12 @@ def create(
          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
              inputs in a single request, pass an array of strings or array of token arrays.
              The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              all embedding models), cannot be an empty string, and any array must be 2048
              dimensions or less.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens. Some models may also impose a limit on total number of
-              tokens summed across inputs.
+              for counting tokens. In addition to the per-input token limit, all embedding
+              models enforce a maximum of 300,000 tokens summed across all inputs in a single
+              request.
 
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
@@ -181,11 +182,12 @@ async def create(
          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
              inputs in a single request, pass an array of strings or array of token arrays.
              The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              all embedding models), cannot be an empty string, and any array must be 2048
              dimensions or less.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens. Some models may also impose a limit on total number of
-              tokens summed across inputs.
+              for counting tokens. In addition to the per-input token limit, all embedding
+              models enforce a maximum of 300,000 tokens summed across all inputs in a single
+              request.
 
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
```
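
The updated docstring documents a 300,000-token cap summed across all inputs per request. A rough sketch of batching inputs under that cap with `tiktoken`; the encoding name and helper function are assumptions, not part of this commit.

```python
# Sketch only: splits inputs into batches that each stay under the documented
# 300,000-token aggregate limit per embeddings request (requires `tiktoken`).
import tiktoken

MAX_TOKENS_PER_REQUEST = 300_000


def batch_inputs(texts: list[str], encoding_name: str = "cl100k_base") -> list[list[str]]:
    enc = tiktoken.get_encoding(encoding_name)
    batches: list[list[str]] = []
    current: list[str] = []
    current_tokens = 0
    for text in texts:
        n = len(enc.encode(text))
        # Start a new batch once adding this input would exceed the aggregate cap.
        if current and current_tokens + n > MAX_TOKENS_PER_REQUEST:
            batches.append(current)
            current, current_tokens = [], 0
        current.append(text)
        current_tokens += n
    if current:
        batches.append(current)
    return batches
```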

src/openai/types/__init__.py

+1 −0

```diff
@@ -70,6 +70,7 @@
 from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams
 from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams
 from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig as EvalLogsDataSourceConfig
 from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam
 from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse
 from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions
```

src/openai/types/audio/transcription_create_params.py

+37 −1

```diff
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from typing import List, Union, Optional
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..._types import FileTypes
 from ..audio_model import AudioModel
@@ -12,6 +12,8 @@
 
 __all__ = [
     "TranscriptionCreateParamsBase",
+    "ChunkingStrategy",
+    "ChunkingStrategyVadConfig",
     "TranscriptionCreateParamsNonStreaming",
     "TranscriptionCreateParamsStreaming",
 ]
@@ -31,6 +33,15 @@ class TranscriptionCreateParamsBase(TypedDict, total=False):
     (which is powered by our open source Whisper V2 model).
     """
 
+    chunking_strategy: Optional[ChunkingStrategy]
+    """Controls how the audio is cut into chunks.
+
+    When set to `"auto"`, the server first normalizes loudness and then uses voice
+    activity detection (VAD) to choose boundaries. `server_vad` object can be
+    provided to tweak VAD detection parameters manually. If unset, the audio is
+    transcribed as a single block.
+    """
+
     include: List[TranscriptionInclude]
     """Additional information to include in the transcription response.
 
@@ -82,6 +93,31 @@ class TranscriptionCreateParamsBase(TypedDict, total=False):
     """
 
 
+class ChunkingStrategyVadConfig(TypedDict, total=False):
+    type: Required[Literal["server_vad"]]
+    """Must be set to `server_vad` to enable manual chunking using server side VAD."""
+
+    prefix_padding_ms: int
+    """Amount of audio to include before the VAD detected speech (in milliseconds)."""
+
+    silence_duration_ms: int
+    """
+    Duration of silence to detect speech stop (in milliseconds). With shorter values
+    the model will respond more quickly, but may jump in on short pauses from the
+    user.
+    """
+
+    threshold: float
+    """Sensitivity threshold (0.0 to 1.0) for voice activity detection.
+
+    A higher threshold will require louder audio to activate the model, and thus
+    might perform better in noisy environments.
+    """
+
+
+ChunkingStrategy: TypeAlias = Union[Literal["auto"], ChunkingStrategyVadConfig]
+
+
 class TranscriptionCreateParamsNonStreaming(TranscriptionCreateParamsBase, total=False):
     stream: Optional[Literal[False]]
     """
```

src/openai/types/embedding_create_params.py

+5 −4

```diff
@@ -16,11 +16,12 @@ class EmbeddingCreateParams(TypedDict, total=False):
 
     To embed multiple inputs in a single request, pass an array of strings or array
     of token arrays. The input must not exceed the max input tokens for the model
-    (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any
-    array must be 2048 dimensions or less.
+    (8192 tokens for all embedding models), cannot be an empty string, and any array
+    must be 2048 dimensions or less.
     [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens. Some models may also impose a limit on total number of
-    tokens summed across inputs.
+    for counting tokens. In addition to the per-input token limit, all embedding
+    models enforce a maximum of 300,000 tokens summed across all inputs in a single
+    request.
     """
 
     model: Required[Union[str, EmbeddingModel]]
```
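
For illustration, an `EmbeddingCreateParams` payload that stays within the documented limits (per-input token cap, at most 2048 array entries, and under 300,000 tokens summed across inputs); the model name and inputs are placeholders.

```python
# Sketch: a valid EmbeddingCreateParams dict under the documented limits.
from openai.types import EmbeddingCreateParams

params: EmbeddingCreateParams = {
    "model": "text-embedding-3-small",
    "input": ["first document to embed", "second document to embed"],
}
```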

src/openai/types/eval_create_params.py

+12 −3

```diff
@@ -16,6 +16,7 @@
     "EvalCreateParams",
     "DataSourceConfig",
     "DataSourceConfigCustom",
+    "DataSourceConfigLogs",
     "DataSourceConfigStoredCompletions",
     "TestingCriterion",
     "TestingCriterionLabelModel",
@@ -65,15 +66,23 @@ class DataSourceConfigCustom(TypedDict, total=False):
     """
 
 
+class DataSourceConfigLogs(TypedDict, total=False):
+    type: Required[Literal["logs"]]
+    """The type of data source. Always `logs`."""
+
+    metadata: Dict[str, object]
+    """Metadata filters for the logs data source."""
+
+
 class DataSourceConfigStoredCompletions(TypedDict, total=False):
-    type: Required[Literal["stored_completions"]]
-    """The type of data source. Always `stored_completions`."""
+    type: Required[Literal["stored-completions"]]
+    """The type of data source. Always `stored-completions`."""
 
     metadata: Dict[str, object]
     """Metadata filters for the stored completions data source."""
 
 
-DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigStoredCompletions]
+DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]
 
 
 class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
```
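
A sketch of the new `logs` data source config variant introduced here; the metadata filter is an illustrative placeholder.

```python
# Sketch: the new DataSourceConfigLogs variant for eval creation.
from openai.types.eval_create_params import DataSourceConfigLogs

logs_config: DataSourceConfigLogs = {
    "type": "logs",                   # required discriminator, always "logs"
    "metadata": {"model": "gpt-4o"},  # optional metadata filters for the logs
}
```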

src/openai/types/eval_create_response.py

+3 −1

```diff
@@ -10,6 +10,7 @@
 from .graders.label_model_grader import LabelModelGrader
 from .graders.score_model_grader import ScoreModelGrader
 from .graders.string_check_grader import StringCheckGrader
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig
 from .eval_custom_data_source_config import EvalCustomDataSourceConfig
 from .graders.text_similarity_grader import TextSimilarityGrader
 from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
@@ -24,7 +25,8 @@
 ]
 
 DataSourceConfig: TypeAlias = Annotated[
-    Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+    Union[EvalCustomDataSourceConfig, EvalLogsDataSourceConfig, EvalStoredCompletionsDataSourceConfig],
+    PropertyInfo(discriminator="type"),
 ]
 
 
```
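
Since `DataSourceConfig` is discriminated on `type`, callers can narrow the union on returned eval objects. A hedged sketch, assuming `ev` is an eval response object with a `data_source_config` attribute; the helper name is made up for illustration.

```python
# Sketch: narrowing the discriminated DataSourceConfig union on an eval object.
from openai.types import EvalLogsDataSourceConfig


def is_logs_backed(ev) -> bool:
    # The union is discriminated on `type`, so an isinstance check (or comparing
    # ev.data_source_config.type == "logs") narrows to the logs variant.
    return isinstance(ev.data_source_config, EvalLogsDataSourceConfig)
```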
