From dcf75ec60c4bc2c8048af28863ec51c9584351f6 Mon Sep 17 00:00:00 2001 From: abonneth <206544678+abonneth@users.noreply.github.com> Date: Mon, 29 Jun 2026 09:00:09 +0000 Subject: [PATCH] chore(sdk): sync to agent_platform@3805647 (v1.0.1) --- openapi.json | 142 +++++++++--------- pyproject.toml | 2 +- src/hai_agents/__init__.py | 18 +++ src/hai_agents/core/client_wrapper.py | 4 +- src/hai_agents/environments/client.py | 92 +----------- src/hai_agents/environments/raw_client.py | 116 +++----------- .../types/patch_environment_mode.py | 5 +- src/hai_agents/types/__init__.py | 18 ++- src/hai_agents/types/browser.py | 17 +-- src/hai_agents/types/browser_mode.py | 49 +++++- src/hai_agents/types/browser_text_mode.py | 28 ++++ .../types/browser_text_mode_type.py | 5 + src/hai_agents/types/browser_visual_mode.py | 38 +++++ .../types/browser_visual_mode_type.py | 5 + uv.lock | 2 +- 15 files changed, 266 insertions(+), 275 deletions(-) create mode 100644 src/hai_agents/types/browser_text_mode.py create mode 100644 src/hai_agents/types/browser_text_mode_type.py create mode 100644 src/hai_agents/types/browser_visual_mode.py create mode 100644 src/hai_agents/types/browser_visual_mode_type.py diff --git a/openapi.json b/openapi.json index f73da24..68d7794 100644 --- a/openapi.json +++ b/openapi.json @@ -1,7 +1,7 @@ { "openapi": "3.1.0", "info": { - "title": "Agent API", + "title": "Computer-Use Agents", "version": "1.0.0" }, "servers": [ @@ -3817,20 +3817,6 @@ "title": "Kind", "default": "web" }, - "width": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Width", - "description": "Viewport width in pixels.", - "default": 1200 - }, - "height": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Height", - "description": "Viewport height in pixels.", - "default": 1200 - }, "start_url": { "type": "string", "title": "Start Url", @@ -3838,22 +3824,23 @@ "default": "https://www.bing.com" }, "mode": { - "type": "string", - "enum": [ - "visual", - "text", - "multimodal" + "oneOf": [ + { + "$ref": "#/components/schemas/BrowserVisualMode" + }, + { + "$ref": "#/components/schemas/BrowserTextMode" + } ], "title": "Mode", - "description": "How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots.", - "default": "visual" - }, - "page_chars": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Page Chars", - "description": "Characters of page text shown per page in 'text' mode.", - "default": 20000 + "description": "How the agent perceives and drives the browser.", + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/BrowserTextMode", + "visual": "#/components/schemas/BrowserVisualMode" + } + } }, "vault_id": { "anyOf": [ @@ -4120,6 +4107,59 @@ "title": "BrowserProfileRead", "description": "Response model for a browser profile." }, + "BrowserTextMode": { + "properties": { + "type": { + "type": "string", + "const": "text", + "title": "Type", + "default": "text" + }, + "chunk_size": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Chunk Size", + "description": "Characters of page text shown per page.", + "default": 20000 + } + }, + "type": "object", + "title": "BrowserTextMode", + "description": "Read-only markdown with URL navigation, no screenshots." + }, + "BrowserVisualMode": { + "properties": { + "type": { + "type": "string", + "const": "visual", + "title": "Type", + "default": "visual" + }, + "width": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Width", + "description": "Viewport width in pixels.", + "default": 1200 + }, + "height": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Height", + "description": "Viewport height in pixels.", + "default": 1200 + }, + "markdown": { + "type": "boolean", + "title": "Markdown", + "description": "Also include the viewport's text as markdown alongside each screenshot.", + "default": false + } + }, + "type": "object", + "title": "BrowserVisualMode", + "description": "Act on screenshots by viewport coordinates." + }, "CreateWebhook": { "properties": { "url": { @@ -5077,30 +5117,6 @@ }, "PatchEnvironment": { "properties": { - "width": { - "anyOf": [ - { - "type": "integer", - "exclusiveMinimum": 0.0 - }, - { - "type": "null" - } - ], - "title": "Width" - }, - "height": { - "anyOf": [ - { - "type": "integer", - "exclusiveMinimum": 0.0 - }, - { - "type": "null" - } - ], - "title": "Height" - }, "start_url": { "anyOf": [ { @@ -5126,30 +5142,16 @@ "mode": { "anyOf": [ { - "type": "string", - "enum": [ - "visual", - "text", - "multimodal" - ] + "$ref": "#/components/schemas/BrowserVisualMode" }, { - "type": "null" - } - ], - "title": "Mode" - }, - "page_chars": { - "anyOf": [ - { - "type": "integer", - "exclusiveMinimum": 0.0 + "$ref": "#/components/schemas/BrowserTextMode" }, { "type": "null" } ], - "title": "Page Chars" + "title": "Mode" }, "vault_id": { "anyOf": [ diff --git a/pyproject.toml b/pyproject.toml index 61e2dd6..9201e6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "hai-agents" -version = "1.0.0" +version = "1.0.1" description = "Python SDK for H Company's Computer-Use Agents: autonomous agents powered by Holo." requires-python = ">=3.10" readme = "README.md" diff --git a/src/hai_agents/__init__.py b/src/hai_agents/__init__.py index 9b5a4b3..06228dc 100644 --- a/src/hai_agents/__init__.py +++ b/src/hai_agents/__init__.py @@ -38,9 +38,15 @@ Browser, BrowserKind, BrowserMode, + BrowserMode_Text, + BrowserMode_Visual, BrowserNetwork, BrowserProfileList, BrowserProfileRead, + BrowserTextMode, + BrowserTextModeType, + BrowserVisualMode, + BrowserVisualModeType, Environment, EnvironmentKind, EnvironmentPage, @@ -202,9 +208,15 @@ "Browser": ".types", "BrowserKind": ".types", "BrowserMode": ".types", + "BrowserMode_Text": ".types", + "BrowserMode_Visual": ".types", "BrowserNetwork": ".types", "BrowserProfileList": ".types", "BrowserProfileRead": ".types", + "BrowserTextMode": ".types", + "BrowserTextModeType": ".types", + "BrowserVisualMode": ".types", + "BrowserVisualModeType": ".types", "Client": ".client", "DefaultAioHttpClient": "._default_clients", "DefaultAsyncHttpxClient": "._default_clients", @@ -396,9 +408,15 @@ def __dir__(): "Browser", "BrowserKind", "BrowserMode", + "BrowserMode_Text", + "BrowserMode_Visual", "BrowserNetwork", "BrowserProfileList", "BrowserProfileRead", + "BrowserTextMode", + "BrowserTextModeType", + "BrowserVisualMode", + "BrowserVisualModeType", "Client", "DefaultAioHttpClient", "DefaultAsyncHttpxClient", diff --git a/src/hai_agents/core/client_wrapper.py b/src/hai_agents/core/client_wrapper.py index 4db9121..b9c309b 100644 --- a/src/hai_agents/core/client_wrapper.py +++ b/src/hai_agents/core/client_wrapper.py @@ -29,9 +29,9 @@ def get_headers(self) -> typing.Dict[str, str]: import platform headers: typing.Dict[str, str] = { - "User-Agent": "hai_agents/1.0.0", + "User-Agent": "hai_agents/1.0.1", "X-HCompany-Client-Name": "hai_agents", - "X-HCompany-Client-Version": "1.0.0", + "X-HCompany-Client-Version": "1.0.1", "X-HCompany-Client-Type": "sdk", "X-HCompany-Language": "Python", "X-HCompany-Runtime": f"python/{platform.python_version()}", diff --git a/src/hai_agents/environments/client.py b/src/hai_agents/environments/client.py index 9978b5a..cface4d 100644 --- a/src/hai_agents/environments/client.py +++ b/src/hai_agents/environments/client.py @@ -95,11 +95,8 @@ def create_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -115,20 +112,11 @@ def create_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -161,11 +149,8 @@ def create_environment( _response = self._raw_client.create_environment( id=id, kind=kind, - width=width, - height=height, start_url=start_url, mode=mode, - page_chars=page_chars, vault_id=vault_id, browser_profile_id=browser_profile_id, network=network, @@ -209,11 +194,8 @@ def update_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -231,20 +213,11 @@ def update_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -279,11 +252,8 @@ def update_environment( id_, id=id, kind=kind, - width=width, - height=height, start_url=start_url, mode=mode, - page_chars=page_chars, vault_id=vault_id, browser_profile_id=browser_profile_id, network=network, @@ -324,12 +294,9 @@ def patch_environment( self, id: str, *, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, session_id: typing.Optional[str] = OMIT, mode: typing.Optional[PatchEnvironmentMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -347,18 +314,12 @@ def patch_environment( ---------- id : str - width : typing.Optional[int] - - height : typing.Optional[int] - start_url : typing.Optional[str] session_id : typing.Optional[str] mode : typing.Optional[PatchEnvironmentMode] - page_chars : typing.Optional[int] - vault_id : typing.Optional[str] browser_profile_id : typing.Optional[str] @@ -396,12 +357,9 @@ def patch_environment( """ _response = self._raw_client.patch_environment( id, - width=width, - height=height, start_url=start_url, session_id=session_id, mode=mode, - page_chars=page_chars, vault_id=vault_id, browser_profile_id=browser_profile_id, network=network, @@ -499,11 +457,8 @@ async def create_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -519,20 +474,11 @@ async def create_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -573,11 +519,8 @@ async def main() -> None: _response = await self._raw_client.create_environment( id=id, kind=kind, - width=width, - height=height, start_url=start_url, mode=mode, - page_chars=page_chars, vault_id=vault_id, browser_profile_id=browser_profile_id, network=network, @@ -629,11 +572,8 @@ async def update_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -651,20 +591,11 @@ async def update_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -707,11 +638,8 @@ async def main() -> None: id_, id=id, kind=kind, - width=width, - height=height, start_url=start_url, mode=mode, - page_chars=page_chars, vault_id=vault_id, browser_profile_id=browser_profile_id, network=network, @@ -760,12 +688,9 @@ async def patch_environment( self, id: str, *, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, session_id: typing.Optional[str] = OMIT, mode: typing.Optional[PatchEnvironmentMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -783,18 +708,12 @@ async def patch_environment( ---------- id : str - width : typing.Optional[int] - - height : typing.Optional[int] - start_url : typing.Optional[str] session_id : typing.Optional[str] mode : typing.Optional[PatchEnvironmentMode] - page_chars : typing.Optional[int] - vault_id : typing.Optional[str] browser_profile_id : typing.Optional[str] @@ -840,12 +759,9 @@ async def main() -> None: """ _response = await self._raw_client.patch_environment( id, - width=width, - height=height, start_url=start_url, session_id=session_id, mode=mode, - page_chars=page_chars, vault_id=vault_id, browser_profile_id=browser_profile_id, network=network, diff --git a/src/hai_agents/environments/raw_client.py b/src/hai_agents/environments/raw_client.py index 6e348d1..6d52b4f 100644 --- a/src/hai_agents/environments/raw_client.py +++ b/src/hai_agents/environments/raw_client.py @@ -122,11 +122,8 @@ def create_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -142,20 +139,11 @@ def create_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -180,11 +168,10 @@ def create_environment( json={ "id": id, "kind": kind, - "width": width, - "height": height, "start_url": start_url, - "mode": mode, - "page_chars": page_chars, + "mode": convert_and_respect_annotation_metadata( + object_=mode, annotation=BrowserMode, direction="write" + ), "vault_id": vault_id, "browser_profile_id": browser_profile_id, "network": convert_and_respect_annotation_metadata( @@ -286,11 +273,8 @@ def update_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -308,20 +292,11 @@ def update_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -346,11 +321,10 @@ def update_environment( json={ "id": id, "kind": kind, - "width": width, - "height": height, "start_url": start_url, - "mode": mode, - "page_chars": page_chars, + "mode": convert_and_respect_annotation_metadata( + object_=mode, annotation=BrowserMode, direction="write" + ), "vault_id": vault_id, "browser_profile_id": browser_profile_id, "network": convert_and_respect_annotation_metadata( @@ -442,12 +416,9 @@ def patch_environment( self, id: str, *, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, session_id: typing.Optional[str] = OMIT, mode: typing.Optional[PatchEnvironmentMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -465,18 +436,12 @@ def patch_environment( ---------- id : str - width : typing.Optional[int] - - height : typing.Optional[int] - start_url : typing.Optional[str] session_id : typing.Optional[str] mode : typing.Optional[PatchEnvironmentMode] - page_chars : typing.Optional[int] - vault_id : typing.Optional[str] browser_profile_id : typing.Optional[str] @@ -505,12 +470,11 @@ def patch_environment( f"api/v2/environments/{encode_path_param(id)}", method="PATCH", json={ - "width": width, - "height": height, "start_url": start_url, "session_id": session_id, - "mode": mode, - "page_chars": page_chars, + "mode": convert_and_respect_annotation_metadata( + object_=mode, annotation=typing.Optional[PatchEnvironmentMode], direction="write" + ), "vault_id": vault_id, "browser_profile_id": browser_profile_id, "network": convert_and_respect_annotation_metadata( @@ -657,11 +621,8 @@ async def create_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -677,20 +638,11 @@ async def create_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -715,11 +667,10 @@ async def create_environment( json={ "id": id, "kind": kind, - "width": width, - "height": height, "start_url": start_url, - "mode": mode, - "page_chars": page_chars, + "mode": convert_and_respect_annotation_metadata( + object_=mode, annotation=BrowserMode, direction="write" + ), "vault_id": vault_id, "browser_profile_id": browser_profile_id, "network": convert_and_respect_annotation_metadata( @@ -821,11 +772,8 @@ async def update_environment( *, id: str, kind: typing.Optional[BrowserKind] = OMIT, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, mode: typing.Optional[BrowserMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -843,20 +791,11 @@ async def update_environment( kind : typing.Optional[BrowserKind] - width : typing.Optional[int] - Viewport width in pixels. - - height : typing.Optional[int] - Viewport height in pixels. - start_url : typing.Optional[str] Initial URL to open. mode : typing.Optional[BrowserMode] - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - - page_chars : typing.Optional[int] - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. vault_id : typing.Optional[str] Id of a vault config to bind to this browser, letting the agent sign in to sites with secrets resolved from the vault. The vault must belong to the caller's organization. Omit to run without secret access. @@ -881,11 +820,10 @@ async def update_environment( json={ "id": id, "kind": kind, - "width": width, - "height": height, "start_url": start_url, - "mode": mode, - "page_chars": page_chars, + "mode": convert_and_respect_annotation_metadata( + object_=mode, annotation=BrowserMode, direction="write" + ), "vault_id": vault_id, "browser_profile_id": browser_profile_id, "network": convert_and_respect_annotation_metadata( @@ -977,12 +915,9 @@ async def patch_environment( self, id: str, *, - width: typing.Optional[int] = OMIT, - height: typing.Optional[int] = OMIT, start_url: typing.Optional[str] = OMIT, session_id: typing.Optional[str] = OMIT, mode: typing.Optional[PatchEnvironmentMode] = OMIT, - page_chars: typing.Optional[int] = OMIT, vault_id: typing.Optional[str] = OMIT, browser_profile_id: typing.Optional[str] = OMIT, network: typing.Optional[BrowserNetwork] = OMIT, @@ -1000,18 +935,12 @@ async def patch_environment( ---------- id : str - width : typing.Optional[int] - - height : typing.Optional[int] - start_url : typing.Optional[str] session_id : typing.Optional[str] mode : typing.Optional[PatchEnvironmentMode] - page_chars : typing.Optional[int] - vault_id : typing.Optional[str] browser_profile_id : typing.Optional[str] @@ -1040,12 +969,11 @@ async def patch_environment( f"api/v2/environments/{encode_path_param(id)}", method="PATCH", json={ - "width": width, - "height": height, "start_url": start_url, "session_id": session_id, - "mode": mode, - "page_chars": page_chars, + "mode": convert_and_respect_annotation_metadata( + object_=mode, annotation=typing.Optional[PatchEnvironmentMode], direction="write" + ), "vault_id": vault_id, "browser_profile_id": browser_profile_id, "network": convert_and_respect_annotation_metadata( diff --git a/src/hai_agents/environments/types/patch_environment_mode.py b/src/hai_agents/environments/types/patch_environment_mode.py index 8bf0208..cb88f9d 100644 --- a/src/hai_agents/environments/types/patch_environment_mode.py +++ b/src/hai_agents/environments/types/patch_environment_mode.py @@ -2,4 +2,7 @@ import typing -PatchEnvironmentMode = typing.Union[typing.Literal["visual", "text", "multimodal"], typing.Any] +from ...types.browser_text_mode import BrowserTextMode +from ...types.browser_visual_mode import BrowserVisualMode + +PatchEnvironmentMode = typing.Union[BrowserVisualMode, BrowserTextMode] diff --git a/src/hai_agents/types/__init__.py b/src/hai_agents/types/__init__.py index 3187803..2e562f0 100644 --- a/src/hai_agents/types/__init__.py +++ b/src/hai_agents/types/__init__.py @@ -38,10 +38,14 @@ from .answer_event_answer import AnswerEventAnswer from .browser import Browser from .browser_kind import BrowserKind - from .browser_mode import BrowserMode + from .browser_mode import BrowserMode, BrowserMode_Text, BrowserMode_Visual from .browser_network import BrowserNetwork from .browser_profile_list import BrowserProfileList from .browser_profile_read import BrowserProfileRead + from .browser_text_mode import BrowserTextMode + from .browser_text_mode_type import BrowserTextModeType + from .browser_visual_mode import BrowserVisualMode + from .browser_visual_mode_type import BrowserVisualModeType from .environment import Environment from .environment_kind import EnvironmentKind from .environment_page import EnvironmentPage @@ -160,9 +164,15 @@ "Browser": ".browser", "BrowserKind": ".browser_kind", "BrowserMode": ".browser_mode", + "BrowserMode_Text": ".browser_mode", + "BrowserMode_Visual": ".browser_mode", "BrowserNetwork": ".browser_network", "BrowserProfileList": ".browser_profile_list", "BrowserProfileRead": ".browser_profile_read", + "BrowserTextMode": ".browser_text_mode", + "BrowserTextModeType": ".browser_text_mode_type", + "BrowserVisualMode": ".browser_visual_mode", + "BrowserVisualModeType": ".browser_visual_mode_type", "Environment": ".environment", "EnvironmentKind": ".environment_kind", "EnvironmentPage": ".environment_page", @@ -301,9 +311,15 @@ def __dir__(): "Browser", "BrowserKind", "BrowserMode", + "BrowserMode_Text", + "BrowserMode_Visual", "BrowserNetwork", "BrowserProfileList", "BrowserProfileRead", + "BrowserTextMode", + "BrowserTextModeType", + "BrowserVisualMode", + "BrowserVisualModeType", "Environment", "EnvironmentKind", "EnvironmentPage", diff --git a/src/hai_agents/types/browser.py b/src/hai_agents/types/browser.py index 0f854ac..b399c4f 100644 --- a/src/hai_agents/types/browser.py +++ b/src/hai_agents/types/browser.py @@ -20,16 +20,6 @@ class Browser(UniversalBaseModel): """ kind: typing.Optional[BrowserKind] = "web" - width: typing.Optional[int] = pydantic.Field(default=None) - """ - Viewport width in pixels. - """ - - height: typing.Optional[int] = pydantic.Field(default=None) - """ - Viewport height in pixels. - """ - start_url: typing.Optional[str] = pydantic.Field(default=None) """ Initial URL to open. @@ -37,12 +27,7 @@ class Browser(UniversalBaseModel): mode: typing.Optional[BrowserMode] = pydantic.Field(default=None) """ - How the agent perceives and drives the browser. 'visual': act on screenshots by viewport coordinates. 'multimodal': the same, with the page also included as markdown text alongside each screenshot. 'text': read-only markdown with URL navigation, no screenshots. - """ - - page_chars: typing.Optional[int] = pydantic.Field(default=None) - """ - Characters of page text shown per page in 'text' mode. + How the agent perceives and drives the browser. """ vault_id: typing.Optional[str] = pydantic.Field(default=None) diff --git a/src/hai_agents/types/browser_mode.py b/src/hai_agents/types/browser_mode.py index 6e54ce6..f70b0f8 100644 --- a/src/hai_agents/types/browser_mode.py +++ b/src/hai_agents/types/browser_mode.py @@ -1,5 +1,52 @@ # This file was auto-generated by Fern from our API Definition. +from __future__ import annotations + import typing -BrowserMode = typing.Union[typing.Literal["visual", "text", "multimodal"], typing.Any] +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class BrowserMode_Text(UniversalBaseModel): + """ + How the agent perceives and drives the browser. + """ + + type: typing.Literal["text"] = "text" + chunk_size: typing.Optional[int] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class BrowserMode_Visual(UniversalBaseModel): + """ + How the agent perceives and drives the browser. + """ + + type: typing.Literal["visual"] = "visual" + width: typing.Optional[int] = None + height: typing.Optional[int] = None + markdown: typing.Optional[bool] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +BrowserMode = typing_extensions.Annotated[ + typing.Union[BrowserMode_Text, BrowserMode_Visual], pydantic.Field(discriminator="type") +] diff --git a/src/hai_agents/types/browser_text_mode.py b/src/hai_agents/types/browser_text_mode.py new file mode 100644 index 0000000..aa9f29f --- /dev/null +++ b/src/hai_agents/types/browser_text_mode.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .browser_text_mode_type import BrowserTextModeType + + +class BrowserTextMode(UniversalBaseModel): + """ + Read-only markdown with URL navigation, no screenshots. + """ + + type: typing.Optional[BrowserTextModeType] = "text" + chunk_size: typing.Optional[int] = pydantic.Field(default=None) + """ + Characters of page text shown per page. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/hai_agents/types/browser_text_mode_type.py b/src/hai_agents/types/browser_text_mode_type.py new file mode 100644 index 0000000..5b0eb10 --- /dev/null +++ b/src/hai_agents/types/browser_text_mode_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +BrowserTextModeType = typing.Union[typing.Literal["text"], typing.Any] diff --git a/src/hai_agents/types/browser_visual_mode.py b/src/hai_agents/types/browser_visual_mode.py new file mode 100644 index 0000000..615b80d --- /dev/null +++ b/src/hai_agents/types/browser_visual_mode.py @@ -0,0 +1,38 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .browser_visual_mode_type import BrowserVisualModeType + + +class BrowserVisualMode(UniversalBaseModel): + """ + Act on screenshots by viewport coordinates. + """ + + type: typing.Optional[BrowserVisualModeType] = "visual" + width: typing.Optional[int] = pydantic.Field(default=None) + """ + Viewport width in pixels. + """ + + height: typing.Optional[int] = pydantic.Field(default=None) + """ + Viewport height in pixels. + """ + + markdown: typing.Optional[bool] = pydantic.Field(default=None) + """ + Also include the viewport's text as markdown alongside each screenshot. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/hai_agents/types/browser_visual_mode_type.py b/src/hai_agents/types/browser_visual_mode_type.py new file mode 100644 index 0000000..ec0dda9 --- /dev/null +++ b/src/hai_agents/types/browser_visual_mode_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +BrowserVisualModeType = typing.Union[typing.Literal["visual"], typing.Any] diff --git a/uv.lock b/uv.lock index cc933e8..7a6923d 100644 --- a/uv.lock +++ b/uv.lock @@ -88,7 +88,7 @@ wheels = [ [[package]] name = "hai-agents" -version = "1.0.0" +version = "1.0.1" source = { editable = "." } dependencies = [ { name = "httpx" },