From 0b2b41df3cdb94adde0c0614821e1ae44892e19c Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 9 Apr 2026 22:07:43 +0000 Subject: [PATCH 1/8] Port to Responses API --- .devcontainer/Dockerfile | 4 +- .devcontainer/devcontainer.json | 3 - .env.sample | 15 +- .github/workflows/evaluate.yaml | 1 - .vscode/settings.json | 3 +- AGENTS.md | 16 ++ azure.yaml | 1 - evals/generate_ground_truth.py | 77 +++--- infra/main.bicep | 6 - infra/main.parameters.json | 6 +- src/backend/fastapi_app/__init__.py | 8 +- src/backend/fastapi_app/api_models.py | 3 +- src/backend/fastapi_app/dependencies.py | 26 +- src/backend/fastapi_app/embeddings.py | 6 +- src/backend/fastapi_app/openai_clients.py | 72 ++--- .../fastapi_app/prompts/query_fewshots.json | 8 +- src/backend/fastapi_app/query_rewriter.py | 99 ++++--- src/backend/fastapi_app/rag_advanced.py | 11 +- src/backend/fastapi_app/rag_base.py | 1 - src/backend/fastapi_app/rag_simple.py | 11 +- src/backend/pyproject.toml | 4 +- src/backend/requirements.txt | 24 +- tests/conftest.py | 245 ++++++++---------- .../advanced_chat_flow_response.json | 8 +- ...ced_chat_streaming_flow_response.jsonlines | 2 +- .../simple_chat_flow_response.json | 4 +- ...le_chat_flow_message_history_response.json | 4 +- ...ple_chat_streaming_flow_response.jsonlines | 2 +- tests/test_dependencies.py | 4 +- tests/test_openai_clients.py | 50 +--- 30 files changed, 297 insertions(+), 427 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 1e40ff4b..c82eca85 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -4,5 +4,5 @@ FROM mcr.microsoft.com/devcontainers/${IMAGE} ENV PYTHONUNBUFFERED 1 RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ - && apt-get -y install --no-install-recommends postgresql-client \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* \ No newline at end of file + && apt-get -y install --no-install-recommends postgresql-client zstd \ + && apt-get clean -y && rm -rf 
/var/lib/apt/lists/* diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4c9a9b6e..006422d3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -29,16 +29,13 @@ "extensions": [ "ms-python.python", "ms-python.vscode-pylance", - "ms-python.vscode-python-envs", "charliermarsh.ruff", "mtxr.sqltools", "mtxr.sqltools-driver-pg", "esbenp.prettier-vscode", "mechatroner.rainbow-csv", "ms-vscode.vscode-node-azure-pack", - "esbenp.prettier-vscode", "twixes.pypi-assistant", - "ms-python.vscode-python-envs", "teamsdevapp.vscode-ai-foundry", "ms-windows-ai-studio.windows-ai-studio" ], diff --git a/.env.sample b/.env.sample index 44517fd1..6fee2d2f 100644 --- a/.env.sample +++ b/.env.sample @@ -5,16 +5,15 @@ POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=postgres POSTGRES_SSL=disable -# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github: +# OPENAI_CHAT_HOST can be either azure, openai, or ollama: OPENAI_CHAT_HOST=azure -# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github: +# OPENAI_EMBED_HOST can be either azure, openai, or ollama: OPENAI_EMBED_HOST=azure # Needed for Azure: # You also need to `azd auth login` if running this locally AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com -AZURE_OPENAI_VERSION=2024-03-01-preview -AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini -AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini +AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4 +AZURE_OPENAI_CHAT_MODEL=gpt-5.4 AZURE_OPENAI_EMBED_DEPLOYMENT=text-embedding-3-large AZURE_OPENAI_EMBED_MODEL=text-embedding-3-large AZURE_OPENAI_EMBED_DIMENSIONS=1024 @@ -35,9 +34,3 @@ OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1 OLLAMA_CHAT_MODEL=llama3.1 OLLAMA_EMBED_MODEL=nomic-embed-text OLLAMA_EMBEDDING_COLUMN=embedding_nomic -# Needed for GitHub Models: -GITHUB_TOKEN=YOUR-GITHUB-TOKEN -GITHUB_MODEL=openai/gpt-4o -GITHUB_EMBED_MODEL=openai/text-embedding-3-large -GITHUB_EMBED_DIMENSIONS=1024 
-GITHUB_EMBEDDING_COLUMN=embedding_3l diff --git a/.github/workflows/evaluate.yaml b/.github/workflows/evaluate.yaml index b5c77c4b..0a35af5f 100644 --- a/.github/workflows/evaluate.yaml +++ b/.github/workflows/evaluate.yaml @@ -34,7 +34,6 @@ jobs: OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }} OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }} AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }} - AZURE_OPENAI_VERSION: ${{ vars.AZURE_OPENAI_VERSION }} AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }} AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }} AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }} diff --git a/.vscode/settings.json b/.vscode/settings.json index c9eb00cc..4d91d2fb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -36,5 +36,6 @@ "htmlcov": true, ".mypy_cache": true, ".coverage": true - } + }, + "python-envs.defaultEnvManager": "ms-python.python:system" } diff --git a/AGENTS.md b/AGENTS.md index ae38cbf5..b41d2779 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,3 +8,19 @@ When adding new azd environment variables, update: 1. infra/main.parameters.json : Add the new parameter with a Bicep-friendly variable name and map to the new environment variable 1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `webAppEnv` object 1. .github/workflows/azure-dev.yml: Add the new environment variable under `env` section. If it's a @secure variable in main.bicep, it should come from `secrets`, otherwise from `vars`. + +## Upgrading Python packages + +1. Update the version constraint in src/backend/pyproject.toml + +2. Re-compile src/backend/requirements.txt from the src folder: + + ```shell + uv pip compile pyproject.toml -o requirements.txt --python-version 3.10 + ``` + +3. 
Reinstall with: + + ```shell + python -m pip install -r src/backend/requirements.txt + ``` diff --git a/azure.yaml b/azure.yaml index 38c99b96..8a91e712 100644 --- a/azure.yaml +++ b/azure.yaml @@ -42,7 +42,6 @@ pipeline: - OPENAI_CHAT_HOST - OPENAI_EMBED_HOST - AZURE_OPENAI_ENDPOINT - - AZURE_OPENAI_VERSION - AZURE_OPENAI_CHAT_DEPLOYMENT - AZURE_OPENAI_CHAT_MODEL - AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py index 44410506..9f2b0fef 100644 --- a/evals/generate_ground_truth.py +++ b/evals/generate_ground_truth.py @@ -3,12 +3,10 @@ import os from collections.abc import Generator from pathlib import Path -from typing import Union from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider from dotenv_azd import load_azd_env -from openai import AzureOpenAI, OpenAI -from openai.types.chat import ChatCompletionToolParam +from openai import OpenAI from sqlalchemy import create_engine, select from sqlalchemy.orm import Session @@ -17,32 +15,30 @@ logger = logging.getLogger("ragapp") -def qa_pairs_tool(num_questions: int = 1) -> ChatCompletionToolParam: +def qa_pairs_tool(num_questions: int = 1) -> dict: return { "type": "function", - "function": { - "name": "qa_pairs", - "description": "Send in question and answer pairs for a customer-facing chat app", - "parameters": { - "type": "object", - "properties": { - "qa_list": { - "type": "array", - "description": f"List of {num_questions} question and answer pairs", - "items": { - "type": "object", - "properties": { - "question": {"type": "string", "description": "The question text"}, - "answer": {"type": "string", "description": "The answer text"}, - }, - "required": ["question", "answer"], + "name": "qa_pairs", + "description": "Send in question and answer pairs for a customer-facing chat app", + "parameters": { + "type": "object", + "properties": { + "qa_list": { + "type": "array", + "description": f"List of {num_questions} question 
and answer pairs", + "items": { + "type": "object", + "properties": { + "question": {"type": "string", "description": "The question text"}, + "answer": {"type": "string", "description": "The answer text"}, }, - "minItems": num_questions, - "maxItems": num_questions, - } - }, - "required": ["qa_list"], + "required": ["question", "answer"], + }, + "minItems": num_questions, + "maxItems": num_questions, + } }, + "required": ["qa_list"], }, } @@ -67,7 +63,6 @@ def source_retriever() -> Generator[str, None, None]: # for record in records: # logger.info(f"Processing database record: {record.name}") # yield f"## Product ID: [{record.id}]\n" + record.to_str_for_rag() - # await self.openai_chat_client.chat.completions.create( def source_to_text(source) -> str: @@ -78,32 +73,29 @@ def answer_formatter(answer, source) -> str: return f"{answer} [{source['id']}]" -def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]: +def get_openai_client() -> tuple[OpenAI, str]: """Return an OpenAI client based on the environment variables""" - openai_client: Union[AzureOpenAI, OpenAI] + openai_client: OpenAI OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST") if OPENAI_CHAT_HOST == "azure": + azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): logger.info("Using Azure OpenAI Service with API Key from AZURE_OPENAI_KEY") - openai_client = AzureOpenAI( - api_version=os.environ["AZURE_OPENAI_VERSION"], - azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], + openai_client = OpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", api_key=api_key, ) else: logger.info("Using Azure OpenAI Service with Azure Developer CLI Credential") azure_credential = AzureDeveloperCliCredential(process_timeout=60, tenant_id=os.environ["AZURE_TENANT_ID"]) token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default") - openai_client = AzureOpenAI( - api_version=os.environ["AZURE_OPENAI_VERSION"], - 
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], - azure_ad_token_provider=token_provider, + openai_client = OpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", + api_key=token_provider, ) model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] elif OPENAI_CHAT_HOST == "ollama": raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") - elif OPENAI_CHAT_HOST == "github": - raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"]) @@ -123,18 +115,21 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc if len(qa) > num_questions_total: logger.info("Generated enough questions already, stopping") break - result = openai_client.chat.completions.create( + result = openai_client.responses.create( model=model, - messages=[ + input=[ {"role": "system", "content": generate_prompt}, {"role": "user", "content": json.dumps(source)}, ], tools=[qa_pairs_tool(num_questions=2)], + max_output_tokens=1000, + store=False, ) - if not result.choices[0].message.tool_calls: + tool_calls = [item for item in result.output if item.type == "function_call"] + if not tool_calls: logger.warning("No tool calls found in response, skipping") continue - qa_pairs = json.loads(result.choices[0].message.tool_calls[0].function.arguments)["qa_list"] + qa_pairs = json.loads(tool_calls[0].arguments)["qa_list"] qa_pairs = [{"question": qa_pair["question"], "truth": qa_pair["answer"]} for qa_pair in qa_pairs] qa.extend(qa_pairs) diff --git a/infra/main.bicep b/infra/main.bicep index b6e5d9a2..ea800759 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -131,7 +131,6 @@ param openAIEmbedHost string = 'azure' @secure() param openAIComKey string = '' -param azureOpenAIAPIVersion string = '2024-03-01-preview' @secure() param azureOpenAIKey string = '' @@ -385,10 +384,6 @@ var 
webAppEnv = union(azureOpenAIKeyEnv, openAIComKeyEnv, [ name: 'AZURE_OPENAI_ENDPOINT' value: !empty(azureOpenAIEndpoint) ? azureOpenAIEndpoint : (deployAzureOpenAI ? openAI.outputs.endpoint : '') } - { - name: 'AZURE_OPENAI_VERSION' - value: openAIChatHost == 'azure' ? azureOpenAIAPIVersion : '' - } ]) module web 'web.bicep' = { @@ -613,7 +608,6 @@ output AZURE_OPENAI_RESOURCE_GROUP string = deployAzureOpenAI ? openAIResourceGr output AZURE_OPENAI_ENDPOINT string = !empty(azureOpenAIEndpoint) ? azureOpenAIEndpoint : (deployAzureOpenAI ? openAI.outputs.endpoint : '') -output AZURE_OPENAI_VERSION string = azureOpenAIAPIVersion output AZURE_OPENAI_CHAT_DEPLOYMENT string = deployAzureOpenAI ? chatDeploymentName : '' output AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION string = deployAzureOpenAI ? chatDeploymentVersion : '' output AZURE_OPENAI_CHAT_DEPLOYMENT_CAPACITY int = deployAzureOpenAI ? chatDeploymentCapacity : 0 diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 3e16a351..85ac1817 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -33,13 +33,13 @@ "value": "${OPENAI_CHAT_HOST=azure}" }, "chatModelName": { - "value": "${AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini}" + "value": "${AZURE_OPENAI_CHAT_MODEL=gpt-5.4}" }, "chatDeploymentName": { - "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini}" + "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4}" }, "chatDeploymentVersion":{ - "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-07-18}" + "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2026-03-05}" }, "chatDeploymentSku": { "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_SKU=GlobalStandard}" diff --git a/src/backend/fastapi_app/__init__.py b/src/backend/fastapi_app/__init__.py index b760fdb2..cf04614e 100644 --- a/src/backend/fastapi_app/__init__.py +++ b/src/backend/fastapi_app/__init__.py @@ -2,12 +2,12 @@ import os from collections.abc import AsyncIterator from contextlib import asynccontextmanager -from typing import TypedDict, 
Union +from typing import TypedDict import fastapi from azure.monitor.opentelemetry import configure_azure_monitor from dotenv import load_dotenv -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker @@ -27,8 +27,8 @@ class State(TypedDict): sessionmaker: async_sessionmaker[AsyncSession] context: FastAPIAppContext - chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI] - embed_client: Union[AsyncOpenAI, AsyncAzureOpenAI] + chat_client: AsyncOpenAI + embed_client: AsyncOpenAI @asynccontextmanager diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py index 06d14a6b..7fb1b8d3 100644 --- a/src/backend/fastapi_app/api_models.py +++ b/src/backend/fastapi_app/api_models.py @@ -24,11 +24,10 @@ class RetrievalMode(str, Enum): class ChatRequestOverrides(BaseModel): top: int = 3 - temperature: float = 0.3 + temperature: float = 1 retrieval_mode: RetrievalMode = RetrievalMode.HYBRID use_advanced_flow: bool = True prompt_template: Optional[str] = None - seed: Optional[int] = None class ChatRequestContext(BaseModel): diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index 2715819e..7b60bef6 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -1,11 +1,11 @@ import logging import os from collections.abc import AsyncGenerator -from typing import Annotated, Optional, Union +from typing import Annotated, Optional import azure.identity from fastapi import Depends, Request -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker @@ -17,7 +17,7 @@ class OpenAIClient(BaseModel): OpenAI client 
""" - client: Union[AsyncOpenAI, AsyncAzureOpenAI] + client: AsyncOpenAI model_config = {"arbitrary_types_allowed": True} @@ -51,26 +51,18 @@ async def common_parameters(): openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" openai_embed_dimensions = None embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic" - elif OPENAI_EMBED_HOST == "github": - openai_embed_deployment = None - openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "openai/text-embedding-3-large" - openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)) - embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l" else: openai_embed_deployment = None openai_embed_model = os.getenv("OPENAICOM_EMBED_MODEL") or "text-embedding-3-large" openai_embed_dimensions = int(os.getenv("OPENAICOM_EMBED_DIMENSIONS", 1024)) embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN") or "embedding_3l" if OPENAI_CHAT_HOST == "azure": - openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "gpt-4o-mini" - openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL") or "gpt-4o-mini" + openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "gpt-5.4" + openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL") or "gpt-5.4" elif OPENAI_CHAT_HOST == "ollama": openai_chat_deployment = None openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL") or "phi3:3.8b" openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" - elif OPENAI_CHAT_HOST == "github": - openai_chat_deployment = None - openai_chat_model = os.getenv("GITHUB_MODEL") or "openai/gpt-4o" else: openai_chat_deployment = None openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo" @@ -84,10 +76,10 @@ async def common_parameters(): ) -async def get_azure_credential() -> Union[ - azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential -]: - azure_credential: 
Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential] +async def get_azure_credential() -> ( + azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential +): + azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential try: if client_id := os.getenv("APP_IDENTITY_ID"): # Authenticate using a user-assigned managed identity on Azure diff --git a/src/backend/fastapi_app/embeddings.py b/src/backend/fastapi_app/embeddings.py index 0dccec3e..1a0581b7 100644 --- a/src/backend/fastapi_app/embeddings.py +++ b/src/backend/fastapi_app/embeddings.py @@ -1,11 +1,11 @@ -from typing import Optional, TypedDict, Union +from typing import Optional, TypedDict -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI async def compute_text_embedding( q: str, - openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI], + openai_client: AsyncOpenAI, embed_model: str, embed_deployment: Optional[str] = None, embedding_dimensions: Optional[int] = None, diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index b704dc9d..e5c0a703 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -1,79 +1,65 @@ import logging import os -from typing import Union import azure.identity +import azure.identity.aio import openai logger = logging.getLogger("ragapp") async def create_openai_chat_client( - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None], -) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]: - openai_chat_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI] + azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None, +) -> openai.AsyncOpenAI: + openai_chat_client: openai.AsyncOpenAI OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST") if 
OPENAI_CHAT_HOST == "azure": - api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-10-21" azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] azure_deployment = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): logger.info( - "Setting up Azure OpenAI client for chat completions using API key, endpoint %s, deployment %s", + "Setting up Azure OpenAI client for chat using API key, endpoint %s, deployment %s", azure_endpoint, azure_deployment, ) - openai_chat_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, + openai_chat_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", api_key=api_key, ) elif azure_credential: logger.info( - "Setting up Azure OpenAI client for chat completions using Azure Identity, endpoint %s, deployment %s", + "Setting up Azure OpenAI client for chat using Azure Identity, endpoint %s, deployment %s", azure_endpoint, azure_deployment, ) - token_provider = azure.identity.get_bearer_token_provider( + token_provider = azure.identity.aio.get_bearer_token_provider( azure_credential, "https://cognitiveservices.azure.com/.default" ) - openai_chat_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, - azure_ad_token_provider=token_provider, + openai_chat_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", + api_key=token_provider, ) else: raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.") elif OPENAI_CHAT_HOST == "ollama": - logger.info("Setting up OpenAI client for chat completions using Ollama") + logger.info("Setting up OpenAI client for chat using Ollama") openai_chat_client = openai.AsyncOpenAI( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) - elif OPENAI_CHAT_HOST == "github": - logger.info("Setting up OpenAI client for chat 
completions using GitHub Models") - github_model = os.getenv("GITHUB_MODEL", "openai/gpt-4o") - logger.info(f"Using GitHub Models with model: {github_model}") - openai_chat_client = openai.AsyncOpenAI( - base_url="https://models.github.ai/inference", - api_key=os.getenv("GITHUB_TOKEN"), - ) else: - logger.info("Setting up OpenAI client for chat completions using OpenAI.com API key") + logger.info("Setting up OpenAI client for chat using OpenAI.com API key") openai_chat_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) return openai_chat_client async def create_openai_embed_client( - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None], -) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]: - openai_embed_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI] + azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None, +) -> openai.AsyncOpenAI: + openai_embed_client: openai.AsyncOpenAI OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST") if OPENAI_EMBED_HOST == "azure": - api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-03-01-preview" azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] azure_deployment = os.environ["AZURE_OPENAI_EMBED_DEPLOYMENT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): @@ -82,10 +68,8 @@ async def create_openai_embed_client( azure_endpoint, azure_deployment, ) - openai_embed_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, + openai_embed_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", api_key=api_key, ) elif azure_credential: @@ -94,14 +78,12 @@ async def create_openai_embed_client( azure_endpoint, azure_deployment, ) - token_provider = azure.identity.get_bearer_token_provider( + token_provider = azure.identity.aio.get_bearer_token_provider( azure_credential, 
"https://cognitiveservices.azure.com/.default" ) - openai_embed_client = openai.AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - azure_deployment=azure_deployment, - azure_ad_token_provider=token_provider, + openai_embed_client = openai.AsyncOpenAI( + base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/", + api_key=token_provider, ) else: raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.") @@ -111,14 +93,6 @@ async def create_openai_embed_client( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) - elif OPENAI_EMBED_HOST == "github": - logger.info("Setting up OpenAI client for embeddings using GitHub Models") - github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "openai/text-embedding-3-small") - logger.info(f"Using GitHub Models with embedding model: {github_embed_model}") - openai_embed_client = openai.AsyncOpenAI( - base_url="https://models.github.ai/inference", - api_key=os.getenv("GITHUB_TOKEN"), - ) else: logger.info("Setting up OpenAI client for embeddings using OpenAI.com API key") openai_embed_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) diff --git a/src/backend/fastapi_app/prompts/query_fewshots.json b/src/backend/fastapi_app/prompts/query_fewshots.json index 0ef450fd..4fe3ae10 100644 --- a/src/backend/fastapi_app/prompts/query_fewshots.json +++ b/src/backend/fastapi_app/prompts/query_fewshots.json @@ -4,14 +4,14 @@ "content": "good options for climbing gear that can be used outside?" }, { - "id": "madeup", + "id": "fc_madeup1", "call_id": "call_abc123", "name": "search_database", "arguments": "{\"search_query\":\"climbing gear outside\"}", "type": "function_call" }, { - "id": "madeupoutput", + "id": "fc_madeupoutput1", "call_id": "call_abc123", "output": "Search results for climbing gear that can be used outside: ...", "type": "function_call_output" @@ -21,14 +21,14 @@ "content": "are there any shoes less than $50?" 
}, { - "id": "madeup", + "id": "fc_madeup2", "call_id": "call_abc456", "name": "search_database", "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", "type": "function_call" }, { - "id": "madeupoutput", + "id": "fc_madeupoutput2", "call_id": "call_abc456", "output": "Search results for shoes cheaper than 50: ...", "type": "function_call_output" diff --git a/src/backend/fastapi_app/query_rewriter.py b/src/backend/fastapi_app/query_rewriter.py index aa0ad466..122c0ed9 100644 --- a/src/backend/fastapi_app/query_rewriter.py +++ b/src/backend/fastapi_app/query_rewriter.py @@ -1,73 +1,62 @@ import json -from openai.types.chat import ( - ChatCompletion, - ChatCompletionToolParam, -) +from openai.types.responses import Response, ResponseFunctionToolCall -def build_search_function() -> list[ChatCompletionToolParam]: - return [ - { - "type": "function", - "function": { - "name": "search_database", - "description": "Search PostgreSQL database for relevant products based on user query", - "parameters": { +def build_search_function() -> dict: + return { + "type": "function", + "name": "search_database", + "description": "Search PostgreSQL database for relevant products based on user query", + "parameters": { + "type": "object", + "properties": { + "search_query": { + "type": "string", + "description": "Query string to use for full text search, e.g. 'red shoes'", + }, + "price_filter": { "type": "object", + "description": "Filter search results based on price of the product", "properties": { - "search_query": { + "comparison_operator": { "type": "string", - "description": "Query string to use for full text search, e.g. 'red shoes'", + "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa + }, + "value": { + "type": "number", + "description": "Value to compare against, e.g. 
30", }, - "price_filter": { - "type": "object", - "description": "Filter search results based on price of the product", - "properties": { - "comparison_operator": { - "type": "string", - "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa - }, - "value": { - "type": "number", - "description": "Value to compare against, e.g. 30", - }, - }, + }, + }, + "brand_filter": { + "type": "object", + "description": "Filter search results based on brand of the product", + "properties": { + "comparison_operator": { + "type": "string", + "description": "Operator to compare the column value, either '=' or '!='", }, - "brand_filter": { - "type": "object", - "description": "Filter search results based on brand of the product", - "properties": { - "comparison_operator": { - "type": "string", - "description": "Operator to compare the column value, either '=' or '!='", - }, - "value": { - "type": "string", - "description": "Value to compare against, e.g. AirStrider", - }, - }, + "value": { + "type": "string", + "description": "Value to compare against, e.g. 
AirStrider", }, }, - "required": ["search_query"], }, }, - } - ] + "required": ["search_query"], + }, + } -def extract_search_arguments(original_user_query: str, chat_completion: ChatCompletion): - response_message = chat_completion.choices[0].message +def extract_search_arguments(original_user_query: str, response: Response): search_query = None filters = [] - if response_message.tool_calls: - for tool in response_message.tool_calls: - if tool.type != "function": - continue - function = tool.function - if function.name == "search_database": - arg = json.loads(function.arguments) - # Even though its required, search_query is not always specified + tool_calls = [item for item in response.output if isinstance(item, ResponseFunctionToolCall)] + if tool_calls: + for tool_call in tool_calls: + if tool_call.name == "search_database": + arg = json.loads(tool_call.arguments) search_query = arg.get("search_query", original_user_query) if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict): price_filter = arg["price_filter"] @@ -87,6 +76,6 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp "value": brand_filter["value"], } ) - elif query_text := response_message.content: - search_query = query_text.strip() + elif response.output_text: + search_query = response.output_text.strip() return search_query, filters diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py index eb53aa6a..7b894b24 100644 --- a/src/backend/fastapi_app/rag_advanced.py +++ b/src/backend/fastapi_app/rag_advanced.py @@ -1,18 +1,18 @@ import json from collections.abc import AsyncGenerator -from typing import Optional, Union +from typing import Optional from agents import ( Agent, ItemHelpers, ModelSettings, - OpenAIChatCompletionsModel, + OpenAIResponsesModel, Runner, ToolCallOutputItem, function_tool, set_tracing_disabled, ) -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import 
AsyncOpenAI from openai.types.responses import EasyInputMessageParam, ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( @@ -45,7 +45,7 @@ def __init__( messages: list[ResponseInputItemParam], overrides: ChatRequestOverrides, searcher: PostgresSearcher, - openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI], + openai_chat_client: AsyncOpenAI, chat_model: str, chat_deployment: Optional[str], # Not needed for non-Azure OpenAI ): @@ -54,7 +54,7 @@ def __init__( self.model_for_thoughts = ( {"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model} ) - openai_agents_model = OpenAIChatCompletionsModel( + openai_agents_model = OpenAIResponsesModel( model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client ) self.search_agent = Agent( @@ -71,7 +71,6 @@ def __init__( model_settings=ModelSettings( temperature=self.chat_params.temperature, max_tokens=self.chat_params.response_token_limit, - extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}, ), ) diff --git a/src/backend/fastapi_app/rag_base.py b/src/backend/fastapi_app/rag_base.py index 54e633c2..557c1049 100644 --- a/src/backend/fastapi_app/rag_base.py +++ b/src/backend/fastapi_app/rag_base.py @@ -32,7 +32,6 @@ def get_chat_params(self, messages: list[ResponseInputItemParam], overrides: Cha return ChatParams( top=overrides.top, temperature=overrides.temperature, - seed=overrides.seed, retrieval_mode=overrides.retrieval_mode, use_advanced_flow=overrides.use_advanced_flow, response_token_limit=response_token_limit, diff --git a/src/backend/fastapi_app/rag_simple.py b/src/backend/fastapi_app/rag_simple.py index 69126618..1bbbd12a 100644 --- a/src/backend/fastapi_app/rag_simple.py +++ b/src/backend/fastapi_app/rag_simple.py @@ -1,8 +1,8 @@ from collections.abc import AsyncGenerator -from typing import Optional, Union +from typing import Optional -from agents import Agent, 
ItemHelpers, ModelSettings, OpenAIChatCompletionsModel, Runner, set_tracing_disabled -from openai import AsyncAzureOpenAI, AsyncOpenAI +from agents import Agent, ItemHelpers, ModelSettings, OpenAIResponsesModel, Runner, set_tracing_disabled +from openai import AsyncOpenAI from openai.types.responses import ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( @@ -28,7 +28,7 @@ def __init__( messages: list[ResponseInputItemParam], overrides: ChatRequestOverrides, searcher: PostgresSearcher, - openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI], + openai_chat_client: AsyncOpenAI, chat_model: str, chat_deployment: Optional[str], # Not needed for non-Azure OpenAI ): @@ -37,7 +37,7 @@ def __init__( self.model_for_thoughts = ( {"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model} ) - openai_agents_model = OpenAIChatCompletionsModel( + openai_agents_model = OpenAIResponsesModel( model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client ) self.answer_agent = Agent( @@ -47,7 +47,6 @@ def __init__( model_settings=ModelSettings( temperature=self.chat_params.temperature, max_tokens=self.chat_params.response_token_limit, - extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}, ), ) diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index 7ede97c9..9874b924 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -12,12 +12,12 @@ dependencies = [ "asyncpg>=0.29.0,<1.0.0", "SQLAlchemy[asyncio]>=2.0.30,<3.0.0", "pgvector>=0.3.0,<0.4.0", - "openai>=1.34.0,<2.0.0", + "openai>=1.108.1,<3.0.0", "azure-monitor-opentelemetry>=1.6.0,<2.0.0", "opentelemetry-instrumentation-sqlalchemy", "opentelemetry-instrumentation-aiohttp-client", "opentelemetry-instrumentation-openai", - "openai-agents" + "openai-agents>=0.13.6" ] [build-system] diff --git a/src/backend/requirements.txt 
b/src/backend/requirements.txt index b83031b6..167de074 100644 --- a/src/backend/requirements.txt +++ b/src/backend/requirements.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml -o requirements_new.txt --python-version 3.10 +# uv pip compile pyproject.toml -o requirements.txt --python-version 3.10 aiohappyeyeballs==2.6.1 # via aiohttp aiohttp==3.12.14 @@ -59,8 +59,6 @@ charset-normalizer==3.4.2 # via requests click==8.2.1 # via uvicorn -colorama==0.4.6 - # via griffe cryptography==45.0.5 # via # azure-identity @@ -86,7 +84,7 @@ frozenlist==1.7.0 # aiosignal greenlet==3.2.3 # via sqlalchemy -griffe==1.7.3 +griffelib==2.0.2 # via openai-agents h11==0.16.0 # via @@ -118,7 +116,7 @@ jsonschema-specifications==2025.4.1 # via jsonschema marshmallow==4.0.0 # via environs -mcp==1.11.0 +mcp==1.27.0 # via openai-agents msal==1.32.3 # via @@ -136,11 +134,11 @@ numpy==2.2.6 # via pgvector oauthlib==3.3.1 # via requests-oauthlib -openai==1.96.1 +openai==2.31.0 # via # fastapi-app (pyproject.toml) # openai-agents -openai-agents==0.2.0 +openai-agents==0.13.6 # via fastapi-app (pyproject.toml) opentelemetry-api==1.31.1 # via @@ -256,19 +254,21 @@ psutil==7.0.0 # via azure-monitor-opentelemetry-exporter pycparser==2.22 # via cffi -pydantic==2.11.7 +pydantic==2.12.5 # via # fastapi # mcp # openai # openai-agents # pydantic-settings -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic pydantic-settings==2.10.1 # via mcp pyjwt==2.10.1 - # via msal + # via + # mcp + # msal python-dotenv==1.1.1 # via # fastapi-app (pyproject.toml) @@ -327,6 +327,7 @@ typing-extensions==4.14.1 # exceptiongroup # fastapi # marshmallow + # mcp # multidict # openai # openai-agents @@ -338,8 +339,9 @@ typing-extensions==4.14.1 # starlette # typing-inspection # uvicorn -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via + # mcp # pydantic # pydantic-settings urllib3==2.5.0 diff --git a/tests/conftest.py b/tests/conftest.py index 
5fe67053..409b9e07 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,13 +9,14 @@ import pytest_asyncio from fastapi.testclient import TestClient from openai.types import CreateEmbeddingResponse, Embedding -from openai.types.chat import ChatCompletion, ChatCompletionChunk -from openai.types.chat.chat_completion import ( - ChatCompletionMessage, - Choice, -) -from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall, Function from openai.types.create_embedding_response import Usage +from openai.types.responses import ( + Response, + ResponseFunctionToolCall, + ResponseOutputMessage, + ResponseOutputText, + ResponseTextDeltaEvent, +) from sqlalchemy.ext.asyncio import async_sessionmaker from fastapi_app import create_app @@ -63,9 +64,8 @@ def mock_session_env(monkeypatch_session): monkeypatch_session.setenv("OPENAI_CHAT_HOST", "azure") monkeypatch_session.setenv("OPENAI_EMBED_HOST", "azure") monkeypatch_session.setenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com") - monkeypatch_session.setenv("AZURE_OPENAI_VERSION", "2024-03-01-preview") - monkeypatch_session.setenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-4o-mini") - monkeypatch_session.setenv("AZURE_OPENAI_CHAT_MODEL", "gpt-4o-mini") + monkeypatch_session.setenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-5.4") + monkeypatch_session.setenv("AZURE_OPENAI_CHAT_MODEL", "gpt-5.4") monkeypatch_session.setenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "text-embedding-3-large") monkeypatch_session.setenv("AZURE_OPENAI_EMBED_MODEL", "text-embedding-3-large") monkeypatch_session.setenv("AZURE_OPENAI_EMBED_DIMENSIONS", "1024") @@ -171,67 +171,39 @@ async def mock_acreate(*args, **kwargs): @pytest.fixture(scope="session") def mock_openai_chatcompletion(monkeypatch_session): - class AsyncChatCompletionIterator: + class AsyncResponseEventIterator: def __init__(self, answer: str): - chunk_id = "test-id" - model = "gpt-4o-mini" - self.responses = [ - {"object": "chat.completion.chunk", "choices": [], 
"id": chunk_id, "model": model, "created": 1}, - { - "object": "chat.completion.chunk", - "choices": [{"delta": {"role": "assistant"}, "index": 0, "finish_reason": None}], - "id": chunk_id, - "model": model, - "created": 1, - }, - ] + self.events = [] # Split at << to simulate chunked responses if answer.find("<<") > -1: parts = answer.split("<<") - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [ - { - "delta": {"role": "assistant", "content": parts[0] + "<<"}, - "index": 0, - "finish_reason": None, - } - ], - "id": chunk_id, - "model": model, - "created": 1, - } - ) - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [ - {"delta": {"role": "assistant", "content": parts[1]}, "index": 0, "finish_reason": None} - ], - "id": chunk_id, - "model": model, - "created": 1, - } + self.events.append( + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + delta=parts[0] + "<<", + item_id="msg-1", + output_index=0, + ) ) - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [{"delta": {"role": None, "content": None}, "index": 0, "finish_reason": "stop"}], - "id": chunk_id, - "model": model, - "created": 1, - } + self.events.append( + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + delta=parts[1], + item_id="msg-1", + output_index=0, + ) ) else: - self.responses.append( - { - "object": "chat.completion.chunk", - "choices": [{"delta": {"content": answer}, "index": 0, "finish_reason": None}], - "id": chunk_id, - "model": model, - "created": 1, - } + self.events.append( + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + delta=answer, + item_id="msg-1", + output_index=0, + ) ) async def __aenter__(self): @@ -244,93 +216,88 @@ def __aiter__(self): return self async def __anext__(self): - if self.responses: - return ChatCompletionChunk.model_validate(self.responses.pop(0)) - else: - raise StopAsyncIteration 
+ if self.events: + return self.events.pop(0) + raise StopAsyncIteration + + def _make_text_response(answer: str) -> Response: + return Response( + id="resp-test-123", + created_at=0, + model="gpt-5.4", + object="response", + output=[ + ResponseOutputMessage( + id="msg-1", + type="message", + role="assistant", + status="completed", + content=[ResponseOutputText(type="output_text", text=answer, annotations=[])], + ) + ], + tool_choice="auto", + tools=[], + status="completed", + parallel_tool_calls=True, + ) + + def _make_tool_call_response(tool_name: str, arguments: str, call_id: str = "fc_abc123") -> Response: + return Response( + id="resp-test-123", + created_at=0, + model="gpt-5.4", + object="response", + output=[ + ResponseFunctionToolCall( + id=call_id, + call_id=call_id, + type="function_call", + name=tool_name, + arguments=arguments, + status="completed", + ) + ], + tool_choice="auto", + tools=[], + status="completed", + parallel_tool_calls=True, + ) async def mock_acreate(*args, **kwargs): - messages = kwargs["messages"] - last_question = messages[-1]["content"] - last_role = messages[-1]["role"] + input_messages = kwargs.get("input", []) + last_message = input_messages[-1] + last_content = last_message.get("content", "") if isinstance(last_message, dict) else "" + last_role = last_message.get("role", "") if isinstance(last_message, dict) else "" if last_role == "tool": - items = json.loads(last_question)["items"] + items = json.loads(last_content)["items"] arguments = {"query": "capital of France", "items": items, "filters": []} - return ChatCompletion( - object="chat.completion", - choices=[ - Choice( - message=ChatCompletionMessage( - role="assistant", - tool_calls=[ - ChatCompletionMessageToolCall( - id="call_abc123final", - type="function", - function=Function( - name="final_result", - arguments=json.dumps(arguments), - ), - ) - ], - ), - finish_reason="stop", - index=0, - ) - ], - id="test-123final", - created=0, - model="test-model", + return 
_make_tool_call_response("final_result", json.dumps(arguments), call_id="fc_abc123final") + if last_content == "Find search results for user query: What is the capital of France?": + return _make_tool_call_response( + "search_database", '{"search_query":"climbing gear outside"}', call_id="fc_abc123" ) - if last_question == "Find search results for user query: What is the capital of France?": - return ChatCompletion( - object="chat.completion", - choices=[ - Choice( - message=ChatCompletionMessage( - role="assistant", - tool_calls=[ - ChatCompletionMessageToolCall( - id="call_abc123", - type="function", - function=Function( - name="search_database", arguments='{"search_query":"climbing gear outside"}' - ), - ) - ], - ), - finish_reason="stop", - index=0, - ) - ], - id="test-123", - created=0, - model="test-model", - ) - elif last_question == "Find search results for user query: Are interest rates high?": + elif last_content == "Find search results for user query: Are interest rates high?": answer = "interest rates" - elif isinstance(last_question, list) and last_question[2].get("image_url"): - answer = "From the provided sources, the impact of interest rates and GDP growth on " - "financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]" + elif isinstance(last_content, list) and len(last_content) > 2 and last_content[2].get("image_url"): + answer = ( + "From the provided sources, the impact of interest rates and GDP growth on " + "financial markets can be observed through the line graph." + " [Financial Market Analysis Report 2023-7.png]" + ) else: answer = "The capital of France is Paris. [Benefit_Options-2.pdf]." 
- if messages[0]["content"].find("Generate 3 very brief follow-up questions") > -1: + system_content = input_messages[0].get("content", "") if isinstance(input_messages[0], dict) else "" + if ( + isinstance(system_content, str) + and system_content.find("Generate 3 very brief follow-up questions") > -1 + ): answer = "The capital of France is Paris. [Benefit_Options-2.pdf]. <>" - if "stream" in kwargs and kwargs["stream"] is True: - return AsyncChatCompletionIterator(answer) + if kwargs.get("stream") is True: + return AsyncResponseEventIterator(answer) else: - return ChatCompletion( - object="chat.completion", - choices=[ - Choice( - message=ChatCompletionMessage(role="assistant", content=answer), finish_reason="stop", index=0 - ) - ], - id="test-123", - created=0, - model="test-model", - ) + return _make_text_response(answer) - monkeypatch_session.setattr(openai.resources.chat.completions.AsyncCompletions, "create", mock_acreate) + monkeypatch_session.setattr(openai.resources.responses.AsyncResponses, "create", mock_acreate) yield diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json index 612be773..2535e647 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json +++ b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json @@ -61,8 +61,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } }, { @@ -101,8 +101,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } } ], diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines 
index d29b85c4..7a8760f6 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ -{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital 
of France?"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null} +{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}},{"title":"Search using generated search 
arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}],"followup_questions":null},"sessionState":null} {"delta":{"content":"The capital of France is Paris. 
[Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null} diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json index e311917b..2059b570 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json +++ b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json @@ -50,8 +50,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } } ], diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json index d0456cd7..71e2efb8 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json +++ b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json @@ -58,8 +58,8 @@ } ], "props": { - "model": "gpt-4o-mini", - "deployment": "gpt-4o-mini" + "model": "gpt-5.4", + "deployment": "gpt-5.4" } } ], diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines index 65d3ae5b..14a4d1b2 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ -{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek 
black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null} +{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}],"followup_questions":null},"sessionState":null} {"delta":{"content":"The capital of France is Paris. 
[Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null} diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index e55149a3..1c4d90db 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -6,10 +6,10 @@ @pytest.mark.asyncio async def test_get_common_parameters(mock_session_env): result = await common_parameters() - assert result.openai_chat_model == "gpt-4o-mini" + assert result.openai_chat_model == "gpt-5.4" assert result.openai_embed_model == "text-embedding-3-large" assert result.openai_embed_dimensions == 1024 - assert result.openai_chat_deployment == "gpt-4o-mini" + assert result.openai_chat_deployment == "gpt-5.4" assert result.openai_embed_deployment == "text-embedding-3-large" diff --git a/tests/test_openai_clients.py b/tests/test_openai_clients.py index 47caba26..ad60afc7 100644 --- a/tests/test_openai_clients.py +++ b/tests/test_openai_clients.py @@ -1,6 +1,5 @@ import pytest -from fastapi_app.dependencies import common_parameters from fastapi_app.openai_clients import create_openai_chat_client, create_openai_embed_client from tests.data import test_data @@ -18,49 +17,6 @@ async def test_create_openai_embed_client(mock_azure_credential, mock_openai_emb @pytest.mark.asyncio async def test_create_openai_chat_client(mock_azure_credential, mock_openai_chatcompletion): openai_chat_client = await create_openai_chat_client(mock_azure_credential) - assert openai_chat_client.chat.completions.create is not None - response = await openai_chat_client.chat.completions.create( - model="gpt-4o-mini", messages=[{"content": "test", "role": "user"}] - ) - assert response.choices[0].message.content == "The capital of France is Paris. [Benefit_Options-2.pdf]." 
- - -@pytest.mark.asyncio -async def test_github_models_configuration(monkeypatch): - """Test that GitHub Models uses the correct URLs and model names.""" - # Set up environment for GitHub Models - monkeypatch.setenv("OPENAI_CHAT_HOST", "github") - monkeypatch.setenv("OPENAI_EMBED_HOST", "github") - monkeypatch.setenv("GITHUB_TOKEN", "fake-token") - # Don't set GITHUB_MODEL to test defaults - - # Test chat client configuration - chat_client = await create_openai_chat_client(None) - assert str(chat_client.base_url).rstrip("/") == "https://models.github.ai/inference" - assert chat_client.api_key == "fake-token" - - # Test embed client configuration - embed_client = await create_openai_embed_client(None) - assert str(embed_client.base_url).rstrip("/") == "https://models.github.ai/inference" - assert embed_client.api_key == "fake-token" - - # Test that dependencies use correct defaults - context = await common_parameters() - assert context.openai_chat_model == "openai/gpt-4o" - assert context.openai_embed_model == "openai/text-embedding-3-large" - - -@pytest.mark.asyncio -async def test_github_models_with_custom_values(monkeypatch): - """Test that GitHub Models respects custom environment values.""" - # Set up environment for GitHub Models with custom values - monkeypatch.setenv("OPENAI_CHAT_HOST", "github") - monkeypatch.setenv("OPENAI_EMBED_HOST", "github") - monkeypatch.setenv("GITHUB_TOKEN", "fake-token") - monkeypatch.setenv("GITHUB_MODEL", "openai/gpt-4") - monkeypatch.setenv("GITHUB_EMBED_MODEL", "openai/text-embedding-ada-002") - - # Test that dependencies use custom values - context = await common_parameters() - assert context.openai_chat_model == "openai/gpt-4" - assert context.openai_embed_model == "openai/text-embedding-ada-002" + assert openai_chat_client.responses.create is not None + response = await openai_chat_client.responses.create(model="gpt-5.4", input=[{"role": "user", "content": "test"}]) + assert response.output_text == "The capital of France is 
Paris. [Benefit_Options-2.pdf]." From eea45c3eee8d58445df662eec0d718e6adc0f3b7 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 9 Apr 2026 22:54:03 +0000 Subject: [PATCH 2/8] Revert temperature change --- src/backend/fastapi_app/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py index 7fb1b8d3..f75db35c 100644 --- a/src/backend/fastapi_app/api_models.py +++ b/src/backend/fastapi_app/api_models.py @@ -24,7 +24,7 @@ class RetrievalMode(str, Enum): class ChatRequestOverrides(BaseModel): top: int = 3 - temperature: float = 1 + temperature: float = 0.3 retrieval_mode: RetrievalMode = RetrievalMode.HYBRID use_advanced_flow: bool = True prompt_template: Optional[str] = None From cfa570d5f4375680058f22130178b12f7afa9b8e Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 10 Apr 2026 16:37:56 +0000 Subject: [PATCH 3/8] Update async credentials, rm GitHub Models --- .github/copilot-instructions.md | 21 +++++++++----------- evals/eval_config.json | 3 +-- evals/evaluate.py | 2 -- evals/generate_ground_truth.py | 2 +- src/backend/fastapi_app/dependencies.py | 12 +++++------ src/backend/fastapi_app/openai_clients.py | 9 ++++++--- src/backend/fastapi_app/update_embeddings.py | 2 -- tests/conftest.py | 9 ++++++++- 8 files changed, 31 insertions(+), 29 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ecef0bfa..df531f79 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -11,7 +11,7 @@ RAG on PostgreSQL is a Python FastAPI backend with React TypeScript frontend tha Install the following tools before beginning development: - **Python 3.10+** (3.12 recommended) -- **Node.js 18+** for frontend development +- **Node.js 18+** for frontend development - **PostgreSQL 14+** with pgvector extension - **Azure Developer CLI (azd)** for deployment - **Docker Desktop** for dev containers (optional) @@ 
-37,7 +37,7 @@ Run these commands in sequence. NEVER CANCEL any long-running commands: ```bash # Ubuntu/Debian: sudo apt update && sudo apt install -y postgresql-16-pgvector - + # Start PostgreSQL and set password sudo service postgresql start sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'" @@ -97,7 +97,7 @@ Use "Frontend & Backend" configuration in the VS Code Run & Debug menu. ### Linting and Formatting (ALWAYS run before committing) ```bash ruff check . # Lint code (takes <1 second) -ruff format . # Format code (takes <1 second) +ruff format . # Format code (takes <1 second) mypy . --python-version 3.12 # Type check (takes ~42 seconds) ``` @@ -121,7 +121,7 @@ pytest tests/e2e.py --tracing=retain-on-failure **CRITICAL TIMING INFORMATION** - Set these timeout values and NEVER CANCEL: - **Dependencies install**: 90 seconds (use 180+ second timeout) -- **Frontend npm install**: 22 seconds (use 60+ second timeout) +- **Frontend npm install**: 22 seconds (use 60+ second timeout) - **Frontend build**: 12 seconds (use 30+ second timeout) - **MyPy type checking**: 42 seconds (use 90+ second timeout) - **Full test suite**: 25 seconds (use 60+ second timeout) @@ -156,7 +156,7 @@ pytest tests/e2e.py --tracing=retain-on-failure # Test API endpoints curl http://localhost:8000/items/1 # Should return JSON with item data - + # Test frontend curl http://localhost:8000/ | head -n 5 # Should return HTML with "RAG on PostgreSQL" title @@ -226,9 +226,6 @@ The application supports multiple OpenAI providers: 3. **Ollama** (local): Set `OPENAI_CHAT_HOST=ollama` -4. **GitHub Models**: - Set `OPENAI_CHAT_HOST=github` - ## Common Issues and Solutions ### Database Connection Issues @@ -267,7 +264,7 @@ Open `http://localhost:8089/` and point to your running application. 
The application provides these REST API endpoints (view full docs at `http://localhost:8000/docs`): - `GET /items/{id}` - Get specific item by ID -- `GET /search` - Search items with text query +- `GET /search` - Search items with text query - `GET /similar` - Find similar items using vector search - `POST /chat` - Chat with RAG system (requires OpenAI configuration) - `POST /chat/stream` - Streaming chat responses @@ -286,7 +283,7 @@ curl "http://localhost:8000/search?query=tent&limit=5" **Quick ls -la output for repository root:** ``` .devcontainer/ # Dev container configuration -.env.sample # Environment variables template +.env.sample # Environment variables template .github/ # GitHub Actions workflows .gitignore # Git ignore patterns .pre-commit-config.yaml # Pre-commit hook configuration @@ -309,8 +306,8 @@ tests/ # Test suite - **Always build and test locally before committing** - **Use pre-commit hooks** - they run ruff automatically - **Check the GitHub Actions** in `.github/workflows/` for CI requirements -- **Reference the full README.md** for deployment and Azure-specific details +- **Reference the full README.md** for deployment and Azure-specific details - **Use VS Code with the Python and Ruff extensions** for the best development experience - **Never skip the frontend build** - the backend serves static files from `src/backend/static/` -This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment. \ No newline at end of file +This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment. 
diff --git a/evals/eval_config.json b/evals/eval_config.json index fe50ef52..65e084c7 100644 --- a/evals/eval_config.json +++ b/evals/eval_config.json @@ -8,8 +8,7 @@ "use_advanced_flow": true, "top": 3, "retrieval_mode": "hybrid", - "temperature": 0.3, - "seed": 42 + "temperature": 0.3 } }, "target_response_answer_jmespath": "message.content", diff --git a/evals/evaluate.py b/evals/evaluate.py index efb7f8f2..240d8741 100644 --- a/evals/evaluate.py +++ b/evals/evaluate.py @@ -68,8 +68,6 @@ def get_openai_config() -> dict: openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"] elif os.environ.get("OPENAI_CHAT_HOST") == "ollama": raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") - elif os.environ.get("OPENAI_CHAT_HOST") == "github": - raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"} diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py index 9f2b0fef..c1541621 100644 --- a/evals/generate_ground_truth.py +++ b/evals/generate_ground_truth.py @@ -121,7 +121,7 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc {"role": "system", "content": generate_prompt}, {"role": "user", "content": json.dumps(source)}, ], - tools=[qa_pairs_tool(num_questions=2)], + tools=[qa_pairs_tool(num_questions=2)], # type: ignore[list-item] max_output_tokens=1000, store=False, ) diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index 7b60bef6..fdd70632 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -3,7 +3,7 @@ from collections.abc import AsyncGenerator from typing import Annotated, Optional -import azure.identity +import azure.identity.aio from fastapi import Depends, Request from openai import AsyncOpenAI from 
pydantic import BaseModel @@ -77,9 +77,9 @@ async def common_parameters(): async def get_azure_credential() -> ( - azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential + azure.identity.aio.AzureDeveloperCliCredential | azure.identity.aio.ManagedIdentityCredential ): - azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential + azure_credential: azure.identity.aio.AzureDeveloperCliCredential | azure.identity.aio.ManagedIdentityCredential try: if client_id := os.getenv("APP_IDENTITY_ID"): # Authenticate using a user-assigned managed identity on Azure @@ -88,14 +88,14 @@ async def get_azure_credential() -> ( "Using managed identity for client ID %s", client_id, ) - azure_credential = azure.identity.ManagedIdentityCredential(client_id=client_id) + azure_credential = azure.identity.aio.ManagedIdentityCredential(client_id=client_id) else: if tenant_id := os.getenv("AZURE_TENANT_ID"): logger.info("Authenticating to Azure using Azure Developer CLI Credential for tenant %s", tenant_id) - azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=tenant_id, process_timeout=60) + azure_credential = azure.identity.aio.AzureDeveloperCliCredential(tenant_id=tenant_id) else: logger.info("Authenticating to Azure using Azure Developer CLI Credential") - azure_credential = azure.identity.AzureDeveloperCliCredential(process_timeout=60) + azure_credential = azure.identity.aio.AzureDeveloperCliCredential() return azure_credential except Exception as e: logger.warning("Failed to authenticate to Azure: %s", e) diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index e5c0a703..9a8bde14 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -1,7 +1,6 @@ import logging import os -import azure.identity import azure.identity.aio import openai @@ -9,7 +8,9 @@ async def create_openai_chat_client( - 
azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None, + azure_credential: azure.identity.aio.AzureDeveloperCliCredential + | azure.identity.aio.ManagedIdentityCredential + | None, ) -> openai.AsyncOpenAI: openai_chat_client: openai.AsyncOpenAI OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST") @@ -55,7 +56,9 @@ async def create_openai_chat_client( async def create_openai_embed_client( - azure_credential: azure.identity.AzureDeveloperCliCredential | azure.identity.ManagedIdentityCredential | None, + azure_credential: azure.identity.aio.AzureDeveloperCliCredential + | azure.identity.aio.ManagedIdentityCredential + | None, ) -> openai.AsyncOpenAI: openai_embed_client: openai.AsyncOpenAI OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST") diff --git a/src/backend/fastapi_app/update_embeddings.py b/src/backend/fastapi_app/update_embeddings.py index b36113b2..83744ead 100644 --- a/src/backend/fastapi_app/update_embeddings.py +++ b/src/backend/fastapi_app/update_embeddings.py @@ -29,8 +29,6 @@ async def update_embeddings(in_seed_data=False): embedding_column = os.getenv("AZURE_OPENAI_EMBEDDING_COLUMN", "embedding_3l") elif OPENAI_EMBED_HOST == "ollama": embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN", "embedding_nomic") - elif OPENAI_EMBED_HOST == "github": - embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN", "embedding_3l") else: embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l") logger.info(f"Updating embeddings in column: {embedding_column}") diff --git a/tests/conftest.py b/tests/conftest.py index 409b9e07..6435789a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ import openai import openai.resources +import openai.resources.responses import pytest import pytest_asyncio from fastapi.testclient import TestClient @@ -184,6 +185,8 @@ def __init__(self, answer: str): delta=parts[0] + "<<", item_id="msg-1", output_index=0, + logprobs=[], + sequence_number=0, ) 
) self.events.append( @@ -193,6 +196,8 @@ def __init__(self, answer: str): delta=parts[1], item_id="msg-1", output_index=0, + logprobs=[], + sequence_number=1, ) ) else: @@ -203,6 +208,8 @@ def __init__(self, answer: str): delta=answer, item_id="msg-1", output_index=0, + logprobs=[], + sequence_number=0, ) ) @@ -305,7 +312,7 @@ async def mock_acreate(*args, **kwargs): @pytest.fixture(scope="function") def mock_azure_credential(mock_session_env): """Mock the Azure credential for testing.""" - with mock.patch("azure.identity.AzureDeveloperCliCredential") as mock_azure_credential: + with mock.patch("azure.identity.aio.AzureDeveloperCliCredential") as mock_azure_credential: mock_azure_credential.return_value = MockAzureCredential() yield mock_azure_credential From 596153f5f697761ab78cfcb411187c351d5013cb Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Sat, 11 Apr 2026 05:20:04 +0000 Subject: [PATCH 4/8] Port frontend to be more Responses like, remove unused session state and follow-up Qs --- src/backend/fastapi_app/api_models.py | 22 +--- src/backend/fastapi_app/rag_advanced.py | 13 +- src/backend/fastapi_app/rag_simple.py | 11 +- src/backend/fastapi_app/routes/api_routes.py | 8 +- src/frontend/package-lock.json | 119 +++--------------- src/frontend/package.json | 8 +- src/frontend/src/api/models.ts | 28 +++-- src/frontend/src/components/Answer/Answer.tsx | 24 +--- src/frontend/src/pages/chat/Chat.tsx | 68 +++++----- tests/e2e.py | 4 - .../advanced_chat_flow_response.json | 21 ++-- ...ced_chat_streaming_flow_response.jsonlines | 4 +- .../simple_chat_flow_response.json | 12 +- ...le_chat_flow_message_history_response.json | 12 +- ...ple_chat_streaming_flow_response.jsonlines | 4 +- tests/test_api_routes.py | 10 +- 16 files changed, 122 insertions(+), 246 deletions(-) diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py index f75db35c..fefcbdf1 100644 --- a/src/backend/fastapi_app/api_models.py +++ 
b/src/backend/fastapi_app/api_models.py @@ -5,17 +5,6 @@ from pydantic import BaseModel, Field -class AIChatRoles(str, Enum): - USER = "user" - ASSISTANT = "assistant" - SYSTEM = "system" - - -class Message(BaseModel): - content: str - role: AIChatRoles = AIChatRoles.USER - - class RetrievalMode(str, Enum): TEXT = "text" VECTORS = "vectors" @@ -35,9 +24,8 @@ class ChatRequestContext(BaseModel): class ChatRequest(BaseModel): - messages: list[ResponseInputItemParam] + input: list[ResponseInputItemParam] context: ChatRequestContext - sessionState: Optional[Any] = None class ItemPublic(BaseModel): @@ -69,7 +57,6 @@ class ThoughtStep(BaseModel): class RAGContext(BaseModel): data_points: dict[int, ItemPublic] thoughts: list[ThoughtStep] - followup_questions: Optional[list[str]] = None class ErrorResponse(BaseModel): @@ -77,15 +64,14 @@ class ErrorResponse(BaseModel): class RetrievalResponse(BaseModel): - message: Message + output_text: str context: RAGContext - sessionState: Optional[Any] = None class RetrievalResponseDelta(BaseModel): - delta: Optional[Message] = None + type: str + delta: Optional[str] = None context: Optional[RAGContext] = None - sessionState: Optional[Any] = None class ChatParams(ChatRequestOverrides): diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py index 7b894b24..501df89f 100644 --- a/src/backend/fastapi_app/rag_advanced.py +++ b/src/backend/fastapi_app/rag_advanced.py @@ -16,12 +16,10 @@ from openai.types.responses import EasyInputMessageParam, ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( - AIChatRoles, BrandFilter, ChatRequestOverrides, Filter, ItemPublic, - Message, PriceFilter, RAGContext, RetrievalResponse, @@ -124,7 +122,7 @@ async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]: thoughts = [ ThoughtStep( title="Prompt to generate search arguments", - description=[{"content": self.query_prompt_template}] + description=[{"role": 
"system", "content": self.query_prompt_template}] + ItemHelpers.input_to_new_input_list(run_results.input), props=self.model_for_thoughts, ), @@ -157,14 +155,14 @@ async def answer( ) return RetrievalResponse( - message=Message(content=str(run_results.final_output), role=AIChatRoles.ASSISTANT), + output_text=str(run_results.final_output), context=RAGContext( data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=[{"content": self.answer_prompt_template}] + description=[{"role": "system", "content": self.answer_prompt_template}] + ItemHelpers.input_to_new_input_list(run_results.input), props=self.model_for_thoughts, ), @@ -184,13 +182,14 @@ async def answer_stream( ) yield RetrievalResponseDelta( + type="response.context", context=RAGContext( data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=[{"content": self.answer_prompt_template}] + description=[{"role": "system", "content": self.answer_prompt_template}] + ItemHelpers.input_to_new_input_list(run_results.input), props=self.model_for_thoughts, ), @@ -200,5 +199,5 @@ async def answer_stream( async for event in run_results.stream_events(): if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent): - yield RetrievalResponseDelta(delta=Message(content=str(event.data.delta), role=AIChatRoles.ASSISTANT)) + yield RetrievalResponseDelta(type="response.output_text.delta", delta=str(event.data.delta)) return diff --git a/src/backend/fastapi_app/rag_simple.py b/src/backend/fastapi_app/rag_simple.py index 1bbbd12a..1b8fd86c 100644 --- a/src/backend/fastapi_app/rag_simple.py +++ b/src/backend/fastapi_app/rag_simple.py @@ -6,10 +6,8 @@ from openai.types.responses import ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( - AIChatRoles, ChatRequestOverrides, ItemPublic, - Message, RAGContext, 
RetrievalResponse, RetrievalResponseDelta, @@ -90,14 +88,14 @@ async def answer( ) return RetrievalResponse( - message=Message(content=str(run_results.final_output), role=AIChatRoles.ASSISTANT), + output_text=str(run_results.final_output), context=RAGContext( data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=[{"content": self.answer_prompt_template}] + description=[{"role": "system", "content": self.answer_prompt_template}] + ItemHelpers.input_to_new_input_list(run_results.input), props=self.model_for_thoughts, ), @@ -117,13 +115,14 @@ async def answer_stream( ) yield RetrievalResponseDelta( + type="response.context", context=RAGContext( data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=[{"content": self.answer_agent.instructions}] + description=[{"role": "system", "content": self.answer_agent.instructions}] + ItemHelpers.input_to_new_input_list(run_results.input), props=self.model_for_thoughts, ), @@ -133,5 +132,5 @@ async def answer_stream( async for event in run_results.stream_events(): if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent): - yield RetrievalResponseDelta(delta=Message(content=str(event.data.delta), role=AIChatRoles.ASSISTANT)) + yield RetrievalResponseDelta(type="response.output_text.delta", delta=str(event.data.delta)) return diff --git a/src/backend/fastapi_app/routes/api_routes.py b/src/backend/fastapi_app/routes/api_routes.py index f566886c..5821eaf7 100644 --- a/src/backend/fastapi_app/routes/api_routes.py +++ b/src/backend/fastapi_app/routes/api_routes.py @@ -121,7 +121,7 @@ async def chat_handler( rag_flow: Union[SimpleRAGChat, AdvancedRAGChat] if chat_request.context.overrides.use_advanced_flow: rag_flow = AdvancedRAGChat( - messages=chat_request.messages, + messages=chat_request.input, overrides=chat_request.context.overrides, 
searcher=searcher, openai_chat_client=openai_chat.client, @@ -130,7 +130,7 @@ async def chat_handler( ) else: rag_flow = SimpleRAGChat( - messages=chat_request.messages, + messages=chat_request.input, overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, @@ -169,7 +169,7 @@ async def chat_stream_handler( rag_flow: Union[SimpleRAGChat, AdvancedRAGChat] if chat_request.context.overrides.use_advanced_flow: rag_flow = AdvancedRAGChat( - messages=chat_request.messages, + messages=chat_request.input, overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, @@ -178,7 +178,7 @@ async def chat_stream_handler( ) else: rag_flow = SimpleRAGChat( - messages=chat_request.messages, + messages=chat_request.input, overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json index 252ddabd..b157ccc9 100644 --- a/src/frontend/package-lock.json +++ b/src/frontend/package-lock.json @@ -13,10 +13,10 @@ "@fluentui/react": "^8.112.5", "@fluentui/react-components": "^9.37.3", "@fluentui/react-icons": "^2.0.221", - "@microsoft/ai-chat-protocol": "1.0.0-beta.20240610.1", "@react-spring/web": "^9.7.3", "dompurify": "^3.2.4", "marked": "^9.1.6", + "ndjson-readablestream": "^1.4.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.18.0", @@ -2823,14 +2823,6 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, - "node_modules/@microsoft/ai-chat-protocol": { - "version": "1.0.0-beta.20240610.1", - "resolved": "https://registry.npmjs.org/@microsoft/ai-chat-protocol/-/ai-chat-protocol-1.0.0-beta.20240610.1.tgz", - "integrity": "sha512-VGRt4DTCnoCKLqXs1H+3F9yeD8kTATktWxL4j2OUeOoqEiqWUiNm66qQMBzQJRv9Oi+vV9weQyZ6O6mHrf91HQ==", - "dependencies": { - "@typespec/ts-http-runtime": "^1.0.0-alpha.20240228.1" - } - }, "node_modules/@microsoft/load-themed-styles": { "version": "1.10.295", 
"license": "MIT" @@ -3323,19 +3315,6 @@ "version": "2.0.7", "license": "MIT" }, - "node_modules/@typespec/ts-http-runtime": { - "version": "1.0.0-alpha.20240610.1", - "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-1.0.0-alpha.20240610.1.tgz", - "integrity": "sha512-f1pHRnMpCZG1u7EucgZ00E9MpqI/HpZZ7FOu8oub/QH/9ki+5BtRbQfM17EDTi5w5JDWlp9Os+7fQVWLidozKQ==", - "dependencies": { - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.0", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/@vitejs/plugin-react": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.4.tgz", @@ -3356,17 +3335,6 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0" } }, - "node_modules/agent-base": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", - "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", - "dependencies": { - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/browserslist": { "version": "4.24.4", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.4.tgz", @@ -3466,6 +3434,7 @@ }, "node_modules/debug": { "version": "4.3.4", + "dev": true, "license": "MIT", "dependencies": { "ms": "2.1.2" @@ -3658,30 +3627,6 @@ "node": "*" } }, - "node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/https-proxy-agent": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", - "integrity": 
"sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==", - "dependencies": { - "agent-base": "^7.0.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/is-alphabetical": { "version": "1.0.4", "license": "MIT", @@ -3807,6 +3752,7 @@ }, "node_modules/ms": { "version": "2.1.2", + "dev": true, "license": "MIT" }, "node_modules/nanoid": { @@ -3828,6 +3774,12 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/ndjson-readablestream": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/ndjson-readablestream/-/ndjson-readablestream-1.4.0.tgz", + "integrity": "sha512-aufwPqwZzsS+NTekCJbHz8kJ6VlyeI8LD7owYSWm3cs4rx3CkY+J+DoD1af/ySLbc4ESJkESgJeMj1V4t3ZYPg==", + "license": "MIT" + }, "node_modules/node-releases": { "version": "2.0.19", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", @@ -6189,14 +6141,6 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, - "@microsoft/ai-chat-protocol": { - "version": "1.0.0-beta.20240610.1", - "resolved": "https://registry.npmjs.org/@microsoft/ai-chat-protocol/-/ai-chat-protocol-1.0.0-beta.20240610.1.tgz", - "integrity": "sha512-VGRt4DTCnoCKLqXs1H+3F9yeD8kTATktWxL4j2OUeOoqEiqWUiNm66qQMBzQJRv9Oi+vV9weQyZ6O6mHrf91HQ==", - "requires": { - "@typespec/ts-http-runtime": "^1.0.0-alpha.20240228.1" - } - }, "@microsoft/load-themed-styles": { "version": "1.10.295" }, @@ -6485,16 +6429,6 @@ "@types/unist": { "version": "2.0.7" }, - "@typespec/ts-http-runtime": { - "version": "1.0.0-alpha.20240610.1", - "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-1.0.0-alpha.20240610.1.tgz", - "integrity": "sha512-f1pHRnMpCZG1u7EucgZ00E9MpqI/HpZZ7FOu8oub/QH/9ki+5BtRbQfM17EDTi5w5JDWlp9Os+7fQVWLidozKQ==", - "requires": { - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.0", - "tslib": "^2.6.2" - } - }, "@vitejs/plugin-react": { "version": "4.3.4", "resolved": 
"https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.4.tgz", @@ -6508,14 +6442,6 @@ "react-refresh": "^0.14.2" } }, - "agent-base": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", - "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", - "requires": { - "debug": "^4.3.4" - } - }, "browserslist": { "version": "4.24.4", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.4.tgz", @@ -6557,6 +6483,7 @@ }, "debug": { "version": "4.3.4", + "dev": true, "requires": { "ms": "2.1.2" } @@ -6680,24 +6607,6 @@ "highlight.js": { "version": "10.7.3" }, - "http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "requires": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - } - }, - "https-proxy-agent": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", - "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==", - "requires": { - "agent-base": "^7.0.2", - "debug": "4" - } - }, "is-alphabetical": { "version": "1.0.4" }, @@ -6766,7 +6675,8 @@ "integrity": "sha512-jcByLnIFkd5gSXZmjNvS1TlmRhCXZjIzHYlaGkPlLIekG55JDR2Z4va9tZwCiP+/RDERiNhMOFu01xd6O5ct1Q==" }, "ms": { - "version": "2.1.2" + "version": "2.1.2", + "dev": true }, "nanoid": { "version": "3.3.10", @@ -6774,6 +6684,11 @@ "integrity": "sha512-vSJJTG+t/dIKAUhUDw/dLdZ9s//5OxcHqLaDWWrW4Cdq7o6tdLIczUkMXt2MBNmk6sJRZBZRXVixs7URY1CmIg==", "dev": true }, + "ndjson-readablestream": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/ndjson-readablestream/-/ndjson-readablestream-1.4.0.tgz", + "integrity": 
"sha512-aufwPqwZzsS+NTekCJbHz8kJ6VlyeI8LD7owYSWm3cs4rx3CkY+J+DoD1af/ySLbc4ESJkESgJeMj1V4t3ZYPg==" + }, "node-releases": { "version": "2.0.19", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", diff --git a/src/frontend/package.json b/src/frontend/package.json index 5d8aecfd..bcab349c 100644 --- a/src/frontend/package.json +++ b/src/frontend/package.json @@ -12,18 +12,18 @@ "preview": "vite preview" }, "dependencies": { - "@azure/msal-react": "^2.0.6", "@azure/msal-browser": "^3.1.0", + "@azure/msal-react": "^2.0.6", "@fluentui/react": "^8.112.5", "@fluentui/react-components": "^9.37.3", "@fluentui/react-icons": "^2.0.221", "@react-spring/web": "^9.7.3", - "marked": "^9.1.6", "dompurify": "^3.2.4", + "marked": "^9.1.6", + "ndjson-readablestream": "^1.4.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.18.0", - "@microsoft/ai-chat-protocol": "1.0.0-beta.20240610.1", "react-syntax-highlighter": "^15.5.0", "scheduler": "^0.20.2" }, @@ -31,10 +31,10 @@ "@types/dompurify": "^3.0.4", "@types/react": "^18.2.34", "@types/react-dom": "^18.2.14", + "@types/react-syntax-highlighter": "^15.5.7", "@vitejs/plugin-react": "^4.3.4", "prettier": "^3.0.3", "typescript": "^5.2.2", - "@types/react-syntax-highlighter": "^15.5.7", "vite": "^6.3.4" } } diff --git a/src/frontend/src/api/models.ts b/src/frontend/src/api/models.ts index 4e9c3e26..e7e6efd4 100644 --- a/src/frontend/src/api/models.ts +++ b/src/frontend/src/api/models.ts @@ -1,5 +1,3 @@ -import { AIChatCompletion, AIChatCompletionDelta, AIChatCompletionOperationOptions } from "@microsoft/ai-chat-protocol"; - export const enum RetrievalMode { Hybrid = "hybrid", Vectors = "vectors", @@ -18,9 +16,14 @@ export type ChatAppRequestContext = { overrides: ChatAppRequestOverrides; }; -export interface ChatAppRequestOptions extends AIChatCompletionOperationOptions { - context: ChatAppRequestContext -} +export type ChatAppRequestOptions = { + context: ChatAppRequestContext; +}; + 
+export type ChatAppRequest = { + input: { content: string; role: string }[]; + context: ChatAppRequestContext; +}; export type Thoughts = { title: string; @@ -30,14 +33,17 @@ export type Thoughts = { export type RAGContext = { data_points: { [key: string]: any }; - followup_questions: string[] | null; thoughts: Thoughts[]; }; -export interface RAGChatCompletion extends AIChatCompletion { +export type RAGChatCompletion = { + output_text: string; context: RAGContext; -} +}; -export interface RAGChatCompletionDelta extends AIChatCompletionDelta { - context: RAGContext; -} +export type RAGChatCompletionDelta = { + type: string; + delta?: string; + context?: RAGContext; + error?: string; +}; diff --git a/src/frontend/src/components/Answer/Answer.tsx b/src/frontend/src/components/Answer/Answer.tsx index 01b8bd3f..163aa68e 100644 --- a/src/frontend/src/components/Answer/Answer.tsx +++ b/src/frontend/src/components/Answer/Answer.tsx @@ -15,8 +15,6 @@ interface Props { onCitationClicked: (filePath: string) => void; onThoughtProcessClicked: () => void; onSupportingContentClicked: () => void; - onFollowupQuestionClicked?: (question: string) => void; - showFollowupQuestions?: boolean; } export const Answer = ({ @@ -25,13 +23,10 @@ export const Answer = ({ isStreaming, onCitationClicked, onThoughtProcessClicked, - onSupportingContentClicked, - onFollowupQuestionClicked, - showFollowupQuestions + onSupportingContentClicked }: Props) => { const [isReferencesCollapsed, setIsReferencesCollapsed] = useState(true); - const followupQuestions = answer.context.followup_questions; - const messageContent = answer.message.content; + const messageContent = answer.output_text; const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]); const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml); @@ -89,21 +84,6 @@ export const Answer = ({ )} )} - - {!!followupQuestions?.length && showFollowupQuestions && 
onFollowupQuestionClicked && ( - - - Follow-up questions: - {followupQuestions.map((x, i) => { - return ( - onFollowupQuestionClicked(x)}> - {`${x}`} - - ); - })} - - - )} ); }; diff --git a/src/frontend/src/pages/chat/Chat.tsx b/src/frontend/src/pages/chat/Chat.tsx index f583f012..9750967e 100644 --- a/src/frontend/src/pages/chat/Chat.tsx +++ b/src/frontend/src/pages/chat/Chat.tsx @@ -4,8 +4,8 @@ import { SparkleFilled } from "@fluentui/react-icons"; import styles from "./Chat.module.css"; -import { RetrievalMode, RAGChatCompletion, RAGChatCompletionDelta, ChatAppRequestOptions } from "../../api"; -import { AIChatProtocolClient, AIChatMessage } from "@microsoft/ai-chat-protocol"; +import { RetrievalMode, RAGChatCompletion, RAGChatCompletionDelta, ChatAppRequest } from "../../api"; +import readNDJSONStream from "ndjson-readablestream"; import { Answer, AnswerError, AnswerLoading } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; import { ExampleList } from "../../components/Example"; @@ -38,24 +38,22 @@ const Chat = () => { const [answers, setAnswers] = useState<[user: string, response: RAGChatCompletion][]>([]); const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: RAGChatCompletion][]>([]); - const handleAsyncRequest = async (question: string, answers: [string, RAGChatCompletion][], result: AsyncIterable) => { + const handleAsyncRequest = async (question: string, answers: [string, RAGChatCompletion][], responseBody: ReadableStream) => { let answer = ""; let chatCompletion: RAGChatCompletion = { context: { data_points: {}, - followup_questions: null, thoughts: [] }, - message: { content: "", role: "assistant" } + output_text: "" }; const updateState = (newContent: string) => { return new Promise(resolve => { setTimeout(() => { answer += newContent; - // We need to create a new object to trigger a re-render const latestCompletion: RAGChatCompletion = { ...chatCompletion, - message: { content: 
answer, role: chatCompletion.message.role } + output_text: answer }; setStreamedAnswers([...answers, [question, latestCompletion]]); resolve(null); @@ -64,25 +62,21 @@ const Chat = () => { }; try { setIsStreaming(true); - for await (const response of result) { - if (response.context) { - chatCompletion.context = { - ...chatCompletion.context, - ...response.context - }; - } - if (response.delta && response.delta.role) { - chatCompletion.message.role = response.delta.role; + for await (const event of readNDJSONStream(responseBody)) { + if (event.error) { + throw new Error(event.error); } - if (response.delta && response.delta.content) { + if (event.type === "response.context" && event.context) { + chatCompletion.context = { ...chatCompletion.context, ...event.context }; + } else if (event.type === "response.output_text.delta" && event.delta !== undefined) { setIsLoading(false); - await updateState(response.delta.content); + await updateState(event.delta); } } } finally { setIsStreaming(false); } - chatCompletion.message.content = answer; + chatCompletion.output_text = answer; return chatCompletion; }; const makeApiRequest = async (question: string) => { @@ -94,12 +88,13 @@ const Chat = () => { setActiveAnalysisPanelTab(undefined); try { - const messages: AIChatMessage[] = answers.flatMap(answer => [ + const messages = answers.flatMap(answer => [ { content: answer[0], role: "user" }, - { content: answer[1].message.content, role: "assistant" } + { content: answer[1].output_text, role: "assistant" } ]); - const allMessages: AIChatMessage[] = [...messages, { content: question, role: "user" }]; - const options: ChatAppRequestOptions = { + const allMessages = [...messages, { content: question, role: "user" }]; + const request: ChatAppRequest = { + input: allMessages, context: { overrides: { use_advanced_flow: useAdvancedFlow, @@ -108,16 +103,29 @@ const Chat = () => { prompt_template: promptTemplate.length === 0 ? 
undefined : promptTemplate, temperature: temperature } - }, - sessionState: answers.length ? answers[answers.length - 1][1].sessionState : null + } }; - const chatClient: AIChatProtocolClient = new AIChatProtocolClient("/chat"); if (shouldStream) { - const result = (await chatClient.getStreamedCompletion(allMessages, options)) as AsyncIterable; - const parsedResponse = await handleAsyncRequest(question, answers, result); + const response = await fetch("/chat/stream", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request) + }); + if (!response.ok || !response.body) { + throw new Error(`Request failed with status ${response.status}`); + } + const parsedResponse = await handleAsyncRequest(question, answers, response.body); setAnswers([...answers, [question, parsedResponse]]); } else { - const result = (await chatClient.getCompletion(allMessages, options)) as RAGChatCompletion; + const response = await fetch("/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request) + }); + if (!response.ok) { + throw new Error(`Request failed with status ${response.status}`); + } + const result: RAGChatCompletion = await response.json(); setAnswers([...answers, [question, result]]); } } catch (e) { @@ -220,7 +228,6 @@ const Chat = () => { onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)} - onFollowupQuestionClicked={q => makeApiRequest(q)} /> @@ -238,7 +245,6 @@ const Chat = () => { onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)} - onFollowupQuestionClicked={q => makeApiRequest(q)} /> diff --git a/tests/e2e.py b/tests/e2e.py 
index 56f6023d..5a019688 100644 --- a/tests/e2e.py +++ b/tests/e2e.py @@ -59,10 +59,6 @@ def test_home(page: Page, live_server_url: str): def test_chat(page: Page, live_server_url: str): # Set up a mock route to the /chat endpoint with streaming results def handle(route: Route): - # Assert that session_state is specified in the request (None for now) - if route.request.post_data_json: - session_state = route.request.post_data_json["sessionState"] - assert session_state is None # Read the JSONL from our snapshot results and return as the response f = open( "tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines" diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json index 2535e647..1437bdb6 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json +++ b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json @@ -1,8 +1,5 @@ { - "message": { - "content": "The capital of France is Paris. [Benefit_Options-2.pdf].", - "role": "assistant" - }, + "output_text": "The capital of France is Paris. [Benefit_Options-2.pdf].", "context": { "data_points": { "1": { @@ -19,6 +16,7 @@ "title": "Prompt to generate search arguments", "description": [ { + "role": "system", "content": "Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n" }, { @@ -26,14 +24,14 @@ "content": "good options for climbing gear that can be used outside?" 
}, { - "id": "madeup", + "id": "fc_madeup1", "call_id": "call_abc123", "name": "search_database", "arguments": "{\"search_query\":\"climbing gear outside\"}", "type": "function_call" }, { - "id": "madeupoutput", + "id": "fc_madeupoutput1", "call_id": "call_abc123", "output": "Search results for climbing gear that can be used outside: ...", "type": "function_call_output" @@ -43,14 +41,14 @@ "content": "are there any shoes less than $50?" }, { - "id": "madeup", + "id": "fc_madeup2", "call_id": "call_abc456", "name": "search_database", "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", "type": "function_call" }, { - "id": "madeupoutput", + "id": "fc_madeupoutput2", "call_id": "call_abc456", "output": "Search results for shoes cheaper than 50: ...", "type": "function_call_output" @@ -93,6 +91,7 @@ "title": "Prompt to generate answer", "description": [ { + "role": "system", "content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]." 
}, { @@ -105,8 +104,6 @@ "deployment": "gpt-5.4" } } - ], - "followup_questions": null - }, - "sessionState": null + ] + } } \ No newline at end of file diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines index 7a8760f6..22d88584 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ -{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. 
**search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}],"followup_questions":null},"sessionState":null} -{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null} +{"type":"response.context","delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"role":"system","content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. 
**search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"fc_madeup1","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"fc_madeupoutput1","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"fc_madeup2","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"fc_madeupoutput2","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}]}} +{"error": "'AsyncResponseEventIterator' object has no attribute 'close'"} diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json index 2059b570..76177c90 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json +++ b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json @@ -1,8 +1,5 @@ { - "message": { - "content": "The capital of France is Paris. [Benefit_Options-2.pdf].", - "role": "assistant" - }, + "output_text": "The capital of France is Paris. [Benefit_Options-2.pdf].", "context": { "data_points": { "1": { @@ -42,6 +39,7 @@ "title": "Prompt to generate answer", "description": [ { + "role": "system", "content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]." }, { @@ -54,8 +52,6 @@ "deployment": "gpt-5.4" } } - ], - "followup_questions": null - }, - "sessionState": null + ] + } } \ No newline at end of file diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json index 71e2efb8..de49c7c4 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json +++ b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json @@ -1,8 +1,5 @@ { - "message": { - "content": "The capital of France is Paris. [Benefit_Options-2.pdf].", - "role": "assistant" - }, + "output_text": "The capital of France is Paris. [Benefit_Options-2.pdf].", "context": { "data_points": { "1": { @@ -42,6 +39,7 @@ "title": "Prompt to generate answer", "description": [ { + "role": "system", "content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]." }, { @@ -62,8 +60,6 @@ "deployment": "gpt-5.4" } } - ], - "followup_questions": null - }, - "sessionState": null + ] + } } \ No newline at end of file diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines index 14a4d1b2..6924538c 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ -{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. 
With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}],"followup_questions":null},"sessionState":null} -{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null} +{"type":"response.context","delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. 
Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}]}} +{"error": "'AsyncResponseEventIterator' object has no attribute 'close'"} diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py index 55da4d6f..3349b984 100644 --- a/tests/test_api_routes.py +++ b/tests/test_api_routes.py @@ -115,7 +115,7 @@ async def test_simple_chat_flow(test_client, snapshot): "context": { "overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3} }, - "messages": [{"content": "What is the capital of France?", "role": "user"}], + "input": [{"content": "What is the capital of France?", "role": "user"}], }, ) response_data = response.json() @@ -134,7 +134,7 @@ async def test_simple_chat_flow_message_history(test_client, snapshot): "context": { "overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3} }, - "messages": [ + "input": [ {"content": "What is the capital of France?", "role": "user"}, {"content": "The capital of France is Paris.", "role": 
"assistant"}, {"content": "What is the capital of France?", "role": "user"}, @@ -157,7 +157,7 @@ async def test_simple_chat_streaming_flow(test_client, snapshot): "context": { "overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3} }, - "messages": [{"content": "What is the capital of France?", "role": "user"}], + "input": [{"content": "What is the capital of France?", "role": "user"}], }, ) response_data = response.content @@ -175,7 +175,7 @@ async def test_advanced_chat_flow(test_client, snapshot): "context": { "overrides": {"top": 1, "use_advanced_flow": True, "retrieval_mode": "hybrid", "temperature": 0.3} }, - "messages": [{"content": "What is the capital of France?", "role": "user"}], + "input": [{"content": "What is the capital of France?", "role": "user"}], }, ) response_data = response.json() @@ -194,7 +194,7 @@ async def test_advanced_chat_streaming_flow(test_client, snapshot): "context": { "overrides": {"top": 1, "use_advanced_flow": True, "retrieval_mode": "hybrid", "temperature": 0.3} }, - "messages": [{"content": "What is the capital of France?", "role": "user"}], + "input": [{"content": "What is the capital of France?", "role": "user"}], }, ) response_data = response.content From 46c371be4228edd8872353cf15fe23ad3879875f Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Sat, 11 Apr 2026 05:30:27 +0000 Subject: [PATCH 5/8] Hopefully fix the CI and tests --- .github/workflows/app-tests.yaml | 3 ++ tests/conftest.py | 33 +++++++++++++++++++ ...ced_chat_streaming_flow_response.jsonlines | 2 +- ...ple_chat_streaming_flow_response.jsonlines | 2 +- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml index acf59b6d..0dbdbe1b 100644 --- a/.github/workflows/app-tests.yaml +++ b/.github/workflows/app-tests.yaml @@ -48,6 +48,9 @@ jobs: brew install postgresql@14 brew link --overwrite postgresql@14 brew install pgvector + # pgvector installs 
into the default PostgreSQL, copy extension files to postgresql@14 + cp /opt/homebrew/share/postgresql/extension/vector* /opt/homebrew/share/postgresql@14/extension/ + cp /opt/homebrew/lib/postgresql/vector.dylib /opt/homebrew/lib/postgresql@14/vector.dylib 2>/dev/null || true brew services start postgresql@14 && sleep 1 createuser -s ${{ env.POSTGRES_USERNAME }} psql -d postgres -c "ALTER USER ${{ env.POSTGRES_USERNAME }} WITH PASSWORD '${{ env.POSTGRES_PASSWORD }}'" diff --git a/tests/conftest.py b/tests/conftest.py index 6435789a..4294c7cc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ from openai.types.create_embedding_response import Usage from openai.types.responses import ( Response, + ResponseCompletedEvent, ResponseFunctionToolCall, ResponseOutputMessage, ResponseOutputText, @@ -212,6 +213,32 @@ def __init__(self, answer: str): sequence_number=0, ) ) + # Agents SDK requires a ResponseCompletedEvent to finalize the stream + self.events.append( + ResponseCompletedEvent( + type="response.completed", + sequence_number=len(self.events), + response=Response( + id="resp-test-stream", + created_at=0, + model="gpt-5.4", + object="response", + output=[ + ResponseOutputMessage( + id="msg-1", + type="message", + role="assistant", + status="completed", + content=[ResponseOutputText(type="output_text", text=answer, annotations=[])], + ) + ], + tool_choice="auto", + tools=[], + status="completed", + parallel_tool_calls=True, + ), + ) + ) async def __aenter__(self): return self @@ -219,6 +246,12 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_val, exc_tb): return None + async def close(self): + pass + + async def parse(self): + return self + def __aiter__(self): return self diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines index 
22d88584..9ee6166a 100644 --- a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ {"type":"response.context","delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"role":"system","content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"fc_madeup1","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"fc_madeupoutput1","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"fc_madeup2","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"fc_madeupoutput2","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find 
search results for user query: What is the capital of France?"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}]}} -{"error": "'AsyncResponseEventIterator' object has no attribute 'close'"} +{"type":"response.output_text.delta","delta":"The capital of France is Paris. [Benefit_Options-2.pdf].","context":null} diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines index 6924538c..57fdd33b 100644 --- a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines +++ b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines @@ -1,2 +1,2 @@ {"type":"response.context","delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. 
With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}]}} -{"error": "'AsyncResponseEventIterator' object has no attribute 'close'"} +{"type":"response.output_text.delta","delta":"The capital of France is Paris. 
[Benefit_Options-2.pdf].","context":null} From 36d54c69f3bf25c6b3d58f6530894da3b9630a23 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Sat, 11 Apr 2026 05:39:13 +0000 Subject: [PATCH 6/8] Use async cred for pg in prod --- src/backend/fastapi_app/postgres_engine.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/backend/fastapi_app/postgres_engine.py b/src/backend/fastapi_app/postgres_engine.py index 8de2324b..3e4028f9 100644 --- a/src/backend/fastapi_app/postgres_engine.py +++ b/src/backend/fastapi_app/postgres_engine.py @@ -1,3 +1,4 @@ +import asyncio import logging import os @@ -13,8 +14,8 @@ async def create_postgres_engine(*, host, username, database, password, sslmode, azure_credential) -> AsyncEngine: - def get_password_from_azure_credential(): - token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") + async def get_password_from_azure_credential(): + token = await azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") return token.token token_based_password = False @@ -23,7 +24,7 @@ def get_password_from_azure_credential(): logger.info("Authenticating to Azure Database for PostgreSQL using Azure Identity...") if azure_credential is None: raise ValueError("Azure credential must be provided for Azure Database for PostgreSQL") - password = get_password_from_azure_credential() + password = await get_password_from_azure_credential() else: logger.info("Authenticating to PostgreSQL using password...") @@ -46,7 +47,8 @@ def register_custom_types(dbapi_connection: AdaptedConnection, *args): def update_password_token(dialect, conn_rec, cargs, cparams): if token_based_password: logger.info("Updating password token for Azure Database for PostgreSQL") - cparams["password"] = get_password_from_azure_credential() + loop = asyncio.get_event_loop() + cparams["password"] = loop.run_until_complete(get_password_from_azure_credential()) return engine From 
670b83e532ca76d903fcd247be0833636c3564dc Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Sat, 11 Apr 2026 05:45:47 +0000 Subject: [PATCH 7/8] Fix CI and mypy --- .github/workflows/app-tests.yaml | 16 +++++----------- tests/conftest.py | 2 +- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml index 0dbdbe1b..00ea63d9 100644 --- a/.github/workflows/app-tests.yaml +++ b/.github/workflows/app-tests.yaml @@ -27,11 +27,8 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macos-latest-xlarge", "macos-13", "windows-latest"] + os: ["ubuntu-latest", "macos-26", "windows-latest"] python_version: ["3.10", "3.11", "3.12"] - exclude: - - os: macos-latest-xlarge - python_version: "3.10" env: UV_SYSTEM_PYTHON: 1 POSTGRES_HOST: localhost @@ -43,15 +40,12 @@ jobs: - uses: actions/checkout@v4 - name: (MacOS) Install postgreSQL and pgvector using brew - if: matrix.os == 'macos-13' || matrix.os == 'macos-latest-xlarge' + if: matrix.os == 'macos-26' run: | - brew install postgresql@14 - brew link --overwrite postgresql@14 + brew install postgresql@18 + brew link --overwrite postgresql@18 brew install pgvector - # pgvector installs into the default PostgreSQL, copy extension files to postgresql@14 - cp /opt/homebrew/share/postgresql/extension/vector* /opt/homebrew/share/postgresql@14/extension/ - cp /opt/homebrew/lib/postgresql/vector.dylib /opt/homebrew/lib/postgresql@14/vector.dylib 2>/dev/null || true - brew services start postgresql@14 && sleep 1 + brew services start postgresql@18 && sleep 1 createuser -s ${{ env.POSTGRES_USERNAME }} psql -d postgres -c "ALTER USER ${{ env.POSTGRES_USERNAME }} WITH PASSWORD '${{ env.POSTGRES_PASSWORD }}'" psql -d postgres -c 'CREATE EXTENSION vector' diff --git a/tests/conftest.py b/tests/conftest.py index 4294c7cc..44006855 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -175,7 +175,7 @@ async def mock_acreate(*args, **kwargs): def 
mock_openai_chatcompletion(monkeypatch_session): class AsyncResponseEventIterator: def __init__(self, answer: str): - self.events = [] + self.events: list = [] # Split at << to simulate chunked responses if answer.find("<<") > -1: parts = answer.split("<<") From 6914ac807065bd766508bd21268649520cc4e449 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Sat, 11 Apr 2026 05:48:30 +0000 Subject: [PATCH 8/8] Exclude 3.10 from macos arm64, they don't work --- .github/workflows/app-tests.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml index 00ea63d9..9b39414b 100644 --- a/.github/workflows/app-tests.yaml +++ b/.github/workflows/app-tests.yaml @@ -29,6 +29,9 @@ jobs: matrix: os: ["ubuntu-latest", "macos-26", "windows-latest"] python_version: ["3.10", "3.11", "3.12"] + exclude: + - os: macos-26 + python_version: "3.10" env: UV_SYSTEM_PYTHON: 1 POSTGRES_HOST: localhost