diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 1e40ff4b..c82eca85 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -4,5 +4,6 @@ FROM mcr.microsoft.com/devcontainers/${IMAGE}
-ENV PYTHONUNBUFFERED 1
+ENV PYTHONUNBUFFERED=1
 
+# Install CLI tools for the dev container; apt lists are removed in the same layer so they never persist in the image.
 RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
-    && apt-get -y install --no-install-recommends postgresql-client \
-    && apt-get clean -y && rm -rf /var/lib/apt/lists/*
\ No newline at end of file
+    && apt-get -y install --no-install-recommends postgresql-client zstd \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/*
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 4c9a9b6e..006422d3 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -29,16 +29,13 @@
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance",
- "ms-python.vscode-python-envs",
"charliermarsh.ruff",
"mtxr.sqltools",
"mtxr.sqltools-driver-pg",
"esbenp.prettier-vscode",
"mechatroner.rainbow-csv",
"ms-vscode.vscode-node-azure-pack",
- "esbenp.prettier-vscode",
"twixes.pypi-assistant",
- "ms-python.vscode-python-envs",
"teamsdevapp.vscode-ai-foundry",
"ms-windows-ai-studio.windows-ai-studio"
],
diff --git a/.env.sample b/.env.sample
index 44517fd1..6fee2d2f 100644
--- a/.env.sample
+++ b/.env.sample
@@ -5,16 +5,15 @@ POSTGRES_PASSWORD=postgres
POSTGRES_DATABASE=postgres
POSTGRES_SSL=disable
-# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github:
+# OPENAI_CHAT_HOST can be either azure, openai, or ollama:
OPENAI_CHAT_HOST=azure
-# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github:
+# OPENAI_EMBED_HOST can be either azure, openai, or ollama:
OPENAI_EMBED_HOST=azure
# Needed for Azure:
# You also need to `azd auth login` if running this locally
AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com
-AZURE_OPENAI_VERSION=2024-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini
-AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini
+AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4
+AZURE_OPENAI_CHAT_MODEL=gpt-5.4
AZURE_OPENAI_EMBED_DEPLOYMENT=text-embedding-3-large
AZURE_OPENAI_EMBED_MODEL=text-embedding-3-large
AZURE_OPENAI_EMBED_DIMENSIONS=1024
@@ -35,9 +34,3 @@ OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1
OLLAMA_CHAT_MODEL=llama3.1
OLLAMA_EMBED_MODEL=nomic-embed-text
OLLAMA_EMBEDDING_COLUMN=embedding_nomic
-# Needed for GitHub Models:
-GITHUB_TOKEN=YOUR-GITHUB-TOKEN
-GITHUB_MODEL=openai/gpt-4o
-GITHUB_EMBED_MODEL=openai/text-embedding-3-large
-GITHUB_EMBED_DIMENSIONS=1024
-GITHUB_EMBEDDING_COLUMN=embedding_3l
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index ecef0bfa..df531f79 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -11,7 +11,7 @@ RAG on PostgreSQL is a Python FastAPI backend with React TypeScript frontend tha
Install the following tools before beginning development:
- **Python 3.10+** (3.12 recommended)
-- **Node.js 18+** for frontend development
+- **Node.js 18+** for frontend development
- **PostgreSQL 14+** with pgvector extension
- **Azure Developer CLI (azd)** for deployment
- **Docker Desktop** for dev containers (optional)
@@ -37,7 +37,7 @@ Run these commands in sequence. NEVER CANCEL any long-running commands:
```bash
# Ubuntu/Debian:
sudo apt update && sudo apt install -y postgresql-16-pgvector
-
+
# Start PostgreSQL and set password
sudo service postgresql start
sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"
@@ -97,7 +97,7 @@ Use "Frontend & Backend" configuration in the VS Code Run & Debug menu.
### Linting and Formatting (ALWAYS run before committing)
```bash
ruff check . # Lint code (takes <1 second)
-ruff format . # Format code (takes <1 second)
+ruff format . # Format code (takes <1 second)
mypy . --python-version 3.12 # Type check (takes ~42 seconds)
```
@@ -121,7 +121,7 @@ pytest tests/e2e.py --tracing=retain-on-failure
**CRITICAL TIMING INFORMATION** - Set these timeout values and NEVER CANCEL:
- **Dependencies install**: 90 seconds (use 180+ second timeout)
-- **Frontend npm install**: 22 seconds (use 60+ second timeout)
+- **Frontend npm install**: 22 seconds (use 60+ second timeout)
- **Frontend build**: 12 seconds (use 30+ second timeout)
- **MyPy type checking**: 42 seconds (use 90+ second timeout)
- **Full test suite**: 25 seconds (use 60+ second timeout)
@@ -156,7 +156,7 @@ pytest tests/e2e.py --tracing=retain-on-failure
# Test API endpoints
curl http://localhost:8000/items/1
# Should return JSON with item data
-
+
# Test frontend
curl http://localhost:8000/ | head -n 5
# Should return HTML with "RAG on PostgreSQL" title
@@ -226,9 +226,6 @@ The application supports multiple OpenAI providers:
3. **Ollama** (local):
Set `OPENAI_CHAT_HOST=ollama`
-4. **GitHub Models**:
- Set `OPENAI_CHAT_HOST=github`
-
## Common Issues and Solutions
### Database Connection Issues
@@ -267,7 +264,7 @@ Open `http://localhost:8089/` and point to your running application.
The application provides these REST API endpoints (view full docs at `http://localhost:8000/docs`):
- `GET /items/{id}` - Get specific item by ID
-- `GET /search` - Search items with text query
+- `GET /search` - Search items with text query
- `GET /similar` - Find similar items using vector search
- `POST /chat` - Chat with RAG system (requires OpenAI configuration)
- `POST /chat/stream` - Streaming chat responses
@@ -286,7 +283,7 @@ curl "http://localhost:8000/search?query=tent&limit=5"
**Quick ls -la output for repository root:**
```
.devcontainer/ # Dev container configuration
-.env.sample # Environment variables template
+.env.sample # Environment variables template
.github/ # GitHub Actions workflows
.gitignore # Git ignore patterns
.pre-commit-config.yaml # Pre-commit hook configuration
@@ -309,8 +306,8 @@ tests/ # Test suite
- **Always build and test locally before committing**
- **Use pre-commit hooks** - they run ruff automatically
- **Check the GitHub Actions** in `.github/workflows/` for CI requirements
-- **Reference the full README.md** for deployment and Azure-specific details
+- **Reference the full README.md** for deployment and Azure-specific details
- **Use VS Code with the Python and Ruff extensions** for the best development experience
- **Never skip the frontend build** - the backend serves static files from `src/backend/static/`
-This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment.
\ No newline at end of file
+This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment.
diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml
index acf59b6d..9b39414b 100644
--- a/.github/workflows/app-tests.yaml
+++ b/.github/workflows/app-tests.yaml
@@ -27,10 +27,10 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: ["ubuntu-latest", "macos-latest-xlarge", "macos-13", "windows-latest"]
+ os: ["ubuntu-latest", "macos-26", "windows-latest"]
python_version: ["3.10", "3.11", "3.12"]
exclude:
- - os: macos-latest-xlarge
+ - os: macos-26
python_version: "3.10"
env:
UV_SYSTEM_PYTHON: 1
@@ -43,12 +43,12 @@ jobs:
- uses: actions/checkout@v4
- name: (MacOS) Install postgreSQL and pgvector using brew
- if: matrix.os == 'macos-13' || matrix.os == 'macos-latest-xlarge'
+ if: matrix.os == 'macos-26'
run: |
- brew install postgresql@14
- brew link --overwrite postgresql@14
+ brew install postgresql@18
+ brew link --overwrite postgresql@18
brew install pgvector
- brew services start postgresql@14 && sleep 1
+ brew services start postgresql@18 && sleep 1
createuser -s ${{ env.POSTGRES_USERNAME }}
psql -d postgres -c "ALTER USER ${{ env.POSTGRES_USERNAME }} WITH PASSWORD '${{ env.POSTGRES_PASSWORD }}'"
psql -d postgres -c 'CREATE EXTENSION vector'
diff --git a/.github/workflows/evaluate.yaml b/.github/workflows/evaluate.yaml
index b5c77c4b..0a35af5f 100644
--- a/.github/workflows/evaluate.yaml
+++ b/.github/workflows/evaluate.yaml
@@ -34,7 +34,6 @@ jobs:
OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }}
OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
- AZURE_OPENAI_VERSION: ${{ vars.AZURE_OPENAI_VERSION }}
AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }}
AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }}
AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }}
diff --git a/.vscode/settings.json b/.vscode/settings.json
index c9eb00cc..4d91d2fb 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -36,5 +36,6 @@
"htmlcov": true,
".mypy_cache": true,
".coverage": true
- }
+ },
+ "python-envs.defaultEnvManager": "ms-python.python:system"
}
diff --git a/AGENTS.md b/AGENTS.md
index ae38cbf5..b41d2779 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -8,3 +8,19 @@ When adding new azd environment variables, update:
1. infra/main.parameters.json : Add the new parameter with a Bicep-friendly variable name and map to the new environment variable
1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `webAppEnv` object
1. .github/workflows/azure-dev.yml: Add the new environment variable under `env` section. If it's a @secure variable in main.bicep, it should come from `secrets`, otherwise from `vars`.
+
+## Upgrading Python packages
+
+1. Update the version constraint in src/backend/pyproject.toml
+
+2. Re-compile src/backend/requirements.txt from the src/backend folder:
+
+ ```shell
+ uv pip compile pyproject.toml -o requirements.txt --python-version 3.10
+ ```
+
+3. Reinstall from the repository root with:
+
+ ```shell
+ python -m pip install -r src/backend/requirements.txt
+ ```
diff --git a/azure.yaml b/azure.yaml
index 38c99b96..8a91e712 100644
--- a/azure.yaml
+++ b/azure.yaml
@@ -42,7 +42,6 @@ pipeline:
- OPENAI_CHAT_HOST
- OPENAI_EMBED_HOST
- AZURE_OPENAI_ENDPOINT
- - AZURE_OPENAI_VERSION
- AZURE_OPENAI_CHAT_DEPLOYMENT
- AZURE_OPENAI_CHAT_MODEL
- AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION
diff --git a/evals/eval_config.json b/evals/eval_config.json
index fe50ef52..65e084c7 100644
--- a/evals/eval_config.json
+++ b/evals/eval_config.json
@@ -8,8 +8,7 @@
"use_advanced_flow": true,
"top": 3,
"retrieval_mode": "hybrid",
- "temperature": 0.3,
- "seed": 42
+ "temperature": 0.3
}
},
"target_response_answer_jmespath": "message.content",
diff --git a/evals/evaluate.py b/evals/evaluate.py
index efb7f8f2..240d8741 100644
--- a/evals/evaluate.py
+++ b/evals/evaluate.py
@@ -68,8 +68,6 @@ def get_openai_config() -> dict:
openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"]
elif os.environ.get("OPENAI_CHAT_HOST") == "ollama":
raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
- elif os.environ.get("OPENAI_CHAT_HOST") == "github":
- raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
else:
logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"}
diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py
index 44410506..c1541621 100644
--- a/evals/generate_ground_truth.py
+++ b/evals/generate_ground_truth.py
@@ -3,12 +3,10 @@
import os
from collections.abc import Generator
from pathlib import Path
-from typing import Union
from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider
from dotenv_azd import load_azd_env
-from openai import AzureOpenAI, OpenAI
-from openai.types.chat import ChatCompletionToolParam
+from openai import OpenAI
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
@@ -17,32 +15,30 @@
logger = logging.getLogger("ragapp")
-def qa_pairs_tool(num_questions: int = 1) -> ChatCompletionToolParam:
+def qa_pairs_tool(num_questions: int = 1) -> dict:
return {
"type": "function",
- "function": {
- "name": "qa_pairs",
- "description": "Send in question and answer pairs for a customer-facing chat app",
- "parameters": {
- "type": "object",
- "properties": {
- "qa_list": {
- "type": "array",
- "description": f"List of {num_questions} question and answer pairs",
- "items": {
- "type": "object",
- "properties": {
- "question": {"type": "string", "description": "The question text"},
- "answer": {"type": "string", "description": "The answer text"},
- },
- "required": ["question", "answer"],
+ "name": "qa_pairs",
+ "description": "Send in question and answer pairs for a customer-facing chat app",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "qa_list": {
+ "type": "array",
+ "description": f"List of {num_questions} question and answer pairs",
+ "items": {
+ "type": "object",
+ "properties": {
+ "question": {"type": "string", "description": "The question text"},
+ "answer": {"type": "string", "description": "The answer text"},
},
- "minItems": num_questions,
- "maxItems": num_questions,
- }
- },
- "required": ["qa_list"],
+ "required": ["question", "answer"],
+ },
+ "minItems": num_questions,
+ "maxItems": num_questions,
+ }
},
+ "required": ["qa_list"],
},
}
@@ -67,7 +63,6 @@ def source_retriever() -> Generator[str, None, None]:
# for record in records:
# logger.info(f"Processing database record: {record.name}")
# yield f"## Product ID: [{record.id}]\n" + record.to_str_for_rag()
- # await self.openai_chat_client.chat.completions.create(
def source_to_text(source) -> str:
@@ -78,32 +73,29 @@ def answer_formatter(answer, source) -> str:
return f"{answer} [{source['id']}]"
-def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]:
+def get_openai_client() -> tuple[OpenAI, str]:
"""Return an OpenAI client based on the environment variables"""
- openai_client: Union[AzureOpenAI, OpenAI]
+ openai_client: OpenAI
OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST")
if OPENAI_CHAT_HOST == "azure":
+ azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
if api_key := os.getenv("AZURE_OPENAI_KEY"):
logger.info("Using Azure OpenAI Service with API Key from AZURE_OPENAI_KEY")
- openai_client = AzureOpenAI(
- api_version=os.environ["AZURE_OPENAI_VERSION"],
- azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+ openai_client = OpenAI(
+ base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
api_key=api_key,
)
else:
logger.info("Using Azure OpenAI Service with Azure Developer CLI Credential")
azure_credential = AzureDeveloperCliCredential(process_timeout=60, tenant_id=os.environ["AZURE_TENANT_ID"])
token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
- openai_client = AzureOpenAI(
- api_version=os.environ["AZURE_OPENAI_VERSION"],
- azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
- azure_ad_token_provider=token_provider,
+ openai_client = OpenAI(
+ base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
+ api_key=token_provider,
)
model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
elif OPENAI_CHAT_HOST == "ollama":
raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
- elif OPENAI_CHAT_HOST == "github":
- raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
else:
logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"])
@@ -123,18 +115,21 @@ def generate_ground_truth_data(num_questions_total: int, num_questions_per_sourc
if len(qa) > num_questions_total:
logger.info("Generated enough questions already, stopping")
break
- result = openai_client.chat.completions.create(
+ result = openai_client.responses.create(
model=model,
- messages=[
+ input=[
{"role": "system", "content": generate_prompt},
{"role": "user", "content": json.dumps(source)},
],
- tools=[qa_pairs_tool(num_questions=2)],
+ tools=[qa_pairs_tool(num_questions=2)], # type: ignore[list-item]
+ max_output_tokens=1000,
+ store=False,
)
- if not result.choices[0].message.tool_calls:
+ tool_calls = [item for item in result.output if item.type == "function_call"]
+ if not tool_calls:
logger.warning("No tool calls found in response, skipping")
continue
- qa_pairs = json.loads(result.choices[0].message.tool_calls[0].function.arguments)["qa_list"]
+ qa_pairs = json.loads(tool_calls[0].arguments)["qa_list"]
qa_pairs = [{"question": qa_pair["question"], "truth": qa_pair["answer"]} for qa_pair in qa_pairs]
qa.extend(qa_pairs)
diff --git a/infra/main.bicep b/infra/main.bicep
index b6e5d9a2..ea800759 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -131,7 +131,6 @@ param openAIEmbedHost string = 'azure'
@secure()
param openAIComKey string = ''
-param azureOpenAIAPIVersion string = '2024-03-01-preview'
@secure()
param azureOpenAIKey string = ''
@@ -385,10 +384,6 @@ var webAppEnv = union(azureOpenAIKeyEnv, openAIComKeyEnv, [
name: 'AZURE_OPENAI_ENDPOINT'
value: !empty(azureOpenAIEndpoint) ? azureOpenAIEndpoint : (deployAzureOpenAI ? openAI.outputs.endpoint : '')
}
- {
- name: 'AZURE_OPENAI_VERSION'
- value: openAIChatHost == 'azure' ? azureOpenAIAPIVersion : ''
- }
])
module web 'web.bicep' = {
@@ -613,7 +608,6 @@ output AZURE_OPENAI_RESOURCE_GROUP string = deployAzureOpenAI ? openAIResourceGr
output AZURE_OPENAI_ENDPOINT string = !empty(azureOpenAIEndpoint)
? azureOpenAIEndpoint
: (deployAzureOpenAI ? openAI.outputs.endpoint : '')
-output AZURE_OPENAI_VERSION string = azureOpenAIAPIVersion
output AZURE_OPENAI_CHAT_DEPLOYMENT string = deployAzureOpenAI ? chatDeploymentName : ''
output AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION string = deployAzureOpenAI ? chatDeploymentVersion : ''
output AZURE_OPENAI_CHAT_DEPLOYMENT_CAPACITY int = deployAzureOpenAI ? chatDeploymentCapacity : 0
diff --git a/infra/main.parameters.json b/infra/main.parameters.json
index 3e16a351..85ac1817 100644
--- a/infra/main.parameters.json
+++ b/infra/main.parameters.json
@@ -33,13 +33,13 @@
"value": "${OPENAI_CHAT_HOST=azure}"
},
"chatModelName": {
- "value": "${AZURE_OPENAI_CHAT_MODEL=gpt-4o-mini}"
+ "value": "${AZURE_OPENAI_CHAT_MODEL=gpt-5.4}"
},
"chatDeploymentName": {
- "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-4o-mini}"
+ "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-5.4}"
},
"chatDeploymentVersion":{
- "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-07-18}"
+ "value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2026-03-05}"
},
"chatDeploymentSku": {
"value": "${AZURE_OPENAI_CHAT_DEPLOYMENT_SKU=GlobalStandard}"
diff --git a/src/backend/fastapi_app/__init__.py b/src/backend/fastapi_app/__init__.py
index b760fdb2..cf04614e 100644
--- a/src/backend/fastapi_app/__init__.py
+++ b/src/backend/fastapi_app/__init__.py
@@ -2,12 +2,12 @@
import os
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
-from typing import TypedDict, Union
+from typing import TypedDict
import fastapi
from azure.monitor.opentelemetry import configure_azure_monitor
from dotenv import load_dotenv
-from openai import AsyncAzureOpenAI, AsyncOpenAI
+from openai import AsyncOpenAI
from opentelemetry.instrumentation.openai import OpenAIInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
@@ -27,8 +27,8 @@
class State(TypedDict):
sessionmaker: async_sessionmaker[AsyncSession]
context: FastAPIAppContext
- chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI]
- embed_client: Union[AsyncOpenAI, AsyncAzureOpenAI]
+ chat_client: AsyncOpenAI
+ embed_client: AsyncOpenAI
@asynccontextmanager
diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py
index 06d14a6b..fefcbdf1 100644
--- a/src/backend/fastapi_app/api_models.py
+++ b/src/backend/fastapi_app/api_models.py
@@ -5,17 +5,6 @@
from pydantic import BaseModel, Field
-class AIChatRoles(str, Enum):
- USER = "user"
- ASSISTANT = "assistant"
- SYSTEM = "system"
-
-
-class Message(BaseModel):
- content: str
- role: AIChatRoles = AIChatRoles.USER
-
-
class RetrievalMode(str, Enum):
TEXT = "text"
VECTORS = "vectors"
@@ -28,7 +17,6 @@ class ChatRequestOverrides(BaseModel):
retrieval_mode: RetrievalMode = RetrievalMode.HYBRID
use_advanced_flow: bool = True
prompt_template: Optional[str] = None
- seed: Optional[int] = None
class ChatRequestContext(BaseModel):
@@ -36,9 +24,8 @@ class ChatRequestContext(BaseModel):
class ChatRequest(BaseModel):
- messages: list[ResponseInputItemParam]
+ input: list[ResponseInputItemParam]
context: ChatRequestContext
- sessionState: Optional[Any] = None
class ItemPublic(BaseModel):
@@ -70,7 +57,6 @@ class ThoughtStep(BaseModel):
class RAGContext(BaseModel):
data_points: dict[int, ItemPublic]
thoughts: list[ThoughtStep]
- followup_questions: Optional[list[str]] = None
class ErrorResponse(BaseModel):
@@ -78,15 +64,14 @@ class ErrorResponse(BaseModel):
class RetrievalResponse(BaseModel):
- message: Message
+ output_text: str
context: RAGContext
- sessionState: Optional[Any] = None
class RetrievalResponseDelta(BaseModel):
- delta: Optional[Message] = None
+ type: str
+ delta: Optional[str] = None
context: Optional[RAGContext] = None
- sessionState: Optional[Any] = None
class ChatParams(ChatRequestOverrides):
diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py
index 2715819e..fdd70632 100644
--- a/src/backend/fastapi_app/dependencies.py
+++ b/src/backend/fastapi_app/dependencies.py
@@ -1,11 +1,11 @@
import logging
import os
from collections.abc import AsyncGenerator
-from typing import Annotated, Optional, Union
+from typing import Annotated, Optional
-import azure.identity
+import azure.identity.aio
from fastapi import Depends, Request
-from openai import AsyncAzureOpenAI, AsyncOpenAI
+from openai import AsyncOpenAI
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
@@ -17,7 +17,7 @@ class OpenAIClient(BaseModel):
OpenAI client
"""
- client: Union[AsyncOpenAI, AsyncAzureOpenAI]
+ client: AsyncOpenAI
model_config = {"arbitrary_types_allowed": True}
@@ -51,26 +51,18 @@ async def common_parameters():
openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text"
openai_embed_dimensions = None
embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic"
- elif OPENAI_EMBED_HOST == "github":
- openai_embed_deployment = None
- openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "openai/text-embedding-3-large"
- openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024))
- embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l"
else:
openai_embed_deployment = None
openai_embed_model = os.getenv("OPENAICOM_EMBED_MODEL") or "text-embedding-3-large"
openai_embed_dimensions = int(os.getenv("OPENAICOM_EMBED_DIMENSIONS", 1024))
embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN") or "embedding_3l"
if OPENAI_CHAT_HOST == "azure":
- openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "gpt-4o-mini"
- openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL") or "gpt-4o-mini"
+ openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "gpt-5.4"
+ openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL") or "gpt-5.4"
elif OPENAI_CHAT_HOST == "ollama":
openai_chat_deployment = None
openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL") or "phi3:3.8b"
openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text"
- elif OPENAI_CHAT_HOST == "github":
- openai_chat_deployment = None
- openai_chat_model = os.getenv("GITHUB_MODEL") or "openai/gpt-4o"
else:
openai_chat_deployment = None
openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo"
@@ -84,10 +76,10 @@ async def common_parameters():
)
-async def get_azure_credential() -> Union[
- azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential
-]:
- azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential]
+async def get_azure_credential() -> (
+ azure.identity.aio.AzureDeveloperCliCredential | azure.identity.aio.ManagedIdentityCredential
+):
+ azure_credential: azure.identity.aio.AzureDeveloperCliCredential | azure.identity.aio.ManagedIdentityCredential
try:
if client_id := os.getenv("APP_IDENTITY_ID"):
# Authenticate using a user-assigned managed identity on Azure
@@ -96,14 +88,17 @@ async def get_azure_credential() -> Union[
                 "Using managed identity for client ID %s",
                 client_id,
             )
-            azure_credential = azure.identity.ManagedIdentityCredential(client_id=client_id)
+            azure_credential = azure.identity.aio.ManagedIdentityCredential(client_id=client_id)
         else:
+            # Keep the explicit 60-second process timeout the sync credentials used instead of the library default.
             if tenant_id := os.getenv("AZURE_TENANT_ID"):
                 logger.info("Authenticating to Azure using Azure Developer CLI Credential for tenant %s", tenant_id)
-                azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=tenant_id, process_timeout=60)
+                azure_credential = azure.identity.aio.AzureDeveloperCliCredential(
+                    tenant_id=tenant_id, process_timeout=60
+                )
             else:
                 logger.info("Authenticating to Azure using Azure Developer CLI Credential")
-                azure_credential = azure.identity.AzureDeveloperCliCredential(process_timeout=60)
+                azure_credential = azure.identity.aio.AzureDeveloperCliCredential(process_timeout=60)
         return azure_credential
     except Exception as e:
         logger.warning("Failed to authenticate to Azure: %s", e)
diff --git a/src/backend/fastapi_app/embeddings.py b/src/backend/fastapi_app/embeddings.py
index 0dccec3e..1a0581b7 100644
--- a/src/backend/fastapi_app/embeddings.py
+++ b/src/backend/fastapi_app/embeddings.py
@@ -1,11 +1,11 @@
-from typing import Optional, TypedDict, Union
+from typing import Optional, TypedDict
-from openai import AsyncAzureOpenAI, AsyncOpenAI
+from openai import AsyncOpenAI
async def compute_text_embedding(
q: str,
- openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ openai_client: AsyncOpenAI,
embed_model: str,
embed_deployment: Optional[str] = None,
embedding_dimensions: Optional[int] = None,
diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py
index b704dc9d..9a8bde14 100644
--- a/src/backend/fastapi_app/openai_clients.py
+++ b/src/backend/fastapi_app/openai_clients.py
@@ -1,79 +1,68 @@
import logging
import os
-from typing import Union
-import azure.identity
+import azure.identity.aio
import openai
logger = logging.getLogger("ragapp")
async def create_openai_chat_client(
- azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None],
-) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]:
- openai_chat_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]
+ azure_credential: azure.identity.aio.AzureDeveloperCliCredential
+ | azure.identity.aio.ManagedIdentityCredential
+ | None,
+) -> openai.AsyncOpenAI:
+ openai_chat_client: openai.AsyncOpenAI
OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST")
if OPENAI_CHAT_HOST == "azure":
- api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-10-21"
azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_deployment = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
if api_key := os.getenv("AZURE_OPENAI_KEY"):
logger.info(
- "Setting up Azure OpenAI client for chat completions using API key, endpoint %s, deployment %s",
+ "Setting up Azure OpenAI client for chat using API key, endpoint %s, deployment %s",
azure_endpoint,
azure_deployment,
)
- openai_chat_client = openai.AsyncAzureOpenAI(
- api_version=api_version,
- azure_endpoint=azure_endpoint,
- azure_deployment=azure_deployment,
+ openai_chat_client = openai.AsyncOpenAI(
+ base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
api_key=api_key,
)
elif azure_credential:
logger.info(
- "Setting up Azure OpenAI client for chat completions using Azure Identity, endpoint %s, deployment %s",
+ "Setting up Azure OpenAI client for chat using Azure Identity, endpoint %s, deployment %s",
azure_endpoint,
azure_deployment,
)
- token_provider = azure.identity.get_bearer_token_provider(
+ token_provider = azure.identity.aio.get_bearer_token_provider(
azure_credential, "https://cognitiveservices.azure.com/.default"
)
- openai_chat_client = openai.AsyncAzureOpenAI(
- api_version=api_version,
- azure_endpoint=azure_endpoint,
- azure_deployment=azure_deployment,
- azure_ad_token_provider=token_provider,
+ openai_chat_client = openai.AsyncOpenAI(
+ base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
+ api_key=token_provider,
)
else:
raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.")
elif OPENAI_CHAT_HOST == "ollama":
- logger.info("Setting up OpenAI client for chat completions using Ollama")
+ logger.info("Setting up OpenAI client for chat using Ollama")
openai_chat_client = openai.AsyncOpenAI(
base_url=os.getenv("OLLAMA_ENDPOINT"),
api_key="nokeyneeded",
)
- elif OPENAI_CHAT_HOST == "github":
- logger.info("Setting up OpenAI client for chat completions using GitHub Models")
- github_model = os.getenv("GITHUB_MODEL", "openai/gpt-4o")
- logger.info(f"Using GitHub Models with model: {github_model}")
- openai_chat_client = openai.AsyncOpenAI(
- base_url="https://models.github.ai/inference",
- api_key=os.getenv("GITHUB_TOKEN"),
- )
else:
- logger.info("Setting up OpenAI client for chat completions using OpenAI.com API key")
+ logger.info("Setting up OpenAI client for chat using OpenAI.com API key")
openai_chat_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY"))
return openai_chat_client
async def create_openai_embed_client(
- azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None],
-) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]:
- openai_embed_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]
+ azure_credential: azure.identity.aio.AzureDeveloperCliCredential
+ | azure.identity.aio.ManagedIdentityCredential
+ | None,
+) -> openai.AsyncOpenAI:
+ openai_embed_client: openai.AsyncOpenAI
OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST")
if OPENAI_EMBED_HOST == "azure":
- api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-03-01-preview"
azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_deployment = os.environ["AZURE_OPENAI_EMBED_DEPLOYMENT"]
if api_key := os.getenv("AZURE_OPENAI_KEY"):
@@ -82,10 +71,8 @@ async def create_openai_embed_client(
azure_endpoint,
azure_deployment,
)
- openai_embed_client = openai.AsyncAzureOpenAI(
- api_version=api_version,
- azure_endpoint=azure_endpoint,
- azure_deployment=azure_deployment,
+ openai_embed_client = openai.AsyncOpenAI(
+ base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
api_key=api_key,
)
elif azure_credential:
@@ -94,14 +81,12 @@ async def create_openai_embed_client(
azure_endpoint,
azure_deployment,
)
- token_provider = azure.identity.get_bearer_token_provider(
+ token_provider = azure.identity.aio.get_bearer_token_provider(
azure_credential, "https://cognitiveservices.azure.com/.default"
)
- openai_embed_client = openai.AsyncAzureOpenAI(
- api_version=api_version,
- azure_endpoint=azure_endpoint,
- azure_deployment=azure_deployment,
- azure_ad_token_provider=token_provider,
+ openai_embed_client = openai.AsyncOpenAI(
+ base_url=f"{azure_endpoint.rstrip('/')}/openai/v1/",
+ api_key=token_provider,
)
else:
raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.")
@@ -111,14 +96,6 @@ async def create_openai_embed_client(
base_url=os.getenv("OLLAMA_ENDPOINT"),
api_key="nokeyneeded",
)
- elif OPENAI_EMBED_HOST == "github":
- logger.info("Setting up OpenAI client for embeddings using GitHub Models")
- github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "openai/text-embedding-3-small")
- logger.info(f"Using GitHub Models with embedding model: {github_embed_model}")
- openai_embed_client = openai.AsyncOpenAI(
- base_url="https://models.github.ai/inference",
- api_key=os.getenv("GITHUB_TOKEN"),
- )
else:
logger.info("Setting up OpenAI client for embeddings using OpenAI.com API key")
openai_embed_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY"))
diff --git a/src/backend/fastapi_app/postgres_engine.py b/src/backend/fastapi_app/postgres_engine.py
index 8de2324b..3e4028f9 100644
--- a/src/backend/fastapi_app/postgres_engine.py
+++ b/src/backend/fastapi_app/postgres_engine.py
@@ -1,3 +1,4 @@
+import asyncio
import logging
import os
@@ -13,8 +14,8 @@
async def create_postgres_engine(*, host, username, database, password, sslmode, azure_credential) -> AsyncEngine:
- def get_password_from_azure_credential():
- token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default")
+ async def get_password_from_azure_credential():
+ token = await azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default")
return token.token
token_based_password = False
@@ -23,7 +24,7 @@ def get_password_from_azure_credential():
logger.info("Authenticating to Azure Database for PostgreSQL using Azure Identity...")
if azure_credential is None:
raise ValueError("Azure credential must be provided for Azure Database for PostgreSQL")
- password = get_password_from_azure_credential()
+ password = await get_password_from_azure_credential()
else:
logger.info("Authenticating to PostgreSQL using password...")
@@ -46,7 +47,12 @@ def register_custom_types(dbapi_connection: AdaptedConnection, *args):
def update_password_token(dialect, conn_rec, cargs, cparams):
if token_based_password:
logger.info("Updating password token for Azure Database for PostgreSQL")
- cparams["password"] = get_password_from_azure_credential()
+ # NOTE(review): this handler is synchronous and is presumably registered on the async engine's
+ # sync_engine (decorator not shown in this hunk), so it runs while the asyncio loop is already
+ # running; loop.run_until_complete() would raise "RuntimeError: This event loop is already running".
+ # SQLAlchemy's await_only() awaits the coroutine through the greenlet bridge used for such hooks.
+ from sqlalchemy.util import await_only
+ cparams["password"] = await_only(get_password_from_azure_credential())
return engine
diff --git a/src/backend/fastapi_app/prompts/query_fewshots.json b/src/backend/fastapi_app/prompts/query_fewshots.json
index 0ef450fd..4fe3ae10 100644
--- a/src/backend/fastapi_app/prompts/query_fewshots.json
+++ b/src/backend/fastapi_app/prompts/query_fewshots.json
@@ -4,14 +4,14 @@
"content": "good options for climbing gear that can be used outside?"
},
{
- "id": "madeup",
+ "id": "fc_madeup1",
"call_id": "call_abc123",
"name": "search_database",
"arguments": "{\"search_query\":\"climbing gear outside\"}",
"type": "function_call"
},
{
- "id": "madeupoutput",
+ "id": "fc_madeupoutput1",
"call_id": "call_abc123",
"output": "Search results for climbing gear that can be used outside: ...",
"type": "function_call_output"
@@ -21,14 +21,14 @@
"content": "are there any shoes less than $50?"
},
{
- "id": "madeup",
+ "id": "fc_madeup2",
"call_id": "call_abc456",
"name": "search_database",
"arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
"type": "function_call"
},
{
- "id": "madeupoutput",
+ "id": "fc_madeupoutput2",
"call_id": "call_abc456",
"output": "Search results for shoes cheaper than 50: ...",
"type": "function_call_output"
diff --git a/src/backend/fastapi_app/query_rewriter.py b/src/backend/fastapi_app/query_rewriter.py
index aa0ad466..122c0ed9 100644
--- a/src/backend/fastapi_app/query_rewriter.py
+++ b/src/backend/fastapi_app/query_rewriter.py
@@ -1,73 +1,62 @@
import json
-from openai.types.chat import (
- ChatCompletion,
- ChatCompletionToolParam,
-)
+from openai.types.responses import Response, ResponseFunctionToolCall
-def build_search_function() -> list[ChatCompletionToolParam]:
- return [
- {
- "type": "function",
- "function": {
- "name": "search_database",
- "description": "Search PostgreSQL database for relevant products based on user query",
- "parameters": {
+def build_search_function() -> dict:
+ return {
+ "type": "function",
+ "name": "search_database",
+ "description": "Search PostgreSQL database for relevant products based on user query",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "search_query": {
+ "type": "string",
+ "description": "Query string to use for full text search, e.g. 'red shoes'",
+ },
+ "price_filter": {
"type": "object",
+ "description": "Filter search results based on price of the product",
"properties": {
- "search_query": {
+ "comparison_operator": {
"type": "string",
- "description": "Query string to use for full text search, e.g. 'red shoes'",
+ "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa
+ },
+ "value": {
+ "type": "number",
+ "description": "Value to compare against, e.g. 30",
},
- "price_filter": {
- "type": "object",
- "description": "Filter search results based on price of the product",
- "properties": {
- "comparison_operator": {
- "type": "string",
- "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa
- },
- "value": {
- "type": "number",
- "description": "Value to compare against, e.g. 30",
- },
- },
+ },
+ },
+ "brand_filter": {
+ "type": "object",
+ "description": "Filter search results based on brand of the product",
+ "properties": {
+ "comparison_operator": {
+ "type": "string",
+ "description": "Operator to compare the column value, either '=' or '!='",
},
- "brand_filter": {
- "type": "object",
- "description": "Filter search results based on brand of the product",
- "properties": {
- "comparison_operator": {
- "type": "string",
- "description": "Operator to compare the column value, either '=' or '!='",
- },
- "value": {
- "type": "string",
- "description": "Value to compare against, e.g. AirStrider",
- },
- },
+ "value": {
+ "type": "string",
+ "description": "Value to compare against, e.g. AirStrider",
},
},
- "required": ["search_query"],
},
},
- }
- ]
+ "required": ["search_query"],
+ },
+ }
-def extract_search_arguments(original_user_query: str, chat_completion: ChatCompletion):
- response_message = chat_completion.choices[0].message
+def extract_search_arguments(original_user_query: str, response: Response):
search_query = None
filters = []
- if response_message.tool_calls:
- for tool in response_message.tool_calls:
- if tool.type != "function":
- continue
- function = tool.function
- if function.name == "search_database":
- arg = json.loads(function.arguments)
- # Even though its required, search_query is not always specified
+ tool_calls = [item for item in response.output if isinstance(item, ResponseFunctionToolCall)]
+ if tool_calls:
+ for tool_call in tool_calls:
+ if tool_call.name == "search_database":
+ arg = json.loads(tool_call.arguments)
search_query = arg.get("search_query", original_user_query)
if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict):
price_filter = arg["price_filter"]
@@ -87,6 +76,6 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp
"value": brand_filter["value"],
}
)
- elif query_text := response_message.content:
- search_query = query_text.strip()
+ elif response.output_text:
+ search_query = response.output_text.strip()
return search_query, filters
diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py
index eb53aa6a..501df89f 100644
--- a/src/backend/fastapi_app/rag_advanced.py
+++ b/src/backend/fastapi_app/rag_advanced.py
@@ -1,27 +1,25 @@
import json
from collections.abc import AsyncGenerator
-from typing import Optional, Union
+from typing import Optional
from agents import (
Agent,
ItemHelpers,
ModelSettings,
- OpenAIChatCompletionsModel,
+ OpenAIResponsesModel,
Runner,
ToolCallOutputItem,
function_tool,
set_tracing_disabled,
)
-from openai import AsyncAzureOpenAI, AsyncOpenAI
+from openai import AsyncOpenAI
from openai.types.responses import EasyInputMessageParam, ResponseInputItemParam, ResponseTextDeltaEvent
from fastapi_app.api_models import (
- AIChatRoles,
BrandFilter,
ChatRequestOverrides,
Filter,
ItemPublic,
- Message,
PriceFilter,
RAGContext,
RetrievalResponse,
@@ -45,7 +43,7 @@ def __init__(
messages: list[ResponseInputItemParam],
overrides: ChatRequestOverrides,
searcher: PostgresSearcher,
- openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ openai_chat_client: AsyncOpenAI,
chat_model: str,
chat_deployment: Optional[str], # Not needed for non-Azure OpenAI
):
@@ -54,7 +52,7 @@ def __init__(
self.model_for_thoughts = (
{"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model}
)
- openai_agents_model = OpenAIChatCompletionsModel(
+ openai_agents_model = OpenAIResponsesModel(
model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client
)
self.search_agent = Agent(
@@ -71,7 +69,6 @@ def __init__(
model_settings=ModelSettings(
temperature=self.chat_params.temperature,
max_tokens=self.chat_params.response_token_limit,
- extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {},
),
)
@@ -125,7 +122,7 @@ async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]:
thoughts = [
ThoughtStep(
title="Prompt to generate search arguments",
- description=[{"content": self.query_prompt_template}]
+ description=[{"role": "system", "content": self.query_prompt_template}]
+ ItemHelpers.input_to_new_input_list(run_results.input),
props=self.model_for_thoughts,
),
@@ -158,14 +155,14 @@ async def answer(
)
return RetrievalResponse(
- message=Message(content=str(run_results.final_output), role=AIChatRoles.ASSISTANT),
+ output_text=str(run_results.final_output),
context=RAGContext(
data_points={item.id: item for item in items},
thoughts=earlier_thoughts
+ [
ThoughtStep(
title="Prompt to generate answer",
- description=[{"content": self.answer_prompt_template}]
+ description=[{"role": "system", "content": self.answer_prompt_template}]
+ ItemHelpers.input_to_new_input_list(run_results.input),
props=self.model_for_thoughts,
),
@@ -185,13 +182,14 @@ async def answer_stream(
)
yield RetrievalResponseDelta(
+ type="response.context",
context=RAGContext(
data_points={item.id: item for item in items},
thoughts=earlier_thoughts
+ [
ThoughtStep(
title="Prompt to generate answer",
- description=[{"content": self.answer_prompt_template}]
+ description=[{"role": "system", "content": self.answer_prompt_template}]
+ ItemHelpers.input_to_new_input_list(run_results.input),
props=self.model_for_thoughts,
),
@@ -201,5 +199,5 @@ async def answer_stream(
async for event in run_results.stream_events():
if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
- yield RetrievalResponseDelta(delta=Message(content=str(event.data.delta), role=AIChatRoles.ASSISTANT))
+ yield RetrievalResponseDelta(type="response.output_text.delta", delta=str(event.data.delta))
return
diff --git a/src/backend/fastapi_app/rag_base.py b/src/backend/fastapi_app/rag_base.py
index 54e633c2..557c1049 100644
--- a/src/backend/fastapi_app/rag_base.py
+++ b/src/backend/fastapi_app/rag_base.py
@@ -32,7 +32,6 @@ def get_chat_params(self, messages: list[ResponseInputItemParam], overrides: Cha
return ChatParams(
top=overrides.top,
temperature=overrides.temperature,
- seed=overrides.seed,
retrieval_mode=overrides.retrieval_mode,
use_advanced_flow=overrides.use_advanced_flow,
response_token_limit=response_token_limit,
diff --git a/src/backend/fastapi_app/rag_simple.py b/src/backend/fastapi_app/rag_simple.py
index 69126618..1b8fd86c 100644
--- a/src/backend/fastapi_app/rag_simple.py
+++ b/src/backend/fastapi_app/rag_simple.py
@@ -1,15 +1,13 @@
from collections.abc import AsyncGenerator
-from typing import Optional, Union
+from typing import Optional
-from agents import Agent, ItemHelpers, ModelSettings, OpenAIChatCompletionsModel, Runner, set_tracing_disabled
-from openai import AsyncAzureOpenAI, AsyncOpenAI
+from agents import Agent, ItemHelpers, ModelSettings, OpenAIResponsesModel, Runner, set_tracing_disabled
+from openai import AsyncOpenAI
from openai.types.responses import ResponseInputItemParam, ResponseTextDeltaEvent
from fastapi_app.api_models import (
- AIChatRoles,
ChatRequestOverrides,
ItemPublic,
- Message,
RAGContext,
RetrievalResponse,
RetrievalResponseDelta,
@@ -28,7 +26,7 @@ def __init__(
messages: list[ResponseInputItemParam],
overrides: ChatRequestOverrides,
searcher: PostgresSearcher,
- openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ openai_chat_client: AsyncOpenAI,
chat_model: str,
chat_deployment: Optional[str], # Not needed for non-Azure OpenAI
):
@@ -37,7 +35,7 @@ def __init__(
self.model_for_thoughts = (
{"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model}
)
- openai_agents_model = OpenAIChatCompletionsModel(
+ openai_agents_model = OpenAIResponsesModel(
model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client
)
self.answer_agent = Agent(
@@ -47,7 +45,6 @@ def __init__(
model_settings=ModelSettings(
temperature=self.chat_params.temperature,
max_tokens=self.chat_params.response_token_limit,
- extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {},
),
)
@@ -91,14 +88,14 @@ async def answer(
)
return RetrievalResponse(
- message=Message(content=str(run_results.final_output), role=AIChatRoles.ASSISTANT),
+ output_text=str(run_results.final_output),
context=RAGContext(
data_points={item.id: item for item in items},
thoughts=earlier_thoughts
+ [
ThoughtStep(
title="Prompt to generate answer",
- description=[{"content": self.answer_prompt_template}]
+ description=[{"role": "system", "content": self.answer_prompt_template}]
+ ItemHelpers.input_to_new_input_list(run_results.input),
props=self.model_for_thoughts,
),
@@ -118,13 +115,14 @@ async def answer_stream(
)
yield RetrievalResponseDelta(
+ type="response.context",
context=RAGContext(
data_points={item.id: item for item in items},
thoughts=earlier_thoughts
+ [
ThoughtStep(
title="Prompt to generate answer",
- description=[{"content": self.answer_agent.instructions}]
+ description=[{"role": "system", "content": self.answer_agent.instructions}]
+ ItemHelpers.input_to_new_input_list(run_results.input),
props=self.model_for_thoughts,
),
@@ -134,5 +132,5 @@ async def answer_stream(
async for event in run_results.stream_events():
if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
- yield RetrievalResponseDelta(delta=Message(content=str(event.data.delta), role=AIChatRoles.ASSISTANT))
+ yield RetrievalResponseDelta(type="response.output_text.delta", delta=str(event.data.delta))
return
diff --git a/src/backend/fastapi_app/routes/api_routes.py b/src/backend/fastapi_app/routes/api_routes.py
index f566886c..5821eaf7 100644
--- a/src/backend/fastapi_app/routes/api_routes.py
+++ b/src/backend/fastapi_app/routes/api_routes.py
@@ -121,7 +121,7 @@ async def chat_handler(
rag_flow: Union[SimpleRAGChat, AdvancedRAGChat]
if chat_request.context.overrides.use_advanced_flow:
rag_flow = AdvancedRAGChat(
- messages=chat_request.messages,
+ messages=chat_request.input,
overrides=chat_request.context.overrides,
searcher=searcher,
openai_chat_client=openai_chat.client,
@@ -130,7 +130,7 @@ async def chat_handler(
)
else:
rag_flow = SimpleRAGChat(
- messages=chat_request.messages,
+ messages=chat_request.input,
overrides=chat_request.context.overrides,
searcher=searcher,
openai_chat_client=openai_chat.client,
@@ -169,7 +169,7 @@ async def chat_stream_handler(
rag_flow: Union[SimpleRAGChat, AdvancedRAGChat]
if chat_request.context.overrides.use_advanced_flow:
rag_flow = AdvancedRAGChat(
- messages=chat_request.messages,
+ messages=chat_request.input,
overrides=chat_request.context.overrides,
searcher=searcher,
openai_chat_client=openai_chat.client,
@@ -178,7 +178,7 @@ async def chat_stream_handler(
)
else:
rag_flow = SimpleRAGChat(
- messages=chat_request.messages,
+ messages=chat_request.input,
overrides=chat_request.context.overrides,
searcher=searcher,
openai_chat_client=openai_chat.client,
diff --git a/src/backend/fastapi_app/update_embeddings.py b/src/backend/fastapi_app/update_embeddings.py
index b36113b2..83744ead 100644
--- a/src/backend/fastapi_app/update_embeddings.py
+++ b/src/backend/fastapi_app/update_embeddings.py
@@ -29,8 +29,6 @@ async def update_embeddings(in_seed_data=False):
embedding_column = os.getenv("AZURE_OPENAI_EMBEDDING_COLUMN", "embedding_3l")
elif OPENAI_EMBED_HOST == "ollama":
embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN", "embedding_nomic")
- elif OPENAI_EMBED_HOST == "github":
- embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN", "embedding_3l")
else:
embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l")
logger.info(f"Updating embeddings in column: {embedding_column}")
diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml
index 7ede97c9..9874b924 100644
--- a/src/backend/pyproject.toml
+++ b/src/backend/pyproject.toml
@@ -12,12 +12,12 @@ dependencies = [
"asyncpg>=0.29.0,<1.0.0",
"SQLAlchemy[asyncio]>=2.0.30,<3.0.0",
"pgvector>=0.3.0,<0.4.0",
- "openai>=1.34.0,<2.0.0",
+ "openai>=1.108.1,<3.0.0",
"azure-monitor-opentelemetry>=1.6.0,<2.0.0",
"opentelemetry-instrumentation-sqlalchemy",
"opentelemetry-instrumentation-aiohttp-client",
"opentelemetry-instrumentation-openai",
- "openai-agents"
+ "openai-agents>=0.13.6"
]
[build-system]
diff --git a/src/backend/requirements.txt b/src/backend/requirements.txt
index b83031b6..167de074 100644
--- a/src/backend/requirements.txt
+++ b/src/backend/requirements.txt
@@ -1,5 +1,5 @@
# This file was autogenerated by uv via the following command:
-# uv pip compile pyproject.toml -o requirements_new.txt --python-version 3.10
+# uv pip compile pyproject.toml -o requirements.txt --python-version 3.10
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.12.14
@@ -59,8 +59,6 @@ charset-normalizer==3.4.2
# via requests
click==8.2.1
# via uvicorn
-colorama==0.4.6
- # via griffe
cryptography==45.0.5
# via
# azure-identity
@@ -86,7 +84,7 @@ frozenlist==1.7.0
# aiosignal
greenlet==3.2.3
# via sqlalchemy
-griffe==1.7.3
+griffelib==2.0.2
# via openai-agents
h11==0.16.0
# via
@@ -118,7 +116,7 @@ jsonschema-specifications==2025.4.1
# via jsonschema
marshmallow==4.0.0
# via environs
-mcp==1.11.0
+mcp==1.27.0
# via openai-agents
msal==1.32.3
# via
@@ -136,11 +134,11 @@ numpy==2.2.6
# via pgvector
oauthlib==3.3.1
# via requests-oauthlib
-openai==1.96.1
+openai==2.31.0
# via
# fastapi-app (pyproject.toml)
# openai-agents
-openai-agents==0.2.0
+openai-agents==0.13.6
# via fastapi-app (pyproject.toml)
opentelemetry-api==1.31.1
# via
@@ -256,19 +254,21 @@ psutil==7.0.0
# via azure-monitor-opentelemetry-exporter
pycparser==2.22
# via cffi
-pydantic==2.11.7
+pydantic==2.12.5
# via
# fastapi
# mcp
# openai
# openai-agents
# pydantic-settings
-pydantic-core==2.33.2
+pydantic-core==2.41.5
# via pydantic
pydantic-settings==2.10.1
# via mcp
pyjwt==2.10.1
- # via msal
+ # via
+ # mcp
+ # msal
python-dotenv==1.1.1
# via
# fastapi-app (pyproject.toml)
@@ -327,6 +327,7 @@ typing-extensions==4.14.1
# exceptiongroup
# fastapi
# marshmallow
+ # mcp
# multidict
# openai
# openai-agents
@@ -338,8 +339,9 @@ typing-extensions==4.14.1
# starlette
# typing-inspection
# uvicorn
-typing-inspection==0.4.1
+typing-inspection==0.4.2
# via
+ # mcp
# pydantic
# pydantic-settings
urllib3==2.5.0
diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json
index 252ddabd..b157ccc9 100644
--- a/src/frontend/package-lock.json
+++ b/src/frontend/package-lock.json
@@ -13,10 +13,10 @@
"@fluentui/react": "^8.112.5",
"@fluentui/react-components": "^9.37.3",
"@fluentui/react-icons": "^2.0.221",
- "@microsoft/ai-chat-protocol": "1.0.0-beta.20240610.1",
"@react-spring/web": "^9.7.3",
"dompurify": "^3.2.4",
"marked": "^9.1.6",
+ "ndjson-readablestream": "^1.4.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-router-dom": "^6.18.0",
@@ -2823,14 +2823,6 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
- "node_modules/@microsoft/ai-chat-protocol": {
- "version": "1.0.0-beta.20240610.1",
- "resolved": "https://registry.npmjs.org/@microsoft/ai-chat-protocol/-/ai-chat-protocol-1.0.0-beta.20240610.1.tgz",
- "integrity": "sha512-VGRt4DTCnoCKLqXs1H+3F9yeD8kTATktWxL4j2OUeOoqEiqWUiNm66qQMBzQJRv9Oi+vV9weQyZ6O6mHrf91HQ==",
- "dependencies": {
- "@typespec/ts-http-runtime": "^1.0.0-alpha.20240228.1"
- }
- },
"node_modules/@microsoft/load-themed-styles": {
"version": "1.10.295",
"license": "MIT"
@@ -3323,19 +3315,6 @@
"version": "2.0.7",
"license": "MIT"
},
- "node_modules/@typespec/ts-http-runtime": {
- "version": "1.0.0-alpha.20240610.1",
- "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-1.0.0-alpha.20240610.1.tgz",
- "integrity": "sha512-f1pHRnMpCZG1u7EucgZ00E9MpqI/HpZZ7FOu8oub/QH/9ki+5BtRbQfM17EDTi5w5JDWlp9Os+7fQVWLidozKQ==",
- "dependencies": {
- "http-proxy-agent": "^7.0.0",
- "https-proxy-agent": "^7.0.0",
- "tslib": "^2.6.2"
- },
- "engines": {
- "node": ">=18.0.0"
- }
- },
"node_modules/@vitejs/plugin-react": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.4.tgz",
@@ -3356,17 +3335,6 @@
"vite": "^4.2.0 || ^5.0.0 || ^6.0.0"
}
},
- "node_modules/agent-base": {
- "version": "7.1.1",
- "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
- "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
- "dependencies": {
- "debug": "^4.3.4"
- },
- "engines": {
- "node": ">= 14"
- }
- },
"node_modules/browserslist": {
"version": "4.24.4",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.4.tgz",
@@ -3466,6 +3434,7 @@
},
"node_modules/debug": {
"version": "4.3.4",
+ "dev": true,
"license": "MIT",
"dependencies": {
"ms": "2.1.2"
@@ -3658,30 +3627,6 @@
"node": "*"
}
},
- "node_modules/http-proxy-agent": {
- "version": "7.0.2",
- "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
- "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
- "dependencies": {
- "agent-base": "^7.1.0",
- "debug": "^4.3.4"
- },
- "engines": {
- "node": ">= 14"
- }
- },
- "node_modules/https-proxy-agent": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz",
- "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==",
- "dependencies": {
- "agent-base": "^7.0.2",
- "debug": "4"
- },
- "engines": {
- "node": ">= 14"
- }
- },
"node_modules/is-alphabetical": {
"version": "1.0.4",
"license": "MIT",
@@ -3807,6 +3752,7 @@
},
"node_modules/ms": {
"version": "2.1.2",
+ "dev": true,
"license": "MIT"
},
"node_modules/nanoid": {
@@ -3828,6 +3774,12 @@
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
}
},
+ "node_modules/ndjson-readablestream": {
+ "version": "1.4.0",
+ "resolved": "https://registry.npmjs.org/ndjson-readablestream/-/ndjson-readablestream-1.4.0.tgz",
+ "integrity": "sha512-aufwPqwZzsS+NTekCJbHz8kJ6VlyeI8LD7owYSWm3cs4rx3CkY+J+DoD1af/ySLbc4ESJkESgJeMj1V4t3ZYPg==",
+ "license": "MIT"
+ },
"node_modules/node-releases": {
"version": "2.0.19",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz",
@@ -6189,14 +6141,6 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
- "@microsoft/ai-chat-protocol": {
- "version": "1.0.0-beta.20240610.1",
- "resolved": "https://registry.npmjs.org/@microsoft/ai-chat-protocol/-/ai-chat-protocol-1.0.0-beta.20240610.1.tgz",
- "integrity": "sha512-VGRt4DTCnoCKLqXs1H+3F9yeD8kTATktWxL4j2OUeOoqEiqWUiNm66qQMBzQJRv9Oi+vV9weQyZ6O6mHrf91HQ==",
- "requires": {
- "@typespec/ts-http-runtime": "^1.0.0-alpha.20240228.1"
- }
- },
"@microsoft/load-themed-styles": {
"version": "1.10.295"
},
@@ -6485,16 +6429,6 @@
"@types/unist": {
"version": "2.0.7"
},
- "@typespec/ts-http-runtime": {
- "version": "1.0.0-alpha.20240610.1",
- "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-1.0.0-alpha.20240610.1.tgz",
- "integrity": "sha512-f1pHRnMpCZG1u7EucgZ00E9MpqI/HpZZ7FOu8oub/QH/9ki+5BtRbQfM17EDTi5w5JDWlp9Os+7fQVWLidozKQ==",
- "requires": {
- "http-proxy-agent": "^7.0.0",
- "https-proxy-agent": "^7.0.0",
- "tslib": "^2.6.2"
- }
- },
"@vitejs/plugin-react": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.4.tgz",
@@ -6508,14 +6442,6 @@
"react-refresh": "^0.14.2"
}
},
- "agent-base": {
- "version": "7.1.1",
- "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
- "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
- "requires": {
- "debug": "^4.3.4"
- }
- },
"browserslist": {
"version": "4.24.4",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.4.tgz",
@@ -6557,6 +6483,7 @@
},
"debug": {
"version": "4.3.4",
+ "dev": true,
"requires": {
"ms": "2.1.2"
}
@@ -6680,24 +6607,6 @@
"highlight.js": {
"version": "10.7.3"
},
- "http-proxy-agent": {
- "version": "7.0.2",
- "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
- "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
- "requires": {
- "agent-base": "^7.1.0",
- "debug": "^4.3.4"
- }
- },
- "https-proxy-agent": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz",
- "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==",
- "requires": {
- "agent-base": "^7.0.2",
- "debug": "4"
- }
- },
"is-alphabetical": {
"version": "1.0.4"
},
@@ -6766,7 +6675,8 @@
"integrity": "sha512-jcByLnIFkd5gSXZmjNvS1TlmRhCXZjIzHYlaGkPlLIekG55JDR2Z4va9tZwCiP+/RDERiNhMOFu01xd6O5ct1Q=="
},
"ms": {
- "version": "2.1.2"
+ "version": "2.1.2",
+ "dev": true
},
"nanoid": {
"version": "3.3.10",
@@ -6774,6 +6684,11 @@
"integrity": "sha512-vSJJTG+t/dIKAUhUDw/dLdZ9s//5OxcHqLaDWWrW4Cdq7o6tdLIczUkMXt2MBNmk6sJRZBZRXVixs7URY1CmIg==",
"dev": true
},
+ "ndjson-readablestream": {
+ "version": "1.4.0",
+ "resolved": "https://registry.npmjs.org/ndjson-readablestream/-/ndjson-readablestream-1.4.0.tgz",
+ "integrity": "sha512-aufwPqwZzsS+NTekCJbHz8kJ6VlyeI8LD7owYSWm3cs4rx3CkY+J+DoD1af/ySLbc4ESJkESgJeMj1V4t3ZYPg=="
+ },
"node-releases": {
"version": "2.0.19",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz",
diff --git a/src/frontend/package.json b/src/frontend/package.json
index 5d8aecfd..bcab349c 100644
--- a/src/frontend/package.json
+++ b/src/frontend/package.json
@@ -12,18 +12,18 @@
"preview": "vite preview"
},
"dependencies": {
- "@azure/msal-react": "^2.0.6",
"@azure/msal-browser": "^3.1.0",
+ "@azure/msal-react": "^2.0.6",
"@fluentui/react": "^8.112.5",
"@fluentui/react-components": "^9.37.3",
"@fluentui/react-icons": "^2.0.221",
"@react-spring/web": "^9.7.3",
- "marked": "^9.1.6",
"dompurify": "^3.2.4",
+ "marked": "^9.1.6",
+ "ndjson-readablestream": "^1.4.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-router-dom": "^6.18.0",
- "@microsoft/ai-chat-protocol": "1.0.0-beta.20240610.1",
"react-syntax-highlighter": "^15.5.0",
"scheduler": "^0.20.2"
},
@@ -31,10 +31,10 @@
"@types/dompurify": "^3.0.4",
"@types/react": "^18.2.34",
"@types/react-dom": "^18.2.14",
+ "@types/react-syntax-highlighter": "^15.5.7",
"@vitejs/plugin-react": "^4.3.4",
"prettier": "^3.0.3",
"typescript": "^5.2.2",
- "@types/react-syntax-highlighter": "^15.5.7",
"vite": "^6.3.4"
}
}
diff --git a/src/frontend/src/api/models.ts b/src/frontend/src/api/models.ts
index 4e9c3e26..e7e6efd4 100644
--- a/src/frontend/src/api/models.ts
+++ b/src/frontend/src/api/models.ts
@@ -1,5 +1,3 @@
-import { AIChatCompletion, AIChatCompletionDelta, AIChatCompletionOperationOptions } from "@microsoft/ai-chat-protocol";
-
export const enum RetrievalMode {
Hybrid = "hybrid",
Vectors = "vectors",
@@ -18,9 +16,14 @@ export type ChatAppRequestContext = {
overrides: ChatAppRequestOverrides;
};
-export interface ChatAppRequestOptions extends AIChatCompletionOperationOptions {
- context: ChatAppRequestContext
-}
+export type ChatAppRequestOptions = {
+ context: ChatAppRequestContext;
+};
+
+export type ChatAppRequest = {
+ input: { content: string; role: string }[];
+ context: ChatAppRequestContext;
+};
export type Thoughts = {
title: string;
@@ -30,14 +33,17 @@ export type Thoughts = {
export type RAGContext = {
data_points: { [key: string]: any };
- followup_questions: string[] | null;
thoughts: Thoughts[];
};
-export interface RAGChatCompletion extends AIChatCompletion {
+export type RAGChatCompletion = {
+ output_text: string;
context: RAGContext;
-}
+};
-export interface RAGChatCompletionDelta extends AIChatCompletionDelta {
- context: RAGContext;
-}
+export type RAGChatCompletionDelta = {
+ type: string;
+ delta?: string;
+ context?: RAGContext;
+ error?: string;
+};
diff --git a/src/frontend/src/components/Answer/Answer.tsx b/src/frontend/src/components/Answer/Answer.tsx
index 01b8bd3f..163aa68e 100644
--- a/src/frontend/src/components/Answer/Answer.tsx
+++ b/src/frontend/src/components/Answer/Answer.tsx
@@ -15,8 +15,6 @@ interface Props {
onCitationClicked: (filePath: string) => void;
onThoughtProcessClicked: () => void;
onSupportingContentClicked: () => void;
- onFollowupQuestionClicked?: (question: string) => void;
- showFollowupQuestions?: boolean;
}
export const Answer = ({
@@ -25,13 +23,10 @@ export const Answer = ({
isStreaming,
onCitationClicked,
onThoughtProcessClicked,
- onSupportingContentClicked,
- onFollowupQuestionClicked,
- showFollowupQuestions
+ onSupportingContentClicked
}: Props) => {
const [isReferencesCollapsed, setIsReferencesCollapsed] = useState(true);
- const followupQuestions = answer.context.followup_questions;
- const messageContent = answer.message.content;
+ const messageContent = answer.output_text;
const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]);
const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml);
@@ -89,21 +84,6 @@ export const Answer = ({
)}
)}
-
- {!!followupQuestions?.length && showFollowupQuestions && onFollowupQuestionClicked && (
-
-
- Follow-up questions:
- {followupQuestions.map((x, i) => {
- return (
- onFollowupQuestionClicked(x)}>
- {`${x}`}
-
- );
- })}
-
-
- )}
);
};
diff --git a/src/frontend/src/pages/chat/Chat.tsx b/src/frontend/src/pages/chat/Chat.tsx
index f583f012..9750967e 100644
--- a/src/frontend/src/pages/chat/Chat.tsx
+++ b/src/frontend/src/pages/chat/Chat.tsx
@@ -4,8 +4,8 @@ import { SparkleFilled } from "@fluentui/react-icons";
import styles from "./Chat.module.css";
-import { RetrievalMode, RAGChatCompletion, RAGChatCompletionDelta, ChatAppRequestOptions } from "../../api";
-import { AIChatProtocolClient, AIChatMessage } from "@microsoft/ai-chat-protocol";
+import { RetrievalMode, RAGChatCompletion, RAGChatCompletionDelta, ChatAppRequest } from "../../api";
+import readNDJSONStream from "ndjson-readablestream";
import { Answer, AnswerError, AnswerLoading } from "../../components/Answer";
import { QuestionInput } from "../../components/QuestionInput";
import { ExampleList } from "../../components/Example";
@@ -38,24 +38,22 @@ const Chat = () => {
const [answers, setAnswers] = useState<[user: string, response: RAGChatCompletion][]>([]);
const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: RAGChatCompletion][]>([]);
- const handleAsyncRequest = async (question: string, answers: [string, RAGChatCompletion][], result: AsyncIterable) => {
+ const handleAsyncRequest = async (question: string, answers: [string, RAGChatCompletion][], responseBody: ReadableStream) => {
let answer = "";
let chatCompletion: RAGChatCompletion = {
context: {
data_points: {},
- followup_questions: null,
thoughts: []
},
- message: { content: "", role: "assistant" }
+ output_text: ""
};
const updateState = (newContent: string) => {
return new Promise(resolve => {
setTimeout(() => {
answer += newContent;
- // We need to create a new object to trigger a re-render
const latestCompletion: RAGChatCompletion = {
...chatCompletion,
- message: { content: answer, role: chatCompletion.message.role }
+ output_text: answer
};
setStreamedAnswers([...answers, [question, latestCompletion]]);
resolve(null);
@@ -64,25 +62,21 @@ const Chat = () => {
};
try {
setIsStreaming(true);
- for await (const response of result) {
- if (response.context) {
- chatCompletion.context = {
- ...chatCompletion.context,
- ...response.context
- };
- }
- if (response.delta && response.delta.role) {
- chatCompletion.message.role = response.delta.role;
+ for await (const event of readNDJSONStream(responseBody)) {
+ if (event.error) {
+ throw new Error(event.error);
}
- if (response.delta && response.delta.content) {
+ if (event.type === "response.context" && event.context) {
+ chatCompletion.context = { ...chatCompletion.context, ...event.context };
+ } else if (event.type === "response.output_text.delta" && event.delta !== undefined) {
setIsLoading(false);
- await updateState(response.delta.content);
+ await updateState(event.delta);
}
}
} finally {
setIsStreaming(false);
}
- chatCompletion.message.content = answer;
+ chatCompletion.output_text = answer;
return chatCompletion;
};
const makeApiRequest = async (question: string) => {
@@ -94,12 +88,13 @@ const Chat = () => {
setActiveAnalysisPanelTab(undefined);
try {
- const messages: AIChatMessage[] = answers.flatMap(answer => [
+ const messages = answers.flatMap(answer => [
{ content: answer[0], role: "user" },
- { content: answer[1].message.content, role: "assistant" }
+ { content: answer[1].output_text, role: "assistant" }
]);
- const allMessages: AIChatMessage[] = [...messages, { content: question, role: "user" }];
- const options: ChatAppRequestOptions = {
+ const allMessages = [...messages, { content: question, role: "user" }];
+ const request: ChatAppRequest = {
+ input: allMessages,
context: {
overrides: {
use_advanced_flow: useAdvancedFlow,
@@ -108,16 +103,29 @@ const Chat = () => {
prompt_template: promptTemplate.length === 0 ? undefined : promptTemplate,
temperature: temperature
}
- },
- sessionState: answers.length ? answers[answers.length - 1][1].sessionState : null
+ }
};
- const chatClient: AIChatProtocolClient = new AIChatProtocolClient("/chat");
if (shouldStream) {
- const result = (await chatClient.getStreamedCompletion(allMessages, options)) as AsyncIterable;
- const parsedResponse = await handleAsyncRequest(question, answers, result);
+ const response = await fetch("/chat/stream", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify(request)
+ });
+ if (!response.ok || !response.body) {
+ throw new Error(`Request failed with status ${response.status}`);
+ }
+ const parsedResponse = await handleAsyncRequest(question, answers, response.body);
setAnswers([...answers, [question, parsedResponse]]);
} else {
- const result = (await chatClient.getCompletion(allMessages, options)) as RAGChatCompletion;
+ const response = await fetch("/chat", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify(request)
+ });
+ if (!response.ok) {
+ throw new Error(`Request failed with status ${response.status}`);
+ }
+ const result: RAGChatCompletion = await response.json();
setAnswers([...answers, [question, result]]);
}
} catch (e) {
@@ -220,7 +228,6 @@ const Chat = () => {
onCitationClicked={c => onShowCitation(c, index)}
onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)}
onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)}
- onFollowupQuestionClicked={q => makeApiRequest(q)}
/>
@@ -238,7 +245,6 @@ const Chat = () => {
onCitationClicked={c => onShowCitation(c, index)}
onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)}
onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)}
- onFollowupQuestionClicked={q => makeApiRequest(q)}
/>
diff --git a/tests/conftest.py b/tests/conftest.py
index 5fe67053..44006855 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,17 +5,20 @@
import openai
import openai.resources
+import openai.resources.responses
import pytest
import pytest_asyncio
from fastapi.testclient import TestClient
from openai.types import CreateEmbeddingResponse, Embedding
-from openai.types.chat import ChatCompletion, ChatCompletionChunk
-from openai.types.chat.chat_completion import (
- ChatCompletionMessage,
- Choice,
-)
-from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall, Function
from openai.types.create_embedding_response import Usage
+from openai.types.responses import (
+ Response,
+ ResponseCompletedEvent,
+ ResponseFunctionToolCall,
+ ResponseOutputMessage,
+ ResponseOutputText,
+ ResponseTextDeltaEvent,
+)
from sqlalchemy.ext.asyncio import async_sessionmaker
from fastapi_app import create_app
@@ -63,9 +66,8 @@ def mock_session_env(monkeypatch_session):
monkeypatch_session.setenv("OPENAI_CHAT_HOST", "azure")
monkeypatch_session.setenv("OPENAI_EMBED_HOST", "azure")
monkeypatch_session.setenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com")
- monkeypatch_session.setenv("AZURE_OPENAI_VERSION", "2024-03-01-preview")
- monkeypatch_session.setenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-4o-mini")
- monkeypatch_session.setenv("AZURE_OPENAI_CHAT_MODEL", "gpt-4o-mini")
+ monkeypatch_session.setenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-5.4")
+ monkeypatch_session.setenv("AZURE_OPENAI_CHAT_MODEL", "gpt-5.4")
monkeypatch_session.setenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "text-embedding-3-large")
monkeypatch_session.setenv("AZURE_OPENAI_EMBED_MODEL", "text-embedding-3-large")
monkeypatch_session.setenv("AZURE_OPENAI_EMBED_DIMENSIONS", "1024")
@@ -171,68 +173,72 @@ async def mock_acreate(*args, **kwargs):
@pytest.fixture(scope="session")
def mock_openai_chatcompletion(monkeypatch_session):
- class AsyncChatCompletionIterator:
+ class AsyncResponseEventIterator:
def __init__(self, answer: str):
- chunk_id = "test-id"
- model = "gpt-4o-mini"
- self.responses = [
- {"object": "chat.completion.chunk", "choices": [], "id": chunk_id, "model": model, "created": 1},
- {
- "object": "chat.completion.chunk",
- "choices": [{"delta": {"role": "assistant"}, "index": 0, "finish_reason": None}],
- "id": chunk_id,
- "model": model,
- "created": 1,
- },
- ]
+ self.events: list = []
# Split at << to simulate chunked responses
if answer.find("<<") > -1:
parts = answer.split("<<")
- self.responses.append(
- {
- "object": "chat.completion.chunk",
- "choices": [
- {
- "delta": {"role": "assistant", "content": parts[0] + "<<"},
- "index": 0,
- "finish_reason": None,
- }
- ],
- "id": chunk_id,
- "model": model,
- "created": 1,
- }
- )
- self.responses.append(
- {
- "object": "chat.completion.chunk",
- "choices": [
- {"delta": {"role": "assistant", "content": parts[1]}, "index": 0, "finish_reason": None}
- ],
- "id": chunk_id,
- "model": model,
- "created": 1,
- }
+ self.events.append(
+ ResponseTextDeltaEvent(
+ type="response.output_text.delta",
+ content_index=0,
+ delta=parts[0] + "<<",
+ item_id="msg-1",
+ output_index=0,
+ logprobs=[],
+ sequence_number=0,
+ )
)
- self.responses.append(
- {
- "object": "chat.completion.chunk",
- "choices": [{"delta": {"role": None, "content": None}, "index": 0, "finish_reason": "stop"}],
- "id": chunk_id,
- "model": model,
- "created": 1,
- }
+ self.events.append(
+ ResponseTextDeltaEvent(
+ type="response.output_text.delta",
+ content_index=0,
+ delta=parts[1],
+ item_id="msg-1",
+ output_index=0,
+ logprobs=[],
+ sequence_number=1,
+ )
)
else:
- self.responses.append(
- {
- "object": "chat.completion.chunk",
- "choices": [{"delta": {"content": answer}, "index": 0, "finish_reason": None}],
- "id": chunk_id,
- "model": model,
- "created": 1,
- }
+ self.events.append(
+ ResponseTextDeltaEvent(
+ type="response.output_text.delta",
+ content_index=0,
+ delta=answer,
+ item_id="msg-1",
+ output_index=0,
+ logprobs=[],
+ sequence_number=0,
+ )
+ )
+ # Agents SDK requires a ResponseCompletedEvent to finalize the stream
+ self.events.append(
+ ResponseCompletedEvent(
+ type="response.completed",
+ sequence_number=len(self.events),
+ response=Response(
+ id="resp-test-stream",
+ created_at=0,
+ model="gpt-5.4",
+ object="response",
+ output=[
+ ResponseOutputMessage(
+ id="msg-1",
+ type="message",
+ role="assistant",
+ status="completed",
+ content=[ResponseOutputText(type="output_text", text=answer, annotations=[])],
+ )
+ ],
+ tool_choice="auto",
+ tools=[],
+ status="completed",
+ parallel_tool_calls=True,
+ ),
)
+ )
async def __aenter__(self):
return self
@@ -240,97 +246,98 @@ async def __aenter__(self):
async def __aexit__(self, exc_type, exc_val, exc_tb):
return None
+ async def close(self):
+ pass
+
+ async def parse(self):
+ return self
+
def __aiter__(self):
return self
async def __anext__(self):
- if self.responses:
- return ChatCompletionChunk.model_validate(self.responses.pop(0))
- else:
- raise StopAsyncIteration
+ if self.events:
+ return self.events.pop(0)
+ raise StopAsyncIteration
+
+ def _make_text_response(answer: str) -> Response:
+ return Response(
+ id="resp-test-123",
+ created_at=0,
+ model="gpt-5.4",
+ object="response",
+ output=[
+ ResponseOutputMessage(
+ id="msg-1",
+ type="message",
+ role="assistant",
+ status="completed",
+ content=[ResponseOutputText(type="output_text", text=answer, annotations=[])],
+ )
+ ],
+ tool_choice="auto",
+ tools=[],
+ status="completed",
+ parallel_tool_calls=True,
+ )
+
+ def _make_tool_call_response(tool_name: str, arguments: str, call_id: str = "fc_abc123") -> Response:
+ return Response(
+ id="resp-test-123",
+ created_at=0,
+ model="gpt-5.4",
+ object="response",
+ output=[
+ ResponseFunctionToolCall(
+ id=call_id,
+ call_id=call_id,
+ type="function_call",
+ name=tool_name,
+ arguments=arguments,
+ status="completed",
+ )
+ ],
+ tool_choice="auto",
+ tools=[],
+ status="completed",
+ parallel_tool_calls=True,
+ )
async def mock_acreate(*args, **kwargs):
- messages = kwargs["messages"]
- last_question = messages[-1]["content"]
- last_role = messages[-1]["role"]
+ input_messages = kwargs.get("input", [])
+ last_message = input_messages[-1]
+ last_content = last_message.get("content", "") if isinstance(last_message, dict) else ""
+ last_role = last_message.get("role", "") if isinstance(last_message, dict) else ""
if last_role == "tool":
- items = json.loads(last_question)["items"]
+ items = json.loads(last_content)["items"]
arguments = {"query": "capital of France", "items": items, "filters": []}
- return ChatCompletion(
- object="chat.completion",
- choices=[
- Choice(
- message=ChatCompletionMessage(
- role="assistant",
- tool_calls=[
- ChatCompletionMessageToolCall(
- id="call_abc123final",
- type="function",
- function=Function(
- name="final_result",
- arguments=json.dumps(arguments),
- ),
- )
- ],
- ),
- finish_reason="stop",
- index=0,
- )
- ],
- id="test-123final",
- created=0,
- model="test-model",
- )
- if last_question == "Find search results for user query: What is the capital of France?":
- return ChatCompletion(
- object="chat.completion",
- choices=[
- Choice(
- message=ChatCompletionMessage(
- role="assistant",
- tool_calls=[
- ChatCompletionMessageToolCall(
- id="call_abc123",
- type="function",
- function=Function(
- name="search_database", arguments='{"search_query":"climbing gear outside"}'
- ),
- )
- ],
- ),
- finish_reason="stop",
- index=0,
- )
- ],
- id="test-123",
- created=0,
- model="test-model",
+ return _make_tool_call_response("final_result", json.dumps(arguments), call_id="fc_abc123final")
+ if last_content == "Find search results for user query: What is the capital of France?":
+ return _make_tool_call_response(
+ "search_database", '{"search_query":"climbing gear outside"}', call_id="fc_abc123"
)
- elif last_question == "Find search results for user query: Are interest rates high?":
+ elif last_content == "Find search results for user query: Are interest rates high?":
answer = "interest rates"
- elif isinstance(last_question, list) and last_question[2].get("image_url"):
- answer = "From the provided sources, the impact of interest rates and GDP growth on "
- "financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]"
+ elif isinstance(last_content, list) and len(last_content) > 2 and last_content[2].get("image_url"):
+ answer = (
+ "From the provided sources, the impact of interest rates and GDP growth on "
+ "financial markets can be observed through the line graph."
+ " [Financial Market Analysis Report 2023-7.png]"
+ )
else:
answer = "The capital of France is Paris. [Benefit_Options-2.pdf]."
- if messages[0]["content"].find("Generate 3 very brief follow-up questions") > -1:
+ system_content = input_messages[0].get("content", "") if isinstance(input_messages[0], dict) else ""
+ if (
+ isinstance(system_content, str)
+ and system_content.find("Generate 3 very brief follow-up questions") > -1
+ ):
answer = "The capital of France is Paris. [Benefit_Options-2.pdf]. <>"
- if "stream" in kwargs and kwargs["stream"] is True:
- return AsyncChatCompletionIterator(answer)
+ if kwargs.get("stream") is True:
+ return AsyncResponseEventIterator(answer)
else:
- return ChatCompletion(
- object="chat.completion",
- choices=[
- Choice(
- message=ChatCompletionMessage(role="assistant", content=answer), finish_reason="stop", index=0
- )
- ],
- id="test-123",
- created=0,
- model="test-model",
- )
+ return _make_text_response(answer)
- monkeypatch_session.setattr(openai.resources.chat.completions.AsyncCompletions, "create", mock_acreate)
+ monkeypatch_session.setattr(openai.resources.responses.AsyncResponses, "create", mock_acreate)
yield
@@ -338,7 +345,7 @@ async def mock_acreate(*args, **kwargs):
@pytest.fixture(scope="function")
def mock_azure_credential(mock_session_env):
"""Mock the Azure credential for testing."""
- with mock.patch("azure.identity.AzureDeveloperCliCredential") as mock_azure_credential:
+ with mock.patch("azure.identity.aio.AzureDeveloperCliCredential") as mock_azure_credential:
mock_azure_credential.return_value = MockAzureCredential()
yield mock_azure_credential
diff --git a/tests/e2e.py b/tests/e2e.py
index 56f6023d..5a019688 100644
--- a/tests/e2e.py
+++ b/tests/e2e.py
@@ -59,10 +59,6 @@ def test_home(page: Page, live_server_url: str):
def test_chat(page: Page, live_server_url: str):
# Set up a mock route to the /chat endpoint with streaming results
def handle(route: Route):
- # Assert that session_state is specified in the request (None for now)
- if route.request.post_data_json:
- session_state = route.request.post_data_json["sessionState"]
- assert session_state is None
# Read the JSONL from our snapshot results and return as the response
f = open(
"tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines"
diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
index 612be773..1437bdb6 100644
--- a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
+++ b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
@@ -1,8 +1,5 @@
{
- "message": {
- "content": "The capital of France is Paris. [Benefit_Options-2.pdf].",
- "role": "assistant"
- },
+ "output_text": "The capital of France is Paris. [Benefit_Options-2.pdf].",
"context": {
"data_points": {
"1": {
@@ -19,6 +16,7 @@
"title": "Prompt to generate search arguments",
"description": [
{
+ "role": "system",
"content": "Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"
},
{
@@ -26,14 +24,14 @@
"content": "good options for climbing gear that can be used outside?"
},
{
- "id": "madeup",
+ "id": "fc_madeup1",
"call_id": "call_abc123",
"name": "search_database",
"arguments": "{\"search_query\":\"climbing gear outside\"}",
"type": "function_call"
},
{
- "id": "madeupoutput",
+ "id": "fc_madeupoutput1",
"call_id": "call_abc123",
"output": "Search results for climbing gear that can be used outside: ...",
"type": "function_call_output"
@@ -43,14 +41,14 @@
"content": "are there any shoes less than $50?"
},
{
- "id": "madeup",
+ "id": "fc_madeup2",
"call_id": "call_abc456",
"name": "search_database",
"arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
"type": "function_call"
},
{
- "id": "madeupoutput",
+ "id": "fc_madeupoutput2",
"call_id": "call_abc456",
"output": "Search results for shoes cheaper than 50: ...",
"type": "function_call_output"
@@ -61,8 +59,8 @@
}
],
"props": {
- "model": "gpt-4o-mini",
- "deployment": "gpt-4o-mini"
+ "model": "gpt-5.4",
+ "deployment": "gpt-5.4"
}
},
{
@@ -93,6 +91,7 @@
"title": "Prompt to generate answer",
"description": [
{
+ "role": "system",
"content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."
},
{
@@ -101,12 +100,10 @@
}
],
"props": {
- "model": "gpt-4o-mini",
- "deployment": "gpt-4o-mini"
+ "model": "gpt-5.4",
+ "deployment": "gpt-5.4"
}
}
- ],
- "followup_questions": null
- },
- "sessionState": null
+ ]
+ }
}
\ No newline at end of file
diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
index d29b85c4..9ee6166a 100644
--- a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
+++ b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
@@ -1,2 +1,2 @@
-{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search 
results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null}
-{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null}
+{"type":"response.context","delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"role":"system","content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"fc_madeup1","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"fc_madeupoutput1","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"fc_madeup2","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"fc_madeupoutput2","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}},{"title":"Search using generated search arguments","description":"climbing gear 
outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}]}}
+{"type":"response.output_text.delta","delta":"The capital of France is Paris. [Benefit_Options-2.pdf].","context":null}
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
index e311917b..76177c90 100644
--- a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
+++ b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
@@ -1,8 +1,5 @@
{
- "message": {
- "content": "The capital of France is Paris. [Benefit_Options-2.pdf].",
- "role": "assistant"
- },
+ "output_text": "The capital of France is Paris. [Benefit_Options-2.pdf].",
"context": {
"data_points": {
"1": {
@@ -42,6 +39,7 @@
"title": "Prompt to generate answer",
"description": [
{
+ "role": "system",
"content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."
},
{
@@ -50,12 +48,10 @@
}
],
"props": {
- "model": "gpt-4o-mini",
- "deployment": "gpt-4o-mini"
+ "model": "gpt-5.4",
+ "deployment": "gpt-5.4"
}
}
- ],
- "followup_questions": null
- },
- "sessionState": null
+ ]
+ }
}
\ No newline at end of file
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json
index d0456cd7..de49c7c4 100644
--- a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json
+++ b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json
@@ -1,8 +1,5 @@
{
- "message": {
- "content": "The capital of France is Paris. [Benefit_Options-2.pdf].",
- "role": "assistant"
- },
+ "output_text": "The capital of France is Paris. [Benefit_Options-2.pdf].",
"context": {
"data_points": {
"1": {
@@ -42,6 +39,7 @@
"title": "Prompt to generate answer",
"description": [
{
+ "role": "system",
"content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."
},
{
@@ -58,12 +56,10 @@
}
],
"props": {
- "model": "gpt-4o-mini",
- "deployment": "gpt-4o-mini"
+ "model": "gpt-5.4",
+ "deployment": "gpt-5.4"
}
}
- ],
- "followup_questions": null
- },
- "sessionState": null
+ ]
+ }
}
\ No newline at end of file
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines
index 65d3ae5b..57fdd33b 100644
--- a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines
+++ b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines
@@ -1,2 +1,2 @@
-{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. 
These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null}
-{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null}
+{"type":"response.context","delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. 
These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-5.4","deployment":"gpt-5.4"}}]}}
+{"type":"response.output_text.delta","delta":"The capital of France is Paris. [Benefit_Options-2.pdf].","context":null}
diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py
index 55da4d6f..3349b984 100644
--- a/tests/test_api_routes.py
+++ b/tests/test_api_routes.py
@@ -115,7 +115,7 @@ async def test_simple_chat_flow(test_client, snapshot):
"context": {
"overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3}
},
- "messages": [{"content": "What is the capital of France?", "role": "user"}],
+ "input": [{"content": "What is the capital of France?", "role": "user"}],
},
)
response_data = response.json()
@@ -134,7 +134,7 @@ async def test_simple_chat_flow_message_history(test_client, snapshot):
"context": {
"overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3}
},
- "messages": [
+ "input": [
{"content": "What is the capital of France?", "role": "user"},
{"content": "The capital of France is Paris.", "role": "assistant"},
{"content": "What is the capital of France?", "role": "user"},
@@ -157,7 +157,7 @@ async def test_simple_chat_streaming_flow(test_client, snapshot):
"context": {
"overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3}
},
- "messages": [{"content": "What is the capital of France?", "role": "user"}],
+ "input": [{"content": "What is the capital of France?", "role": "user"}],
},
)
response_data = response.content
@@ -175,7 +175,7 @@ async def test_advanced_chat_flow(test_client, snapshot):
"context": {
"overrides": {"top": 1, "use_advanced_flow": True, "retrieval_mode": "hybrid", "temperature": 0.3}
},
- "messages": [{"content": "What is the capital of France?", "role": "user"}],
+ "input": [{"content": "What is the capital of France?", "role": "user"}],
},
)
response_data = response.json()
@@ -194,7 +194,7 @@ async def test_advanced_chat_streaming_flow(test_client, snapshot):
"context": {
"overrides": {"top": 1, "use_advanced_flow": True, "retrieval_mode": "hybrid", "temperature": 0.3}
},
- "messages": [{"content": "What is the capital of France?", "role": "user"}],
+ "input": [{"content": "What is the capital of France?", "role": "user"}],
},
)
response_data = response.content
diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py
index e55149a3..1c4d90db 100644
--- a/tests/test_dependencies.py
+++ b/tests/test_dependencies.py
@@ -6,10 +6,10 @@
@pytest.mark.asyncio
async def test_get_common_parameters(mock_session_env):
result = await common_parameters()
- assert result.openai_chat_model == "gpt-4o-mini"
+ assert result.openai_chat_model == "gpt-5.4"
assert result.openai_embed_model == "text-embedding-3-large"
assert result.openai_embed_dimensions == 1024
- assert result.openai_chat_deployment == "gpt-4o-mini"
+ assert result.openai_chat_deployment == "gpt-5.4"
assert result.openai_embed_deployment == "text-embedding-3-large"
diff --git a/tests/test_openai_clients.py b/tests/test_openai_clients.py
index 47caba26..ad60afc7 100644
--- a/tests/test_openai_clients.py
+++ b/tests/test_openai_clients.py
@@ -1,6 +1,5 @@
import pytest
-from fastapi_app.dependencies import common_parameters
from fastapi_app.openai_clients import create_openai_chat_client, create_openai_embed_client
from tests.data import test_data
@@ -18,49 +17,6 @@ async def test_create_openai_embed_client(mock_azure_credential, mock_openai_emb
@pytest.mark.asyncio
async def test_create_openai_chat_client(mock_azure_credential, mock_openai_chatcompletion):
openai_chat_client = await create_openai_chat_client(mock_azure_credential)
- assert openai_chat_client.chat.completions.create is not None
- response = await openai_chat_client.chat.completions.create(
- model="gpt-4o-mini", messages=[{"content": "test", "role": "user"}]
- )
- assert response.choices[0].message.content == "The capital of France is Paris. [Benefit_Options-2.pdf]."
-
-
-@pytest.mark.asyncio
-async def test_github_models_configuration(monkeypatch):
- """Test that GitHub Models uses the correct URLs and model names."""
- # Set up environment for GitHub Models
- monkeypatch.setenv("OPENAI_CHAT_HOST", "github")
- monkeypatch.setenv("OPENAI_EMBED_HOST", "github")
- monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
- # Don't set GITHUB_MODEL to test defaults
-
- # Test chat client configuration
- chat_client = await create_openai_chat_client(None)
- assert str(chat_client.base_url).rstrip("/") == "https://models.github.ai/inference"
- assert chat_client.api_key == "fake-token"
-
- # Test embed client configuration
- embed_client = await create_openai_embed_client(None)
- assert str(embed_client.base_url).rstrip("/") == "https://models.github.ai/inference"
- assert embed_client.api_key == "fake-token"
-
- # Test that dependencies use correct defaults
- context = await common_parameters()
- assert context.openai_chat_model == "openai/gpt-4o"
- assert context.openai_embed_model == "openai/text-embedding-3-large"
-
-
-@pytest.mark.asyncio
-async def test_github_models_with_custom_values(monkeypatch):
- """Test that GitHub Models respects custom environment values."""
- # Set up environment for GitHub Models with custom values
- monkeypatch.setenv("OPENAI_CHAT_HOST", "github")
- monkeypatch.setenv("OPENAI_EMBED_HOST", "github")
- monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
- monkeypatch.setenv("GITHUB_MODEL", "openai/gpt-4")
- monkeypatch.setenv("GITHUB_EMBED_MODEL", "openai/text-embedding-ada-002")
-
- # Test that dependencies use custom values
- context = await common_parameters()
- assert context.openai_chat_model == "openai/gpt-4"
- assert context.openai_embed_model == "openai/text-embedding-ada-002"
+ assert openai_chat_client.responses.create is not None
+ response = await openai_chat_client.responses.create(model="gpt-5.4", input=[{"role": "user", "content": "test"}])
+ assert response.output_text == "The capital of France is Paris. [Benefit_Options-2.pdf]."