From 7993047348ffaf15487d63a331ad7be915e1f68d Mon Sep 17 00:00:00 2001 From: mkulakow Date: Thu, 30 Apr 2026 14:28:53 +0200 Subject: [PATCH 01/20] Support functions in responses api --- src/llm/apis/openai_responses.cpp | 616 ++++++++++++++++++++---- src/llm/py_jinja_template_processor.cpp | 2 +- src/llm/servable.cpp | 5 +- 3 files changed, 525 insertions(+), 98 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 89b897dc4a..33341f185c 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -57,6 +57,498 @@ static std::string joinServerSideEvents(const std::vector& events) return ss.str(); } +// Convert the Responses API tools array (flat function format) into the chat/completions +// nested format ({type:"function", function:{name, description, parameters, ...}}) in place +// on the request document. The chat template (e.g. gpt-oss) and the chat/completions tools +// schema both expect the nested shape; doing this once up front lets every downstream +// consumer (chat history path, processedJson builder for Python Jinja, parseToolsToJsonContainer) +// share the same representation. Tools already in nested form, or non-function tools, are +// left untouched. +static void convertResponsesToolsInPlace(rapidjson::Value& toolsArray, rapidjson::Document::AllocatorType& alloc) { + if (!toolsArray.IsArray()) { + return; + } + for (auto& tool : toolsArray.GetArray()) { + if (!tool.IsObject()) { + continue; + } + auto toolObj = tool.GetObject(); + if (toolObj.FindMember("function") != toolObj.MemberEnd()) { + continue; // Already in nested chat/completions format. + } + auto typeIt = toolObj.FindMember("type"); + const std::string toolType = (typeIt != toolObj.MemberEnd() && typeIt->value.IsString()) + ? typeIt->value.GetString() + : ""; + if (toolType != "function") { + continue; // Preserve non-function tools as-is. 
+ } + rapidjson::Value funcObj(rapidjson::kObjectType); + for (auto memberIt = toolObj.MemberBegin(); memberIt != toolObj.MemberEnd();) { + if (!memberIt->name.IsString()) { + ++memberIt; + continue; + } + const std::string fieldName = memberIt->name.GetString(); + if (fieldName == "type" || fieldName == "response") { + ++memberIt; + continue; + } + rapidjson::Value keyCopy(memberIt->name, alloc); + rapidjson::Value valCopy(memberIt->value, alloc); + funcObj.AddMember(keyCopy, valCopy, alloc); + memberIt = tool.EraseMember(memberIt); + } + tool.AddMember("function", funcObj, alloc); + } +} + +// Pull the reasoning text out of a Responses API "reasoning" item. +// Prefers the newer content[].text shape over the legacy summary[].text shape. +static std::string extractReasoningText(const rapidjson::Value::ConstObject& itemObj) { + auto contentIt = itemObj.FindMember("content"); + if (contentIt != itemObj.MemberEnd() && contentIt->value.IsArray()) { + for (const auto& ci : contentIt->value.GetArray()) { + if (!ci.IsObject()) + continue; + auto textIt = ci.GetObject().FindMember("text"); + if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) { + return textIt->value.GetString(); + } + } + } + auto summaryIt = itemObj.FindMember("summary"); + if (summaryIt != itemObj.MemberEnd() && summaryIt->value.IsArray()) { + for (const auto& si : summaryIt->value.GetArray()) { + if (!si.IsObject()) + continue; + auto textIt = si.GetObject().FindMember("text"); + if (textIt != si.GetObject().MemberEnd() && textIt->value.IsString()) { + return textIt->value.GetString(); + } + } + } + return ""; +} + +// Extract a flat text string from a Responses API content field which may be +// either a string or an array of {type,text} objects. 
+static std::string extractTextContent(const rapidjson::Value& contentVal) { + if (contentVal.IsString()) { + return contentVal.GetString(); + } + if (!contentVal.IsArray()) { + return ""; + } + for (const auto& ci : contentVal.GetArray()) { + if (!ci.IsObject()) + continue; + auto ctTypeIt = ci.GetObject().FindMember("type"); + if (ctTypeIt == ci.GetObject().MemberEnd() || !ctTypeIt->value.IsString()) + continue; + const std::string ctType = ctTypeIt->value.GetString(); + if (ctType == "input_text" || ctType == "output_text") { + auto textIt = ci.GetObject().FindMember("text"); + if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) { + return textIt->value.GetString(); + } + } + } + return ""; +} + +// Read the three string fields (id, name, arguments) out of a function_call item. +struct FunctionCallFields { + std::string id; + std::string name; + std::string arguments; +}; +static FunctionCallFields readFunctionCallFields(const rapidjson::Value& item) { + FunctionCallFields out; + auto fcObj = item.GetObject(); + auto idIt = fcObj.FindMember("id"); + if (idIt != fcObj.MemberEnd() && idIt->value.IsString()) + out.id = idIt->value.GetString(); + auto nameIt = fcObj.FindMember("name"); + if (nameIt != fcObj.MemberEnd() && nameIt->value.IsString()) + out.name = nameIt->value.GetString(); + auto argsIt = fcObj.FindMember("arguments"); + if (argsIt != fcObj.MemberEnd() && argsIt->value.IsString()) + out.arguments = argsIt->value.GetString(); + return out; +} + +// Classification of a Responses API input item used to dispatch to per-type +// handlers in the builders below. 
+enum class ResponsesInputItemKind { + REASONING, + FUNCTION_CALL, + FUNCTION_CALL_OUTPUT, + ROLE_ITEM, + MISSING_ROLE, +}; + +static absl::StatusOr classifyInputItem(const rapidjson::Value& item) { + if (!item.IsObject()) { + return absl::InvalidArgumentError("input array items must be objects"); + } + auto itemObj = item.GetObject(); + auto itemTypeIt = itemObj.FindMember("type"); + const std::string itemType = (itemTypeIt != itemObj.MemberEnd() && itemTypeIt->value.IsString()) + ? itemTypeIt->value.GetString() + : ""; + if (itemType == "reasoning") + return ResponsesInputItemKind::REASONING; + if (itemType == "function_call") + return ResponsesInputItemKind::FUNCTION_CALL; + if (itemType == "function_call_output") + return ResponsesInputItemKind::FUNCTION_CALL_OUTPUT; + auto roleIt = itemObj.FindMember("role"); + if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) + return ResponsesInputItemKind::MISSING_ROLE; + return ResponsesInputItemKind::ROLE_ITEM; +} + +// Builds chat/completions-shaped messages from a Responses API input array. +// +// Reasoning items are buffered and attached as `reasoning_content` on the next +// assistant message (matching the gpt-oss template's expected field). +// Reasoning that is not followed by an assistant/function_call item is dropped, +// since emitting a standalone {role:assistant, reasoning_content:...} message +// with no content/tool_calls would confuse most chat templates. +// +// Pending function_call items are merged into the next assistant message as a +// chat/completions-shaped tool_calls[] array. Without this, the assistant turn +// would have no tool_calls field, the chat template would treat it as a final +// answer, and a subsequent tool message would fail (e.g. gpt-oss raises +// "Message has tool role, but there was no previous assistant message with a +// tool call!"). 
+// +// The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a +// rapidjson messages array) is provided by the Sink template parameter, which +// must implement: +// absl::Status extractContent(itemObj, index, std::string& outText); +// void emitToolMessage(callId, output); +// void emitMessage(role, contentText, reasoning); // reasoning empty -> skip +// void emitAssistantWithToolCalls(contentText, reasoning, toolCalls); +// absl::Status onMissingRole(itemObj); +template +class ResponsesInputBuilder { +public: + explicit ResponsesInputBuilder(Sink& sink) : + sink(sink) {} + + absl::Status build(const rapidjson::Value& inputArray) { + if (!inputArray.IsArray()) { + return absl::InvalidArgumentError("input is not an array"); + } + for (rapidjson::SizeType i = 0; i < inputArray.GetArray().Size(); ++i) { + const auto& item = inputArray.GetArray()[i]; + auto kind = classifyInputItem(item); + if (!kind.ok()) + return kind.status(); + absl::Status status; + switch (kind.value()) { + case ResponsesInputItemKind::REASONING: + status = onReasoningItem(item.GetObject()); + break; + case ResponsesInputItemKind::FUNCTION_CALL: + pendingFunctionCalls.push_back(&item); + break; + case ResponsesInputItemKind::FUNCTION_CALL_OUTPUT: + status = onFunctionCallOutputItem(item.GetObject()); + break; + case ResponsesInputItemKind::ROLE_ITEM: + status = onRoleItem(item.GetObject(), i); + break; + case ResponsesInputItemKind::MISSING_ROLE: + status = sink.onMissingRole(item.GetObject()); + break; + } + if (!status.ok()) + return status; + } + // Flush any trailing buffered function_calls (e.g. input ends with a + // function_call item that has no corresponding output yet). 
+ flushPendingFunctionCalls(""); + return absl::OkStatus(); + } + +private: + absl::Status onReasoningItem(const rapidjson::Value::ConstObject& itemObj) { + std::string text = extractReasoningText(itemObj); + if (!text.empty()) { + if (!pendingReasoningContent.empty()) + pendingReasoningContent += "\n"; + pendingReasoningContent += text; + } + return absl::OkStatus(); + } + + absl::Status onFunctionCallOutputItem(const rapidjson::Value::ConstObject& itemObj) { + flushPendingFunctionCalls(""); + std::string callId; + auto callIdIt = itemObj.FindMember("call_id"); + if (callIdIt != itemObj.MemberEnd() && callIdIt->value.IsString()) + callId = callIdIt->value.GetString(); + std::string output; + auto outputIt = itemObj.FindMember("output"); + if (outputIt != itemObj.MemberEnd() && outputIt->value.IsString()) + output = outputIt->value.GetString(); + sink.emitToolMessage(callId, output); + return absl::OkStatus(); + } + + absl::Status onRoleItem(const rapidjson::Value::ConstObject& itemObj, rapidjson::SizeType index) { + const std::string role = itemObj.FindMember("role")->value.GetString(); + std::string contentText; + auto status = sink.extractContent(itemObj, index, contentText); + if (!status.ok()) + return status; + + // Assistant role with buffered function_calls: merge into one message + // (so the tool_calls field rides on the same assistant turn). + if (role == "assistant" && !pendingFunctionCalls.empty()) { + flushPendingFunctionCalls(contentText); + return absl::OkStatus(); + } + // Non-assistant items must not absorb pending tool_calls; flush first. + // (flushPendingFunctionCalls also clears any orphan reasoning content.) 
+ if (role != "assistant") { + flushPendingFunctionCalls(""); + } + + std::string reasoning; + if (role == "assistant" && !pendingReasoningContent.empty()) { + reasoning = std::move(pendingReasoningContent); + pendingReasoningContent.clear(); + } + sink.emitMessage(role, contentText, reasoning); + return absl::OkStatus(); + } + + void flushPendingFunctionCalls(const std::string& assistantText) { + if (pendingFunctionCalls.empty()) { + pendingReasoningContent.clear(); + return; + } + std::string reasoning = std::move(pendingReasoningContent); + pendingReasoningContent.clear(); + sink.emitAssistantWithToolCalls(assistantText, reasoning, pendingFunctionCalls); + pendingFunctionCalls.clear(); + } + + Sink& sink; + std::vector pendingFunctionCalls; + std::string pendingReasoningContent; +}; + +// Sink that appends to ov::genai::ChatHistory (used when Python is disabled +// or as the fallback C++ chat-history path). Owns a scratch rapidjson document +// whose allocator backs the tool_calls Values until they are deep-copied into +// a JsonContainer. 
+class ChatHistorySink { +public: + ChatHistorySink(ov::genai::ChatHistory& chatHistory, ImageHistory& imageHistory, + const std::optional& allowedLocalMediaPath, + const std::optional>& allowedMediaDomains) : + chatHistory(chatHistory), + imageHistory(imageHistory), + allowedLocalMediaPath(allowedLocalMediaPath), + allowedMediaDomains(allowedMediaDomains) { + scratchDoc.SetObject(); + } + + absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj, + rapidjson::SizeType index, std::string& outText) { + outText.clear(); + auto contentIt = itemObj.FindMember("content"); + if (contentIt == itemObj.MemberEnd()) + return absl::OkStatus(); + if (contentIt->value.IsString()) { + outText = contentIt->value.GetString(); + return absl::OkStatus(); + } + if (!contentIt->value.IsArray()) + return absl::InvalidArgumentError("input item content must be a string or array"); + for (const auto& contentItem : contentIt->value.GetArray()) { + if (!contentItem.IsObject()) + return absl::InvalidArgumentError("input content items must be objects"); + auto contentObj = contentItem.GetObject(); + auto typeIt = contentObj.FindMember("type"); + if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) + return absl::InvalidArgumentError("input content item type is missing or invalid"); + const std::string type = typeIt->value.GetString(); + if (type == "input_text" || type == "output_text") { + auto textIt = contentObj.FindMember("text"); + if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) + return absl::InvalidArgumentError(absl::StrCat(type, " requires a valid text field")); + // Last text-bearing item wins, matching pre-refactor behaviour. + outText = textIt->value.GetString(); + } else if (type == "input_image") { + auto status = appendInputImage(contentObj, index); + if (!status.ok()) + return status; + } else { + // Skip unrecognised content item types for forward compatibility. 
+ SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping unsupported content type: {}", type); + } + } + return absl::OkStatus(); + } + + void emitToolMessage(const std::string& callId, const std::string& output) { + chatHistory.push_back({}); + chatHistory.last()["role"] = "tool"; + if (!callId.empty()) + chatHistory.last()["tool_call_id"] = callId; + chatHistory.last()["content"] = output; + } + + void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) { + chatHistory.push_back({}); + chatHistory.last()["role"] = role; + chatHistory.last()["content"] = contentText; + if (!reasoning.empty()) + chatHistory.last()["reasoning_content"] = reasoning; + } + + void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning, + const std::vector& toolCalls) { + chatHistory.push_back({}); + chatHistory.last()["role"] = "assistant"; + chatHistory.last()["content"] = contentText; + if (!reasoning.empty()) + chatHistory.last()["reasoning_content"] = reasoning; + auto& alloc = scratchDoc.GetAllocator(); + rapidjson::Value toolCallsArray(rapidjson::kArrayType); + buildToolCallsArray(toolCalls, toolCallsArray, alloc); + // rapidJsonValueToJsonContainer deep-copies, so scratchDoc can be reused. 
+ chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray); + } + + absl::Status onMissingRole(const rapidjson::Value::ConstObject&) { + return absl::InvalidArgumentError("input item role is missing or invalid"); + } + +private: + absl::Status appendInputImage(const rapidjson::Value::ConstObject& contentObj, rapidjson::SizeType index) { + auto imageUrlIt = contentObj.FindMember("image_url"); + if (imageUrlIt == contentObj.MemberEnd()) + return absl::InvalidArgumentError("input_image requires image_url field"); + + std::string imageUrl; + if (imageUrlIt->value.IsString()) { + imageUrl = imageUrlIt->value.GetString(); + } else if (imageUrlIt->value.IsObject()) { + auto imageUrlObj = imageUrlIt->value.GetObject(); + auto urlIt = imageUrlObj.FindMember("url"); + if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) + return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid"); + imageUrl = urlIt->value.GetString(); + } else { + return absl::InvalidArgumentError("input_image.image_url must be a string or object"); + } + + auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); + if (!tensorResult.ok()) + return tensorResult.status(); + imageHistory.push_back({index, tensorResult.value()}); + return absl::OkStatus(); + } + + // Build a chat/completions tool_calls[] array into outArr using the given allocator. 
+ static void buildToolCallsArray(const std::vector& toolCalls, + rapidjson::Value& outArr, rapidjson::Document::AllocatorType& alloc) { + for (const auto* fc : toolCalls) { + const FunctionCallFields fields = readFunctionCallFields(*fc); + rapidjson::Value funcObj(rapidjson::kObjectType); + funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc); + funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc); + rapidjson::Value tcObj(rapidjson::kObjectType); + tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc); + tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc); + tcObj.AddMember("function", funcObj, alloc); + outArr.PushBack(tcObj, alloc); + } + } + + ov::genai::ChatHistory& chatHistory; + ImageHistory& imageHistory; + const std::optional& allowedLocalMediaPath; + const std::optional>& allowedMediaDomains; + rapidjson::Document scratchDoc; +}; + +#if (PYTHON_DISABLE == 0) +// Sink that appends to a rapidjson messages array, used to feed the Python +// Jinja chat template path. Image content items are silently dropped (the +// Python path receives only text). +class ProcessedJsonSink { +public: + ProcessedJsonSink(rapidjson::Value& messagesArray, rapidjson::Document::AllocatorType& alloc) : + messagesArray(messagesArray), + alloc(alloc) {} + + absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj, + rapidjson::SizeType /*index*/, std::string& outText) { + auto contentIt = itemObj.FindMember("content"); + outText = (contentIt != itemObj.MemberEnd()) ? 
extractTextContent(contentIt->value) : ""; + return absl::OkStatus(); + } + + void emitToolMessage(const std::string& callId, const std::string& output) { + rapidjson::Value msgObj(rapidjson::kObjectType); + msgObj.AddMember("role", rapidjson::Value("tool", alloc), alloc); + if (!callId.empty()) + msgObj.AddMember("tool_call_id", rapidjson::Value(callId.c_str(), alloc), alloc); + msgObj.AddMember("content", rapidjson::Value(output.c_str(), alloc), alloc); + messagesArray.PushBack(msgObj, alloc); + } + + void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) { + rapidjson::Value msgObj(rapidjson::kObjectType); + msgObj.AddMember("role", rapidjson::Value(role.c_str(), alloc), alloc); + msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc); + if (!reasoning.empty()) + msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); + messagesArray.PushBack(msgObj, alloc); + } + + void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning, + const std::vector& toolCalls) { + rapidjson::Value msgObj(rapidjson::kObjectType); + msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc); + msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc); + if (!reasoning.empty()) + msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); + rapidjson::Value toolCallsArray(rapidjson::kArrayType); + for (const auto* fc : toolCalls) { + const FunctionCallFields fields = readFunctionCallFields(*fc); + rapidjson::Value funcObj(rapidjson::kObjectType); + funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc); + funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc); + rapidjson::Value tcObj(rapidjson::kObjectType); + tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc); + tcObj.AddMember("type", 
rapidjson::Value("function", alloc), alloc); + tcObj.AddMember("function", funcObj, alloc); + toolCallsArray.PushBack(tcObj, alloc); + } + msgObj.AddMember("tool_calls", toolCallsArray, alloc); + messagesArray.PushBack(msgObj, alloc); + } + + absl::Status onMissingRole(const rapidjson::Value::ConstObject&) { + // Silently skip unknown items without a role in the processed JSON path. + return absl::OkStatus(); + } + +private: + rapidjson::Value& messagesArray; + rapidjson::Document::AllocatorType& alloc; +}; +#endif // PYTHON_DISABLE == 0 + // --- Request parsing --- absl::Status OpenAIResponsesHandler::parseRequest(std::optional maxTokensLimit, uint32_t bestOfLimit, std::optional maxModelLength, @@ -87,87 +579,12 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow if (inputIt->value.GetArray().Size() == 0) { return absl::InvalidArgumentError("Messages array cannot be empty"); } - - for (size_t i = 0; i < inputIt->value.GetArray().Size(); ++i) { - auto& item = inputIt->value.GetArray()[i]; - if (!item.IsObject()) { - return absl::InvalidArgumentError("input array items must be objects"); - } - - auto itemObj = item.GetObject(); - auto roleIt = itemObj.FindMember("role"); - if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) { - return absl::InvalidArgumentError("input item role is missing or invalid"); - } - - request.chatHistory.push_back({}); - request.chatHistory.last()["role"] = roleIt->value.GetString(); - - auto contentIt = itemObj.FindMember("content"); - if (contentIt == itemObj.MemberEnd()) { - return absl::InvalidArgumentError("input item content is missing"); - } - - if (contentIt->value.IsString()) { - request.chatHistory.last()["content"] = contentIt->value.GetString(); - continue; - } - - if (!contentIt->value.IsArray()) { - return absl::InvalidArgumentError("input item content must be a string or array"); - } - if (contentIt->value.GetArray().Size() == 0) { - return absl::InvalidArgumentError("Invalid message structure - 
content array is empty"); - } - - std::string contentText = ""; - for (auto& contentItem : contentIt->value.GetArray()) { - if (!contentItem.IsObject()) { - return absl::InvalidArgumentError("input content items must be objects"); - } - auto contentObj = contentItem.GetObject(); - auto typeIt = contentObj.FindMember("type"); - if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) { - return absl::InvalidArgumentError("input content item type is missing or invalid"); - } - - const std::string type = typeIt->value.GetString(); - if (type == "input_text") { - auto textIt = contentObj.FindMember("text"); - if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) { - return absl::InvalidArgumentError("input_text requires a valid text field"); - } - contentText = textIt->value.GetString(); - } else if (type == "input_image") { - std::string imageUrl; - auto imageUrlIt = contentObj.FindMember("image_url"); - if (imageUrlIt == contentObj.MemberEnd()) { - return absl::InvalidArgumentError("input_image requires image_url field"); - } - if (imageUrlIt->value.IsString()) { - imageUrl = imageUrlIt->value.GetString(); - } else if (imageUrlIt->value.IsObject()) { - auto imageUrlObj = imageUrlIt->value.GetObject(); - auto urlIt = imageUrlObj.FindMember("url"); - if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) { - return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid"); - } - imageUrl = urlIt->value.GetString(); - } else { - return absl::InvalidArgumentError("input_image.image_url must be a string or object"); - } - - auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); - if (!tensorResult.ok()) { - return tensorResult.status(); - } - request.imageHistory.push_back({i, tensorResult.value()}); - } else { - return absl::InvalidArgumentError("Unsupported content type. 
Supported types are input_text and input_image."); - } - } - - request.chatHistory.last()["content"] = contentText; + ChatHistorySink sink(request.chatHistory, request.imageHistory, + allowedLocalMediaPath, allowedMediaDomains); + ResponsesInputBuilder builder(sink); + auto status = builder.build(inputIt->value); + if (!status.ok()) { + return status; } } else { return absl::InvalidArgumentError("input is not a string or array"); @@ -189,6 +606,14 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional return absl::InvalidArgumentError("input missing in request"); } + // Convert tools array (Responses-flat -> chat/completions-nested) once, in place, + // before any consumer reads it. parseInput, parseToolsToJsonContainer and the + // processedJson builder all rely on the nested shape. + auto toolsIt = doc.FindMember("tools"); + if (toolsIt != doc.MemberEnd() && toolsIt->value.IsArray()) { + convertResponsesToolsInPlace(toolsIt->value, doc.GetAllocator()); + } + auto messagesStatus = parseInput(allowedLocalMediaPath, allowedMediaDomains); if (!messagesStatus.ok()) { return messagesStatus; @@ -228,30 +653,31 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional } #if (PYTHON_DISABLE == 0) - // Build processedJson with "messages" array from chatHistory so that - // the Python chat template path (which reads request_json["messages"]) - // can consume Responses API input without a separate code path. + // Build processedJson with a "messages" array in chat/completions format so that + // the Python Jinja template path can consume Responses API input without a separate code path. + // Handles reasoning, function_call (merged into assistant tool_calls), and + // function_call_output (converted to role:tool messages). 
{ Document processedDoc; processedDoc.SetObject(); auto& alloc = processedDoc.GetAllocator(); Value messagesArray(kArrayType); - for (size_t i = 0; i < request.chatHistory.size(); ++i) { - Value msgObj(kObjectType); - auto role = request.chatHistory[i]["role"].as_string(); - if (role.has_value()) { - msgObj.AddMember("role", Value(role.value().c_str(), alloc), alloc); - } - auto content = request.chatHistory[i]["content"].as_string(); - if (content.has_value()) { - msgObj.AddMember("content", Value(content.value().c_str(), alloc), alloc); + + auto inputArrIt = doc.FindMember("input"); + if (inputArrIt != doc.MemberEnd() && inputArrIt->value.IsArray()) { + ProcessedJsonSink sink(messagesArray, alloc); + ResponsesInputBuilder builder(sink); + auto processedStatus = builder.build(inputArrIt->value); + if (!processedStatus.ok()) { + return processedStatus; } - messagesArray.PushBack(msgObj, alloc); } + processedDoc.AddMember("messages", messagesArray, alloc); - // Copy tools from original doc if present + // Tools were already normalised to chat/completions nested format by + // convertResponsesToolsInPlace earlier in parseResponsesPart — just copy verbatim. 
auto toolsIt = doc.FindMember("tools"); if (toolsIt != doc.MemberEnd() && !toolsIt->value.IsNull()) { Value toolsCopy(toolsIt->value, alloc); diff --git a/src/llm/py_jinja_template_processor.cpp b/src/llm/py_jinja_template_processor.cpp index 432aa8e722..61116d3c5d 100644 --- a/src/llm/py_jinja_template_processor.cpp +++ b/src/llm/py_jinja_template_processor.cpp @@ -40,7 +40,7 @@ bool PyJinjaTemplateProcessor::applyChatTemplate(PyJinjaTemplateProcessor& templ output = "Error: Chat template not loaded correctly, so it cannot be applied"; return false; } - + SPDLOG_DEBUG("Before chat template: \n {}", requestBody); py::gil_scoped_acquire acquire; try { auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = templateProcessor.chatTemplate->getObject(), diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index 5a0955b4f5..b15bc12400 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -22,6 +22,7 @@ #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246 6313) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "absl/strings/str_cat.h" #include "mediapipe/framework/calculator_graph.h" #include #include @@ -209,7 +210,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. 
The model either does not have chat template or has an invalid one."); + return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what())); } #endif if (inputText.size() == 0) { @@ -241,7 +242,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. The model either does not have chat template or has an invalid one."); + return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what())); } #endif if (inputText.size() == 0) { From 3f0a8580463bdb0984f5b5c5c96453afe24208f4 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Wed, 13 May 2026 13:21:49 +0200 Subject: [PATCH 02/20] uts --- .../continuous_batching/agentic_ai/README.md | 2 +- src/llm/apis/openai_responses.cpp | 39 +- src/test/http_openai_handler_test.cpp | 761 ++++++++++++++++++ 3 files changed, 799 insertions(+), 3 deletions(-) diff --git a/demos/continuous_batching/agentic_ai/README.md b/demos/continuous_batching/agentic_ai/README.md index b630158a9d..2ba22afb8c 100644 --- a/demos/continuous_batching/agentic_ai/README.md +++ b/demos/continuous_batching/agentic_ai/README.md @@ -330,7 +330,7 @@ Pull and start OVMS: ```bash mkdir -p ${HOME}/models docker run -d --user $(id -u):$(id -g) --rm -p 8000:8000 -v ${HOME}/models:/models --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) openvino/model_server:weekly \ ---rest_port 8000 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com +--rest_port 8000 
--model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_name ovms-model --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com ``` Use MCP server, with additional image of Gdańsk old town. VLM model deduces location and calls `get_weather` tool to summarize the weather conditions in the city. diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 33341f185c..98f1549ecf 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -224,6 +224,12 @@ static absl::StatusOr classifyInputItem(const rapidjson: // "Message has tool role, but there was no previous assistant message with a // tool call!"). // +// Reasoning that is not followed by an assistant or function_call item is +// emitted as a standalone assistant turn with no `content` field and the buffered +// reasoning attached as `reasoning_content`. This preserves the model's +// chain-of-thought across turns even when the prior turn produced no visible +// output. +// // The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a // rapidjson messages array) is provided by the Sink template parameter, which // must implement: @@ -231,6 +237,7 @@ static absl::StatusOr classifyInputItem(const rapidjson: // void emitToolMessage(callId, output); // void emitMessage(role, contentText, reasoning); // reasoning empty -> skip // void emitAssistantWithToolCalls(contentText, reasoning, toolCalls); +// void emitStandaloneReasoning(reasoning); // assistant turn carrying only reasoning_content // absl::Status onMissingRole(itemObj); template class ResponsesInputBuilder { @@ -313,7 +320,8 @@ class ResponsesInputBuilder { return absl::OkStatus(); } // Non-assistant items must not absorb pending tool_calls; flush first. - // (flushPendingFunctionCalls also clears any orphan reasoning content.) 
+ // (flushPendingFunctionCalls also emits any standalone reasoning content + // as a standalone assistant turn.) if (role != "assistant") { flushPendingFunctionCalls(""); } @@ -329,7 +337,16 @@ class ResponsesInputBuilder { void flushPendingFunctionCalls(const std::string& assistantText) { if (pendingFunctionCalls.empty()) { - pendingReasoningContent.clear(); + // No tool calls, but possibly buffered reasoning to flush as a + // standalone assistant turn carrying only reasoning_content (no + // `content` field at all, so templates that gate on `message.content` + // skip the content branch and templates that gate on + // `message.reasoning_content` still see the buffered text). + if (!pendingReasoningContent.empty()) { + std::string reasoning = std::move(pendingReasoningContent); + pendingReasoningContent.clear(); + sink.emitStandaloneReasoning(reasoning); + } return; } std::string reasoning = std::move(pendingReasoningContent); @@ -427,6 +444,15 @@ class ChatHistorySink { chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray); } + // Emit an assistant turn that carries only reasoning_content (no content, + // no tool_calls). Used when reasoning is not followed by an assistant or + // function_call item. + void emitStandaloneReasoning(const std::string& reasoning) { + chatHistory.push_back({}); + chatHistory.last()["role"] = "assistant"; + chatHistory.last()["reasoning_content"] = reasoning; + } + absl::Status onMissingRole(const rapidjson::Value::ConstObject&) { return absl::InvalidArgumentError("input item role is missing or invalid"); } @@ -515,6 +541,15 @@ class ProcessedJsonSink { messagesArray.PushBack(msgObj, alloc); } + // Emit an assistant turn that carries only reasoning_content (no content, + // no tool_calls). See ChatHistorySink::emitStandaloneReasoning for rationale. 
+ void emitStandaloneReasoning(const std::string& reasoning) { + rapidjson::Value msgObj(rapidjson::kObjectType); + msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc); + msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); + messagesArray.PushBack(msgObj, alloc); + } + void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning, const std::vector& toolCalls) { rapidjson::Value msgObj(rapidjson::kObjectType); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 1587568466..202576e11b 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4292,3 +4292,764 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesRegularMessageHasNoToolFields) EXPECT_FALSE(history[1].contains("tool_call_id")); EXPECT_FALSE(history[1].contains("name")); } + +namespace { +std::shared_ptr parseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { + doc.Parse(json.c_str()); + EXPECT_FALSE(doc.HasParseError()) << json; + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + auto apiHandler = std::make_shared( + doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); + EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()) << json; + return apiHandler; +} + +// Variant for negative tests: returns the parseRequest status without asserting +// it is OK, so the caller can verify the failure mode. 
+absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { + doc.Parse(json.c_str()); + EXPECT_FALSE(doc.HasParseError()) << json; + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + auto apiHandler = std::make_shared( + doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); + return apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength); +} +} // namespace + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormalisedToNestedInDoc) { + // The chat template (e.g. gpt-oss) iterates tools looking up tool.function.name / + // tool.function.parameters. The Responses-flat shape ({type, name, parameters}) + // must be rewritten in-place to chat/completions nested shape before it is + // forwarded to the template. + std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{ + "type": "function", + "name": "get_weather", + "description": "Get current weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + EXPECT_TRUE(apiHandler->areToolsAvailable()); + + // Inspect the (now normalised) tools array on the request document directly. + ASSERT_TRUE(doc.HasMember("tools")); + ASSERT_TRUE(doc["tools"].IsArray()); + ASSERT_EQ(doc["tools"].Size(), 1u); + const auto& tool = doc["tools"][0]; + ASSERT_TRUE(tool.HasMember("function")); + ASSERT_TRUE(tool["function"].IsObject()); + EXPECT_STREQ(tool["function"]["name"].GetString(), "get_weather"); + EXPECT_STREQ(tool["function"]["description"].GetString(), "Get current weather"); + ASSERT_TRUE(tool["function"].HasMember("parameters")); + EXPECT_TRUE(tool["function"]["parameters"].IsObject()); + // The flat fields should have been moved under `function`, leaving only `type` + `function`. 
+ EXPECT_FALSE(tool.HasMember("name")); + EXPECT_FALSE(tool.HasMember("parameters")); + EXPECT_FALSE(tool.HasMember("description")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { + // Tools that are already in chat/completions nested shape must pass through + // untouched (no double-wrapping). + std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + } + }] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + EXPECT_TRUE(apiHandler->areToolsAvailable()); + ASSERT_TRUE(doc["tools"][0].HasMember("function")); + EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather"); + // No spurious nested wrap. + EXPECT_FALSE(doc["tools"][0]["function"].HasMember("function")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) { + // A bare reasoning item, then an assistant message: the reasoning text should + // ride on the next assistant message as reasoning_content (matching the + // gpt-oss template's expected field). It must NOT produce its own message. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[0]["role"].get_string(), "user"); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), "hello"); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "think first"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) { + // Reasoning followed directly by a user message (no assistant/function_call + // in between) is emitted as a standalone assistant turn with no `content` + // field and the buffered text attached as reasoning_content. This preserves the + // model's chain-of-thought across turns even when the prior turn produced + // no visible output. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_FALSE(history[1].contains("content")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "orphan"); + EXPECT_FALSE(history[1].contains("tool_calls")); + + EXPECT_EQ(history[2]["role"].get_string(), "user"); + EXPECT_FALSE(history[2].contains("reasoning_content")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) { + // Input ending with a reasoning item (no following assistant/function_call) + // — the buffered reasoning is flushed as a standalone trailing assistant + // turn rather than silently lost. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_FALSE(history[1].contains("content")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "trailing"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) { + // function_call followed by function_call_output should produce: + // user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...) 
+ // The assistant message MUST own a tool_calls field; otherwise gpt-oss + // raises "Message has tool role, but there was no previous assistant + // message with a tool call!". + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", + "output": "{\"temp_c\":17}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), ""); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_TRUE(history[1]["tool_calls"].is_array()); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"city\":\"Paris\"}"); + + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + EXPECT_EQ(history[2]["content"].get_string(), "{\"temp_c\":17}"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) { + // reasoning + function_call should both attach to the synthesised assistant + // turn that owns the tool_calls. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) { + // Two function_calls back-to-back must produce a single assistant message + // with two entries in tool_calls, not two assistant turns. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + // user, assistant(2 tool_calls), tool + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 2); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][1]["id"].get_string(), "call_2"); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) { + // Input ending with a function_call (no matching output) — the trailing + // function_call must still be flushed as an assistant message rather than + // silently lost. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) { + // If an assistant role item follows a function_call, its text content should + // ride on the same merged message (assistant-with-tool_calls), not produce + // a second assistant turn. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), "calling tool"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) { + // The newer reasoning shape: content[].text instead of summary[].text. + // OVMS accepts both. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, + {"role": "assistant", "content": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "new shape"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) { + // function_call_output without call_id: should still emit a tool message + // (with no tool_call_id field) rather than failing parsing. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_FALSE(history[2].contains("tool_call_id")); + EXPECT_EQ(history[2]["content"].get_string(), "ok"); +} + +#if (PYTHON_DISABLE == 0) +// processedJson (the chat/completions-shaped messages array fed to the Python +// Jinja chat template) must mirror the chat history layout for the same input. +// These tests assert the same buffering invariants on that path. 
+ +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMirrorsFunctionCallMerge) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("messages")); + const auto& messages = processedDoc["messages"]; + ASSERT_TRUE(messages.IsArray()); + ASSERT_EQ(messages.Size(), 3u); + + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1]["tool_calls"].IsArray()); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["type"].GetString(), "function"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); + EXPECT_STREQ(messages[2]["content"].GetString(), "ok"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonContainsNormalisedTools) { + // The tools forwarded to the template via processedJson must be in the + // chat/completions nested shape (because convertResponsesToolsInPlace + // normalised the doc before processedJson is built). 
+ std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{ + "type": "function", + "name": "get_weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("tools")); + ASSERT_TRUE(processedDoc["tools"].IsArray()); + ASSERT_EQ(processedDoc["tools"].Size(), 1u); + ASSERT_TRUE(processedDoc["tools"][0].HasMember("function")); + EXPECT_STREQ(processedDoc["tools"][0]["function"]["name"].GetString(), "get_weather"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAttachesReasoningOnAssistant) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"answer"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[1]["content"].GetString(), "answer"); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "think"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonStandaloneReasoningOmitsContent) { + // Mirror of ResponsesStandaloneReasoningWithoutAssistantIsEmitted on the + // processedJson path: an assistant turn carrying only reasoning_content + // (no `content`, no `tool_calls`). 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_FALSE(messages[1].HasMember("content")); + EXPECT_FALSE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "orphan"); + EXPECT_STREQ(messages[2]["role"].GetString(), "user"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingStandaloneReasoningOmitsContent) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"trailing"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_FALSE(messages[1].HasMember("content")); + EXPECT_FALSE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "trailing"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonReasoningPlusFunctionCallRidesOnAssistant) { + // Mirror of 
ResponsesReasoningPlusFunctionCallRidesOnAssistant: reasoning + // and tool_calls must land on the same JSON object. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultipleFunctionCallsMergedInOneAssistant) { + // Mirror of ResponsesMultipleFunctionCallsMergedInOneAssistant: validates + // the rapidjson tool_calls array growth across PushBack calls. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 2u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); + EXPECT_STREQ(messages[1]["tool_calls"][1]["id"].GetString(), "call_2"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + EXPECT_STREQ(messages[1]["tool_calls"][1]["function"]["arguments"].GetString(), "{\"city\":\"London\"}"); + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingFunctionCallFlushedAsAssistant) { + // Mirror of ResponsesTrailingFunctionCallFlushedAsAssistant: trailing + // function_call without output produces an assistant turn with tool_calls + // and no following tool message. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAssistantMessageAbsorbsBufferedFunctionCall) { + // Mirror of ResponsesAssistantMessageAbsorbsBufferedFunctionCall: assistant + // text content and tool_calls coexist on a single JSON object. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("content")); + EXPECT_STREQ(messages[1]["content"].GetString(), "calling tool"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); +} +#endif // PYTHON_DISABLE == 0 + +// --- Tools normalisation edge cases --- + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) { + // Flat Responses tools may omit `parameters` for zero-arg functions. The + // nested form should still be produced (with no `parameters` key under + // function), not fail or fabricate one. + std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{"type": "function", "name": "ping", "description": "no args"}] + })"; + parseResponses(doc, *tokenizer, json); + ASSERT_TRUE(doc.HasMember("tools")); + ASSERT_TRUE(doc["tools"].IsArray()); + ASSERT_EQ(doc["tools"].Size(), 1u); + const auto& tool = doc["tools"][0]; + ASSERT_TRUE(tool.HasMember("function")); + EXPECT_STREQ(tool["function"]["name"].GetString(), "ping"); + EXPECT_STREQ(tool["function"]["description"].GetString(), "no args"); + EXPECT_FALSE(tool["function"].HasMember("parameters")); + // The flat-shape `name` field at top level must have been removed. 
+ EXPECT_FALSE(tool.HasMember("name")); + EXPECT_FALSE(tool.HasMember("description")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesNonFunctionToolLeftIntact) { + // Tools with an unrecognised `type` (e.g. a future built-in tool) must be + // passed through verbatim rather than being incorrectly rewrapped. + std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{"type": "web_search", "name": "search"}] + })"; + parseResponses(doc, *tokenizer, json); + ASSERT_TRUE(doc["tools"].IsArray()); + ASSERT_EQ(doc["tools"].Size(), 1u); + const auto& tool = doc["tools"][0]; + EXPECT_STREQ(tool["type"].GetString(), "web_search"); + EXPECT_STREQ(tool["name"].GetString(), "search"); + EXPECT_FALSE(tool.HasMember("function")); +} + +// --- Error paths --- + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputItemMissingRoleIsRejected) { + // An input item with no recognised `type` and no `role` cannot be + // classified — the chat-history sink must surface this as an + // InvalidArgumentError rather than silently dropping the turn. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"content": [{"type":"output_text","text":"orphaned"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("role")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentNotStringOrArrayIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": 42} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("content")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentItemMissingTypeIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"text":"no type field"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("type")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputTextMissingTextFieldIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("text")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputArrayItemNotObjectIsRejected) { + std::string json = R"({ + "model": "llama", + "input": ["not an object"] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("must be 
objects")); +} + +// --- Multi-turn composite --- + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndFollowupAssistant) { + // End-to-end: user -> reasoning + function_call (merged on synthesised + // assistant) -> function_call_output -> reasoning + assistant final answer. + // Validates that buffering state is correctly reset between turns. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 4); + + // user + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + // synthesised assistant: empty content + reasoning + tool_calls + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), ""); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); + + // tool result + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + EXPECT_EQ(history[2]["content"].get_string(), 
"sunny, 22C"); + EXPECT_FALSE(history[2].contains("reasoning_content")); + EXPECT_FALSE(history[2].contains("tool_calls")); + + // final assistant turn: second reasoning buffer must have been used here, + // not leaked from the first turn or carried over. + EXPECT_EQ(history[3]["role"].get_string(), "assistant"); + EXPECT_EQ(history[3]["content"].get_string(), "It is sunny and 22C in Paris."); + ASSERT_TRUE(history[3].contains("reasoning_content")); + EXPECT_EQ(history[3]["reasoning_content"].get_string(), "format the answer"); + EXPECT_FALSE(history[3].contains("tool_calls")); +} + +#if (PYTHON_DISABLE == 0) +// Re-open the PYTHON_DISABLE block to keep the processedJson companion next to +// the chat-history multi-turn test above. +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultiTurnMirrorsChatHistory) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 4u); + + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[1]["content"].GetString(), ""); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + 
EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); + EXPECT_STREQ(messages[2]["content"].GetString(), "sunny, 22C"); + + EXPECT_STREQ(messages[3]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[3]["content"].GetString(), "It is sunny and 22C in Paris."); + ASSERT_TRUE(messages[3].HasMember("reasoning_content")); + EXPECT_STREQ(messages[3]["reasoning_content"].GetString(), "format the answer"); + EXPECT_FALSE(messages[3].HasMember("tool_calls")); +} +#endif // PYTHON_DISABLE == 0 From 041265c73366ffc4863cc2600dce76e601efe02a Mon Sep 17 00:00:00 2001 From: mkulakow Date: Wed, 13 May 2026 14:46:30 +0200 Subject: [PATCH 03/20] Update tests --- src/test/http_openai_handler_test.cpp | 926 +++++++++----------------- 1 file changed, 331 insertions(+), 595 deletions(-) diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 202576e11b..d4cdfb6c7c 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4318,46 +4318,112 @@ absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& t doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); return apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength); } -} // namespace -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormalisedToNestedInDoc) { - // The chat template (e.g. gpt-oss) iterates tools looking up tool.function.name / - // tool.function.parameters. The Responses-flat shape ({type, name, parameters}) - // must be rewritten in-place to chat/completions nested shape before it is - // forwarded to the template. 
- std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{ - "type": "function", - "name": "get_weather", - "description": "Get current weather", - "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} - }] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - EXPECT_TRUE(apiHandler->areToolsAvailable()); +// Assert that parsing the given Responses API request produces a chat history +// (and processedJson, when Python is enabled) equivalent to the expected +// chat/completions request. +// +// The expected JSON is a chat/completions REQUEST body — an object with a +// "messages" array and optionally a "tools" array. This makes each test read as +// "given this Responses input, OVMS should produce this chat/completions +// request" — which is exactly the contract of the Responses-to-chat/completions +// translator. +// +// Comparison is structural via rapidjson Value::operator== (member order inside +// objects is irrelevant). +// +// Both the chat-history path (used in the C++/non-Python build) and the +// processedJson path (used by the Python Jinja template) are checked, so a +// single test pins both downstream consumers. +void expectResponsesEquivalentToChatCompletions(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, + const std::string& responsesRequest, const std::string& expectedChatCompletions) { + auto handler = parseResponses(doc, tokenizer, responsesRequest); - // Inspect the (now normalised) tools array on the request document directly. 
- ASSERT_TRUE(doc.HasMember("tools")); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - ASSERT_TRUE(tool.HasMember("function")); - ASSERT_TRUE(tool["function"].IsObject()); - EXPECT_STREQ(tool["function"]["name"].GetString(), "get_weather"); - EXPECT_STREQ(tool["function"]["description"].GetString(), "Get current weather"); - ASSERT_TRUE(tool["function"].HasMember("parameters")); - EXPECT_TRUE(tool["function"]["parameters"].IsObject()); - // The flat fields should have been moved under `function`, leaving only `type` + `function`. - EXPECT_FALSE(tool.HasMember("name")); - EXPECT_FALSE(tool.HasMember("parameters")); - EXPECT_FALSE(tool.HasMember("description")); + rapidjson::Document expectedDoc; + expectedDoc.Parse(expectedChatCompletions.c_str()); + ASSERT_FALSE(expectedDoc.HasParseError()) + << "could not parse expected chat/completions: " << expectedChatCompletions; + ASSERT_TRUE(expectedDoc.HasMember("messages")) + << "expected chat/completions JSON must contain a 'messages' array"; + + // --- ChatHistory path (C++ / non-Python build) --- + const std::string actualHistoryJson = handler->getChatHistory().get_messages().to_json_string(); + rapidjson::Document actualHistoryDoc; + actualHistoryDoc.Parse(actualHistoryJson.c_str()); + ASSERT_FALSE(actualHistoryDoc.HasParseError()) << actualHistoryJson; + EXPECT_TRUE(actualHistoryDoc == expectedDoc["messages"]) + << "ChatHistory messages mismatch.\n actual: " << actualHistoryJson + << "\n expected: " << expectedChatCompletions; + // Tools on the C++ path are exposed via parseToolsToJsonContainer() — that + // is exactly what the non-Python servable forwards to GenAI. Compare its + // serialised JSON against the expected chat/completions tools. 
+ if (expectedDoc.HasMember("tools")) { + auto toolsStatus = handler->parseToolsToJsonContainer(); + ASSERT_TRUE(toolsStatus.ok()) << "parseToolsToJsonContainer failed: " << toolsStatus.status().message(); + ASSERT_TRUE(toolsStatus.value().has_value()) << "parseToolsToJsonContainer returned nullopt"; + const std::string actualToolsJson = toolsStatus.value()->to_json_string(); + rapidjson::Document actualToolsDoc; + actualToolsDoc.Parse(actualToolsJson.c_str()); + ASSERT_FALSE(actualToolsDoc.HasParseError()) << actualToolsJson; + EXPECT_TRUE(actualToolsDoc == expectedDoc["tools"]) + << "parseToolsToJsonContainer mismatch.\n actual: " << actualToolsJson + << "\n expected: " << expectedChatCompletions; + } + +#if (PYTHON_DISABLE == 0) + // --- processedJson path (Python Jinja chat template) --- + const std::string actualProcessedJson = handler->getProcessedJson(); + rapidjson::Document actualProcessedDoc; + actualProcessedDoc.Parse(actualProcessedJson.c_str()); + ASSERT_FALSE(actualProcessedDoc.HasParseError()) << actualProcessedJson; + ASSERT_TRUE(actualProcessedDoc.HasMember("messages")) << actualProcessedJson; + EXPECT_TRUE(actualProcessedDoc["messages"] == expectedDoc["messages"]) + << "processedJson messages mismatch.\n actual: " << actualProcessedJson + << "\n expected: " << expectedChatCompletions; + if (expectedDoc.HasMember("tools")) { + ASSERT_TRUE(actualProcessedDoc.HasMember("tools")) << actualProcessedJson; + EXPECT_TRUE(actualProcessedDoc["tools"] == expectedDoc["tools"]) + << "processedJson tools mismatch.\n actual: " << actualProcessedJson + << "\n expected: " << expectedChatCompletions; + } +#endif +} +} // namespace + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormaliseToChatCompletions) { + // Responses-flat tools shape ({type, name, parameters}) must be rewritten + // to chat/completions nested shape ({type, function:{...}}) before the + // request is forwarded to the chat template. 
Input is given as an array so + // both ChatHistory and processedJson sinks populate the messages array. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}], + "tools": [{ + "type": "function", + "name": "get_weather", + "description": "Get current weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })", + R"({ + "messages": [{"role":"user","content":"hello"}], + "tools": [{ + "type":"function", + "function":{ + "name":"get_weather", + "description":"Get current weather", + "parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + } + }] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { - // Tools that are already in chat/completions nested shape must pass through - // untouched (no double-wrapping). + // Tools already in chat/completions nested shape must pass through without + // double-wrapping. This is asserted directly on the (in-place mutated) + // request document because the equivalence helper would not detect a + // spurious unwrap+rewrap that nets to the same shape. std::string json = R"({ "model": "llama", "input": "hello", @@ -4373,527 +4439,260 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { EXPECT_TRUE(apiHandler->areToolsAvailable()); ASSERT_TRUE(doc["tools"][0].HasMember("function")); EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather"); - // No spurious nested wrap. EXPECT_FALSE(doc["tools"][0]["function"].HasMember("function")); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) { - // A bare reasoning item, then an assistant message: the reasoning text should - // ride on the next assistant message as reasoning_content (matching the - // gpt-oss template's expected field). 
It must NOT produce its own message. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[0]["role"].get_string(), "user"); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), "hello"); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "think first"); + // A bare reasoning item, then an assistant message: the reasoning text + // rides on the next assistant message as reasoning_content and does NOT + // produce its own message. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","content":"hello","reasoning_content":"think first"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) { - // Reasoning followed directly by a user message (no assistant/function_call - // in between) is emitted as a standalone assistant turn with empty content - // and the buffered text attached as reasoning_content. This preserves the - // model's chain-of-thought across turns even when the prior turn produced - // no visible output. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, - {"role": "user", "content": [{"type":"input_text","text":"again"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_FALSE(history[1].contains("content")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "orphan"); - EXPECT_FALSE(history[1].contains("tool_calls")); - - EXPECT_EQ(history[2]["role"].get_string(), "user"); - EXPECT_FALSE(history[2].contains("reasoning_content")); + // Reasoning followed by a non-assistant/non-function_call item is flushed + // as a standalone assistant turn carrying ONLY reasoning_content (no + // `content`, no `tool_calls`). This preserves the chain-of-thought across + // turns even when the prior turn produced no visible output. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","reasoning_content":"orphan"}, + {"role":"user","content":"again"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) { - // Input ending with a reasoning item (no following assistant/function_call) - // — the buffered reasoning is flushed as a standalone trailing assistant - // turn rather than silently lost. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_FALSE(history[1].contains("content")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "trailing"); + // Input ending with a reasoning item — the buffered reasoning is flushed + // as a trailing standalone assistant turn rather than silently lost. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","reasoning_content":"trailing"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) { // function_call followed by function_call_output should produce: // user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...) - // The assistant message MUST own a tool_calls field; otherwise gpt-oss - // raises "Message has tool role, but there was no previous assistant - // message with a tool call!". 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", - "output": "{\"temp_c\":17}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), ""); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_TRUE(history[1]["tool_calls"].is_array()); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"city\":\"Paris\"}"); - - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); - EXPECT_EQ(history[2]["content"].get_string(), "{\"temp_c\":17}"); + // The synthesised assistant message MUST own a tool_calls field; otherwise + // gpt-oss raises "Message has tool role, but there was no previous + // assistant message with a tool call!". 
+ expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", + "output": "{\"temp_c\":17}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"{\"temp_c\":17}"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) { // reasoning + function_call should both attach to the synthesised assistant // turn that owns the tool_calls. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", 
"summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"ok"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) { // Two function_calls back-to-back must produce a single assistant message // with two entries in tool_calls, not two assistant turns. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call", "id": "call_2", "call_id": "call_2", - "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "15C"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - // user, assistant(2 tool_calls), tool - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 2); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][1]["id"].get_string(), "call_2"); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + 
"model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}, + {"id":"call_2","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"London\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"15C"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) { // Input ending with a function_call (no matching output) — the trailing - // function_call must still be flushed as an assistant message rather than + // function_call must still be flushed as an assistant turn rather than // silently lost. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) { - // If an assistant role item follows a function_call, its text content should - // ride on the same merged message (assistant-with-tool_calls), not produce - // a second assistant turn. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"role": "assistant", "content": "calling tool"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), "calling tool"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); + // If an assistant role item follows a function_call, its text content + // should ride on the same merged message (assistant-with-tool_calls), not + // produce a second assistant turn. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"calling tool","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) { // The newer reasoning shape: content[].text instead of summary[].text. - // OVMS accepts both. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, - {"role": "assistant", "content": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "new shape"); + // OVMS accepts both and produces the same chat/completions output. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, + {"role": "assistant", "content": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","content":"ok","reasoning_content":"new shape"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) { - // function_call_output without call_id: should still emit a tool message - // (with no tool_call_id field) rather than failing parsing. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{}"}, - {"type": "function_call_output", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_FALSE(history[2].contains("tool_call_id")); - EXPECT_EQ(history[2]["content"].get_string(), "ok"); + // function_call_output without call_id: the resulting tool message has no + // tool_call_id field rather than failing parsing or carrying an empty id. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{}"}} + ]}, + {"role":"tool","content":"ok"} + ] + })"); } -#if (PYTHON_DISABLE == 0) -// processedJson (the chat/completions-shaped messages array fed to the Python -// Jinja chat template) must mirror the chat history layout for the same input. -// These tests assert the same buffering invariants on that path. 
- -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMirrorsFunctionCallMerge) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - ASSERT_TRUE(processedDoc.HasMember("messages")); - const auto& messages = processedDoc["messages"]; - ASSERT_TRUE(messages.IsArray()); - ASSERT_EQ(messages.Size(), 3u); - - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1]["tool_calls"].IsArray()); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["type"].GetString(), "function"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); - EXPECT_STREQ(messages[2]["content"].GetString(), "ok"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonContainsNormalisedTools) { - // The tools forwarded to the template via processedJson must be in the - // chat/completions nested shape (because convertResponsesToolsInPlace - // normalised the doc before processedJson is built). 
- std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{ - "type": "function", - "name": "get_weather", - "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} - }] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - ASSERT_TRUE(processedDoc.HasMember("tools")); - ASSERT_TRUE(processedDoc["tools"].IsArray()); - ASSERT_EQ(processedDoc["tools"].Size(), 1u); - ASSERT_TRUE(processedDoc["tools"][0].HasMember("function")); - EXPECT_STREQ(processedDoc["tools"][0]["function"]["name"].GetString(), "get_weather"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAttachesReasoningOnAssistant) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"think"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"answer"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[1]["content"].GetString(), "answer"); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "think"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonStandaloneReasoningOmitsContent) { - // Mirror of ResponsesStandaloneReasoningWithoutAssistantIsEmitted on the - // processedJson path: an assistant turn carrying only reasoning_content - // (no `content`, no `tool_calls`). 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, - {"role": "user", "content": [{"type":"input_text","text":"again"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_FALSE(messages[1].HasMember("content")); - EXPECT_FALSE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "orphan"); - EXPECT_STREQ(messages[2]["role"].GetString(), "user"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingStandaloneReasoningOmitsContent) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "content": [{"type":"reasoning_text","text":"trailing"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_FALSE(messages[1].HasMember("content")); - EXPECT_FALSE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "trailing"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonReasoningPlusFunctionCallRidesOnAssistant) { - // Mirror of 
ResponsesReasoningPlusFunctionCallRidesOnAssistant: reasoning - // and tool_calls must land on the same JSON object. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultipleFunctionCallsMergedInOneAssistant) { - // Mirror of ResponsesMultipleFunctionCallsMergedInOneAssistant: validates - // the rapidjson tool_calls array growth across PushBack calls. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call", "id": "call_2", "call_id": "call_2", - "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "15C"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 2u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); - EXPECT_STREQ(messages[1]["tool_calls"][1]["id"].GetString(), "call_2"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - EXPECT_STREQ(messages[1]["tool_calls"][1]["function"]["arguments"].GetString(), "{\"city\":\"London\"}"); - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingFunctionCallFlushedAsAssistant) { - // Mirror of ResponsesTrailingFunctionCallFlushedAsAssistant: trailing - // function_call without output produces an assistant turn with tool_calls - // and no following tool message. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAssistantMessageAbsorbsBufferedFunctionCall) { - // Mirror of ResponsesAssistantMessageAbsorbsBufferedFunctionCall: assistant - // text content and tool_calls coexist on a single JSON object. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"role": "assistant", "content": "calling tool"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("content")); - EXPECT_STREQ(messages[1]["content"].GetString(), "calling tool"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); -} -#endif // PYTHON_DISABLE == 0 - // --- Tools normalisation edge cases --- TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) { // Flat Responses tools may omit `parameters` for zero-arg functions. The // nested form should still be produced (with no `parameters` key under - // function), not fail or fabricate one. - std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{"type": "function", "name": "ping", "description": "no args"}] - })"; - parseResponses(doc, *tokenizer, json); - ASSERT_TRUE(doc.HasMember("tools")); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - ASSERT_TRUE(tool.HasMember("function")); - EXPECT_STREQ(tool["function"]["name"].GetString(), "ping"); - EXPECT_STREQ(tool["function"]["description"].GetString(), "no args"); - EXPECT_FALSE(tool["function"].HasMember("parameters")); - // The flat-shape `name` field at top level must have been removed. 
- EXPECT_FALSE(tool.HasMember("name")); - EXPECT_FALSE(tool.HasMember("description")); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesNonFunctionToolLeftIntact) { - // Tools with an unrecognised `type` (e.g. a future built-in tool) must be - // passed through verbatim rather than being incorrectly rewrapped. - std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{"type": "web_search", "name": "search"}] - })"; - parseResponses(doc, *tokenizer, json); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - EXPECT_STREQ(tool["type"].GetString(), "web_search"); - EXPECT_STREQ(tool["name"].GetString(), "search"); - EXPECT_FALSE(tool.HasMember("function")); + // function), not fail or fabricate one. Input is given as an array so + // both ChatHistory and processedJson sinks populate the messages array. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}], + "tools": [{"type": "function", "name": "ping", "description": "no args"}] + })", + R"({ + "messages": [{"role":"user","content":"hello"}], + "tools": [{"type":"function","function":{"name":"ping","description":"no args"}}] + })"); } // --- Error paths --- @@ -4966,90 +4765,27 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndF // End-to-end: user -> reasoning + function_call (merged on synthesised // assistant) -> function_call_output -> reasoning + assistant final answer. // Validates that buffering state is correctly reset between turns. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 4); - - // user - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - // synthesised assistant: empty content + reasoning + tool_calls - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), ""); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); - - // tool result - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); - EXPECT_EQ(history[2]["content"].get_string(), "sunny, 22C"); - EXPECT_FALSE(history[2].contains("reasoning_content")); - EXPECT_FALSE(history[2].contains("tool_calls")); - - // final assistant turn: second reasoning buffer must have been used here, - // not leaked from the first turn or carried over. 
- EXPECT_EQ(history[3]["role"].get_string(), "assistant"); - EXPECT_EQ(history[3]["content"].get_string(), "It is sunny and 22C in Paris."); - ASSERT_TRUE(history[3].contains("reasoning_content")); - EXPECT_EQ(history[3]["reasoning_content"].get_string(), "format the answer"); - EXPECT_FALSE(history[3].contains("tool_calls")); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather in Paris?"}, + {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"sunny, 22C"}, + {"role":"assistant","content":"It is sunny and 22C in Paris.","reasoning_content":"format the answer"} + ] + })"); } - -#if (PYTHON_DISABLE == 0) -// Re-open the PYTHON_DISABLE block to keep the processedJson companion next to -// the chat-history multi-turn test above. 
-TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultiTurnMirrorsChatHistory) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 4u); - - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[1]["content"].GetString(), ""); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); - EXPECT_STREQ(messages[2]["content"].GetString(), "sunny, 22C"); - - EXPECT_STREQ(messages[3]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[3]["content"].GetString(), "It is sunny and 22C in Paris."); - ASSERT_TRUE(messages[3].HasMember("reasoning_content")); - EXPECT_STREQ(messages[3]["reasoning_content"].GetString(), "format the answer"); - 
EXPECT_FALSE(messages[3].HasMember("tool_calls")); -} -#endif // PYTHON_DISABLE == 0 From 9a925220219e74e80301b3b7eba8365c836c5d4a Mon Sep 17 00:00:00 2001 From: mkulakow Date: Thu, 14 May 2026 11:35:26 +0200 Subject: [PATCH 04/20] fix --- .../continuous_batching/agentic_ai/README.md | 2 +- src/llm/apis/openai_responses.cpp | 6 ++- src/llm/py_jinja_template_processor.cpp | 1 - src/llm/servable.cpp | 2 +- .../continuous_batching/servable.cpp | 6 +++ src/test/http_openai_handler_test.cpp | 46 +++++++++++++++++++ 6 files changed, 59 insertions(+), 4 deletions(-) diff --git a/demos/continuous_batching/agentic_ai/README.md b/demos/continuous_batching/agentic_ai/README.md index 2ba22afb8c..b630158a9d 100644 --- a/demos/continuous_batching/agentic_ai/README.md +++ b/demos/continuous_batching/agentic_ai/README.md @@ -330,7 +330,7 @@ Pull and start OVMS: ```bash mkdir -p ${HOME}/models docker run -d --user $(id -u):$(id -g) --rm -p 8000:8000 -v ${HOME}/models:/models --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) openvino/model_server:weekly \ ---rest_port 8122 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_name ovms-model --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com +--rest_port 8000 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com ``` Use MCP server, with additional image of Gdańsk old town. VLM model deduces location and calls `get_weather` tool to summarize the weather conditions in the city. 
diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 98f1549ecf..8bfd992fce 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -90,10 +90,14 @@ static void convertResponsesToolsInPlace(rapidjson::Value& toolsArray, rapidjson continue; } const std::string fieldName = memberIt->name.GetString(); - if (fieldName == "type" || fieldName == "response") { + if (fieldName == "type") { ++memberIt; continue; } + if (fieldName == "response") { + memberIt = tool.EraseMember(memberIt); + continue; + } rapidjson::Value keyCopy(memberIt->name, alloc); rapidjson::Value valCopy(memberIt->value, alloc); funcObj.AddMember(keyCopy, valCopy, alloc); diff --git a/src/llm/py_jinja_template_processor.cpp b/src/llm/py_jinja_template_processor.cpp index 61116d3c5d..188a3c0daa 100644 --- a/src/llm/py_jinja_template_processor.cpp +++ b/src/llm/py_jinja_template_processor.cpp @@ -40,7 +40,6 @@ bool PyJinjaTemplateProcessor::applyChatTemplate(PyJinjaTemplateProcessor& templ output = "Error: Chat template not loaded correctly, so it cannot be applied"; return false; } - SPDLOG_DEBUG("Before chat template: \n {}", requestBody); py::gil_scoped_acquire acquire; try { auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = templateProcessor.chatTemplate->getObject(), diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index b15bc12400..d35db8d3b2 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -242,7 +242,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what())); + return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat 
template. The model either does not have chat template or has an invalid one."); } #endif if (inputText.size() == 0) { diff --git a/src/llm/visual_language_model/continuous_batching/servable.cpp b/src/llm/visual_language_model/continuous_batching/servable.cpp index 0ef06d22df..defa1af281 100644 --- a/src/llm/visual_language_model/continuous_batching/servable.cpp +++ b/src/llm/visual_language_model/continuous_batching/servable.cpp @@ -105,6 +105,12 @@ absl::Status VisualLanguageModelServable::prepareInputs(std::shared_ptrto_json_string() : std::string("")); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatTemplateKwargs: {}", chatTemplateKwargs.has_value() ? chatTemplateKwargs->to_json_string() : std::string("")); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM addGenerationPrompt: {}", addGenerationPrompt); vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } else { return absl::InvalidArgumentError("Unsupported endpoint"); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index d4cdfb6c7c..1b9d62b31d 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4789,3 +4789,49 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndF ] })"); } + +// Real BFCL replay shape: between every function_call and its function_call_output +// the OpenAI SDK echoes back the empty assistant message that ovms returned in +// `output[]`. With multiple turns this looks like: +// user -> fc1 -> {id:msg-0,role:assistant,type:message,content:[{type:output_text,text:""}]} +// -> fco1 -> fc2 -> msg-0 -> fco2 -> ... +// The 4th request OVMS sees while running BFCL multi_turn_base_0 reports 128 +// MORE input_tokens on /responses than the equivalent /chat/completions call, +// even though the message lists are structurally equivalent. 
This test +// reproduces the exact shape so processedJson can be compared head-to-head. +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesBfclReplayShapeWithEchoedAssistantMessages) { + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": "do work"}, + {"type": "function_call", "id": "fc1", "call_id": "fc1", + "name": "mkdir", "arguments": "{\"dir_name\":\"temp\"}", + "namespace": null, "status": "completed"}, + {"id": "msg-0", "type": "message", "role": "assistant", "status": "completed", + "content": [{"type": "output_text", "text": "", "annotations": [], "logprobs": null}], + "phase": null}, + {"type": "function_call_output", "call_id": "fc1", "output": "None"}, + {"type": "function_call", "id": "fc2", "call_id": "fc2", + "name": "mv", "arguments": "{\"source\":\"a\",\"destination\":\"temp\"}", + "namespace": null, "status": "completed"}, + {"id": "msg-0", "type": "message", "role": "assistant", "status": "completed", + "content": [{"type": "output_text", "text": "", "annotations": [], "logprobs": null}], + "phase": null}, + {"type": "function_call_output", "call_id": "fc2", "output": "{\"error\":\"no\"}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"do work"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"fc1","type":"function","function":{"name":"mkdir","arguments":"{\"dir_name\":\"temp\"}"}} + ]}, + {"role":"tool","tool_call_id":"fc1","content":"None"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"fc2","type":"function","function":{"name":"mv","arguments":"{\"source\":\"a\",\"destination\":\"temp\"}"}} + ]}, + {"role":"tool","tool_call_id":"fc2","content":"{\"error\":\"no\"}"} + ] + })"); +} From 35c8fbab1fbae8734ad7d0072eb710c89978103b Mon Sep 17 00:00:00 2001 From: mkulakow Date: Thu, 14 May 2026 13:01:43 +0200 Subject: [PATCH 05/20] fix --- src/llm/apis/openai_responses.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 8bfd992fce..17dbf2bb0b 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -717,9 +717,9 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional // Tools were already normalised to chat/completions nested format by // convertResponsesToolsInPlace earlier in parseResponsesPart — just copy verbatim. - auto toolsIt = doc.FindMember("tools"); - if (toolsIt != doc.MemberEnd() && !toolsIt->value.IsNull()) { - Value toolsCopy(toolsIt->value, alloc); + auto processedToolsIt = doc.FindMember("tools"); + if (processedToolsIt != doc.MemberEnd() && !processedToolsIt->value.IsNull()) { + Value toolsCopy(processedToolsIt->value, alloc); processedDoc.AddMember("tools", toolsCopy, alloc); } From 6137d45057b05ce59424c57df745afacdefc7346 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Thu, 14 May 2026 14:20:41 +0200 Subject: [PATCH 06/20] fix --- src/llm/apis/openai_responses.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 17dbf2bb0b..1bc5df95fc 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -711,6 +711,13 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional if (!processedStatus.ok()) { return processedStatus; } + } else if (inputArrIt != doc.MemberEnd() && inputArrIt->value.IsString()) { + // String input: emit a single user message so the Python Jinja path + // sees the same content the C++ chatHistory path does. 
+ Value msgObj(kObjectType); + msgObj.AddMember("role", Value("user", alloc), alloc); + msgObj.AddMember("content", Value(inputArrIt->value.GetString(), alloc), alloc); + messagesArray.PushBack(msgObj, alloc); } processedDoc.AddMember("messages", messagesArray, alloc); From d803ae2856c6e2059dfd8e251910f75dff8a5938 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Fri, 15 May 2026 13:23:53 +0200 Subject: [PATCH 07/20] Fix response streaming --- src/llm/apis/openai_responses.cpp | 10 +++++++ src/test/http_openai_handler_test.cpp | 41 +++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 1bc5df95fc..932e3e7a94 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -1537,6 +1537,16 @@ std::string OpenAIResponsesHandler::serializeStreamingChunk(const std::string& c events.emplace_back(std::move(inProgressEvent)); } + // Lifecycle priming: when the servable invokes serializeStreamingChunk("") + // before the first token is generated (Responses-only behavior), we must + // only emit lifecycle events and skip the parser. Feeding an empty chunk + // into outputParser->parseChunk would advance processingPhase from UNKNOWN + // to CONTENT and cause subsequent reasoning-tag chunks to leak into + // delta.content. 
+ if (chunkResponse.empty() && finishReason == ov::genai::GenerationFinishReason::NONE) { + return joinServerSideEvents(events); + } + if (outputParser != nullptr) { // Use output parser to separate reasoning from content std::optional delta = outputParser->parseChunk(chunkResponse, areToolsAvailable(), finishReason); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 1b9d62b31d..708170216b 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -1388,6 +1388,47 @@ TEST_F(HttpOpenAIHandlerParsingTest, serializeStreamingChunkForResponsesWithReas ASSERT_NE(finalChunk.find("\"type\":\"reasoning\""), std::string::npos) << "Completed response should include reasoning: " << finalChunk; } +// Regression test: the Responses streaming path may call serializeStreamingChunk("") +// before the first token is generated to flush lifecycle events +// (response.created / response.in_progress). That priming call must NOT feed +// the empty chunk to the output parser, otherwise the parser advances its +// processing phase from UNKNOWN to CONTENT on an empty buffer and subsequent +// reasoning-tag chunks (e.g. ...) are misclassified as content. +TEST_F(HttpOpenAIHandlerParsingTest, serializeStreamingChunkEmptyPrimingDoesNotPoisonReasoningParser) { + std::string json = R"({ + "model": "llama", + "input": "Think about this", + "stream": true + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + + auto apiHandler = std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer, "", "qwen3"); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + ASSERT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()); + + // Empty priming call: should emit only lifecycle events, never output_text.delta, + // and must not move the parser past the reasoning start tag. 
+ std::string primingChunk = apiHandler->serializeStreamingChunk("", ov::genai::GenerationFinishReason::NONE); + ASSERT_NE(primingChunk.find("\"type\":\"response.created\""), std::string::npos) << primingChunk; + ASSERT_NE(primingChunk.find("\"type\":\"response.in_progress\""), std::string::npos) << primingChunk; + ASSERT_EQ(primingChunk.find("\"type\":\"response.output_text.delta\""), std::string::npos) + << "Empty priming chunk must not produce content delta: " << primingChunk; + ASSERT_EQ(primingChunk.find("\"type\":\"response.output_item.added\""), std::string::npos) + << "Empty priming chunk must not open an output item: " << primingChunk; + + // Now the parser must still recognise the reasoning start tag and route the + // following text to reasoning, not content. + apiHandler->serializeStreamingChunk("", ov::genai::GenerationFinishReason::NONE); + std::string reasoningChunk = apiHandler->serializeStreamingChunk("hello", ov::genai::GenerationFinishReason::NONE); + ASSERT_NE(reasoningChunk.find("\"type\":\"response.reasoning_summary_text.delta\""), std::string::npos) + << "Reasoning text must be routed to reasoning_summary_text.delta: " << reasoningChunk; + ASSERT_EQ(reasoningChunk.find("\"type\":\"response.output_text.delta\""), std::string::npos) + << "Reasoning text must NOT be emitted as output_text.delta: " << reasoningChunk; +} + TEST_F(HttpOpenAIHandlerParsingTest, serializeStreamingChunkForResponsesWithoutReasoningWorksNormally) { std::string json = R"({ "model": "llama", From f6e5fd5f3595a4aa95ee07a9c554fb859fe25a97 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Fri, 15 May 2026 16:24:39 +0200 Subject: [PATCH 08/20] Copilot suggestions --- src/llm/apis/openai_responses.cpp | 6 +-- .../continuous_batching/servable.cpp | 14 +++--- src/test/http_openai_handler_test.cpp | 48 +++++++++++++++++-- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 932e3e7a94..c88279cd2b 
100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -217,9 +217,6 @@ static absl::StatusOr classifyInputItem(const rapidjson: // // Reasoning items are buffered and attached as `reasoning_content` on the next // assistant message (matching the gpt-oss template's expected field). -// Reasoning that is not followed by an assistant/function_call item is dropped, -// since emitting a standalone {role:assistant, reasoning_content:...} message -// with no content/tool_calls would confuse most chat templates. // // Pending function_call items are merged into the next assistant message as a // chat/completions-shaped tool_calls[] array. Without this, the assistant turn @@ -411,8 +408,7 @@ class ChatHistorySink { if (!status.ok()) return status; } else { - // Skip unrecognised content item types for forward compatibility. - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping unsupported content type: {}", type); + return absl::InvalidArgumentError(absl::StrCat("unsupported input content item type: ", type)); } } return absl::OkStatus(); diff --git a/src/llm/visual_language_model/continuous_batching/servable.cpp b/src/llm/visual_language_model/continuous_batching/servable.cpp index defa1af281..3e1f8cbec1 100644 --- a/src/llm/visual_language_model/continuous_batching/servable.cpp +++ b/src/llm/visual_language_model/continuous_batching/servable.cpp @@ -105,12 +105,14 @@ absl::Status VisualLanguageModelServable::prepareInputs(std::shared_ptrto_json_string() : std::string("")); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatTemplateKwargs: {}", chatTemplateKwargs.has_value() ? 
chatTemplateKwargs->to_json_string() : std::string("")); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM addGenerationPrompt: {}", addGenerationPrompt); + if (llm_calculator_logger->should_log(spdlog::level::trace)) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatHistory messages: {}", chatHistory.get_messages().to_json_string()); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatHistory.get_tools(): {}", chatHistory.get_tools().to_json_string()); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatHistory.get_extra_context(): {}", chatHistory.get_extra_context().to_json_string()); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM tools: {}", tools.has_value() ? tools->to_json_string() : std::string("")); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatTemplateKwargs: {}", chatTemplateKwargs.has_value() ? chatTemplateKwargs->to_json_string() : std::string("")); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM addGenerationPrompt: {}", addGenerationPrompt); + } vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } else { return absl::InvalidArgumentError("Unsupported endpoint"); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 708170216b..c469307b55 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -664,12 +664,19 @@ class HttpOpenAIHandlerChatAndResponsesParsingTest : public HttpOpenAIHandlerPar std::shared_ptr parseCurrentRequest(const std::string& json) { doc.Parse(json.c_str()); - EXPECT_FALSE(doc.HasParseError()) << json; + if (doc.HasParseError()) { + ADD_FAILURE() << "Failed to parse JSON: " << json; + return nullptr; + } std::optional maxTokensLimit; uint32_t bestOfLimit = 0; std::optional maxModelLength; auto apiHandler = createHandler(endpoint()); - EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()) << 
json; + auto status = apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength); + if (!status.ok()) { + ADD_FAILURE() << "parseRequest failed: " << status << " for JSON: " << json; + return nullptr; + } return apiHandler; } }; @@ -3421,6 +3428,29 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesInputImageUrlInvalidTypeFai EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::InvalidArgumentError("input_image.image_url must be a string or object")); } +TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesUnsupportedContentTypeFails) { + std::string json = R"({ + "model": "llama", + "input": [ + { + "role": "user", + "content": [ + {"type": "input_text", "text": "hi"}, + {"type": "input_audio", "audio": "abc"} + ] + } + ] + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + std::shared_ptr apiHandler = + std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer); + EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::InvalidArgumentError("unsupported input content item type: input_audio")); +} + TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesUnsupportedToolTypeFails) { std::string json = R"({ "model": "llama", @@ -4337,13 +4367,20 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesRegularMessageHasNoToolFields) namespace { std::shared_ptr parseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { doc.Parse(json.c_str()); - EXPECT_FALSE(doc.HasParseError()) << json; + if (doc.HasParseError()) { + ADD_FAILURE() << "Failed to parse JSON: " << json; + return nullptr; + } std::optional maxTokensLimit; uint32_t bestOfLimit = 0; std::optional maxModelLength; auto apiHandler = std::make_shared( doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); - 
EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()) << json; + auto status = apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength); + if (!status.ok()) { + ADD_FAILURE() << "parseRequest failed: " << status << " for JSON: " << json; + return nullptr; + } return apiHandler; } @@ -4351,7 +4388,8 @@ std::shared_ptr parseResponses(rapidjson::Document // it is OK, so the caller can verify the failure mode. absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { doc.Parse(json.c_str()); - EXPECT_FALSE(doc.HasParseError()) << json; + if (doc.HasParseError()) + return absl::InvalidArgumentError(absl::StrCat("Failed to parse JSON: ", json)); std::optional maxTokensLimit; uint32_t bestOfLimit = 0; std::optional maxModelLength; From 7946a1e0de347bc29af9af79cb9cade12ff81645 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Fri, 15 May 2026 17:09:11 +0200 Subject: [PATCH 09/20] fix --- src/llm/apis/openai_responses.cpp | 49 ++++++++++++++++----------- src/test/http_openai_handler_test.cpp | 13 +++++++ 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index c88279cd2b..4511ade1cd 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -444,12 +444,14 @@ class ChatHistorySink { chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray); } - // Emit an assistant turn that carries only reasoning_content (no content, - // no tool_calls). Used when reasoning is not followed by an assistant or - // function_call item. + // Emit an assistant turn that carries only reasoning_content (no + // tool_calls). Used when reasoning is not followed by an assistant or + // function_call item. content is set to an empty string so chat templates + // that access message.content unconditionally do not raise UndefinedError. 
void emitStandaloneReasoning(const std::string& reasoning) { chatHistory.push_back({}); chatHistory.last()["role"] = "assistant"; + chatHistory.last()["content"] = ""; chatHistory.last()["reasoning_content"] = reasoning; } @@ -541,11 +543,12 @@ class ProcessedJsonSink { messagesArray.PushBack(msgObj, alloc); } - // Emit an assistant turn that carries only reasoning_content (no content, - // no tool_calls). See ChatHistorySink::emitStandaloneReasoning for rationale. + // Emit an assistant turn that carries only reasoning_content (no + // tool_calls). See ChatHistorySink::emitStandaloneReasoning for rationale. void emitStandaloneReasoning(const std::string& reasoning) { rapidjson::Value msgObj(rapidjson::kObjectType); msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc); + msgObj.AddMember("content", rapidjson::Value("", alloc), alloc); msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); messagesArray.PushBack(msgObj, alloc); } @@ -687,11 +690,32 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional // summary field is accepted but ignored } + // logprobs: bool; optional - defaults to false + it = doc.FindMember("logprobs"); + if (it != doc.MemberEnd() && !it->value.IsNull()) { + if (!it->value.IsBool()) + return absl::InvalidArgumentError("logprobs accepts values true or false"); + request.logprobschat = it->value.GetBool(); + } + if (request.logprobschat && request.stream) { + return absl::InvalidArgumentError("logprobs are not supported in streaming mode."); + } + + auto toolsStatus = parseTools(); + if (!toolsStatus.ok()) { + return toolsStatus; + } + #if (PYTHON_DISABLE == 0) // Build processedJson with a "messages" array in chat/completions format so that // the Python Jinja template path can consume Responses API input without a separate code path. // Handles reasoning, function_call (merged into assistant tool_calls), and // function_call_output (converted to role:tool messages). 
+ // + // Built after parseTools() so any tool filtering (e.g. tool_choice removing + // unselected tools) is reflected here, and so parseTools()'s own write to + // request.processedJson (Responses-shaped doc with "input") does not + // clobber the chat/completions-shaped JSON the Python Jinja path expects. { Document processedDoc; processedDoc.SetObject(); @@ -739,21 +763,6 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional request.processedJson = buffer.GetString(); } #endif - // logprobs: bool; optional - defaults to false - it = doc.FindMember("logprobs"); - if (it != doc.MemberEnd() && !it->value.IsNull()) { - if (!it->value.IsBool()) - return absl::InvalidArgumentError("logprobs accepts values true or false"); - request.logprobschat = it->value.GetBool(); - } - if (request.logprobschat && request.stream) { - return absl::InvalidArgumentError("logprobs are not supported in streaming mode."); - } - - auto toolsStatus = parseTools(); - if (!toolsStatus.ok()) { - return toolsStatus; - } // max_output_tokens: uint; optional // OpenAI Responses API uses this field for output token limit. 
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index c469307b55..e540493fbe 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -24,6 +24,8 @@ #include #include +#include + #include "../http_rest_api_handler.hpp" #include "../filesystem/filesystem.hpp" #include "../llm/apis/openai_completions.hpp" @@ -684,6 +686,7 @@ class HttpOpenAIHandlerChatAndResponsesParsingTest : public HttpOpenAIHandlerPar TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingTextInputCreatesUserChatMessage) { std::string json = createTextRequest("What is OpenVINO?"); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); auto& chatHistory = apiHandler->getChatHistory(); ASSERT_EQ(chatHistory.size(), 1); @@ -700,6 +703,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingTextInputCreatesUser TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ProcessedJsonContainsEquivalentMessages) { std::string json = createTextRequest("What is OpenVINO?"); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); // For Responses, processedJson is always built from chatHistory. // For chat/completions with simple text, processedJson is empty (original body is used instead). 
@@ -746,6 +750,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ProcessedJsonEquivalentMult ]})"; } auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); auto& chatHistory = apiHandler->getChatHistory(); ASSERT_EQ(chatHistory.size(), 2); @@ -778,6 +783,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ProcessedJsonEquivalentMult TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ProcessedJsonIncludesToolsWhenPresent) { std::string json = createToolRequest("\"auto\""); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); EXPECT_TRUE(apiHandler->areToolsAvailable()); @@ -804,6 +810,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingTokenLimitSetsMaxTok std::string tokenField = endpoint() == ovms::Endpoint::RESPONSES ? "max_output_tokens" : "max_completion_tokens"; std::string json = createTextRequest("valid prompt", ",\"" + tokenField + "\":7"); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); EXPECT_TRUE(apiHandler->getMaxTokens().has_value()); EXPECT_EQ(apiHandler->getMaxTokens().value(), 7); @@ -812,6 +819,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingTokenLimitSetsMaxTok TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingFunctionToolsWithAutoChoiceSucceeds) { std::string json = createToolRequest("\"auto\""); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); EXPECT_TRUE(apiHandler->areToolsAvailable()); EXPECT_EQ(apiHandler->getToolChoice(), "auto"); @@ -820,6 +828,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingFunctionToolsWithAut TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingToolChoiceFunctionObjectSucceeds) { std::string json = createToolRequest("{\"type\":\"function\",\"function\":{\"name\":\"get_current_weather\"}}"); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); EXPECT_TRUE(apiHandler->areToolsAvailable()); 
EXPECT_EQ(apiHandler->getToolChoice(), "get_current_weather"); @@ -828,6 +837,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingToolChoiceFunctionOb TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingToolChoiceNoneRemovesTools) { std::string json = createToolRequest("\"none\""); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); EXPECT_FALSE(apiHandler->areToolsAvailable()); EXPECT_EQ(apiHandler->getToolChoice(), "none"); @@ -837,6 +847,7 @@ TEST_P(HttpOpenAIHandlerChatAndResponsesParsingTest, ParsingMultimodalInputImage const std::string base64Image = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAEElEQVR4nGLK27oAEAAA//8DYAHGgEvy5AAAAABJRU5ErkJggg=="; std::string json = createMultimodalRequestWithImageUrl(base64Image); auto apiHandler = parseCurrentRequest(json); + ASSERT_NE(apiHandler, nullptr); EXPECT_EQ(apiHandler->getImageHistory().size(), 1); } @@ -4417,6 +4428,7 @@ absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& t void expectResponsesEquivalentToChatCompletions(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& responsesRequest, const std::string& expectedChatCompletions) { auto handler = parseResponses(doc, tokenizer, responsesRequest); + ASSERT_NE(handler, nullptr); rapidjson::Document expectedDoc; expectedDoc.Parse(expectedChatCompletions.c_str()); @@ -4515,6 +4527,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { }] })"; auto apiHandler = parseResponses(doc, *tokenizer, json); + ASSERT_NE(apiHandler, nullptr); EXPECT_TRUE(apiHandler->areToolsAvailable()); ASSERT_TRUE(doc["tools"][0].HasMember("function")); EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather"); From 690a5fc12f38c33cb21f3627595e0f4b75f736b3 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 09:43:17 +0200 Subject: [PATCH 10/20] fix --- src/test/http_openai_handler_test.cpp | 4 +--- 1 file 
changed, 1 insertion(+), 3 deletions(-) diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index e540493fbe..3d440e01d2 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -24,8 +24,6 @@ #include #include -#include - #include "../http_rest_api_handler.hpp" #include "../filesystem/filesystem.hpp" #include "../llm/apis/openai_completions.hpp" @@ -4400,7 +4398,7 @@ std::shared_ptr parseResponses(rapidjson::Document absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { doc.Parse(json.c_str()); if (doc.HasParseError()) - return absl::InvalidArgumentError(absl::StrCat("Failed to parse JSON: ", json)); + return absl::InvalidArgumentError("Failed to parse JSON: " + json); std::optional maxTokensLimit; uint32_t bestOfLimit = 0; std::optional maxModelLength; From 83e275b213b6ab679f738380e82f4ac9ff2ac16d Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 10:18:48 +0200 Subject: [PATCH 11/20] fix --- src/llm/apis/openai_api_handler.cpp | 16 ++++-------- src/llm/apis/openai_responses.cpp | 14 ++++++++--- src/test/http_openai_handler_test.cpp | 36 ++++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/llm/apis/openai_api_handler.cpp b/src/llm/apis/openai_api_handler.cpp index c7899ea938..c52136d67c 100644 --- a/src/llm/apis/openai_api_handler.cpp +++ b/src/llm/apis/openai_api_handler.cpp @@ -213,10 +213,8 @@ absl::StatusOr loadImage(const std::string& imageSource, try { tensor = loadImageStbiFromMemory(decoded); } catch (std::runtime_error& e) { - std::stringstream ss; - ss << "Image parsing failed: " << e.what(); - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, ss.str()); - return absl::InvalidArgumentError(ss.str()); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Image parsing failed: {}", e.what()); + return absl::InvalidArgumentError("Image parsing failed"); } } else if 
(std::regex_match(imageSource.c_str(), std::regex("^(http|https|ftp|sftp|)://(.*)"))) { SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Loading image using curl"); @@ -230,9 +228,7 @@ absl::StatusOr loadImage(const std::string& imageSource, try { tensor = loadImageStbiFromMemory(decoded); } catch (std::runtime_error& e) { - std::stringstream ss; - ss << "Image parsing failed: " << e.what(); - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, ss.str()); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Image parsing failed: {}", e.what()); return absl::InvalidArgumentError("Image parsing failed"); } } else { @@ -255,10 +251,8 @@ absl::StatusOr loadImage(const std::string& imageSource, try { tensor = loadImageStbiFromFile(resolvedImagePathStr.c_str()); } catch (std::runtime_error& e) { - std::stringstream ss; - ss << "Image file " << resolvedImagePathStr << " parsing failed: " << e.what(); - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, ss.str()); - return absl::InvalidArgumentError(ss.str()); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Image file {} parsing failed: {}", resolvedImagePathStr, e.what()); + return absl::InvalidArgumentError("Image file parsing failed"); } } return tensor; diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 4511ade1cd..308bfc1d7f 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -136,7 +136,10 @@ static std::string extractReasoningText(const rapidjson::Value::ConstObject& ite } // Extract a flat text string from a Responses API content field which may be -// either a string or an array of {type,text} objects. +// either a string or an array of {type,text} objects. When multiple text items +// are present, the last one wins, matching ChatHistorySink::extractContent so +// the Python/Jinja processedJson path and the C++ chatHistory path produce the +// same prompt. 
static std::string extractTextContent(const rapidjson::Value& contentVal) { if (contentVal.IsString()) { return contentVal.GetString(); @@ -144,6 +147,7 @@ static std::string extractTextContent(const rapidjson::Value& contentVal) { if (!contentVal.IsArray()) { return ""; } + std::string result; for (const auto& ci : contentVal.GetArray()) { if (!ci.IsObject()) continue; @@ -154,11 +158,11 @@ static std::string extractTextContent(const rapidjson::Value& contentVal) { if (ctType == "input_text" || ctType == "output_text") { auto textIt = ci.GetObject().FindMember("text"); if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) { - return textIt->value.GetString(); + result = textIt->value.GetString(); } } } - return ""; + return result; } // Read the three string fields (id, name, arguments) out of a function_call item. @@ -382,13 +386,15 @@ class ChatHistorySink { outText.clear(); auto contentIt = itemObj.FindMember("content"); if (contentIt == itemObj.MemberEnd()) - return absl::OkStatus(); + return absl::InvalidArgumentError("input item is missing required content field"); if (contentIt->value.IsString()) { outText = contentIt->value.GetString(); return absl::OkStatus(); } if (!contentIt->value.IsArray()) return absl::InvalidArgumentError("input item content must be a string or array"); + if (contentIt->value.Empty()) + return absl::InvalidArgumentError("input item content array must not be empty"); for (const auto& contentItem : contentIt->value.GetArray()) { if (!contentItem.IsObject()) return absl::InvalidArgumentError("input content items must be objects"); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 3d440e01d2..9a55321739 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -2712,7 +2712,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageLocalFilesystemInvalidP doc.Parse(json.c_str()); ASSERT_FALSE(doc.HasParseError()); std::shared_ptr 
apiHandler = std::make_shared(doc, ovms::Endpoint::CHAT_COMPLETIONS, std::chrono::system_clock::now(), *tokenizer); - EXPECT_EQ(apiHandler->parseMessages(allowedPath), absl::InvalidArgumentError("Image file " + ovms::FileSystem::normalizeConfiguredPath(imageUrl) + " parsing failed: can't fopen")); + EXPECT_EQ(apiHandler->parseMessages(allowedPath), absl::InvalidArgumentError("Image file parsing failed")); } TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageLocalFilesystemInvalidEscaped) { @@ -3460,6 +3460,40 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesUnsupportedContentTypeFails EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::InvalidArgumentError("unsupported input content item type: input_audio")); } +TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesMissingContentFails) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user"} + ] + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + std::shared_ptr apiHandler = + std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer); + EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::InvalidArgumentError("input item is missing required content field")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesEmptyContentArrayFails) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": []} + ] + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + std::shared_ptr apiHandler = + std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer); + EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::InvalidArgumentError("input item content array must not 
be empty")); +} + TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesUnsupportedToolTypeFails) { std::string json = R"({ "model": "llama", From 5e2e401f0bf1a64156ae5717511f1f8bfab22410 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 10:22:51 +0200 Subject: [PATCH 12/20] fix --- src/test/http_openai_handler_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 9a55321739..d0b74e4931 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4604,7 +4604,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistan R"({ "messages": [ {"role":"user","content":"hi"}, - {"role":"assistant","reasoning_content":"orphan"}, + {"role":"assistant","content":"","reasoning_content":"orphan"}, {"role":"user","content":"again"} ] })"); @@ -4624,7 +4624,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitt R"({ "messages": [ {"role":"user","content":"hi"}, - {"role":"assistant","reasoning_content":"trailing"} + {"role":"assistant","content":"","reasoning_content":"trailing"} ] })"); } From 221e0ce8b5cc4a825cbbd41df73a7eca1246f434 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 10:40:51 +0200 Subject: [PATCH 13/20] fix --- src/llm/apis/openai_responses.cpp | 39 ++++++++++-- src/llm/servable.cpp | 2 +- src/test/http_openai_handler_test.cpp | 87 +++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 5 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 308bfc1d7f..1b565a7f7d 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -166,6 +166,13 @@ static std::string extractTextContent(const rapidjson::Value& contentVal) { } // Read the three string fields (id, name, arguments) out of a function_call item. 
+// +// The Responses API function_call item carries both "id" (the *item* id, e.g. +// "fc_...") and "call_id" (the *call* identifier, e.g. "call_...", referenced +// by function_call_output.call_id and by tool messages' tool_call_id). We +// prefer "call_id" so the chat/completions-shaped assistant.tool_calls[].id +// matches the subsequent tool message's tool_call_id, and fall back to "id" +// only when "call_id" is absent. struct FunctionCallFields { std::string id; std::string name; @@ -174,9 +181,14 @@ struct FunctionCallFields { static FunctionCallFields readFunctionCallFields(const rapidjson::Value& item) { FunctionCallFields out; auto fcObj = item.GetObject(); - auto idIt = fcObj.FindMember("id"); - if (idIt != fcObj.MemberEnd() && idIt->value.IsString()) - out.id = idIt->value.GetString(); + auto callIdIt = fcObj.FindMember("call_id"); + if (callIdIt != fcObj.MemberEnd() && callIdIt->value.IsString()) { + out.id = callIdIt->value.GetString(); + } else { + auto idIt = fcObj.FindMember("id"); + if (idIt != fcObj.MemberEnd() && idIt->value.IsString()) + out.id = idIt->value.GetString(); + } auto nameIt = fcObj.FindMember("name"); if (nameIt != fcObj.MemberEnd() && nameIt->value.IsString()) out.name = nameIt->value.GetString(); @@ -186,6 +198,23 @@ static FunctionCallFields readFunctionCallFields(const rapidjson::Value& item) { return out; } +// Reject function_call items that would translate to a syntactically valid but +// semantically broken assistant.tool_calls entry (missing identifier, name, or +// arguments). The call_id/id mismatch is also what breaks tool_call_id linkage +// with subsequent tool messages, so surfacing it here as 400 is better than +// passing through and producing a malformed prompt. 
+static absl::Status validateFunctionCallItem(const rapidjson::Value& item) { + const FunctionCallFields fields = readFunctionCallFields(item); + if (fields.id.empty()) + return absl::InvalidArgumentError("function_call item is missing required call_id (or id) field"); + if (fields.name.empty()) + return absl::InvalidArgumentError("function_call item is missing required name field"); + auto argsIt = item.GetObject().FindMember("arguments"); + if (argsIt == item.GetObject().MemberEnd() || !argsIt->value.IsString()) + return absl::InvalidArgumentError("function_call item is missing required arguments field"); + return absl::OkStatus(); +} + // Classification of a Responses API input item used to dispatch to per-type // handlers in the builders below. enum class ResponsesInputItemKind { @@ -265,7 +294,9 @@ class ResponsesInputBuilder { status = onReasoningItem(item.GetObject()); break; case ResponsesInputItemKind::FUNCTION_CALL: - pendingFunctionCalls.push_back(&item); + status = validateFunctionCallItem(item); + if (status.ok()) + pendingFunctionCalls.push_back(&item); break; case ResponsesInputItemKind::FUNCTION_CALL_OUTPUT: status = onFunctionCallOutputItem(item.GetObject()); diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index d35db8d3b2..f4eba2079b 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -210,7 +210,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what())); + return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. 
The model either does not have chat template or has an invalid one."); } #endif if (inputText.size() == 0) { diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index d0b74e4931..642924dd14 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4800,6 +4800,93 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAcc })"); } +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallPrefersCallIdOverId) { + // When id and call_id differ, the assistant.tool_calls[].id must use + // call_id so it matches the subsequent tool message's tool_call_id (which + // is built from function_call_output.call_id). Otherwise chat templates + // see a tool result with no matching call (e.g. gpt-oss raises "Message + // has tool role, but there was no previous assistant message with a tool + // call!"). + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "fc_abc", "call_id": "call_xyz", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "call_id": "call_xyz", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_xyz","type":"function","function":{"name":"get_weather","arguments":"{}"}} + ]}, + {"role":"tool","tool_call_id":"call_xyz","content":"ok"} + ] + })"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOnlyCallIdSupplied) { + // call_id alone (no "id") is accepted; it is used as the tool_calls[].id. 
+ expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "call_id": "call_xyz", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "call_id": "call_xyz", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_xyz","type":"function","function":{"name":"get_weather","arguments":"{}"}} + ]}, + {"role":"tool","tool_call_id":"call_xyz","content":"ok"} + ] + })"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMissingIdRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "name": "get_weather", "arguments": "{}"} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status, absl::InvalidArgumentError("function_call item is missing required call_id (or id) field")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMissingNameRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "call_id": "call_1", "arguments": "{}"} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status, absl::InvalidArgumentError("function_call item is missing required name field")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMissingArgumentsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "call_id": "call_1", "name": "get_weather"} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status, absl::InvalidArgumentError("function_call item 
is missing required arguments field")); +} + // --- Tools normalisation edge cases --- TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) { From 250e3cad262a22184ce2e13d8b12f5ae9a9cfd5e Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 10:54:34 +0200 Subject: [PATCH 14/20] fix --- src/llm/servable.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index f4eba2079b..5a0955b4f5 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -22,7 +22,6 @@ #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246 6313) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#include "absl/strings/str_cat.h" #include "mediapipe/framework/calculator_graph.h" #include #include From 5e2588212fd421697aabceb1d423fc48d3c93e0a Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 11:37:11 +0200 Subject: [PATCH 15/20] fix --- src/llm/apis/openai_responses.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 1b565a7f7d..6b008ec05e 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -72,19 +72,18 @@ static void convertResponsesToolsInPlace(rapidjson::Value& toolsArray, rapidjson if (!tool.IsObject()) { continue; } - auto toolObj = tool.GetObject(); - if (toolObj.FindMember("function") != toolObj.MemberEnd()) { + if (tool.FindMember("function") != tool.MemberEnd()) { continue; // Already in nested chat/completions format. } - auto typeIt = toolObj.FindMember("type"); - const std::string toolType = (typeIt != toolObj.MemberEnd() && typeIt->value.IsString()) + auto typeIt = tool.FindMember("type"); + const std::string toolType = (typeIt != tool.MemberEnd() && typeIt->value.IsString()) ? 
typeIt->value.GetString() : ""; if (toolType != "function") { continue; // Preserve non-function tools as-is. } rapidjson::Value funcObj(rapidjson::kObjectType); - for (auto memberIt = toolObj.MemberBegin(); memberIt != toolObj.MemberEnd();) { + for (auto memberIt = tool.MemberBegin(); memberIt != tool.MemberEnd();) { if (!memberIt->name.IsString()) { ++memberIt; continue; @@ -652,7 +651,7 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow request.chatHistory.last()["content"] = request.prompt.value(); } else if (inputIt->value.IsArray()) { if (inputIt->value.GetArray().Size() == 0) { - return absl::InvalidArgumentError("Messages array cannot be empty"); + return absl::InvalidArgumentError("input array must not be empty"); } ChatHistorySink sink(request.chatHistory, request.imageHistory, allowedLocalMediaPath, allowedMediaDomains); From 7f3328a651190cea956588f467b0a54a410919f3 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 13:11:22 +0200 Subject: [PATCH 16/20] fix --- src/llm/apis/openai_responses.cpp | 46 +++++++++++++------------------ 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 6b008ec05e..a8eb85ecda 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -214,6 +214,24 @@ static absl::Status validateFunctionCallItem(const rapidjson::Value& item) { return absl::OkStatus(); } +// Build a chat/completions tool_calls[] array into outArr using the given +// allocator. Shared by ChatHistorySink and ProcessedJsonSink so the two paths +// cannot drift on id/call_id handling or field layout. 
+static void buildToolCallsArray(const std::vector& toolCalls, + rapidjson::Value& outArr, rapidjson::Document::AllocatorType& alloc) { + for (const auto* fc : toolCalls) { + const FunctionCallFields fields = readFunctionCallFields(*fc); + rapidjson::Value funcObj(rapidjson::kObjectType); + funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc); + funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc); + rapidjson::Value tcObj(rapidjson::kObjectType); + tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc); + tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc); + tcObj.AddMember("function", funcObj, alloc); + outArr.PushBack(tcObj, alloc); + } +} + // Classification of a Responses API input item used to dispatch to per-type // handlers in the builders below. enum class ResponsesInputItemKind { @@ -521,22 +539,6 @@ class ChatHistorySink { return absl::OkStatus(); } - // Build a chat/completions tool_calls[] array into outArr using the given allocator. 
- static void buildToolCallsArray(const std::vector& toolCalls, - rapidjson::Value& outArr, rapidjson::Document::AllocatorType& alloc) { - for (const auto* fc : toolCalls) { - const FunctionCallFields fields = readFunctionCallFields(*fc); - rapidjson::Value funcObj(rapidjson::kObjectType); - funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc); - funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc); - rapidjson::Value tcObj(rapidjson::kObjectType); - tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc); - tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc); - tcObj.AddMember("function", funcObj, alloc); - outArr.PushBack(tcObj, alloc); - } - } - ov::genai::ChatHistory& chatHistory; ImageHistory& imageHistory; const std::optional& allowedLocalMediaPath; @@ -597,17 +599,7 @@ class ProcessedJsonSink { if (!reasoning.empty()) msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); rapidjson::Value toolCallsArray(rapidjson::kArrayType); - for (const auto* fc : toolCalls) { - const FunctionCallFields fields = readFunctionCallFields(*fc); - rapidjson::Value funcObj(rapidjson::kObjectType); - funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc); - funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc); - rapidjson::Value tcObj(rapidjson::kObjectType); - tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc); - tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc); - tcObj.AddMember("function", funcObj, alloc); - toolCallsArray.PushBack(tcObj, alloc); - } + buildToolCallsArray(toolCalls, toolCallsArray, alloc); msgObj.AddMember("tool_calls", toolCallsArray, alloc); messagesArray.PushBack(msgObj, alloc); } From 55f707635229d69478849c0da9863f4518f2767b Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 13:19:16 +0200 Subject: [PATCH 17/20] 
update comment --- src/llm/apis/openai_responses.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index a8eb85ecda..72f1ec9aa9 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -391,10 +391,11 @@ class ResponsesInputBuilder { void flushPendingFunctionCalls(const std::string& assistantText) { if (pendingFunctionCalls.empty()) { // No tool calls, but possibly buffered reasoning to flush as a - // standalone assistant turn carrying only reasoning_content (no - // `content` field at all, so templates that gate on `message.content` - // skip the content branch and templates that gate on - // `message.reasoning_content` still see the buffered text). + // standalone assistant turn carrying reasoning_content alongside + // an empty string `content` (templates that gate on + // `message.content` then see an empty body and skip the content + // branch, while templates that gate on `message.reasoning_content` + // still pick up the buffered text). 
if (!pendingReasoningContent.empty()) { std::string reasoning = std::move(pendingReasoningContent); pendingReasoningContent.clear(); From ac7f667d30b3b43877bb7c9531a16bfc6fc0e8b2 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 13:53:11 +0200 Subject: [PATCH 18/20] fix demo --- demos/continuous_batching/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/demos/continuous_batching/README.md b/demos/continuous_batching/README.md index 34e0c61ee3..dfc6fef039 100644 --- a/demos/continuous_batching/README.md +++ b/demos/continuous_batching/README.md @@ -156,7 +156,7 @@ curl -s http://localhost:8000/v3/chat/completions -H "Content-Type: application/ curl http://localhost:8000/v3/responses \ -H "Content-Type: application/json" \ -d '{ - "model": "meta-llama/Meta-Llama-3-8B-Instruct", + "model": "Qwen3-30B-A3B-Instruct-2507-int4-ov", "max_output_tokens":30, "input": "What is OpenVINO?" }'| jq . @@ -169,12 +169,12 @@ Windows Powershell (Invoke-WebRequest -Uri "http://localhost:8000/v3/responses" ` -Method POST ` -Headers @{ "Content-Type" = "application/json" } ` - -Body '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "max_output_tokens": 30, "input": "What is OpenVINO?"}').Content + -Body '{"model": "Qwen3-30B-A3B-Instruct-2507-int4-ov", "max_output_tokens": 30, "input": "What is OpenVINO?"}').Content ``` Windows Command Prompt ```bat -curl -s http://localhost:8000/v3/responses -H "Content-Type: application/json" -d "{\"model\": \"meta-llama/Meta-Llama-3-8B-Instruct\", \"max_output_tokens\": 30, \"input\": \"What is OpenVINO?\"}" +curl -s http://localhost:8000/v3/responses -H "Content-Type: application/json" -d "{\"model\": \"Qwen3-30B-A3B-Instruct-2507-int4-ov\", \"max_output_tokens\": 30, \"input\": \"What is OpenVINO?\"}" ``` ::: @@ -186,7 +186,7 @@ curl -s http://localhost:8000/v3/responses -H "Content-Type: application/json" - "id": "resp-1724405400", "object": "response", "created_at": 1724405400, - "model": 
"meta-llama/Meta-Llama-3-8B-Instruct", + "model": "Qwen3-30B-A3B-Instruct-2507-int4-ov", "status": "completed", "output": [ { @@ -337,7 +337,7 @@ client = OpenAI( ) stream = client.responses.create( - model="meta-llama/Meta-Llama-3-8B-Instruct", + model="Qwen3-30B-A3B-Instruct-2507-int4-ov", input="Say this is a test", stream=True, ) From 5978a635bb760256737f55d2b29f45ca6ac50b15 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 14:30:40 +0200 Subject: [PATCH 19/20] fix --- src/llm/apis/openai_responses.cpp | 32 ++++++++++++++------- src/test/http_openai_handler_test.cpp | 41 +++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 72f1ec9aa9..4b20b054b9 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -361,6 +361,19 @@ class ResponsesInputBuilder { absl::Status onRoleItem(const rapidjson::Value::ConstObject& itemObj, rapidjson::SizeType index) { const std::string role = itemObj.FindMember("role")->value.GetString(); + // Non-assistant items must not absorb pending tool_calls; flush first. + // (flushPendingFunctionCalls also emits any standalone reasoning content + // as a standalone assistant turn.) + // + // Flushing BEFORE extractContent is intentional: it makes + // chatHistory.size() equal the index this item's message will land at, + // so the ChatHistorySink can record image-history turn indices directly + // from chatHistory.size() instead of from the Responses input-array + // index (which drifts when items are buffered/merged). + if (role != "assistant") { + flushPendingFunctionCalls(""); + } + std::string contentText; auto status = sink.extractContent(itemObj, index, contentText); if (!status.ok()) @@ -372,12 +385,6 @@ class ResponsesInputBuilder { flushPendingFunctionCalls(contentText); return absl::OkStatus(); } - // Non-assistant items must not absorb pending tool_calls; flush first. 
- // (flushPendingFunctionCalls also emits any standalone reasoning content - // as a standalone assistant turn.) - if (role != "assistant") { - flushPendingFunctionCalls(""); - } std::string reasoning; if (role == "assistant" && !pendingReasoningContent.empty()) { @@ -431,7 +438,7 @@ class ChatHistorySink { } absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj, - rapidjson::SizeType index, std::string& outText) { + rapidjson::SizeType /*index*/, std::string& outText) { outText.clear(); auto contentIt = itemObj.FindMember("content"); if (contentIt == itemObj.MemberEnd()) @@ -459,7 +466,7 @@ class ChatHistorySink { // Last text-bearing item wins, matching pre-refactor behaviour. outText = textIt->value.GetString(); } else if (type == "input_image") { - auto status = appendInputImage(contentObj, index); + auto status = appendInputImage(contentObj); if (!status.ok()) return status; } else { @@ -515,7 +522,12 @@ class ChatHistorySink { } private: - absl::Status appendInputImage(const rapidjson::Value::ConstObject& contentObj, rapidjson::SizeType index) { + // Record (chatTurnIndex, tensor) immediately. This is correct because + // onRoleItem() flushes any buffered standalone messages BEFORE calling + // extractContent(), so at this point chatHistory.size() is the index that + // the upcoming emitMessage()/emitAssistantWithToolCalls() will push the + // image-bearing message into. 
+ absl::Status appendInputImage(const rapidjson::Value::ConstObject& contentObj) { auto imageUrlIt = contentObj.FindMember("image_url"); if (imageUrlIt == contentObj.MemberEnd()) return absl::InvalidArgumentError("input_image requires image_url field"); @@ -536,7 +548,7 @@ class ChatHistorySink { auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); if (!tensorResult.ok()) return tensorResult.status(); - imageHistory.push_back({index, tensorResult.value()}); + imageHistory.push_back({chatHistory.size(), tensorResult.value()}); return absl::OkStatus(); } diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 642924dd14..c054ac25e1 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4887,6 +4887,47 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMissingArgumentsReject EXPECT_EQ(status, absl::InvalidArgumentError("function_call item is missing required arguments field")); } +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesImageHistoryIndexMatchesChatHistoryTurn) { + // Regression test for the image-index drift bug: when an input item is + // merged (function_call buffered, then absorbed into the next assistant + // message), the Responses input-array index no longer matches the + // resulting chatHistory index. ChatHistorySink::appendInputImage must + // record the actual chatHistory turn index so the VLM servable can + // prepend the tag to the correct message (and not + // index out-of-bounds). 
+ const std::string base64Image = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAEElEQVR4nGLK27oAEAAA//8DYAHGgEvy5AAAAABJRU5ErkJggg=="; + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "function_call", "call_id": "call_1", "name": "x", "arguments": "{}"}, + {"role": "assistant", "content": [{"type":"output_text","text":"calling tool"}]}, + {"role": "user", "content": [ + {"type":"input_text","text":"and now?"}, + {"type":"input_image","image_url":")" + base64Image + R"("} + ]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + ASSERT_NE(apiHandler, nullptr); + + // Input has 4 items but item [1] (function_call) is buffered and merged + // into item [2] (assistant) — chatHistory ends up with 3 entries: + // [0] user "hi" + // [1] assistant "calling tool" with tool_calls + // [2] user "and now?" (image-bearing) + auto& chatHistory = apiHandler->getChatHistory(); + ASSERT_EQ(chatHistory.size(), 3u); + + const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory(); + ASSERT_EQ(imageHistory.size(), 1u); + auto [turnIndex, image] = imageHistory[0]; + // Must point at the second user (chatHistory[2]) — NOT the input-array + // index 3, which would be out-of-bounds for chatHistory[3]. 
+ EXPECT_EQ(turnIndex, 2u); + EXPECT_LT(turnIndex, chatHistory.size()); +} + // --- Tools normalisation edge cases --- TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) { From e836d74502a9456fc25da12c5ce4fafaa3c3e9ac Mon Sep 17 00:00:00 2001 From: mkulakow Date: Mon, 18 May 2026 15:16:11 +0200 Subject: [PATCH 20/20] style --- src/test/http_openai_handler_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index c054ac25e1..f62f302020 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -4904,7 +4904,8 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesImageHistoryIndexMatchesChatHistor {"role": "assistant", "content": [{"type":"output_text","text":"calling tool"}]}, {"role": "user", "content": [ {"type":"input_text","text":"and now?"}, - {"type":"input_image","image_url":")" + base64Image + R"("} + {"type":"input_image","image_url":")" + + base64Image + R"("} ]} ] })";