diff --git a/src/http_rest_api_handler.cpp b/src/http_rest_api_handler.cpp index 82911119b0..5208e784e2 100644 --- a/src/http_rest_api_handler.cpp +++ b/src/http_rest_api_handler.cpp @@ -103,7 +103,7 @@ namespace ovms { const std::string HttpRestApiHandler::predictionRegexExp = R"((.?)\/v1\/models\/([^\/:]+)(?:(?:\/versions\/(\d+))|(?:\/labels\/(\w+)))?:(classify|regress|predict))"; const std::string HttpRestApiHandler::modelstatusRegexExp = - R"((.?)\/v1\/models(?:\/([^\/:]+))?(?:(?:\/versions\/(\d+))|(?:\/labels\/(\w+)))?(?:\/(metadata))?)"; + R"((.?)\/v1\/models\/([^\/:]+)(?:(?:\/versions\/(\d+))|(?:\/labels\/(\w+))|(?:\/(metadata))))"; const std::string HttpRestApiHandler::configReloadRegexExp = R"((.?)\/v1\/config\/reload)"; const std::string HttpRestApiHandler::configStatusRegexExp = R"((.?)\/v1\/config)"; @@ -121,11 +121,11 @@ const std::string HttpRestApiHandler::kfs_servermetadataRegexExp = R"(/v2)"; const std::string HttpRestApiHandler::v3_ListModelsRegexExp = - R"(/v3/(v1/)?models)"; + R"((?:/v3/|/v1/)(v1/)?models)"; const std::string HttpRestApiHandler::v3_RetrieveModelRegexExp = - R"(/v3/(v1/)?models/(.+))"; + R"((?:/v3/|/v1/)(v1/)?models/(.+))"; const std::string HttpRestApiHandler::v3_RegexExp = - R"(/v3/.*?(/|$))"; + R"((?:/v3/|/v1/).*?(/|$))"; const std::string HttpRestApiHandler::metricsRegexExp = R"((.?)\/metrics(\?(.*))?)"; @@ -1022,6 +1022,10 @@ Status HttpRestApiHandler::parseRequestComponents(HttpRequestComponents& request return status; return StatusCode::OK; } + if (std::regex_match(request_path, sm, configReloadRegex)) { + requestComponents.type = ConfigReload; + return StatusCode::OK; + } if (std::regex_match(request_path, sm, v3_Regex)) { requestComponents.type = V3; auto status = parseInferenceHeaderContentLength(requestComponents, headers); @@ -1030,10 +1034,6 @@ Status HttpRestApiHandler::parseRequestComponents(HttpRequestComponents& request requestComponents.headers = headers; return StatusCode::OK; } - if (std::regex_match(request_path, sm, configReloadRegex)) { - requestComponents.type = ConfigReload; - return StatusCode::OK; - } return (std::regex_match(request_path, sm, modelstatusRegex) || std::regex_match(request_path, sm, kfs_serverliveRegex) || std::regex_match(request_path, sm, configStatusRegex) || diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index b4d5ca8185..a92e161f4d 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -65,11 +65,14 @@ absl::Status GenAiServable::loadRequest(std::shared_ptrHasParseError()) { return absl::InvalidArgumentError("Non-json request received in text generation calculator"); } - if (payload.uri == "/v3/chat/completions" || payload.uri == "/v3/v1/chat/completions") { + if (payload.uri == "/v3/chat/completions" || payload.uri == "/v3/v1/chat/completions" || + payload.uri == "/v1/chat/completions" || payload.uri == "/v1/v1/chat/completions") { executionContext->endpoint = Endpoint::CHAT_COMPLETIONS; - } else if (payload.uri == "/v3/completions" || payload.uri == "/v3/v1/completions") { + } else if (payload.uri == "/v3/completions" || payload.uri == "/v3/v1/completions" || + payload.uri == "/v1/completions" || payload.uri == "/v1/v1/completions") { executionContext->endpoint = Endpoint::COMPLETIONS; - } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses") { + } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses" || + payload.uri == "/v1/responses" || payload.uri == "/v1/v1/responses") { executionContext->endpoint = Endpoint::RESPONSES; } else if (TokenizeParser::isTokenizeEndpoint(payload.uri)) { executionContext->endpoint = Endpoint::TOKENIZE; diff --git a/src/llm/visual_language_model/continuous_batching/servable.cpp b/src/llm/visual_language_model/continuous_batching/servable.cpp index 7779d9c0be..f78ef72773 100644 --- a/src/llm/visual_language_model/continuous_batching/servable.cpp +++ b/src/llm/visual_language_model/continuous_batching/servable.cpp @@ -44,9 +44,11 @@ absl::Status VisualLanguageModelServable::loadRequest(std::shared_ptrHasParseError()) { return absl::InvalidArgumentError("Non-json request received in text generation calculator"); } - if (payload.uri == "/v3/chat/completions" || payload.uri == "/v3/v1/chat/completions") { + if (payload.uri == "/v3/chat/completions" || payload.uri == "/v3/v1/chat/completions" || + payload.uri == "/v1/chat/completions" || payload.uri == "/v1/v1/chat/completions") { executionContext->endpoint = Endpoint::CHAT_COMPLETIONS; - } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses") { + } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses" || + payload.uri == "/v1/responses" || payload.uri == "/v1/v1/responses") { executionContext->endpoint = Endpoint::RESPONSES; } else if (TokenizeParser::isTokenizeEndpoint(payload.uri)) { executionContext->endpoint = Endpoint::TOKENIZE; diff --git a/src/llm/visual_language_model/legacy/servable.cpp b/src/llm/visual_language_model/legacy/servable.cpp index 9c8e02c5df..f81dc2bc28 100644 --- a/src/llm/visual_language_model/legacy/servable.cpp +++ b/src/llm/visual_language_model/legacy/servable.cpp @@ -53,9 +53,11 @@ absl::Status VisualLanguageModelLegacyServable::loadRequest(std::shared_ptrHasParseError()) { return absl::InvalidArgumentError("Non-json request received in text generation calculator"); } - if (payload.uri == "/v3/chat/completions" || payload.uri == "/v3/v1/chat/completions") { + if (payload.uri == "/v3/chat/completions" || payload.uri == "/v3/v1/chat/completions" || + payload.uri == "/v1/chat/completions" || payload.uri == "/v1/v1/chat/completions") { executionContext->endpoint = Endpoint::CHAT_COMPLETIONS; - } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses") { + } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses" || + payload.uri == "/v1/responses" || payload.uri == "/v1/v1/responses") { executionContext->endpoint = Endpoint::RESPONSES; } else if (TokenizeParser::isTokenizeEndpoint(payload.uri)) { executionContext->endpoint = Endpoint::TOKENIZE;