fix(llm): normalize OpenAI-compatible chat URLs

Normalize OpenAI-compatible chat URL shapes so base /v1 endpoints route to /v1/chat/completions while URLs that already point at a full chat endpoint are left unchanged (the normalization is idempotent). Preserve native local Ollama routing for bare localhost:11434 endpoints, keep localhost:11434/v1 as OpenAI-compatible, and add focused regression coverage for provider detection, chat target URLs, and model listing from /v1. Part of #541.
2026-07-02 01:22:07 -04:00 · 2026-06-28 16:30:15 +02:00
parent bb2148db73
commit 927b1f7ecf
2 changed files with 97 additions and 3 deletions
@@ -345,6 +345,18 @@ def _normalize_ollama_url(url: str) -> str:
    return base.rstrip("/") + "/chat"


+def _normalize_openai_chat_url(url: str) -> str:
+    """Ensure an OpenAI-compatible base URL points at /chat/completions.
+
+    Surrounding whitespace and trailing slashes are dropped first. A URL whose
+    path already ends in ``/completions`` (which also covers
+    ``/chat/completions``) keeps its path, so the function is idempotent.
+    A trailing ``/models`` segment is replaced rather than extended. Any
+    ``?query`` or ``#fragment`` suffix is kept after the normalized path, so
+    URLs such as ``.../chat/completions?api-version=...`` are not corrupted.
+
+    Args:
+        url: Base or full endpoint URL; ``None`` and blank values are tolerated.
+
+    Returns:
+        The normalized chat-completions URL, or ``""`` for empty input.
+    """
+    base = (url or "").strip().rstrip("/")
+    if not base:
+        return base
+    # Split at the first "?" or "#" so the suffix checks see the path only.
+    marks = [i for i in (base.find("?"), base.find("#")) if i >= 0]
+    cut = min(marks) if marks else len(base)
+    path, suffix = base[:cut].rstrip("/"), base[cut:]
+    if not path:
+        # Input was only a query/fragment; there is no path to normalize.
+        return base
+    if path.endswith("/completions"):
+        return path + suffix
+    if path.endswith("/models"):
+        path = path[: -len("/models")].rstrip("/")
+    return path + "/chat/completions" + suffix
+
+
 def _ollama_normalize_messages(messages: List[Dict]) -> List[Dict]:
    """Adapt Odysseus' canonical OpenAI-style messages to native Ollama /api/chat.

@@ -1563,7 +1575,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
            stream=False, num_ctx=get_context_length(url, model),
        )
    else:
-        target_url = url
+        target_url = _normalize_openai_chat_url(url)
        if provider == "copilot":
            from src.copilot import apply_request_headers
            apply_request_headers(h, messages_copy)
@@ -1767,7 +1779,7 @@ async def llm_call_async(
            stream=False, num_ctx=get_context_length(url, model),
        )
    else:
-        target_url = url
+        target_url = _normalize_openai_chat_url(url)
        h = _provider_headers(provider, headers)
        if provider == "copilot":
            from src.copilot import apply_request_headers
@@ -1889,7 +1901,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
        h = _provider_headers(provider, headers)
        payload = _build_chatgpt_responses_payload(model, messages_copy, temperature, max_tokens, stream=True)
    else:
-        target_url = url
+        target_url = _normalize_openai_chat_url(url)
        payload = {
            "model": model,
            "messages": messages_copy,