fix(llm): normalize OpenAI-compatible chat URLs

Normalize OpenAI-compatible chat URL shapes so that base /v1 endpoints route to /v1/chat/completions, while URLs that already point at a chat-completions endpoint are left unchanged (the normalization is idempotent). Preserve native local Ollama routing for bare localhost:11434 endpoints, keep localhost:11434/v1 as OpenAI-compatible, and add focused regression coverage for provider detection, chat target URLs, and model listing from /v1. Part of #541.
2026-06-28 23:52:09 -04:00 · 2026-06-28 16:30:15 +02:00
parent bb2148db73
commit 927b1f7ecf
2 changed files with 97 additions and 3 deletions
@@ -345,6 +345,18 @@ def _normalize_ollama_url(url: str) -> str:
    return base.rstrip("/") + "/chat"


+def _normalize_openai_chat_url(url: str) -> str:
+    """Ensure an OpenAI-compatible URL points at its ``/chat/completions`` route.
+
+    Surrounding whitespace and trailing slashes on the path are dropped.  A
+    path that already ends in ``/chat/completions`` (or ``/completions``) is
+    kept as-is, a trailing ``/models`` segment is replaced, and any other
+    path gets ``/chat/completions`` appended.  A query string or fragment
+    (e.g. ``?api-version=...``) is left untouched and re-attached after the
+    normalized path, so it never ends up in the middle of the URL.
+
+    Args:
+        url: Base or full endpoint URL; ``None`` or blank input is tolerated.
+
+    Returns:
+        The normalized URL, or an empty string when *url* is empty.
+    """
+    cleaned = (url or "").strip()
+    # Everything from the first "?" or "#" onward is query/fragment text and
+    # must not take part in the path-suffix checks below.
+    markers = [pos for pos in (cleaned.find("?"), cleaned.find("#")) if pos != -1]
+    cut = min(markers, default=len(cleaned))
+    path, tail = cleaned[:cut].rstrip("/"), cleaned[cut:]
+    if not path:
+        return path + tail
+    if path.endswith(("/chat/completions", "/completions")):
+        # Already a full endpoint: normalization must be idempotent.
+        return path + tail
+    if path.endswith("/models"):
+        # A model-listing URL shares its base with the chat endpoint.
+        path = path[: -len("/models")].rstrip("/")
+    return path + "/chat/completions" + tail
+
+
 def _ollama_normalize_messages(messages: List[Dict]) -> List[Dict]:
    """Adapt Odysseus' canonical OpenAI-style messages to native Ollama /api/chat.

@@ -1563,7 +1575,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
            stream=False, num_ctx=get_context_length(url, model),
        )
    else:
-        target_url = url
+        target_url = _normalize_openai_chat_url(url)
        if provider == "copilot":
            from src.copilot import apply_request_headers
            apply_request_headers(h, messages_copy)
@@ -1767,7 +1779,7 @@ async def llm_call_async(
            stream=False, num_ctx=get_context_length(url, model),
        )
    else:
-        target_url = url
+        target_url = _normalize_openai_chat_url(url)
        h = _provider_headers(provider, headers)
        if provider == "copilot":
            from src.copilot import apply_request_headers
@@ -1889,7 +1901,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
        h = _provider_headers(provider, headers)
        payload = _build_chatgpt_responses_payload(model, messages_copy, temperature, max_tokens, stream=True)
    else:
-        target_url = url
+        target_url = _normalize_openai_chat_url(url)
        payload = {
            "model": model,
            "messages": messages_copy,
@@ -9,6 +9,12 @@ def test_detects_ollama_cloud_native_provider():
    assert llm_core._detect_provider("https://ollama.com/api/chat") == "ollama"


+def test_detects_bare_local_ollama_as_native_provider():
+    """Bare local :11434 URLs detect as "ollama"; the /v1 form detects as "openai"."""
+    expected_provider = {
+        "http://localhost:11434": "ollama",
+        "http://127.0.0.1:11434/": "ollama",
+        "http://localhost:11434/v1": "openai",
+    }
+    for endpoint, provider in expected_provider.items():
+        assert llm_core._detect_provider(endpoint) == provider
+
+
 def test_llm_call_posts_native_ollama_payload(monkeypatch):
    seen = {}

@@ -43,6 +49,82 @@ def test_llm_call_posts_native_ollama_payload(monkeypatch):
    assert seen["json"]["options"] == {"temperature": 0.2, "num_predict": 7}


+def test_llm_call_posts_bare_local_ollama_to_native_api(monkeypatch):
+    """A bare ``localhost:11434`` URL must be POSTed to ``/api/chat``, non-streaming."""
+    captured = {}
+
+    def fake_post(url, headers=None, json=None, timeout=None):
+        # Record the outgoing request, then reply with the canned body
+        # this test expects llm_call to turn into "OK".
+        captured.update(url=url, json=json)
+        return httpx.Response(
+            200,
+            request=httpx.Request("POST", url),
+            json={"message": {"content": "OK"}, "done": True},
+        )
+
+    monkeypatch.setattr(llm_core.httpx, "post", fake_post)
+
+    messages = [{"role": "user", "content": "Say OK"}]
+    result = llm_core.llm_call("http://localhost:11434", "llama3.2", messages)
+
+    assert result == "OK"
+    assert captured["url"] == "http://localhost:11434/api/chat"
+    assert captured["json"]["stream"] is False
+
+
+def test_openai_compatible_chat_url_shapes(monkeypatch):
+    """Both the ``/v1`` base and the full chat URL must POST to ``/v1/chat/completions``."""
+    posted_urls = []
+
+    def fake_post(url, headers=None, json=None, timeout=None):
+        # Remember every target URL and answer with a minimal chat reply.
+        posted_urls.append(url)
+        return httpx.Response(
+            200,
+            request=httpx.Request("POST", url),
+            json={"choices": [{"message": {"content": "OK"}}]},
+        )
+
+    monkeypatch.setattr(llm_core.httpx, "post", fake_post)
+    llm_core._response_cache.clear()
+
+    full_chat_url = "http://localhost:11434/v1/chat/completions"
+    # Every input shape is expected to resolve to the same full endpoint.
+    input_urls = ["http://localhost:11434/v1", full_chat_url]
+    for index, base_url in enumerate(input_urls):
+        # Distinct model and prompt per case, so each call is a different request.
+        reply = llm_core.llm_call(
+            base_url,
+            f"openai-compatible-{index}",
+            [{"role": "user", "content": f"Say OK {index}"}],
+        )
+        assert reply == "OK"
+        assert posted_urls[-1] == full_chat_url
+
+
+def test_list_model_ids_from_openai_compatible_v1(monkeypatch):
+    """Listing models from a ``/v1`` base must GET ``/v1/models`` and return the ids."""
+    requested = []
+
+    def fake_get(url, headers=None, timeout=None):
+        # Record the URL that was queried and return a one-model listing.
+        requested.append(url)
+        return httpx.Response(
+            200,
+            request=httpx.Request("GET", url),
+            json={"data": [{"id": "qwen2.5-coder:7b"}]},
+        )
+
+    monkeypatch.setattr(llm_core.httpx, "get", fake_get)
+
+    model_ids = llm_core.list_model_ids("http://localhost:11434/v1")
+    assert model_ids == ["qwen2.5-coder:7b"]
+    assert requested[-1] == "http://localhost:11434/v1/models"
+
+
 # ---------------------------------------------------------------------------
 # Tool-call argument serialization for native Ollama
 #