fix(llm): normalize OpenAI-compatible chat URLs

Normalize OpenAI-compatible chat URL shapes so that base /v1 endpoints route to /v1/chat/completions, while URLs that already point at the full chat endpoint are left unchanged (the normalization is idempotent).

Preserve native local Ollama routing for bare localhost:11434 endpoints, keep localhost:11434/v1 as OpenAI-compatible, and add focused regression coverage for provider detection, chat target URLs, and model listing from /v1.

Part of #541.
This commit is contained in:
nikakhalatiani
2026-06-28 16:30:15 +02:00
committed by GitHub
parent bb2148db73
commit 927b1f7ecf
2 changed files with 97 additions and 3 deletions
+82
View File
@@ -9,6 +9,12 @@ def test_detects_ollama_cloud_native_provider():
assert llm_core._detect_provider("https://ollama.com/api/chat") == "ollama"
def test_detects_bare_local_ollama_as_native_provider():
    """Check provider detection for local port-11434 endpoints.

    Bare host URLs (with or without a trailing slash) must be detected as
    ``"ollama"``, while the same host with a ``/v1`` path must be detected
    as ``"openai"``.
    """
    expected_provider_by_url = {
        "http://localhost:11434": "ollama",
        "http://127.0.0.1:11434/": "ollama",
        "http://localhost:11434/v1": "openai",
    }
    for endpoint, provider in expected_provider_by_url.items():
        assert llm_core._detect_provider(endpoint) == provider
def test_llm_call_posts_native_ollama_payload(monkeypatch):
seen = {}
@@ -43,6 +49,82 @@ def test_llm_call_posts_native_ollama_payload(monkeypatch):
assert seen["json"]["options"] == {"temperature": 0.2, "num_predict": 7}
def test_llm_call_posts_bare_local_ollama_to_native_api(monkeypatch):
    """A bare ``http://localhost:11434`` endpoint is POSTed to ``/api/chat``.

    ``httpx.post`` is replaced with a stub that records the target URL and
    JSON payload and replies with a ``{"message": ..., "done": True}`` body,
    so the test can check the routed URL, the ``stream`` flag in the payload,
    and the text returned by ``llm_call``.
    """
    seen = {}

    def fake_post(url, headers=None, json=None, timeout=None):
        # Record what llm_call sent so it can be asserted on below.
        seen["url"] = url
        seen["json"] = json
        request = httpx.Request("POST", url)
        return httpx.Response(
            200,
            request=request,
            json={"message": {"content": "OK"}, "done": True},
        )

    monkeypatch.setattr(llm_core.httpx, "post", fake_post)
    # Start from an empty response cache, as the OpenAI-compatible URL-shape
    # test in this file does. NOTE(review): presumably a cache hit would skip
    # the HTTP call, leaving ``seen`` empty — confirm against llm_core.
    llm_core._response_cache.clear()

    result = llm_core.llm_call(
        "http://localhost:11434",
        "llama3.2",
        [{"role": "user", "content": "Say OK"}],
    )

    assert result == "OK"
    assert seen["url"] == "http://localhost:11434/api/chat"
    assert seen["json"]["stream"] is False
def test_openai_compatible_chat_url_shapes(monkeypatch):
    """Both accepted URL shapes must POST to ``/v1/chat/completions``.

    Covers the ``/v1`` base URL and the already-complete chat-completions
    URL; each must result in a request to the same full endpoint.
    """
    posted_urls = []

    def fake_post(url, headers=None, json=None, timeout=None):
        posted_urls.append(url)
        return httpx.Response(
            200,
            request=httpx.Request("POST", url),
            json={"choices": [{"message": {"content": "OK"}}]},
        )

    monkeypatch.setattr(llm_core.httpx, "post", fake_post)
    llm_core._response_cache.clear()

    chat_endpoint = "http://localhost:11434/v1/chat/completions"
    configured_urls = ("http://localhost:11434/v1", chat_endpoint)

    for index, configured in enumerate(configured_urls):
        # Each case uses its own model name and prompt.
        # NOTE(review): presumably so the cases never share a cache entry.
        reply = llm_core.llm_call(
            configured,
            f"openai-compatible-{index}",
            [{"role": "user", "content": f"Say OK {index}"}],
        )
        assert reply == "OK"
        assert posted_urls[-1] == chat_endpoint
def test_list_model_ids_from_openai_compatible_v1(monkeypatch):
    """Listing models for a ``/v1`` base URL must GET ``/v1/models``.

    ``httpx.get`` is stubbed to return a ``{"data": [{"id": ...}]}`` body;
    the test checks both the requested URL and the extracted model ids.
    """
    requested = {}

    def fake_get(url, headers=None, timeout=None):
        requested["url"] = url
        payload = {"data": [{"id": "qwen2.5-coder:7b"}]}
        return httpx.Response(200, request=httpx.Request("GET", url), json=payload)

    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    model_ids = llm_core.list_model_ids("http://localhost:11434/v1")

    assert model_ids == ["qwen2.5-coder:7b"]
    assert requested["url"] == "http://localhost:11434/v1/models"
# ---------------------------------------------------------------------------
# Tool-call argument serialization for native Ollama
#