mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 23:52:09 -04:00
fix(llm): normalize OpenAI-compatible chat URLs
Normalize OpenAI-compatible chat URL shapes so that base /v1 endpoints route to /v1/chat/completions, while URLs that already point at a full chat endpoint are left unchanged (the normalization is idempotent). Preserve native local Ollama routing for bare localhost:11434 endpoints, keep localhost:11434/v1 as OpenAI-compatible, and add focused regression coverage for provider detection, chat target URLs, and model listing from /v1. Part of #541.
This commit is contained in:
+15
-3
@@ -345,6 +345,18 @@ def _normalize_ollama_url(url: str) -> str:
|
||||
return base.rstrip("/") + "/chat"
|
||||
|
||||
|
||||
def _normalize_openai_chat_url(url: str) -> str:
|
||||
"""Ensure an OpenAI-compatible base URL points at /chat/completions."""
|
||||
base = (url or "").strip().rstrip("/")
|
||||
if not base:
|
||||
return base
|
||||
if base.endswith("/chat/completions") or base.endswith("/completions"):
|
||||
return base
|
||||
if base.endswith("/models"):
|
||||
base = base[: -len("/models")].rstrip("/")
|
||||
return base + "/chat/completions"
|
||||
|
||||
|
||||
def _ollama_normalize_messages(messages: List[Dict]) -> List[Dict]:
|
||||
"""Adapt Odysseus' canonical OpenAI-style messages to native Ollama /api/chat.
|
||||
|
||||
@@ -1563,7 +1575,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
|
||||
stream=False, num_ctx=get_context_length(url, model),
|
||||
)
|
||||
else:
|
||||
target_url = url
|
||||
target_url = _normalize_openai_chat_url(url)
|
||||
if provider == "copilot":
|
||||
from src.copilot import apply_request_headers
|
||||
apply_request_headers(h, messages_copy)
|
||||
@@ -1767,7 +1779,7 @@ async def llm_call_async(
|
||||
stream=False, num_ctx=get_context_length(url, model),
|
||||
)
|
||||
else:
|
||||
target_url = url
|
||||
target_url = _normalize_openai_chat_url(url)
|
||||
h = _provider_headers(provider, headers)
|
||||
if provider == "copilot":
|
||||
from src.copilot import apply_request_headers
|
||||
@@ -1889,7 +1901,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
h = _provider_headers(provider, headers)
|
||||
payload = _build_chatgpt_responses_payload(model, messages_copy, temperature, max_tokens, stream=True)
|
||||
else:
|
||||
target_url = url
|
||||
target_url = _normalize_openai_chat_url(url)
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": messages_copy,
|
||||
|
||||
@@ -9,6 +9,12 @@ def test_detects_ollama_cloud_native_provider():
|
||||
assert llm_core._detect_provider("https://ollama.com/api/chat") == "ollama"
|
||||
|
||||
|
||||
def test_detects_bare_local_ollama_as_native_provider():
    """Bare local :11434 URLs are native Ollama; the /v1 path is OpenAI-compatible."""
    expectations = (
        ("http://localhost:11434", "ollama"),
        ("http://127.0.0.1:11434/", "ollama"),
        ("http://localhost:11434/v1", "openai"),
    )
    for endpoint, provider in expectations:
        assert llm_core._detect_provider(endpoint) == provider
|
||||
|
||||
|
||||
def test_llm_call_posts_native_ollama_payload(monkeypatch):
|
||||
seen = {}
|
||||
|
||||
@@ -43,6 +49,82 @@ def test_llm_call_posts_native_ollama_payload(monkeypatch):
|
||||
assert seen["json"]["options"] == {"temperature": 0.2, "num_predict": 7}
|
||||
|
||||
|
||||
def test_llm_call_posts_bare_local_ollama_to_native_api(monkeypatch):
    """A bare localhost:11434 URL must be posted to the native /api/chat route."""
    captured = {}

    def recording_post(url, headers=None, json=None, timeout=None):
        # Record what llm_call sent, then answer with a native Ollama reply.
        captured.update(url=url, json=json)
        reply_body = {"message": {"content": "OK"}, "done": True}
        return httpx.Response(
            200,
            request=httpx.Request("POST", url),
            json=reply_body,
        )

    monkeypatch.setattr(llm_core.httpx, "post", recording_post)

    prompt = [{"role": "user", "content": "Say OK"}]
    answer = llm_core.llm_call("http://localhost:11434", "llama3.2", prompt)

    assert answer == "OK"
    assert captured["url"] == "http://localhost:11434/api/chat"
    assert captured["json"]["stream"] is False
|
||||
|
||||
|
||||
def test_openai_compatible_chat_url_shapes(monkeypatch):
    """Both a /v1 base URL and a full chat URL post to /v1/chat/completions."""
    posted_urls = []

    def recording_post(url, headers=None, json=None, timeout=None):
        # Track every target URL and answer with an OpenAI-style reply.
        posted_urls.append(url)
        reply_body = {"choices": [{"message": {"content": "OK"}}]}
        return httpx.Response(
            200,
            request=httpx.Request("POST", url),
            json=reply_body,
        )

    monkeypatch.setattr(llm_core.httpx, "post", recording_post)
    llm_core._response_cache.clear()

    full_chat_url = "http://localhost:11434/v1/chat/completions"
    cases = [
        ("http://localhost:11434/v1", full_chat_url),
        (full_chat_url, full_chat_url),
    ]
    # A distinct model name and prompt is used for each case.
    for i, (base_url, expected_url) in enumerate(cases):
        model_name = f"openai-compatible-{i}"
        prompt = [{"role": "user", "content": f"Say OK {i}"}]
        result = llm_core.llm_call(base_url, model_name, prompt)
        assert result == "OK"
        assert posted_urls[-1] == expected_url
|
||||
|
||||
|
||||
def test_list_model_ids_from_openai_compatible_v1(monkeypatch):
    """Listing models from a /v1 base URL queries /v1/models and returns the ids."""
    captured = {}

    def recording_get(url, headers=None, timeout=None):
        # Record the requested URL and reply with an OpenAI-style model list.
        captured["url"] = url
        listing = {"data": [{"id": "qwen2.5-coder:7b"}]}
        return httpx.Response(
            200,
            request=httpx.Request("GET", url),
            json=listing,
        )

    monkeypatch.setattr(llm_core.httpx, "get", recording_get)

    model_ids = llm_core.list_model_ids("http://localhost:11434/v1")

    assert model_ids == ["qwen2.5-coder:7b"]
    assert captured["url"] == "http://localhost:11434/v1/models"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool-call argument serialization for native Ollama
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user