diff --git a/src/llm_core.py b/src/llm_core.py index e8331279f..9861ef01a 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -345,6 +345,18 @@ def _normalize_ollama_url(url: str) -> str: return base.rstrip("/") + "/chat" +def _normalize_openai_chat_url(url: str) -> str: + """Ensure an OpenAI-compatible base URL points at /chat/completions.""" + base = (url or "").strip().rstrip("/") + if not base: + return base + if base.endswith("/chat/completions") or base.endswith("/completions"): + return base + if base.endswith("/models"): + base = base[: -len("/models")].rstrip("/") + return base + "/chat/completions" + + def _ollama_normalize_messages(messages: List[Dict]) -> List[Dict]: """Adapt Odysseus' canonical OpenAI-style messages to native Ollama /api/chat. @@ -1563,7 +1575,7 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL stream=False, num_ctx=get_context_length(url, model), ) else: - target_url = url + target_url = _normalize_openai_chat_url(url) if provider == "copilot": from src.copilot import apply_request_headers apply_request_headers(h, messages_copy) @@ -1767,7 +1779,7 @@ async def llm_call_async( stream=False, num_ctx=get_context_length(url, model), ) else: - target_url = url + target_url = _normalize_openai_chat_url(url) h = _provider_headers(provider, headers) if provider == "copilot": from src.copilot import apply_request_headers @@ -1889,7 +1901,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl h = _provider_headers(provider, headers) payload = _build_chatgpt_responses_payload(model, messages_copy, temperature, max_tokens, stream=True) else: - target_url = url + target_url = _normalize_openai_chat_url(url) payload = { "model": model, "messages": messages_copy, diff --git a/tests/test_llm_core_ollama.py b/tests/test_llm_core_ollama.py index b334f260c..afe806617 100644 --- a/tests/test_llm_core_ollama.py +++ b/tests/test_llm_core_ollama.py @@ -9,6 +9,12 @@ def 
def test_detects_ollama_cloud_native_provider():
    """The hosted Ollama ``/api/chat`` URL is detected as the ``ollama`` provider."""
    provider = llm_core._detect_provider("https://ollama.com/api/chat")
    assert provider == "ollama"


def test_detects_bare_local_ollama_as_native_provider():
    """Bare local URLs on port 11434 map to ``ollama``; the ``/v1`` path maps to ``openai``."""
    expectations = (
        ("http://localhost:11434", "ollama"),
        ("http://127.0.0.1:11434/", "ollama"),
        ("http://localhost:11434/v1", "openai"),
    )
    for endpoint, provider in expectations:
        assert llm_core._detect_provider(endpoint) == provider


def test_llm_call_posts_bare_local_ollama_to_native_api(monkeypatch):
    """``llm_call`` with a bare local Ollama URL posts a non-streaming request to ``/api/chat``."""
    captured = {}

    def fake_post(url, headers=None, json=None, timeout=None):
        # Record what llm_call sent, then answer with a native Ollama reply.
        captured.update(url=url, json=json)
        return httpx.Response(
            200,
            request=httpx.Request("POST", url),
            json={"message": {"content": "OK"}, "done": True},
        )

    monkeypatch.setattr(llm_core.httpx, "post", fake_post)

    messages = [{"role": "user", "content": "Say OK"}]
    reply = llm_core.llm_call("http://localhost:11434", "llama3.2", messages)

    assert reply == "OK"
    assert captured["url"] == "http://localhost:11434/api/chat"
    assert captured["json"]["stream"] is False


def test_openai_compatible_chat_url_shapes(monkeypatch):
    """Both a ``/v1`` base URL and a full chat URL are posted to ``/v1/chat/completions``."""
    posted_urls = []

    def fake_post(url, headers=None, json=None, timeout=None):
        posted_urls.append(url)
        return httpx.Response(
            200,
            request=httpx.Request("POST", url),
            json={"choices": [{"message": {"content": "OK"}}]},
        )

    monkeypatch.setattr(llm_core.httpx, "post", fake_post)
    llm_core._response_cache.clear()

    chat_endpoint = "http://localhost:11434/v1/chat/completions"
    cases = [
        ("http://localhost:11434/v1", chat_endpoint),
        (chat_endpoint, chat_endpoint),
    ]
    for i, (base_url, expected_url) in enumerate(cases):
        # Model name and prompt differ per case -- presumably so a cached
        # response can never stand in for the HTTP call; confirm against
        # llm_core's caching rules.
        prompt = [{"role": "user", "content": f"Say OK {i}"}]
        reply = llm_core.llm_call(base_url, f"openai-compatible-{i}", prompt)
        assert reply == "OK"
        assert posted_urls[-1] == expected_url


def test_list_model_ids_from_openai_compatible_v1(monkeypatch):
    """``list_model_ids`` on a ``/v1`` base URL queries ``/v1/models`` and returns the ids."""
    captured = {}

    def fake_get(url, headers=None, timeout=None):
        captured["url"] = url
        return httpx.Response(
            200,
            request=httpx.Request("GET", url),
            json={"data": [{"id": "qwen2.5-coder:7b"}]},
        )

    monkeypatch.setattr(llm_core.httpx, "get", fake_get)

    model_ids = llm_core.list_model_ids("http://localhost:11434/v1")

    assert model_ids == ["qwen2.5-coder:7b"]
    assert captured["url"] == "http://localhost:11434/v1/models"


# ---------------------------------------------------------------------------
# Tool-call argument serialization for native Ollama
#