mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
8ae2b5f58c
* fix(llm): suppress thinking for qwen3/gemma4 on Ollama /v1 compat endpoint

  When using qwen3, QwQ, gemma4, or other thinking models via Ollama's OpenAI-compatible /v1 endpoint, the model routes all output into its <think>...</think> reasoning block. Since Odysseus strips thinking content from round_response and only accumulates native tool_calls, this produces a round with 0 chars, 0 native calls, 0 tool blocks — the agent appears to silently do nothing.

  Root cause: Odysseus classifies the /v1 endpoint as provider="openai" (not "ollama"), so the payload is built as a standard OpenAI payload without any Ollama-specific options. Ollama's /v1 endpoint accepts "think": false as a top-level parameter to suppress extended thinking, but this was never sent.

  Fix:
  - Add _is_ollama_openai_compat_url() to detect local Ollama /v1 URLs
  - Inject "think": false in both stream_llm and llm_call_async for thinking models (qwen3, QwQ, gemma4, DeepSeek-R1, etc.) on this endpoint

  Verified with qwen3:14b on Ollama 0.24: with think=False the model correctly emits native tool_calls in a single streaming chunk and the agent executes bash/file/web tools as expected.

* fix(llm): extend _is_ollama_openai_compat_url to match localhost on any port

  Per reviewer feedback on PR #3228:

  1. Generalize host detection to mirror _is_ollama_native_url: match any localhost/127.0.0.1/0.0.0.0/::1 host (not just port 11434) so that custom OLLAMA_HOST ports and container remaps are also covered.
  2. Add tests/test_llm_core_ollama_thinking.py covering:
     - _is_ollama_openai_compat_url for all positive/negative URL cases including IPv6, non-default port, native /api path, and real OpenAI
     - Payload injection: think:false set for Ollama /v1 thinking model, not set for non-thinking model, not set for real OpenAI endpoint, and set for localhost on a non-default port (the new case)
166 lines
6.5 KiB
Python
166 lines
6.5 KiB
Python
"""Tests for Ollama /v1 thinking-suppression helpers.

Covers:
- _is_ollama_openai_compat_url: URL classification (local host + /v1 path)
- think: false is injected into the payload for Ollama /v1 thinking models
- think: false is NOT injected for non-thinking models or non-Ollama /v1 endpoints
"""
|
|
import asyncio
|
|
import json
|
|
|
|
from src import llm_core
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Fake HTTP client — captures the outgoing payload without network I/O
# ---------------------------------------------------------------------------
|
|
|
|
class _FakeResp:
    """Canned streaming response returned by the fake stream context.

    Provides the three members defined below -- ``status_code``,
    ``aiter_lines`` and ``aread`` -- so the code under test can consume a
    "response" without any network I/O.
    """

    # Every fake response reports HTTP success.
    status_code = 200

    async def aiter_lines(self):
        """Yield one JSON content chunk, then the ``data: [DONE]`` sentinel."""
        # Yield a minimal done event so stream_llm exits cleanly.
        # NOTE(review): this first line has no "data: " prefix while the
        # sentinel below does -- confirm stream_llm's line parser treats a
        # bare JSON line the way this fixture intends.
        yield json.dumps(
            {"choices": [{"delta": {"content": "ok"}, "finish_reason": "stop"}]}
        )
        yield "data: [DONE]"

    async def aread(self):
        """Return an empty response body."""
        return b""
|
|
|
|
|
|
class _FakeStreamCtx:
    """Async context manager that hands out a ``_FakeResp`` on entry."""

    def __init__(self, captured):
        """Keep a reference to the payload captured by the client.

        Args:
            captured: The payload object passed in by the creator; it is only
                stored on the instance and not otherwise used here.
        """
        self._captured = captured

    async def __aenter__(self):
        """Return a fresh canned response."""
        return _FakeResp()

    async def __aexit__(self, *a):
        """Return ``False`` so exceptions raised inside the block propagate."""
        return False
|
|
|
|
|
|
class _FakeClient:
    """Minimal stand-in for httpx.AsyncClient that captures request payload."""

    def __init__(self):
        """Start with an empty captured payload."""
        self.captured_payload = {}

    def stream(self, method, url, **kw):
        """Record the ``json=`` keyword argument and return a fake stream context.

        Args:
            method: Accepted for signature compatibility; not inspected.
            url: Accepted for signature compatibility; not inspected.
            **kw: Keyword arguments of the call; only ``json`` is read.

        Returns:
            A ``_FakeStreamCtx`` wrapping the captured payload.
        """
        # A missing or falsy ``json`` argument is normalised to an empty dict.
        self.captured_payload = kw.get("json") or {}
        return _FakeStreamCtx(self.captured_payload)
|
|
|
|
|
|
def _capture_payload(monkeypatch, url, model):
    """Run stream_llm, intercept the HTTP payload, and return it.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture, used to swap
            ``llm_core`` attributes for the duration of the test.
        url: Endpoint URL passed to ``llm_core.stream_llm``.
        model: Model name passed to ``llm_core.stream_llm``.

    Returns:
        The ``json=`` payload that ``stream_llm`` handed to the fake client.

    Raises:
        AssertionError: If no non-empty payload was captured, i.e. the fake
            client's ``stream`` was never reached with a ``json=`` body.
    """
    client = _FakeClient()

    # Replace the HTTP client factory with one returning the capturing fake,
    # and stub the remaining llm_core helpers with inert lambdas.
    monkeypatch.setattr(llm_core, "_get_http_client", lambda: client)
    monkeypatch.setattr(llm_core, "_is_host_dead", lambda u: False)
    monkeypatch.setattr(llm_core, "note_model_activity", lambda *a, **k: None)
    monkeypatch.setattr(llm_core, "_clear_host_dead", lambda *a, **k: None)
    monkeypatch.setattr(llm_core, "get_context_length", lambda u, m: 32768)

    async def run():
        # Drain the async generator completely; the chunks are not needed.
        return [
            c
            async for c in llm_core.stream_llm(
                url, model, [{"role": "user", "content": "hi"}],
            )
        ]

    asyncio.run(run())

    # Without this guard the negative tests (`"think" not in payload`) would
    # pass vacuously whenever stream_llm returns before issuing a request.
    assert client.captured_payload, (
        "stream_llm did not send a JSON payload through the fake client"
    )
    return client.captured_payload
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# _is_ollama_openai_compat_url — pure function, no I/O
# ---------------------------------------------------------------------------
|
|
|
|
class TestIsOllamaOpenAICompatUrl:
    """Unit tests for the URL classifier that gates think-suppression."""

    # Positive cases — should be True

    def test_default_port_v1_root(self):
        assert llm_core._is_ollama_openai_compat_url("http://127.0.0.1:11434/v1")

    def test_default_port_chat_completions(self):
        assert llm_core._is_ollama_openai_compat_url(
            "http://127.0.0.1:11434/v1/chat/completions"
        )

    def test_localhost_default_port(self):
        assert llm_core._is_ollama_openai_compat_url("http://localhost:11434/v1")

    def test_localhost_default_port_with_path(self):
        assert llm_core._is_ollama_openai_compat_url(
            "http://localhost:11434/v1/chat/completions"
        )

    def test_loopback_ipv6(self):
        # IPv6 addresses in URLs require square brackets per RFC 3986
        assert llm_core._is_ollama_openai_compat_url("http://[::1]:11434/v1")

    def test_any_local_non_default_port(self):
        """Localhost on a non-default port (custom OLLAMA_HOST) must also match."""
        assert llm_core._is_ollama_openai_compat_url("http://127.0.0.1:11435/v1")

    def test_localhost_non_default_port(self):
        assert llm_core._is_ollama_openai_compat_url(
            "http://localhost:8080/v1/chat/completions"
        )

    def test_zero_dot_zero_host(self):
        assert llm_core._is_ollama_openai_compat_url("http://0.0.0.0:11434/v1")

    # Negative cases — should be False

    def test_openai_api_v1(self):
        """Real OpenAI endpoint must never match, even though path is /v1."""
        assert not llm_core._is_ollama_openai_compat_url("https://api.openai.com/v1")

    def test_openai_chat_completions(self):
        assert not llm_core._is_ollama_openai_compat_url(
            "https://api.openai.com/v1/chat/completions"
        )

    def test_ollama_native_api_path(self):
        """The native /api path is a different surface and must not match /v1."""
        assert not llm_core._is_ollama_openai_compat_url("http://localhost:11434/api")

    def test_ollama_native_api_chat(self):
        assert not llm_core._is_ollama_openai_compat_url(
            "http://localhost:11434/api/chat"
        )

    def test_remote_openrouter(self):
        assert not llm_core._is_ollama_openai_compat_url("https://openrouter.ai/api/v1")

    def test_empty_string(self):
        assert not llm_core._is_ollama_openai_compat_url("")

    def test_none_like_empty(self):
        # None is outside the annotated type, but must be rejected rather
        # than raise.
        assert not llm_core._is_ollama_openai_compat_url(None)  # type: ignore[arg-type]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Payload injection — think: false only when both conditions hold
# ---------------------------------------------------------------------------
|
|
|
|
class TestThinkSuppression:
    """Assert think:false is present/absent in the outgoing HTTP payload."""

    def test_think_false_for_ollama_v1_thinking_model(self, monkeypatch):
        """think:false must be set for qwen3 on Ollama /v1."""
        payload = _capture_payload(
            monkeypatch, "http://127.0.0.1:11434/v1/chat/completions", "qwen3:14b"
        )
        # `is False` (not mere falsiness) so a missing key cannot satisfy it.
        assert payload.get("think") is False

    def test_no_think_for_ollama_v1_non_thinking_model(self, monkeypatch):
        """think must NOT be set for a plain (non-thinking) model on Ollama /v1."""
        payload = _capture_payload(
            monkeypatch, "http://127.0.0.1:11434/v1/chat/completions", "llama3.2:3b"
        )
        assert "think" not in payload

    def test_no_think_for_openai_endpoint_with_thinking_model_name(self, monkeypatch):
        """think must NOT leak to a real OpenAI endpoint even if the model name
        matches a thinking pattern — the URL guard is what matters."""
        payload = _capture_payload(
            monkeypatch, "https://api.openai.com/v1/chat/completions", "qwen3:14b"
        )
        assert "think" not in payload

    def test_think_false_for_non_default_port_thinking_model(self, monkeypatch):
        """Custom-port localhost Ollama (e.g. OLLAMA_HOST=0.0.0.0:11435) must
        also receive think:false — this is the regression guarded by the
        host-set check added in this fix."""
        payload = _capture_payload(
            monkeypatch, "http://127.0.0.1:11435/v1/chat/completions", "qwen3:14b"
        )
        assert payload.get("think") is False
|