Fix native tool-calling follow-up round on Gemini and Ollama (#867)

The agent's multi-round (tool-result) follow-up request was rejected with HTTP 400 on two providers, so tools ran but the agent never produced an answer:

- OpenAI-compatible streaming (Gemini 3) dropped the per-call thought_signature and collided parallel tool calls, which arrive with index=None: they all landed in slot 0, overwriting the first call's name and corrupting its arguments by concatenation, so the follow-up request was rejected with HTTP 400. Capture and replay each call's extra_content (thought_signature), and give every parallel call its own accumulator slot (allocated above the max key, so sparse or mixed indices can't collide).
- Native Ollama /api/chat expects object tool-call arguments, but Odysseus carries them as a JSON string, which Ollama rejected ("Value looks like object, but can't find closing '}' symbol"). Convert them to objects in the Ollama payload builder.

Both compose with the no-prose null-content sanitize fix from #862.

Tested: python -m pytest tests/test_llm_core_streaming.py tests/test_llm_core_ollama.py tests/test_agent_loop.py (53 pass), and python -m py_compile src/llm_core.py src/agent_loop.py.
2026-06-15 17:25:26 -04:00 · 2026-06-02 04:39:40 +02:00
parent 54ac4a74fb
commit a327df6936
5 changed files with 334 additions and 3 deletions
@@ -0,0 +1,151 @@
+"""Streaming tool-call accumulation tests for the OpenAI-compatible path.
+
+Regression for Gemini's OpenAI-compat layer, which (a) attaches an opaque
+thought_signature in `extra_content` on the function-call delta and (b) omits
+`index` on PARALLEL tool calls — every parallel delta arrives as index=None.
+The accumulator must give each parallel call its own slot (otherwise they
+collide into slot 0, overwriting the first call's name and concatenating —
+corrupting — its arguments) and must preserve extra_content per call.
+"""
+import json
+import asyncio
+
+from src import llm_core
+
+
+class _FakeResp:
+    """Canned response object handed out by _FakeStreamCtx.
+
+    Exposes only what is defined here: a 200 ``status_code``, an async line
+    iterator over the pre-recorded lines, and an ``aread`` that returns no body.
+    NOTE(review): presumably this mirrors the subset of the real HTTP client's
+    streaming response that ``llm_core.stream_llm`` touches — confirm there.
+    """
+    def __init__(self, lines):
+        # lines: the already-formatted SSE lines to replay, in order.
+        self._lines = lines
+        self.status_code = 200
+
+    async def aiter_lines(self):
+        # Replay the canned lines one at a time, in the order given.
+        for ln in self._lines:
+            yield ln
+
+    async def aread(self):
+        # Nothing to read beyond the streamed lines.
+        return b""
+
+
+class _FakeStreamCtx:
+    """Async context manager that yields a _FakeResp over the given lines."""
+    def __init__(self, lines):
+        self._lines = lines
+
+    async def __aenter__(self):
+        # A fresh fake response is built on every entry.
+        return _FakeResp(self._lines)
+
+    async def __aexit__(self, *a):
+        # Falsy return: exceptions raised inside the `async with` body propagate.
+        return False
+
+
+class _FakeClient:
+    """Client stand-in whose ``stream`` always replays the same canned lines."""
+    def __init__(self, lines):
+        self._lines = lines
+
+    def stream(self, method, url, **kw):
+        # method, url and keyword arguments are accepted but ignored; no
+        # network I/O happens — the caller just gets the canned stream context.
+        return _FakeStreamCtx(self._lines)
+
+
+def _drive(monkeypatch, lines, model="gemini-3.1-pro-preview-customtools"):
+    """Run stream_llm against a canned SSE line list; return parsed events.
+
+    Args:
+        monkeypatch: object providing ``setattr(target, name, value)``
+            (presumably pytest's ``monkeypatch`` fixture, as the tests receive
+            it by parameter name).
+        lines: SSE lines that the fake HTTP client will replay.
+        model: model name forwarded to ``llm_core.stream_llm``.
+
+    Returns:
+        A list of the JSON objects decoded from every ``data: `` line that
+        ``stream_llm`` yields, excluding the ``[DONE]`` sentinel and any line
+        whose payload is not valid JSON.
+    """
+    # Swap the HTTP client for the fake, and neutralise the host-health and
+    # activity hooks so the run depends only on the canned lines.
+    monkeypatch.setattr(llm_core, "_get_http_client", lambda: _FakeClient(lines))
+    monkeypatch.setattr(llm_core, "_is_host_dead", lambda u: False)
+    monkeypatch.setattr(llm_core, "note_model_activity", lambda *a, **k: None)
+    monkeypatch.setattr(llm_core, "_clear_host_dead", lambda *a, **k: None)
+
+    async def run():
+        events = []
+        async for chunk in llm_core.stream_llm(
+            "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+            model,
+            [{"role": "user", "content": "hi"}],
+            headers={"Authorization": "Bearer k"},
+            tools=[{"type": "function", "function": {"name": "x", "parameters": {}}}],
+        ):
+            # A yielded chunk may hold several newline-separated lines.
+            for ln in chunk.split("\n"):
+                ln = ln.strip()
+                # ln[6:] is the payload after the 6-character "data: " prefix.
+                if ln.startswith("data: ") and ln[6:] != "[DONE]":
+                    try:
+                        events.append(json.loads(ln[6:]))
+                    except ValueError:
+                        # Non-JSON payloads are skipped on purpose (best effort).
+                        pass
+        return events
+
+    # Synchronous entry point so the tests can stay plain (non-async) functions.
+    return asyncio.run(run())
+
+
+def _sse(delta):
+    """Return one SSE ``data: `` line wrapping *delta* as the sole choice's delta."""
+    return "data: " + json.dumps({"choices": [{"delta": delta}]})
+
+
+def test_parallel_calls_with_null_index_do_not_collide(monkeypatch):
+    """Two index=None tool calls must come out as two separate, intact calls.
+
+    Also checks that ``extra_content`` is kept only on the call that carried it.
+    """
+    # Two parallel calls, each complete in one delta, both with index=None
+    # (exactly what Gemini's OpenAI-compat layer emits). Only the first carries
+    # a thought_signature.
+    lines = [
+        _sse({"tool_calls": [{
+            "index": None, "id": "call_a", "type": "function",
+            "function": {"name": "get_memory", "arguments": "{}"},
+            "extra_content": {"google": {"thought_signature": "SIG0"}},
+        }]}),
+        _sse({"tool_calls": [{
+            "index": None, "id": "call_b", "type": "function",
+            "function": {"name": "bash", "arguments": '{"command":"echo hi"}'},
+        }]}),
+        "data: [DONE]",
+    ]
+    events = _drive(monkeypatch, lines)
+    # next() has no default: if no "tool_calls" event was emitted this raises
+    # StopIteration instead of failing an assertion.
+    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
+    assert len(calls) == 2, f"parallel calls collided: {calls}"
+    by_name = {c["name"]: c for c in calls}
+    assert set(by_name) == {"get_memory", "bash"}
+    # arguments are NOT corrupted by concatenation
+    assert by_name["get_memory"]["arguments"] == "{}"
+    assert by_name["bash"]["arguments"] == '{"command":"echo hi"}'
+    # signature preserved on the first call only, exactly as received
+    assert by_name["get_memory"]["extra_content"] == {"google": {"thought_signature": "SIG0"}}
+    assert "extra_content" not in by_name["bash"]
+
+
+def test_single_call_chunked_arguments_still_accumulate(monkeypatch):
+    """Argument fragments that share index 0 are joined into a single call."""
+    # Conformant OpenAI style: index present, arguments streamed in pieces.
+    lines = [
+        _sse({"tool_calls": [{"index": 0, "id": "c", "type": "function",
+                              "function": {"name": "search", "arguments": '{"q":"'}}]}),
+        _sse({"tool_calls": [{"index": 0, "function": {"arguments": 'cats"}'}}]}),
+        "data: [DONE]",
+    ]
+    # Uses a non-default model name for this conformant-provider case.
+    events = _drive(monkeypatch, lines, model="gpt-4o-test")
+    # next() has no default: a missing "tool_calls" event raises StopIteration.
+    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
+    assert len(calls) == 1
+    assert calls[0]["name"] == "search"
+    assert calls[0]["arguments"] == '{"q":"cats"}'
+
+
+def test_null_index_chunked_arguments_attach_to_last_call(monkeypatch):
+    """An index=None delta carrying only arguments extends the previous call."""
+    # index=None where the name arrives first, then an arg-only continuation:
+    # the continuation must attach to the just-started call, not open a new one.
+    lines = [
+        _sse({"tool_calls": [{"index": None, "id": "c", "type": "function",
+                              "function": {"name": "search", "arguments": '{"q":'}}]}),
+        _sse({"tool_calls": [{"index": None, "function": {"arguments": '"dogs"}'}}]}),
+        "data: [DONE]",
+    ]
+    events = _drive(monkeypatch, lines)
+    # next() has no default: a missing "tool_calls" event raises StopIteration.
+    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
+    assert len(calls) == 1, f"continuation opened a spurious call: {calls}"
+    assert calls[0]["arguments"] == '{"q":"dogs"}'
+
+
+def test_sparse_integer_indices_then_null_do_not_collide(monkeypatch):
+    """Calls at indices 0, 2 and None must all survive as distinct calls."""
+    # Hardening: a provider that uses sparse integer indices (0 and 2) and then
+    # a null-index call must allocate ABOVE the max key, not at len()==2 (which
+    # would overwrite slot 2). Three distinct calls must survive.
+    lines = [
+        _sse({"tool_calls": [{"index": 0, "id": "a", "function": {"name": "f0", "arguments": "{}"}}]}),
+        _sse({"tool_calls": [{"index": 2, "id": "b", "function": {"name": "f2", "arguments": "{}"}}]}),
+        _sse({"tool_calls": [{"index": None, "id": "c", "function": {"name": "fn", "arguments": "{}"}}]}),
+        "data: [DONE]",
+    ]
+    events = _drive(monkeypatch, lines)
+    # next() has no default: a missing "tool_calls" event raises StopIteration.
+    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
+    # Compared after sorting, so the order in which calls are emitted is not asserted.
+    assert sorted(c["name"] for c in calls) == ["f0", "f2", "fn"], f"collision: {calls}"