mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
Fix native tool-calling follow-up round on Gemini and Ollama (#867)
The agent's multi-round (tool-result) follow-up request was rejected with
HTTP 400 on two providers, so tools ran but the agent never produced an answer:
- OpenAI-compatible streaming (Gemini 3) had two bugs. First, it dropped the
per-call thought_signature. Second, parallel tool calls, which arrive with
index=None, collided: they all landed in slot 0, overwriting the first call's
name and corrupting its arguments by concatenation. As a result the follow-up
request 400'd. The fix: capture and replay each call's extra_content
(thought_signature), and give every parallel call its own accumulator slot
(allocated above the max key, so sparse or mixed indices can't collide).
- Native Ollama /api/chat expects object tool-call arguments, but Odysseus
carries them as a JSON string, which Ollama rejected ("Value looks like
object, but can't find closing '}' symbol"). Convert them to objects in the
Ollama payload builder.
Both compose with the no-prose null-content sanitize fix from #862.
Tested: python -m pytest tests/test_llm_core_streaming.py
tests/test_llm_core_ollama.py tests/test_agent_loop.py (53 pass), and
python -m py_compile src/llm_core.py src/agent_loop.py.
This commit is contained in:
@@ -0,0 +1,151 @@
"""Streaming tool-call accumulation tests for the OpenAI-compatible path.

Regression for Gemini's OpenAI-compat layer, which (a) attaches an opaque
thought_signature in `extra_content` on the function-call delta and (b) omits
`index` on PARALLEL tool calls — every parallel delta arrives as index=None.
The accumulator must give each parallel call its own slot (otherwise they
collide into slot 0, overwriting the first call's name and concatenating —
corrupting — its arguments) and must preserve extra_content per call.
"""
|
||||
import json
|
||||
import asyncio
|
||||
|
||||
from src import llm_core
|
||||
|
||||
|
||||
class _FakeResp:
|
||||
def __init__(self, lines):
|
||||
self._lines = lines
|
||||
self.status_code = 200
|
||||
|
||||
async def aiter_lines(self):
|
||||
for ln in self._lines:
|
||||
yield ln
|
||||
|
||||
async def aread(self):
|
||||
return b""
|
||||
|
||||
|
||||
class _FakeStreamCtx:
|
||||
def __init__(self, lines):
|
||||
self._lines = lines
|
||||
|
||||
async def __aenter__(self):
|
||||
return _FakeResp(self._lines)
|
||||
|
||||
async def __aexit__(self, *a):
|
||||
return False
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
def __init__(self, lines):
|
||||
self._lines = lines
|
||||
|
||||
def stream(self, method, url, **kw):
|
||||
return _FakeStreamCtx(self._lines)
|
||||
|
||||
|
||||
def _drive(monkeypatch, lines, model="gemini-3.1-pro-preview-customtools"):
    """Run stream_llm against a canned SSE line list; return parsed events.

    Args:
        monkeypatch: pytest's monkeypatch fixture, used to replace four
            ``llm_core`` attributes for the duration of the test.
        lines: SSE lines that the fake HTTP response yields, in order.
        model: Model name passed to ``llm_core.stream_llm``.

    Returns:
        A list with the JSON-decoded payload of every ``data: `` line found in
        the chunks ``stream_llm`` yields, excluding the ``[DONE]`` sentinel and
        any payload that does not decode as JSON.
    """
    # Swap the HTTP client for one that replays ``lines``; the other three
    # patched attributes are replaced with stubs (host is never reported dead,
    # the remaining two do nothing).
    monkeypatch.setattr(llm_core, "_get_http_client", lambda: _FakeClient(lines))
    monkeypatch.setattr(llm_core, "_is_host_dead", lambda u: False)
    monkeypatch.setattr(llm_core, "note_model_activity", lambda *a, **k: None)
    monkeypatch.setattr(llm_core, "_clear_host_dead", lambda *a, **k: None)

    async def run():
        events = []
        async for chunk in llm_core.stream_llm(
            "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
            model,
            [{"role": "user", "content": "hi"}],
            headers={"Authorization": "Bearer k"},
            tools=[{"type": "function", "function": {"name": "x", "parameters": {}}}],
        ):
            # A single chunk may carry several newline-separated lines.
            for ln in chunk.split("\n"):
                ln = ln.strip()
                if ln.startswith("data: ") and ln[6:] != "[DONE]":
                    try:
                        events.append(json.loads(ln[6:]))
                    except ValueError:
                        # Deliberately skip data lines that are not JSON.
                        pass
        return events

    return asyncio.run(run())
|
||||
|
||||
|
||||
def _sse(delta):
|
||||
return "data: " + json.dumps({"choices": [{"delta": delta}]})
|
||||
|
||||
|
||||
def test_parallel_calls_with_null_index_do_not_collide(monkeypatch):
    """Two index=None calls must stay separate, each with its own extra_content."""
    # Two parallel calls, each complete in one delta, both with index=None
    # (exactly what Gemini's OpenAI-compat layer emits). Only the first carries
    # a thought_signature.
    lines = [
        _sse({"tool_calls": [{
            "index": None, "id": "call_a", "type": "function",
            "function": {"name": "get_memory", "arguments": "{}"},
            "extra_content": {"google": {"thought_signature": "SIG0"}},
        }]}),
        _sse({"tool_calls": [{
            "index": None, "id": "call_b", "type": "function",
            "function": {"name": "bash", "arguments": '{"command":"echo hi"}'},
        }]}),
        "data: [DONE]",
    ]
    events = _drive(monkeypatch, lines)
    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
    assert len(calls) == 2, f"parallel calls collided: {calls}"
    by_name = {c["name"]: c for c in calls}
    assert set(by_name) == {"get_memory", "bash"}
    # arguments are NOT corrupted by concatenation
    assert by_name["get_memory"]["arguments"] == "{}"
    assert by_name["bash"]["arguments"] == '{"command":"echo hi"}'
    # signature preserved on the first call only, exactly as received
    assert by_name["get_memory"]["extra_content"] == {"google": {"thought_signature": "SIG0"}}
    assert "extra_content" not in by_name["bash"]
|
||||
|
||||
|
||||
def test_single_call_chunked_arguments_still_accumulate(monkeypatch):
    """Argument fragments sharing an integer index must join into one call."""
    # Conformant OpenAI style: index present, arguments streamed in pieces.
    lines = [
        _sse({"tool_calls": [{"index": 0, "id": "c", "type": "function",
                              "function": {"name": "search", "arguments": '{"q":"'}}]}),
        _sse({"tool_calls": [{"index": 0, "function": {"arguments": 'cats"}'}}]}),
        "data: [DONE]",
    ]
    events = _drive(monkeypatch, lines, model="gpt-4o-test")
    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
    assert len(calls) == 1
    assert calls[0]["name"] == "search"
    assert calls[0]["arguments"] == '{"q":"cats"}'
|
||||
|
||||
|
||||
def test_null_index_chunked_arguments_attach_to_last_call(monkeypatch):
    """An index=None, argument-only delta must extend the call already open."""
    # index=None where the name arrives first, then an arg-only continuation:
    # the continuation must attach to the just-started call, not open a new one.
    lines = [
        _sse({"tool_calls": [{"index": None, "id": "c", "type": "function",
                              "function": {"name": "search", "arguments": '{"q":'}}]}),
        _sse({"tool_calls": [{"index": None, "function": {"arguments": '"dogs"}'}}]}),
        "data: [DONE]",
    ]
    events = _drive(monkeypatch, lines)
    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
    assert len(calls) == 1, f"continuation opened a spurious call: {calls}"
    assert calls[0]["arguments"] == '{"q":"dogs"}'
|
||||
|
||||
|
||||
def test_sparse_integer_indices_then_null_do_not_collide(monkeypatch):
    """A null-index call after sparse integer indices must get a fresh slot."""
    # Hardening: a provider that uses sparse integer indices (0 and 2) and then
    # a null-index call must allocate ABOVE the max key, not at len()==2 (which
    # would overwrite slot 2). Three distinct calls must survive.
    lines = [
        _sse({"tool_calls": [{"index": 0, "id": "a", "function": {"name": "f0", "arguments": "{}"}}]}),
        _sse({"tool_calls": [{"index": 2, "id": "b", "function": {"name": "f2", "arguments": "{}"}}]}),
        _sse({"tool_calls": [{"index": None, "id": "c", "function": {"name": "fn", "arguments": "{}"}}]}),
        "data: [DONE]",
    ]
    events = _drive(monkeypatch, lines)
    calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
    assert sorted(c["name"] for c in calls) == ["f0", "f2", "fn"], f"collision: {calls}"
|
||||
Reference in New Issue
Block a user