mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 09:45:24 -04:00
Fix native tool-calling follow-up round on Gemini and Ollama (#867)
The agent's multi-round (tool-result) follow-up request was rejected with
HTTP 400 on two providers, so tools ran but the agent never produced an answer:
- OpenAI-compatible streaming (Gemini 3) dropped the per-call thought_signature
and collided parallel tool calls, which arrive with index=None: they all
landed in slot 0, overwriting the first call's name and corrupting its
arguments by concatenation, so the follow-up request 400'd. Capture and replay
each call's extra_content (thought_signature), and give every parallel call
its own accumulator slot (allocated above the max key, so sparse or mixed
indices can't collide).
- Native Ollama /api/chat expects object tool-call arguments, but Odysseus
carries them as a JSON string, which Ollama rejected ("Value looks like
object, but can't find closing '}' symbol"). Convert them to objects in the
Ollama payload builder.
Both compose with the no-prose null-content sanitize fix from #862.
Tested: python -m pytest tests/test_llm_core_streaming.py
tests/test_llm_core_ollama.py tests/test_agent_loop.py (53 pass), and
python -m py_compile src/llm_core.py src/agent_loop.py.
This commit is contained in:
@@ -301,3 +301,39 @@ class TestAppendToolResultsNativeContent:
|
||||
assert messages[0]["content"] == "thinking..."
|
||||
assert messages[1]["role"] == "user"
|
||||
assert "tool output" in messages[1]["content"]
|
||||
|
||||
|
||||
class TestAppendToolResultsThoughtSignature:
|
||||
"""Gemini 3 returns an opaque thought_signature (in extra_content) with each
|
||||
function call and rejects the follow-up turn with HTTP 400 unless it is
|
||||
echoed back on the assistant tool_call. _append_tool_results must replay it
|
||||
when present, and omit the field entirely otherwise (other providers never
|
||||
send it)."""
|
||||
|
||||
def test_extra_content_is_replayed_when_present(self):
|
||||
native = [{
|
||||
"id": "call_g",
|
||||
"name": "app_api",
|
||||
"arguments": '{"action": "get_memory"}',
|
||||
"extra_content": {"google": {"thought_signature": "EuIDCt8DAQ=="}},
|
||||
}]
|
||||
messages = []
|
||||
_append_tool_results(
|
||||
messages, "", native, [{}], ["mem"],
|
||||
used_native=True, round_num=1,
|
||||
)
|
||||
tc = messages[0]["tool_calls"][0]
|
||||
assert tc["extra_content"] == {"google": {"thought_signature": "EuIDCt8DAQ=="}}
|
||||
# function payload is still well-formed alongside it
|
||||
assert tc["function"]["name"] == "app_api"
|
||||
assert tc["id"] == "call_g"
|
||||
|
||||
def test_no_extra_content_key_when_absent(self):
|
||||
native = [{"id": "call_o", "name": "app_api", "arguments": "{}"}]
|
||||
messages = []
|
||||
_append_tool_results(
|
||||
messages, "", native, [{}], ["r"],
|
||||
used_native=True, round_num=1,
|
||||
)
|
||||
# No empty/None extra_content leaks onto non-Gemini tool calls.
|
||||
assert "extra_content" not in messages[0]["tool_calls"][0]
|
||||
|
||||
@@ -41,3 +41,75 @@ def test_llm_call_posts_native_ollama_payload(monkeypatch):
|
||||
assert seen["headers"]["Authorization"] == "Bearer ollama-key"
|
||||
assert seen["json"]["stream"] is False
|
||||
assert seen["json"]["options"] == {"temperature": 0.2, "num_predict": 7}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool-call argument serialization for native Ollama
|
||||
#
|
||||
# Odysseus carries assistant tool calls in the OpenAI shape, where
|
||||
# `function.arguments` is a JSON *string*. Native Ollama /api/chat expects a
|
||||
# JSON *object* and rejects the string form with HTTP 400 ("Value looks like
|
||||
# object, but can't find closing '}' symbol"), aborting every follow-up
|
||||
# (tool-result) round. _build_ollama_payload must parse it back to an object.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _assistant_tool_call_msgs():
|
||||
"""A canonical OpenAI-style assistant tool call + tool result, as produced by
|
||||
agent_loop._append_tool_results (arguments are a JSON string)."""
|
||||
return [
|
||||
{"role": "user", "content": "what do you know about me?"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_0",
|
||||
"type": "function",
|
||||
"function": {"name": "app_api", "arguments": '{"action": "get_memory"}'},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_0", "content": "Memory: user is James."},
|
||||
]
|
||||
|
||||
|
||||
def test_ollama_payload_parses_string_arguments_to_object():
|
||||
payload = llm_core._build_ollama_payload(
|
||||
"gpt-oss:120b", _assistant_tool_call_msgs(), temperature=0.0, max_tokens=0,
|
||||
)
|
||||
asst = payload["messages"][1]
|
||||
args = asst["tool_calls"][0]["function"]["arguments"]
|
||||
# The whole point: arguments must be a dict, not the JSON string.
|
||||
assert args == {"action": "get_memory"}
|
||||
assert not isinstance(args, str)
|
||||
assert asst["tool_calls"][0]["function"]["name"] == "app_api"
|
||||
assert asst["tool_calls"][0]["id"] == "call_0"
|
||||
|
||||
|
||||
def test_ollama_payload_drops_gemini_thought_signature():
|
||||
"""A cross-provider fallback can hand Ollama a tool call that still carries
|
||||
Gemini's opaque extra_content; it is meaningless to Ollama and must not leak."""
|
||||
msgs = _assistant_tool_call_msgs()
|
||||
msgs[1]["tool_calls"][0]["extra_content"] = {"google": {"thought_signature": "AAAA"}}
|
||||
payload = llm_core._build_ollama_payload(
|
||||
"gpt-oss:120b", msgs, temperature=0.0, max_tokens=0,
|
||||
)
|
||||
tc = payload["messages"][1]["tool_calls"][0]
|
||||
assert "extra_content" not in tc
|
||||
assert tc["function"]["arguments"] == {"action": "get_memory"}
|
||||
|
||||
|
||||
def test_ollama_payload_leaves_plain_messages_untouched():
|
||||
msgs = [{"role": "user", "content": "hello"}]
|
||||
payload = llm_core._build_ollama_payload("m", msgs, temperature=0.0, max_tokens=0)
|
||||
assert payload["messages"][0] == {"role": "user", "content": "hello"}
|
||||
|
||||
|
||||
def test_ollama_payload_tolerates_malformed_arguments():
|
||||
msgs = [{
|
||||
"role": "assistant",
|
||||
"tool_calls": [{"function": {"name": "x", "arguments": "{not json"}}],
|
||||
}]
|
||||
payload = llm_core._build_ollama_payload("m", msgs, temperature=0.0, max_tokens=0)
|
||||
# Falls back to an empty object rather than raising.
|
||||
assert payload["messages"][0]["tool_calls"][0]["function"]["arguments"] == {}
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Streaming tool-call accumulation tests for the OpenAI-compatible path.
|
||||
|
||||
Regression for Gemini's OpenAI-compat layer, which (a) attaches an opaque
|
||||
thought_signature in `extra_content` on the function-call delta and (b) omits
|
||||
`index` on PARALLEL tool calls — every parallel delta arrives as index=None.
|
||||
The accumulator must give each parallel call its own slot (otherwise they
|
||||
collide into slot 0, overwriting the first call's name and concatenating —
|
||||
corrupting — its arguments) and must preserve extra_content per call.
|
||||
"""
|
||||
import json
|
||||
import asyncio
|
||||
|
||||
from src import llm_core
|
||||
|
||||
|
||||
class _FakeResp:
|
||||
def __init__(self, lines):
|
||||
self._lines = lines
|
||||
self.status_code = 200
|
||||
|
||||
async def aiter_lines(self):
|
||||
for ln in self._lines:
|
||||
yield ln
|
||||
|
||||
async def aread(self):
|
||||
return b""
|
||||
|
||||
|
||||
class _FakeStreamCtx:
|
||||
def __init__(self, lines):
|
||||
self._lines = lines
|
||||
|
||||
async def __aenter__(self):
|
||||
return _FakeResp(self._lines)
|
||||
|
||||
async def __aexit__(self, *a):
|
||||
return False
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
def __init__(self, lines):
|
||||
self._lines = lines
|
||||
|
||||
def stream(self, method, url, **kw):
|
||||
return _FakeStreamCtx(self._lines)
|
||||
|
||||
|
||||
def _drive(monkeypatch, lines, model="gemini-3.1-pro-preview-customtools"):
|
||||
"""Run stream_llm against a canned SSE line list; return parsed events."""
|
||||
monkeypatch.setattr(llm_core, "_get_http_client", lambda: _FakeClient(lines))
|
||||
monkeypatch.setattr(llm_core, "_is_host_dead", lambda u: False)
|
||||
monkeypatch.setattr(llm_core, "note_model_activity", lambda *a, **k: None)
|
||||
monkeypatch.setattr(llm_core, "_clear_host_dead", lambda *a, **k: None)
|
||||
|
||||
async def run():
|
||||
events = []
|
||||
async for chunk in llm_core.stream_llm(
|
||||
"https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
|
||||
model,
|
||||
[{"role": "user", "content": "hi"}],
|
||||
headers={"Authorization": "Bearer k"},
|
||||
tools=[{"type": "function", "function": {"name": "x", "parameters": {}}}],
|
||||
):
|
||||
for ln in chunk.split("\n"):
|
||||
ln = ln.strip()
|
||||
if ln.startswith("data: ") and ln[6:] != "[DONE]":
|
||||
try:
|
||||
events.append(json.loads(ln[6:]))
|
||||
except ValueError:
|
||||
pass
|
||||
return events
|
||||
|
||||
return asyncio.run(run())
|
||||
|
||||
|
||||
def _sse(delta):
|
||||
return "data: " + json.dumps({"choices": [{"delta": delta}]})
|
||||
|
||||
|
||||
def test_parallel_calls_with_null_index_do_not_collide(monkeypatch):
|
||||
# Two parallel calls, each complete in one delta, both with index=None
|
||||
# (exactly what Gemini's OpenAI-compat layer emits). Only the first carries
|
||||
# a thought_signature.
|
||||
lines = [
|
||||
_sse({"tool_calls": [{
|
||||
"index": None, "id": "call_a", "type": "function",
|
||||
"function": {"name": "get_memory", "arguments": "{}"},
|
||||
"extra_content": {"google": {"thought_signature": "SIG0"}},
|
||||
}]}),
|
||||
_sse({"tool_calls": [{
|
||||
"index": None, "id": "call_b", "type": "function",
|
||||
"function": {"name": "bash", "arguments": '{"command":"echo hi"}'},
|
||||
}]}),
|
||||
"data: [DONE]",
|
||||
]
|
||||
events = _drive(monkeypatch, lines)
|
||||
calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
|
||||
assert len(calls) == 2, f"parallel calls collided: {calls}"
|
||||
by_name = {c["name"]: c for c in calls}
|
||||
assert set(by_name) == {"get_memory", "bash"}
|
||||
# arguments are NOT corrupted by concatenation
|
||||
assert by_name["get_memory"]["arguments"] == "{}"
|
||||
assert by_name["bash"]["arguments"] == '{"command":"echo hi"}'
|
||||
# signature preserved on the first call only, exactly as received
|
||||
assert by_name["get_memory"]["extra_content"] == {"google": {"thought_signature": "SIG0"}}
|
||||
assert "extra_content" not in by_name["bash"]
|
||||
|
||||
|
||||
def test_single_call_chunked_arguments_still_accumulate(monkeypatch):
|
||||
# Conformant OpenAI style: index present, arguments streamed in pieces.
|
||||
lines = [
|
||||
_sse({"tool_calls": [{"index": 0, "id": "c", "type": "function",
|
||||
"function": {"name": "search", "arguments": '{"q":"'}}]}),
|
||||
_sse({"tool_calls": [{"index": 0, "function": {"arguments": 'cats"}'}}]}),
|
||||
"data: [DONE]",
|
||||
]
|
||||
events = _drive(monkeypatch, lines, model="gpt-4o-test")
|
||||
calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
|
||||
assert len(calls) == 1
|
||||
assert calls[0]["name"] == "search"
|
||||
assert calls[0]["arguments"] == '{"q":"cats"}'
|
||||
|
||||
|
||||
def test_null_index_chunked_arguments_attach_to_last_call(monkeypatch):
|
||||
# index=None where the name arrives first, then an arg-only continuation:
|
||||
# the continuation must attach to the just-started call, not open a new one.
|
||||
lines = [
|
||||
_sse({"tool_calls": [{"index": None, "id": "c", "type": "function",
|
||||
"function": {"name": "search", "arguments": '{"q":'}}]}),
|
||||
_sse({"tool_calls": [{"index": None, "function": {"arguments": '"dogs"}'}}]}),
|
||||
"data: [DONE]",
|
||||
]
|
||||
events = _drive(monkeypatch, lines)
|
||||
calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
|
||||
assert len(calls) == 1, f"continuation opened a spurious call: {calls}"
|
||||
assert calls[0]["arguments"] == '{"q":"dogs"}'
|
||||
|
||||
|
||||
def test_sparse_integer_indices_then_null_do_not_collide(monkeypatch):
|
||||
# Hardening: a provider that uses sparse integer indices (0 and 2) and then
|
||||
# a null-index call must allocate ABOVE the max key, not at len()==2 (which
|
||||
# would overwrite slot 2). Three distinct calls must survive.
|
||||
lines = [
|
||||
_sse({"tool_calls": [{"index": 0, "id": "a", "function": {"name": "f0", "arguments": "{}"}}]}),
|
||||
_sse({"tool_calls": [{"index": 2, "id": "b", "function": {"name": "f2", "arguments": "{}"}}]}),
|
||||
_sse({"tool_calls": [{"index": None, "id": "c", "function": {"name": "fn", "arguments": "{}"}}]}),
|
||||
"data: [DONE]",
|
||||
]
|
||||
events = _drive(monkeypatch, lines)
|
||||
calls = next(e["calls"] for e in events if e.get("type") == "tool_calls")
|
||||
assert sorted(c["name"] for c in calls) == ["f0", "f2", "fn"], f"collision: {calls}"
|
||||
Reference in New Issue
Block a user