mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 00:22:10 -04:00
fix: tool results misthreaded to the wrong tool_call_id when a native call fails to convert (#1917)
* fix: tool results misthreaded when a native call fails to convert * Unpack the third converted_calls return from _resolve_tool_blocks in the fenced-example tests
This commit is contained in:
+10
-3
@@ -1648,6 +1648,7 @@ def _build_base_prompt(
|
|||||||
def _resolve_tool_blocks(round_response: str, native_tool_calls: list, round_num: int, is_api_model: bool = False):
|
def _resolve_tool_blocks(round_response: str, native_tool_calls: list, round_num: int, is_api_model: bool = False):
|
||||||
"""Choose native function calls or fenced code block parsing. Returns (tool_blocks, used_native)."""
|
"""Choose native function calls or fenced code block parsing. Returns (tool_blocks, used_native)."""
|
||||||
used_native = False
|
used_native = False
|
||||||
|
converted_calls = [] # native calls that converted, ALIGNED with tool_blocks
|
||||||
if native_tool_calls:
|
if native_tool_calls:
|
||||||
tool_blocks = []
|
tool_blocks = []
|
||||||
for tc in native_tool_calls:
|
for tc in native_tool_calls:
|
||||||
@@ -1656,6 +1657,7 @@ def _resolve_tool_blocks(round_response: str, native_tool_calls: list, round_num
|
|||||||
block = function_call_to_tool_block(tc_name, tc_args)
|
block = function_call_to_tool_block(tc_name, tc_args)
|
||||||
if block:
|
if block:
|
||||||
tool_blocks.append(block)
|
tool_blocks.append(block)
|
||||||
|
converted_calls.append(tc)
|
||||||
logger.info(f" -> converted: {tc_name} -> {block.tool_type}")
|
logger.info(f" -> converted: {tc_name} -> {block.tool_type}")
|
||||||
else:
|
else:
|
||||||
logger.warning(f" -> FAILED to convert native call: {tc_name} args={tc_args[:200]}")
|
logger.warning(f" -> FAILED to convert native call: {tc_name} args={tc_args[:200]}")
|
||||||
@@ -1685,7 +1687,7 @@ def _resolve_tool_blocks(round_response: str, native_tool_calls: list, round_num
|
|||||||
f"{len(native_tool_calls)} native calls, "
|
f"{len(native_tool_calls)} native calls, "
|
||||||
f"{len(tool_blocks)} tool blocks. Preview: {resp_preview}")
|
f"{len(tool_blocks)} tool blocks. Preview: {resp_preview}")
|
||||||
|
|
||||||
return tool_blocks, used_native
|
return tool_blocks, used_native, converted_calls
|
||||||
|
|
||||||
|
|
||||||
def _append_tool_results(
|
def _append_tool_results(
|
||||||
@@ -2868,7 +2870,7 @@ async def stream_agent_loop(
|
|||||||
_round_first_event_logged,
|
_round_first_event_logged,
|
||||||
_round_first_token_logged,
|
_round_first_token_logged,
|
||||||
)
|
)
|
||||||
tool_blocks, used_native = _resolve_tool_blocks(
|
tool_blocks, used_native, converted_calls = _resolve_tool_blocks(
|
||||||
round_response,
|
round_response,
|
||||||
native_tool_calls,
|
native_tool_calls,
|
||||||
round_num,
|
round_num,
|
||||||
@@ -3500,7 +3502,12 @@ async def stream_agent_loop(
|
|||||||
break
|
break
|
||||||
|
|
||||||
# Feed results back to LLM for next round
|
# Feed results back to LLM for next round
|
||||||
_append_tool_results(messages, round_response, native_tool_calls,
|
# Pass the CONVERTED calls (aligned 1:1 with tool_result_texts), not the
|
||||||
|
# raw native_tool_calls: a call that failed to convert is dropped from
|
||||||
|
# tool_blocks but stayed in native_tool_calls, so indexing results by
|
||||||
|
# native position mis-attached each result to the wrong tool_call_id
|
||||||
|
# (and left the real call answered empty).
|
||||||
|
_append_tool_results(messages, round_response, converted_calls,
|
||||||
tool_results, tool_result_texts, used_native, round_num,
|
tool_results, tool_result_texts, used_native, round_num,
|
||||||
round_reasoning=round_reasoning)
|
round_reasoning=round_reasoning)
|
||||||
|
|
||||||
|
|||||||
@@ -178,14 +178,14 @@ def test_issue_3222_repro_guide_only_response_resolves_no_tool_actions(monkeypat
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
def test_resolve_tool_blocks_skips_textual_fallback_for_native_models_with_no_native_calls():
|
def test_resolve_tool_blocks_skips_textual_fallback_for_native_models_with_no_native_calls():
|
||||||
guide_only = "```bash\nnpm run plan:articles\n```\n```json\n{\"a\": 1}\n```"
|
guide_only = "```bash\nnpm run plan:articles\n```\n```json\n{\"a\": 1}\n```"
|
||||||
blocks, used_native = al._resolve_tool_blocks(guide_only, [], round_num=1, is_api_model=True)
|
blocks, used_native, _ = al._resolve_tool_blocks(guide_only, [], round_num=1, is_api_model=True)
|
||||||
assert blocks == []
|
assert blocks == []
|
||||||
assert used_native is False
|
assert used_native is False
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_tool_blocks_keeps_textual_fallback_for_non_native_models():
|
def test_resolve_tool_blocks_keeps_textual_fallback_for_non_native_models():
|
||||||
text = "```bash\necho hi\n```"
|
text = "```bash\necho hi\n```"
|
||||||
blocks, used_native = al._resolve_tool_blocks(text, [], round_num=1, is_api_model=False)
|
blocks, used_native, _ = al._resolve_tool_blocks(text, [], round_num=1, is_api_model=False)
|
||||||
assert len(blocks) == 1
|
assert len(blocks) == 1
|
||||||
assert blocks[0].tool_type == "bash"
|
assert blocks[0].tool_type == "bash"
|
||||||
assert used_native is False
|
assert used_native is False
|
||||||
@@ -193,7 +193,7 @@ def test_resolve_tool_blocks_keeps_textual_fallback_for_non_native_models():
|
|||||||
|
|
||||||
def test_resolve_tool_blocks_native_path_untouched_when_native_calls_present():
|
def test_resolve_tool_blocks_native_path_untouched_when_native_calls_present():
|
||||||
native_calls = [{"name": "bash", "arguments": json.dumps({"command": "echo hi"})}]
|
native_calls = [{"name": "bash", "arguments": json.dumps({"command": "echo hi"})}]
|
||||||
blocks, used_native = al._resolve_tool_blocks("some prose", native_calls, round_num=1, is_api_model=True)
|
blocks, used_native, _ = al._resolve_tool_blocks("some prose", native_calls, round_num=1, is_api_model=True)
|
||||||
assert used_native is True
|
assert used_native is True
|
||||||
assert len(blocks) == 1
|
assert len(blocks) == 1
|
||||||
assert blocks[0].tool_type == "bash"
|
assert blocks[0].tool_type == "bash"
|
||||||
@@ -305,7 +305,7 @@ def test_resolve_tool_blocks_recovers_invoke_markup_for_native_model_with_no_nat
|
|||||||
"I'll search for that now.\n"
|
"I'll search for that now.\n"
|
||||||
'<invoke name="web_search"><parameter name="query">odysseus changelog</parameter></invoke>'
|
'<invoke name="web_search"><parameter name="query">odysseus changelog</parameter></invoke>'
|
||||||
)
|
)
|
||||||
blocks, used_native = al._resolve_tool_blocks(leaked, [], round_num=1, is_api_model=True)
|
blocks, used_native, _ = al._resolve_tool_blocks(leaked, [], round_num=1, is_api_model=True)
|
||||||
assert used_native is False
|
assert used_native is False
|
||||||
assert len(blocks) == 1
|
assert len(blocks) == 1
|
||||||
assert blocks[0].tool_type == "web_search"
|
assert blocks[0].tool_type == "web_search"
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
"""Native tool-call results must be threaded by CONVERTED-call position.
|
||||||
|
|
||||||
|
When an OpenAI/Anthropic model emits several tool_calls in one round and one
|
||||||
|
fails to convert (hallucinated name or bad-JSON args), it is dropped from
|
||||||
|
tool_blocks (so it produces no result) but used to stay in native_tool_calls.
|
||||||
|
_append_tool_results indexed tool_result_texts by native-call position, so the
|
||||||
|
surviving result was attached to the wrong tool_call_id and the real call was
|
||||||
|
answered with an empty string. _resolve_tool_blocks now returns the converted
|
||||||
|
calls aligned 1:1 with tool_blocks/tool_result_texts, and that aligned list is
|
||||||
|
what is threaded back.
|
||||||
|
"""
|
||||||
|
import src.agent_loop as al
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_returns_converted_calls_aligned():
|
||||||
|
native = [
|
||||||
|
{"name": "bogus_unknown_tool", "arguments": "{}", "id": "A"},
|
||||||
|
{"name": "web_search", "arguments": '{"query": "hello"}', "id": "B"},
|
||||||
|
]
|
||||||
|
tool_blocks, used_native, converted = al._resolve_tool_blocks("", native, 1)
|
||||||
|
assert used_native is True
|
||||||
|
assert len(tool_blocks) == 1 # only web_search converted
|
||||||
|
assert [c["name"] for c in converted] == ["web_search"]
|
||||||
|
assert len(converted) == len(tool_blocks) # aligned 1:1
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_threads_result_to_correct_tool_call_id():
|
||||||
|
messages = []
|
||||||
|
converted = [{"id": "B", "name": "web_search", "arguments": "{}"}]
|
||||||
|
al._append_tool_results(
|
||||||
|
messages, "some response", converted,
|
||||||
|
["RESULT"], ["RESULT"], True, 1,
|
||||||
|
)
|
||||||
|
tool_msgs = [m for m in messages if m.get("role") == "tool"]
|
||||||
|
assert len(tool_msgs) == 1
|
||||||
|
assert tool_msgs[0]["tool_call_id"] == "B"
|
||||||
|
assert tool_msgs[0]["content"] == "RESULT"
|
||||||
|
asst = next(m for m in messages if m.get("role") == "assistant")
|
||||||
|
assert [tc["id"] for tc in asst["tool_calls"]] == ["B"]
|
||||||
Reference in New Issue
Block a user