fix: tool results misthreaded to the wrong tool_call_id when a native call fails to convert (#1917)

* fix: tool results misthreaded when a native call fails to convert

* Unpack the third converted_calls return from _resolve_tool_blocks in the fenced-example tests
This commit is contained in:
Afonso Coutinho
2026-06-27 19:31:17 +01:00
committed by GitHub
parent 3e7af8634f
commit 70d806019b
3 changed files with 53 additions and 7 deletions
@@ -178,14 +178,14 @@ def test_issue_3222_repro_guide_only_response_resolves_no_tool_actions(monkeypat
# ---------------------------------------------------------------------------
def test_resolve_tool_blocks_skips_textual_fallback_for_native_models_with_no_native_calls():
guide_only = "```bash\nnpm run plan:articles\n```\n```json\n{\"a\": 1}\n```"
blocks, used_native = al._resolve_tool_blocks(guide_only, [], round_num=1, is_api_model=True)
blocks, used_native, _ = al._resolve_tool_blocks(guide_only, [], round_num=1, is_api_model=True)
assert blocks == []
assert used_native is False
def test_resolve_tool_blocks_keeps_textual_fallback_for_non_native_models():
text = "```bash\necho hi\n```"
blocks, used_native = al._resolve_tool_blocks(text, [], round_num=1, is_api_model=False)
blocks, used_native, _ = al._resolve_tool_blocks(text, [], round_num=1, is_api_model=False)
assert len(blocks) == 1
assert blocks[0].tool_type == "bash"
assert used_native is False
@@ -193,7 +193,7 @@ def test_resolve_tool_blocks_keeps_textual_fallback_for_non_native_models():
def test_resolve_tool_blocks_native_path_untouched_when_native_calls_present():
native_calls = [{"name": "bash", "arguments": json.dumps({"command": "echo hi"})}]
blocks, used_native = al._resolve_tool_blocks("some prose", native_calls, round_num=1, is_api_model=True)
blocks, used_native, _ = al._resolve_tool_blocks("some prose", native_calls, round_num=1, is_api_model=True)
assert used_native is True
assert len(blocks) == 1
assert blocks[0].tool_type == "bash"
@@ -305,7 +305,7 @@ def test_resolve_tool_blocks_recovers_invoke_markup_for_native_model_with_no_nat
"I'll search for that now.\n"
'<invoke name="web_search"><parameter name="query">odysseus changelog</parameter></invoke>'
)
blocks, used_native = al._resolve_tool_blocks(leaked, [], round_num=1, is_api_model=True)
blocks, used_native, _ = al._resolve_tool_blocks(leaked, [], round_num=1, is_api_model=True)
assert used_native is False
assert len(blocks) == 1
assert blocks[0].tool_type == "web_search"
@@ -0,0 +1,39 @@
"""Native tool-call results must be threaded by CONVERTED-call position.
When an OpenAI/Anthropic model emits several tool_calls in one round and one
fails to convert (hallucinated name or bad-JSON args), it is dropped from
tool_blocks (so it produces no result) but used to stay in native_tool_calls.
_append_tool_results indexed tool_result_texts by native-call position, so the
surviving result was attached to the wrong tool_call_id and the real call was
answered with an empty string. _resolve_tool_blocks now returns the converted
calls aligned 1:1 with tool_blocks/tool_result_texts, and that aligned list is
what is threaded back.
"""
import src.agent_loop as al
def test_resolve_returns_converted_calls_aligned():
native = [
{"name": "bogus_unknown_tool", "arguments": "{}", "id": "A"},
{"name": "web_search", "arguments": '{"query": "hello"}', "id": "B"},
]
tool_blocks, used_native, converted = al._resolve_tool_blocks("", native, 1)
assert used_native is True
assert len(tool_blocks) == 1 # only web_search converted
assert [c["name"] for c in converted] == ["web_search"]
assert len(converted) == len(tool_blocks) # aligned 1:1
def test_append_threads_result_to_correct_tool_call_id():
messages = []
converted = [{"id": "B", "name": "web_search", "arguments": "{}"}]
al._append_tool_results(
messages, "some response", converted,
["RESULT"], ["RESULT"], True, 1,
)
tool_msgs = [m for m in messages if m.get("role") == "tool"]
assert len(tool_msgs) == 1
assert tool_msgs[0]["tool_call_id"] == "B"
assert tool_msgs[0]["content"] == "RESULT"
asst = next(m for m in messages if m.get("role") == "assistant")
assert [tc["id"] for tc in asst["tool_calls"]] == ["B"]