fix(compactor): shrink oversized tool_calls arguments so trim_for_context can fit a tool-only turn (#2949)

This commit is contained in:
nubs
2026-06-05 18:23:38 +00:00
committed by GitHub
parent b448119919
commit fa9f62b44c
2 changed files with 108 additions and 5 deletions
+46 -5
View File
@@ -5,6 +5,7 @@ Auto-compacts conversation history when approaching context window limits.
Summarizes older messages via the same LLM, preserving key context.
"""
import json
import logging import logging
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@@ -146,15 +147,53 @@ def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
return text[:head_len].rstrip() + notice + "\n\n" + text[-tail_len:].lstrip() return text[:head_len].rstrip() + notice + "\n\n" + text[-tail_len:].lstrip()
def _truncate_tool_call_args(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
"""Shrink oversized assistant ``tool_calls`` arguments to fit ``token_budget``.
A tool-only turn persists ``content=None`` with its whole payload in
``tool_calls[].function.arguments`` (e.g. a large create_document body), which
the text-content truncation can't reach — so the message could stay over
budget and the upstream call would 400. Replace each argument string that
overflows its share of the budget with a small valid-JSON placeholder,
preserving ``id``/``type``/``function.name`` so tool/result pairing and
provider validation are unaffected. Returns msg unchanged when there is
nothing oversized.
"""
tool_calls = msg.get("tool_calls")
if not isinstance(tool_calls, list) or not tool_calls:
return msg
# Budget left after whatever content survived (estimate_tokens counts tool
# arguments too, so measure content alone here).
content_tokens = estimate_tokens([{"role": msg.get("role", "assistant"), "content": msg.get("content")}])
per_call = max(16, (max(0, token_budget - content_tokens)) // len(tool_calls))
new_calls = []
changed = False
for tc in tool_calls:
fn = tc.get("function") if isinstance(tc, dict) else None
args = fn.get("arguments") if isinstance(fn, dict) else None
if isinstance(args, str) and int(len(args) * 0.3) > per_call:
new_fn = dict(fn)
new_fn["arguments"] = json.dumps({"_truncated_for_context": len(args)})
new_tc = dict(tc)
new_tc["function"] = new_fn
new_calls.append(new_tc)
changed = True
else:
new_calls.append(tc)
if not changed:
return msg
out = dict(msg)
out["tool_calls"] = new_calls
return out
def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]: def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
"""Return a copy of msg whose text content fits inside token_budget.""" """Return a copy of msg whose text content (and tool-call args) fit token_budget."""
out = dict(msg) out = dict(msg)
content = out.get("content", "") content = out.get("content", "")
if isinstance(content, str): if isinstance(content, str):
out["content"] = _truncate_text_to_token_budget(content, token_budget) out["content"] = _truncate_text_to_token_budget(content, token_budget)
return out elif isinstance(content, list):
if isinstance(content, list):
remaining = token_budget remaining = token_budget
new_content = [] new_content = []
for item in content: for item in content:
@@ -168,7 +207,9 @@ def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) ->
new_content.append(cloned) new_content.append(cloned)
remaining -= _message_text_token_estimate(truncated) remaining -= _message_text_token_estimate(truncated)
out["content"] = new_content out["content"] = new_content
return out # A tool-only turn (content=None) carries its payload in tool_calls args,
# which the branches above can't shrink — handle it so the message can fit.
return _truncate_tool_call_args(out, token_budget)
def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens: int = 512) -> List[Dict]: def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens: int = 512) -> List[Dict]:
@@ -0,0 +1,62 @@
"""Issue #2947 — _truncate_message_to_token_budget must shrink oversized tool_calls
arguments, not just text content.
A tool-only assistant turn persists content=None with its whole payload in
tool_calls[].function.arguments. The text-content truncation can't reach it, so
trim_for_context's last-resort message shrink left the message over budget and the
upstream call 400'd. This pins that oversized args are bounded (so the message
fits) while id/type/function.name are preserved, and that small args / plain text
are untouched.
"""
import json
import sys
from unittest.mock import MagicMock
import pytest
# Register stand-in modules for the database stack so that importing
# src.context_compactor below does not need the real packages. Names that are
# already present in sys.modules are left as they are.
for mod in (
    'sqlalchemy',
    'sqlalchemy.orm',
    'sqlalchemy.ext',
    'sqlalchemy.ext.declarative',
    'sqlalchemy.ext.hybrid',
    'sqlalchemy.sql',
    'sqlalchemy.sql.expression',
    'src.database',
    'core.models',
    'core.database',
):
    sys.modules.setdefault(mod, MagicMock())
from src.context_compactor import _truncate_message_to_token_budget # noqa: E402
from src.model_context import estimate_tokens # noqa: E402
def _tool_msg(arg_len):
return {
"role": "assistant",
"content": None,
"tool_calls": [{
"id": "c1", "type": "function",
"function": {"name": "create_document", "arguments": "x" * arg_len},
}],
}
def test_oversized_tool_call_args_are_truncated_to_fit_budget():
    """A 40000-char argument payload is replaced so the message fits a 200-token budget."""
    token_limit = 200
    shrunk = _truncate_message_to_token_budget(_tool_msg(40000), token_limit)

    # The whole message must now fit (before the fix it stayed ~12k tokens).
    assert estimate_tokens([shrunk]) <= token_limit, estimate_tokens([shrunk])

    call = shrunk["tool_calls"][0]
    # Identity fields survive, so tool/result pairing and provider validation
    # still hold.
    assert call["id"] == "c1"
    assert call["type"] == "function"
    assert call["function"]["name"] == "create_document"

    # The arguments are still valid JSON and record the original length.
    payload = json.loads(call["function"]["arguments"])
    assert payload.get("_truncated_for_context") == 40000
def test_small_tool_call_args_are_left_untouched():
    """Arguments well inside the budget pass through unmodified."""
    result = _truncate_message_to_token_budget(_tool_msg(20), 500)
    kept_args = result["tool_calls"][0]["function"]["arguments"]
    assert kept_args == "x" * 20
def test_plain_text_content_still_truncates():
    """Plain string content is still cut down by the text-truncation path."""
    long_user_msg = {"role": "user", "content": "y" * 40000}
    out = _truncate_message_to_token_budget(long_user_msg, 200)
    # Truncated, not left at 40k characters.
    assert len(out["content"]) < 2000