fix(model-context): count tool_calls in estimate_tokens so compaction sees real size (#2751)

This commit is contained in:
nubs
2026-06-05 13:56:54 +00:00
committed by GitHub
parent 8354948a1c
commit 6973c5427c
2 changed files with 68 additions and 1 deletions
+21 -1
View File
@@ -357,7 +357,11 @@ def estimate_tokens(messages: List[Dict]) -> int:
Uses chars * 0.3 which is closer to real BPE tokenizer output
than the commonly-cited chars/4 (which underestimates by ~20-30%).
Also adds ~4 tokens per message for role/formatting overhead.
Also adds ~4 tokens per message for role/formatting overhead, and counts
assistant tool_calls (name + arguments) — a tool-only turn carries
content=None with the real payload in tool_calls, so ignoring them made the
estimate (and the compaction/trim gates that rely on it) blind to large
tool arguments.
"""
total = 0
for msg in messages:
@@ -369,4 +373,20 @@ def estimate_tokens(messages: List[Dict]) -> int:
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
total += int(len(item.get("text", "")) * 0.3)
# Tool calls carry real payload too: a tool-only assistant turn is stored
# with content=None and the actual args (e.g. a create_document body) in
# tool_calls[].function.arguments. Ignoring them made large tool arguments
# read as ~0 tokens, so the compaction/trim gates missed genuine overflow.
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if not isinstance(tc, dict):
continue
fn = tc.get("function") if isinstance(tc.get("function"), dict) else tc
name = fn.get("name", "") or ""
args = fn.get("arguments", "") or ""
if not isinstance(args, str):
args = str(args) # some shapes store arguments as a dict
total += 4 # per tool-call overhead (id, type, wrapper)
total += int((len(str(name)) + len(args)) * 0.3)
return total
+47
View File
@@ -0,0 +1,47 @@
"""Issue #2748 — estimate_tokens must count assistant tool_calls (name + arguments).
A tool-only assistant turn is stored with content=None and the real payload (e.g.
a large create_document body) in tool_calls[].function.arguments. Before this fix
estimate_tokens ignored tool_calls, so such a turn counted as ~4 tokens and the
compaction/trim gates that rely on estimate_tokens silently missed real context
overflow, letting the upstream call 400 with 'context length exceeded'.
"""
from src.model_context import estimate_tokens
def test_tool_call_arguments_are_counted():
big = "x" * 40000 # ~ a large create_document body
msg = {
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "c1", "type": "function",
"function": {"name": "create_document", "arguments": big}},
],
}
est = estimate_tokens([msg])
# ~40k chars * 0.3 ≈ 12000, vs the old ~4 that ignored tool_calls entirely.
assert est > 10000, est
def test_content_only_message_is_unchanged():
# No tool_calls -> identical to the previous behaviour (content*0.3 + overhead).
msg = {"role": "user", "content": "x" * 100}
assert estimate_tokens([msg]) == 4 + int(100 * 0.3)
def test_dict_arguments_are_handled():
# Some shapes store arguments as a dict rather than a JSON string.
msg = {
"role": "assistant",
"content": None,
"tool_calls": [{"function": {"name": "f", "arguments": {"path": "x" * 1000}}}],
}
assert estimate_tokens([msg]) > 200
def test_empty_and_malformed_tool_calls_are_safe():
# tool_calls=None and non-dict entries must not raise and must not inflate.
assert estimate_tokens([{"role": "assistant", "content": "hi", "tool_calls": None}]) == 4 + int(2 * 0.3)
assert estimate_tokens([{"role": "assistant", "content": None, "tool_calls": ["bad", 5]}]) == 4