mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
fix(model-context): count tool_calls in estimate_tokens so compaction sees real size (#2751)
This commit is contained in:
+21
-1
@@ -357,7 +357,11 @@ def estimate_tokens(messages: List[Dict]) -> int:
|
|||||||
|
|
||||||
Uses chars * 0.3 which is closer to real BPE tokenizer output
|
Uses chars * 0.3 which is closer to real BPE tokenizer output
|
||||||
than the commonly-cited chars/4 (which underestimates by ~20-30%).
|
than the commonly-cited chars/4 (which underestimates by ~20-30%).
|
||||||
Also adds ~4 tokens per message for role/formatting overhead.
|
Also adds ~4 tokens per message for role/formatting overhead, and counts
|
||||||
|
assistant tool_calls (name + arguments) — a tool-only turn carries
|
||||||
|
content=None with the real payload in tool_calls, so ignoring them made the
|
||||||
|
estimate (and the compaction/trim gates that rely on it) blind to large
|
||||||
|
tool arguments.
|
||||||
"""
|
"""
|
||||||
total = 0
|
total = 0
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
@@ -369,4 +373,20 @@ def estimate_tokens(messages: List[Dict]) -> int:
|
|||||||
for item in content:
|
for item in content:
|
||||||
if isinstance(item, dict) and item.get("type") == "text":
|
if isinstance(item, dict) and item.get("type") == "text":
|
||||||
total += int(len(item.get("text", "")) * 0.3)
|
total += int(len(item.get("text", "")) * 0.3)
|
||||||
|
# Tool calls carry real payload too: a tool-only assistant turn is stored
|
||||||
|
# with content=None and the actual args (e.g. a create_document body) in
|
||||||
|
# tool_calls[].function.arguments. Ignoring them made large tool arguments
|
||||||
|
# read as ~0 tokens, so the compaction/trim gates missed genuine overflow.
|
||||||
|
tool_calls = msg.get("tool_calls")
|
||||||
|
if isinstance(tool_calls, list):
|
||||||
|
for tc in tool_calls:
|
||||||
|
if not isinstance(tc, dict):
|
||||||
|
continue
|
||||||
|
fn = tc.get("function") if isinstance(tc.get("function"), dict) else tc
|
||||||
|
name = fn.get("name", "") or ""
|
||||||
|
args = fn.get("arguments", "") or ""
|
||||||
|
if not isinstance(args, str):
|
||||||
|
args = str(args) # some shapes store arguments as a dict
|
||||||
|
total += 4 # per tool-call overhead (id, type, wrapper)
|
||||||
|
total += int((len(str(name)) + len(args)) * 0.3)
|
||||||
return total
|
return total
|
||||||
|
|||||||
@@ -0,0 +1,47 @@
|
|||||||
|
"""Issue #2748 — estimate_tokens must count assistant tool_calls (name + arguments).
|
||||||
|
|
||||||
|
A tool-only assistant turn is stored with content=None and the real payload (e.g.
|
||||||
|
a large create_document body) in tool_calls[].function.arguments. Before this fix
|
||||||
|
estimate_tokens ignored tool_calls, so such a turn counted as ~4 tokens and the
|
||||||
|
compaction/trim gates that rely on estimate_tokens silently missed real context
|
||||||
|
overflow, letting the upstream call 400 with 'context length exceeded'.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from src.model_context import estimate_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_call_arguments_are_counted():
|
||||||
|
big = "x" * 40000 # ~ a large create_document body
|
||||||
|
msg = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": None,
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "c1", "type": "function",
|
||||||
|
"function": {"name": "create_document", "arguments": big}},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
est = estimate_tokens([msg])
|
||||||
|
# ~40k chars * 0.3 ≈ 12000, vs the old ~4 that ignored tool_calls entirely.
|
||||||
|
assert est > 10000, est
|
||||||
|
|
||||||
|
|
||||||
|
def test_content_only_message_is_unchanged():
|
||||||
|
# No tool_calls -> identical to the previous behaviour (content*0.3 + overhead).
|
||||||
|
msg = {"role": "user", "content": "x" * 100}
|
||||||
|
assert estimate_tokens([msg]) == 4 + int(100 * 0.3)
|
||||||
|
|
||||||
|
|
||||||
|
def test_dict_arguments_are_handled():
|
||||||
|
# Some shapes store arguments as a dict rather than a JSON string.
|
||||||
|
msg = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": None,
|
||||||
|
"tool_calls": [{"function": {"name": "f", "arguments": {"path": "x" * 1000}}}],
|
||||||
|
}
|
||||||
|
assert estimate_tokens([msg]) > 200
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_and_malformed_tool_calls_are_safe():
|
||||||
|
# tool_calls=None and non-dict entries must not raise and must not inflate.
|
||||||
|
assert estimate_tokens([{"role": "assistant", "content": "hi", "tool_calls": None}]) == 4 + int(2 * 0.3)
|
||||||
|
assert estimate_tokens([{"role": "assistant", "content": None, "tool_calls": ["bad", 5]}]) == 4
|
||||||
Reference in New Issue
Block a user