From fa9f62b44c8978af8530d1eb5998cea4dbef5754 Mon Sep 17 00:00:00 2001
From: nubs
Date: Fri, 5 Jun 2026 18:23:38 +0000
Subject: [PATCH] fix(compactor): shrink oversized tool_calls arguments so trim_for_context can fit a tool-only turn (#2949)

---
NOTE(review): line breaks and Python indentation in this patch were restored
from a whitespace-collapsed copy. Line counts follow the hunk headers and the
diffstat. The nesting depth of the first two context lines in the third hunk
is inferred (the loop body above them is not part of the diff) -- verify
against src/context_compactor.py before applying.

 src/context_compactor.py                      | 51 +++++++++++++--
 tests/test_compact_truncate_tool_call_args.py | 62 +++++++++++++++++++
 2 files changed, 108 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_compact_truncate_tool_call_args.py

diff --git a/src/context_compactor.py b/src/context_compactor.py
index c70ed0bb4..7da52425a 100644
--- a/src/context_compactor.py
+++ b/src/context_compactor.py
@@ -5,6 +5,7 @@ Auto-compacts conversation history when approaching context window limits.
 Summarizes older messages via the same LLM, preserving key context.
 """
 
+import json
 import logging
 from typing import Any, Dict, List, Optional
 
@@ -146,15 +147,53 @@ def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
     return text[:head_len].rstrip() + notice + "\n\n" + text[-tail_len:].lstrip()
 
 
+def _truncate_tool_call_args(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
+    """Shrink oversized assistant ``tool_calls`` arguments to fit ``token_budget``.
+
+    A tool-only turn persists ``content=None`` with its whole payload in
+    ``tool_calls[].function.arguments`` (e.g. a large create_document body), which
+    the text-content truncation can't reach — so the message could stay over
+    budget and the upstream call would 400. Replace each argument string that
+    overflows its share of the budget with a small valid-JSON placeholder,
+    preserving ``id``/``type``/``function.name`` so tool/result pairing and
+    provider validation are unaffected. Returns msg unchanged when there is
+    nothing oversized.
+    """
+    tool_calls = msg.get("tool_calls")
+    if not isinstance(tool_calls, list) or not tool_calls:
+        return msg
+    # Budget left after whatever content survived (estimate_tokens counts tool
+    # arguments too, so measure content alone here).
+    content_tokens = estimate_tokens([{"role": msg.get("role", "assistant"), "content": msg.get("content")}])
+    per_call = max(16, (max(0, token_budget - content_tokens)) // len(tool_calls))
+    new_calls = []
+    changed = False
+    for tc in tool_calls:
+        fn = tc.get("function") if isinstance(tc, dict) else None
+        args = fn.get("arguments") if isinstance(fn, dict) else None
+        if isinstance(args, str) and int(len(args) * 0.3) > per_call:
+            new_fn = dict(fn)
+            new_fn["arguments"] = json.dumps({"_truncated_for_context": len(args)})
+            new_tc = dict(tc)
+            new_tc["function"] = new_fn
+            new_calls.append(new_tc)
+            changed = True
+        else:
+            new_calls.append(tc)
+    if not changed:
+        return msg
+    out = dict(msg)
+    out["tool_calls"] = new_calls
+    return out
+
+
 def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
-    """Return a copy of msg whose text content fits inside token_budget."""
+    """Return a copy of msg whose text content (and tool-call args) fit token_budget."""
     out = dict(msg)
     content = out.get("content", "")
     if isinstance(content, str):
         out["content"] = _truncate_text_to_token_budget(content, token_budget)
-        return out
-
-    if isinstance(content, list):
+    elif isinstance(content, list):
         remaining = token_budget
         new_content = []
         for item in content:
@@ -168,7 +207,9 @@ def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) ->
             new_content.append(cloned)
             remaining -= _message_text_token_estimate(truncated)
         out["content"] = new_content
-        return out
+    # A tool-only turn (content=None) carries its payload in tool_calls args,
+    # which the branches above can't shrink — handle it so the message can fit.
+    return _truncate_tool_call_args(out, token_budget)
 
 
 def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens: int = 512) -> List[Dict]:
diff --git a/tests/test_compact_truncate_tool_call_args.py b/tests/test_compact_truncate_tool_call_args.py
new file mode 100644
index 000000000..cc081b924
--- /dev/null
+++ b/tests/test_compact_truncate_tool_call_args.py
@@ -0,0 +1,62 @@
+"""Issue #2947 — _truncate_message_to_token_budget must shrink oversized tool_calls
+arguments, not just text content.
+
+A tool-only assistant turn persists content=None with its whole payload in
+tool_calls[].function.arguments. The text-content truncation can't reach it, so
+trim_for_context's last-resort message shrink left the message over budget and the
+upstream call 400'd. This pins that oversized args are bounded (so the message
+fits) while id/type/function.name are preserved, and that small args / plain text
+are untouched.
+"""
+import json
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+
+for mod in [
+    'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative',
+    'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression',
+    'src.database',
+    'core.models', 'core.database',
+]:
+    if mod not in sys.modules:
+        sys.modules[mod] = MagicMock()
+
+from src.context_compactor import _truncate_message_to_token_budget  # noqa: E402
+from src.model_context import estimate_tokens  # noqa: E402
+
+
+def _tool_msg(arg_len):
+    return {
+        "role": "assistant",
+        "content": None,
+        "tool_calls": [{
+            "id": "c1", "type": "function",
+            "function": {"name": "create_document", "arguments": "x" * arg_len},
+        }],
+    }
+
+
+def test_oversized_tool_call_args_are_truncated_to_fit_budget():
+    budget = 200
+    out = _truncate_message_to_token_budget(_tool_msg(40000), budget)
+    # The message now fits the budget (before the fix it stayed ~12k tokens).
+    assert estimate_tokens([out]) <= budget, estimate_tokens([out])
+    tc = out["tool_calls"][0]
+    # Structure preserved so tool/result pairing + provider validation still hold.
+    assert tc["id"] == "c1" and tc["type"] == "function"
+    assert tc["function"]["name"] == "create_document"
+    # Arguments remain valid JSON, just bounded.
+    parsed = json.loads(tc["function"]["arguments"])
+    assert parsed.get("_truncated_for_context") == 40000
+
+
+def test_small_tool_call_args_are_left_untouched():
+    out = _truncate_message_to_token_budget(_tool_msg(20), 500)
+    assert out["tool_calls"][0]["function"]["arguments"] == "x" * 20
+
+
+def test_plain_text_content_still_truncates():
+    out = _truncate_message_to_token_budget({"role": "user", "content": "y" * 40000}, 200)
+    assert len(out["content"]) < 2000  # truncated, not left at 40k