fix(integrations): truncate api_call JSON lists with sentinel instead of mid-string cut (#3540)

* fix(integrations): truncate api_call JSON lists with sentinel instead of mid-string cut

* fix(integrations): avoid mutating response dict in-place on truncation

* fix(integrations): truncate dict responses and bound list sentinel overhead

  - Dict path now walks keys in insertion order, adding them one at a time while checking that the accumulated dict + _truncated marker fits within the 12,000-char limit. Previously the marker was appended without removing any content, so large dicts were not actually truncated.
  - List path now subtracts the sentinel's serialised size (+ element-separator padding) from the budget before binary-searching, so the final array including the sentinel stays at or under the limit.
  - Add regression tests: large-dict actually-truncated, small-dict pass-through, and list-with-sentinel respects the size bound.

---------

Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
2026-06-16 09:45:24 -04:00 · 2026-06-09 18:34:08 -03:00
parent 8753daf357
commit d273085744
2 changed files with 264 additions and 5 deletions
@@ -411,17 +411,80 @@ async def execute_api_call(
        if "application/json" in content_type:
            try:
                data = response.json()
-                formatted = json.dumps(data, indent=2, ensure_ascii=False)
+                full = json.dumps(data, indent=2, ensure_ascii=False)
+                if len(full) > 12000:
+                    if isinstance(data, list):
+                        # Binary-search for the largest prefix such that the
+                        # final array (prefix + sentinel) fits within the limit.
+                        # Pre-compute the sentinel so we know its serialized size.
+                        sentinel_placeholder = {
+                            "_truncated": True,
+                            "total_items": len(data),
+                            "shown_items": 0,
+                        }
+                        # Overhead: the sentinel appears as an extra array element.
+                        # Add a conservative padding for the separating comma,
+                        # newline, and indentation characters (~6 chars).
+                        sentinel_overhead = len(
+                            json.dumps(sentinel_placeholder, indent=2, ensure_ascii=False)
+                        ) + 6
+                        budget = 12000 - sentinel_overhead
+                        lo, hi = 0, len(data)
+                        while lo < hi:
+                            mid = (lo + hi + 1) // 2
+                            candidate = json.dumps(
+                                data[:mid], indent=2, ensure_ascii=False
+                            )
+                            if len(candidate) < budget:
+                                lo = mid
+                            else:
+                                hi = mid - 1
+                        sentinel = {
+                            "_truncated": True,
+                            "total_items": len(data),
+                            "shown_items": lo,
+                        }
+                        formatted = json.dumps(
+                            data[:lo] + [sentinel], indent=2, ensure_ascii=False
+                        )
+                    elif isinstance(data, dict):
+                        # Truncate dict entries until the result fits, then add
+                        # the _truncated marker.  Walk keys in insertion order.
+                        DICT_LIMIT = 12000
+                        kept: dict = {}
+                        for k, v in data.items():
+                            candidate = json.dumps(
+                                {**kept, k: v, "_truncated": True},
+                                indent=2,
+                                ensure_ascii=False,
+                            )
+                            if len(candidate) <= DICT_LIMIT:
+                                kept[k] = v
+                            else:
+                                break
+                        formatted = json.dumps(
+                            {**kept, "_truncated": True}, indent=2, ensure_ascii=False
+                        )
+                    else:
+                        total = len(full)
+                        formatted = full[:12000] + f"\n... (truncated, {total} chars total)"
+                else:
+                    formatted = full
            except (json.JSONDecodeError, ValueError):
                formatted = response.text
+                if len(formatted) > 12000:
+                    total = len(formatted)
+                    formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
        elif "text/html" in content_type:
            formatted = _strip_html_tags(response.text)
+            if len(formatted) > 12000:
+                total = len(formatted)
+                formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
        else:
            formatted = response.text
-
-        # Truncate
-        if len(formatted) > 12000:
-            formatted = formatted[:12000] + "\n... (truncated)"
+            if len(formatted) > 12000:
+                total = len(formatted)
+                formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"

        output = f"HTTP {status}\n{formatted}"