mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 09:45:24 -04:00
fix(integrations): truncate api_call JSON lists with sentinel instead of mid-string cut (#3540)
* fix(integrations): truncate api_call JSON lists with sentinel instead of mid-string cut
* fix(integrations): avoid mutating response dict in-place on truncation
* fix(integrations): truncate dict responses and bound list sentinel overhead
  - Dict path now walks keys in insertion order, adding them one at a time while checking that the accumulated dict + _truncated marker fits within the 12 000-char limit. Previously the marker was appended without removing any content, so large dicts were not actually truncated.
  - List path now subtracts the sentinel's serialised size (+ element-separator padding) from the budget before binary-searching, so the final array including the sentinel stays at or under the limit.
  - Add regression tests: large-dict actually-truncated, small-dict pass-through, and list-with-sentinel respects the size bound.

---------

Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
This commit is contained in:
+68
-5
@@ -411,17 +411,80 @@ async def execute_api_call(
|
||||
if "application/json" in content_type:
|
||||
try:
|
||||
data = response.json()
|
||||
formatted = json.dumps(data, indent=2, ensure_ascii=False)
|
||||
full = json.dumps(data, indent=2, ensure_ascii=False)
|
||||
if len(full) > 12000:
|
||||
if isinstance(data, list):
|
||||
# Binary-search for the largest prefix such that the
|
||||
# final array (prefix + sentinel) fits within the limit.
|
||||
# Pre-compute the sentinel so we know its serialized size.
|
||||
sentinel_placeholder = {
|
||||
"_truncated": True,
|
||||
"total_items": len(data),
|
||||
"shown_items": 0,
|
||||
}
|
||||
# Overhead: the sentinel appears as an extra array element.
|
||||
# Add a conservative padding for the separating comma,
|
||||
# newline, and indentation characters (~6 chars).
|
||||
sentinel_overhead = len(
|
||||
json.dumps(sentinel_placeholder, indent=2, ensure_ascii=False)
|
||||
) + 6
|
||||
budget = 12000 - sentinel_overhead
|
||||
lo, hi = 0, len(data)
|
||||
while lo < hi:
|
||||
mid = (lo + hi + 1) // 2
|
||||
candidate = json.dumps(
|
||||
data[:mid], indent=2, ensure_ascii=False
|
||||
)
|
||||
if len(candidate) < budget:
|
||||
lo = mid
|
||||
else:
|
||||
hi = mid - 1
|
||||
sentinel = {
|
||||
"_truncated": True,
|
||||
"total_items": len(data),
|
||||
"shown_items": lo,
|
||||
}
|
||||
formatted = json.dumps(
|
||||
data[:lo] + [sentinel], indent=2, ensure_ascii=False
|
||||
)
|
||||
elif isinstance(data, dict):
|
||||
# Truncate dict entries until the result fits, then add
|
||||
# the _truncated marker. Walk keys in insertion order.
|
||||
DICT_LIMIT = 12000
|
||||
kept: dict = {}
|
||||
for k, v in data.items():
|
||||
candidate = json.dumps(
|
||||
{**kept, k: v, "_truncated": True},
|
||||
indent=2,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
if len(candidate) <= DICT_LIMIT:
|
||||
kept[k] = v
|
||||
else:
|
||||
break
|
||||
formatted = json.dumps(
|
||||
{**kept, "_truncated": True}, indent=2, ensure_ascii=False
|
||||
)
|
||||
else:
|
||||
total = len(full)
|
||||
formatted = full[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
else:
|
||||
formatted = full
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
formatted = response.text
|
||||
if len(formatted) > 12000:
|
||||
total = len(formatted)
|
||||
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
elif "text/html" in content_type:
|
||||
formatted = _strip_html_tags(response.text)
|
||||
if len(formatted) > 12000:
|
||||
total = len(formatted)
|
||||
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
else:
|
||||
formatted = response.text
|
||||
|
||||
# Truncate
|
||||
if len(formatted) > 12000:
|
||||
formatted = formatted[:12000] + "\n... (truncated)"
|
||||
if len(formatted) > 12000:
|
||||
total = len(formatted)
|
||||
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
|
||||
output = f"HTTP {status}\n{formatted}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user