fix: expand cookbook error output tail from 12 to 50 lines (#1538)

* fix: expand cookbook error output tail from 12 to 50 lines When a task reaches status 'error', the status endpoint was returning only the last 12 lines of the subprocess log. The existing context-menu 'Copy last 50 lines' action was therefore copying the same 12 lines, making it useless for diagnosing failures that produce long stack traces or build output. - Set _tail_lines = 50 when status == 'error', keep 12 for running tasks - Initialise exit_code = None before the status-classification block so it is always defined in the result dict (was only set inside the is_alive branch, potential NameError in the dead-session path) - Include exit_code in the task-status response dict - JS poller captures exit_code from live data into local task state The frontend output panel and 'Copy last 50 lines' now show the actual error context without any UI changes. * refactor: extract output-tail logic to testable helper + behavioral tests Addresses review feedback on #1538: the previous tests were source-level string guards. Extract the tail-slicing into a dependency-free helper (routes/cookbook_output.error_aware_output_tail) and replace the guards with behavioral tests that exercise the actual logic: - error status with a 200-line snapshot -> exactly the last 50 lines - running/ready/completed/stopped/unknown -> last 12 lines - short snapshot -> all lines, no padding - empty snapshot -> empty string - error tail is a strict superset (suffix-compatible) of the non-error tail The helper has no FastAPI/SQLAlchemy imports so it unit-tests without standing up the app. --------- Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
2026-06-15 17:25:26 -04:00 · 2026-06-11 18:55:33 +02:00
parent 01fbee021b
commit 3e65326c3f
4 changed files with 80 additions and 1 deletions
@@ -0,0 +1,19 @@
+"""Pure helpers for shaping cookbook task output for the status response.
+
+Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
+unit-tested without standing up the whole app.
+"""
+
+
+def error_aware_output_tail(full_snapshot: str, status: str) -> str:
+    """Return the trailing slice of a task log for the status response.
+
+    Failed tasks return the last 50 lines so the "Copy last 50 lines" action
+    surfaces the actual error context (stack traces, build output). Running and
+    other non-error tasks keep the cheaper 12-line tail to limit the payload on
+    the 10s polling interval.
+    """
+    if not full_snapshot:
+        return ""
+    tail_lines = 50 if status == "error" else 12
+    return "\n".join(full_snapshot.splitlines()[-tail_lines:])
@@ -30,6 +30,7 @@ from core.platform_compat import (
    which_tool,
 )
 from routes.shell_routes import TMUX_LOG_DIR
+from routes.cookbook_output import error_aware_output_tail

 logger = logging.getLogger(__name__)

@@ -2873,6 +2874,7 @@ def setup_cookbook_routes() -> APIRouter:
            # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
            # when the PID is gone instead of blindly reporting "stopped".
            download_zero_files = False
+            exit_code = None
            status = "unknown"
            download_has_ok = task_type == "download" and "DOWNLOAD_OK" in full_snapshot
            download_has_failed = task_type == "download" and "DOWNLOAD_FAILED" in full_snapshot
@@ -2946,7 +2948,7 @@ def setup_cookbook_routes() -> APIRouter:
                status = "error"
            if download_zero_files:
                diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
-            output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
+            output_tail = error_aware_output_tail(full_snapshot, status)

            results.append({
                "session_id": session_id,
@@ -2957,6 +2959,7 @@ def setup_cookbook_routes() -> APIRouter:
                "phase": serve_phase,
                "diagnosis": diagnosis,
                "output_tail": output_tail,
+                "exit_code": exit_code,
                "cmd": _payload.get("_cmd") or "",
                "tps": phase_info.get("tps"),
                "reqs": phase_info.get("reqs"),
@@ -3547,6 +3547,7 @@ async function _pollBackgroundStatus() {
          updates.status = live.status === 'ready' ? 'ready' : 'running';
        }
        if (live.progress && live.progress !== task.progress) updates.progress = live.progress;
+        if (live.exit_code != null && live.exit_code !== task.exit_code) updates.exit_code = live.exit_code;
        if (live.output_tail) {
          const previous = String(task.output || '');
          const tail = String(live.output_tail || '');
@@ -0,0 +1,56 @@
+"""Behavioral guard for the cookbook error output-tail expansion.
+
+When a task reaches status "error" the status endpoint previously returned
+only the last 12 lines of the subprocess log. The "Copy last 50 lines"
+context-menu action was therefore copying the same 12 lines — useless for
+diagnosing failures that emit long stack traces or build output.
+
+`error_aware_output_tail` now returns the last 50 lines on error and keeps
+the cheaper 12-line tail for running/other tasks.
+"""
+from routes.cookbook_output import error_aware_output_tail
+
+
+def _snapshot(n):
+    return "\n".join(f"line {i}" for i in range(n))
+
+
+def test_error_status_returns_last_50_lines():
+    snap = _snapshot(200)
+    tail = error_aware_output_tail(snap, "error")
+    lines = tail.splitlines()
+    assert len(lines) == 50, f"error tail should be 50 lines, got {len(lines)}"
+    assert lines[0] == "line 150"
+    assert lines[-1] == "line 199"
+
+
+def test_non_error_status_returns_last_12_lines():
+    snap = _snapshot(200)
+    for status in ("running", "ready", "completed", "stopped", "unknown"):
+        tail = error_aware_output_tail(snap, status)
+        lines = tail.splitlines()
+        assert len(lines) == 12, f"{status} tail should be 12 lines, got {len(lines)}"
+        assert lines[-1] == "line 199"
+
+
+def test_short_snapshot_returns_all_lines():
+    # Fewer lines than the cap — return everything, no padding.
+    snap = _snapshot(5)
+    assert error_aware_output_tail(snap, "error").splitlines() == [
+        "line 0", "line 1", "line 2", "line 3", "line 4",
+    ]
+    assert len(error_aware_output_tail(snap, "running").splitlines()) == 5
+
+
+def test_empty_snapshot_returns_empty_string():
+    assert error_aware_output_tail("", "error") == ""
+    assert error_aware_output_tail("", "running") == ""
+
+
+def test_error_tail_is_wider_than_non_error():
+    snap = _snapshot(100)
+    err = error_aware_output_tail(snap, "error").splitlines()
+    run = error_aware_output_tail(snap, "running").splitlines()
+    assert len(err) > len(run)
+    # The non-error tail is a strict suffix of the error tail.
+    assert err[-len(run):] == run