From 3e65326c3fecabe779c1e4b70393b02342a8bf1a Mon Sep 17 00:00:00 2001
From: Carles Siles <71840321+carlescsb1990@users.noreply.github.com>
Date: Thu, 11 Jun 2026 18:55:33 +0200
Subject: [PATCH] fix: expand cookbook error output tail from 12 to 50 lines
 (#1538)

* fix: expand cookbook error output tail from 12 to 50 lines

When a task reaches status 'error', the status endpoint was returning
only the last 12 lines of the subprocess log. The existing context-menu
'Copy last 50 lines' action was therefore copying the same 12 lines,
making it useless for diagnosing failures that produce long stack traces
or build output.

- Set _tail_lines = 50 when status == 'error', keep 12 for running tasks
- Initialise exit_code = None before the status-classification block so
  it is always defined in the result dict (was only set inside the
  is_alive branch, potential NameError in the dead-session path)
- Include exit_code in the task-status response dict
- JS poller captures exit_code from live data into local task state

The frontend output panel and 'Copy last 50 lines' now show the actual
error context without any UI changes.

* refactor: extract output-tail logic to testable helper + behavioral tests

Addresses review feedback on #1538: the previous tests were source-level
string guards. Extract the tail-slicing into a dependency-free helper
(routes/cookbook_output.error_aware_output_tail) and replace the guards
with behavioral tests that exercise the actual logic:

- error status with a 200-line snapshot -> exactly the last 50 lines
- running/ready/completed/stopped/unknown -> last 12 lines
- short snapshot -> all lines, no padding
- empty snapshot -> empty string
- error tail is a strict superset (suffix-compatible) of the non-error tail

The helper has no FastAPI/SQLAlchemy imports so it unit-tests without
standing up the app.

---------

Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
---
 routes/cookbook_output.py               | 19 +++++++++
 routes/cookbook_routes.py               |  5 ++-
 static/js/cookbookRunning.js            |  1 +
 tests/test_cookbook_error_tail_lines.py | 56 +++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 routes/cookbook_output.py
 create mode 100644 tests/test_cookbook_error_tail_lines.py

diff --git a/routes/cookbook_output.py b/routes/cookbook_output.py
new file mode 100644
index 000000000..16a14adc2
--- /dev/null
+++ b/routes/cookbook_output.py
@@ -0,0 +1,19 @@
+"""Pure helpers for shaping cookbook task output for the status response.
+
+Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
+unit-tested without standing up the whole app.
+"""
+
+
+def error_aware_output_tail(full_snapshot: str, status: str) -> str:
+    """Return the trailing slice of a task log for the status response.
+
+    Failed tasks return the last 50 lines so the "Copy last 50 lines" action
+    surfaces the actual error context (stack traces, build output). Running and
+    other non-error tasks keep the cheaper 12-line tail to limit the payload on
+    the 10s polling interval.
+    """
+    if not full_snapshot:
+        return ""
+    tail_lines = 50 if status == "error" else 12
+    return "\n".join(full_snapshot.splitlines()[-tail_lines:])
diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py
index 36f98aeae..40cfec31d 100644
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -30,6 +30,7 @@ from core.platform_compat import (
     which_tool,
 )
 from routes.shell_routes import TMUX_LOG_DIR
+from routes.cookbook_output import error_aware_output_tail
 
 logger = logging.getLogger(__name__)
 
@@ -2873,6 +2874,7 @@ def setup_cookbook_routes() -> APIRouter:
             # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
             # when the PID is gone instead of blindly reporting "stopped".
             download_zero_files = False
+            exit_code = None
             status = "unknown"
             download_has_ok = task_type == "download" and "DOWNLOAD_OK" in full_snapshot
             download_has_failed = task_type == "download" and "DOWNLOAD_FAILED" in full_snapshot
@@ -2946,7 +2948,7 @@ def setup_cookbook_routes() -> APIRouter:
                 status = "error"
             if download_zero_files:
                 diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
-            output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
+            output_tail = error_aware_output_tail(full_snapshot, status)
 
             results.append({
                 "session_id": session_id,
@@ -2957,6 +2959,7 @@ def setup_cookbook_routes() -> APIRouter:
                 "phase": serve_phase,
                 "diagnosis": diagnosis,
                 "output_tail": output_tail,
+                "exit_code": exit_code,
                 "cmd": _payload.get("_cmd") or "",
                 "tps": phase_info.get("tps"),
                 "reqs": phase_info.get("reqs"),
diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js
index b13856c08..06b557c1c 100644
--- a/static/js/cookbookRunning.js
+++ b/static/js/cookbookRunning.js
@@ -3547,6 +3547,7 @@ async function _pollBackgroundStatus() {
           updates.status = live.status === 'ready' ? 'ready' : 'running';
         }
         if (live.progress && live.progress !== task.progress) updates.progress = live.progress;
+        if (live.exit_code != null && live.exit_code !== task.exit_code) updates.exit_code = live.exit_code;
         if (live.output_tail) {
           const previous = String(task.output || '');
           const tail = String(live.output_tail || '');
diff --git a/tests/test_cookbook_error_tail_lines.py b/tests/test_cookbook_error_tail_lines.py
new file mode 100644
index 000000000..5e647273d
--- /dev/null
+++ b/tests/test_cookbook_error_tail_lines.py
@@ -0,0 +1,56 @@
+"""Behavioral guard for the cookbook error output-tail expansion.
+
+When a task reaches status "error" the status endpoint previously returned
+only the last 12 lines of the subprocess log. The "Copy last 50 lines"
+context-menu action was therefore copying the same 12 lines — useless for
+diagnosing failures that emit long stack traces or build output.
+
+`error_aware_output_tail` now returns the last 50 lines on error and keeps
+the cheaper 12-line tail for running/other tasks.
+"""
+from routes.cookbook_output import error_aware_output_tail
+
+
+def _snapshot(n):
+    return "\n".join(f"line {i}" for i in range(n))
+
+
+def test_error_status_returns_last_50_lines():
+    snap = _snapshot(200)
+    tail = error_aware_output_tail(snap, "error")
+    lines = tail.splitlines()
+    assert len(lines) == 50, f"error tail should be 50 lines, got {len(lines)}"
+    assert lines[0] == "line 150"
+    assert lines[-1] == "line 199"
+
+
+def test_non_error_status_returns_last_12_lines():
+    snap = _snapshot(200)
+    for status in ("running", "ready", "completed", "stopped", "unknown"):
+        tail = error_aware_output_tail(snap, status)
+        lines = tail.splitlines()
+        assert len(lines) == 12, f"{status} tail should be 12 lines, got {len(lines)}"
+        assert lines[-1] == "line 199"
+
+
+def test_short_snapshot_returns_all_lines():
+    # Fewer lines than the cap — return everything, no padding.
+    snap = _snapshot(5)
+    assert error_aware_output_tail(snap, "error").splitlines() == [
+        "line 0", "line 1", "line 2", "line 3", "line 4",
+    ]
+    assert len(error_aware_output_tail(snap, "running").splitlines()) == 5
+
+
+def test_empty_snapshot_returns_empty_string():
+    assert error_aware_output_tail("", "error") == ""
+    assert error_aware_output_tail("", "running") == ""
+
+
+def test_error_tail_is_wider_than_non_error():
+    snap = _snapshot(100)
+    err = error_aware_output_tail(snap, "error").splitlines()
+    run = error_aware_output_tail(snap, "running").splitlines()
+    assert len(err) > len(run)
+    # The non-error tail is a strict suffix of the error tail.
+    assert err[-len(run):] == run