Cookbook tmux: history-limit 100k + crash-watchdog grabs 2000 lines

The tmux default 2000-line scrollback was getting blown out by long vLLM tracebacks (DeepSeek-V4-Flash launch crash had the root cause scrolled off; the user saw only the tail "See root cause above"). Bumped: - tmux server history-limit to 100000 at session creation (prepended to each tmux new-session command so both local + ssh remote inherit the larger scrollback) - crash-watchdog capture-pane from -S -200 → -S -2000 so the diagnosis includes the actual exception line
2026-06-17 18:25:26 -04:00 · 2026-06-14 09:02:04 +09:00
parent 4074e77d93
commit 09a82852c0
1 changed files with 193 additions and 6 deletions
@@ -659,7 +659,7 @@ def setup_cookbook_routes() -> APIRouter:
            _spf = f"-p {_port} " if _port and _port != "22" else ""
            setup_cmd = (
                f"scp -O {_pf}-q '{runner_path}' {remote}:{remote_runner} && "
-                f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
+                f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
            )
        else:
            # Local: run hf download in the background (tmux on POSIX, a detached
@@ -691,7 +691,7 @@ def setup_cookbook_routes() -> APIRouter:
                lines.append('exec "${SHELL:-/bin/bash}"')
                wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
                wrapper_script.chmod(0o755)
-            setup_cmd = None if IS_WINDOWS else f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
+            setup_cmd = None if IS_WINDOWS else f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"

        logger.info(f"Model download: {req.repo_id} (backend={'ollama' if is_ollama_download else 'hf'}, include={req.include}, session={session_id}, remote={remote})")
        logger.info(f"Download setup_cmd: {setup_cmd}")
@@ -963,9 +963,9 @@ def setup_cookbook_routes() -> APIRouter:
            ssh_args = ["ssh"]
            if ssh_port and ssh_port != "22":
                ssh_args.extend(["-p", str(ssh_port)])
-            capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
+            capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
        else:
-            capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
+            capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]

        _exit_re = re.compile(r"=== Process exited with code (-?\d+) ===")
        for wait_s in _waits:
@@ -1555,10 +1555,10 @@ def setup_cookbook_routes() -> APIRouter:
                setup_cmd = (
                    f"{scp_extras}"
                    f"scp -O {_Pf}-q '{runner_path}' {remote}:{remote_runner} && "
-                    f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
+                    f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
                )
            else:
-                setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
+                setup_cmd = f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"

        if setup_cmd is None:
            # LOCAL Windows: launch the bash runner detached; no tmux setup_cmd.
@@ -2601,6 +2601,193 @@ def setup_cookbook_routes() -> APIRouter:
            "error": _ollama_library_cache["error"],
        }

+    # ── vLLM recipe scraper ─────────────────────────────────────────────
+    # Fetches the official YAML recipe for a model from vllm-project/recipes
+    # and normalizes it into a small JSON the frontend can consume. Cached
+    # per-repo so the GitHub raw endpoint isn't hammered.
+    _vllm_recipe_cache: dict[str, tuple[float, dict | None]] = {}
+    # Manifest of all <org>/<model> ids that have a recipe in the upstream
+    # repo. Cheap to fetch (one Git Tree API call), so we cache the whole
+    # set for ~12h. Per-row "does this model have a recipe?" lookups hit
+    # this set instead of doing 912 individual recipe fetches.
+    _vllm_recipe_manifest: dict = {"fetched_at": 0.0, "models": set(), "error": ""}
+
+    @router.get("/api/cookbook/vllm-recipe-manifest")
+    async def vllm_recipe_manifest(refresh: int = 0):
+        """Return the set of <org>/<model> ids known to have a vLLM recipe.
+        One GitHub Tree API call, 12h cache. The frontend uses this to badge
+        rows in the model list before the user expands them."""
+        import time as _time
+        import httpx as _httpx
+        TTL = 12 * 3600.0
+        now = _time.time()
+        if (
+            refresh
+            or (now - _vllm_recipe_manifest["fetched_at"]) > TTL
+            or not _vllm_recipe_manifest["models"]
+        ):
+            url = (
+                "https://api.github.com/repos/vllm-project/recipes/"
+                "git/trees/main?recursive=1"
+            )
+            def _fetch_sync() -> tuple[int, dict | None, str]:
+                try:
+                    headers = {"Accept": "application/vnd.github+json"}
+                    with _httpx.Client(timeout=10.0, follow_redirects=True) as client:
+                        r = client.get(url, headers=headers)
+                        if r.status_code != 200:
+                            return r.status_code, None, r.text[:200]
+                        return 200, r.json(), ""
+                except Exception as e:
+                    return 0, None, f"fetch error: {e}"
+            status, data, err = await asyncio.to_thread(_fetch_sync)
+            if status == 200 and isinstance(data, dict):
+                models: set[str] = set()
+                for entry in data.get("tree") or []:
+                    path = (entry or {}).get("path") or ""
+                    if not path.startswith("models/") or not path.endswith(".yaml"):
+                        continue
+                    # path = "models/<org>/<model>.yaml" → "<org>/<model>"
+                    body = path[len("models/"):-len(".yaml")]
+                    if "/" in body:
+                        models.add(body)
+                _vllm_recipe_manifest["models"] = models
+                _vllm_recipe_manifest["fetched_at"] = now
+                _vllm_recipe_manifest["error"] = ""
+            else:
+                _vllm_recipe_manifest["error"] = (
+                    f"HTTP {status}: {err}" if status else err
+                )
+                # Don't clobber a stale-but-usable list on transient failures.
+                if not _vllm_recipe_manifest["models"]:
+                    return {
+                        "models": [],
+                        "count": 0,
+                        "error": _vllm_recipe_manifest["error"],
+                    }
+        return {
+            "models": sorted(_vllm_recipe_manifest["models"]),
+            "count": len(_vllm_recipe_manifest["models"]),
+            "fetched_at": _vllm_recipe_manifest["fetched_at"],
+            "error": _vllm_recipe_manifest["error"],
+        }
+
+    @router.get("/api/cookbook/vllm-recipe")
+    async def vllm_recipe(repo: str, refresh: int = 0):
+        """Return the vLLM official recipe for a HuggingFace repo, if one
+        exists at vllm-project/recipes. `repo` is the full HF id like
+        'MiniMaxAI/MiniMax-M2'. Cached 6h."""
+        import time as _time
+        import httpx as _httpx
+        import yaml as _yaml
+
+        TTL = 6 * 3600.0
+        now = _time.time()
+        repo = (repo or "").strip().strip("/")
+        if "/" not in repo:
+            return {"exists": False, "error": "repo must be <org>/<model>"}
+
+        cached = _vllm_recipe_cache.get(repo)
+        if cached and not refresh and (now - cached[0]) < TTL:
+            return cached[1] or {"exists": False, "cached": True}
+
+        url = (
+            f"https://raw.githubusercontent.com/vllm-project/recipes/"
+            f"main/models/{repo}.yaml"
+        )
+
+        def _fetch_sync() -> tuple[int, str]:
+            try:
+                with _httpx.Client(timeout=8.0, follow_redirects=True) as client:
+                    r = client.get(url)
+                    return r.status_code, r.text
+            except Exception as e:
+                return 0, f"fetch error: {e}"
+
+        status, text = await asyncio.to_thread(_fetch_sync)
+        if status == 404:
+            _vllm_recipe_cache[repo] = (now, {"exists": False})
+            return {"exists": False}
+        if status != 200:
+            return {"exists": False, "error": f"HTTP {status}", "transient": True}
+
+        try:
+            doc = _yaml.safe_load(text) or {}
+        except Exception as e:
+            return {"exists": False, "error": f"yaml parse: {e}"}
+
+        meta = doc.get("meta") or {}
+        model = doc.get("model") or {}
+        features = doc.get("features") or {}
+        deps = doc.get("dependencies") or []
+        variants = doc.get("variants") or {}
+        hw_overrides = doc.get("hardware_overrides") or {}
+        strat_overrides = doc.get("strategy_overrides") or {}
+
+        # Tool-call + reasoning parsers, as flat arg arrays, so the frontend
+        # can drop them straight into the launch command.
+        tool_calling = features.get("tool_calling") or {}
+        reasoning = features.get("reasoning") or {}
+
+        normalized = {
+            "exists": True,
+            "source_url": url,
+            "title": meta.get("title") or "",
+            "provider": meta.get("provider") or "",
+            "description": meta.get("description") or "",
+            "date_updated": str(meta.get("date_updated") or ""),
+            "hardware_support": meta.get("hardware") or {},
+            "model_id": model.get("model_id") or repo,
+            "min_vllm_version": model.get("min_vllm_version") or "",
+            "architecture": model.get("architecture") or "",
+            "parameter_count": model.get("parameter_count") or "",
+            "active_parameters": model.get("active_parameters") or "",
+            "context_length": model.get("context_length") or 0,
+            "base_args": list(model.get("base_args") or []),
+            "base_env": dict(model.get("base_env") or {}),
+            "tool_calling": {
+                "description": tool_calling.get("description") or "",
+                "args": list(tool_calling.get("args") or []),
+            } if tool_calling else None,
+            "reasoning": {
+                "description": reasoning.get("description") or "",
+                "args": list(reasoning.get("args") or []),
+            } if reasoning else None,
+            "dependencies": [
+                {
+                    "note": (d.get("note") or "").strip(),
+                    "command": (d.get("command") or "").strip(),
+                    "optional": bool(d.get("optional", False)),
+                }
+                for d in deps if isinstance(d, dict)
+            ],
+            "variants": {
+                k: {
+                    "model_id": v.get("model_id") or model.get("model_id") or repo,
+                    "precision": v.get("precision") or "",
+                    "vram_minimum_gb": v.get("vram_minimum_gb") or 0,
+                    "description": v.get("description") or "",
+                    "extra_args": list(v.get("extra_args") or []),
+                    "extra_env": dict(v.get("extra_env") or {}),
+                }
+                for k, v in variants.items() if isinstance(v, dict)
+            },
+            "hardware_overrides": {
+                hw: {
+                    "extra_args": list((ov or {}).get("extra_args") or []),
+                    "extra_env": dict((ov or {}).get("extra_env") or {}),
+                }
+                for hw, ov in hw_overrides.items() if isinstance(ov, dict)
+            },
+            "strategy_overrides": {
+                strat: dict(ov or {})
+                for strat, ov in strat_overrides.items() if isinstance(ov, dict)
+            },
+            "compatible_strategies": list(doc.get("compatible_strategies") or []),
+        }
+        _vllm_recipe_cache[repo] = (now, normalized)
+        return normalized
+
    @router.get("/api/cookbook/tasks/status")
    async def cookbook_tasks_status(request: Request):
        """Check status of all active cookbook tmux sessions.