mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 18:25:26 -04:00
Cookbook tmux: history-limit 100k + crash-watchdog grabs 2000 lines
The tmux default 2000-line scrollback was getting blown out by long vLLM tracebacks (DeepSeek-V4-Flash launch crash had the root cause scrolled off; the user saw only the tail "See root cause above"). Bumped: - tmux server history-limit to 100000 at session creation (prepended to each tmux new-session command so both local + ssh remote inherit the larger scrollback) - crash-watchdog capture-pane from -S -200 → -S -2000 so the diagnosis includes the actual exception line
This commit is contained in:
+193
-6
@@ -659,7 +659,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
_spf = f"-p {_port} " if _port and _port != "22" else ""
|
||||
setup_cmd = (
|
||||
f"scp -O {_pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
||||
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
)
|
||||
else:
|
||||
# Local: run hf download in the background (tmux on POSIX, a detached
|
||||
@@ -691,7 +691,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
lines.append('exec "${SHELL:-/bin/bash}"')
|
||||
wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
wrapper_script.chmod(0o755)
|
||||
setup_cmd = None if IS_WINDOWS else f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
|
||||
setup_cmd = None if IS_WINDOWS else f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
|
||||
|
||||
logger.info(f"Model download: {req.repo_id} (backend={'ollama' if is_ollama_download else 'hf'}, include={req.include}, session={session_id}, remote={remote})")
|
||||
logger.info(f"Download setup_cmd: {setup_cmd}")
|
||||
@@ -963,9 +963,9 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
ssh_args = ["ssh"]
|
||||
if ssh_port and ssh_port != "22":
|
||||
ssh_args.extend(["-p", str(ssh_port)])
|
||||
capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
|
||||
capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
|
||||
else:
|
||||
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
|
||||
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
|
||||
|
||||
_exit_re = re.compile(r"=== Process exited with code (-?\d+) ===")
|
||||
for wait_s in _waits:
|
||||
@@ -1555,10 +1555,10 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
setup_cmd = (
|
||||
f"{scp_extras}"
|
||||
f"scp -O {_Pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
||||
f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
)
|
||||
else:
|
||||
setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
|
||||
setup_cmd = f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
|
||||
|
||||
if setup_cmd is None:
|
||||
# LOCAL Windows: launch the bash runner detached; no tmux setup_cmd.
|
||||
@@ -2601,6 +2601,193 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"error": _ollama_library_cache["error"],
|
||||
}
|
||||
|
||||
# ── vLLM recipe scraper ─────────────────────────────────────────────
|
||||
# Fetches the official YAML recipe for a model from vllm-project/recipes
|
||||
# and normalizes it into a small JSON the frontend can consume. Cached
|
||||
# per-repo so the GitHub raw endpoint isn't hammered.
|
||||
_vllm_recipe_cache: dict[str, tuple[float, dict | None]] = {}
|
||||
# Manifest of all <org>/<model> ids that have a recipe in the upstream
|
||||
# repo. Cheap to fetch (one Git Tree API call), so we cache the whole
|
||||
# set for ~12h. Per-row "does this model have a recipe?" lookups hit
|
||||
# this set instead of doing 912 individual recipe fetches.
|
||||
_vllm_recipe_manifest: dict = {"fetched_at": 0.0, "models": set(), "error": ""}
|
||||
|
||||
@router.get("/api/cookbook/vllm-recipe-manifest")
|
||||
async def vllm_recipe_manifest(refresh: int = 0):
|
||||
"""Return the set of <org>/<model> ids known to have a vLLM recipe.
|
||||
One GitHub Tree API call, 12h cache. The frontend uses this to badge
|
||||
rows in the model list before the user expands them."""
|
||||
import time as _time
|
||||
import httpx as _httpx
|
||||
TTL = 12 * 3600.0
|
||||
now = _time.time()
|
||||
if (
|
||||
refresh
|
||||
or (now - _vllm_recipe_manifest["fetched_at"]) > TTL
|
||||
or not _vllm_recipe_manifest["models"]
|
||||
):
|
||||
url = (
|
||||
"https://api.github.com/repos/vllm-project/recipes/"
|
||||
"git/trees/main?recursive=1"
|
||||
)
|
||||
def _fetch_sync() -> tuple[int, dict | None, str]:
|
||||
try:
|
||||
headers = {"Accept": "application/vnd.github+json"}
|
||||
with _httpx.Client(timeout=10.0, follow_redirects=True) as client:
|
||||
r = client.get(url, headers=headers)
|
||||
if r.status_code != 200:
|
||||
return r.status_code, None, r.text[:200]
|
||||
return 200, r.json(), ""
|
||||
except Exception as e:
|
||||
return 0, None, f"fetch error: {e}"
|
||||
status, data, err = await asyncio.to_thread(_fetch_sync)
|
||||
if status == 200 and isinstance(data, dict):
|
||||
models: set[str] = set()
|
||||
for entry in data.get("tree") or []:
|
||||
path = (entry or {}).get("path") or ""
|
||||
if not path.startswith("models/") or not path.endswith(".yaml"):
|
||||
continue
|
||||
# path = "models/<org>/<model>.yaml" → "<org>/<model>"
|
||||
body = path[len("models/"):-len(".yaml")]
|
||||
if "/" in body:
|
||||
models.add(body)
|
||||
_vllm_recipe_manifest["models"] = models
|
||||
_vllm_recipe_manifest["fetched_at"] = now
|
||||
_vllm_recipe_manifest["error"] = ""
|
||||
else:
|
||||
_vllm_recipe_manifest["error"] = (
|
||||
f"HTTP {status}: {err}" if status else err
|
||||
)
|
||||
# Don't clobber a stale-but-usable list on transient failures.
|
||||
if not _vllm_recipe_manifest["models"]:
|
||||
return {
|
||||
"models": [],
|
||||
"count": 0,
|
||||
"error": _vllm_recipe_manifest["error"],
|
||||
}
|
||||
return {
|
||||
"models": sorted(_vllm_recipe_manifest["models"]),
|
||||
"count": len(_vllm_recipe_manifest["models"]),
|
||||
"fetched_at": _vllm_recipe_manifest["fetched_at"],
|
||||
"error": _vllm_recipe_manifest["error"],
|
||||
}
|
||||
|
||||
@router.get("/api/cookbook/vllm-recipe")
|
||||
async def vllm_recipe(repo: str, refresh: int = 0):
|
||||
"""Return the vLLM official recipe for a HuggingFace repo, if one
|
||||
exists at vllm-project/recipes. `repo` is the full HF id like
|
||||
'MiniMaxAI/MiniMax-M2'. Cached 6h."""
|
||||
import time as _time
|
||||
import httpx as _httpx
|
||||
import yaml as _yaml
|
||||
|
||||
TTL = 6 * 3600.0
|
||||
now = _time.time()
|
||||
repo = (repo or "").strip().strip("/")
|
||||
if "/" not in repo:
|
||||
return {"exists": False, "error": "repo must be <org>/<model>"}
|
||||
|
||||
cached = _vllm_recipe_cache.get(repo)
|
||||
if cached and not refresh and (now - cached[0]) < TTL:
|
||||
return cached[1] or {"exists": False, "cached": True}
|
||||
|
||||
url = (
|
||||
f"https://raw.githubusercontent.com/vllm-project/recipes/"
|
||||
f"main/models/{repo}.yaml"
|
||||
)
|
||||
|
||||
def _fetch_sync() -> tuple[int, str]:
|
||||
try:
|
||||
with _httpx.Client(timeout=8.0, follow_redirects=True) as client:
|
||||
r = client.get(url)
|
||||
return r.status_code, r.text
|
||||
except Exception as e:
|
||||
return 0, f"fetch error: {e}"
|
||||
|
||||
status, text = await asyncio.to_thread(_fetch_sync)
|
||||
if status == 404:
|
||||
_vllm_recipe_cache[repo] = (now, {"exists": False})
|
||||
return {"exists": False}
|
||||
if status != 200:
|
||||
return {"exists": False, "error": f"HTTP {status}", "transient": True}
|
||||
|
||||
try:
|
||||
doc = _yaml.safe_load(text) or {}
|
||||
except Exception as e:
|
||||
return {"exists": False, "error": f"yaml parse: {e}"}
|
||||
|
||||
meta = doc.get("meta") or {}
|
||||
model = doc.get("model") or {}
|
||||
features = doc.get("features") or {}
|
||||
deps = doc.get("dependencies") or []
|
||||
variants = doc.get("variants") or {}
|
||||
hw_overrides = doc.get("hardware_overrides") or {}
|
||||
strat_overrides = doc.get("strategy_overrides") or {}
|
||||
|
||||
# Tool-call + reasoning parsers, as flat arg arrays, so the frontend
|
||||
# can drop them straight into the launch command.
|
||||
tool_calling = features.get("tool_calling") or {}
|
||||
reasoning = features.get("reasoning") or {}
|
||||
|
||||
normalized = {
|
||||
"exists": True,
|
||||
"source_url": url,
|
||||
"title": meta.get("title") or "",
|
||||
"provider": meta.get("provider") or "",
|
||||
"description": meta.get("description") or "",
|
||||
"date_updated": str(meta.get("date_updated") or ""),
|
||||
"hardware_support": meta.get("hardware") or {},
|
||||
"model_id": model.get("model_id") or repo,
|
||||
"min_vllm_version": model.get("min_vllm_version") or "",
|
||||
"architecture": model.get("architecture") or "",
|
||||
"parameter_count": model.get("parameter_count") or "",
|
||||
"active_parameters": model.get("active_parameters") or "",
|
||||
"context_length": model.get("context_length") or 0,
|
||||
"base_args": list(model.get("base_args") or []),
|
||||
"base_env": dict(model.get("base_env") or {}),
|
||||
"tool_calling": {
|
||||
"description": tool_calling.get("description") or "",
|
||||
"args": list(tool_calling.get("args") or []),
|
||||
} if tool_calling else None,
|
||||
"reasoning": {
|
||||
"description": reasoning.get("description") or "",
|
||||
"args": list(reasoning.get("args") or []),
|
||||
} if reasoning else None,
|
||||
"dependencies": [
|
||||
{
|
||||
"note": (d.get("note") or "").strip(),
|
||||
"command": (d.get("command") or "").strip(),
|
||||
"optional": bool(d.get("optional", False)),
|
||||
}
|
||||
for d in deps if isinstance(d, dict)
|
||||
],
|
||||
"variants": {
|
||||
k: {
|
||||
"model_id": v.get("model_id") or model.get("model_id") or repo,
|
||||
"precision": v.get("precision") or "",
|
||||
"vram_minimum_gb": v.get("vram_minimum_gb") or 0,
|
||||
"description": v.get("description") or "",
|
||||
"extra_args": list(v.get("extra_args") or []),
|
||||
"extra_env": dict(v.get("extra_env") or {}),
|
||||
}
|
||||
for k, v in variants.items() if isinstance(v, dict)
|
||||
},
|
||||
"hardware_overrides": {
|
||||
hw: {
|
||||
"extra_args": list((ov or {}).get("extra_args") or []),
|
||||
"extra_env": dict((ov or {}).get("extra_env") or {}),
|
||||
}
|
||||
for hw, ov in hw_overrides.items() if isinstance(ov, dict)
|
||||
},
|
||||
"strategy_overrides": {
|
||||
strat: dict(ov or {})
|
||||
for strat, ov in strat_overrides.items() if isinstance(ov, dict)
|
||||
},
|
||||
"compatible_strategies": list(doc.get("compatible_strategies") or []),
|
||||
}
|
||||
_vllm_recipe_cache[repo] = (now, normalized)
|
||||
return normalized
|
||||
|
||||
@router.get("/api/cookbook/tasks/status")
|
||||
async def cookbook_tasks_status(request: Request):
|
||||
"""Check status of all active cookbook tmux sessions.
|
||||
|
||||
Reference in New Issue
Block a user