mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 18:25:26 -04:00
Cookbook tmux: history-limit 100k + crash-watchdog grabs 2000 lines
The tmux default 2000-line scrollback was getting blown out by long vLLM tracebacks (DeepSeek-V4-Flash launch crash had the root cause scrolled off; the user saw only the tail "See root cause above"). Bumped: - tmux server history-limit to 100000 at session creation (prepended to each tmux new-session command so both local + ssh remote inherit the larger scrollback) - crash-watchdog capture-pane from -S -200 → -S -2000 so the diagnosis includes the actual exception line
This commit is contained in:
+193
-6
@@ -659,7 +659,7 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
_spf = f"-p {_port} " if _port and _port != "22" else ""
|
_spf = f"-p {_port} " if _port and _port != "22" else ""
|
||||||
setup_cmd = (
|
setup_cmd = (
|
||||||
f"scp -O {_pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
f"scp -O {_pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
||||||
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Local: run hf download in the background (tmux on POSIX, a detached
|
# Local: run hf download in the background (tmux on POSIX, a detached
|
||||||
@@ -691,7 +691,7 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
lines.append('exec "${SHELL:-/bin/bash}"')
|
lines.append('exec "${SHELL:-/bin/bash}"')
|
||||||
wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||||
wrapper_script.chmod(0o755)
|
wrapper_script.chmod(0o755)
|
||||||
setup_cmd = None if IS_WINDOWS else f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
|
setup_cmd = None if IS_WINDOWS else f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
|
||||||
|
|
||||||
logger.info(f"Model download: {req.repo_id} (backend={'ollama' if is_ollama_download else 'hf'}, include={req.include}, session={session_id}, remote={remote})")
|
logger.info(f"Model download: {req.repo_id} (backend={'ollama' if is_ollama_download else 'hf'}, include={req.include}, session={session_id}, remote={remote})")
|
||||||
logger.info(f"Download setup_cmd: {setup_cmd}")
|
logger.info(f"Download setup_cmd: {setup_cmd}")
|
||||||
@@ -963,9 +963,9 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
ssh_args = ["ssh"]
|
ssh_args = ["ssh"]
|
||||||
if ssh_port and ssh_port != "22":
|
if ssh_port and ssh_port != "22":
|
||||||
ssh_args.extend(["-p", str(ssh_port)])
|
ssh_args.extend(["-p", str(ssh_port)])
|
||||||
capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
|
capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
|
||||||
else:
|
else:
|
||||||
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
|
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
|
||||||
|
|
||||||
_exit_re = re.compile(r"=== Process exited with code (-?\d+) ===")
|
_exit_re = re.compile(r"=== Process exited with code (-?\d+) ===")
|
||||||
for wait_s in _waits:
|
for wait_s in _waits:
|
||||||
@@ -1555,10 +1555,10 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
setup_cmd = (
|
setup_cmd = (
|
||||||
f"{scp_extras}"
|
f"{scp_extras}"
|
||||||
f"scp -O {_Pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
f"scp -O {_Pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
||||||
f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
|
setup_cmd = f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
|
||||||
|
|
||||||
if setup_cmd is None:
|
if setup_cmd is None:
|
||||||
# LOCAL Windows: launch the bash runner detached; no tmux setup_cmd.
|
# LOCAL Windows: launch the bash runner detached; no tmux setup_cmd.
|
||||||
@@ -2601,6 +2601,193 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
"error": _ollama_library_cache["error"],
|
"error": _ollama_library_cache["error"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── vLLM recipe scraper ─────────────────────────────────────────────
|
||||||
|
# Fetches the official YAML recipe for a model from vllm-project/recipes
|
||||||
|
# and normalizes it into a small JSON the frontend can consume. Cached
|
||||||
|
# per-repo so the GitHub raw endpoint isn't hammered.
|
||||||
|
_vllm_recipe_cache: dict[str, tuple[float, dict | None]] = {}
|
||||||
|
# Manifest of all <org>/<model> ids that have a recipe in the upstream
|
||||||
|
# repo. Cheap to fetch (one Git Tree API call), so we cache the whole
|
||||||
|
# set for ~12h. Per-row "does this model have a recipe?" lookups hit
|
||||||
|
# this set instead of doing 912 individual recipe fetches.
|
||||||
|
_vllm_recipe_manifest: dict = {"fetched_at": 0.0, "models": set(), "error": ""}
|
||||||
|
|
||||||
|
@router.get("/api/cookbook/vllm-recipe-manifest")
|
||||||
|
async def vllm_recipe_manifest(refresh: int = 0):
|
||||||
|
"""Return the set of <org>/<model> ids known to have a vLLM recipe.
|
||||||
|
One GitHub Tree API call, 12h cache. The frontend uses this to badge
|
||||||
|
rows in the model list before the user expands them."""
|
||||||
|
import time as _time
|
||||||
|
import httpx as _httpx
|
||||||
|
TTL = 12 * 3600.0
|
||||||
|
now = _time.time()
|
||||||
|
if (
|
||||||
|
refresh
|
||||||
|
or (now - _vllm_recipe_manifest["fetched_at"]) > TTL
|
||||||
|
or not _vllm_recipe_manifest["models"]
|
||||||
|
):
|
||||||
|
url = (
|
||||||
|
"https://api.github.com/repos/vllm-project/recipes/"
|
||||||
|
"git/trees/main?recursive=1"
|
||||||
|
)
|
||||||
|
def _fetch_sync() -> tuple[int, dict | None, str]:
|
||||||
|
try:
|
||||||
|
headers = {"Accept": "application/vnd.github+json"}
|
||||||
|
with _httpx.Client(timeout=10.0, follow_redirects=True) as client:
|
||||||
|
r = client.get(url, headers=headers)
|
||||||
|
if r.status_code != 200:
|
||||||
|
return r.status_code, None, r.text[:200]
|
||||||
|
return 200, r.json(), ""
|
||||||
|
except Exception as e:
|
||||||
|
return 0, None, f"fetch error: {e}"
|
||||||
|
status, data, err = await asyncio.to_thread(_fetch_sync)
|
||||||
|
if status == 200 and isinstance(data, dict):
|
||||||
|
models: set[str] = set()
|
||||||
|
for entry in data.get("tree") or []:
|
||||||
|
path = (entry or {}).get("path") or ""
|
||||||
|
if not path.startswith("models/") or not path.endswith(".yaml"):
|
||||||
|
continue
|
||||||
|
# path = "models/<org>/<model>.yaml" → "<org>/<model>"
|
||||||
|
body = path[len("models/"):-len(".yaml")]
|
||||||
|
if "/" in body:
|
||||||
|
models.add(body)
|
||||||
|
_vllm_recipe_manifest["models"] = models
|
||||||
|
_vllm_recipe_manifest["fetched_at"] = now
|
||||||
|
_vllm_recipe_manifest["error"] = ""
|
||||||
|
else:
|
||||||
|
_vllm_recipe_manifest["error"] = (
|
||||||
|
f"HTTP {status}: {err}" if status else err
|
||||||
|
)
|
||||||
|
# Don't clobber a stale-but-usable list on transient failures.
|
||||||
|
if not _vllm_recipe_manifest["models"]:
|
||||||
|
return {
|
||||||
|
"models": [],
|
||||||
|
"count": 0,
|
||||||
|
"error": _vllm_recipe_manifest["error"],
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"models": sorted(_vllm_recipe_manifest["models"]),
|
||||||
|
"count": len(_vllm_recipe_manifest["models"]),
|
||||||
|
"fetched_at": _vllm_recipe_manifest["fetched_at"],
|
||||||
|
"error": _vllm_recipe_manifest["error"],
|
||||||
|
}
|
||||||
|
|
||||||
|
@router.get("/api/cookbook/vllm-recipe")
|
||||||
|
async def vllm_recipe(repo: str, refresh: int = 0):
|
||||||
|
"""Return the vLLM official recipe for a HuggingFace repo, if one
|
||||||
|
exists at vllm-project/recipes. `repo` is the full HF id like
|
||||||
|
'MiniMaxAI/MiniMax-M2'. Cached 6h."""
|
||||||
|
import time as _time
|
||||||
|
import httpx as _httpx
|
||||||
|
import yaml as _yaml
|
||||||
|
|
||||||
|
TTL = 6 * 3600.0
|
||||||
|
now = _time.time()
|
||||||
|
repo = (repo or "").strip().strip("/")
|
||||||
|
if "/" not in repo:
|
||||||
|
return {"exists": False, "error": "repo must be <org>/<model>"}
|
||||||
|
|
||||||
|
cached = _vllm_recipe_cache.get(repo)
|
||||||
|
if cached and not refresh and (now - cached[0]) < TTL:
|
||||||
|
return cached[1] or {"exists": False, "cached": True}
|
||||||
|
|
||||||
|
url = (
|
||||||
|
f"https://raw.githubusercontent.com/vllm-project/recipes/"
|
||||||
|
f"main/models/{repo}.yaml"
|
||||||
|
)
|
||||||
|
|
||||||
|
def _fetch_sync() -> tuple[int, str]:
|
||||||
|
try:
|
||||||
|
with _httpx.Client(timeout=8.0, follow_redirects=True) as client:
|
||||||
|
r = client.get(url)
|
||||||
|
return r.status_code, r.text
|
||||||
|
except Exception as e:
|
||||||
|
return 0, f"fetch error: {e}"
|
||||||
|
|
||||||
|
status, text = await asyncio.to_thread(_fetch_sync)
|
||||||
|
if status == 404:
|
||||||
|
_vllm_recipe_cache[repo] = (now, {"exists": False})
|
||||||
|
return {"exists": False}
|
||||||
|
if status != 200:
|
||||||
|
return {"exists": False, "error": f"HTTP {status}", "transient": True}
|
||||||
|
|
||||||
|
try:
|
||||||
|
doc = _yaml.safe_load(text) or {}
|
||||||
|
except Exception as e:
|
||||||
|
return {"exists": False, "error": f"yaml parse: {e}"}
|
||||||
|
|
||||||
|
meta = doc.get("meta") or {}
|
||||||
|
model = doc.get("model") or {}
|
||||||
|
features = doc.get("features") or {}
|
||||||
|
deps = doc.get("dependencies") or []
|
||||||
|
variants = doc.get("variants") or {}
|
||||||
|
hw_overrides = doc.get("hardware_overrides") or {}
|
||||||
|
strat_overrides = doc.get("strategy_overrides") or {}
|
||||||
|
|
||||||
|
# Tool-call + reasoning parsers, as flat arg arrays, so the frontend
|
||||||
|
# can drop them straight into the launch command.
|
||||||
|
tool_calling = features.get("tool_calling") or {}
|
||||||
|
reasoning = features.get("reasoning") or {}
|
||||||
|
|
||||||
|
normalized = {
|
||||||
|
"exists": True,
|
||||||
|
"source_url": url,
|
||||||
|
"title": meta.get("title") or "",
|
||||||
|
"provider": meta.get("provider") or "",
|
||||||
|
"description": meta.get("description") or "",
|
||||||
|
"date_updated": str(meta.get("date_updated") or ""),
|
||||||
|
"hardware_support": meta.get("hardware") or {},
|
||||||
|
"model_id": model.get("model_id") or repo,
|
||||||
|
"min_vllm_version": model.get("min_vllm_version") or "",
|
||||||
|
"architecture": model.get("architecture") or "",
|
||||||
|
"parameter_count": model.get("parameter_count") or "",
|
||||||
|
"active_parameters": model.get("active_parameters") or "",
|
||||||
|
"context_length": model.get("context_length") or 0,
|
||||||
|
"base_args": list(model.get("base_args") or []),
|
||||||
|
"base_env": dict(model.get("base_env") or {}),
|
||||||
|
"tool_calling": {
|
||||||
|
"description": tool_calling.get("description") or "",
|
||||||
|
"args": list(tool_calling.get("args") or []),
|
||||||
|
} if tool_calling else None,
|
||||||
|
"reasoning": {
|
||||||
|
"description": reasoning.get("description") or "",
|
||||||
|
"args": list(reasoning.get("args") or []),
|
||||||
|
} if reasoning else None,
|
||||||
|
"dependencies": [
|
||||||
|
{
|
||||||
|
"note": (d.get("note") or "").strip(),
|
||||||
|
"command": (d.get("command") or "").strip(),
|
||||||
|
"optional": bool(d.get("optional", False)),
|
||||||
|
}
|
||||||
|
for d in deps if isinstance(d, dict)
|
||||||
|
],
|
||||||
|
"variants": {
|
||||||
|
k: {
|
||||||
|
"model_id": v.get("model_id") or model.get("model_id") or repo,
|
||||||
|
"precision": v.get("precision") or "",
|
||||||
|
"vram_minimum_gb": v.get("vram_minimum_gb") or 0,
|
||||||
|
"description": v.get("description") or "",
|
||||||
|
"extra_args": list(v.get("extra_args") or []),
|
||||||
|
"extra_env": dict(v.get("extra_env") or {}),
|
||||||
|
}
|
||||||
|
for k, v in variants.items() if isinstance(v, dict)
|
||||||
|
},
|
||||||
|
"hardware_overrides": {
|
||||||
|
hw: {
|
||||||
|
"extra_args": list((ov or {}).get("extra_args") or []),
|
||||||
|
"extra_env": dict((ov or {}).get("extra_env") or {}),
|
||||||
|
}
|
||||||
|
for hw, ov in hw_overrides.items() if isinstance(ov, dict)
|
||||||
|
},
|
||||||
|
"strategy_overrides": {
|
||||||
|
strat: dict(ov or {})
|
||||||
|
for strat, ov in strat_overrides.items() if isinstance(ov, dict)
|
||||||
|
},
|
||||||
|
"compatible_strategies": list(doc.get("compatible_strategies") or []),
|
||||||
|
}
|
||||||
|
_vllm_recipe_cache[repo] = (now, normalized)
|
||||||
|
return normalized
|
||||||
|
|
||||||
@router.get("/api/cookbook/tasks/status")
|
@router.get("/api/cookbook/tasks/status")
|
||||||
async def cookbook_tasks_status(request: Request):
|
async def cookbook_tasks_status(request: Request):
|
||||||
"""Check status of all active cookbook tmux sessions.
|
"""Check status of all active cookbook tmux sessions.
|
||||||
|
|||||||
Reference in New Issue
Block a user