Cookbook/Dependencies: per-backend recipe panel (vllm/sglang/llama_cpp)

Each row for vllm, sglang, and llama_cpp now carries an expand caret that opens an inline recipe panel below the row. The panel has: (1) a 'Serving which model?' select populated from a new tiny catalog; (2) a <pre> code block showing the exact shell sequence for that pair; (3) Copy: copies the commands to the clipboard; (4) Run: launches the joined 'cmd1 && cmd2 && …' as a tmux task on the currently-selected deps server (same plumbing as Install). New file: src/static/js/cookbook-deps-recipes.js — single source of truth for the recipes. Seeded with MiniMax M2/M2.7 + a generic fallback for each backend (all three use 'uv venv → source .venv/bin/activate → uv pip install ...', the recipe the user pasted; vllm and sglang also pass '--torch-backend auto'). Adding model-specific recipes is now a one-entry edit. Next commit: Launch-tab pre-flight that intercepts the serve click when the backend isn't installed and deep-links into this panel.
2026-06-17 10:15:27 -04:00 · 2026-06-14 22:33:49 +09:00
parent 781a3ee829
commit 600fa6be8a
2 changed files with 200 additions and 2 deletions
@@ -0,0 +1,85 @@
+// Per-backend × per-model install recipes for the Dependencies tab.
+//
+// Each entry { backend, label, match(modelId), commands[] } says: to serve a
+// model accepted by `match` on `backend`, run `commands` in order to create
+// the venv and install the right packages. Entries are matched first-hit, so
+// put the more specific patterns ABOVE the generic fallback for that backend.
+
+const _RECIPES = [
+  // ── vllm ──────────────────────────────────────────────────────────────
+  // MiniMax M2/M2.7 — same commands as the generic vllm entry for now; kept
+  // as its own entry so future model-specific patches (FP8 quants, custom
+  // kernels) land in one obvious place without touching the catch-all.
+  {
+    backend: 'vllm',
+    label: 'MiniMax M2 / M2.7',
+    match: (m) => /minimax[-_]?m\s?2(\.7)?/i.test(m || ''),
+    commands: [
+      'uv venv',
+      'source .venv/bin/activate',
+      'uv pip install -U vllm --torch-backend auto',
+    ],
+  },
+  // Generic vllm fallback — `--torch-backend auto` has uv pick the torch
+  // build for the host's accelerator at install time so users don't have to know.
+  {
+    backend: 'vllm',
+    label: 'Any vLLM model',
+    match: () => true,
+    commands: [
+      'uv venv',
+      'source .venv/bin/activate',
+      'uv pip install -U vllm --torch-backend auto',
+    ],
+  },
+
+  // ── sglang ────────────────────────────────────────────────────────────
+  {
+    backend: 'sglang',
+    label: 'Any SGLang model',
+    match: () => true,
+    commands: [
+      'uv venv',
+      'source .venv/bin/activate',
+      'uv pip install -U "sglang[all]" --torch-backend auto',
+    ],
+  },
+
+  // ── llama.cpp ─────────────────────────────────────────────────────────
+  // Fresh-box install; the cookbook-side rebuild path covers users who already
+  // have the engine compiled. NOTE(review): CMAKE_ARGS assumes CUDA — confirm.
+  {
+    backend: 'llama_cpp',
+    label: 'Any GGUF model',
+    match: () => true,
+    commands: [
+      'uv venv',
+      'source .venv/bin/activate',
+      'CMAKE_ARGS="-DGGML_CUDA=on" uv pip install -U "llama-cpp-python[server]"',
+    ],
+  },
+];
+
+// Backends we surface a recipe panel for. Other rows in the Dependencies
+// list keep the existing flat Install/Reinstall button without an expand
+// affordance.
+export const RECIPE_BACKENDS = new Set(['vllm', 'sglang', 'llama_cpp']);
+
+// All recipe entries for a given backend, in catalog order. The first one
+// is the model-specific match (when present); the last is always the
+// generic fallback.
+export function recipesForBackend(backend) {
+  return _RECIPES.filter((r) => r.backend === backend);
+}
+
+// Pick the best recipe for a backend + model id. Returns the catalog
+// fallback when nothing more specific matches, or null if the backend
+// isn't in the catalog at all.
+export function pickRecipe(backend, modelId) {
+  const candidates = recipesForBackend(backend);
+  if (!candidates.length) return null;
+  for (const r of candidates) {
+    try { if (r.match(modelId)) return r; } catch (_) {}
+  }
+  return candidates[candidates.length - 1] || null;
+}