Files
odysseus/static/js/cookbook-deps-recipes.js
T
pewdiepie-archdaemon 600fa6be8a Cookbook/Dependencies: per-backend recipe panel (vllm/sglang/llama_cpp)
Each row for vllm, sglang, llama_cpp now carries an expand caret that
opens an inline recipe panel below the row. The panel has:
  - 'Serving which model?' select populated from a new tiny catalog
  - <pre> code block showing the exact shell sequence for that pair
  - Copy: clipboard the commands
  - Run: launch the joined 'cmd1 && cmd2 && …' as a tmux task on the
    currently-selected deps server (same plumbing as Install)

New file: src/static/js/cookbook-deps-recipes.js — single source of
truth for the recipes. Seeded with MiniMax M2/M2.7 + a generic fallback
for each backend (all three use 'uv venv → source .venv/bin/activate
→ uv pip install ... --torch-backend auto', the recipe the user
pasted). Adding model-specific recipes is now a one-entry edit.

Next commit: Launch-tab pre-flight that intercepts the serve click
when the backend isn't installed and deep-links into this panel.
2026-06-14 22:33:49 +09:00

86 lines
3.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Per-backend × per-model install recipes for the Dependencies tab.
//
// Each entry says: when you're about to serve `model` on `backend`, here's
// the exact shell sequence to make the venv + install the right packages.
// Entries are matched first-hit; put the more specific patterns ABOVE the
// generic fallback for that backend.
const _RECIPES = [
// ── vllm ──────────────────────────────────────────────────────────────
// MiniMax M2/M2.7 — same generic vllm install for now; kept as its own
// entry so future model-specific patches (FP8 quants, custom kernels)
// land in one obvious place without touching the catch-all.
{
backend: 'vllm',
label: 'MiniMax M2 / M2.7',
match: (m) => /minimax[-_]?m\s?2(\.7)?/i.test(m || ''),
commands: [
'uv venv',
'source .venv/bin/activate',
'uv pip install -U vllm --torch-backend auto',
],
},
// Generic vllm fallback — auto-resolves the right torch backend (CUDA
// 12.x / 12.4 / ROCm) at install time so users don't have to know.
{
backend: 'vllm',
label: 'Any vLLM model',
match: () => true,
commands: [
'uv venv',
'source .venv/bin/activate',
'uv pip install -U vllm --torch-backend auto',
],
},
// ── sglang ────────────────────────────────────────────────────────────
{
backend: 'sglang',
label: 'Any SGLang model',
match: () => true,
commands: [
'uv venv',
'source .venv/bin/activate',
'uv pip install -U "sglang[all]" --torch-backend auto',
],
},
// ── llama.cpp ─────────────────────────────────────────────────────────
// The cookbook-side rebuild path covers this for users who already have
// the engine compiled — but for a fresh box, surface a sane install.
{
backend: 'llama_cpp',
label: 'Any GGUF model',
match: () => true,
commands: [
'uv venv',
'source .venv/bin/activate',
'CMAKE_ARGS="-DGGML_CUDA=on" uv pip install -U "llama-cpp-python[server]"',
],
},
];
// Backends we surface a recipe panel for. Other rows in the Dependencies
// list keep the existing flat Install/Reinstall button without an expand
// affordance.
export const RECIPE_BACKENDS = new Set(['vllm', 'sglang', 'llama_cpp']);
// All recipe entries for a given backend, in catalog order. The first one
// is the model-specific match (when present); the last is always the
// generic fallback.
export function recipesForBackend(backend) {
return _RECIPES.filter((r) => r.backend === backend);
}
// Pick the best recipe for a backend + model id. Returns the catalog
// fallback when nothing more specific matches, or null if the backend
// isn't in the catalog at all.
export function pickRecipe(backend, modelId) {
const candidates = recipesForBackend(backend);
if (!candidates.length) return null;
for (const r of candidates) {
try { if (r.match(modelId)) return r; } catch (_) {}
}
return candidates[candidates.length - 1] || null;
}