Cookbook/Serve: 'Install in Dependencies →' link in the runtime readiness note

When the backend (vllm / sglang / llama_cpp / diffusers) is missing on the chosen serve target, the runtime-readiness note already flips red and reads '<backend> missing on <host>.' but offers no fix path. Append an accent-coloured link that calls openCookbookDependencies with expandRecipe + the model's repo id, so one click switches to the Dependencies tab, expands the right backend row's recipe panel, and pre-selects the model so the user just hits Run.
2026-06-15 09:15:29 -04:00 · 2026-06-14 22:57:43 +09:00
parent 768bcb565a
commit 1fcec32a3c
1 changed files with 67 additions and 35 deletions
@@ -9,6 +9,7 @@ import spinnerModule from './spinner.js';
 import { providerLogo } from './providers.js';
 import { modelColor } from './chatRenderer.js';
 import { bindMenuDismiss, dismissOrRemove } from './escMenuStack.js';
+import { openCookbookDependencies } from './cookbook-diagnosis.js';

 // Shared state/functions injected by init()
 let _envState;
@@ -579,10 +580,15 @@ function _rerenderCachedModels() {
      const _arrowTitle = _modelPresets.length > 0
        ? `${_modelPresets.length} saved launch config${_modelPresets.length === 1 ? '' : 's'} for ${_repoShort} — click ▾ to load or delete`
        : `No saved launch configs for ${_repoShort} yet — click Save to add one`;
-      let _slotsHtml = `<div class="cookbook-serve-slots cookbook-saved-split">`
-        + `<button type="button" class="cookbook-slot-btn cookbook-saved-save" title="Save current config"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M19 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11l5 5v11a2 2 0 0 1-2 2z"/><polyline points="17 21 17 13 7 13 7 21"/><polyline points="7 3 7 8 15 8"/></svg>Settings</button>`
+      // Wrap the Save split in a <label> so it picks up the same "field
+      // title + ?-help" treatment as Backend / venv / Port / GPUs sitting
+      // beside it in Row 1. Button text is "Save" (the action), label is
+      // "Settings" (what the saved blob represents).
+      let _slotsHtml = `<label>${_l('Settings','Saved launch configurations for this model — click ▾ to load or delete')}`
+        + `<div class="cookbook-serve-slots cookbook-saved-split">`
+        + `<button type="button" class="cookbook-slot-btn cookbook-saved-save" title="Save current config"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M19 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11l5 5v11a2 2 0 0 1-2 2z"/><polyline points="17 21 17 13 7 13 7 21"/><polyline points="7 3 7 8 15 8"/></svg>Save</button>`
        + `<button type="button" class="cookbook-slot-btn cookbook-saved-arrow" title="${esc(_arrowTitle)}">${_arrowLabel}</button>`
-        + `</div>`;
+        + `</div></label>`;

      let panelHtml = `<div class="hwfit-serve-panel">`;
      // Runtime-readiness note pinned at the top of the serve area so the
@@ -617,6 +623,10 @@ function _rerenderCachedModels() {
      panelHtml += `<label>${_l('Backend','Inference engine: vLLM, SGLang, llama.cpp, Ollama, or Diffusers')}<div class="hwfit-backend-picker" data-backend-picker style="position:relative;width:100%;"><select class="hwfit-sf hwfit-backend-source" data-field="backend" style="display:none;">${backendOpts}</select><button type="button" class="hwfit-backend-btn" data-backend-btn aria-haspopup="listbox" aria-expanded="false" style="display:flex;align-items:center;gap:6px;width:100%;height:28px;padding:0 8px;background:var(--bg);color:var(--fg);border:1px solid var(--border);border-radius:4px;font:inherit;font-size:11px;cursor:pointer;text-align:left;"><span class="hwfit-backend-btn-icon" data-backend-icon-slot aria-hidden="true" style="display:inline-flex;align-items:center;justify-content:center;width:16px;height:16px;color:var(--accent, var(--red));flex-shrink:0;"></span><span class="hwfit-backend-btn-label" data-backend-label style="flex:1;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;"></span><svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" style="opacity:0.6;flex-shrink:0;"><polyline points="6 9 12 15 18 9"/></svg></button><div class="hwfit-backend-menu" data-backend-menu role="listbox" hidden style="position:absolute;top:calc(100% + 4px);left:0;right:0;z-index:100;background:var(--panel, var(--bg));border:1px solid var(--border);border-radius:6px;box-shadow:0 6px 20px rgba(0,0,0,0.22);padding:4px;"></div></div></label>`;
      panelHtml += `<input type="hidden" class="hwfit-sf" data-field="host" value="${esc(_es.remoteHost || '')}" />`;
      panelHtml += `<label>${_l('venv','Path to Python venv or conda env activate script')}<input type="text" class="hwfit-sf hwfit-sf-wide" data-field="venv" value="${esc(sv('venv', _es.envPath || _srvVenv || ''))}" placeholder="~/venv" /></label>`;
+      // Dtype lives in Row 1 (next to venv) — it's the first knob people
+      // change when matching the model to the box, so it earns top-row
+      // real estate over Row 2's launch-tuning controls.
+      panelHtml += `<label>${_l('Dtype','Data type for weights. auto picks best for GPU')}<select class="hwfit-sf" data-field="dtype">${dtypeOpts}</select></label>`;
      const defaultPort = defaultBackend === 'ollama' ? '11434' : _nextAvailablePort();
      panelHtml += `<label>${_l('Port','HTTP port for the API server')}<input type="text" class="hwfit-sf" data-field="port" value="${esc(sv('port', defaultPort))}" /></label>`;
      const _activeGpus = (defaultGpus || '').split(',').map(s => s.trim()).filter(Boolean);
@@ -627,9 +637,13 @@ function _rerenderCachedModels() {
        const on = _activeGpus.includes(String(i));
        _gpuBtnsHtml += `<button type="button" class="cookbook-gpu-btn${on ? ' active' : ''}" data-gpu="${i}">${i}</button>`;
      }
-      panelHtml += `<label>${_l('GPUs','Toggle which GPUs to use')}<div class="cookbook-gpu-group">${_gpuBtnsHtml}</div><input type="hidden" class="hwfit-sf" data-field="gpus" value="${esc(defaultGpus)}" /></label>`;
-      // Save / saved-configs split button — moved into Row 1 (next to GPUs)
-      // so it shares the same baseline as the rest of the top controls.
+      // GPUs button strip moved to Row 2 (next to GPU Mem) below. 4px
+      // margin on the left, 8px on the right — extra 4px right-side gap
+      // separates the GPU chiclets from the GPU Mem field that follows
+      // (asked-for breathing room; 4px on either side felt cramped on
+      // the GPU-Mem boundary).
+      const _gpusLabelHtml = `<label class="hwfit-gpus-label" style="margin:0 8px 0 4px;">${_l('GPUs','Toggle which GPUs to use')}<div class="cookbook-gpu-group">${_gpuBtnsHtml}</div><input type="hidden" class="hwfit-sf" data-field="gpus" value="${esc(defaultGpus)}" /></label>`;
+      // Save / saved-configs split button — sits at the right end of Row 1.
      panelHtml += _slotsHtml;
      panelHtml += `</div>`;
      // (hwfit-serve-runtime-note moved to the top of the panel — see above.)
@@ -650,17 +664,21 @@ function _rerenderCachedModels() {
      // (Swap, KV Cache, Attention backend, Env vars, llama.cpp batch/ubatch)
      // moved to the Advanced fold below to keep this row scannable.
      panelHtml += `<div class="hwfit-serve-row hwfit-serve-row-core hwfit-backend-vllm hwfit-backend-sglang hwfit-backend-llamacpp hwfit-backend-ollama">`;
-      // Order: Dtype → TP → Context → GPU → GPU Mem → Max Seqs.
-      // Dtype moved left of TP at user's request — it's the first knob
-      // people typically check when matching the model to the box.
-      panelHtml += `<label>${_l('Dtype','Data type for weights. auto picks best for GPU')}<select class="hwfit-sf" data-field="dtype">${dtypeOpts}</select></label>`;
+      // Order: TP → Context → Max Seqs → GPU Mem → GPUs.
+      // Dtype moved up to Row 1. GPUs moved here next to GPU Mem so the
+      // "which devices + how much of them" decisions sit adjacent. Max
+      // Seqs follows Context per the "request-shape" cluster.
      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('TP','Tensor Parallelism — split model across N GPUs')}<select class="hwfit-sf" data-field="tp">${tpOpts}</select></label>`;
      // ctx resets to the model's max on every panel open (the real ctx slider
      // lives in the Scan/Download toolbar — see cookbook.js .hwfit-ctx-control).
      panelHtml += `<label>${_l('Context','Max tokens per request — resets to the model max on every open. Lower = less VRAM')}<input type="text" class="hwfit-sf" data-field="ctx" value="${esc(m.context_length || m.context || '20000')}" /></label>`;
-      panelHtml += `<label>${_l('GPU','Which GPU to use. Leave empty for default')}<input type="text" class="hwfit-sf" data-field="gpu_id" value="${esc(sv('gpu_id', ''))}" placeholder="auto" style="width:50px;" /></label>`;
-      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('GPU Mem','Fraction of GPU memory (0.0–1.0). Lower if OOM')}<input type="text" class="hwfit-sf" data-field="gpu_mem" value="${esc(sv('gpu_mem', '0.90'))}" /></label>`;
      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('Max Seqs','Maximum concurrent requests. Lower = less memory. Default 4 — prosumer GPUs often OOM on vLLM default 256 during CUDA graph capture.')}<input type="text" class="hwfit-sf" data-field="max_seqs" value="${esc(sv('max_seqs', '4'))}" placeholder="4" /></label>`;
+      // GPU "auto" field removed — the GPU button strip below already
+      // writes data-field="gpus" (the canonical comma-separated device
+      // list) and the command builders now read from that single source.
+      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('GPU Mem','Fraction of GPU memory (0.0–1.0). Lower if OOM')}<input type="text" class="hwfit-sf" data-field="gpu_mem" value="${esc(sv('gpu_mem', '0.90'))}" /></label>`;
+      // GPUs button strip at the far right of Row 2.
+      panelHtml += _gpusLabelHtml;
      panelHtml += `</div>`;
      // ── Advanced (collapsed by default) ──
      // Everything below the fold is tuning users only touch occasionally:
@@ -688,7 +706,10 @@ function _rerenderCachedModels() {
      // tuning, or any other KEY=VALUE pair that doesn't have a dedicated
      // field. After the venv activate runs, $VIRTUAL_ENV / $PATH / etc. are
      // already exported so they expand correctly here.
-      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang" style="flex:1 1 100%;">${_l('Env','Extra KEY=VALUE env-var pairs prepended to the launch (space-separated). Example: CUDACXX=$VIRTUAL_ENV/lib/python3.10/site-packages/nvidia/cuda_nvcc/bin/nvcc — points flashinfer at the venv-bundled nvcc when the system one is too old for your GPU.')}<input type="text" class="hwfit-sf" data-field="extra_env" value="${esc(sv('extra_env',''))}" placeholder="CUDACXX=/path/to/nvcc NCCL_P2P_DISABLE=1" style="width:100%;" /></label>`;
+      // grid-column: 1 / -1 makes Env span every column of the Advanced
+      // row's CSS grid (the old flex:1 1 100% did nothing in a grid
+      // container — left an empty trailing column gap on wide modals).
+      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang" style="grid-column:1 / -1;">${_l('Env','Extra KEY=VALUE env-var pairs prepended to the launch (space-separated). Example: CUDACXX=$VIRTUAL_ENV/lib/python3.10/site-packages/nvidia/cuda_nvcc/bin/nvcc — points flashinfer at the venv-bundled nvcc when the system one is too old for your GPU.')}<input type="text" class="hwfit-sf" data-field="extra_env" value="${esc(sv('extra_env',''))}" placeholder="CUDACXX=/path/to/nvcc NCCL_P2P_DISABLE=1" style="width:100%;" /></label>`;
      panelHtml += `</div>`;
      // Advanced llama.cpp row (Batch / UBatch — moved out of Core for the
      // same "rarely touched" reason as the vLLM extras above).
@@ -722,6 +743,21 @@ function _rerenderCachedModels() {
      if (_rp_name) panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="reasoning_parser" data-parser="${_rp_name}" /> Reasoning Parser <span class="hwfit-parser-tag">${_rp_name}</span></label>`;
      panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="enforce_eager"${sv('enforce_eager',false)?' checked':''} /> Enforce Eager${_h('Disable CUDA graphs. Slower but uses less memory')}</label>`;
      panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="prefix_cache"${sv('prefix_cache',false)?' checked':''} /> Prefix Caching${_h('Cache shared prompt prefixes across requests')}</label>`;
+      // Inline the previously-second vLLM checks row so Expert Parallel /
+      // Speculative / MoE Env sit next to Prefix Caching with no gap. All
+      // three are vLLM-only — class-gated so they hide on SGLang.
+      if (_opts2_row3.flags.includes('--enable-expert-parallel')) panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel" /> Expert Parallel</label>`;
+      {
+        const _specDef = _opts2_row3.spec || { method: 'mtp', tokens: 3 };
+        const _specMethod = sv('spec_method', _specDef.method);
+        const _specTokens = sv('spec_tokens', String(_specDef.tokens));
+        const _specMethods = ['mtp', 'qwen3_next_mtp', 'eagle', 'medusa', 'ngram'];
+        if (!_specMethods.includes(_specMethod)) _specMethods.unshift(_specMethod);
+        const _specOpts = _specMethods.map(m =>
+          `<option value="${m}"${m === _specMethod ? ' selected' : ''}>${m}</option>`).join('');
+        panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-spec-group"><input type="checkbox" class="hwfit-sf" data-field="speculative" /> Speculative <select class="hwfit-sf hwfit-spec-method" data-field="spec_method" title="vLLM --speculative-config method">${_specOpts}</select><input type="number" class="hwfit-sf hwfit-spec-tokens hwfit-spec-tokens-bare" data-field="spec_tokens" value="${esc(_specTokens)}" min="1" max="10" title="num_speculative_tokens" style="width:44px;" /><span class="hwfit-help-chip hwfit-help-chip-inline" title="MTP / speculative decoding is supported on a few model families only — turn it on when the model card explicitly recommends it. On supported models it can boost inference throughput up to ~3×; on unsupported models it will either be ignored or fail to launch." style="margin-left:6px;">?</span></label>`;
+      }
+      if (_opts2_row3.envVars.length) panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="moe_env" /> MoE Env Vars</label>`;
      panelHtml += `</div>`;
      // Row 2c: llama.cpp fit/perf flags (set by Auto profiles, editable by hand)
      const _kvOpts = ['', 'q4_0', 'q8_0', 'f16'].map(k => `<option value="${k}"${sv('cache_type','')===k?' selected':''}>${k||'default'}</option>`).join('');
@@ -774,28 +810,8 @@ function _rerenderCachedModels() {
      // vLLM backend so the Speculative (MTP) control is ALWAYS reachable —
      // even for models the auto-detector doesn't recognize. Expert-parallel,
      // reasoning-parser and MoE-env still only appear when auto-detected.
-      const _opts2 = _detectModelOptimizations(repo);
-      panelHtml += `<div class="hwfit-serve-checks hwfit-backend-vllm">`;
-      if (_opts2.flags.includes('--enable-expert-parallel')) panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel" /> Expert Parallel</label>`;
-      // Reasoning Parser moved to Row 3 (inline with Trust Remote / Auto
-      // Tool) so the per-model toggles sit together — the duplicate that
-      // lived here previously left two copies of the same checkbox.
-      {
-        // Speculative decoding (vLLM --speculative-config). Default OFF; the
-        // method/token defaults come from auto-detection when available,
-        // else fall back to MTP/3. Toggling the checkbox is what actually
-        // adds the flag at launch (see cookbook.js command builder).
-        const _specDef = _opts2.spec || { method: 'mtp', tokens: 3 };
-        const _specMethod = sv('spec_method', _specDef.method);
-        const _specTokens = sv('spec_tokens', String(_specDef.tokens));
-        const _specMethods = ['mtp', 'qwen3_next_mtp', 'eagle', 'medusa', 'ngram'];
-        if (!_specMethods.includes(_specMethod)) _specMethods.unshift(_specMethod);
-        const _specOpts = _specMethods.map(m =>
-          `<option value="${m}"${m === _specMethod ? ' selected' : ''}>${m}</option>`).join('');
-        panelHtml += `<label class="hwfit-sf-cb hwfit-spec-group"><input type="checkbox" class="hwfit-sf" data-field="speculative" /> Speculative <select class="hwfit-sf hwfit-spec-method" data-field="spec_method" title="vLLM --speculative-config method">${_specOpts}</select><input type="number" class="hwfit-sf hwfit-spec-tokens hwfit-spec-tokens-bare" data-field="spec_tokens" value="${esc(_specTokens)}" min="1" max="10" title="num_speculative_tokens" style="width:44px;" /><span class="hwfit-help-chip hwfit-help-chip-inline" title="MTP / speculative decoding is supported on a few model families only — turn it on when the model card explicitly recommends it. On supported models it can boost inference throughput up to ~3×; on unsupported models it will either be ignored or fail to launch." style="margin-left:6px;">?</span></label>`;
-      }
-      if (_opts2.envVars.length) panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="moe_env" /> MoE Env Vars</label>`;
-      panelHtml += `</div>`;
+      // Expert Parallel / Speculative / MoE Env moved into Row 3 above so
+      // the vLLM-only toggles sit next to Prefix Caching with no gap.
      // Extra args sits below the vLLM checks (Reasoning Parser + Spec)
      // so it reads as "after the advanced toggles, any other flags".
      panelHtml += `<div class="hwfit-serve-extra">`;
@@ -1143,6 +1159,22 @@ function _rerenderCachedModels() {
            note.style.color = 'var(--red)';
            note.style.borderColor = 'color-mix(in srgb, var(--red) 40%, transparent)';
            note.style.background = 'color-mix(in srgb, var(--red) 8%, transparent)';
+            // Append an accent-color link straight to the Dependencies
+            // recipe panel for this backend so the user has one click
+            // to the fix instead of hunting for the right row.
+            if (noteText) {
+              const pkgName = pkg?.name || ({ vllm: 'vllm', sglang: 'sglang', llamacpp: 'llama_cpp', diffusers: 'diffusers' }[backend]);
+              const repo = (panel.closest('.doclib-card, .memory-item')?.dataset?.repo) || '';
+              const link = document.createElement('a');
+              link.href = '#';
+              link.textContent = ' Install in Dependencies →';
+              link.style.cssText = 'color:var(--accent, var(--red));text-decoration:underline;font-weight:600;margin-left:4px;';
+              link.addEventListener('click', (ev) => {
+                ev.preventDefault();
+                if (pkgName) openCookbookDependencies(pkgName, { expandRecipe: pkgName, model: repo });
+              });
+              noteText.appendChild(link);
+            }
          } else {
            // Healthy / ready → green so the user reads "good to go" at a
            // glance instead of scanning fg-muted for a state.