/**
 * Build the URL hash that makes SGLang's cookbook page land on the preset
 * matching the detected GPU and the model's quantisation.
 *
 * SGLang supports:
 *   hw       = b200 | b300 | gb200 | gb300 | mi300x | mi325x | mi350x | mi355x | h200
 *   quant    = mxfp8 | bf16
 *   variant  = default    strategy = balanced    nodes = single
 *
 * `hw` is emitted only when the cached GPU name matches a known SKU; when it
 * is omitted SGLang falls back to its own default preset.
 *
 * @param {{quant?: string}|null|undefined} modelData - Model row data; only `quant` is read.
 * @returns {string} Hash fragment, always starting with `#`.
 */
function _sglangHashFor(modelData) {
  // `_hwfitCache` may not be declared/populated yet, hence the typeof guard.
  const sys = (typeof _hwfitCache !== 'undefined' ? _hwfitCache?.system : null) || {};
  const gpuName = String(sys.gpu_name || '').toLowerCase();

  // Ordered by priority (first match wins), same order as the SKU list above.
  // NVIDIA SKUs need a left-hand guard so "gh200" is not read as "h200" and
  // "gb200" is not read as "b200". `\b` is not sufficient for that: `_` is a
  // word character, so "nvidia_h200" would never match. The trailing digit
  // guard stops a longer model number (e.g. "b2000") from matching.
  const skuPatterns = [
    ['gb300', /(?:^|[^a-z0-9])gb300(?![0-9])/],
    ['gb200', /(?:^|[^a-z0-9])gb200(?![0-9])/],
    ['b300', /(?:^|[^a-z0-9])b300(?![0-9])/],
    ['b200', /(?:^|[^a-z0-9])b200(?![0-9])/],
    ['h200', /(?:^|[^a-z0-9])h200(?![0-9])/],
    ['mi355x', /mi355(?![0-9])/],
    ['mi350x', /mi350(?![0-9])/],
    ['mi325x', /mi325(?![0-9])/],
    ['mi300x', /mi300(?![0-9])/],
  ];
  const matched = skuPatterns.find(([, pattern]) => pattern.test(gpuName));
  const hw = matched ? matched[0] : '';

  const qRaw = String(modelData?.quant || '').toLowerCase();
  // mxfp8 covers fp8 / mxfp8 / nvfp4; every other quant maps to bf16.
  const quant = /fp8|mxfp|nvfp/.test(qRaw) ? 'mxfp8' : 'bf16';

  const parts = [
    ...(hw ? [`hw=${hw}`] : []),
    'variant=default',
    `quant=${quant}`,
    'strategy=balanced',
    'nodes=single',
  ];
  return `#${parts.join('&')}`;
}
`; html += `
`; html += `${esc(modelData.name)}${dlSource.kind ? ` (${esc(dlSource.kind)} ${esc(modelData.quant || '')})` : (modelData.quant_repo ? ` (${esc(modelData.quant)})` : '')}`; html += `${esc(label)}`; html += `HF \u2197`; + if (backend === 'vllm' && _vllmUrl) { + html += `vLLM \u2197`; + } + if (backend === 'sglang' && _sglangUrl) { + html += `SGLang \u2197`; + } html += `
`; html += `
`; html += ``;