// ============================================
// COOKBOOK SERVE SUB-MODULE
// Serve tab: cached model list, serve panel building,
// command building, preset slots, launch logic
// ============================================

import uiModule from './ui.js';
import spinnerModule from './spinner.js';
import { providerLogo } from './providers.js';
import { modelColor } from './chatRenderer.js';
import { bindMenuDismiss, dismissOrRemove } from './escMenuStack.js';
import { openCookbookDependencies } from './cookbook-diagnosis.js';
import { _hwfitCache } from './cookbook-hwfit.js';

// Shared state/functions injected by init()
let _envState;
let _sshCmd;
let _getPort;
let _sshPrefix;
let _serverByVal;
let _serverKey;
let _getPlatform;
let _isWindows;
let _isMetal;
let _buildEnvPrefix;
let _buildServeCmd;
let _shellQuote;
let _psQuote;
let _detectBackend;
let _detectToolParser;
let _detectModelOptimizations;
let _loadPresets;
let _savePresets;
let _copyText;
let _persistEnvState;
let _getGpuToggleTotal;
let modelLogo;
let esc;
let _launchServeTask;
let _retryDownload;
let _nextAvailablePort;

// Storage keys
const SERVE_STATE_KEY = 'cookbook-serve-state';
const SERVE_FAVORITES_KEY = 'cookbook-serve-favorite-models';

let _cachedAllModels = [];

// Read the favourite repo ids from localStorage as a Set of strings.
// Any parse/storage failure, or a non-array payload, yields an empty Set.
function _loadServeFavorites() {
  try {
    const raw = JSON.parse(localStorage.getItem(SERVE_FAVORITES_KEY) || '[]');
    return new Set(Array.isArray(raw) ? raw.filter(Boolean).map(String) : []);
  } catch {
    return new Set();
  }
}

// Persist the favourites (any iterable) as a JSON array.
function _saveServeFavorites(favorites) {
  try {
    localStorage.setItem(SERVE_FAVORITES_KEY, JSON.stringify(Array.from(favorites || [])));
  } catch {
    // Best-effort: a failing localStorage write is deliberately ignored.
  }
}

// Mask secrets inside a command-like string before it is stored.
//  1. HuggingFace-style tokens (hf_ + 20 or more alphanumerics).
//  2. key=value / key: value pairs whose key names a secret. An optional
//     Bearer/Basic scheme word is consumed together with the credential, so
//     "Authorization: Bearer abc" no longer leaves "abc" behind.
//  3. Space-separated secret flags such as "--api-key VALUE" or
//     "--hf-token VALUE". A following "-flag" is not treated as a value.
function _redactStoredCommand(value) {
  return String(value || '')
    .replace(/hf_[A-Za-z0-9]{20,}/g, '[redacted-token]')
    .replace(
      /((?:api[_-]?key|token|authorization|password|passwd|secret)\s*[=:]\s*)(["']?)(?:(?:Bearer|Basic)\s+)?[^\s"']+/gi,
      '$1$2[redacted]'
    )
    .replace(
      /(--[\w-]*(?:api[_-]?key|token|password|passwd|secret)\s+)(["']?)(?!-)[^\s"']+/gi,
      '$1$2[redacted]'
    );
}

// Return a redacted copy of a serve-state value for storage.
// Arrays are mapped element-wise; objects are shallow-copied per level.
// Keys that look like secrets are dropped, string values under
// cmd/command/args/env-like keys are passed through _redactStoredCommand,
// and nested objects are processed recursively. Primitives are returned as-is.
function _redactServeStateForStorage(value) {
  if (!value || typeof value !== 'object') return value;
  if (Array.isArray(value)) return value.map(_redactServeStateForStorage);
  const safe = { ...value };
  for (const key of Object.keys(safe)) {
    if (/token|password|passwd|secret|api[_-]?key/i.test(key)) {
      delete safe[key];
    } else if (typeof safe[key] === 'string' && /cmd|command|args|env/i.test(key)) {
      safe[key] = _redactStoredCommand(safe[key]);
    } else if (safe[key] && typeof safe[key] === 'object') {
      safe[key] = _redactServeStateForStorage(safe[key]);
    }
  }
  return safe;
}

// True when the repo id is in the stored favourites.
function _isServeFavorite(repo) {
  return _loadServeFavorites().has(String(repo || ''));
}

// Flip the favourite flag for a repo and persist it.
// Returns the new state (false for an empty repo id).
function _toggleServeFavorite(repo) {
  const key = String(repo || '');
  if (!key) return false;
  const favorites = _loadServeFavorites();
  const next = !favorites.has(key);
  if (next) favorites.add(key);
  else favorites.delete(key);
  _saveServeFavorites(favorites);
  return next;
}

// Heuristic: does the quant field or any identifying text mention AWQ/GPTQ/FP8?
function _repoLooksAwqLike(model, repo) {
  const q = String(model?.quant || '').toUpperCase();
  const n = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  return /^AWQ|^GPTQ/.test(q) || q === 'FP8' || /\b(awq|gptq|fp8)\b/i.test(n);
}

// Heuristic: does the model look like GGUF (flag, file list, quant tag or name)?
function _repoLooksGgufLike(model, repo) {
  const q = String(model?.quant || '').toUpperCase();
  const n = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  const hasGgufFile = Array.isArray(model?.gguf_files)
    && model.gguf_files.some(f => f && typeof f.rel_path === 'string' && /\.gguf$/i.test(f.rel_path));
  return !!model?.is_gguf || hasGgufFile || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || n.includes('gguf');
}

// Return a { title, body } warning when the chosen backend does not match the
// model format suggested by the heuristics above, or null when nothing applies.
// The checks run in order and the first match wins.
function _serveBackendWarning(model, repo, backend, fields = {}) {
  const awqLike = _repoLooksAwqLike(model, repo);
  const ggufLike = _repoLooksGgufLike(model, repo);
  if (awqLike && (backend === 'llamacpp' || backend === 'ollama')) {
    return {
      title: 'AWQ needs vLLM or SGLang',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors. llama.cpp and Ollama need GGUF files, so this backend cannot serve it. Choose vLLM/SGLang on a CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama.',
    };
  }
  if (awqLike && _isMetal() && (backend === 'vllm' || backend === 'sglang')) {
    return {
      title: 'AWQ is not a unified-memory path',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors. AWQ is for vLLM/SGLang on CUDA/ROCm-style GPU servers, not local unified-memory llama.cpp/Ollama serving. For unified memory, download a GGUF model and use llama.cpp/Ollama.',
    };
  }
  if (awqLike && fields.unified_mem) {
    return {
      title: 'AWQ is not a unified-memory path',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors, but unified-memory local serving expects GGUF. Use vLLM/SGLang on a compatible GPU server, or download a GGUF version for llama.cpp/Ollama.',
    };
  }
  if (ggufLike && (backend === 'vllm' || backend === 'sglang')) {
    return {
      title: 'GGUF needs llama.cpp or Ollama',
      body: 'This model looks like GGUF. vLLM/SGLang expect HuggingFace safetensors-style repos. Choose llama.cpp/Ollama for GGUF, or download a safetensors model for vLLM/SGLang.',
    };
  }
  return null;
}

// Own-property check that also accepts null/undefined objects.
function _hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj || {}, key);
}

// "0,1,...,n-1" for a positive count; '' otherwise.
function _allGpuIds(count) {
  const n = Number(count || 0);
  if (!Number.isFinite(n) || n <= 0) return '';
  return Array.from({ length: Math.floor(n) }, (_, i) => String(i)).join(',');
}

// Split a command string on unquoted whitespace for display purposes.
// Quotes and backslashes are kept inside the tokens; a backslash protects the
// character after it.
function _shellSplitForPreview(cmd) {
  const s = String(cmd || '');
  const out = [];
  let cur = '';
  let quote = '';
  let escNext = false;
  for (const ch of s) {
    if (escNext) {
      cur += ch;
      escNext = false;
      continue;
    }
    if (ch === '\\') {
      cur += ch;
      escNext = true;
      continue;
    }
    if (quote) {
      cur += ch;
      if (ch === quote) quote = '';
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
      cur += ch;
      continue;
    }
    if (/\s/.test(ch)) {
      if (cur) {
        out.push(cur);
        cur = '';
      }
      continue;
    }
    cur += ch;
  }
  if (cur) out.push(cur);
  return out;
}

// Pretty-print a serve command, one logical item per line:
// leading VAR=value assignments, then the executable with up to two following
// non-flag tokens, then each --flag together with its values.
// Commands of four tokens or fewer are returned unchanged. A command starting
// with 'MODEL_FILE=$({' is split into its prelude and the server command,
// joined by a line containing only "&&".
function _formatServeCmdPreview(cmd) {
  const envAssign = /^[A-Za-z_][A-Za-z0-9_]*=/;
  const raw = String(cmd || '');
  if (raw.startsWith('MODEL_FILE=$({')) {
    const marker = /&&\s+([A-Za-z_][A-Za-z0-9_]*=\S+\s+)*(?:[A-Za-z_][A-Za-z0-9_]*=\S+\s+)?(?:llama-server|python3?\s+-m\s+llama_cpp\.server)\b/;
    const match = raw.match(marker);
    if (match && match.index > 0) {
      const prelude = raw.slice(0, match.index).replace(/\s+/g, ' ').trim();
      const rest = raw.slice(match.index).replace(/^\s*&&\s*/, '');
      return `${prelude}\n&&\n${_formatServeCmdPreview(rest)}`;
    }
  }
  const tokens = _shellSplitForPreview(cmd);
  if (tokens.length <= 4) return String(cmd || '');
  const lines = [];
  let i = 0;
  while (i < tokens.length && envAssign.test(tokens[i])) {
    lines.push(tokens[i]);
    i++;
  }
  if (tokens[i]) {
    const head = [tokens[i++]];
    if (tokens[i] && !tokens[i].startsWith('--') && !envAssign.test(tokens[i])) head.push(tokens[i++]);
    if (tokens[i] && !tokens[i].startsWith('--') && !envAssign.test(tokens[i])) head.push(tokens[i++]);
    lines.push(head.join(' '));
  }
  while (i < tokens.length) {
    const t = tokens[i++];
    if (t.startsWith('--')) {
      const vals = [];
      while (i < tokens.length && !tokens[i].startsWith('--') && !envAssign.test(tokens[i])) {
        vals.push(tokens[i++]);
      }
      lines.push([t, ...vals].join(' '));
    } else {
      lines.push(t);
    }
  }
  return lines.join('\n');
}

// Collapse a (possibly multi-line) command back to a single line.
// "||" is swapped for a placeholder while single "|" spacing is normalised,
// then restored, so the two operators do not interfere.
function _normalizeServeCmdForLaunch(cmd) {
  return String(cmd || '')
    .replace(/MODEL_FILE=\$\(\{\s+/g, 'MODEL_FILE=$({ ')
    .replace(/\s+\}\s+\|\s+head\s+-1\)/g, ' } | head -1)')
    .replace(/\s*;\s*/g, '; ')
    .replace(/\s*\|\|\s*/g, ' __ODY_OR__ ')
    .replace(/\s*\|\s*/g, ' | ')
    .replace(/\s+__ODY_OR__\s+/g, ' || ')
    .replace(/\s+/g, ' ')
    .trim();
}

// Model weight size in GiB-style units. Preference order: explicit argument,
// size_bytes, the first truthy size-like field, a fixed 240 for MiniMax M3,
// else 0 (unknown).
function _modelSizeGb(model, explicitGb = 0) {
  const explicit = Number(explicitGb || 0);
  if (Number.isFinite(explicit) && explicit > 0) return explicit;
  const bytes = Number(model?.size_bytes || 0);
  if (Number.isFinite(bytes) && bytes > 0) return bytes / (1024 ** 3);
  const gb = Number(
    model?.size_gb
    || model?.required_gb
    || model?.vram_needed
    || model?.min_vram_gb
    || model?.recommended_ram_gb
    || model?.min_ram_gb
    || 0
  );
  if (Number.isFinite(gb) && gb > 0) return gb;
  if (_isMiniMaxM3Model(model)) return 240;
  return 0;
}

// Parse the first "<number><b|m|t>" token in the text as a parameter count in
// billions (t = x1000, m = /1000). Returns 0 when nothing matches.
function _parseParamsB(text) {
  const s = String(text || '');
  const m = s.match(/(\d+(?:\.\d+)?)\s*([bBmMtT])\b/);
  if (!m) return 0;
  const n = parseFloat(m[1]);
  if (!Number.isFinite(n) || n <= 0) return 0;
  const unit = m[2].toLowerCase();
  if (unit === 't') return n * 1000;
  if (unit === 'b') return n;
  if (unit === 'm') return n / 1000;
  return 0;
}

// Hard-coded context limits for specific models; 0 when none is known.
function _knownModelContextMax(model) {
  if (_isMiniMaxM3Model(model)) return 1048576;
  return 0;
}

// Lower-cased, space-joined concatenation of the model's identifying fields.
function _modelIdentityText(model) {
  return [
    model?.repo_id,
    model?.quant_repo,
    model?.name,
    model?.id,
    model?.path,
    model?.model_path,
    model?.served_model_name,
    model?.quant,
    model?.format,
  ].filter(Boolean).join(' ').toLowerCase();
}

// True when the identity text matches one of the MiniMax M3 patterns below.
function _isMiniMaxM3Model(model) {
  const name = _modelIdentityText(model);
  return (
    (/minimax/.test(name) && /\bm3\b/.test(name))
    || /minimax-m3/.test(name)
    || /models--cyankiwi--minimax-m3-awq-int4/.test(name)
    || /cyankiwi\/minimax-m3-awq-int4/.test(name)
  );
}

// True when the identity text mentions "minimax" and a standalone m2 / m2.x.
function _isMiniMaxM2Model(model) {
  const name = _modelIdentityText(model);
  return /minimax/.test(name) && /\bm2(?:\.\d+)?\b/.test(name);
}

// Maximum context for serving. Preference order: explicit value, hard-coded
// known limit, the first positive metadata field, model.context, else 131072.
function _modelContextMaxForServe(model, explicitMax) {
  const explicit = Number(explicitMax || 0);
  if (Number.isFinite(explicit) && explicit > 0) return explicit;
  const known = _knownModelContextMax(model);
  if (known > 0) return known;
  for (const key of ['context_length', 'max_position_embeddings', 'n_ctx_train', 'model_max_length', 'max_seq_len']) {
    const value = Number(model?.[key] || 0);
    if (Number.isFinite(value) && value > 0) return value;
  }
  const catalogCtx = Number(model?.context || 0);
  if (Number.isFinite(catalogCtx) && catalogCtx > 0) return catalogCtx;
  return 131072;
}

// Estimate the context length that fits in per-GPU KV-cache memory for a
// vLLM-style launch.
//
// Returns one of:
//   { needsHardwareScan: true, reason }  - no per-GPU VRAM figure available
//   { needsModelSize: true, reason }     - model weight size unknown
//   { ctx, budgetGb, modelGb, kvGbPerToken, reason }
//
// `fields` supplies gpus, tp, gpu_mem and vllm_kv_cache_dtype; a missing
// `fields` is treated as an empty object. The numeric constants are the
// module's own heuristics and are reproduced unchanged.
function _estimateVllmContextFit(model, fields, modelCtxMax, modelWeightsGb = 0, fitSystem = null) {
  const opts = fields || {};
  const sys = fitSystem || _hwfitCache?.system || {};
  const gpuIds = String(opts.gpus || '')
    .split(',')
    .map(s => parseInt(s.trim(), 10))
    .filter(Number.isFinite);
  const tp = Math.max(1, parseInt(opts.tp, 10) || gpuIds.length || 1);
  const selectedCount = Math.max(1, gpuIds.length || tp);
  const groups = Array.isArray(sys.gpu_groups) ? sys.gpu_groups : [];
  const activeGroup = sys.active_group || groups[0] || null;
  const perGpuGb = Number(activeGroup?.vram_each)
    || (Number(sys.gpu_vram_gb) / Math.max(1, Number(sys.gpu_count) || selectedCount))
    || 0;
  if (!perGpuGb) {
    return { needsHardwareScan: true, reason: 'scan hardware first to estimate context from VRAM' };
  }

  const gpuUtil = Math.min(0.99, Math.max(0.1, parseFloat(opts.gpu_mem) || 0.90));
  const budgetGb = perGpuGb * selectedCount * gpuUtil;
  const modelGb = _modelSizeGb(model, modelWeightsGb);
  if (!modelGb) {
    return { needsModelSize: true, reason: 'model weight size unknown; scan model files or enter context manually' };
  }
  const modelMax = Math.max(1024, _modelContextMaxForServe(model, modelCtxMax));

  // Per-GPU budget: the weights are divided by tp, then a fixed overhead is
  // reserved before anything is counted as available for KV cache.
  const perGpuBudgetGb = perGpuGb * gpuUtil;
  const modelShardGb = modelGb / Math.max(1, tp);
  const fixedOverheadGb = Math.max(1.5, perGpuBudgetGb * 0.035);
  const freeForKv = perGpuBudgetGb - modelShardGb - fixedOverheadGb;

  const kvIsFp8 = String(opts.vllm_kv_cache_dtype || '').toLowerCase() === 'fp8';
  const isMiniMaxM3 = _isMiniMaxM3Model(model);

  let kvGbPerToken;
  if (isMiniMaxM3) {
    // MiniMax M3 uses a fixed per-token figure instead of the generic
    // parameter-count heuristic.
    kvGbPerToken = (29.25 / 1048576) * (kvIsFp8 ? 1 : 1.8);
  } else {
    const name = `${model?.repo_id || ''} ${model?.name || ''} ${model?.quant || ''}`;
    const lower = name.toLowerCase();
    const isMoE = /\bmoe\b|a\d+b|minimax|deepseek|mixtral|kimi-k2|glm-4\.5/.test(lower);
    const totalParams = _parseParamsB(name) || Math.max(1, modelGb / 0.58);
    const activeMatch = lower.match(/\ba(\d+(?:\.\d+)?)b\b/);
    const activeFromName = activeMatch ? parseFloat(activeMatch[1]) : 0;
    const activeParams = activeFromName || (isMoE ? Math.min(totalParams, 32) : totalParams);
    const kvFactor = kvIsFp8 ? 0.55 : 1;
    const kvGbPerTokenTotal = Math.max(0.00002, 0.000008 * activeParams * kvFactor);
    kvGbPerToken = kvGbPerTokenTotal / Math.max(1, tp);
  }

  if (freeForKv <= 0) {
    return {
      ctx: 1024,
      budgetGb,
      modelGb,
      kvGbPerToken,
      reason: `model shard ${modelShardGb.toFixed(1)}G exceeds per-GPU usable ${perGpuBudgetGb.toFixed(1)}G before KV`,
    };
  }

  // MiniMax M3 keeps a 1% margin and rounds down to a multiple of 128;
  // every other model rounds down to a multiple of 1024.
  const raw = isMiniMaxM3
    ? Math.floor((freeForKv / kvGbPerToken) * 0.99)
    : Math.floor(freeForKv / kvGbPerToken);
  const step = isMiniMaxM3 ? 128 : 1024;
  const rounded = Math.max(1024, Math.floor(raw / step) * step);
  const ctx = Math.min(modelMax, rounded);
  return {
    ctx,
    budgetGb,
    modelGb,
    kvGbPerToken,
    reason: `~${ctx.toLocaleString()} tokens fits per-GPU KV (${freeForKv.toFixed(1)}G free)`,
  };
}

// Estimate a context length for llama.cpp-style serving.
//
// A scanned profile wins when present (key "balanced" first, otherwise the
// first profile with a positive ctx). Without one, the result depends on the
// model size, the CPU/unified mode in `fields`, and the VRAM/RAM figures in
// `fitSystem` (or the cached hardware scan). A missing `fields` is treated as
// an empty object. Every return value carries `ctx` and `reason`; the fallback
// paths also set needsModelSize / needsHardwareScan.
function _estimateLlamaContextFit(model, fields, modelCtxMax, modelWeightsGb = 0, fitSystem = null, profileData = null) {
  const opts = fields || {};
  const profiles = Array.isArray(profileData?.profiles) ? profileData.profiles : [];
  const preferred = profiles.find(p => String(p?.key || '').toLowerCase() === 'balanced')
    || profiles.find(p => Number(p?.ctx) > 0)
    || null;
  const modelMax = Math.max(1024, _modelContextMaxForServe(model, modelCtxMax));
  if (preferred && Number(preferred.ctx) > 0) {
    const ctx = Math.min(modelMax, Number(preferred.ctx));
    return {
      ctx,
      reason: `profile ${preferred.label || preferred.key || 'fit'} fits scanned hardware`,
    };
  }

  const sys = fitSystem || _hwfitCache?.system || {};
  const modelGb = _modelSizeGb(model, modelWeightsGb);
  const backend = String(opts.backend || '').toLowerCase();
  const llamaMode = String(opts.llama_mode || '').toLowerCase();
  const isCpuMode = backend === 'llamacpp' && llamaMode === 'cpu';
  const isUnifiedMode = backend === 'llamacpp' && (llamaMode === 'unified' || opts.unified_mem);
  if (!modelGb) {
    return {
      ctx: Math.min(modelMax, 32768),
      needsModelSize: true,
      reason: 'model weight size unknown; using model limit fallback',
    };
  }
  if (isCpuMode) {
    return {
      ctx: Math.min(modelMax, 131072),
      modelGb,
      reason: 'CPU mode uses system RAM; capped to trained limit',
    };
  }

  const gpuIds = String(opts.gpus || '')
    .split(',')
    .map(s => parseInt(s.trim(), 10))
    .filter(Number.isFinite);
  const selectedCount = Math.max(1, gpuIds.length || parseInt(opts.tp, 10) || 1);
  const groups = Array.isArray(sys.gpu_groups) ? sys.gpu_groups : [];
  const activeGroup = sys.active_group || groups[0] || null;
  const totalVramGb = Number(activeGroup?.vram_each)
    ? Number(activeGroup.vram_each) * selectedCount
    : (Number(sys.gpu_vram_gb) || 0);
  if (!totalVramGb) {
    return {
      ctx: Math.min(modelMax, 32768),
      modelGb,
      needsHardwareScan: true,
      reason: 'scan hardware first; using model limit fallback',
    };
  }

  const totalRamGb = Number(sys.total_ram_gb) || 0;
  const availableRamGb = Number(sys.available_ram_gb) || 0;
  // In unified mode the pool is the largest of VRAM, available RAM and 85% of
  // total RAM; otherwise only the selected GPUs' VRAM counts.
  const unifiedPoolGb = isUnifiedMode
    ? Math.max(totalVramGb, availableRamGb, totalRamGb > 0 ? totalRamGb * 0.85 : 0)
    : totalVramGb;
  const usableGb = isUnifiedMode
    ? Math.max(1, unifiedPoolGb - Math.max(2.0, unifiedPoolGb * 0.08))
    : Math.max(1, totalVramGb - Math.max(1.0, selectedCount * 0.6));
  const freeForKv = usableGb - modelGb;
  const kv = String(opts.cache_type || '').toLowerCase();
  const kvFactor = kv === 'q4_0' ? 0.55 : (kv === 'q8_0' ? 1 : (kv === 'f16' ? 1.9 : 1));
  const kvGbPerToken = Math.max(0.00008, (modelGb / 7.5) * 0.0007 * kvFactor);
  if (freeForKv <= 0) {
    return {
      ctx: Math.min(modelMax, 8192),
      modelGb,
      kvGbPerToken,
      reason: `model ${modelGb.toFixed(1)}G exceeds usable ${isUnifiedMode ? 'unified memory' : 'VRAM'} ${usableGb.toFixed(1)}G before KV`,
    };
  }
  const raw = Math.floor(freeForKv / kvGbPerToken);
  const rounded = Math.max(1024, Math.floor(raw / 1024) * 1024);
  const ctx = Math.min(modelMax, rounded);
  return {
    ctx,
    modelGb,
    kvGbPerToken,
    reason: `~${ctx.toLocaleString()} tokens fits llama.cpp KV (${freeForKv.toFixed(1)}G free ${isUnifiedMode ? 'unified' : 'VRAM'})`,
  };
}

// Resolve the server a serve action targets. The server <select> is taken
// from the panel or, failing that, from the document; when one is found its
// value overrides the remote host stored in _envState.
// Returns { host, serverKey, serverName, env, port, venv, platform, label }.
function _selectedServeTarget(panel) {
  const select = panel?.querySelector?.('#hwfit-server-select')
    || document.getElementById('hwfit-server-select')
    || document.getElementById('hwfit-dl-server');
  const servers = Array.isArray(_envState.servers) ? _envState.servers : [];
  let host = _envState.remoteHost || '';
  let server = host
    ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host))
    : null;
  if (select && select.value != null) {
    if (select.value === 'local') {
      host = '';
      server = servers.find(s => !s.host || s.host === 'local') || null;
    } else {
      // A purely numeric value is also tried as an index into the server list.
      const idx = /^\d+$/.test(String(select.value)) ? parseInt(select.value, 10) : -1;
      server = _serverByVal?.(select.value) || (idx >= 0 ? servers[idx] : null) || null;
      host = server?.host || '';
    }
  }
  // Guarded like the first lookup so a panel without querySelector cannot throw.
  const venv = panel?.querySelector?.('[data-field="venv"]')?.value?.trim()
    || server?.envPath
    || _envState.envPath
    || '';
  const label = host
    ? (server?.name ? `${server.name} (${host})` : host)
    : (server?.name || 'local server');
  return {
    host,
    serverKey: server ? (_serverKey?.(server) || '') : (select?.value || ''),
    serverName: server?.name || '',
    env: server?.env || '',
    port: host ? (server?.port || _getPort(host) || '') : '',
    venv,
    platform: server?.platform || _envState.platform || '',
    label,
  };
}

// True for a target that has a host and whose platform is 'windows'.
function _remoteWindowsDiffusersUnsupported(target) {
  return !!(target?.host && target?.platform === 'windows');
}

// [value, label] backend pairs offered for a target.
function _backendChoicesForTarget(target) {
  if (target?.platform === 'windows') {
    if (_remoteWindowsDiffusersUnsupported(target)) return [['llamacpp', 'llama.cpp']];
    return [['llamacpp', 'llama.cpp'], ['diffusers', 'Diffusers']];
  }
  return _isMetal()
    ? [['llamacpp', 'llama.cpp'], ['ollama', 'Ollama']]
    : [['vllm', 'vLLM'], ['sglang', 'SGLang'], ['llamacpp', 'llama.cpp'], ['ollama', 'Ollama'], ['diffusers', 'Diffusers']];
}

// Fetch /api/cookbook/packages for the selected target and pick the entry for
// the backend's runtime package.
// Resolves to null for a backend without a mapped package, otherwise to
// { pkg, target } (pkg is undefined when the list has no such entry).
// Throws Error('HTTP <status>') on a non-OK response.
async function _fetchServeRuntimePackage(panel, backend) {
  const packageByBackend = {
    vllm: 'vllm',
    sglang: 'sglang',
    llamacpp: 'llama_cpp',
    diffusers: 'diffusers',
  };
  const packageName = packageByBackend[backend];
  if (!packageName) return null;
  const target = _selectedServeTarget(panel);
  const params = new URLSearchParams();
  if (target.host) {
    params.set('host', target.host);
    if (target.port) params.set('ssh_port', target.port);
    if (target.venv) params.set('venv', target.venv);
  }
  const query = params.toString();
  const res = await fetch('/api/cookbook/packages' + (query ? '?' + query : ''), { credentials: 'same-origin' });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const data = await res.json();
  const pkg = (data.packages || []).find(p => p.name === packageName);
  return { pkg, target };
}

// One-line readiness message for a backend on a target, built from the
// package entry's installed / probe_error / status_note / update_note fields.
function _runtimeNoteText(backend, pkg, target) {
  const labels = { vllm: 'vLLM', sglang: 'SGLang', llamacpp: 'llama.cpp', diffusers: 'Diffusers' };
  const label = labels[backend] || backend;
  if (!pkg) return `${label} readiness unavailable for ${target.label}.`;
  const note = pkg.status_note || pkg.update_note || '';
  if (pkg.installed === null || pkg.probe_error) {
    return note
      ? `${label} readiness unavailable for ${target.label}: ${note}`
      : `${label} readiness unavailable for ${target.label}.`;
  }
  if (pkg.installed) {
    return note
      ? `${label} ready on ${target.label}: ${note}`
      : `${label} ready on ${target.label}.`;
  }
  return note
    ? `${label} missing on ${target.label}: ${note}`
    : `${label} missing on ${target.label}.`;
}

// ── Filter/sort cached model list ──
// Show or hide the already-rendered cached-model rows according to the active
// tag chip (a "fam:" prefix selects by family) and the search box.
function _filterCachedList() {
  const list = document.getElementById('hwfit-cached-list');
  const tagContainer = document.getElementById('serve-tags');
  if (!list) return;
  const activeTag = tagContainer?.querySelector('.memory-cat-chip.active')?.dataset.serveTag || '';
  const searchVal = (document.getElementById('serve-search')?.value || '').toLowerCase().trim();
  const isFamily = activeTag.startsWith('fam:');
  const familyVal = isFamily ? activeTag.slice(4) : '';
  list.querySelectorAll('.memory-item[data-repo]').forEach(item => {
    const repo = (item.dataset.repo || '').toLowerCase();
    const tag = item.dataset.tag || '';
    const family = item.dataset.family || '';
    const tagMatch = !activeTag || (isFamily ? family === familyVal : tag === activeTag);
    const searchMatch = !searchVal || repo.includes(searchVal);
    item.style.display = (tagMatch && searchMatch) ? '' : 'none';
  });
}

// Is there a live download task for this repo in the Running tab?
The cache // reports any incomplete download dir as "downloading", but if nothing is // actively pulling it, it's really a stalled/partial download — so we label it // accordingly. Reads the running-tab tasks straight from localStorage (same // key the running module writes) to avoid a cross-module import cycle. function _isActivelyDownloading(repoId) { try { const tasks = JSON.parse(localStorage.getItem('cookbook-tasks')) || []; const short = (repoId || '').split('/').pop(); return tasks.some(t => t.type === 'download' && t.status === 'running' && (t.payload?.repo_id === repoId || t.name === repoId || t.name === short || (t.payload?.repo_id || '').split('/').pop() === short)); } catch { return false; } } // Same idea for serve: is there a live serve task for this repo? Used to // surface a "running" pill on the Serve tab card. function _isActivelyServing(repoId) { try { const tasks = JSON.parse(localStorage.getItem('cookbook-tasks')) || []; const short = (repoId || '').split('/').pop(); return tasks.some(t => t.type === 'serve' && t.status === 'running' && (t.payload?.repo_id === repoId || t.name === repoId || t.name === short || (t.payload?.repo_id || '').split('/').pop() === short)); } catch { return false; } } function _formatGgufSize(bytes) { const n = Number(bytes || 0); if (!Number.isFinite(n) || n <= 0) return ''; if (n >= 1024 ** 3) return `${(n / (1024 ** 3)).toFixed(1)} GB`; if (n >= 1024 ** 2) return `${Math.round(n / (1024 ** 2))} MB`; return `${Math.max(1, Math.round(n / 1024))} KB`; } function _ggufFilesForModel(model) { return Array.isArray(model?.gguf_files) ? model.gguf_files.filter(f => f && typeof f.rel_path === 'string' && f.rel_path) : []; } function _runnableGgufFiles(model) { const files = _ggufFilesForModel(model); const primary = files.filter(f => (f.role || 'model') === 'model'); return primary.length ? 
primary : files; } function _selectedGgufSizeGb(model, relPath) { const file = _runnableGgufFiles(model).find(f => f.rel_path === relPath); const bytes = Number(file?.size_bytes || 0); if (!Number.isFinite(bytes) || bytes <= 0) return 0; return bytes / (1024 ** 3); } function _ggufFileLabel(file) { const base = (file.name || file.rel_path || '').split('/').pop(); const size = _formatGgufSize(file.size_bytes); const quant = file.quant ? `${file.quant} ` : ''; const parts = Number(file.parts || 0); const split = parts > 1 ? `, ${parts} parts` : ''; const role = file.role && file.role !== 'model' ? ` ${file.role}` : ''; return `${quant}${base}${size || split ? ` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`; } function _ggufTaskDisplayPart(model, relPath) { const rel = String(relPath || ''); if (!rel) return ''; const file = _ggufFilesForModel(model).find(f => f.rel_path === rel); if (file?.quant) return String(file.quant).toUpperCase().replace(/^UD-/, ''); const parts = rel.split('/').filter(Boolean); const base = parts[parts.length - 1] || ''; const parent = parts.length > 1 ? parts[parts.length - 2] : ''; const text = `${parent} ${base}`; const quant = text.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_[MLS]|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i); if (quant) return quant[0].toUpperCase().replace(/^UD-/, ''); return base.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, ''); } function _serveTaskDisplayName(shortName, model, fields) { const name = String(shortName || '').trim(); const backend = String(fields?.backend || '').toLowerCase(); if (backend !== 'llamacpp' && backend !== 'ollama') return name; const part = _ggufTaskDisplayPart(model, fields?.gguf_file); return part && !name.includes(` · ${part}`) ? 
`${name} · ${part}` : name; } function _safeGgufRelPath(relPath) { const rel = String(relPath || '').replace(/\\/g, '/').replace(/^\/+/, ''); if (!rel || rel.startsWith('../') || rel.includes('/../') || rel === '..') return ''; if (rel.includes('\0')) return ''; return rel; } function _ggufDeleteChoice(repo, files) { return new Promise(resolve => { let overlay = document.getElementById('cookbook-gguf-delete-overlay'); if (!overlay) { overlay = document.createElement('div'); overlay.id = 'cookbook-gguf-delete-overlay'; overlay.className = 'modal hidden'; overlay.innerHTML = '
'; document.body.appendChild(overlay); } const safeFiles = files .map(f => ({ ...f, rel_path: _safeGgufRelPath(f.rel_path) })) .filter(f => f.rel_path); const msg = overlay.querySelector('#cookbook-gguf-delete-msg'); const list = overlay.querySelector('#cookbook-gguf-delete-list'); const cancelBtn = overlay.querySelector('#cookbook-gguf-delete-cancel'); const repoBtn = overlay.querySelector('#cookbook-gguf-delete-repo'); const selectedBtn = overlay.querySelector('#cookbook-gguf-delete-selected'); const prevFocus = document.activeElement; msg.textContent = `${repo} has multiple GGUF files. Pick what to delete.`; list.innerHTML = safeFiles.map((file, idx) => { const label = esc ? esc(_ggufFileLabel(file)) : _ggufFileLabel(file); const rel = esc ? esc(file.rel_path) : file.rel_path; return ``; }).join(''); function cleanup(result) { overlay.classList.add('hidden'); overlay.style.display = 'none'; cancelBtn.removeEventListener('click', onCancel); repoBtn.removeEventListener('click', onRepo); selectedBtn.removeEventListener('click', onSelected); overlay.removeEventListener('click', onBackdrop); document.removeEventListener('keydown', onKey); try { prevFocus && prevFocus.focus && prevFocus.focus(); } catch {} resolve(result); } function onCancel() { cleanup(null); } function onRepo() { cleanup({ mode: 'repo' }); } function onSelected() { const selected = [...list.querySelectorAll('input[type="checkbox"]:checked')] .map(input => safeFiles[Number(input.value)]) .filter(Boolean); if (!selected.length) { uiModule.showToast?.('Select at least one GGUF file.'); return; } cleanup({ mode: 'files', files: selected }); } function onBackdrop(e) { if (e.target === overlay) cleanup(null); } function onKey(e) { if (e.key === 'Escape') { e.preventDefault(); e.stopPropagation(); cleanup(null); } } cancelBtn.addEventListener('click', onCancel); repoBtn.addEventListener('click', onRepo); selectedBtn.addEventListener('click', onSelected); overlay.addEventListener('click', onBackdrop); 
document.addEventListener('keydown', onKey); overlay.classList.remove('hidden'); overlay.style.display = ''; selectedBtn.focus(); }); } function _shellPathExpr(path) { const s = String(path || ''); if (s === '~') return '${HOME}'; if (s.startsWith('~/')) return '${HOME}' + _shellQuote(s.slice(1)); return _shellQuote(s); } function _selectedGgufExpr(model, repo, relPath) { const rel = String(relPath || '').replace(/^\/+/, ''); if (!rel) return ''; if (model.is_local_dir && model.path) { const base = String(model.path || '').replace(/\/+$/, ''); return `$(printf %s ${_shellPathExpr(`${base}/${repo}/${rel}`)})`; } if (model.path) { const base = String(model.path || '').replace(/\/+$/, ''); return `$(printf %s ${_shellPathExpr(`${base}/models--${repo.replace(/\//g, '--')}/snapshots/${rel}`)})`; } const cacheRepo = repo.replace(/\//g, '--'); return `$(printf %s \${HOME}${_shellQuote(`/.cache/huggingface/hub/models--${cacheRepo}/snapshots/${rel}`)})`; } function _ggufSearchDirExpr(model, repo) { if (model.is_local_dir && model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/${repo}`); if (model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/models--${repo.replace(/\//g, '--')}/snapshots`); return `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`; } function _rerenderCachedModels() { const list = document.getElementById('hwfit-cached-list'); const tagContainer = document.getElementById('serve-tags'); if (!list || !_cachedAllModels.length) return; const allModels = _cachedAllModels; const _h = (text) => `?`; const activeTag = tagContainer?.querySelector('.memory-cat-chip.active')?.dataset.serveTag || ''; const searchVal = (document.getElementById('serve-search')?.value || '').toLowerCase().trim(); const sortVal = document.getElementById('serve-sort')?.value || 'name'; const _parseSize = (s) => { const m = (s || '').match(/([\d.]+)\s*(GB|MB|KB)/i); if (!m) return 0; const n = 
parseFloat(m[1]); if (m[2] === 'GB') return n * 1024; if (m[2] === 'MB') return n; return n / 1024; }; if (sortVal === 'name') allModels.sort((a, b) => (a.repo_id || '').localeCompare(b.repo_id || '')); else if (sortVal === 'size-desc') allModels.sort((a, b) => _parseSize(b.size) - _parseSize(a.size)); else if (sortVal === 'size-asc') allModels.sort((a, b) => _parseSize(a.size) - _parseSize(b.size)); else if (sortVal === 'recent') allModels.sort((a, b) => (b.mtime || 0) - (a.mtime || 0)); const favorites = _loadServeFavorites(); allModels.sort((a, b) => { const af = favorites.has(String(a.repo_id || '')) ? 1 : 0; const bf = favorites.has(String(b.repo_id || '')) ? 1 : 0; return bf - af; }); let html = ''; let visibleCount = 0; for (const m of allModels) { if (activeTag && m._tag !== activeTag) continue; if (searchVal && !(m.repo_id || '').toLowerCase().includes(searchVal)) continue; visibleCount++; const shortName = m.repo_id.split('/').pop() || m.repo_id; const hfLink = m.repo_id.includes('/') ? `https://huggingface.co/${m.repo_id}` : ''; const metaParts = []; if (m.repo_id.includes('/')) metaParts.push(m.repo_id.split('/')[0]); metaParts.push(m.size); if (m.path) { metaParts.push(`${esc(m.path)}`); } const ggufCount = _runnableGgufFiles(m).length; if (ggufCount > 1) metaParts.push(`${ggufCount} GGUFs`); // "downloading" status now renders as a title-row pill instead of // a meta-row text label, matching the "running" pill style and // living on the same line as the model name. const _isDownloading = m.status === 'downloading'; const _isDlActive = _isDownloading ? _isActivelyDownloading(m.repo_id) : false; const _isFavorite = favorites.has(String(m.repo_id || '')); const isSelectMode = document.getElementById('hwfit-cache-select')?.classList.contains('active'); html += `data/huggingface. Download a model here, or copy an existing host HuggingFace cache into that folder once.