mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
Cookbook: scoring fixes, UI polish, false-finished + stale-state bug fixes
Backend (services/hwfit + routes): - rank_models picks visible set by REQUESTED column, not always score — sorting by Param now shows highest-param models PERIOD (incl. too_tight). - New fit_only param. Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang cannot serve them); default non-prequantized to BF16 on 2+ GPUs. - AWQ / GPTQ-8bit get a -1.0 quality penalty (was 0.0, tied with FP8), so FP8 wins when both fit. - Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above M2.5 on equal composite score; >=100B integers not misread as versions. - /api/cookbook/hf-latest no longer drops models without an "NB" pattern in the repo id (MiniMax-M2.7, DeepSeek-V4-Pro etc. were silently filtered). - Cached-model scan: atexit flushes models JSON even if the script is killed mid-walk; each scan_dir wrapped in try/except; timeout 60s -> 180s. - KB granularity for sub-MB sizes (was "0 MB" for 12 KB shells). New "stalled" status for shells <1 MB with no .incomplete files. - /api/cookbook/state POST guard: rejects "done" download tasks lacking DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned shard is N<total — stops stale tabs from poisoning persisted state. - hf_models.json: add zai-org/GLM-5.1; flip zai-org/GLM-5 quantization Q4_K_M -> BF16 (it is the native base, not a quant). Frontend (static/js): - Scan/Download toolbar: quant defaults to All; ctx slider (8k/16k/32k/ 50k/128k/Max) ported from origin/main with sort=fit on drag, sort=score on Max. GPU toggle commits _activeCount to maxGpu on initial render. Fit column header tagged with active budget (RAM / GPU / N GPU). - Foldable Download admin-card: the Download h2 is the chevron trigger; state persists in localStorage. - Download card surfaces destination dir (Dir: <path>). Same dir on running task row, font/color matched to uptime (9px Fira Code muted, opacity .4). - Serve panel ctx text input always resets to model max on open. Sub-MB cached models show with red "download stalled" badge. - Bulk-select Cancel + Delete reset the Select button label on exit. - Cookbook running: false-finished bug fixed — DOWNLOAD_OK or /snapshots/ required; bare "Download complete" no longer marks the task done after the first config file. Clear button now sends tmux kill-session too. True overall % for multi-shard downloads: ((N-1)+frac)/total instead of hf_transfer per-shard aggregate. - Diagnosis card simplified: removed fold toggle, copy button, dismiss X. Suggestion font matches message body (12px). - HF token field flashes green check + "Saved" on save. - Cached scan no longer counts stalled rows as downloaded in Scan/Download. CSS: - dep Install button width pinned to 76px to match Installed split. - task-sub row +1px; task-status badge gets margin-right 8px. - Ctx slider styled like gallery editor sliders (thin pill rail, red thumb). - Bulk-select cancel button top -3px -> -5px.
This commit is contained in:
@@ -153,14 +153,31 @@ export function _renderGpuToggles(system) {
|
||||
}
|
||||
const validCounts = _validTpCounts(poolSize);
|
||||
const maxGpu = validCounts.length ? validCounts[validCounts.length - 1] : 0;
|
||||
// Commit the data layer to maxGpu on initial render so it matches the
|
||||
// visual highlight. Before this, _activeCount stayed undefined → no
|
||||
// gpu_count param sent → backend's fallback could rank against RAM on
|
||||
// mixed-resource boxes ("tightest" sorted by RAM instead of GPU).
|
||||
if (container._activeCount === undefined && validCounts.length) {
|
||||
container._activeCount = maxGpu;
|
||||
}
|
||||
html += '<button class="hwfit-gpu-btn" data-count="0" title="CPU / RAM only">RAM</button>';
|
||||
const hasExplicitCount = typeof container._activeCount === 'number';
|
||||
for (const n of validCounts) {
|
||||
const text = n === 1 ? 'GPU' : n + ' GPU';
|
||||
const isActive = hasExplicitCount ? (n === container._activeCount) : (container._activeCount === undefined && n === maxGpu);
|
||||
const isActive = hasExplicitCount && n === container._activeCount;
|
||||
html += `<button class="hwfit-gpu-btn${isActive ? ' active' : ''}" data-count="${n}" title="${n} GPU${n > 1 ? 's' : ''}">${text}</button>`;
|
||||
}
|
||||
// Also mark the RAM button active when the user explicitly chose RAM (0)
|
||||
// — the loop above only handles GPU buttons.
|
||||
if (container._activeCount === 0) {
|
||||
const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
|
||||
// (we just set innerHTML so we re-mark below after assignment)
|
||||
}
|
||||
container.innerHTML = html;
|
||||
if (container._activeCount === 0) {
|
||||
const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
|
||||
if (ramBtn) ramBtn.classList.add('active');
|
||||
}
|
||||
|
||||
// Pool dropdown: switch pools, reset the count to the new pool's max, rebuild.
|
||||
const sel = container.querySelector('#hwfit-gpu-group');
|
||||
@@ -188,9 +205,12 @@ export function _renderGpuToggles(system) {
|
||||
} else {
|
||||
btn.classList.add('active');
|
||||
container._activeCount = count;
|
||||
// Auto-set quant based on hardware selection
|
||||
// Auto-suggest a quant based on hardware selection — but ONLY when the
|
||||
// user has already picked a specific quant. When they're on "All"
|
||||
// (value === ""), leave them on All: toggling a GPU shouldn't silently
|
||||
// yank them out of the All view they wanted to see.
|
||||
const quantSel = document.getElementById('hwfit-quant');
|
||||
if (quantSel) {
|
||||
if (quantSel && quantSel.value !== '') {
|
||||
if (count <= 1) {
|
||||
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
|
||||
} else {
|
||||
@@ -211,9 +231,34 @@ export function _renderGpuToggles(system) {
|
||||
// reload paints instantly, then we refresh in the background and swap.
|
||||
const _SCAN_CACHE_KEY = 'hwfit_scan_cache_v1';
|
||||
const _MANUAL_HW_KEY = 'hwfit_manual_hardware_v1';
|
||||
const _CTX_KEY = 'hwfit_target_context_v1';
|
||||
const _CTX_PRESETS = [8192, 16384, 32768, 50000, 131072, 0]; // 0 = model max
|
||||
const _SCAN_CACHE_MAX = 12; // keep the newest N signatures
|
||||
const _SCAN_CACHE_TTL = 6 * 3600 * 1000; // 6 h — hardware rarely changes
|
||||
|
||||
// Ctx slider helpers (ported from origin/main). The slider picks an INDEX into
|
||||
// _CTX_PRESETS; _ctxValue() resolves it to a token count (0 = "Max"). The label
|
||||
// next to the slider re-renders to "8k" / "16k" / … / "Max".
|
||||
function _ctxLabel(value) {
|
||||
const n = Number(value) || 0;
|
||||
if (!n) return 'Max';
|
||||
return n >= 1000 ? Math.round(n / 1000) + 'k' : String(n);
|
||||
}
|
||||
function _ctxValue() {
|
||||
const slider = document.getElementById('hwfit-context');
|
||||
const idx = Math.max(0, Math.min(_CTX_PRESETS.length - 1, Number(slider?.value ?? 3) || 0));
|
||||
return _CTX_PRESETS[idx] || 0;
|
||||
}
|
||||
function _syncCtxControl() {
|
||||
const slider = document.getElementById('hwfit-context');
|
||||
const label = document.getElementById('hwfit-context-label');
|
||||
if (!slider) return;
|
||||
const saved = localStorage.getItem(_CTX_KEY);
|
||||
const savedIdx = saved == null ? 3 : _CTX_PRESETS.indexOf(Number(saved));
|
||||
slider.value = String(savedIdx >= 0 ? savedIdx : 3);
|
||||
if (label) label.textContent = _ctxLabel(_ctxValue());
|
||||
}
|
||||
|
||||
function _manualHwState() {
|
||||
try {
|
||||
const s = JSON.parse(localStorage.getItem(_MANUAL_HW_KEY) || '{}');
|
||||
@@ -749,6 +794,13 @@ export function _hwfitRenderList(el, models) {
|
||||
const sortSel = document.getElementById('hwfit-sort');
|
||||
const currentSort = sortSel?.value || 'score';
|
||||
const isReversed = sortSel?.dataset.reverse === '1';
|
||||
// Active budget for the Fit column label \u2014 make it obvious whether the
|
||||
// ranking is against GPU or RAM so "tightest" can't be ambiguous on a
|
||||
// mixed-resource box.
|
||||
const tc = document.getElementById('hwfit-gpu-toggles');
|
||||
const _budget = (tc && typeof tc._activeCount === 'number')
|
||||
? (tc._activeCount === 0 ? 'RAM' : (tc._activeCount === 1 ? 'GPU' : tc._activeCount + ' GPU'))
|
||||
: null;
|
||||
let html = '<div class="hwfit-row hwfit-header">';
|
||||
for (const col of _hwfitColumns) {
|
||||
const sortable = col.key ? ' hwfit-sortable' : '';
|
||||
@@ -760,7 +812,10 @@ export function _hwfitRenderList(el, models) {
|
||||
arrow = isReversed ? ' \u25B2' : ' \u25BC';
|
||||
}
|
||||
const dataAttr = col.key ? ` data-sort="${col.key}"` : '';
|
||||
html += `<span class="hwfit-col ${col.cls}${sortable}${active}"${dataAttr}>${col.label}${arrow}</span>`;
|
||||
const label = (col.cls === 'hwfit-fit' && _budget)
|
||||
? `${col.label} <span style="font-size:0.75em;opacity:0.6;font-weight:normal;">(${_budget})</span>`
|
||||
: col.label;
|
||||
html += `<span class="hwfit-col ${col.cls}${sortable}${active}"${dataAttr}>${label}${arrow}</span>`;
|
||||
}
|
||||
html += '</div>';
|
||||
for (const m of models) {
|
||||
@@ -1082,11 +1137,40 @@ export function _hwfitInit() {
|
||||
const uc = document.getElementById('hwfit-usecase');
|
||||
const sort = document.getElementById('hwfit-sort');
|
||||
const qpref = document.getElementById('hwfit-quant');
|
||||
const ctx = document.getElementById('hwfit-context');
|
||||
const ctxLabel = document.getElementById('hwfit-context-label');
|
||||
const search = document.getElementById('hwfit-search');
|
||||
const remote = document.getElementById('hwfit-host');
|
||||
_syncCtxControl();
|
||||
if (uc) uc.addEventListener('change', () => _hwfitFetch());
|
||||
if (sort) sort.addEventListener('change', () => _hwfitFetch());
|
||||
if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
|
||||
if (ctx && !ctx.dataset.bound) {
|
||||
ctx.dataset.bound = '1';
|
||||
ctx.addEventListener('input', () => {
|
||||
if (ctxLabel) ctxLabel.textContent = _ctxLabel(_ctxValue());
|
||||
});
|
||||
ctx.addEventListener('change', () => {
|
||||
const targetCtx = _ctxValue();
|
||||
try { localStorage.setItem(_CTX_KEY, String(targetCtx)); } catch {}
|
||||
// Ctx drag affects sort mode: a specific ctx target (anything < Max)
|
||||
// implies the user is hunting for "what fits at this context length",
|
||||
// so re-rank by fit (lowest first). Dragging back to Max means no
|
||||
// ctx constraint → go back to the default score-based ranking.
|
||||
const sortSel = document.getElementById('hwfit-sort');
|
||||
if (sortSel) {
|
||||
if (targetCtx) {
|
||||
sortSel.value = 'fit';
|
||||
sortSel.dataset.reverse = '1';
|
||||
} else {
|
||||
sortSel.value = 'score';
|
||||
sortSel.dataset.reverse = '';
|
||||
}
|
||||
}
|
||||
_hwfitCache = null;
|
||||
_hwfitFetch();
|
||||
});
|
||||
}
|
||||
// Rescan — force a fresh hardware probe (bypasses the per-host cache).
|
||||
const rescan = document.getElementById('hwfit-rescan');
|
||||
if (rescan && !rescan.dataset.bound) {
|
||||
|
||||
Reference in New Issue
Block a user