mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 00:22:10 -04:00
Cookbook UI: Ollama browser, advanced serve fold, API tokens form, diagnosis toolbar, polish
Surface a lot of accumulated cookbook + UI work as a single non-agent
commit so the agent rework lands cleanly.
Highlights:
- Ollama as a first-class backend in the Cookbook:
* Download input accepts ollama-style names (name:tag) → backend=ollama
* /api/cookbook/ollama/library (cached scrape of ollama.com + curated
fallback so classic models like qwen2.5 stay reachable)
* "Browse Ollama library" toggle below Download with size chips
* Engine=Ollama in hwfit toolbar merges the Ollama library into the
main scan list as per-tag rows with the same Fit/Param/Quant/VRAM
columns; click → fills Download input
- API Tokens form added to Integrations panel (matching wired
loadTokens()/initTokenForm() that had no HTML)
- Serve panel polish: Advanced fold tightening (-8px nudges on vLLM
checks, Extra args, Spec row), n_cpu_moe + Split Mode controls
pulled up 8px to align with the row's checkboxes, GGUF File dropdown
exposed for Ollama backend, GPU re-render on Edit serve restore,
_forceBackend flag so saved serveState wins over backend detection,
cookbook:servers-changed CustomEvent so panels don't need refresh
- Models page redesign: Add Models row (URL + hidden API key reveal +
Type select + Scan/Ollama/Key/Test/Add icon buttons), Probe All +
Clear-offline buttons in Added Models toolbar, offline-pill removed
(opacity already conveys state), Engine dropdown gains Ollama option
- _ping_endpoint probes /v1/models then base, accepts 4xx as
reachable (vLLM returns 404 on bare /v1, fully working endpoints
were showing offline)
- Diagnosis card: × dismiss + Copy bundle buttons restored on the
serve error feedback card
- Orphan tmux sweep re-enabled behind a 60s rate-limit + background
Thread (off the main event loop) so dead serves get discovered
- cookbook_routes auto-register watchdog: drops the endpoint if the
serve session exits non-zero within the first ~3min
- ollama-rocm sidecar awareness in download wrapper (`docker exec
ollama-rocm ollama pull` when host ollama isn't installed)
- Skill extractor sets initial_status="published" when
auto_approve_skills pref is on (audit demotes later)
- Skill list / model list / cookbook scan misc polish
This commit is contained in:
+159
-8
@@ -416,9 +416,11 @@ function _hwfitShowError(list, host, detail) {
|
||||
if (rb) rb.addEventListener('click', () => { _resetGpuToggleState(); _hwfitFetch(true); });
|
||||
}
|
||||
|
||||
// Client-side "Engine" filter (llama.cpp / vLLM / SGLang). Empty = show all.
|
||||
// Uses the same _detectBackend() the serve commands use, so what you filter to
|
||||
// is exactly what would be launched. Pure view filter — no refetch needed.
|
||||
// Client-side "Engine" filter (llama.cpp / vLLM / SGLang / Ollama). Empty =
|
||||
// show all. Uses the same _detectBackend() the serve commands use, so what you
|
||||
// filter to is exactly what would be launched. Pure view filter — no refetch
|
||||
// needed. Ollama rows are merged into the main list (see _ensureOllamaLib +
|
||||
// _ollamaToHwfitRows below) so the filter handles all engines uniformly.
|
||||
function _applyEngineFilter(models) {
|
||||
const want = document.getElementById('hwfit-engine')?.value || '';
|
||||
if (!want || !Array.isArray(models)) return models || [];
|
||||
@@ -427,6 +429,86 @@ function _applyEngineFilter(models) {
|
||||
});
|
||||
}
|
||||
|
||||
// Ollama library cache (per-page). Filled lazily on first _hwfitFetch; the raw
|
||||
// list is the same shape returned by /api/cookbook/ollama/library, then turned
|
||||
// into per-tag hwfit rows so they slot into the main list grid alongside HF
|
||||
// scan results.
|
||||
// Per-page cache of the raw Ollama library list. `null` means "not loaded
// yet"; it is only populated by a successful fetch so that a transient
// failure can be retried on the next scan.
let _ollamaLibCache = null;

/**
 * Lazily load the Ollama library list from /api/cookbook/ollama/library.
 *
 * The first successful response is cached for the lifetime of the page and
 * returned on every later call. Failures (network error, non-2xx status,
 * unparseable body) are best-effort: they resolve to an empty list but are
 * NOT cached, so the next call tries again instead of leaving the Ollama
 * rows permanently empty until a reload.
 *
 * @returns {Promise<Array>} The `models` array from the endpoint, or `[]`
 *   when the library could not be loaded or the payload has no array.
 */
async function _ensureOllamaLib() {
  if (_ollamaLibCache) return _ollamaLibCache;
  try {
    const res = await fetch('/api/cookbook/ollama/library');
    // An error status may still carry a JSON body; don't mistake it for an
    // (empty) library and cache it.
    if (!res.ok) return [];
    const data = await res.json();
    _ollamaLibCache = Array.isArray(data?.models) ? data.models : [];
  } catch {
    // Deliberately best-effort: the HF scan results must still render when
    // the Ollama library is unreachable.
    return [];
  }
  return _ollamaLibCache;
}
|
||||
|
||||
// Convert an Ollama library entry's sizes into per-tag hwfit rows. Shape
|
||||
// matches what _hwfitRenderList expects (fit_level, parameter_count,
|
||||
// required_gb, score, …) so the rows render identically to HF results.
|
||||
/**
 * Parse an Ollama size tag into a parameter count expressed in billions.
 *
 * Recognised forms (case-insensitive, surrounding whitespace ignored):
 *   "14b"  → 14
 *   "1.5b" → 1.5
 *   "8x7b" → 56     (experts × size; a rough upper bound)
 *   "135m" → 0.135
 * Anything else ("latest", empty, non-string values) yields `null`.
 *
 * @param {*} s Size tag as it appears in a library entry's `sizes` list.
 * @returns {number|null} Parameter count in billions, or `null` if unknown.
 */
function _olParseSize(s) {
  // The list comes from scraped JSON, so don't assume every entry is a
  // string — a stray number/object must not throw and abort the whole merge.
  if (typeof s !== 'string') return null;
  const low = s.trim().toLowerCase();
  if (!low) return null;

  // Mixture-of-experts style: "<experts>x<size>b".
  let m = low.match(/^(\d+(?:\.\d+)?)x(\d+(?:\.\d+)?)b$/);
  if (m) return parseFloat(m[1]) * parseFloat(m[2]);

  // Plain billions: "<size>b".
  m = low.match(/^(\d+(?:\.\d+)?)b$/);
  if (m) return parseFloat(m[1]);

  // Millions: "<size>m", converted to billions.
  m = low.match(/^(\d+(?:\.\d+)?)m$/);
  if (m) return parseFloat(m[1]) / 1000;

  return null;
}
|
||||
/**
 * Expand Ollama library entries into one hwfit row per size tag.
 *
 * Each row is named `<name>:<size>`; entries without a usable `sizes` list
 * get a single `latest` row. Malformed entries (non-objects, or entries
 * without a non-empty string `name`) are skipped so that one bad record in
 * the scraped library can neither throw nor produce an `undefined:<size>`
 * tag.
 *
 * @param {Array} libModels Raw library entries ({ name, sizes, description }).
 * @param {number} vramAvail Available GPU VRAM in GB (falsy if unknown).
 * @param {number} ramAvail Total system RAM in GB (falsy if unknown).
 * @returns {Array<Object>} Rows with fit_level, parameter_count, required_gb,
 *   score, etc.; `[]` when `libModels` is not an array.
 */
function _ollamaToHwfitRows(libModels, vramAvail, ramAvail) {
  const out = [];
  if (!Array.isArray(libModels)) return out;

  for (const m of libModels) {
    // Skip malformed entries rather than throwing on `m.sizes` or emitting
    // a bogus "undefined:<size>" tag.
    if (!m || typeof m !== 'object' || typeof m.name !== 'string' || !m.name) continue;

    const sizes = (Array.isArray(m.sizes) && m.sizes.length) ? m.sizes : ['latest'];
    for (const sz of sizes) {
      const params = _olParseSize(sz);

      // Ollama default GGUF is ~Q4_K_M. Rough VRAM estimate: 0.6 GB per
      // billion parameters. Unknown sizes get 0.
      const vramGb = params ? params * 0.6 : 0;

      // Unknown size or unknown hardware stays at 'no_fit'.
      let fitLevel = 'no_fit';
      if (vramGb && vramAvail) {
        if (vramGb <= vramAvail * 0.6) fitLevel = 'perfect';
        else if (vramGb <= vramAvail) fitLevel = 'good';
        else if (ramAvail && vramGb <= ramAvail) fitLevel = 'marginal';
        else fitLevel = 'too_tight';
      } else if (vramGb && ramAvail && vramGb <= ramAvail) {
        // No GPU figure, but the estimate fits in system RAM.
        fitLevel = 'marginal';
      }

      const tag = `${m.name}:${sz}`;

      // "14B" / "1.5B" for >= 1B, "135M" below that, "?" when unknown.
      let paramsLabel = '?';
      if (params) {
        paramsLabel = params >= 1
          ? `${params.toFixed(params >= 10 ? 0 : 1)}B`
          : `${(params * 1000).toFixed(0)}M`;
      }

      // A modest score so Ollama rows still sort sensibly in the default
      // score view: bigger models get a slightly higher base, capped at 60;
      // unknown sizes get a flat 25.
      const score = params ? Math.min(30 + params * 0.3, 60) : 25;

      out.push({
        name: tag,
        repo_id: tag,
        quant: 'Q4_K_M',
        parameter_count: paramsLabel,
        params_b: params || 0,
        required_gb: vramGb,
        fit_level: fitLevel,
        score,
        speed_tps: 0,
        context: 0,
        is_gguf: true,
        backend: 'ollama',
        _isOllama: true,
        _olName: m.name,
        _olSize: sz,
        _description: m.description || '',
      });
    }
  }
  return out;
}
|
||||
|
||||
export async function _hwfitFetch(fresh = false) {
|
||||
const _tk = ++_hwfitFetchToken;
|
||||
const useCase = document.getElementById('hwfit-usecase')?.value || '';
|
||||
@@ -475,7 +557,12 @@ export async function _hwfitFetch(fresh = false) {
|
||||
_setLastCacheHost(remoteKey);
|
||||
const _cacheSrv = _serverByVal(_envState.remoteServerKey || remoteHost);
|
||||
const _cachePort = _cacheSrv?.port || '';
|
||||
const _cacheParams = new URLSearchParams({ host: remoteHost }); if (_cachePort) _cacheParams.set('ssh_port', _cachePort); if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
|
||||
const _cacheParams = new URLSearchParams();
|
||||
if (remoteHost) {
|
||||
_cacheParams.set('host', remoteHost);
|
||||
if (_cachePort) _cacheParams.set('ssh_port', _cachePort);
|
||||
if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
|
||||
}
|
||||
fetch(`/api/model/cached?${_cacheParams}`, { credentials: 'same-origin' })
|
||||
.then(r => r.json())
|
||||
.then(d => {
|
||||
@@ -543,7 +630,18 @@ export async function _hwfitFetch(fresh = false) {
|
||||
// A newer scan started while this one was in flight (user switched servers
|
||||
// mid-probe) — drop this stale response so it can't clobber the new one.
|
||||
if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
|
||||
if (!res.ok) throw new Error(res.statusText);
|
||||
if (!res.ok) {
|
||||
const body = await res.text().catch(() => '');
|
||||
let msg = '';
|
||||
try {
|
||||
const payload = JSON.parse(body);
|
||||
msg = payload && (payload.detail || payload.error || payload.message);
|
||||
} catch {
|
||||
msg = body;
|
||||
}
|
||||
msg = typeof msg === 'string' ? msg.trim() : '';
|
||||
throw new Error(`HTTP ${res.status} ${res.statusText}${msg ? `: ${msg}` : ''}`);
|
||||
}
|
||||
let data = await res.json();
|
||||
if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
|
||||
if (!isImageMode && quantPref && !data.error && Array.isArray(data.models) && data.models.length === 0) {
|
||||
@@ -583,6 +681,23 @@ export async function _hwfitFetch(fresh = false) {
|
||||
if (!_cached) { _hwfitShowError(list, remoteHost, data.error); if (hw) hw.innerHTML = ''; }
|
||||
return;
|
||||
}
|
||||
// Merge Ollama library rows into the main list so they appear with the
|
||||
// same Fit/Param/Quant/VRAM/Mode columns as HF results and respond to the
|
||||
// Engine filter. Skipped in image-gen mode (Ollama doesn't serve diffusers).
|
||||
if (!isImageMode) {
|
||||
const _vramAvail = data.system?.gpu_vram_gb || 0;
|
||||
const _ramAvail = data.system?.total_ram_gb || 0;
|
||||
const _lib = await _ensureOllamaLib();
|
||||
const _olRows = _ollamaToHwfitRows(_lib, _vramAvail, _ramAvail);
|
||||
// Search filter on Ollama rows: HF API already filters by search; do the
|
||||
// same client-side over Ollama name + description so the search box
|
||||
// works consistently across both sources.
|
||||
const _s = (search || '').trim().toLowerCase();
|
||||
const _olFiltered = _s
|
||||
? _olRows.filter(r => r.name.toLowerCase().includes(_s) || (r._description || '').toLowerCase().includes(_s))
|
||||
: _olRows;
|
||||
data.models = (data.models || []).concat(_olFiltered);
|
||||
}
|
||||
_hwfitCache = data;
|
||||
_hwfitRenderHw(hw, data.system);
|
||||
// Propagate local platform from hardware probe so _isWindows(task) works
|
||||
@@ -964,14 +1079,36 @@ export function _hwfitRenderList(el, models) {
|
||||
html += `</div>`;
|
||||
}
|
||||
el.innerHTML = html;
|
||||
// Click row → expand inline action panel
|
||||
// Click row → expand inline action panel. Exception: Ollama rows skip the
|
||||
// expand panel (no HF metadata to power it) and just fill the Download
|
||||
// input with the `<name>:<size>` tag — one click → ready to pull.
|
||||
el.querySelectorAll('.hwfit-row:not(.hwfit-header)').forEach(row => {
|
||||
row.addEventListener('click', () => {
|
||||
const name = row.dataset.model;
|
||||
if (!name) return;
|
||||
// Find model data from cache
|
||||
const modelData = (_hwfitCache?.models || []).find(m => m.name === name);
|
||||
if (!modelData) return;
|
||||
if (modelData._isOllama) {
|
||||
// Force-open the Download card if it's been collapsed — otherwise
|
||||
// filling the (hidden) input silently swallows the click.
|
||||
const dlBody = document.getElementById('cookbook-download-card-body');
|
||||
const dlArrow = document.getElementById('cookbook-download-card-arrow');
|
||||
if (dlBody && dlBody.style.display === 'none') {
|
||||
dlBody.style.display = 'block';
|
||||
if (dlArrow) dlArrow.style.transform = 'rotate(90deg)';
|
||||
}
|
||||
const dlInput = document.getElementById('cookbook-dl-repo');
|
||||
if (dlInput) {
|
||||
dlInput.value = modelData.name;
|
||||
dlInput.focus();
|
||||
// Briefly highlight so the user sees what got filled even when the
|
||||
// download card sits far above the (long) hwfit list.
|
||||
dlInput.classList.add('cookbook-dl-flash');
|
||||
setTimeout(() => dlInput.classList.remove('cookbook-dl-flash'), 800);
|
||||
dlInput.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
||||
}
|
||||
return;
|
||||
}
|
||||
_expandModelRow(row, modelData);
|
||||
});
|
||||
});
|
||||
@@ -1297,7 +1434,7 @@ export function _hwfitInit() {
|
||||
if (sort) sort.addEventListener('change', () => _hwfitFetch());
|
||||
if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
|
||||
// Engine filter is a pure client-side view filter over the already-fetched
|
||||
// list, so just re-render from cache instead of re-probing hardware.
|
||||
// list (HF + Ollama merged), so just re-render from cache.
|
||||
const engine = document.getElementById('hwfit-engine');
|
||||
if (engine) engine.addEventListener('change', () => {
|
||||
const list = document.getElementById('hwfit-list');
|
||||
@@ -1694,6 +1831,15 @@ export function _hwfitInit() {
|
||||
saveBtn.addEventListener('click', () => {
|
||||
_syncServers();
|
||||
_rebuildServerSelect();
|
||||
// Broadcast for anything outside the settings tab that depends on
|
||||
// the server list (Serve dialog host picker, Running tasks, etc.).
|
||||
// Without this the user had to hard-refresh to see the new entry
|
||||
// in those other places.
|
||||
try {
|
||||
document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
|
||||
detail: { servers: _envState.servers.slice() },
|
||||
}));
|
||||
} catch (_) {}
|
||||
saveBtn.classList.add('saved');
|
||||
saveBtn.innerHTML = '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="2.6" stroke-linecap="round" stroke-linejoin="round" style="margin-right:4px;flex-shrink:0;"><polyline points="20 6 9 17 4 12"/></svg>Saved';
|
||||
});
|
||||
@@ -1713,6 +1859,11 @@ export function _hwfitInit() {
|
||||
entry.remove();
|
||||
_syncServers();
|
||||
_rebuildServerSelect();
|
||||
try {
|
||||
document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
|
||||
detail: { servers: _envState.servers.slice() },
|
||||
}));
|
||||
} catch (_) {}
|
||||
_hwfitCache = null;
|
||||
_hwfitFetch();
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user