mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 15:45:22 -04:00
fix(cookbook): treat local Windows as Windows for serve commands (#3975)
* fix(cookbook): prefer native llama-server on local Windows
* fix(cookbook): harden local llama-server launch commands
* fix(cookbook): build serve commands for selected target
This commit is contained in:
+30
-14
@@ -76,7 +76,7 @@ function _platformIcon(platform) {
|
||||
return '';
|
||||
}
|
||||
|
||||
export let _envState = { env: 'none', envPath: '', hfToken: '', hfTokenConfigured: false, hfTokenMasked: '', gpus: '', remoteHost: '', servers: [], modelPaths: [], platform: '', defaultServer: '' };
|
||||
export let _envState = { env: 'none', envPath: '', hfToken: '', hfTokenConfigured: false, hfTokenMasked: '', gpus: '', remoteHost: '', servers: [], modelPaths: [], platform: '', hostPlatform: '', defaultServer: '' };
|
||||
let _lastCacheHostVal = null;
|
||||
let _cookbookOpeningSpinners = [];
|
||||
export function _lastCacheHost() { return _lastCacheHostVal; }
|
||||
@@ -213,8 +213,13 @@ function _getPort(hostOrTask) {
|
||||
|
||||
/** Get platform for a given host (or task object). Returns 'windows', 'termux', 'linux', or '' */
|
||||
export function _getPlatform(hostOrTask) {
|
||||
if (!hostOrTask) return _envState.platform || '';
|
||||
if (typeof hostOrTask === 'object') return hostOrTask.platform || _getPlatform(hostOrTask.remoteServerKey || hostOrTask.remoteHost);
|
||||
if (hostOrTask === 'local') return _envState.hostPlatform || '';
|
||||
if (!hostOrTask) return _envState.remoteHost ? (_envState.platform || '') : (_envState.hostPlatform || '');
|
||||
if (typeof hostOrTask === 'object') {
|
||||
const taskHost = hostOrTask.remoteServerKey || hostOrTask.remoteHost || '';
|
||||
if (!taskHost || taskHost === 'local') return _envState.hostPlatform || '';
|
||||
return hostOrTask.platform || _getPlatform(taskHost);
|
||||
}
|
||||
const selected = hostOrTask === _envState.remoteHost ? _selectedServer() : null;
|
||||
const srv = selected || _serverByVal(hostOrTask);
|
||||
return srv?.platform || '';
|
||||
@@ -638,7 +643,12 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
// GPU list — read from gpus (button strip); fall back to gpu_id for
|
||||
// backward-compat with older saved presets that pre-date the removal.
|
||||
const gpuId = (f.gpus || f.gpu_id || '').toString().trim();
|
||||
const py = _isWindows() ? 'python' : 'python3';
|
||||
const _targetHost = Object.prototype.hasOwnProperty.call(f, 'host')
|
||||
? String(f.host || '').trim()
|
||||
: String(_envState.remoteHost || '').trim();
|
||||
const _isWin = _targetHost ? _isWindows(_targetHost) : _isWindows('local');
|
||||
const _localWindows = _isWin && !_targetHost;
|
||||
const py = _isWin ? 'python' : 'python3';
|
||||
// CPU-only serve (-ngl 0): drop the GPU-only flags, otherwise the command
|
||||
// mixes "zero GPU layers" with CUDA unified-memory + flash-attn and fails to
|
||||
// start (issue #1291). Only affects the ngl=0 path; GPU serving is unchanged.
|
||||
@@ -660,19 +670,19 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
// with misleading prefixes.
|
||||
const _sb = String(_hwfitCache?.system?.backend || '').toLowerCase();
|
||||
const _hwfitHost = String(_hwfitCache?._scannedHost || '');
|
||||
const _curHost = String(_envState.remoteHost || '');
|
||||
const _curHost = _targetHost;
|
||||
const _isCudaTarget = (_sb === 'cuda') && (_hwfitHost === _curHost);
|
||||
const lcPrefix = (() => {
|
||||
let p = '';
|
||||
if (f.unified_mem && !_cpuOnly && !_isWindows() && _isCudaTarget) p += `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1 `;
|
||||
// No GPU env var in CPU mode — `-ngl 0` already disables offload
|
||||
if (f.unified_mem && !_cpuOnly && (!_isWin || _localWindows) && _isCudaTarget) p += `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1 `;
|
||||
// No GPU env var in CPU mode - `-ngl 0` already disables offload
|
||||
// so CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES would be misleading
|
||||
// clutter ("why is CUDA pinned for a CPU run?").
|
||||
if (!_isWindows() && !_cpuOnly) p += _gpuEnvPrefix(gpuId);
|
||||
if ((!_isWin || _localWindows) && !_cpuOnly) p += _gpuEnvPrefix(gpuId);
|
||||
return p;
|
||||
})();
|
||||
if (f.unified_mem && !_cpuOnly && _isWindows() && _isCudaTarget) cmd += `$env:GGML_CUDA_ENABLE_UNIFIED_MEMORY="1"; `;
|
||||
if (_isWindows() && !_cpuOnly) cmd += _gpuEnvPrefix(gpuId, true);
|
||||
if (f.unified_mem && !_cpuOnly && _isWin && !_localWindows && _isCudaTarget) cmd += `$env:GGML_CUDA_ENABLE_UNIFIED_MEMORY="1"; `;
|
||||
if (_isWin && !_localWindows && !_cpuOnly) cmd += _gpuEnvPrefix(gpuId, true);
|
||||
const needsGgufPrelude = /^\$\(\{\s*find\s/.test(String(ggufPath || ''));
|
||||
const modelArg = needsGgufPrelude ? '"$MODEL_FILE"' : `"${ggufPath}"`;
|
||||
// Prefer native llama-server. The backend bootstrap resolves/builds the
|
||||
@@ -744,11 +754,16 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
// llama-cpp-python takes the projector via --clip_model_path.
|
||||
_lcpExtra += ` --clip_model_path "${f._mmproj_path}"`;
|
||||
}
|
||||
if (_isWindows()) {
|
||||
const _lcpServer = `${lcPrefix}${py} -m llama_cpp.server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} --n_gpu_layers ${f.ngl || '99'} --n_ctx ${f.ctx || '8192'}${_lcpExtra}`;
|
||||
const _lcServer = `${lcPrefix}llama-server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} -ngl ${f.ngl || '99'} -c ${f.ctx || '8192'}${_lcExtra}`;
|
||||
const _lcpServer = `${lcPrefix}${py} -m llama_cpp.server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} --n_gpu_layers ${f.ngl || '99'} --n_ctx ${f.ctx || '8192'}${_lcpExtra}`;
|
||||
if (_localWindows) {
|
||||
// Local Windows serve is launched through Git Bash, so use the native
|
||||
// llama-server shape and let PATH resolve the CUDA Release wrapper.
|
||||
cmd += _lcServer;
|
||||
} else if (_isWin) {
|
||||
cmd += _lcpServer;
|
||||
} else {
|
||||
cmd += `${lcPrefix}llama-server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} -ngl ${f.ngl || '99'} -c ${f.ctx || '8192'}${_lcExtra}`;
|
||||
cmd += _lcServer;
|
||||
}
|
||||
if (needsGgufPrelude) {
|
||||
cmd = `MODEL_FILE=${ggufPath} && { [ -n "$MODEL_FILE" ] && [ -f "$MODEL_FILE" ]; } || { echo "ERROR: No GGUF found on this host"; exit 1; } && ${cmd}`;
|
||||
@@ -2612,13 +2627,14 @@ function _renderRecipes() {
|
||||
const isLocal = !s.host || s.host.toLowerCase() === 'local';
|
||||
if (isLocal) {
|
||||
s.host = '';
|
||||
s.platform = _envState.hostPlatform || '';
|
||||
if (_localSeen) return false;
|
||||
_localSeen = true;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
if (!_localSeen) {
|
||||
_es.servers.unshift({ host: '', env: _es.env || 'none', envPath: _es.envPath || '', modelDir: '~/.cache/huggingface/hub' });
|
||||
_es.servers.unshift({ host: '', env: _es.env || 'none', envPath: _es.envPath || '', modelDir: '~/.cache/huggingface/hub', platform: _envState.hostPlatform || '' });
|
||||
}
|
||||
if (_es.remoteHost && !_es.servers.some(s => s.host === _es.remoteHost)) {
|
||||
_es.servers.push({ host: _es.remoteHost, env: _es.env || 'none', envPath: _es.envPath || '', modelDir: '~/.cache/huggingface/hub' });
|
||||
|
||||
@@ -781,6 +781,7 @@ function _stripStateSecrets(state) {
|
||||
const safe = { ...state };
|
||||
if (safe.env && typeof safe.env === 'object') {
|
||||
const { hfToken, ...env } = safe.env;
|
||||
delete env.hostPlatform;
|
||||
safe.env = env;
|
||||
}
|
||||
if (Array.isArray(safe.tasks)) safe.tasks = safe.tasks.map(_redactTaskForStorage);
|
||||
@@ -1673,7 +1674,7 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
|
||||
|| _envState.servers.find(s => s.host === _host) || {};
|
||||
const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local');
|
||||
const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local');
|
||||
const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || '');
|
||||
const _hplatform = _host ? (_hsrv.platform || '') : (_envState.hostPlatform || '');
|
||||
const _replaceTaskId = fields?._replaceTaskId || '';
|
||||
if (_replaceTaskId) {
|
||||
try {
|
||||
@@ -1688,7 +1689,6 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
// Replace any serve already targeting this same host:port — you can't run two
|
||||
// servers on one port, so re-serving (or retrying) should stop & remove the
|
||||
// old one instead of leaving a dead duplicate behind. (The retry buttons
|
||||
|
||||
@@ -527,7 +527,7 @@ function _selectedServeTarget(panel) {
|
||||
env: server?.env || '',
|
||||
port: host ? (server?.port || _getPort(host) || '') : '',
|
||||
venv,
|
||||
platform: server?.platform || _envState.platform || '',
|
||||
platform: host ? (server?.platform || '') : (_envState.hostPlatform || ''),
|
||||
label,
|
||||
};
|
||||
}
|
||||
@@ -658,6 +658,12 @@ function _selectedGgufSizeGb(model, relPath) {
|
||||
return bytes / (1024 ** 3);
|
||||
}
|
||||
|
||||
function _projectorGgufFiles(model) {
|
||||
return _ggufFilesForModel(model)
|
||||
.filter(f => (f.role || '') === 'projector' || /(^|\/)mmproj[^/]*\.gguf$/i.test(f.rel_path || f.name || ''))
|
||||
.sort((a, b) => String(a.rel_path || a.name || '').localeCompare(String(b.rel_path || b.name || '')));
|
||||
}
|
||||
|
||||
function _ggufFileLabel(file) {
|
||||
const base = (file.name || file.rel_path || '').split('/').pop();
|
||||
const size = _formatGgufSize(file.size_bytes);
|
||||
@@ -1198,6 +1204,7 @@ function _rerenderCachedModels() {
|
||||
panelHtml += `<div class="hwfit-serve-warn" style="margin:0 0 8px;padding:6px 10px;border-radius:5px;font-size:11px;background:color-mix(in srgb, var(--color-warning, #f0ad4e) 14%, transparent);border:1px solid color-mix(in srgb, var(--color-warning, #f0ad4e) 40%, transparent);color:var(--color-warning, #f0ad4e);display:flex;gap:6px;align-items:flex-start;line-height:1.4;"><span aria-hidden="true">⚠</span><span>${_warnText}</span></div>`;
|
||||
}
|
||||
panelHtml += `<div class="hwfit-serve-preset-row">${_slotsHtml}</div>`;
|
||||
panelHtml += `<div class="hwfit-serve-vision-warn" style="display:none;margin:0 0 8px;padding:6px 10px;border-radius:5px;font-size:11px;background:color-mix(in srgb, var(--color-warning, #f0ad4e) 14%, transparent);border:1px solid color-mix(in srgb, var(--color-warning, #f0ad4e) 40%, transparent);color:var(--color-warning, #f0ad4e);gap:6px;align-items:flex-start;line-height:1.4;"><span aria-hidden="true">⚠</span><span>Vision is enabled, but no mmproj GGUF projector was found in the cached model scan. Download an mmproj-*.gguf for this model, then refresh the cached model list before launching.</span></div>`;
|
||||
// Row 1: Engine + Server + Env
|
||||
panelHtml += `<div class="hwfit-serve-row">`;
|
||||
const backendOpts = _backendChoices.map(([v,l]) => `<option value="${v}"${defaultBackend===v?' selected':''}>${l}</option>`).join('');
|
||||
@@ -1524,6 +1531,11 @@ function _rerenderCachedModels() {
|
||||
if (el.type === 'checkbox') f[el.dataset.field] = el.checked;
|
||||
else f[el.dataset.field] = el.value;
|
||||
});
|
||||
const buildTarget = _selectedServeTarget(panel);
|
||||
f.host = buildTarget.host || '';
|
||||
f.platform = buildTarget.platform || '';
|
||||
const hostField = panel.querySelector('[data-field="host"]');
|
||||
if (hostField) hostField.value = f.host;
|
||||
const backend = f.backend || 'vllm';
|
||||
const serveModel = (f.model_path || '').trim() || (m.is_local_dir && m.path ? `${m.path}/${repo}` : repo);
|
||||
if (backend === 'llamacpp') {
|
||||
@@ -1543,11 +1555,11 @@ function _rerenderCachedModels() {
|
||||
: m.is_local_dir && m.path
|
||||
? `$({ find ${_ldir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${_ldir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`
|
||||
: `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
|
||||
// Vision: auto-find the mmproj (CLIP/projector) file in the same dir.
|
||||
// Resolved at runtime so the toggle just works if an mmproj-*.gguf is
|
||||
// present (downloaded alongside the model). Empty if none → cmd omits it.
|
||||
const _vsearchdir = (m.is_local_dir && m.path) ? _ldir : dir;
|
||||
f._mmproj_path = `$(find ${_vsearchdir} -iname 'mmproj*.gguf' 2>/dev/null | sort | head -1)`;
|
||||
// Vision: use the scanned projector (CLIP/mmproj) file when present.
|
||||
// Keeping this as a printf path avoids generating a command substitution
|
||||
// that the backend serve-command validator must reject as unsafe.
|
||||
const selectedProjector = _projectorGgufFiles(m)[0];
|
||||
f._mmproj_path = selectedProjector ? _selectedGgufExpr(m, repo, selectedProjector.rel_path) : '';
|
||||
}
|
||||
if (f.reasoning_parser) {
|
||||
const _rpEl2 = panel.querySelector('[data-field="reasoning_parser"]');
|
||||
@@ -1563,6 +1575,10 @@ function _rerenderCachedModels() {
|
||||
}
|
||||
let cmd = _buildServeCmd(f, serveModel, backend);
|
||||
if (f.extra && f.extra.trim()) cmd += ' ' + f.extra.trim();
|
||||
const missingVisionProjector = backend === 'llamacpp' && !!f.vision && !f._mmproj_path;
|
||||
panel._visionMissingProjector = missingVisionProjector;
|
||||
const _visionWarn = panel.querySelector('.hwfit-serve-vision-warn');
|
||||
if (_visionWarn) _visionWarn.style.display = missingVisionProjector ? 'flex' : 'none';
|
||||
const _ce2 = panel.querySelector('.hwfit-serve-cmd'); _ce2.value = _formatServeCmdPreview(cmd); _ce2.style.height = 'auto'; _ce2.style.height = _ce2.scrollHeight + 'px';
|
||||
panel._cmd = cmd;
|
||||
panel._host = f.host || '';
|
||||
@@ -2938,12 +2954,16 @@ function _rerenderCachedModels() {
|
||||
});
|
||||
serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
|
||||
const launchTarget = _selectedServeTarget(panel);
|
||||
if (serveState.backend === 'llamacpp' && serveState.vision && !/(?:^|\s)(?:--mmproj|--clip_model_path)\b/.test(launchCmd)) {
|
||||
_restoreLaunchBtn();
|
||||
uiModule.showToast('Vision is checked, but no mmproj projector is in the launch command. Refresh cached models after downloading mmproj, or add --mmproj manually.', 8000);
|
||||
return;
|
||||
}
|
||||
if (serveState.backend === 'diffusers' && _remoteWindowsDiffusersUnsupported(launchTarget)) {
|
||||
_restoreLaunchBtn();
|
||||
uiModule.showToast('Diffusers serving is not supported on remote Windows servers yet. Use local Windows or a Linux server.', 9000);
|
||||
return;
|
||||
}
|
||||
|
||||
// Pre-launch: check our own task list for a serve already running
|
||||
// on this host. Offer to stop+launch as the default action — the
|
||||
// SSH-based port probe below is more thorough but it can miss
|
||||
|
||||
Reference in New Issue
Block a user