diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index 51f019edb..836b26c8a 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -561,7 +561,7 @@ def _bash_squote(v: str) -> str: # Allow-list of binaries permitted as the leading token of `req.cmd` for /api/model/serve. # Anything else is rejected before the cmd is interpolated into a tmux/PowerShell wrapper. _SERVE_CMD_ALLOWLIST = { - "vllm", "llama-server", "llama_server", "llama.cpp", "ollama", + "vllm", "llama-server", "llama-server.exe", "llama_server", "llama.cpp", "ollama", "python", "python3", "sglang", "lmdeploy", "node", "npx", diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index f57ecf6e5..a0fb672d7 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -73,6 +73,9 @@ def setup_cookbook_routes() -> APIRouter: return "stored" return f"{value[:4]}...{value[-4:]}" + def _client_host_platform() -> str: + return "windows" if IS_WINDOWS else "" + def _decrypt_secret(value: str | None) -> str: if not value: return "" @@ -245,11 +248,15 @@ def setup_cookbook_routes() -> APIRouter: """Return cookbook state without raw secrets for browser clients.""" _strip_task_secrets(state) env = state.get("env") if isinstance(state, dict) else None + if isinstance(state, dict) and not isinstance(env, dict): + env = {} + state["env"] = env if isinstance(env, dict): token = _decrypt_secret(env.get("hfToken")) env.pop("hfToken", None) env["hfTokenConfigured"] = bool(token) env["hfTokenMasked"] = _mask_secret(token) + env["hostPlatform"] = _client_host_platform() return state def _state_for_storage(state, on_disk=None): @@ -268,6 +275,7 @@ def setup_cookbook_routes() -> APIRouter: env.pop("hfToken", None) env.pop("hfTokenMasked", None) env.pop("hfTokenConfigured", None) + env.pop("hostPlatform", None) return state def _load_stored_hf_token() -> str: @@ -1479,6 +1487,10 @@ def setup_cookbook_routes() -> APIRouter: # shell resolves the bundled python3/hf, mirroring the download flow. if not remote: runner_lines.append(_local_tooling_path_export(sys.executable)) + if local_windows: + # Detached Git Bash runs do not always inherit recently edited + # user PATH entries from the already-running Odysseus process. + runner_lines.append('export PATH="$HOME/bin:$HOME/llama.cpp/build-cuda/bin/Release:$HOME/llama.cpp/build/bin/Release:$HOME/llama.cpp/build/bin/Debug:$HOME/llama.cpp/build/bin:$PATH"') runner_lines.append("export FLASHINFER_DISABLE_VERSION_CHECK=1") if req.hf_token: runner_lines.append(f"export HF_TOKEN='{_bash_squote(req.hf_token)}'") @@ -1493,7 +1505,8 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(_HF_TOKEN_STATUS_SNIPPET) handled_ollama_serve = False # Auto-install inference engine if missing - if "llama_cpp" in req.cmd or "llama-server" in req.cmd: + local_windows_llama_cmd = local_windows and ("llama_cpp" in req.cmd or "llama-server" in req.cmd) + if ("llama_cpp" in req.cmd or "llama-server" in req.cmd) and not local_windows_llama_cmd: # Prefer the NATIVE llama-server binary — its minja templating # renders modern GGUF chat templates that the Python bindings' # Jinja2 rejects (do_tojson ensure_ascii). Build it once from @@ -2396,8 +2409,8 @@ def setup_cookbook_routes() -> APIRouter: try: return _state_for_client(json.loads(_cookbook_state_path.read_text(encoding="utf-8"))) except Exception: - return {} - return {} + return _state_for_client({}) + return _state_for_client({}) @router.post("/api/cookbook/state") async def save_cookbook_state(request: Request): diff --git a/static/js/cookbook.js b/static/js/cookbook.js index fca21b57e..8821e1bc6 100644 --- a/static/js/cookbook.js +++ b/static/js/cookbook.js @@ -76,7 +76,7 @@ function _platformIcon(platform) { return ''; } -export let _envState = { env: 'none', envPath: '', hfToken: '', hfTokenConfigured: false, hfTokenMasked: '', gpus: '', remoteHost: '', servers: [], modelPaths: [], platform: '', defaultServer: '' }; +export let _envState = { env: 'none', envPath: '', hfToken: '', hfTokenConfigured: false, hfTokenMasked: '', gpus: '', remoteHost: '', servers: [], modelPaths: [], platform: '', hostPlatform: '', defaultServer: '' }; let _lastCacheHostVal = null; let _cookbookOpeningSpinners = []; export function _lastCacheHost() { return _lastCacheHostVal; } @@ -213,8 +213,13 @@ function _getPort(hostOrTask) { /** Get platform for a given host (or task object). Returns 'windows', 'termux', 'linux', or '' */ export function _getPlatform(hostOrTask) { - if (!hostOrTask) return _envState.platform || ''; - if (typeof hostOrTask === 'object') return hostOrTask.platform || _getPlatform(hostOrTask.remoteServerKey || hostOrTask.remoteHost); + if (hostOrTask === 'local') return _envState.hostPlatform || ''; + if (!hostOrTask) return _envState.remoteHost ? (_envState.platform || '') : (_envState.hostPlatform || ''); + if (typeof hostOrTask === 'object') { + const taskHost = hostOrTask.remoteServerKey || hostOrTask.remoteHost || ''; + if (!taskHost || taskHost === 'local') return _envState.hostPlatform || ''; + return hostOrTask.platform || _getPlatform(taskHost); + } const selected = hostOrTask === _envState.remoteHost ? _selectedServer() : null; const srv = selected || _serverByVal(hostOrTask); return srv?.platform || ''; @@ -638,7 +643,12 @@ export function _buildServeCmd(f, modelName, backend) { // GPU list — read from gpus (button strip); fall back to gpu_id for // backward-compat with older saved presets that pre-date the removal. const gpuId = (f.gpus || f.gpu_id || '').toString().trim(); - const py = _isWindows() ? 'python' : 'python3'; + const _targetHost = Object.prototype.hasOwnProperty.call(f, 'host') + ? String(f.host || '').trim() + : String(_envState.remoteHost || '').trim(); + const _isWin = _targetHost ? _isWindows(_targetHost) : _isWindows('local'); + const _localWindows = _isWin && !_targetHost; + const py = _isWin ? 'python' : 'python3'; // CPU-only serve (-ngl 0): drop the GPU-only flags, otherwise the command // mixes "zero GPU layers" with CUDA unified-memory + flash-attn and fails to // start (issue #1291). Only affects the ngl=0 path; GPU serving is unchanged. @@ -660,19 +670,19 @@ export function _buildServeCmd(f, modelName, backend) { // with misleading prefixes. const _sb = String(_hwfitCache?.system?.backend || '').toLowerCase(); const _hwfitHost = String(_hwfitCache?._scannedHost || ''); - const _curHost = String(_envState.remoteHost || ''); + const _curHost = _targetHost; const _isCudaTarget = (_sb === 'cuda') && (_hwfitHost === _curHost); const lcPrefix = (() => { let p = ''; - if (f.unified_mem && !_cpuOnly && !_isWindows() && _isCudaTarget) p += `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1 `; - // No GPU env var in CPU mode — `-ngl 0` already disables offload + if (f.unified_mem && !_cpuOnly && (!_isWin || _localWindows) && _isCudaTarget) p += `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1 `; + // No GPU env var in CPU mode - `-ngl 0` already disables offload // so CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES would be misleading // clutter ("why is CUDA pinned for a CPU run?"). - if (!_isWindows() && !_cpuOnly) p += _gpuEnvPrefix(gpuId); + if ((!_isWin || _localWindows) && !_cpuOnly) p += _gpuEnvPrefix(gpuId); return p; })(); - if (f.unified_mem && !_cpuOnly && _isWindows() && _isCudaTarget) cmd += `$env:GGML_CUDA_ENABLE_UNIFIED_MEMORY="1"; `; - if (_isWindows() && !_cpuOnly) cmd += _gpuEnvPrefix(gpuId, true); + if (f.unified_mem && !_cpuOnly && _isWin && !_localWindows && _isCudaTarget) cmd += `$env:GGML_CUDA_ENABLE_UNIFIED_MEMORY="1"; `; + if (_isWin && !_localWindows && !_cpuOnly) cmd += _gpuEnvPrefix(gpuId, true); const needsGgufPrelude = /^\$\(\{\s*find\s/.test(String(ggufPath || '')); const modelArg = needsGgufPrelude ? '"$MODEL_FILE"' : `"${ggufPath}"`; // Prefer native llama-server. The backend bootstrap resolves/builds the @@ -744,11 +754,16 @@ export function _buildServeCmd(f, modelName, backend) { // llama-cpp-python takes the projector via --clip_model_path. _lcpExtra += ` --clip_model_path "${f._mmproj_path}"`; } - if (_isWindows()) { - const _lcpServer = `${lcPrefix}${py} -m llama_cpp.server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} --n_gpu_layers ${f.ngl || '99'} --n_ctx ${f.ctx || '8192'}${_lcpExtra}`; + const _lcServer = `${lcPrefix}llama-server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} -ngl ${f.ngl || '99'} -c ${f.ctx || '8192'}${_lcExtra}`; + const _lcpServer = `${lcPrefix}${py} -m llama_cpp.server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} --n_gpu_layers ${f.ngl || '99'} --n_ctx ${f.ctx || '8192'}${_lcpExtra}`; + if (_localWindows) { + // Local Windows serve is launched through Git Bash, so use the native + // llama-server shape and let PATH resolve the CUDA Release wrapper. + cmd += _lcServer; + } else if (_isWin) { cmd += _lcpServer; } else { - cmd += `${lcPrefix}llama-server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} -ngl ${f.ngl || '99'} -c ${f.ctx || '8192'}${_lcExtra}`; + cmd += _lcServer; } if (needsGgufPrelude) { cmd = `MODEL_FILE=${ggufPath} && { [ -n "$MODEL_FILE" ] && [ -f "$MODEL_FILE" ]; } || { echo "ERROR: No GGUF found on this host"; exit 1; } && ${cmd}`; @@ -2612,13 +2627,14 @@ function _renderRecipes() { const isLocal = !s.host || s.host.toLowerCase() === 'local'; if (isLocal) { s.host = ''; + s.platform = _envState.hostPlatform || ''; if (_localSeen) return false; _localSeen = true; } return true; }); if (!_localSeen) { - _es.servers.unshift({ host: '', env: _es.env || 'none', envPath: _es.envPath || '', modelDir: '~/.cache/huggingface/hub' }); + _es.servers.unshift({ host: '', env: _es.env || 'none', envPath: _es.envPath || '', modelDir: '~/.cache/huggingface/hub', platform: _envState.hostPlatform || '' }); } if (_es.remoteHost && !_es.servers.some(s => s.host === _es.remoteHost)) { _es.servers.push({ host: _es.remoteHost, env: _es.env || 'none', envPath: _es.envPath || '', modelDir: '~/.cache/huggingface/hub' }); diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js index 8e106f533..a64205c4d 100644 --- a/static/js/cookbookRunning.js +++ b/static/js/cookbookRunning.js @@ -781,6 +781,7 @@ function _stripStateSecrets(state) { const safe = { ...state }; if (safe.env && typeof safe.env === 'object') { const { hfToken, ...env } = safe.env; + delete env.hostPlatform; safe.env = env; } if (Array.isArray(safe.tasks)) safe.tasks = safe.tasks.map(_redactTaskForStorage); @@ -1673,7 +1674,7 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid || _envState.servers.find(s => s.host === _host) || {}; const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local'); const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local'); - const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || ''); + const _hplatform = _host ? (_hsrv.platform || '') : (_envState.hostPlatform || ''); const _replaceTaskId = fields?._replaceTaskId || ''; if (_replaceTaskId) { try { @@ -1688,7 +1689,6 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid } } catch {} } - // Replace any serve already targeting this same host:port — you can't run two // servers on one port, so re-serving (or retrying) should stop & remove the // old one instead of leaving a dead duplicate behind. (The retry buttons diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js index 6f7b53057..253ba7483 100644 --- a/static/js/cookbookServe.js +++ b/static/js/cookbookServe.js @@ -527,7 +527,7 @@ function _selectedServeTarget(panel) { env: server?.env || '', port: host ? (server?.port || _getPort(host) || '') : '', venv, - platform: server?.platform || _envState.platform || '', + platform: host ? (server?.platform || '') : (_envState.hostPlatform || ''), label, }; } @@ -658,6 +658,12 @@ function _selectedGgufSizeGb(model, relPath) { return bytes / (1024 ** 3); } +function _projectorGgufFiles(model) { + return _ggufFilesForModel(model) + .filter(f => (f.role || '') === 'projector' || /(^|\/)mmproj[^/]*\.gguf$/i.test(f.rel_path || f.name || '')) + .sort((a, b) => String(a.rel_path || a.name || '').localeCompare(String(b.rel_path || b.name || ''))); +} + function _ggufFileLabel(file) { const base = (file.name || file.rel_path || '').split('/').pop(); const size = _formatGgufSize(file.size_bytes); @@ -1198,6 +1204,7 @@ function _rerenderCachedModels() { panelHtml += `
${_warnText}
`; } panelHtml += `
${_slotsHtml}
`; + panelHtml += ``; // Row 1: Engine + Server + Env panelHtml += `
`; const backendOpts = _backendChoices.map(([v,l]) => ``).join(''); @@ -1524,6 +1531,11 @@ function _rerenderCachedModels() { if (el.type === 'checkbox') f[el.dataset.field] = el.checked; else f[el.dataset.field] = el.value; }); + const buildTarget = _selectedServeTarget(panel); + f.host = buildTarget.host || ''; + f.platform = buildTarget.platform || ''; + const hostField = panel.querySelector('[data-field="host"]'); + if (hostField) hostField.value = f.host; const backend = f.backend || 'vllm'; const serveModel = (f.model_path || '').trim() || (m.is_local_dir && m.path ? `${m.path}/${repo}` : repo); if (backend === 'llamacpp') { @@ -1543,11 +1555,11 @@ function _rerenderCachedModels() { : m.is_local_dir && m.path ? `$({ find ${_ldir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${_ldir} -name '*.gguf' 2>/dev/null | sort; } | head -1)` : `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`; - // Vision: auto-find the mmproj (CLIP/projector) file in the same dir. - // Resolved at runtime so the toggle just works if an mmproj-*.gguf is - // present (downloaded alongside the model). Empty if none → cmd omits it. - const _vsearchdir = (m.is_local_dir && m.path) ? _ldir : dir; - f._mmproj_path = `$(find ${_vsearchdir} -iname 'mmproj*.gguf' 2>/dev/null | sort | head -1)`; + // Vision: use the scanned projector (CLIP/mmproj) file when present. + // Keeping this as a printf path avoids generating a command substitution + // that the backend serve-command validator must reject as unsafe. + const selectedProjector = _projectorGgufFiles(m)[0]; + f._mmproj_path = selectedProjector ? _selectedGgufExpr(m, repo, selectedProjector.rel_path) : ''; } if (f.reasoning_parser) { const _rpEl2 = panel.querySelector('[data-field="reasoning_parser"]'); @@ -1563,6 +1575,10 @@ function _rerenderCachedModels() { } let cmd = _buildServeCmd(f, serveModel, backend); if (f.extra && f.extra.trim()) cmd += ' ' + f.extra.trim(); + const missingVisionProjector = backend === 'llamacpp' && !!f.vision && !f._mmproj_path; + panel._visionMissingProjector = missingVisionProjector; + const _visionWarn = panel.querySelector('.hwfit-serve-vision-warn'); + if (_visionWarn) _visionWarn.style.display = missingVisionProjector ? 'flex' : 'none'; const _ce2 = panel.querySelector('.hwfit-serve-cmd'); _ce2.value = _formatServeCmdPreview(cmd); _ce2.style.height = 'auto'; _ce2.style.height = _ce2.scrollHeight + 'px'; panel._cmd = cmd; panel._host = f.host || ''; @@ -2938,12 +2954,16 @@ function _rerenderCachedModels() { }); serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm'; const launchTarget = _selectedServeTarget(panel); + if (serveState.backend === 'llamacpp' && serveState.vision && !/(?:^|\s)(?:--mmproj|--clip_model_path)\b/.test(launchCmd)) { + _restoreLaunchBtn(); + uiModule.showToast('Vision is checked, but no mmproj projector is in the launch command. Refresh cached models after downloading mmproj, or add --mmproj manually.', 8000); + return; + } if (serveState.backend === 'diffusers' && _remoteWindowsDiffusersUnsupported(launchTarget)) { _restoreLaunchBtn(); uiModule.showToast('Diffusers serving is not supported on remote Windows servers yet. Use local Windows or a Linux server.', 9000); return; } - // Pre-launch: check our own task list for a serve already running // on this host. Offer to stop+launch as the default action — the // SSH-based port probe below is more thorough but it can miss diff --git a/tests/test_cookbook_cpu_only_serve.py b/tests/test_cookbook_cpu_only_serve.py index bcb06b098..3de0c0f2d 100644 --- a/tests/test_cookbook_cpu_only_serve.py +++ b/tests/test_cookbook_cpu_only_serve.py @@ -1,4 +1,4 @@ -"""Regression guard for issue #1291 — CPU-only serve still emitted GPU-only flags. +"""Regression guard for issue #1291 - CPU-only serve still emitted GPU-only flags. The llama.cpp serve command builder (static/js/cookbook.js) added `--flash-attn on` and exported `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` from @@ -16,8 +16,8 @@ from pathlib import Path SRC = Path(__file__).resolve().parent.parent / "static/js/cookbook.js" SERVE_SRC = Path(__file__).resolve().parent.parent / "static/js/cookbookServe.js" -ROUTES_SRC = Path(__file__).resolve().parent.parent / "routes/cookbook_routes.py" - +ROOT = SRC.parent.parent.parent +ROUTES_SRC = ROOT / "routes/cookbook_routes.py" def test_cpu_only_drops_gpu_only_flags(): text = SRC.read_text(encoding="utf-8") @@ -84,3 +84,101 @@ def test_vllm_route_strips_swap_space_when_runtime_rejects_it(): assert "print(shlex.join(parts[:serve_i + 1] + [\"--help\"]))" in text assert "eval \"$ODYSSEUS_VLLM_HELP_CMD\" 2>&1 | grep -q -- \"--swap-space\"" in text assert "eval \"$ODYSSEUS_SERVE_CMD\"" in text + + +def test_local_windows_platform_comes_from_backend_host_state(): + text = SRC.read_text(encoding="utf-8") + routes = ROUTES_SRC.read_text(encoding="utf-8") + running = (SRC.parent / "cookbookRunning.js").read_text(encoding="utf-8") + + assert "hostPlatform" in text + assert "navigator.platform" not in text + assert "hostOrTask === 'local'" in text + assert "if (hostOrTask === 'local') return _envState.hostPlatform || '';" in text + assert "return _envState.hostPlatform || _envState.platform || ''" not in text + assert "s.platform = _envState.hostPlatform || '';" in text + assert "platform: _envState.hostPlatform || ''" in text + assert "s.platform = _envState.hostPlatform || _envState.platform || '';" not in text + assert "platform: _envState.hostPlatform || _envState.platform || ''" not in text + assert 'return "windows" if IS_WINDOWS else ""' in routes + assert 'env["hostPlatform"] = _client_host_platform()' in routes + assert "return _state_for_client({})" in routes + assert 'env.pop("hostPlatform", None)' in routes + assert "delete env.hostPlatform;" in running + + +def test_local_serve_payload_ignores_stale_env_platform(): + serve = SERVE_SRC.read_text(encoding="utf-8") + running = (SRC.parent / "cookbookRunning.js").read_text(encoding="utf-8") + + assert "platform: host ? (server?.platform || '') : (_envState.hostPlatform || '')," in serve + assert "platform: server?.platform || _envState.platform || ''" not in serve + assert "const _hplatform = _host ? (_hsrv.platform || '') : (_envState.hostPlatform || '');" in running + assert "const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || '');" not in running + + +def test_local_windows_llamacpp_prefers_native_llama_server(): + text = SRC.read_text(encoding="utf-8") + helpers = (ROOT / "routes/cookbook_helpers.py").read_text(encoding="utf-8") + + assert "Object.prototype.hasOwnProperty.call(f, 'host')" in text + assert "const _isWin = _targetHost ? _isWindows(_targetHost) : _isWindows('local');" in text + assert "const _localWindows = _isWin && !_targetHost;" in text + assert "const _curHost = _targetHost;" in text + assert "const _localWindows = _isWin && !_envState.remoteHost;" not in text + assert "const gpuId = (f.gpus || f.gpu_id || '').toString().trim();" in text + assert "const _lcServer = `${lcPrefix}llama-server --model" in text + assert "if (_localWindows) {" in text + assert "cmd += _lcServer;" in text + assert '"llama-server.exe"' in helpers + + + +def test_serve_command_preview_uses_selected_target_host(): + text = SERVE_SRC.read_text(encoding="utf-8") + + assert "const buildTarget = _selectedServeTarget(panel);" in text + assert "f.host = buildTarget.host || '';" in text + assert "f.platform = buildTarget.platform || '';" in text + assert "const hostField = panel.querySelector('[data-field=\"host\"]');" in text + assert "if (hostField) hostField.value = f.host;" in text + + +def test_local_windows_llama_server_skips_source_bootstrap(): + routes = ROUTES_SRC.read_text(encoding="utf-8") + + assert 'local_windows_llama_cmd = local_windows and ("llama_cpp" in req.cmd or "llama-server" in req.cmd)' in routes + assert 'if ("llama_cpp" in req.cmd or "llama-server" in req.cmd) and not local_windows_llama_cmd:' in routes + + +def test_local_windows_llama_server_path_includes_user_wrapper_and_cuda_builds(): + routes = (ROOT / "routes/cookbook_routes.py").read_text(encoding="utf-8") + + assert 'if local_windows:' in routes + assert ( + 'export PATH="$HOME/bin:$HOME/llama.cpp/build-cuda/bin/Release:' + '$HOME/llama.cpp/build/bin/Release:$HOME/llama.cpp/build/bin/Debug:' + '$HOME/llama.cpp/build/bin:$PATH"' + ) in routes + + +def test_serve_panel_keeps_row_markup_and_launch_cmd_assignment_executable(): + text = SERVE_SRC.read_text(encoding="utf-8").replace("\r\n", "\n") + + assert '// Row 1: Engine + Server + Env panelHtml +=' not in text + assert "px'; panel._cmd = cmd;" not in text + assert '// Row 1: Engine + Server + Env\n panelHtml += `
`;' in text + assert "px';\n panel._cmd = cmd;" in text + + +def test_llamacpp_vision_uses_scanned_projector_instead_of_runtime_find(): + text = SERVE_SRC.read_text(encoding="utf-8") + + assert "function _projectorGgufFiles(model)" in text + assert "const selectedProjector = _projectorGgufFiles(m)[0];" in text + assert "f._mmproj_path = selectedProjector ? _selectedGgufExpr(m, repo, selectedProjector.rel_path) : '';" in text + assert "const missingVisionProjector = backend === 'llamacpp' && !!f.vision && !f._mmproj_path;" in text + assert "hwfit-serve-vision-warn" in text + assert "!/(?:^|\\s)(?:--mmproj|--clip_model_path)\\b/.test(launchCmd)" in text + assert "no mmproj projector is in the launch command" in text + assert "find ${_vsearchdir} -iname 'mmproj*.gguf'" not in text diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py index 5d2db5dda..be684b177 100644 --- a/tests/test_cookbook_helpers.py +++ b/tests/test_cookbook_helpers.py @@ -419,8 +419,6 @@ def test_pip_install_attempt_failure_propagates_real_exit_code(): """Run the generated snippet against a deliberately broken pip install to confirm the subshell exits with pip's non-zero status.""" snippet = _pip_install_attempt("python3 -m pip install __nonexistent_package_12345__") - if sys.platform == "win32": - snippet = snippet.replace("$", "\\$") result = subprocess.run( ["bash", "-c", snippet], capture_output=True, @@ -433,8 +431,6 @@ def test_pip_install_attempt_failure_propagates_real_exit_code(): def test_pip_install_attempt_success_exits_zero(): """When pip succeeds, the subshell should exit 0.""" snippet = _pip_install_attempt("python3 -c 'pass'") - if sys.platform == "win32": - snippet = snippet.replace("$", "\\$") result = subprocess.run( ["bash", "-c", snippet], capture_output=True, @@ -447,8 +443,6 @@ def test_pip_install_attempt_success_exits_zero(): def test_pip_install_attempt_surfaces_stderr_on_failure(): """On failure, the last 5 lines of pip output should appear in stdout.""" snippet = _pip_install_attempt("python3 -m pip install __nonexistent_package_12345__") - if sys.platform == "win32": - snippet = snippet.replace("$", "\\$") result = subprocess.run( ["bash", "-c", snippet], capture_output=True, @@ -557,6 +551,19 @@ def test_validate_serve_cmd_accepts_windows_printf_format(): assert _validate_serve_cmd(cmd) == cmd +def test_validate_serve_cmd_accepts_llama_mmproj_printf_format(): + cmd = ( + "CUDA_VISIBLE_DEVICES=0 llama-server --model " + "\"$(printf %s ${HOME}'/.cache/huggingface/hub/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/abc/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf')\" " + "--host 0.0.0.0 --port 8000 -ngl 99 -c 20000 " + "--cache-type-k q4_0 --cache-type-v q4_0 --mmproj " + "\"$(printf %s ${HOME}'/.cache/huggingface/hub/models--unsloth--Qwen3.6-35B-A3B-GGUF/snapshots/abc/mmproj-BF16.gguf')\" " + "--image-max-tokens 1024" + ) + + assert _validate_serve_cmd(cmd) == cmd + + def test_normalize_llama_cpp_python_cache_types_for_stale_client_cmd(): cmd = ( "python -m llama_cpp.server --model model.gguf --host 0.0.0.0 --port 8000 "