Fix Windows llama.cpp serve and test upstream (#2669)

* fix: code runner base64, Windows serve paths, endpoint cache clear, copy-log guards, model-picker remove-recent

* Revert model-picker 'remove from recent' feature and remove stray PR_DRAFT.md
This commit is contained in:
Zen0-99
2026-06-05 13:53:33 +01:00
committed by GitHub
parent ec8fbf5d8f
commit bec594904d
7 changed files with 119 additions and 23 deletions
+9
View File
@@ -342,6 +342,15 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" if f.is_file(): nf += 1; sz += f.stat().st_size",
" if f.name.endswith('.incomplete'): ic = True",
" snap = os.path.join(cache, d, 'snapshots')",
" # Windows HF cache stores files directly in snapshots/; blobs/ may be empty.",
" # Fallback: scan snapshots for real files when blobs yielded nothing.",
" if sz == 0 and os.path.isdir(snap):",
" for sd in os.listdir(snap):",
" sf = os.path.join(snap, sd)",
" if not os.path.isdir(sf): continue",
" for f in os.scandir(sf):",
" if f.is_file(): nf += 1; sz += f.stat().st_size",
" if f.name.endswith('.incomplete'): ic = True",
" is_diffusion = False; gguf_files = []",
" if os.path.isdir(snap):",
" for sd in os.listdir(snap):",
+30 -11
View File
@@ -799,6 +799,10 @@ def setup_cookbook_routes() -> APIRouter:
existing.name = display_name
if supports_tools is not None:
existing.supports_tools = supports_tools
# Wipe stale model lists so the picker re-probes and discovers
# the newly-served model instead of showing the old one.
existing.cached_models = None
existing.hidden_models = None
db.commit()
logger.info(f"Updated existing local model endpoint: {base_url}")
return existing.id
@@ -1089,13 +1093,23 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_try_port"')
runner_lines.append(' break')
runner_lines.append(' fi')
runner_lines.append(' exec 3<&-; exec 3>&-')
runner_lines.append('done')
runner_lines.append(' echo "[odysseus] Ollama API ready on port ${ODYSSEUS_OLLAMA_PORT}: ${ODYSSEUS_OLLAMA_URL}"')
runner_lines.append(' echo "[odysseus] This task is monitoring an existing Ollama server; stopping it here will not stop an external Docker/system service."')
if local_windows:
# Windows detached process has no TTY; exec bash -i crashes.
# Keep the monitoring task alive with a sleep loop.
runner_lines.append(' while true; do sleep 60; done')
else:
runner_lines.append(' exec bash -i')
runner_lines.append('fi')
runner_lines.append('if ! command -v ollama &>/dev/null; then')
runner_lines.append(' echo "ERROR: Ollama not found on this server. Install it from https://ollama.com/download or `curl -fsSL https://ollama.com/install.sh | sh`."')
runner_lines.append(' echo')
runner_lines.append(' echo "=== Process exited with code 127 ==="')
runner_lines.append(' exec bash -i')
if local_windows:
runner_lines.append(' exit 127')
else:
runner_lines.append(' exec bash -i')
runner_lines.append('fi')
runner_lines.append('ODYSSEUS_OLLAMA_URL="http://${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}"')
if remote and _ollama_host in ("0.0.0.0", "::"):
@@ -1103,10 +1117,13 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append('echo "[odysseus] Ollama has no built-in authentication; expose this only on a trusted LAN/VPN or provide an explicit OLLAMA_HOST with your own access controls."')
runner_lines.append('echo "Starting ollama server on ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}..."')
runner_lines.append('OLLAMA_HOST="${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}" ollama serve')
runner_lines.append('_ody_exit=$?')
runner_lines.append('echo')
runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="')
runner_lines.append('exec bash -i')
if local_windows:
_append_serve_exit_code_lines(runner_lines, keep_shell_open=False)
else:
runner_lines.append('_ody_exit=$?')
runner_lines.append('echo')
runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="')
runner_lines.append('exec bash -i')
elif "vllm serve" in req.cmd:
# vLLM is CUDA/ROCm-only and does not run on macOS at all.
runner_lines.append('if [ "$(uname -s)" = "Darwin" ]; then')
@@ -2104,11 +2121,13 @@ def setup_cookbook_routes() -> APIRouter:
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
"sys.exit(0 if ok and not inc else 1)"
)
if not remote_host:
import sys
cmd = [sys.executable, "-c", py, repo_id]
else:
if remote_host:
cmd = ["python3", "-c", py, repo_id]
else:
# Local Windows: python3 can hit the Microsoft Store stub. Use the
# real Python Odysseus is running under (guaranteed to exist).
import sys as _sys_local
cmd = [_sys_local.executable, "-c", py, repo_id]
try:
if remote_host:
ssh_base = ["ssh"]
+6 -2
View File
@@ -373,7 +373,11 @@ async def _create_shell(command: str, **kwargs):
and env variable expansion errors under Git Bash.
"""
if IS_WINDOWS:
if command.strip().lower().startswith("powershell"):
# PowerShell commands (used by the frontend for Windows log-file polling
# and session management) must run directly — passing them through
# bash -c mangles $env:VAR syntax and breaks the command.
cmd_trim = command.strip()
if cmd_trim.startswith("powershell") or cmd_trim.startswith("cmd "):
return await asyncio.create_subprocess_shell(command, **kwargs)
bash = find_bash()
if bash:
@@ -758,7 +762,7 @@ def setup_shell_routes() -> APIRouter:
return {"stdout": "", "stderr": "No command provided", "exit_code": 1}
logger.info("User shell exec requested: length=%d", len(cmd))
result = await _exec_shell(cmd, timeout=EXEC_TIMEOUT)
result = await _exec_shell(cmd, timeout=req.timeout if req.timeout is not None else EXEC_TIMEOUT)
return result
@router.post("/api/shell/stream")
+6 -2
View File
@@ -310,11 +310,15 @@ try {
*/
export async function runServer(code, panel, lang) {
showLoading(panel, 'Running on server...');
// Base64-encode the script so newlines survive the shell quoting intact.
// JSON.stringify turns \n into literal \\n which python3 -c sees as backslash-n;
// base64 avoids every quoting/escaping pitfall.
const b64 = btoa(unescape(encodeURIComponent(code)));
var command;
if (lang === 'python' || lang === 'py') {
command = 'python3 -c ' + JSON.stringify(code);
command = `python3 -c "import base64; exec(base64.b64decode('${b64}').decode('utf-8'))"`;
} else {
command = 'bash -c ' + JSON.stringify(code);
command = `python3 -c "import base64, subprocess, sys; sys.exit(subprocess.run(['bash','-c',base64.b64decode('${b64}').decode('utf-8')]).returncode)"`;
}
try {
var res = await fetch('/api/shell/exec', {
+8
View File
@@ -443,6 +443,9 @@ export async function _hwfitFetch(fresh = false) {
if (_cached) {
_hwfitCache = _cached;
_hwfitRenderHw(hw, _cached.system);
if (!remoteHost && _cached.system && _cached.system.platform) {
_envState.platform = _cached.system.platform;
}
_hwfitRenderList(list, _applyEngineFilter(_cached.models));
} else {
// Show spinner while scanning — stack the spinner above a text label
@@ -578,6 +581,11 @@ export async function _hwfitFetch(fresh = false) {
}
_hwfitCache = data;
_hwfitRenderHw(hw, data.system);
// Propagate local platform from hardware probe so _isWindows(task) works
// for local tasks (menu items, shell commands, etc.).
if (!remoteHost && data.system && data.system.platform) {
_envState.platform = data.system.platform;
}
// Sort client-side by the active column so the highest↔lowest toggle is
// deterministic (the previous array .reverse() didn't reliably flip).
// 1st click on a column = highest first; clicking it again = lowest first.
+19 -1
View File
@@ -1862,7 +1862,17 @@ export function _renderRunningTab() {
const startNow = el.querySelector('.cookbook-task-start-now');
if (startNow) startNow.style.display = (task.type === 'download' && task.status === 'queued') ? '' : 'none';
const terminalDiag = _terminalServeDiagnosis(task, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
if (terminalDiag) _showDiagnosis(el, terminalDiag, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
if (terminalDiag) {
_showDiagnosis(el, terminalDiag, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
} else {
const existingDiag = el.querySelector('.cookbook-diagnosis');
// Keep diagnosis for failed tasks even if output was cleared and we
// can no longer re-derive the exact message — removing it would hide
// the crash reason from the user.
if (existingDiag && !['stopped', 'error', 'crashed', 'failed'].includes(task.status)) {
existingDiag.remove();
}
}
}
if (!task) {
if (el._uptimeInterval) { clearInterval(el._uptimeInterval); el._uptimeInterval = null; }
@@ -2201,6 +2211,10 @@ export function _renderRunningTab() {
items.push({ label: 'Copy last 50 lines', action: 'copy-log', custom: () => {
const out = (el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
const last = out.split('\n').slice(-50).join('\n');
if (!last.trim()) {
uiModule.showToast('No log content available yet');
return;
}
_copyText(last);
uiModule.showToast('Copied last 50 lines');
}});
@@ -2437,6 +2451,10 @@ export function _renderRunningTab() {
el.querySelector('.cookbook-output-copy').addEventListener('click', (e) => {
e.stopPropagation();
const text = el.querySelector('.cookbook-output-pre')?.textContent || '';
if (!text.trim()) {
uiModule.showToast('No log content available yet');
return;
}
_copyText(text).then(() => {
const btn = el.querySelector('.cookbook-output-copy');
const origHTML = btn.innerHTML;
+41 -7
View File
@@ -242,6 +242,21 @@ function _shellPathExpr(path) {
function _selectedGgufExpr(model, repo, relPath) {
const rel = String(relPath || '').replace(/^\/+/, '');
if (!rel) return '';
if (_isWindows()) {
// PowerShell: plain path — no bash $() syntax (backend validator rejects
// $( ) in non-prelude commands, and PowerShell doesn't have printf).
const relW = rel.replace(/\//g, '\\');
if (model.is_local_dir && model.path) {
const base = String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\');
return `${base}\\${repo.replace(/\//g, '\\')}\\${relW}`;
}
if (model.path) {
const base = String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\');
return `${base}\\models--${repo.replace(/\//g, '--')}\\snapshots\\${relW}`;
}
const cacheRepo = repo.replace(/\//g, '--');
return `$env:USERPROFILE\\.cache\\huggingface\\hub\\models--${cacheRepo}\\snapshots\\${relW}`;
}
if (model.is_local_dir && model.path) {
const base = String(model.path || '').replace(/\/+$/, '');
return `$(printf %s ${_shellPathExpr(`${base}/${repo}/${rel}`)})`;
@@ -255,6 +270,15 @@ function _selectedGgufExpr(model, repo, relPath) {
}
function _ggufSearchDirExpr(model, repo) {
if (_isWindows()) {
if (model.is_local_dir && model.path) {
return `${String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\')}\\${repo.replace(/\//g, '\\')}`;
}
if (model.path) {
return `${String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\')}\\models--${repo.replace(/\//g, '--')}\\snapshots`;
}
return `$env:USERPROFILE\\.cache\\huggingface\\hub\\models--${repo.replace(/\//g, '--')}\\snapshots`;
}
if (model.is_local_dir && model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/${repo}`);
if (model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/models--${repo.replace(/\//g, '--')}/snapshots`);
return `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`;
@@ -800,17 +824,27 @@ function _rerenderCachedModels() {
// model the file lives under "<path>/<repo>" — search there just like we
// search the HF snapshots dir, so serving a GGUF from a custom dir works
// instead of handing llama.cpp a directory (which fails).
const _ldir = m.path ? _shellQuote(`${m.path}/${repo}`) : '""';
f._gguf_path = selectedGguf
? _selectedGgufExpr(m, repo, selectedGguf.rel_path)
: m.is_local_dir && m.path
? `$({ find ${_ldir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${_ldir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`
: `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
const _ldir = m.path
? (_isWindows() ? `${m.path.replace(/\//g, '\\')}\\${repo.replace(/\//g, '\\')}` : _shellQuote(`${m.path}/${repo}`))
: (_isWindows() ? '' : '""');
if (selectedGguf) {
f._gguf_path = _selectedGgufExpr(m, repo, selectedGguf.rel_path);
} else if (_isWindows()) {
// Windows fallback: no bash $() available; validator rejects it.
// Return empty so the serve fails with a clear message.
f._gguf_path = '';
} else if (m.is_local_dir && m.path) {
f._gguf_path = `$({ find ${_ldir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${_ldir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
} else {
f._gguf_path = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
}
// Vision: auto-find the mmproj (CLIP/projector) file in the same dir.
// Resolved at runtime so the toggle just works if an mmproj-*.gguf is
// present (downloaded alongside the model). Empty if none → cmd omits it.
const _vsearchdir = (m.is_local_dir && m.path) ? _ldir : dir;
f._mmproj_path = `$(find ${_vsearchdir} -iname 'mmproj*.gguf' 2>/dev/null | sort | head -1)`;
f._mmproj_path = _isWindows()
? (_vsearchdir ? `${_vsearchdir}\\mmproj*.gguf` : '')
: `$(find ${_vsearchdir} -iname 'mmproj*.gguf' 2>/dev/null | sort | head -1)`;
}
if (f.reasoning_parser) {
const _rpEl2 = panel.querySelector('[data-field="reasoning_parser"]');