fix(platform): Improve WSL SSH remote compatibility (#3316)

* fix(platform): add WSL compatibility functions and path translation
fix(cookbook): enhance model scan script to support additional HuggingFace cache paths
fix(hardware): improve cache key generation for remote SSH context
test(tests): add tests for WSL detection and path translation functionality

* fix(cookbook): prefer prebuilt wheels for llama-cpp-python and normalize package aliases

* fix: enable StrictHostKeyChecking in nvidia probe
refactor: consolidate ssh & powershell command execution to utility functions in core module
refactor: consolidate nvidia path candidates into single variables in core module
tests: add tests for new utility functions

* fix: correct wrong variable name
This commit is contained in:
horribleCodes
2026-06-08 00:33:50 +02:00
committed by GitHub
parent 73315e6ddc
commit 9c90f62657
8 changed files with 763 additions and 33 deletions
+49 -3
View File
@@ -11,6 +11,8 @@ import shlex
from fastapi import HTTPException
from pydantic import BaseModel
from core.platform_compat import _ssh_exec_argv
logger = logging.getLogger(__name__)
@@ -213,7 +215,10 @@ def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m p
# before being embedded in the install command. Plain names (e.g.
# ``huggingface_hub``) are returned unchanged by ``shlex.quote``.
pkg = shlex.quote(package)
if IS_WINDOWS and "llama-cpp-python" in package:
# llama-cpp-python source builds are brittle on older distro pip/packaging
# stacks (common on WSL images). Prefer the prebuilt wheel index whenever
# this package is requested so dependency-install tasks are reliable.
if "llama-cpp-python" in package:
pkg += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}")
@@ -275,11 +280,14 @@ def _user_shell_path_bootstrap() -> list[str]:
' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
'fi',
'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
]
def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
"""Build the standalone Python scanner used by /api/model/cached."""
def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache: str | None = None) -> str:
"""Build the standalone Python scanner used by /api/model/cached.
Allows for an additional HuggingFace cache path to be scanned (i.e. Windows HF cache for local WSL envs.)
"""
lines = [
"import json, os, re, shutil, subprocess, urllib.request",
"models = []",
@@ -372,6 +380,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" # Docker images mount ./data/huggingface at /app/.cache/huggingface.",
" # When HOME is /root, expanduser() misses that persisted cache.",
" add('/app/.cache/huggingface/hub')",
f" add({add_hf_cache!r})" if add_hf_cache else "",
" return candidates",
"def scan_dir(p):",
" if not os.path.isdir(p) or not safe_path(p): return",
@@ -989,3 +998,40 @@ def _diagnose_serve_output(text: str) -> dict | None:
"suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}],
}
return None
async def run_ssh_command_async(
    remote: str,
    ssh_port: str | None,
    remote_cmd: str,
    *,
    timeout: float,
    connect_timeout: int | None = None,
    strict_host_key_checking: bool | None = None,
    stdin_data: bytes | None = None,
) -> tuple[int, bytes, bytes]:
    """Run an ssh command with centralized timeout and stderr/stdout capture.

    Async version of core.platform_compat.run_ssh_command_sync.

    The argv is built by ``_ssh_exec_argv`` and executed directly (no shell),
    with stdout and stderr captured. A stdin pipe is attached only when
    ``stdin_data`` is provided.

    Args:
        remote: Forwarded to ``_ssh_exec_argv`` as the SSH target.
        ssh_port: Forwarded to ``_ssh_exec_argv``; may be ``None``.
        remote_cmd: Command string forwarded to ``_ssh_exec_argv``.
        timeout: Seconds to wait for the process to finish (including the
            time needed to feed ``stdin_data``).
        connect_timeout: Forwarded to ``_ssh_exec_argv``.
        strict_host_key_checking: Forwarded to ``_ssh_exec_argv``.
        stdin_data: Optional bytes written to the process's stdin.

    Returns:
        ``(returncode, stdout, stderr)``. A ``None`` return code is reported
        as ``0``.

    Raises:
        asyncio.TimeoutError: The process did not finish within ``timeout``.
            The process is killed and reaped before the error propagates.
    """
    import asyncio
    import contextlib

    proc = await asyncio.create_subprocess_exec(
        *_ssh_exec_argv(
            remote,
            ssh_port,
            remote_cmd=remote_cmd,
            connect_timeout=connect_timeout,
            strict_host_key_checking=strict_host_key_checking,
        ),
        stdin=asyncio.subprocess.PIPE if stdin_data is not None else None,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_data), timeout=timeout
        )
    except asyncio.TimeoutError:
        # The child may exit on its own between the timeout firing and the
        # kill; asyncio then raises ProcessLookupError, which would mask the
        # TimeoutError callers rely on. Ignore it and still reap the process.
        with contextlib.suppress(ProcessLookupError):
            proc.kill()
        await proc.communicate()
        raise
    return proc.returncode or 0, stdout, stderr
+66 -18
View File
@@ -20,6 +20,8 @@ from pydantic import BaseModel
from core.middleware import require_admin
from core.platform_compat import (
IS_WINDOWS,
SSH_PATH_OVERRIDE,
NVIDIA_PATH_CANDIDATES,
detached_popen_kwargs,
find_bash,
git_bash_path,
@@ -27,6 +29,8 @@ from core.platform_compat import (
pid_alive,
safe_chmod,
which_tool,
translate_path,
get_wsl_windows_user_profile,
)
from routes.shell_routes import TMUX_LOG_DIR
@@ -41,7 +45,7 @@ from routes.cookbook_helpers import (
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
_append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
_pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
_diagnose_serve_output,
_diagnose_serve_output, run_ssh_command_async,
ModelDownloadRequest, ServeRequest,
)
@@ -557,24 +561,35 @@ def setup_cookbook_routes() -> APIRouter:
for d in model_dir.split(','):
d = d.strip()
if d:
model_dirs.append(d)
paths_code = _cached_model_scan_script(model_dirs)
translated_d = translate_path(d) if not host else d
model_dirs.append(translated_d)
win_hf_hub = None
if not host:
win_profile = get_wsl_windows_user_profile()
win_hf_hub = os.path.join(win_profile, ".cache", "huggingface", "hub") if win_profile else None
paths_code = _cached_model_scan_script(model_dirs, win_hf_hub)
scan_py = TMUX_LOG_DIR / "scan_cache.py"
scan_py.write_text(paths_code, encoding="utf-8")
scan_payload = scan_py.read_bytes()
if host:
_pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
if platform == "windows":
# Windows: use 'python' and pipe via stdin with double-quote wrapping
cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\''
remote_cmd = "python -"
else:
cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'"
proc = await asyncio.create_subprocess_shell(
cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
# POSIX: use 'python3' if available, fall back to 'python'; throw if neither is found.
remote_cmd = (
"if command -v python3 >/dev/null 2>&1; then python3 -; "
"elif command -v python >/dev/null 2>&1; then python -; "
"else echo \"python3/python not found\" >&2; exit 127; fi"
)
rc, stdout_b, stderr_b = await run_ssh_command_async(
host,
ssh_port,
remote_cmd,
timeout=60,
stdin_data=scan_payload,
)
else:
# LOCAL scan: use sys.executable (the venv Python Odysseus is already
@@ -594,7 +609,7 @@ def setup_cookbook_routes() -> APIRouter:
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
)
stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
models = []
try:
@@ -874,6 +889,12 @@ def setup_cookbook_routes() -> APIRouter:
# pip cache so they don't fail mid-build with "No space left" (#1219)
# and leave the dep installed-but-unusable (#1459).
req.cmd = _pip_install_no_cache(req.cmd)
# Accept common aliases and enforce server extras for llama-cpp so
# `python -m llama_cpp.server` has all runtime dependencies.
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
# PEP-508-style package spec — letters, digits, `.-_` for the
# name; `[` `]` for extras; `<>=!~,` for version specifiers.
# v2 review HIGH-14: tightened from the previous regex which
@@ -1354,11 +1375,38 @@ def setup_cookbook_routes() -> APIRouter:
async def _run_nvidia_smi(query: str, host: str | None, ssh_port: str | None, timeout: int = 8):
"""Run nvidia-smi locally or over SSH. Returns (stdout, error_or_None)."""
if host:
pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
cmd = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {pf}{host} '{query}'"
proc = await asyncio.create_subprocess_shell(
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
candidates = [query]
stripped = query.strip()
if stripped.startswith("nvidia-smi "):
args = stripped[len("nvidia-smi "):]
candidates.append(
"bash -lc "
+ shlex.quote(
f"{SSH_PATH_OVERRIDE}"
f"nvidia-smi {args}"
)
)
for nvidia_path in NVIDIA_PATH_CANDIDATES:
candidates.append(f"{nvidia_path} {args}")
last_err = "nvidia-smi failed"
for candidate in candidates:
try:
rc, stdout, stderr = await run_ssh_command_async(
host,
ssh_port,
candidate,
connect_timeout=5,
timeout=timeout,
)
except asyncio.TimeoutError:
return None, "nvidia-smi timed out"
if rc == 0:
return stdout.decode("utf-8", errors="replace"), None
err = (stderr.decode("utf-8", errors="replace") or "").strip()[:200]
if err:
last_err = err
return None, last_err
else:
proc = await asyncio.create_subprocess_exec(
*shlex.split(query),