fix(platform): Improve WSL SSH remote compatibility (#3316)

* fix(platform): add WSL compatibility functions and path translation
fix(cookbook): enhance model scan script to support additional HuggingFace cache paths
fix(hardware): improve cache key generation for remote SSH context
test(tests): add tests for WSL detection and path translation functionality

* fix(cookbook): prefer prebuilt wheels for llama-cpp-python and normalize package aliases

* fix: enable StrictHostKeyChecking in nvidia probe
refactor: consolidate ssh & powershell command execution into utility functions in core module
refactor: consolidate nvidia path candidates into single variables in core module
tests: add tests for new utility functions

* fix: correct wrong variable name
This commit is contained in:
horribleCodes
2026-06-08 00:33:50 +02:00
committed by GitHub
parent 73315e6ddc
commit 9c90f62657
8 changed files with 763 additions and 33 deletions
+66 -18
View File
@@ -20,6 +20,8 @@ from pydantic import BaseModel
from core.middleware import require_admin
from core.platform_compat import (
IS_WINDOWS,
SSH_PATH_OVERRIDE,
NVIDIA_PATH_CANDIDATES,
detached_popen_kwargs,
find_bash,
git_bash_path,
@@ -27,6 +29,8 @@ from core.platform_compat import (
pid_alive,
safe_chmod,
which_tool,
translate_path,
get_wsl_windows_user_profile,
)
from routes.shell_routes import TMUX_LOG_DIR
@@ -41,7 +45,7 @@ from routes.cookbook_helpers import (
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
_append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
_pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
_diagnose_serve_output,
_diagnose_serve_output, run_ssh_command_async,
ModelDownloadRequest, ServeRequest,
)
@@ -557,24 +561,35 @@ def setup_cookbook_routes() -> APIRouter:
for d in model_dir.split(','):
d = d.strip()
if d:
model_dirs.append(d)
paths_code = _cached_model_scan_script(model_dirs)
translated_d = translate_path(d) if not host else d
model_dirs.append(translated_d)
win_hf_hub = None
if not host:
win_profile = get_wsl_windows_user_profile()
win_hf_hub = os.path.join(win_profile, ".cache", "huggingface", "hub") if win_profile else None
paths_code = _cached_model_scan_script(model_dirs, win_hf_hub)
scan_py = TMUX_LOG_DIR / "scan_cache.py"
scan_py.write_text(paths_code, encoding="utf-8")
scan_payload = scan_py.read_bytes()
if host:
_pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
if platform == "windows":
# Windows: use 'python' and pipe via stdin with double-quote wrapping
cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\''
remote_cmd = "python -"
else:
cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'"
proc = await asyncio.create_subprocess_shell(
cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
# POSIX: use 'python3' if available, fall back to 'python'; print an error to stderr and exit with status 127 if neither is found.
remote_cmd = (
"if command -v python3 >/dev/null 2>&1; then python3 -; "
"elif command -v python >/dev/null 2>&1; then python -; "
"else echo \"python3/python not found\" >&2; exit 127; fi"
)
rc, stdout_b, stderr_b = await run_ssh_command_async(
host,
ssh_port,
remote_cmd,
timeout=60,
stdin_data=scan_payload,
)
else:
# LOCAL scan: use sys.executable (the venv Python Odysseus is already
@@ -594,7 +609,7 @@ def setup_cookbook_routes() -> APIRouter:
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
)
stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
models = []
try:
@@ -874,6 +889,12 @@ def setup_cookbook_routes() -> APIRouter:
# pip cache so they don't fail mid-build with "No space left" (#1219)
# and leave the dep installed-but-unusable (#1459).
req.cmd = _pip_install_no_cache(req.cmd)
# Accept common aliases and enforce server extras for llama-cpp so
# `python -m llama_cpp.server` has all runtime dependencies.
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
# PEP-508-style package spec — letters, digits, `.-_` for the
# name; `[` `]` for extras; `<>=!~,` for version specifiers.
# v2 review HIGH-14: tightened from the previous regex which
@@ -1354,11 +1375,38 @@ def setup_cookbook_routes() -> APIRouter:
async def _run_nvidia_smi(query: str, host: str | None, ssh_port: str | None, timeout: int = 8):
"""Run nvidia-smi locally or over SSH. Returns (stdout, error_or_None)."""
if host:
pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
cmd = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {pf}{host} '{query}'"
proc = await asyncio.create_subprocess_shell(
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
candidates = [query]
stripped = query.strip()
if stripped.startswith("nvidia-smi "):
args = stripped[len("nvidia-smi "):]
candidates.append(
"bash -lc "
+ shlex.quote(
f"{SSH_PATH_OVERRIDE}"
f"nvidia-smi {args}"
)
)
for nvidia_path in NVIDIA_PATH_CANDIDATES:
candidates.append(f"{nvidia_path} {args}")
last_err = "nvidia-smi failed"
for candidate in candidates:
try:
rc, stdout, stderr = await run_ssh_command_async(
host,
ssh_port,
candidate,
connect_timeout=5,
timeout=timeout,
)
except asyncio.TimeoutError:
return None, "nvidia-smi timed out"
if rc == 0:
return stdout.decode("utf-8", errors="replace"), None
err = (stderr.decode("utf-8", errors="replace") or "").strip()[:200]
if err:
last_err = err
return None, last_err
else:
proc = await asyncio.create_subprocess_exec(
*shlex.split(query),