fix(platform): Improve WSL SSH remote compatibility (#3316)

* fix(platform): add WSL compatibility functions and path translation
  fix(cookbook): enhance model scan script to support additional HuggingFace cache paths
  fix(hardware): improve cache key generation for remote SSH context
  test(tests): add tests for WSL detection and path translation functionality
* fix(cookbook): prefer prebuilt wheels for llama-cpp-python and normalize package aliases
* fix: enable StrictHostKeyChecking in nvidia probe
  refactor: consolidate ssh & powershell command execution to utility functions in core module
  refactor: consolidate nvidia path candidates into single variables in core module
  tests: add tests for new utility functions
* fix: correct wrong variable name
2026-06-17 10:15:27 -04:00 · 2026-06-08 00:33:50 +02:00
parent 73315e6ddc
commit 9c90f62657
8 changed files with 763 additions and 33 deletions
@@ -11,6 +11,8 @@ import shlex
 from fastapi import HTTPException
 from pydantic import BaseModel

+from core.platform_compat import _ssh_exec_argv
+
 logger = logging.getLogger(__name__)


@@ -213,7 +215,10 @@ def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m p
    # before being embedded in the install command. Plain names (e.g.
    # ``huggingface_hub``) are returned unchanged by ``shlex.quote``.
    pkg = shlex.quote(package)
-    if IS_WINDOWS and "llama-cpp-python" in package:
+    # llama-cpp-python source builds are brittle on older distro pip/packaging
+    # stacks (common on WSL images). Prefer the prebuilt wheel index whenever
+    # this package is requested so dependency-install tasks are reliable.
+    if "llama-cpp-python" in package:
        pkg += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"

    base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}")
@@ -275,11 +280,14 @@ def _user_shell_path_bootstrap() -> list[str]:
        '  if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
        'fi',
        'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
+        'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
    ]


-def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
-    """Build the standalone Python scanner used by /api/model/cached."""
+def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache: str | None = None) -> str:
+    """Build the standalone Python scanner used by /api/model/cached.
+    Allows for an additional HuggingFace cache path to be scanned (i.e. Windows HF cache for local WSL envs.)
+    """
    lines = [
        "import json, os, re, shutil, subprocess, urllib.request",
        "models = []",
@@ -372,6 +380,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "    # Docker images mount ./data/huggingface at /app/.cache/huggingface.",
        "    # When HOME is /root, expanduser() misses that persisted cache.",
        "    add('/app/.cache/huggingface/hub')",
+        f"    add({add_hf_cache!r})" if add_hf_cache else "",
        "    return candidates",
        "def scan_dir(p):",
        "    if not os.path.isdir(p) or not safe_path(p): return",
@@ -989,3 +998,40 @@ def _diagnose_serve_output(text: str) -> dict | None:
            "suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}],
        }
    return None
+
+
+async def run_ssh_command_async(
+    remote: str,
+    ssh_port: str | None,
+    remote_cmd: str,
+    *,
+    timeout: float,
+    connect_timeout: int | None = None,
+    strict_host_key_checking: bool | None = None,
+    stdin_data: bytes | None = None,
+) -> tuple[int, bytes, bytes]:
+    """Run a command over ssh and capture its exit code, stdout and stderr.
+
+    Async version of core.platform_compat.run_ssh_command_sync. The argv is
+    built by ``_ssh_exec_argv`` and executed directly, without a local shell.
+
+    Args:
+        remote: SSH destination, forwarded to ``_ssh_exec_argv``.
+        ssh_port: Optional port, forwarded to ``_ssh_exec_argv``.
+        remote_cmd: Command line to run on the remote side.
+        timeout: Seconds to wait for the whole command to finish.
+        connect_timeout: Forwarded to ``_ssh_exec_argv``.
+        strict_host_key_checking: Forwarded to ``_ssh_exec_argv``.
+        stdin_data: Bytes written to the child's stdin. When ``None`` no
+            stdin pipe is created.
+
+    Returns:
+        ``(returncode, stdout, stderr)``. A missing return code is reported
+        as ``0``.
+
+    Raises:
+        asyncio.TimeoutError: The command did not finish within ``timeout``.
+            The child process is killed and reaped before re-raising.
+    """
+    import asyncio
+
+    proc = await asyncio.create_subprocess_exec(
+        *_ssh_exec_argv(
+            remote,
+            ssh_port,
+            remote_cmd=remote_cmd,
+            connect_timeout=connect_timeout,
+            strict_host_key_checking=strict_host_key_checking,
+        ),
+        stdin=asyncio.subprocess.PIPE if stdin_data is not None else None,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    try:
+        stdout, stderr = await asyncio.wait_for(
+            proc.communicate(input=stdin_data), timeout=timeout
+        )
+    except (asyncio.TimeoutError, asyncio.CancelledError):
+        # Clean up on timeout *and* on task cancellation (e.g. the HTTP
+        # client went away) so no orphaned ssh child is left running.
+        if proc.returncode is None:
+            try:
+                proc.kill()
+            except ProcessLookupError:
+                # The child exited between the wait ending and kill(); do not
+                # let that mask the exception callers are waiting for.
+                pass
+        # Drain the pipes and reap the child before propagating.
+        await proc.communicate()
+        raise
+    return proc.returncode or 0, stdout, stderr
@@ -20,6 +20,8 @@ from pydantic import BaseModel
 from core.middleware import require_admin
 from core.platform_compat import (
    IS_WINDOWS,
+    SSH_PATH_OVERRIDE,
+    NVIDIA_PATH_CANDIDATES,
    detached_popen_kwargs,
    find_bash,
    git_bash_path,
@@ -27,6 +29,8 @@ from core.platform_compat import (
    pid_alive,
    safe_chmod,
    which_tool,
+    translate_path,
+    get_wsl_windows_user_profile,
 )
 from routes.shell_routes import TMUX_LOG_DIR

@@ -41,7 +45,7 @@ from routes.cookbook_helpers import (
    _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
    _append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
    _pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
-    _diagnose_serve_output,
+    _diagnose_serve_output, run_ssh_command_async,
    ModelDownloadRequest, ServeRequest,
 )

@@ -557,24 +561,35 @@ def setup_cookbook_routes() -> APIRouter:
            for d in model_dir.split(','):
                d = d.strip()
                if d:
-                    model_dirs.append(d)
-        paths_code = _cached_model_scan_script(model_dirs)
+                    translated_d = translate_path(d) if not host else d
+                    model_dirs.append(translated_d)
+        win_hf_hub = None
+        if not host:
+            win_profile = get_wsl_windows_user_profile()
+            win_hf_hub = os.path.join(win_profile, ".cache", "huggingface", "hub") if win_profile else None
+            
+        paths_code = _cached_model_scan_script(model_dirs, win_hf_hub)

        scan_py = TMUX_LOG_DIR / "scan_cache.py"
        scan_py.write_text(paths_code, encoding="utf-8")
+        scan_payload = scan_py.read_bytes()

        if host:
-            _pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
            if platform == "windows":
-                # Windows: use 'python' and pipe via stdin with double-quote wrapping
-                cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\''
+                remote_cmd = "python -"
            else:
-                cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'"
-            proc = await asyncio.create_subprocess_shell(
-                cmd,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-                cwd=str(Path.home()),
+                # POSIX: use 'python3' if available, fall back to 'python'; throw if neither is found.
+                remote_cmd = (
+                    "if command -v python3 >/dev/null 2>&1; then python3 -; "
+                    "elif command -v python >/dev/null 2>&1; then python -; "
+                    "else echo \"python3/python not found\" >&2; exit 127; fi"
+                )
+            rc, stdout_b, stderr_b = await run_ssh_command_async(
+                host,
+                ssh_port,
+                remote_cmd,
+                timeout=60,
+                stdin_data=scan_payload,
            )
        else:
            # LOCAL scan: use sys.executable (the venv Python Odysseus is already
@@ -594,7 +609,7 @@ def setup_cookbook_routes() -> APIRouter:
                stderr=asyncio.subprocess.PIPE,
                cwd=str(Path.home()),
            )
-        stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
+            stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)

        models = []
        try:
@@ -874,6 +889,12 @@ def setup_cookbook_routes() -> APIRouter:
            # pip cache so they don't fail mid-build with "No space left" (#1219)
            # and leave the dep installed-but-unusable (#1459).
            req.cmd = _pip_install_no_cache(req.cmd)
+            # Accept common aliases and enforce server extras for llama-cpp so
+            # `python -m llama_cpp.server` has all runtime dependencies.
+            req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
+            req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
+            if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
+                req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
            # PEP-508-style package spec — letters, digits, `.-_` for the
            # name; `[` `]` for extras; `<>=!~,` for version specifiers.
            # v2 review HIGH-14: tightened from the previous regex which
@@ -1354,11 +1375,38 @@ def setup_cookbook_routes() -> APIRouter:
    async def _run_nvidia_smi(query: str, host: str | None, ssh_port: str | None, timeout: int = 8):
        """Run nvidia-smi locally or over SSH. Returns (stdout, error_or_None)."""
        if host:
-            pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
-            cmd = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {pf}{host} '{query}'"
-            proc = await asyncio.create_subprocess_shell(
-                cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
-            )
+            candidates = [query]
+            stripped = query.strip()
+            if stripped.startswith("nvidia-smi "):
+                args = stripped[len("nvidia-smi "):]
+                candidates.append(
+                    "bash -lc "
+                    + shlex.quote(
+                        f"{SSH_PATH_OVERRIDE}"
+                        f"nvidia-smi {args}"
+                    )
+                )
+                for nvidia_path in NVIDIA_PATH_CANDIDATES:
+                    candidates.append(f"{nvidia_path} {args}")
+
+            last_err = "nvidia-smi failed"
+            for candidate in candidates:
+                try:
+                    rc, stdout, stderr = await run_ssh_command_async(
+                        host,
+                        ssh_port,
+                        candidate,
+                        connect_timeout=5,
+                        timeout=timeout,
+                    )
+                except asyncio.TimeoutError:
+                    return None, "nvidia-smi timed out"
+                if rc == 0:
+                    return stdout.decode("utf-8", errors="replace"), None
+                err = (stderr.decode("utf-8", errors="replace") or "").strip()[:200]
+                if err:
+                    last_err = err
+            return None, last_err
        else:
            proc = await asyncio.create_subprocess_exec(
                *shlex.split(query),