fix(platform): Improve WSL SSH remote compatibility (#3316)

* fix(platform): add WSL compatibility functions and path translation
  fix(cookbook): enhance model scan script to support additional HuggingFace cache paths
  fix(hardware): improve cache key generation for remote SSH context
  test(tests): add tests for WSL detection and path translation functionality
* fix(cookbook): prefer prebuilt wheels for llama-cpp-python and normalize package aliases
* fix: enable StrictHostKeyChecking in nvidia probe
  refactor: consolidate ssh & powershell command execution into utility functions in core module
  refactor: consolidate nvidia path candidates into single variables in core module
  tests: add tests for new utility functions
* fix: correct wrong variable name
2026-06-17 10:15:27 -04:00 · 2026-06-08 00:33:50 +02:00
parent 73315e6ddc
commit 9c90f62657
8 changed files with 763 additions and 33 deletions
@@ -20,6 +20,8 @@ from pydantic import BaseModel
 from core.middleware import require_admin
 from core.platform_compat import (
    IS_WINDOWS,
+    SSH_PATH_OVERRIDE,
+    NVIDIA_PATH_CANDIDATES,
    detached_popen_kwargs,
    find_bash,
    git_bash_path,
@@ -27,6 +29,8 @@ from core.platform_compat import (
    pid_alive,
    safe_chmod,
    which_tool,
+    translate_path,
+    get_wsl_windows_user_profile,
 )
 from routes.shell_routes import TMUX_LOG_DIR

@@ -41,7 +45,7 @@ from routes.cookbook_helpers import (
    _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
    _append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
    _pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
-    _diagnose_serve_output,
+    _diagnose_serve_output, run_ssh_command_async,
    ModelDownloadRequest, ServeRequest,
 )

@@ -557,24 +561,35 @@ def setup_cookbook_routes() -> APIRouter:
            for d in model_dir.split(','):
                d = d.strip()
                if d:
-                    model_dirs.append(d)
-        paths_code = _cached_model_scan_script(model_dirs)
+                    translated_d = translate_path(d) if not host else d
+                    model_dirs.append(translated_d)
+        win_hf_hub = None
+        if not host:
+            win_profile = get_wsl_windows_user_profile()
+            win_hf_hub = os.path.join(win_profile, ".cache", "huggingface", "hub") if win_profile else None
+            
+        paths_code = _cached_model_scan_script(model_dirs, win_hf_hub)

        scan_py = TMUX_LOG_DIR / "scan_cache.py"
        scan_py.write_text(paths_code, encoding="utf-8")
+        scan_payload = scan_py.read_bytes()

        if host:
-            _pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
            if platform == "windows":
-                # Windows: use 'python' and pipe via stdin with double-quote wrapping
-                cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\''
+                remote_cmd = "python -"
            else:
-                cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'"
-            proc = await asyncio.create_subprocess_shell(
-                cmd,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-                cwd=str(Path.home()),
+                # POSIX: use 'python3' if available, fall back to 'python'; print an error to stderr and exit with status 127 if neither is found.
+                remote_cmd = (
+                    "if command -v python3 >/dev/null 2>&1; then python3 -; "
+                    "elif command -v python >/dev/null 2>&1; then python -; "
+                    "else echo \"python3/python not found\" >&2; exit 127; fi"
+                )
+            rc, stdout_b, stderr_b = await run_ssh_command_async(
+                host,
+                ssh_port,
+                remote_cmd,
+                timeout=60,
+                stdin_data=scan_payload,
            )
        else:
            # LOCAL scan: use sys.executable (the venv Python Odysseus is already
@@ -594,7 +609,7 @@ def setup_cookbook_routes() -> APIRouter:
                stderr=asyncio.subprocess.PIPE,
                cwd=str(Path.home()),
            )
-        stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
+            stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)

        models = []
        try:
@@ -874,6 +889,12 @@ def setup_cookbook_routes() -> APIRouter:
            # pip cache so they don't fail mid-build with "No space left" (#1219)
            # and leave the dep installed-but-unusable (#1459).
            req.cmd = _pip_install_no_cache(req.cmd)
+            # Accept common aliases and enforce server extras for llama-cpp so
+            # `python -m llama_cpp.server` has all runtime dependencies.
+            req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
+            req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
+            if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
+                req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
            # PEP-508-style package spec — letters, digits, `.-_` for the
            # name; `[` `]` for extras; `<>=!~,` for version specifiers.
            # v2 review HIGH-14: tightened from the previous regex which
@@ -1354,11 +1375,38 @@ def setup_cookbook_routes() -> APIRouter:
    async def _run_nvidia_smi(query: str, host: str | None, ssh_port: str | None, timeout: int = 8):
        """Run nvidia-smi locally or over SSH. Returns (stdout, error_or_None)."""
        if host:
-            pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
-            cmd = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {pf}{host} '{query}'"
-            proc = await asyncio.create_subprocess_shell(
-                cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
-            )
+            candidates = [query]
+            stripped = query.strip()
+            if stripped.startswith("nvidia-smi "):
+                args = stripped[len("nvidia-smi "):]
+                candidates.append(
+                    "bash -lc "
+                    + shlex.quote(
+                        f"{SSH_PATH_OVERRIDE}"
+                        f"nvidia-smi {args}"
+                    )
+                )
+                for nvidia_path in NVIDIA_PATH_CANDIDATES:
+                    candidates.append(f"{nvidia_path} {args}")
+
+            last_err = "nvidia-smi failed"
+            for candidate in candidates:
+                try:
+                    rc, stdout, stderr = await run_ssh_command_async(
+                        host,
+                        ssh_port,
+                        candidate,
+                        connect_timeout=5,
+                        timeout=timeout,
+                    )
+                except asyncio.TimeoutError:
+                    return None, "nvidia-smi timed out"
+                if rc == 0:
+                    return stdout.decode("utf-8", errors="replace"), None
+                err = (stderr.decode("utf-8", errors="replace") or "").strip()[:200]
+                if err:
+                    last_err = err
+            return None, last_err
        else:
            proc = await asyncio.create_subprocess_exec(
                *shlex.split(query),