mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-23 05:05:24 -04:00
b57989f08c
Remote Cookbook hwfit probes failed on Windows hosts because the PowerShell script was sent as nested -Command quoting through OpenSSH. Use -EncodedCommand for remote probes, auto-detect platform when omitted (including Darwin for Mac SSH hosts), and return a clearer error when SSH works but the probe fails. Co-authored-by: Cursor <cursoragent@cursor.com>
122 lines
4.1 KiB
Python
122 lines
4.1 KiB
Python
"""Windows support for Cookbook hardware-fit.
|
|
|
|
Odysseus only supports llama.cpp on Windows (vLLM/SGLang are explicitly
|
|
blocked). llama.cpp requires GGUF, so non-GGUF models — including AWQ/GPTQ/
|
|
FP8 safetensors repos — must be filtered out on Windows so the Cookbook does
|
|
not recommend models the user cannot actually serve.
|
|
"""
|
|
|
|
from services.hwfit.fit import rank_models
|
|
from services.hwfit.models import get_models
|
|
|
|
|
|
def _windows_system(ram_gb=32.0, vram_gb=16.0):
|
|
return {
|
|
"has_gpu": True,
|
|
"backend": "cuda",
|
|
"gpu_name": "NVIDIA RTX 4060",
|
|
"gpu_vram_gb": vram_gb,
|
|
"gpu_count": 1,
|
|
"available_ram_gb": ram_gb * 0.7,
|
|
"total_ram_gb": ram_gb,
|
|
"platform": "windows",
|
|
}
|
|
|
|
|
|
def _cuda_system():
|
|
return {
|
|
"has_gpu": True,
|
|
"backend": "cuda",
|
|
"gpu_name": "NVIDIA RTX 4090",
|
|
"gpu_vram_gb": 24.0,
|
|
"gpu_count": 1,
|
|
"available_ram_gb": 32.0,
|
|
"total_ram_gb": 64.0,
|
|
}
|
|
|
|
|
|
def test_only_gguf_models_recommended_on_windows():
    """Every model ranked for the Windows system must be GGUF-servable.

    A ranked model counts as servable when its catalog entry has a truthy
    ``is_gguf`` or a truthy ``gguf_sources``; names missing from the catalog
    count as unservable.
    """
    by_name = {entry["name"]: entry for entry in get_models()}

    def _has_gguf(model_name):
        # Unknown names fall back to an empty record, i.e. "no GGUF".
        record = by_name.get(model_name, {})
        return bool(record.get("is_gguf") or record.get("gguf_sources"))

    unservable = [
        ranked["name"]
        for ranked in rank_models(_windows_system(), limit=900)
        if not _has_gguf(ranked["name"])
    ]
    assert unservable == [], f"{len(unservable)} non-GGUF models on Windows, e.g. {unservable[:3]}"
|
|
|
|
|
|
def test_safetensors_models_still_recommended_on_cuda():
    """Regression guard: the Windows GGUF-only filter must not apply on CUDA."""
    ranked = rank_models(_cuda_system(), limit=900)
    recommended = {row["name"] for row in ranked}
    assert "microsoft/Phi-mini-MoE-instruct" in recommended
|
|
|
|
|
|
def test_awq_model_hidden_on_windows():
    """Reported issue: the AWQ-only Qwen2.5-3B-Instruct-AWQ repo cannot be
    served on Windows, so it must be absent from the Windows ranking."""
    ranked = rank_models(_windows_system(), limit=900)
    recommended = {row["name"] for row in ranked}
    assert "Qwen/Qwen2.5-3B-Instruct-AWQ" not in recommended
|
|
|
|
|
|
def test_awq_model_visible_on_cuda():
    """Counterpart to the Windows check: the same AWQ model must still be
    ranked on CUDA, where vLLM can serve it."""
    ranked = rank_models(_cuda_system(), limit=900)
    recommended = {row["name"] for row in ranked}
    assert "Qwen/Qwen2.5-3B-Instruct-AWQ" in recommended
|
|
|
|
|
|
def test_gguf_alternate_still_recommended_on_windows():
    """The base Qwen2.5-3B-Instruct model has a GGUF source, so it must stay
    in the Windows ranking even though its AWQ variant is hidden."""
    ranked = rank_models(_windows_system(), limit=900)
    recommended = {row["name"] for row in ranked}
    assert "Qwen/Qwen2.5-3B-Instruct" in recommended
|
|
|
|
|
|
def test_remote_windows_probe_uses_encoded_command(monkeypatch):
    """Remote Windows hwfit must not use nested -Command quoting over SSH.

    ``hardware._run`` is replaced by a fake that only answers string commands
    containing ``EncodedCommand``; the test then checks that exactly one
    command was issued and that it used that form.
    """
    from services.hwfit import hardware

    # Canned probe output handed back for an EncodedCommand invocation.
    probe_json = (
        '{"ram_gb":64,"avail_gb":32,"cpu_name":"Test CPU",'
        '"cpu_cores":8,"arch":64}'
    )
    seen = []

    def fake_run(cmd):
        seen.append(cmd)
        if not isinstance(cmd, str) or "EncodedCommand" not in cmd:
            # Any other command shape gets no output.
            return None
        return probe_json

    monkeypatch.setattr(hardware, "_remote_host", "user@winpc")
    monkeypatch.setattr(hardware, "_remote_port", None)
    monkeypatch.setattr(hardware, "_run", fake_run)

    result = hardware._detect_windows()

    assert result is not None
    assert result["total_ram_gb"] == 64
    assert len(seen) == 1
    assert "EncodedCommand" in seen[0]
    assert '-Command "' not in seen[0]
|
|
|
|
|
|
def test_probe_remote_platform_detects_windows(monkeypatch):
    """A probe answering ``Windows_NT`` must be classified as ``"windows"``."""
    from services.hwfit import hardware

    def fake_run(cmd):
        # Every probe command gets the same reply, trailing newline included.
        return "Windows_NT\n"

    monkeypatch.setattr(hardware, "_run", fake_run)

    detected = hardware._probe_remote_platform()
    assert detected == "windows"
|
|
|
|
|
|
def test_probe_remote_platform_detects_darwin(monkeypatch):
    """A host whose ``uname -s`` reports Darwin is classified via the fallback.

    The fake echoes ``%OS%`` back unexpanded for the Windows probe and answers
    ``Darwin`` to ``uname -s``; any other command fails the test.
    """
    from services.hwfit import hardware

    def fake_run(cmd):
        if cmd == ["uname", "-s"]:
            return "Darwin"
        if cmd == "echo %OS%":
            # Windows variable probe comes back as the literal text.
            return "%OS%"
        raise AssertionError(f"unexpected probe cmd: {cmd!r}")

    monkeypatch.setattr(hardware, "_run", fake_run)

    # NOTE(review): the test name says "darwin" but the expected value is
    # "linux" -- presumably macOS hosts are bucketed with Linux here; confirm
    # against _probe_remote_platform.
    detected = hardware._probe_remote_platform()
    assert detected == "linux"
|