"""Windows support for Cookbook hardware-fit. Odysseus only supports llama.cpp on Windows (vLLM/SGLang are explicitly blocked). llama.cpp requires GGUF, so non-GGUF models — including AWQ/GPTQ/ FP8 safetensors repos — must be filtered out on Windows so the Cookbook does not recommend models the user cannot actually serve. """ from services.hwfit.fit import rank_models from services.hwfit.models import get_models def _windows_system(ram_gb=32.0, vram_gb=16.0): return { "has_gpu": True, "backend": "cuda", "gpu_name": "NVIDIA RTX 4060", "gpu_vram_gb": vram_gb, "gpu_count": 1, "available_ram_gb": ram_gb * 0.7, "total_ram_gb": ram_gb, "platform": "windows", } def _cuda_system(): return { "has_gpu": True, "backend": "cuda", "gpu_name": "NVIDIA RTX 4090", "gpu_vram_gb": 24.0, "gpu_count": 1, "available_ram_gb": 32.0, "total_ram_gb": 64.0, } def test_only_gguf_models_recommended_on_windows(): """llama.cpp (GGUF) is the only servable path on Windows, so every model recommended there must ship a real GGUF — no vLLM-only AWQ/GPTQ/FP8.""" catalog = {m["name"]: m for m in get_models()} unservable = [ r["name"] for r in rank_models(_windows_system(), limit=900) if not (catalog.get(r["name"], {}).get("is_gguf") or catalog.get(r["name"], {}).get("gguf_sources")) ] assert unservable == [], f"{len(unservable)} non-GGUF models on Windows, e.g. {unservable[:3]}" def test_safetensors_models_still_recommended_on_cuda(): """Regression guard: the GGUF-only rule must not leak onto CUDA.""" names = {r["name"] for r in rank_models(_cuda_system(), limit=900)} assert "microsoft/Phi-mini-MoE-instruct" in names def test_awq_model_hidden_on_windows(): """The user's reported issue: Qwen2.5-3B-Instruct-AWQ is AWQ-only and must not be recommended on Windows where it cannot be served.""" names = {r["name"] for r in rank_models(_windows_system(), limit=900)} assert "Qwen/Qwen2.5-3B-Instruct-AWQ" not in names def test_awq_model_visible_on_cuda(): """The same AWQ model should still be visible on CUDA where vLLM can serve it.""" names = {r["name"] for r in rank_models(_cuda_system(), limit=900)} assert "Qwen/Qwen2.5-3B-Instruct-AWQ" in names def test_gguf_alternate_still_recommended_on_windows(): """Qwen2.5-3B-Instruct (the base model) has a GGUF source, so it should still appear on Windows even though the AWQ variant is hidden.""" names = {r["name"] for r in rank_models(_windows_system(), limit=900)} assert "Qwen/Qwen2.5-3B-Instruct" in names def test_remote_windows_probe_uses_encoded_command(monkeypatch): """Remote Windows hwfit must not use nested -Command quoting over SSH.""" from services.hwfit import hardware calls = [] monkeypatch.setattr(hardware, "_remote_host", "user@winpc") monkeypatch.setattr(hardware, "_remote_port", None) def fake_run(cmd): calls.append(cmd) if isinstance(cmd, str) and "EncodedCommand" in cmd: return ( '{"ram_gb":64,"avail_gb":32,"cpu_name":"Test CPU",' '"cpu_cores":8,"arch":64}' ) return None monkeypatch.setattr(hardware, "_run", fake_run) result = hardware._detect_windows() assert result is not None assert result["total_ram_gb"] == 64 assert len(calls) == 1 assert "EncodedCommand" in calls[0] assert '-Command "' not in calls[0] def test_probe_remote_platform_detects_windows(monkeypatch): from services.hwfit import hardware monkeypatch.setattr(hardware, "_run", lambda cmd: "Windows_NT\n") assert hardware._probe_remote_platform() == "windows" def test_probe_remote_platform_detects_darwin(monkeypatch): from services.hwfit import hardware def fake_run(cmd): if cmd == "echo %OS%": return "%OS%" if cmd == ["uname", "-s"]: return "Darwin" raise AssertionError(f"unexpected probe cmd: {cmd!r}") monkeypatch.setattr(hardware, "_run", fake_run) assert hardware._probe_remote_platform() == "linux"