diff --git a/services/hwfit/fit.py b/services/hwfit/fit.py index 14865d905..a5a49a7ff 100644 --- a/services/hwfit/fit.py +++ b/services/hwfit/fit.py @@ -146,17 +146,18 @@ def _canonical_cpu_backend(system): if backend in ("cpu_x86", "cpu_arm"): return backend - # Raw CPU-architecture aliases + # Raw CPU-architecture aliases. Treat plain "arm" as 32-bit ARM, not the + # ARM64-class CPU fallback used for Apple Silicon/aarch64 machines. if backend in ("x86_64", "amd64", "i386", "i686"): return "cpu_x86" - if backend in ("arm64", "aarch64", "arm"): + if backend in ("arm64", "aarch64"): return "cpu_arm" # Prefer an explicit CPU architecture field when present if cpu_arch: if cpu_arch in ("x86_64", "amd64", "x86", "i386", "i686"): return "cpu_x86" - if cpu_arch in ("arm64", "aarch64", "arm"): + if cpu_arch in ("arm64", "aarch64"): return "cpu_arm" # Apple Silicon enters ranking as backend="metal"; its CPU path is ARM. diff --git a/services/hwfit/hardware.py b/services/hwfit/hardware.py index a3ad7ba05..1c4529839 100644 --- a/services/hwfit/hardware.py +++ b/services/hwfit/hardware.py @@ -320,7 +320,7 @@ def _detect_apple_silicon(): # Only Apple Silicon (arm64) has a Metal GPU worth serving LLMs on; Intel # Macs fall through to the CPU path. - if "arm" not in arch and "aarch64" not in arch: + if _canonical_cpu_arch(arch) != "arm64": return None # Chip name, e.g. 
"Apple M4 Max" — carries the Pro/Max/Ultra variant that @@ -503,6 +503,25 @@ def _get_cpu_count(): return os.cpu_count() or 1 +def _canonical_cpu_arch(value): + arch = str(value or "").lower().strip().replace("-", "_") + if arch in ("x86_64", "amd64", "x64"): + return "x86_64" + if arch in ("i386", "i686", "x86"): + return "x86" + if arch in ("arm64", "aarch64"): + return "arm64" + if arch == "arm" or arch.startswith("armv"): + return "arm" + return arch + + +def _get_cpu_arch(): + if _remote_host: + return _canonical_cpu_arch(_run(["uname", "-m"]) or "") + return _canonical_cpu_arch(platform.machine()) + + def _powershell_exe(): """Pick the best PowerShell executable for LOCAL execution: prefer pwsh (PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute @@ -528,6 +547,7 @@ def _detect_windows(): $r.cpu_name = $cpu.Name $r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum $r.arch = $cpu.AddressWidth + $r.cpu_arch = if ($env:PROCESSOR_ARCHITEW6432) { $env:PROCESSOR_ARCHITEW6432 } else { $env:PROCESSOR_ARCHITECTURE } # GPU detection via nvidia-smi (fastest) or WMI fallback try { $nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null @@ -599,6 +619,7 @@ def _detect_windows(): "available_ram_gb": d.get("avail_gb", 0), "cpu_cores": _as_int(d.get("cpu_cores"), 1), "cpu_name": _cpu_name, + "cpu_arch": _canonical_cpu_arch(d.get("cpu_arch")), "has_gpu": bool(d.get("gpu_name")), "gpu_name": d.get("gpu_name"), "gpu_vram_gb": d.get("gpu_vram_gb"), @@ -794,6 +815,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False): available_ram = round(_get_available_ram_gb(), 1) cpu_cores = _get_cpu_count() cpu_name = _get_cpu_name() + cpu_arch = _get_cpu_arch() gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd() @@ -803,6 +825,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False): "available_ram_gb": available_ram, "cpu_cores": 
"""CPU architecture normalization for HW Fit hardware detection."""

import pytest

from services.hwfit import hardware


@pytest.fixture(autouse=True)
def _clear_hwfit_cache(monkeypatch):
    """Run every test against an empty host cache and local, non-container state.

    Clears ``hardware._cache_by_host`` before and after the test, and pins
    ``_remote_host`` / ``_remote_platform`` to ``None`` and
    ``_is_containerized`` to a stub returning ``False``.
    """
    hardware._cache_by_host.clear()
    pinned_state = (
        ("_remote_host", None),
        ("_remote_platform", None),
        ("_is_containerized", lambda: False),
    )
    for attr_name, replacement in pinned_state:
        monkeypatch.setattr(hardware, attr_name, replacement)
    yield
    hardware._cache_by_host.clear()


def _stub_common_probe(monkeypatch, machine):
    """Replace the hardware probes with canned answers.

    ``platform.machine()`` (as seen through ``hardware.platform``) reports
    ``machine``; RAM, CPU count and CPU name return fixed values; the Apple
    Silicon and AMD GPU detectors return ``None``. ``_detect_nvidia`` is left
    for the individual test to stub.
    """
    monkeypatch.setattr(hardware.platform, "machine", lambda: machine)
    canned_answers = {
        "_get_ram_gb": 64.0,
        "_get_available_ram_gb": 48.0,
        "_get_cpu_count": 16,
        "_get_cpu_name": "Test CPU",
        "_detect_apple_silicon": None,
        "_detect_amd": None,
    }
    for probe_name, answer in canned_answers.items():
        # Bind the answer as a default so each stub keeps its own value.
        monkeypatch.setattr(hardware, probe_name, lambda answer=answer: answer)


def test_detect_system_reports_cpu_arch_for_gpu_backends(monkeypatch):
    """GPU-backed systems still need CPU architecture for cpu_only estimates."""

    def _fake_nvidia():
        return {
            "gpu_name": "NVIDIA GB10",
            "gpu_vram_gb": 64.0,
            "gpu_count": 1,
            "gpus": [],
            "gpu_groups": [],
            "homogeneous": True,
            "backend": "cuda",
        }

    _stub_common_probe(monkeypatch, "aarch64")
    monkeypatch.setattr(hardware, "_detect_nvidia", _fake_nvidia)

    detected = hardware.detect_system(fresh=True)

    assert detected["backend"] == "cuda"
    assert detected["cpu_arch"] == "arm64"


def test_detect_system_keeps_32_bit_arm_on_conservative_cpu_backend(monkeypatch):
    """Plain arm/armv7 is not the same as the ARM64-class cpu_arm fallback."""
    _stub_common_probe(monkeypatch, "armv7l")
    monkeypatch.setattr(hardware, "_detect_nvidia", lambda: None)

    detected = hardware.detect_system(fresh=True)

    assert detected["cpu_arch"] == "arm"
    assert detected["backend"] == "cpu_x86"
AARCH64_SYSTEM, CPU_ARM_SYSTEM], @@ -93,6 +109,15 @@ def test_cpu_only_preserves_arm_backends(arm_alias_system): assert alias_tps > 0 +def test_cpu_only_does_not_treat_plain_arm_as_arm64_fallback(): + """Docker/OCI plain arm is not the ARM64-class fallback used for Apple Silicon.""" + arm32_tps = _estimate_speed(DENSE_MODEL, QUANT, "cpu_only", ARM32_SYSTEM) + x86_tps = _estimate_speed(DENSE_MODEL, QUANT, "cpu_only", CPU_X86_SYSTEM) + + assert arm32_tps == pytest.approx(x86_tps, rel=1e-9, abs=1e-9) + assert arm32_tps > 0 + + def test_cpu_only_preserves_known_cpu_backends(): """Known CPU backends should be preserved, not rewritten to cpu_x86.""" for system in (CPU_X86_SYSTEM, CPU_ARM_SYSTEM): diff --git a/tests/test_hwfit_macos.py b/tests/test_hwfit_macos.py index a979d14eb..f81cc9b38 100644 --- a/tests/test_hwfit_macos.py +++ b/tests/test_hwfit_macos.py @@ -165,6 +165,15 @@ def test_intel_mac_skipped(monkeypatch): assert hardware._detect_apple_silicon() is None +def test_plain_arm_mac_skipped(monkeypatch): + """Only ARM64-class Macs should enter the Apple Silicon Metal path.""" + monkeypatch.setattr(hardware, "_remote_host", None) + monkeypatch.setattr(hardware.platform, "system", lambda: "Darwin") + monkeypatch.setattr(hardware.platform, "machine", lambda: "armv7l") + monkeypatch.setattr(hardware, "_run", _fake_sysctl()) + assert hardware._detect_apple_silicon() is None + + def test_detect_system_propagates_unified_memory(monkeypatch): """The unified_memory flag set by GPU detection must survive into the system dict so the API and UI can report it (it was being dropped)."""