fix(hwfit): normalize CPU arch for fallback estimates (#4441)

This commit is contained in:
RaresKeY
2026-06-18 20:26:22 +02:00
committed by GitHub
parent b51d83b16d
commit 16e660ad09
5 changed files with 119 additions and 10 deletions
+4 -3
View File
@@ -146,17 +146,18 @@ def _canonical_cpu_backend(system):
if backend in ("cpu_x86", "cpu_arm"): if backend in ("cpu_x86", "cpu_arm"):
return backend return backend
# Raw CPU-architecture aliases # Raw CPU-architecture aliases. Treat plain "arm" as 32-bit ARM, not the
# ARM64-class CPU fallback used for Apple Silicon/aarch64 machines.
if backend in ("x86_64", "amd64", "i386", "i686"): if backend in ("x86_64", "amd64", "i386", "i686"):
return "cpu_x86" return "cpu_x86"
if backend in ("arm64", "aarch64", "arm"): if backend in ("arm64", "aarch64"):
return "cpu_arm" return "cpu_arm"
# Prefer an explicit CPU architecture field when present # Prefer an explicit CPU architecture field when present
if cpu_arch: if cpu_arch:
if cpu_arch in ("x86_64", "amd64", "x86", "i386", "i686"): if cpu_arch in ("x86_64", "amd64", "x86", "i386", "i686"):
return "cpu_x86" return "cpu_x86"
if cpu_arch in ("arm64", "aarch64", "arm"): if cpu_arch in ("arm64", "aarch64"):
return "cpu_arm" return "cpu_arm"
# Apple Silicon enters ranking as backend="metal"; its CPU path is ARM. # Apple Silicon enters ranking as backend="metal"; its CPU path is ARM.
+26 -7
View File
@@ -320,7 +320,7 @@ def _detect_apple_silicon():
# Only Apple Silicon (arm64) has a Metal GPU worth serving LLMs on; Intel # Only Apple Silicon (arm64) has a Metal GPU worth serving LLMs on; Intel
# Macs fall through to the CPU path. # Macs fall through to the CPU path.
if "arm" not in arch and "aarch64" not in arch: if _canonical_cpu_arch(arch) != "arm64":
return None return None
# Chip name, e.g. "Apple M4 Max" — carries the Pro/Max/Ultra variant that # Chip name, e.g. "Apple M4 Max" — carries the Pro/Max/Ultra variant that
@@ -503,6 +503,25 @@ def _get_cpu_count():
return os.cpu_count() or 1 return os.cpu_count() or 1
def _canonical_cpu_arch(value):
arch = str(value or "").lower().strip().replace("-", "_")
if arch in ("x86_64", "amd64", "x64"):
return "x86_64"
if arch in ("i386", "i686", "x86"):
return "x86"
if arch in ("arm64", "aarch64"):
return "arm64"
if arch == "arm" or arch.startswith("armv"):
return "arm"
return arch
def _get_cpu_arch():
    """Return the canonical CPU architecture of the machine being probed.

    With a remote host configured the value comes from ``uname -m`` run via
    ``_run`` (an empty string when that yields nothing); otherwise it comes
    from the local ``platform.machine()``.
    """
    if not _remote_host:
        raw_arch = platform.machine()
    else:
        raw_arch = _run(["uname", "-m"]) or ""
    return _canonical_cpu_arch(raw_arch)
def _powershell_exe(): def _powershell_exe():
"""Pick the best PowerShell executable for LOCAL execution: prefer pwsh """Pick the best PowerShell executable for LOCAL execution: prefer pwsh
(PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute (PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute
@@ -528,6 +547,7 @@ def _detect_windows():
$r.cpu_name = $cpu.Name $r.cpu_name = $cpu.Name
$r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum $r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum
$r.arch = $cpu.AddressWidth $r.arch = $cpu.AddressWidth
$r.cpu_arch = if ($env:PROCESSOR_ARCHITEW6432) { $env:PROCESSOR_ARCHITEW6432 } else { $env:PROCESSOR_ARCHITECTURE }
# GPU detection via nvidia-smi (fastest) or WMI fallback # GPU detection via nvidia-smi (fastest) or WMI fallback
try { try {
$nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null $nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null
@@ -599,6 +619,7 @@ def _detect_windows():
"available_ram_gb": d.get("avail_gb", 0), "available_ram_gb": d.get("avail_gb", 0),
"cpu_cores": _as_int(d.get("cpu_cores"), 1), "cpu_cores": _as_int(d.get("cpu_cores"), 1),
"cpu_name": _cpu_name, "cpu_name": _cpu_name,
"cpu_arch": _canonical_cpu_arch(d.get("cpu_arch")),
"has_gpu": bool(d.get("gpu_name")), "has_gpu": bool(d.get("gpu_name")),
"gpu_name": d.get("gpu_name"), "gpu_name": d.get("gpu_name"),
"gpu_vram_gb": d.get("gpu_vram_gb"), "gpu_vram_gb": d.get("gpu_vram_gb"),
@@ -794,6 +815,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
available_ram = round(_get_available_ram_gb(), 1) available_ram = round(_get_available_ram_gb(), 1)
cpu_cores = _get_cpu_count() cpu_cores = _get_cpu_count()
cpu_name = _get_cpu_name() cpu_name = _get_cpu_name()
cpu_arch = _get_cpu_arch()
gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd() gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd()
@@ -803,6 +825,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
"available_ram_gb": available_ram, "available_ram_gb": available_ram,
"cpu_cores": cpu_cores, "cpu_cores": cpu_cores,
"cpu_name": cpu_name, "cpu_name": cpu_name,
"cpu_arch": cpu_arch,
"has_gpu": True, "has_gpu": True,
"gpu_name": gpu_info["gpu_name"], "gpu_name": gpu_info["gpu_name"],
"gpu_vram_gb": gpu_info["gpu_vram_gb"], "gpu_vram_gb": gpu_info["gpu_vram_gb"],
@@ -817,17 +840,13 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
"unified_memory": gpu_info.get("unified_memory", False), "unified_memory": gpu_info.get("unified_memory", False),
} }
else: else:
if _remote_host: backend = "cpu_arm" if cpu_arch == "arm64" else "cpu_x86"
arch_out = _run(["uname", "-m"]) or ""
else:
import platform as _platform
arch_out = _platform.machine().lower()
backend = "cpu_arm" if "aarch64" in arch_out or "arm" in arch_out else "cpu_x86"
result = { result = {
"total_ram_gb": total_ram, "total_ram_gb": total_ram,
"available_ram_gb": available_ram, "available_ram_gb": available_ram,
"cpu_cores": cpu_cores, "cpu_cores": cpu_cores,
"cpu_name": cpu_name, "cpu_name": cpu_name,
"cpu_arch": cpu_arch,
"has_gpu": False, "has_gpu": False,
"gpu_name": None, "gpu_name": None,
"gpu_vram_gb": None, "gpu_vram_gb": None,
+55
View File
@@ -0,0 +1,55 @@
"""CPU architecture normalization for HW Fit hardware detection."""
import pytest
from services.hwfit import hardware
@pytest.fixture(autouse=True)
def _clear_hwfit_cache(monkeypatch):
    """Start each test with an empty detection cache and no remote target.

    The remote host/platform globals are reset to ``None`` and the container
    check is forced to report ``False``; the cache is emptied again afterwards.
    """
    hardware._cache_by_host.clear()
    for remote_attr in ("_remote_host", "_remote_platform"):
        monkeypatch.setattr(hardware, remote_attr, None)
    monkeypatch.setattr(hardware, "_is_containerized", lambda: False)
    yield
    # Drop whatever the test cached so it cannot leak into the next test.
    hardware._cache_by_host.clear()
def _stub_common_probe(monkeypatch, machine):
    """Replace the host probes with fixed answers for ``detect_system`` tests.

    ``machine`` is the value the patched ``platform.machine()`` reports. RAM,
    core count and CPU name return canned values, and the Apple Silicon and
    AMD detectors report no GPU.
    """

    def _returning(canned):
        # Zero-argument stub, matching how the real probes are called.
        return lambda: canned

    monkeypatch.setattr(hardware.platform, "machine", lambda: machine)
    canned_probes = {
        "_get_ram_gb": 64.0,
        "_get_available_ram_gb": 48.0,
        "_get_cpu_count": 16,
        "_get_cpu_name": "Test CPU",
        "_detect_apple_silicon": None,
        "_detect_amd": None,
    }
    for probe_name, canned in canned_probes.items():
        monkeypatch.setattr(hardware, probe_name, _returning(canned))
def test_detect_system_reports_cpu_arch_for_gpu_backends(monkeypatch):
    """A GPU-backed result must still carry the host CPU architecture.

    cpu_only estimates need ``cpu_arch`` even when the backend is a GPU one.
    """

    def _fake_nvidia():
        # Build a fresh dict per call, like a real detector would.
        return {
            "gpu_name": "NVIDIA GB10",
            "gpu_vram_gb": 64.0,
            "gpu_count": 1,
            "gpus": [],
            "gpu_groups": [],
            "homogeneous": True,
            "backend": "cuda",
        }

    _stub_common_probe(monkeypatch, "aarch64")
    monkeypatch.setattr(hardware, "_detect_nvidia", _fake_nvidia)

    detected = hardware.detect_system(fresh=True)

    assert detected["backend"] == "cuda"
    assert detected["cpu_arch"] == "arm64"
def test_detect_system_keeps_32_bit_arm_on_conservative_cpu_backend(monkeypatch):
    """An armv7l host is reported as ``arm`` and is not given the ``cpu_arm`` backend."""
    _stub_common_probe(monkeypatch, "armv7l")
    monkeypatch.setattr(hardware, "_detect_nvidia", lambda: None)

    detected = hardware.detect_system(fresh=True)

    observed = (detected["cpu_arch"], detected["backend"])
    assert observed == ("arm", "cpu_x86")
+25
View File
@@ -47,6 +47,12 @@ ARM64_SYSTEM = {
"gpu_vram_gb": 0, "gpu_vram_gb": 0,
} }
# System fixture whose backend is the plain "arm" alias (no GPU); used to
# check that plain arm is not routed to the ARM64-class CPU fallback.
ARM32_SYSTEM = {
    "backend": "arm",
    "gpu_name": None,
    "gpu_vram_gb": 0,
}
AARCH64_SYSTEM = { AARCH64_SYSTEM = {
"backend": "aarch64", "backend": "aarch64",
"gpu_name": None, "gpu_name": None,
@@ -79,6 +85,16 @@ def test_cpu_only_on_metal_apple_silicon_uses_cpu_arm_fallback():
assert metal_tps > 0 assert metal_tps > 0
def test_cpu_only_on_gpu_backend_uses_detected_arm64_cpu_arch():
    """cpu_only on a CUDA system with an aarch64 CPU matches the cpu_arm estimate."""
    arm64_cuda_host = {
        **CUDA_SYSTEM,
        "cpu_arch": "aarch64",
        "cpu_name": "Ampere Altra",
    }

    observed_tps = _estimate_speed(DENSE_MODEL, QUANT, "cpu_only", arm64_cuda_host)
    reference_tps = _estimate_speed(DENSE_MODEL, QUANT, "cpu_only", CPU_ARM_SYSTEM)

    assert observed_tps == pytest.approx(reference_tps, rel=1e-9, abs=1e-9)
    assert observed_tps > 0
@pytest.mark.parametrize( @pytest.mark.parametrize(
"arm_alias_system", "arm_alias_system",
[ARM64_SYSTEM, AARCH64_SYSTEM, CPU_ARM_SYSTEM], [ARM64_SYSTEM, AARCH64_SYSTEM, CPU_ARM_SYSTEM],
@@ -93,6 +109,15 @@ def test_cpu_only_preserves_arm_backends(arm_alias_system):
assert alias_tps > 0 assert alias_tps > 0
def test_cpu_only_does_not_treat_plain_arm_as_arm64_fallback():
    """A plain ``arm`` backend gets the same cpu_only estimate as cpu_x86."""
    observed_tps = _estimate_speed(DENSE_MODEL, QUANT, "cpu_only", ARM32_SYSTEM)
    reference_tps = _estimate_speed(DENSE_MODEL, QUANT, "cpu_only", CPU_X86_SYSTEM)

    assert observed_tps == pytest.approx(reference_tps, rel=1e-9, abs=1e-9)
    assert observed_tps > 0
def test_cpu_only_preserves_known_cpu_backends(): def test_cpu_only_preserves_known_cpu_backends():
"""Known CPU backends should be preserved, not rewritten to cpu_x86.""" """Known CPU backends should be preserved, not rewritten to cpu_x86."""
for system in (CPU_X86_SYSTEM, CPU_ARM_SYSTEM): for system in (CPU_X86_SYSTEM, CPU_ARM_SYSTEM):
+9
View File
@@ -165,6 +165,15 @@ def test_intel_mac_skipped(monkeypatch):
assert hardware._detect_apple_silicon() is None assert hardware._detect_apple_silicon() is None
def test_plain_arm_mac_skipped(monkeypatch):
    """A Darwin host reporting ``armv7l`` must not be detected as Apple Silicon."""
    host_platform = hardware.platform
    monkeypatch.setattr(hardware, "_remote_host", None)
    monkeypatch.setattr(host_platform, "system", lambda: "Darwin")
    monkeypatch.setattr(host_platform, "machine", lambda: "armv7l")
    monkeypatch.setattr(hardware, "_run", _fake_sysctl())

    gpu_info = hardware._detect_apple_silicon()

    assert gpu_info is None
def test_detect_system_propagates_unified_memory(monkeypatch): def test_detect_system_propagates_unified_memory(monkeypatch):
"""The unified_memory flag set by GPU detection must survive into the """The unified_memory flag set by GPU detection must survive into the
system dict so the API and UI can report it (it was being dropped).""" system dict so the API and UI can report it (it was being dropped)."""