mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
fix(hwfit): distinguish Apple Silicon bandwidth variants (#2564)
* fix: resolve Apple Silicon bandwidth variants * fix(hwfit): preserve string lookup path in _lookup_bandwidth * fix(hwfit): guard Apple bandwidth lookup against false GPU matches. Add an `"apple" not in gn` check to _lookup_apple_bandwidth() so that non-Apple GPUs with "m3"/"m4"/"m5" in their names (e.g. NVIDIA Quadro M4 000) don't incorrectly match Apple bandwidth tiers. Addresses @o3LL's review comment on PR #2564.
This commit is contained in:
+64
-13
@@ -19,22 +19,32 @@ GPU_BANDWIDTH = {
|
|||||||
"6950 xt": 576, "6900 xt": 512, "6800 xt": 512, "6800": 512, "6700 xt": 384, "6600 xt": 256, "6600": 224,
|
"6950 xt": 576, "6900 xt": 512, "6800 xt": 512, "6800": 512, "6700 xt": 384, "6600 xt": 256, "6600": 224,
|
||||||
"mi300x": 5300, "mi300": 5300, "mi250x": 3277, "mi250": 3277, "mi210": 1638, "mi100": 1229,
|
"mi300x": 5300, "mi300": 5300, "mi250x": 3277, "mi250": 3277, "mi210": 1638, "mi100": 1229,
|
||||||
"9070 xt": 624, "9070": 488, "9060 xt": 322, "9060": 322,
|
"9070 xt": 624, "9070": 488, "9060 xt": 322, "9060": 322,
|
||||||
# Apple Silicon unified-memory bandwidth (GB/s). Keyed off the chip name
|
|
||||||
# reported by sysctl machdep.cpu.brand_string (e.g. "Apple M4 Max"). Listed
|
|
||||||
# before the bare "m_" keys matters less than length-sorting (done below),
|
|
||||||
# which guarantees "m4 max" is tried before "m4".
|
|
||||||
"m1 ultra": 800, "m1 max": 400, "m1 pro": 200, "m1": 68,
|
|
||||||
"m2 ultra": 800, "m2 max": 400, "m2 pro": 200, "m2": 100,
|
|
||||||
"m3 ultra": 800, "m3 max": 300, "m3 pro": 150, "m3": 100,
|
|
||||||
"m4 max": 546, "m4 pro": 273, "m4": 120,
|
|
||||||
"m5 max": 546, "m5 pro": 273, "m5": 150,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Pre-sort keys by length descending for correct substring matching
|
# Pre-sort keys by length descending for correct substring matching
|
||||||
_BW_KEYS_SORTED = sorted(GPU_BANDWIDTH.keys(), key=len, reverse=True)
|
_BW_KEYS_SORTED = sorted(GPU_BANDWIDTH.keys(), key=len, reverse=True)
|
||||||
|
|
||||||
# metal: backstop for Apple Silicon chips not in GPU_BANDWIDTH (e.g. a future
|
# Apple Silicon unified-memory bandwidth (GB/s). For chip families with both
|
||||||
# M5) — the named chips above take the accurate bandwidth path instead.
|
# binned and full variants under the same "Apple Mx Max" brand string, prefer
|
||||||
|
# GPU core count when hardware detection provides it; otherwise fall back to the
|
||||||
|
# conservative tier so speed estimates do not over-promise.
|
||||||
|
# Chips whose brand string maps to exactly one bandwidth figure (GB/s).
APPLE_BANDWIDTH_FIXED = {
    "m1 ultra": 800,
    "m1 max": 400,
    "m1 pro": 200,
    "m1": 68,
    "m2 ultra": 800,
    "m2 max": 400,
    "m2 pro": 200,
    "m2": 100,
    "m3 ultra": 800,
    "m3 pro": 150,
    "m3": 100,
    "m4 pro": 273,
    "m4": 120,
    "m5 pro": 307,
    "m5": 153,
}

# Chips sold in several variants under one brand string: bandwidth (GB/s)
# is keyed by GPU core count.
APPLE_BANDWIDTH_BY_CORES = {
    "m3 max": {30: 300, 40: 400},
    "m4 max": {32: 410, 40: 546},
    "m5 max": {32: 460, 40: 614},
}

# Longest keys first so substring matching tries "m4 pro" before "m4".
_APPLE_FIXED_KEYS_SORTED = sorted(APPLE_BANDWIDTH_FIXED, key=len, reverse=True)
_APPLE_VARIANT_KEYS_SORTED = sorted(APPLE_BANDWIDTH_BY_CORES, key=len, reverse=True)
|
||||||
|
|
||||||
|
# metal: backstop for Apple Silicon chips not in the explicit tables above
|
||||||
|
# (e.g. a future M6) — use a conservative generic estimate when unknown.
|
||||||
FALLBACK_K = {"cuda": 220, "rocm": 180, "metal": 150, "cpu_x86": 70, "cpu_arm": 90}
|
FALLBACK_K = {"cuda": 220, "rocm": 180, "metal": 150, "cpu_x86": 70, "cpu_arm": 90}
|
||||||
|
|
||||||
USE_CASE_WEIGHTS = {
|
USE_CASE_WEIGHTS = {
|
||||||
@@ -60,10 +70,51 @@ CONTEXT_TARGET = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _lookup_bandwidth(gpu_name):
|
def _lookup_apple_bandwidth(system):
    """Return the Apple Silicon memory bandwidth (GB/s) for *system*, or None.

    Args:
        system: Mapping that is read for "gpu_name" and, optionally,
            "gpu_cores".

    Returns:
        The bandwidth from APPLE_BANDWIDTH_BY_CORES when the chip has
        core-count variants (the lowest tier if the core count is missing
        or not listed), otherwise the APPLE_BANDWIDTH_FIXED entry, or None
        when the name is not a recognised Apple chip.
    """
    gpu_name = system.get("gpu_name")
    if not isinstance(gpu_name, str) or not gpu_name:
        return None
    gn = gpu_name.lower()

    # Guard against false matches on non-Apple GPUs whose names contain
    # "m3"/"m4"/"m5" (e.g. NVIDIA Quadro M4 000).
    if "apple" not in gn:
        return None

    raw_cores = system.get("gpu_cores")
    try:
        gpu_cores = int(raw_cores) if raw_cores is not None else None
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")). Any unusable value is treated
        # the same as "core count unknown".
        gpu_cores = None

    for key in _APPLE_VARIANT_KEYS_SORTED:
        if key not in gn:
            continue
        by_cores = APPLE_BANDWIDTH_BY_CORES[key]
        if gpu_cores in by_cores:
            return by_cores[gpu_cores]
        # Unknown or missing core count: take the most conservative tier.
        return min(by_cores.values())

    for key in _APPLE_FIXED_KEYS_SORTED:
        if key in gn:
            return APPLE_BANDWIDTH_FIXED[key]
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _lookup_bandwidth(system):
|
||||||
|
if isinstance(system, dict):
|
||||||
|
gpu_name = system.get("gpu_name")
|
||||||
|
else:
|
||||||
|
gpu_name = system
|
||||||
|
|
||||||
|
if not isinstance(gpu_name, str) or not gpu_name:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if isinstance(system, dict):
|
||||||
|
bw = _lookup_apple_bandwidth(system)
|
||||||
|
if bw is not None:
|
||||||
|
return bw
|
||||||
|
|
||||||
|
gn = gpu_name.lower()
|
||||||
for key in _BW_KEYS_SORTED:
|
for key in _BW_KEYS_SORTED:
|
||||||
if key in gn:
|
if key in gn:
|
||||||
return GPU_BANDWIDTH[key]
|
return GPU_BANDWIDTH[key]
|
||||||
@@ -84,7 +135,7 @@ def _estimate_speed(model, quant, run_mode, system, offload_frac=0.0):
|
|||||||
"""
|
"""
|
||||||
pb = _active_params_b(model)
|
pb = _active_params_b(model)
|
||||||
is_moe = model.get("is_moe", False)
|
is_moe = model.get("is_moe", False)
|
||||||
bw = _lookup_bandwidth(system.get("gpu_name"))
|
bw = _lookup_bandwidth(system)
|
||||||
backend = system.get("backend", "cpu_x86")
|
backend = system.get("backend", "cpu_x86")
|
||||||
|
|
||||||
if bw and run_mode in ("gpu", "cpu_offload"):
|
if bw and run_mode in ("gpu", "cpu_offload"):
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
@@ -335,6 +336,37 @@ def _detect_apple_silicon():
|
|||||||
if total_gb <= 0:
|
if total_gb <= 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _parse_apple_gpu_cores(text):
    """Extract the Apple GPU core count from display-profiler output.

    The text is first tried as JSON: the first dict entry under
    "SPDisplaysDataType" whose "sppci_model" (or "_name") contains "apple"
    and whose "sppci_cores" parses as an integer wins. If that yields
    nothing, the text is searched for a "Total Number of Cores: N" line.

    Args:
        text: Raw command output (str or bytes); may be None or empty.

    Returns:
        The core count as an int, or None when it cannot be determined.
    """
    if not text:
        return None
    if isinstance(text, (bytes, bytearray)):
        # The regex fallback below needs str; decode once up front.
        text = bytes(text).decode("utf-8", "replace")

    try:
        data = json.loads(text)
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        data = None

    entries = data.get("SPDisplaysDataType") if isinstance(data, dict) else None
    # Only iterate a real list: a malformed payload (e.g. a number) must not
    # raise, it should simply fall through to the plain-text pattern.
    if isinstance(entries, list):
        for gpu in entries:
            if not isinstance(gpu, dict):
                continue
            model = str(gpu.get("sppci_model") or gpu.get("_name") or "")
            if "apple" not in model.lower():
                continue
            try:
                return int(str(gpu.get("sppci_cores")).strip())
            except (TypeError, ValueError):
                continue

    m = re.search(r"Total Number of Cores:\s*(\d+)", text)
    if m is None:
        return None
    try:
        return int(m.group(1))
    except ValueError:
        return None
|
||||||
|
|
||||||
|
gpu_cores = _parse_apple_gpu_cores(_run(["system_profiler", "SPDisplaysDataType", "-json"]))
|
||||||
|
if gpu_cores is None:
|
||||||
|
gpu_cores = _parse_apple_gpu_cores(_run(["system_profiler", "SPDisplaysDataType"]))
|
||||||
|
|
||||||
# Usable GPU budget. macOS lets Metal use most of unified memory, but the
|
# Usable GPU budget. macOS lets Metal use most of unified memory, but the
|
||||||
# default working-set limit scales with RAM: small machines have to keep
|
# default working-set limit scales with RAM: small machines have to keep
|
||||||
# more back for the OS + app. These fractions track Apple's
|
# more back for the OS + app. These fractions track Apple's
|
||||||
@@ -357,7 +389,7 @@ def _detect_apple_silicon():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
gpu = {"index": 0, "name": brand, "vram_gb": vram_gb}
|
gpu = {"index": 0, "name": brand, "vram_gb": vram_gb}
|
||||||
return {
|
info = {
|
||||||
"gpu_name": brand,
|
"gpu_name": brand,
|
||||||
"gpu_vram_gb": vram_gb,
|
"gpu_vram_gb": vram_gb,
|
||||||
"gpu_count": 1,
|
"gpu_count": 1,
|
||||||
@@ -369,6 +401,9 @@ def _detect_apple_silicon():
|
|||||||
# separate pool — downstream fit logic uses this to avoid double-budgeting.
|
# separate pool — downstream fit logic uses this to avoid double-budgeting.
|
||||||
"unified_memory": True,
|
"unified_memory": True,
|
||||||
}
|
}
|
||||||
|
if gpu_cores is not None:
|
||||||
|
info["gpu_cores"] = gpu_cores
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
def _read_file(path):
|
def _read_file(path):
|
||||||
@@ -772,6 +807,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
|||||||
"gpu_name": gpu_info["gpu_name"],
|
"gpu_name": gpu_info["gpu_name"],
|
||||||
"gpu_vram_gb": gpu_info["gpu_vram_gb"],
|
"gpu_vram_gb": gpu_info["gpu_vram_gb"],
|
||||||
"gpu_count": gpu_info["gpu_count"],
|
"gpu_count": gpu_info["gpu_count"],
|
||||||
|
"gpu_cores": gpu_info.get("gpu_cores"),
|
||||||
"gpus": gpu_info.get("gpus", []),
|
"gpus": gpu_info.get("gpus", []),
|
||||||
"gpu_groups": gpu_info.get("gpu_groups", []),
|
"gpu_groups": gpu_info.get("gpu_groups", []),
|
||||||
"homogeneous": gpu_info.get("homogeneous", True),
|
"homogeneous": gpu_info.get("homogeneous", True),
|
||||||
|
|||||||
@@ -0,0 +1,40 @@
|
|||||||
|
from services.hwfit.fit import _lookup_bandwidth
|
||||||
|
|
||||||
|
|
||||||
|
def test_m3_max_bandwidth_uses_gpu_cores():
    """M3 Max bandwidth is chosen by GPU core count."""
    for cores, expected in ((30, 300), (40, 400)):
        system = {"gpu_name": "Apple M3 Max", "gpu_cores": cores}
        assert _lookup_bandwidth(system) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_m4_max_bandwidth_uses_gpu_cores():
    """M4 Max bandwidth is chosen by GPU core count."""
    for cores, expected in ((32, 410), (40, 546)):
        system = {"gpu_name": "Apple M4 Max", "gpu_cores": cores}
        assert _lookup_bandwidth(system) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_m5_max_bandwidth_uses_gpu_cores():
    """M5 Max bandwidth is chosen by GPU core count."""
    for cores, expected in ((32, 460), (40, 614)):
        system = {"gpu_name": "Apple M5 Max", "gpu_cores": cores}
        assert _lookup_bandwidth(system) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_apple_max_bandwidth_falls_back_conservatively_without_gpu_cores():
    """With no core count, each Max chip resolves to its lowest tier."""
    lowest_tier = (
        ("Apple M3 Max", 300),
        ("Apple M4 Max", 410),
        ("Apple M5 Max", 460),
    )
    for chip, expected in lowest_tier:
        assert _lookup_bandwidth({"gpu_name": chip}) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_fixed_apple_bandwidth_entries_include_updated_m5_values():
    """Single-variant M5 chips resolve to their fixed bandwidth entries."""
    for chip, expected in (("Apple M5 Pro", 307), ("Apple M5", 153)):
        assert _lookup_bandwidth({"gpu_name": chip}) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_apple_gpu_does_not_match_apple_bandwidth():
    """NVIDIA Quadro M4 000 should NOT match Apple bandwidth lookup."""
    quadro_names = (
        "NVIDIA Quadro M4 000",
        "NVIDIA Quadro M3 000",
        "NVIDIA Quadro M5 000",
    )
    for gpu_name in quadro_names:
        assert _lookup_bandwidth({"gpu_name": gpu_name}) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_apple_gpu_with_cores_does_not_match():
    """A core count on a non-Apple GPU must not trigger the Apple tables.

    The Apple helper is checked directly: _lookup_bandwidth() falls through
    to the generic GPU_BANDWIDTH table for these cards, so asserting that it
    returns None would contradict that table.
    """
    # Imported locally so this test does not depend on the module header.
    from services.hwfit.fit import _lookup_apple_bandwidth

    non_apple = (
        {"gpu_name": "NVIDIA GeForce RTX 4090", "gpu_cores": 128},
        {"gpu_name": "AMD Radeon RX 9070 XT", "gpu_cores": 64},
    )
    for system in non_apple:
        assert _lookup_apple_bandwidth(system) is None

    # The generic table still resolves the card; gpu_cores is ignored.
    assert _lookup_bandwidth({"gpu_name": "AMD Radeon RX 9070 XT", "gpu_cores": 64}) == 624
|
||||||
@@ -4,6 +4,8 @@ Covers the Metal-specific behavior added for Apple Silicon and locks in the
|
|||||||
guarantee that non-macOS (Linux/Windows) detection is unchanged.
|
guarantee that non-macOS (Linux/Windows) detection is unchanged.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
from services.hwfit import hardware
|
from services.hwfit import hardware
|
||||||
from services.hwfit.fit import rank_models
|
from services.hwfit.fit import rank_models
|
||||||
from services.hwfit.models import get_models
|
from services.hwfit.models import get_models
|
||||||
@@ -22,7 +24,7 @@ def _metal_system(ram_gb=16.0, vram_gb=10.7):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _fake_sysctl(brand="Apple M2 Pro", memsize_gb=32, wired_mb=None):
|
def _fake_sysctl(brand="Apple M2 Pro", memsize_gb=32, wired_mb=None, display_json=None, display_text=None):
|
||||||
def run(cmd):
|
def run(cmd):
|
||||||
joined = " ".join(cmd)
|
joined = " ".join(cmd)
|
||||||
if "machdep.cpu.brand_string" in joined:
|
if "machdep.cpu.brand_string" in joined:
|
||||||
@@ -31,6 +33,12 @@ def _fake_sysctl(brand="Apple M2 Pro", memsize_gb=32, wired_mb=None):
|
|||||||
return str(int(memsize_gb * 1024**3))
|
return str(int(memsize_gb * 1024**3))
|
||||||
if "iogpu.wired_limit_mb" in joined:
|
if "iogpu.wired_limit_mb" in joined:
|
||||||
return str(wired_mb) if wired_mb is not None else None
|
return str(wired_mb) if wired_mb is not None else None
|
||||||
|
if "system_profiler SPDisplaysDataType -json" in joined:
|
||||||
|
if isinstance(display_json, (dict, list)):
|
||||||
|
return json.dumps(display_json)
|
||||||
|
return display_json
|
||||||
|
if "system_profiler SPDisplaysDataType" in joined:
|
||||||
|
return display_text
|
||||||
return None
|
return None
|
||||||
return run
|
return run
|
||||||
|
|
||||||
@@ -98,16 +106,47 @@ def test_apple_silicon_detected_as_metal(monkeypatch):
|
|||||||
monkeypatch.setattr(hardware, "_remote_host", None)
|
monkeypatch.setattr(hardware, "_remote_host", None)
|
||||||
monkeypatch.setattr(hardware.platform, "system", lambda: "Darwin")
|
monkeypatch.setattr(hardware.platform, "system", lambda: "Darwin")
|
||||||
monkeypatch.setattr(hardware.platform, "machine", lambda: "arm64")
|
monkeypatch.setattr(hardware.platform, "machine", lambda: "arm64")
|
||||||
monkeypatch.setattr(hardware, "_run", _fake_sysctl(memsize_gb=32))
|
monkeypatch.setattr(hardware, "_run", _fake_sysctl(
|
||||||
|
memsize_gb=32,
|
||||||
|
display_json={"SPDisplaysDataType": [{"sppci_model": "Apple M2 Pro", "sppci_cores": "19"}]},
|
||||||
|
))
|
||||||
|
|
||||||
info = hardware._detect_apple_silicon()
|
info = hardware._detect_apple_silicon()
|
||||||
assert info is not None
|
assert info is not None
|
||||||
assert info["backend"] == "metal"
|
assert info["backend"] == "metal"
|
||||||
assert info["gpu_name"] == "Apple M2 Pro"
|
assert info["gpu_name"] == "Apple M2 Pro"
|
||||||
assert info["unified_memory"] is True
|
assert info["unified_memory"] is True
|
||||||
|
assert info["gpu_cores"] == 19
|
||||||
assert info["gpu_vram_gb"] == 24.0 # 32GB * 0.75
|
assert info["gpu_vram_gb"] == 24.0 # 32GB * 0.75
|
||||||
|
|
||||||
|
|
||||||
|
def test_apple_silicon_gpu_cores_fall_back_to_plain_text(monkeypatch):
    """Unparseable JSON output makes detection read the plain-text report."""
    fake_run = _fake_sysctl(
        brand="Apple M4 Max",
        memsize_gb=64,
        display_json="{not-json",
        display_text="Graphics/Displays:\n\nApple M4 Max:\n Total Number of Cores: 32\n",
    )
    monkeypatch.setattr(hardware, "_remote_host", None)
    monkeypatch.setattr(hardware.platform, "system", lambda: "Darwin")
    monkeypatch.setattr(hardware.platform, "machine", lambda: "arm64")
    monkeypatch.setattr(hardware, "_run", fake_run)

    detected = hardware._detect_apple_silicon()

    assert detected is not None
    assert detected["gpu_cores"] == 32
|
||||||
|
|
||||||
|
|
||||||
|
def test_apple_silicon_gpu_cores_are_optional(monkeypatch):
    """Detection succeeds and omits "gpu_cores" when no core count is reported."""
    fake_run = _fake_sysctl(memsize_gb=32)
    monkeypatch.setattr(hardware, "_remote_host", None)
    monkeypatch.setattr(hardware.platform, "system", lambda: "Darwin")
    monkeypatch.setattr(hardware.platform, "machine", lambda: "arm64")
    monkeypatch.setattr(hardware, "_run", fake_run)

    detected = hardware._detect_apple_silicon()

    assert detected is not None
    assert "gpu_cores" not in detected
|
||||||
|
|
||||||
|
|
||||||
def test_apple_silicon_skipped_on_linux(monkeypatch):
|
def test_apple_silicon_skipped_on_linux(monkeypatch):
|
||||||
"""Guarantee Linux detection is untouched: the Metal probe bails immediately."""
|
"""Guarantee Linux detection is untouched: the Metal probe bails immediately."""
|
||||||
monkeypatch.setattr(hardware, "_remote_host", None)
|
monkeypatch.setattr(hardware, "_remote_host", None)
|
||||||
@@ -132,7 +171,7 @@ def test_detect_system_propagates_unified_memory(monkeypatch):
|
|||||||
monkeypatch.setattr(hardware, "_detect_apple_silicon", lambda: {
|
monkeypatch.setattr(hardware, "_detect_apple_silicon", lambda: {
|
||||||
"gpu_name": "Apple M4", "gpu_vram_gb": 10.7, "gpu_count": 1,
|
"gpu_name": "Apple M4", "gpu_vram_gb": 10.7, "gpu_count": 1,
|
||||||
"gpus": [], "gpu_groups": [], "homogeneous": True,
|
"gpus": [], "gpu_groups": [], "homogeneous": True,
|
||||||
"backend": "metal", "unified_memory": True,
|
"backend": "metal", "unified_memory": True, "gpu_cores": 10,
|
||||||
})
|
})
|
||||||
monkeypatch.setattr(hardware, "_get_ram_gb", lambda: 16.0)
|
monkeypatch.setattr(hardware, "_get_ram_gb", lambda: 16.0)
|
||||||
monkeypatch.setattr(hardware, "_get_available_ram_gb", lambda: 11.0)
|
monkeypatch.setattr(hardware, "_get_available_ram_gb", lambda: 11.0)
|
||||||
@@ -142,3 +181,4 @@ def test_detect_system_propagates_unified_memory(monkeypatch):
|
|||||||
s = hardware.detect_system(fresh=True)
|
s = hardware.detect_system(fresh=True)
|
||||||
assert s["backend"] == "metal"
|
assert s["backend"] == "metal"
|
||||||
assert s.get("unified_memory") is True
|
assert s.get("unified_memory") is True
|
||||||
|
assert s["gpu_cores"] == 10
|
||||||
|
|||||||
Reference in New Issue
Block a user