From d6d2e17214ad1d29622bc36b206eb787364d69a0 Mon Sep 17 00:00:00 2001 From: darius-f96 <71006968+darius-f96@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:55:15 +0300 Subject: [PATCH] fix(hwfit): add GB10 unified-memory bandwidth so speed scores are real (#4270) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVIDIA Grace Blackwell GB10 / DGX Spark was missing from GPU_BANDWIDTH, so _lookup_bandwidth() returned None for it and _estimate_speed() fell through to the crude FALLBACK_K path (k/active-params). That over-stated tok/s and let speed scores saturate regardless of the box's real ~273 GB/s LPDDR5X pool — distorting model ranking on these 128GB unified-memory rigs. Add "gb10": 273 (GB/s). nvidia-smi reports the device name as "NVIDIA GB10", which substring-matches the new key, so detected GB10 boxes now estimate speed from the real bandwidth instead of the fallback. --- services/hwfit/fit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/hwfit/fit.py b/services/hwfit/fit.py index 7a3d4c4f2..242050e7a 100644 --- a/services/hwfit/fit.py +++ b/services/hwfit/fit.py @@ -19,6 +19,10 @@ GPU_BANDWIDTH = { "6950 xt": 576, "6900 xt": 512, "6800 xt": 512, "6800": 512, "6700 xt": 384, "6600 xt": 256, "6600": 224, "mi300x": 5300, "mi300": 5300, "mi250x": 3277, "mi250": 3277, "mi210": 1638, "mi100": 1229, "9070 xt": 624, "9070": 488, "9060 xt": 322, "9060": 322, + # NVIDIA GB10 Grace-Blackwell superchip (DGX Spark). Unified LPDDR5X memory, + # not Apple Silicon, so it lives in the generic GPU table — the Apple-only + # lookup never matches it (its name carries no "apple"). + "gb10": 273, } # Pre-sort keys by length descending for correct substring matching