Ignore invalid serve profile inputs (#1827)

This commit is contained in:
red person
2026-06-29 10:47:19 -07:00
committed by GitHub
parent 139d76ab57
commit d2a6d73aa5
2 changed files with 9 additions and 0 deletions
+3
View File
@@ -103,6 +103,9 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
in the actual serving knobs (n_cpu_moe, KV-cache type, context). serve_quant
is the file's quant label (e.g. "Q4_K_M") just for display.
"""
if not isinstance(system, dict) or not isinstance(model, dict):
return []
vram = float(system.get("gpu_vram_gb") or 0)
if vram <= 0:
return []
+6
View File
@@ -28,6 +28,12 @@ def _sys(vram, family="rdna"):
return {"backend": "rocm", "gpu_vram_gb": vram, "gpu_family": family}
def test_compute_serve_profiles_ignores_invalid_inputs():
    """A None system, a None model, or a list passed as the system must each
    make compute_serve_profiles return an empty list."""
    invalid_calls = (
        (None, _DENSE_8B),
        (_sys(8), None),
        (["bad"], _DENSE_8B),
    )
    for system, model in invalid_calls:
        assert compute_serve_profiles(system, model) == []
def test_big_moe_on_small_card_offloads_not_fails():
"""A 35B MoE can't hold its weights on 16 GB, so the Quality profile must
offload experts to CPU (n_cpu_moe > 0) rather than be dropped."""