Ignore invalid serve profile inputs (#1827)

This commit is contained in:
red person
2026-06-29 10:47:19 -07:00
committed by GitHub
parent 139d76ab57
commit d2a6d73aa5
2 changed files with 9 additions and 0 deletions
+3
View File
@@ -103,6 +103,9 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
in the actual serving knobs (n_cpu_moe, KV-cache type, context). serve_quant in the actual serving knobs (n_cpu_moe, KV-cache type, context). serve_quant
is the file's quant label (e.g. "Q4_K_M") just for display. is the file's quant label (e.g. "Q4_K_M") just for display.
""" """
if not isinstance(system, dict) or not isinstance(model, dict):
return []
vram = float(system.get("gpu_vram_gb") or 0) vram = float(system.get("gpu_vram_gb") or 0)
if vram <= 0: if vram <= 0:
return [] return []
+6
View File
@@ -28,6 +28,12 @@ def _sys(vram, family="rdna"):
return {"backend": "rocm", "gpu_vram_gb": vram, "gpu_family": family} return {"backend": "rocm", "gpu_vram_gb": vram, "gpu_family": family}
def test_compute_serve_profiles_ignores_invalid_inputs():
    """compute_serve_profiles returns [] when system or model is not a dict.

    The function guards on ``isinstance(..., dict)`` for both arguments, so
    each argument is exercised with ``None`` and with a non-dict container,
    plus the case where both are invalid at once.
    """
    invalid_calls = [
        (None, _DENSE_8B),        # missing system
        (_sys(8), None),          # missing model
        (["bad"], _DENSE_8B),     # system is a non-dict container
        (_sys(8), ["bad"]),       # model is a non-dict container
        (None, None),             # both arguments invalid
    ]
    for system, model in invalid_calls:
        assert compute_serve_profiles(system, model) == [], (
            f"expected no profiles for system={system!r}, model={model!r}"
        )
def test_big_moe_on_small_card_offloads_not_fails(): def test_big_moe_on_small_card_offloads_not_fails():
"""A 35B MoE can't hold its weights on 16 GB, so the Quality profile must """A 35B MoE can't hold its weights on 16 GB, so the Quality profile must
offload experts to CPU (n_cpu_moe > 0) rather than be dropped.""" offload experts to CPU (n_cpu_moe > 0) rather than be dropped."""