Ignore invalid serve profile inputs (#1827)

2026-07-02 01:22:07 -04:00 · 2026-06-29 10:47:19 -07:00
parent 139d76ab57
commit d2a6d73aa5
2 changed files with 9 additions and 0 deletions
@@ -103,6 +103,9 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
    in the actual serving knobs (n_cpu_moe, KV-cache type, context). serve_quant
    is the file's quant label (e.g. "Q4_K_M") just for display.
    """
    if not isinstance(system, dict) or not isinstance(model, dict):
        return []
    vram = float(system.get("gpu_vram_gb") or 0)
    if vram <= 0:
        return []
@@ -28,6 +28,12 @@ def _sys(vram, family="rdna"):
    return {"backend": "rocm", "gpu_vram_gb": vram, "gpu_family": family}
 def test_compute_serve_profiles_ignores_invalid_inputs():
    """Non-dict ``system`` or ``model`` arguments must yield an empty profile list."""
    malformed_calls = (
        (None, _DENSE_8B),     # system is None
        (_sys(8), None),       # model is None
        (["bad"], _DENSE_8B),  # system is a list rather than a dict
    )
    for system, model in malformed_calls:
        assert compute_serve_profiles(system, model) == []
 def test_big_moe_on_small_card_offloads_not_fails():
    """A 35B MoE can't hold its weights on 16 GB, so the Quality profile must
    offload experts to CPU (n_cpu_moe > 0) rather than be dropped."""