mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 16:42:15 -04:00
Ignore invalid serve profile inputs (#1827)
This commit is contained in:
@@ -103,6 +103,9 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
|
||||
in the actual serving knobs (n_cpu_moe, KV-cache type, context). serve_quant
|
||||
is the file's quant label (e.g. "Q4_K_M") just for display.
|
||||
"""
|
||||
if not isinstance(system, dict) or not isinstance(model, dict):
|
||||
return []
|
||||
|
||||
vram = float(system.get("gpu_vram_gb") or 0)
|
||||
if vram <= 0:
|
||||
return []
|
||||
|
||||
@@ -28,6 +28,12 @@ def _sys(vram, family="rdna"):
|
||||
return {"backend": "rocm", "gpu_vram_gb": vram, "gpu_family": family}
|
||||
|
||||
|
||||
def test_compute_serve_profiles_ignores_invalid_inputs():
    """A non-dict system or model (None or a list here) must make
    compute_serve_profiles return an empty list."""
    # system is None
    no_system = compute_serve_profiles(None, _DENSE_8B)
    assert no_system == []

    # model is None
    no_model = compute_serve_profiles(_sys(8), None)
    assert no_model == []

    # system is a list rather than a dict
    list_system = compute_serve_profiles(["bad"], _DENSE_8B)
    assert list_system == []
|
||||
|
||||
|
||||
def test_big_moe_on_small_card_offloads_not_fails():
|
||||
"""A 35B MoE can't hold its weights on 16 GB, so the Quality profile must
|
||||
offload experts to CPU (n_cpu_moe > 0) rather than be dropped."""
|
||||
|
||||
Reference in New Issue
Block a user