Improve Cookbook serve diagnostics and recommendations

This commit is contained in:
pewdiepie-archdaemon
2026-06-02 12:15:41 +09:00
parent bdc99d746a
commit 966b53df77
14 changed files with 1113 additions and 191 deletions
+7 -6
View File
@@ -5,7 +5,7 @@ import re
QUANT_HIERARCHY = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "Q3_K_M", "Q2_K"]
QUANT_BPP = {
"F32": 4.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0,
"F32": 4.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0, "NVFP4": 0.5,
"Q8_0": 1.05, "Q6_K": 0.80, "Q5_K_M": 0.68,
"Q4_K_M": 0.58, "Q4_0": 0.58, "Q3_K_M": 0.48, "Q2_K": 0.37,
"AWQ-4bit": 0.50, "AWQ-8bit": 1.0,
@@ -14,7 +14,7 @@ QUANT_BPP = {
}
QUANT_SPEED_MULT = {
"F16": 0.6, "BF16": 0.6, "FP8": 0.85,
"F16": 0.6, "BF16": 0.6, "FP8": 0.85, "NVFP4": 1.1,
"Q8_0": 0.8, "Q6_K": 0.95, "Q5_K_M": 1.0,
"Q4_K_M": 1.15, "Q4_0": 1.15, "Q3_K_M": 1.25, "Q2_K": 1.35,
"AWQ-4bit": 1.2, "AWQ-8bit": 0.85,
@@ -23,7 +23,7 @@ QUANT_SPEED_MULT = {
}
QUANT_QUALITY_PENALTY = {
"F16": 0.0, "BF16": 0.0, "FP8": 0.0,
"F16": 0.0, "BF16": 0.0, "FP8": 0.0, "NVFP4": 0.0,
"Q8_0": 0.0, "Q6_K": -1.0, "Q5_K_M": -2.0,
"Q4_K_M": -5.0, "Q4_0": -5.0, "Q3_K_M": -8.0, "Q2_K": -12.0,
"AWQ-4bit": -3.0, "AWQ-8bit": 0.0,
@@ -32,7 +32,7 @@ QUANT_QUALITY_PENALTY = {
}
QUANT_BYTES_PER_PARAM = {
"F16": 2.0, "BF16": 2.0, "FP8": 1.0,
"F16": 2.0, "BF16": 2.0, "FP8": 1.0, "NVFP4": 0.5,
"Q8_0": 1.0, "Q6_K": 0.75, "Q5_K_M": 0.625,
"Q4_K_M": 0.5, "Q4_0": 0.5, "Q3_K_M": 0.375, "Q2_K": 0.25,
"AWQ-4bit": 0.5, "AWQ-8bit": 1.0,
@@ -41,12 +41,13 @@ QUANT_BYTES_PER_PARAM = {
}
# Pre-quantized formats that should NOT go through the GGUF quant hierarchy
PREQUANTIZED_PREFIXES = ("AWQ-", "GPTQ-", "mlx-", "FP8")
PREQUANTIZED_PREFIXES = ("AWQ-", "GPTQ-", "mlx-", "FP8", "NVFP4")
def is_prequantized(model):
q = model.get("quantization", "")
return any(q.startswith(p) for p in PREQUANTIZED_PREFIXES)
name = (model.get("name") or "").lower()
return "nvfp4" in name or any(q.startswith(p) for p in PREQUANTIZED_PREFIXES)
def params_b(model):