Improve Cookbook serve diagnostics and recommendations

This commit is contained in:
pewdiepie-archdaemon
2026-06-02 12:15:41 +09:00
parent bdc99d746a
commit 966b53df77
14 changed files with 1113 additions and 191 deletions
+25 -6
View File
@@ -962,13 +962,23 @@ def setup_cookbook_routes() -> APIRouter:
# failed CUDA attempt) doesn't cause the next configure to reuse # failed CUDA attempt) doesn't cause the next configure to reuse
# stale settings and silently produce a CPU-only binary. # stale settings and silently produce a CPU-only binary.
runner_lines.append(' cd ~/llama.cpp && rm -rf build') runner_lines.append(' cd ~/llama.cpp && rm -rf build')
runner_lines.append(' _ody_has_cuda_runtime=0')
runner_lines.append(' if command -v nvcc &>/dev/null; then') runner_lines.append(' if command -v nvcc &>/dev/null; then')
runner_lines.append(' for _cudalib in "${CUDA_HOME:-}/lib64"/libcudart.so* "${CUDA_HOME:-}/lib"/libcudart.so* /usr/local/cuda/lib64/libcudart.so* /usr/lib*/libcudart.so*; do')
runner_lines.append(' [ -e "$_cudalib" ] && _ody_has_cuda_runtime=1 && break')
runner_lines.append(' done')
runner_lines.append(' fi')
runner_lines.append(' if command -v nvcc &>/dev/null && [ "$_ody_has_cuda_runtime" = "1" ]; then')
runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."') runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\') runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\')
runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\')
runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' else') runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."') runner_lines.append(' if command -v nvcc &>/dev/null; then')
runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime library was not found — building llama-server for CPU only."')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."')
runner_lines.append(' fi')
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
runner_lines.append(' echo "[odysseus] To get a GPU build, first install vLLM via Cookbook -> Dependencies"') runner_lines.append(' echo "[odysseus] To get a GPU build, first install vLLM via Cookbook -> Dependencies"')
runner_lines.append(' echo "[odysseus] (its CUDA wheels include nvcc), then re-launch this serve task."') runner_lines.append(' echo "[odysseus] (its CUDA wheels include nvcc), then re-launch this serve task."')
@@ -982,6 +992,10 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."')
runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true") runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true")
runner_lines.append(' fi') runner_lines.append(' fi')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append(' fi')
runner_lines.append('fi') runner_lines.append('fi')
elif "ollama" in req.cmd: elif "ollama" in req.cmd:
handled_ollama_serve = True handled_ollama_serve = True
@@ -1037,19 +1051,24 @@ def setup_cookbook_routes() -> APIRouter:
# find the `vllm` CLI ("command not found"). Mirrors llama.cpp above. # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! command -v vllm &>/dev/null; then') runner_lines.append('if ! command -v vllm &>/dev/null; then')
runner_lines.append(' echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."') runner_lines.append(' echo "ERROR: vLLM is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi') runner_lines.append('fi')
elif "sglang.launch_server" in req.cmd: elif "sglang.launch_server" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then') runner_lines.append('if ! command -v sglang &>/dev/null; then')
runner_lines.append(' echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."') runner_lines.append(' echo "ERROR: SGLang is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then')
runner_lines.append(' echo "ERROR: SGLang is installed but failed to import."')
runner_lines.append(' printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi') runner_lines.append('fi')
elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd: elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then') runner_lines.append('if ! ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then')
runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers. Open Cookbook -> Dependencies and install diffusers on this server, then launch again."') runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers."')
runner_lines.append(' printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi') runner_lines.append('fi')
+4 -2
View File
@@ -88,6 +88,8 @@ def _quant_from_name(name):
if "6bit" in n: if "6bit" in n:
return "mlx-6bit" return "mlx-6bit"
return "mlx-8bit" if is8 else "mlx-4bit" return "mlx-8bit" if is8 else "mlx-4bit"
if "nvfp4" in n:
return "NVFP4"
if "fp8" in n: if "fp8" in n:
return "FP8" return "FP8"
if "int4" in n or "4bit" in n or "4-bit" in n: if "int4" in n or "4bit" in n or "4-bit" in n:
@@ -136,7 +138,7 @@ def _entry_from_modelinfo(mi, overrides):
params_by_dtype = getattr(st, "parameters", None) or {} params_by_dtype = getattr(st, "parameters", None) or {}
if quant.endswith("4bit") or quant.endswith("Int4"): if quant.endswith("4bit") or quant.endswith("Int4"):
pack_factor = 8 pack_factor = 8
elif quant.endswith("8bit") or quant.endswith("Int8") or quant == "FP8": elif quant.endswith("8bit") or quant.endswith("Int8") or quant in ("FP8", "NVFP4"):
pack_factor = 4 pack_factor = 4
else: else:
pack_factor = 1 pack_factor = 1
@@ -158,7 +160,7 @@ def _entry_from_modelinfo(mi, overrides):
rel = created.strftime("%Y-%m-%d") if created else datetime.utcnow().strftime("%Y-%m-%d") rel = created.strftime("%Y-%m-%d") if created else datetime.utcnow().strftime("%Y-%m-%d")
# Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant). # Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant).
_BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85, _BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85,
"AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "Q4_K_M": 0.6} "AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "NVFP4": 0.6, "Q4_K_M": 0.6}
bpp = _BPP.get(quant, 0.6) bpp = _BPP.get(quant, 0.6)
vram = round(pb * bpp + 0.5, 1) vram = round(pb * bpp + 0.5, 1)
entry = { entry = {
+326 -4
View File
@@ -13919,7 +13919,12 @@
"architecture": "gemma4", "architecture": "gemma4",
"pipeline_tag": "image-text-to-text", "pipeline_tag": "image-text-to-text",
"release_date": "2026-04-01", "release_date": "2026-04-01",
"gguf_sources": [], "gguf_sources": [
{
"repo": "unsloth/gemma-4-E2B-it-GGUF",
"provider": "unsloth"
}
],
"capabilities": [ "capabilities": [
"vision" "vision"
] ]
@@ -13942,7 +13947,12 @@
"architecture": "gemma4", "architecture": "gemma4",
"pipeline_tag": "image-text-to-text", "pipeline_tag": "image-text-to-text",
"release_date": "2026-04-01", "release_date": "2026-04-01",
"gguf_sources": [], "gguf_sources": [
{
"repo": "unsloth/gemma-4-E4B-it-GGUF",
"provider": "unsloth"
}
],
"capabilities": [ "capabilities": [
"vision" "vision"
] ]
@@ -13965,7 +13975,12 @@
"architecture": "gemma4", "architecture": "gemma4",
"pipeline_tag": "image-text-to-text", "pipeline_tag": "image-text-to-text",
"release_date": "2026-04-01", "release_date": "2026-04-01",
"gguf_sources": [], "gguf_sources": [
{
"repo": "unsloth/gemma-4-31B-it-GGUF",
"provider": "unsloth"
}
],
"capabilities": [ "capabilities": [
"vision" "vision"
] ]
@@ -13988,7 +14003,12 @@
"architecture": "gemma4", "architecture": "gemma4",
"pipeline_tag": "image-text-to-text", "pipeline_tag": "image-text-to-text",
"release_date": "2026-04-01", "release_date": "2026-04-01",
"gguf_sources": [], "gguf_sources": [
{
"repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
"provider": "unsloth"
}
],
"capabilities": [ "capabilities": [
"vision" "vision"
] ]
@@ -18719,5 +18739,307 @@
"hf_likes": 0, "hf_likes": 0,
"release_date": "2026-04-19", "release_date": "2026-04-19",
"_discovered": true "_discovered": true
},
{
"name": "Qwen/Qwen3.6-27B-MTP",
"provider": "Qwen",
"parameter_count": "27.8B",
"parameters_raw": 27781427952,
"min_ram_gb": 16.6,
"recommended_ram_gb": 21.6,
"min_vram_gb": 16.6,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, coding, MTP",
"is_moe": false,
"num_experts": null,
"active_experts": null,
"active_parameters": null,
"architecture": "qwen3",
"pipeline_tag": "text-generation",
"release_date": "2026-04-01",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.6-27B-MTP-GGUF",
"provider": "unsloth"
}
],
"capabilities": [
"mtp"
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.6-35B-A3B-MTP",
"provider": "Qwen",
"parameter_count": "36.0B",
"parameters_raw": 35951822704,
"min_ram_gb": 21.4,
"recommended_ram_gb": 27.8,
"min_vram_gb": 21.4,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose (MoE), MTP",
"is_moe": true,
"num_experts": null,
"active_experts": null,
"active_parameters": 3000000000,
"architecture": "qwen3_moe",
"pipeline_tag": "text-generation",
"release_date": "2026-04-01",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.6-35B-A3B-MTP-GGUF",
"provider": "unsloth"
}
],
"capabilities": [
"mtp"
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-0.8B-MTP",
"provider": "Qwen",
"parameter_count": "873M",
"parameters_raw": 873438784,
"min_ram_gb": 1.0,
"recommended_ram_gb": 2.0,
"min_vram_gb": 0.5,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5",
"hf_downloads": 93448,
"hf_likes": 208,
"release_date": "2026-02-28",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-0.8B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-2B-MTP",
"provider": "Qwen",
"parameter_count": "2.3B",
"parameters_raw": 2274069824,
"min_ram_gb": 1.3,
"recommended_ram_gb": 2.1,
"min_vram_gb": 1.2,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5",
"hf_downloads": 46974,
"hf_likes": 115,
"release_date": "2026-02-28",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-2B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-4B-MTP",
"provider": "Qwen",
"parameter_count": "4.7B",
"parameters_raw": 4659865088,
"min_ram_gb": 2.6,
"recommended_ram_gb": 4.3,
"min_vram_gb": 2.4,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5",
"hf_downloads": 99087,
"hf_likes": 202,
"release_date": "2026-02-27",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-4B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-9B-MTP",
"provider": "Qwen",
"parameter_count": "9.7B",
"parameters_raw": 9653104368,
"min_ram_gb": 5.4,
"recommended_ram_gb": 9.0,
"min_vram_gb": 4.9,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5",
"hf_downloads": 172298,
"hf_likes": 345,
"release_date": "2026-02-27",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-9B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-27B-MTP",
"provider": "Qwen",
"parameter_count": "27.8B",
"parameters_raw": 27781427952,
"min_ram_gb": 15.5,
"recommended_ram_gb": 25.9,
"min_vram_gb": 14.2,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5",
"hf_downloads": 406808,
"hf_likes": 565,
"release_date": "2026-02-24",
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-27B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-35B-A3B-MTP",
"provider": "Qwen",
"parameter_count": "36.0B",
"parameters_raw": 35951822704,
"min_ram_gb": 20.1,
"recommended_ram_gb": 33.5,
"min_vram_gb": 18.4,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5_moe",
"hf_downloads": 769032,
"hf_likes": 905,
"release_date": "2026-02-24",
"is_moe": true,
"num_experts": 256,
"active_experts": 8,
"active_parameters": 3000000000,
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-35B-A3B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-122B-A10B-MTP",
"provider": "Qwen",
"parameter_count": "125.1B",
"parameters_raw": 125086497008,
"min_ram_gb": 69.9,
"recommended_ram_gb": 116.5,
"min_vram_gb": 64.1,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5_moe",
"hf_downloads": 171055,
"hf_likes": 389,
"release_date": "2026-02-24",
"is_moe": true,
"num_experts": 256,
"active_experts": 8,
"active_parameters": 10000000000,
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-122B-A10B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
},
{
"name": "Qwen/Qwen3.5-397B-A17B-MTP",
"provider": "Qwen",
"parameter_count": "403.4B",
"parameters_raw": 403397928944,
"min_ram_gb": 225.4,
"recommended_ram_gb": 375.7,
"min_vram_gb": 206.6,
"quantization": "Q4_K_M",
"context_length": 262144,
"use_case": "General purpose, MTP",
"capabilities": [
"mtp",
"tool_use",
"vision"
],
"pipeline_tag": "image-text-to-text",
"architecture": "qwen3_5_moe",
"hf_downloads": 1291825,
"hf_likes": 1214,
"release_date": "2026-02-16",
"is_moe": true,
"num_experts": 256,
"active_experts": 8,
"active_parameters": 17000000000,
"gguf_sources": [
{
"repo": "unsloth/Qwen3.5-397B-A17B-MTP-GGUF",
"provider": "unsloth"
}
],
"_discovered": true
} }
] ]
+60 -21
View File
@@ -99,6 +99,27 @@ def _estimate_speed(model, quant, run_mode, system):
return k / pb * sm return k / pb * sm
def _architecture_bonus(model):
name = (model.get("name") or "").lower()
arch = (model.get("architecture") or "").lower()
text = f"{name} {arch}"
# Keep this intentionally small: hardware fit and speed still matter, but
# current model families should not be scored the same as older Qwen2/LLama
# era entries just because the parameter count is similar.
if "qwen3.6" in text or "qwen3_6" in text:
return 9
if "qwen3.5" in text or "qwen3_5" in text:
return 8
if "qwen3-next" in text or "qwen3_next" in text:
return 6
if "qwen3" in text or arch.startswith("qwen3"):
return 4
if "qwen2.5" in text or "qwen2_5" in text:
return 2
return 0
def _quality_score(model, quant, use_case): def _quality_score(model, quant, use_case):
pb = params_b(model) pb = params_b(model)
if pb < 1: if pb < 1:
@@ -128,6 +149,7 @@ def _quality_score(model, quant, use_case):
if "gemma" in name_lower: if "gemma" in name_lower:
base += 1 base += 1
base += _architecture_bonus(model)
base += QUANT_QUALITY_PENALTY.get(quant, 0) base += QUANT_QUALITY_PENALTY.get(quant, 0)
model_uc = infer_use_case(model) model_uc = infer_use_case(model)
@@ -220,12 +242,13 @@ def _quant_bits(q):
return 0 return 0
def analyze_model(model, system, target_quant=None): def analyze_model(model, system, target_quant=None, scoring_use_case=None):
pb = params_b(model) pb = params_b(model)
if pb <= 0: if pb <= 0:
return None return None
use_case = infer_use_case(model) model_use_case = infer_use_case(model)
score_use_case = scoring_use_case or "general"
has_gpu = system.get("has_gpu", False) has_gpu = system.get("has_gpu", False)
gpu_vram = (system.get("gpu_vram_gb") or 0) if has_gpu else 0 gpu_vram = (system.get("gpu_vram_gb") or 0) if has_gpu else 0
gpu_count = system.get("gpu_count", 1) or 1 gpu_count = system.get("gpu_count", 1) or 1
@@ -242,6 +265,8 @@ def analyze_model(model, system, target_quant=None):
ctx = model.get("context_length", 4096) or 4096 ctx = model.get("context_length", 4096) or 4096
native_quant = model.get("quantization", "Q4_K_M") native_quant = model.get("quantization", "Q4_K_M")
if "nvfp4" in (model.get("name") or "").lower():
native_quant = "NVFP4"
preq = is_prequantized(model) preq = is_prequantized(model)
# GGUF models can't be sharded across GPUs — use single GPU VRAM # GGUF models can't be sharded across GPUs — use single GPU VRAM
@@ -260,10 +285,13 @@ def analyze_model(model, system, target_quant=None):
# Determine which quant to evaluate at # Determine which quant to evaluate at
if preq: if preq:
# AWQ/GPTQ/FP8/MLX come at a fixed bit-width. If the user picked a # AWQ/GPTQ/FP8/MLX come at a fixed bit-width. If the user picked a
# specific quant tier (e.g. Q8 → 8-bit), only keep prequant models whose # GGUF quant tier (Q4/Q8/etc.), do not treat a same-bit AWQ/GPTQ build
# native bit-width matches — otherwise selecting Q8 would still surface # as equivalent. "Q4" means llama.cpp/Ollama-style GGUF in this UI;
# AWQ-4bit models, mixing 4- and 8-bit in one view. # AWQ/GPTQ/FP8 are separate GPU-serving formats and must only appear
# when explicitly selected or when no quant filter is applied.
if target_quant: if target_quant:
if not any(target_quant.startswith(p) for p in ("AWQ-", "GPTQ-", "FP8", "NVFP4")):
return None
_tb, _nb = _quant_bits(target_quant), _quant_bits(native_quant) _tb, _nb = _quant_bits(target_quant), _quant_bits(native_quant)
if _tb and _nb and _tb != _nb: if _tb and _nb and _tb != _nb:
return None return None
@@ -300,7 +328,7 @@ def analyze_model(model, system, target_quant=None):
"parameter_count": model.get("parameter_count"), "parameter_count": model.get("parameter_count"),
"params_b": round(pb, 1), "params_b": round(pb, 1),
"is_moe": is_moe, "is_moe": is_moe,
"use_case": use_case, "use_case": model_use_case,
"fit_level": "too_tight", "fit_level": "too_tight",
"run_mode": "no_fit", "run_mode": "no_fit",
"quant": quant_to_try, "quant": quant_to_try,
@@ -334,12 +362,12 @@ def analyze_model(model, system, target_quant=None):
tps = _estimate_speed(model, quant, run_mode, system) tps = _estimate_speed(model, quant, run_mode, system)
q_score = _quality_score(model, quant, use_case) q_score = _quality_score(model, quant, score_use_case)
s_score = _speed_score(tps, use_case) s_score = _speed_score(tps, score_use_case)
f_score = _fit_score(required_gb, budget) f_score = _fit_score(required_gb, budget)
c_score = _context_score(fit_ctx, use_case) c_score = _context_score(fit_ctx, score_use_case)
wq, ws, wf, wc = USE_CASE_WEIGHTS.get(use_case, (0.45, 0.30, 0.15, 0.10)) wq, ws, wf, wc = USE_CASE_WEIGHTS.get(score_use_case, (0.45, 0.30, 0.15, 0.10))
composite = q_score * wq + s_score * ws + f_score * wf + c_score * wc composite = q_score * wq + s_score * ws + f_score * wf + c_score * wc
return { return {
@@ -348,7 +376,7 @@ def analyze_model(model, system, target_quant=None):
"parameter_count": model.get("parameter_count"), "parameter_count": model.get("parameter_count"),
"params_b": round(pb, 1), "params_b": round(pb, 1),
"is_moe": is_moe, "is_moe": is_moe,
"use_case": use_case, "use_case": model_use_case,
"fit_level": fit_level, "fit_level": fit_level,
"run_mode": run_mode, "run_mode": run_mode,
"quant": quant, "quant": quant,
@@ -419,21 +447,29 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
results.sort(key=sort_fn, reverse=(sort != "vram")) results.sort(key=sort_fn, reverse=(sort != "vram"))
return results[:limit] return results[:limit]
# If user picked a prequantized format (AWQ/FP8/GPTQ), filter to only those models # If user picked a prequantized format (AWQ/FP8/GPTQ/NVFP4), filter to only those models
filter_native = quant and any(quant.startswith(p) for p in ("AWQ-", "GPTQ-", "FP8")) filter_native = quant and any(quant.startswith(p) for p in ("AWQ-", "GPTQ-", "FP8", "NVFP4"))
system_backend = (system.get("backend") or "").lower() system_backend = (system.get("backend") or "").lower()
apple_silicon = system_backend in ("mps", "metal", "apple") apple_silicon = system_backend in ("mps", "metal", "apple")
rocm = system_backend == "rocm"
for m in models: for m in models:
native_q = m.get("quantization", "") native_q = m.get("quantization", "")
if "nvfp4" in (m.get("name") or "").lower():
native_q = "NVFP4"
# MLX-quantized models need the MLX runtime (mlx_lm), which Odysseus # MLX is Apple Silicon only. Hide MLX rows on non-Mac hardware scans,
# doesn't generate serve commands for — only llama.cpp/Ollama (Metal) # but leave them visible on Metal/MPS so Mac support is not broken.
# and vLLM/SGLang (CUDA). MLX repos ship no GGUF alternative, so they're if not apple_silicon and (native_q.startswith("mlx-") or "mlx" in (m.get("name") or "").lower()):
# unrunnable on every backend we support. Always drop them, on Apple continue
# Silicon too, so the Cookbook never recommends a model it can't serve.
if native_q.startswith("mlx-"): # ROCm support for vLLM/SGLang quantized safetensors is too brittle to
# recommend blindly in the default scan. Keep AWQ/GPTQ/FP8 discoverable
# only when the user explicitly picks that format from the quant filter;
# otherwise prefer GGUF/Q* entries that Odysseus can route through
# llama.cpp/Ollama without pretending "fits VRAM" means "servable".
if rocm and is_prequantized(m) and not filter_native:
continue continue
# On Apple Silicon the only serving engines are llama.cpp and Ollama, # On Apple Silicon the only serving engines are llama.cpp and Ollama,
@@ -443,7 +479,8 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
# default GGUF quant) and vLLM-only AWQ/GPTQ/FP8 builds alike. Without # default GGUF quant) and vLLM-only AWQ/GPTQ/FP8 builds alike. Without
# this the Cookbook recommends models the Mac can't run; on CUDA these # this the Cookbook recommends models the Mac can't run; on CUDA these
# stay visible because vLLM serves safetensors directly. # stay visible because vLLM serves safetensors directly.
if apple_silicon and not (m.get("is_gguf") or m.get("gguf_sources")): is_mlx = native_q.startswith("mlx-") or "mlx" in (m.get("name") or "").lower()
if apple_silicon and not (m.get("is_gguf") or m.get("gguf_sources") or is_mlx):
continue continue
# Format filter: AWQ tab → only AWQ models, FP8 tab → only FP8 models # Format filter: AWQ tab → only AWQ models, FP8 tab → only FP8 models
@@ -454,6 +491,8 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
continue continue
if quant.startswith("GPTQ") and not native_q.startswith("GPTQ"): if quant.startswith("GPTQ") and not native_q.startswith("GPTQ"):
continue continue
if quant.startswith("NVFP4") and not native_q.startswith("NVFP4"):
continue
if search: if search:
name = m.get("name", "").lower() name = m.get("name", "").lower()
@@ -461,7 +500,7 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
if search.lower() not in name and search.lower() not in provider: if search.lower() not in name and search.lower() not in provider:
continue continue
result = analyze_model(m, system, target_quant=quant) result = analyze_model(m, system, target_quant=quant, scoring_use_case=(use_case or "general"))
if result is None: if result is None:
continue continue
+7 -6
View File
@@ -5,7 +5,7 @@ import re
QUANT_HIERARCHY = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "Q3_K_M", "Q2_K"] QUANT_HIERARCHY = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "Q3_K_M", "Q2_K"]
QUANT_BPP = { QUANT_BPP = {
"F32": 4.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0, "F32": 4.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0, "NVFP4": 0.5,
"Q8_0": 1.05, "Q6_K": 0.80, "Q5_K_M": 0.68, "Q8_0": 1.05, "Q6_K": 0.80, "Q5_K_M": 0.68,
"Q4_K_M": 0.58, "Q4_0": 0.58, "Q3_K_M": 0.48, "Q2_K": 0.37, "Q4_K_M": 0.58, "Q4_0": 0.58, "Q3_K_M": 0.48, "Q2_K": 0.37,
"AWQ-4bit": 0.50, "AWQ-8bit": 1.0, "AWQ-4bit": 0.50, "AWQ-8bit": 1.0,
@@ -14,7 +14,7 @@ QUANT_BPP = {
} }
QUANT_SPEED_MULT = { QUANT_SPEED_MULT = {
"F16": 0.6, "BF16": 0.6, "FP8": 0.85, "F16": 0.6, "BF16": 0.6, "FP8": 0.85, "NVFP4": 1.1,
"Q8_0": 0.8, "Q6_K": 0.95, "Q5_K_M": 1.0, "Q8_0": 0.8, "Q6_K": 0.95, "Q5_K_M": 1.0,
"Q4_K_M": 1.15, "Q4_0": 1.15, "Q3_K_M": 1.25, "Q2_K": 1.35, "Q4_K_M": 1.15, "Q4_0": 1.15, "Q3_K_M": 1.25, "Q2_K": 1.35,
"AWQ-4bit": 1.2, "AWQ-8bit": 0.85, "AWQ-4bit": 1.2, "AWQ-8bit": 0.85,
@@ -23,7 +23,7 @@ QUANT_SPEED_MULT = {
} }
QUANT_QUALITY_PENALTY = { QUANT_QUALITY_PENALTY = {
"F16": 0.0, "BF16": 0.0, "FP8": 0.0, "F16": 0.0, "BF16": 0.0, "FP8": 0.0, "NVFP4": 0.0,
"Q8_0": 0.0, "Q6_K": -1.0, "Q5_K_M": -2.0, "Q8_0": 0.0, "Q6_K": -1.0, "Q5_K_M": -2.0,
"Q4_K_M": -5.0, "Q4_0": -5.0, "Q3_K_M": -8.0, "Q2_K": -12.0, "Q4_K_M": -5.0, "Q4_0": -5.0, "Q3_K_M": -8.0, "Q2_K": -12.0,
"AWQ-4bit": -3.0, "AWQ-8bit": 0.0, "AWQ-4bit": -3.0, "AWQ-8bit": 0.0,
@@ -32,7 +32,7 @@ QUANT_QUALITY_PENALTY = {
} }
QUANT_BYTES_PER_PARAM = { QUANT_BYTES_PER_PARAM = {
"F16": 2.0, "BF16": 2.0, "FP8": 1.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0, "NVFP4": 0.5,
"Q8_0": 1.0, "Q6_K": 0.75, "Q5_K_M": 0.625, "Q8_0": 1.0, "Q6_K": 0.75, "Q5_K_M": 0.625,
"Q4_K_M": 0.5, "Q4_0": 0.5, "Q3_K_M": 0.375, "Q2_K": 0.25, "Q4_K_M": 0.5, "Q4_0": 0.5, "Q3_K_M": 0.375, "Q2_K": 0.25,
"AWQ-4bit": 0.5, "AWQ-8bit": 1.0, "AWQ-4bit": 0.5, "AWQ-8bit": 1.0,
@@ -41,12 +41,13 @@ QUANT_BYTES_PER_PARAM = {
} }
# Pre-quantized formats that should NOT go through the GGUF quant hierarchy
PREQUANTIZED_PREFIXES = ("AWQ-", "GPTQ-", "mlx-", "FP8", "NVFP4")


def is_prequantized(model):
    """Return True when *model* ships in a fixed, pre-quantized format.

    A model counts as pre-quantized when its ``"quantization"`` value starts
    with one of ``PREQUANTIZED_PREFIXES``, or when its name contains
    ``"nvfp4"`` (case-insensitive) regardless of the quantization field.

    Args:
        model: Mapping with optional ``"quantization"`` and ``"name"``
            entries. Missing or ``None`` values are treated as empty strings.

    Returns:
        bool: Whether the model is considered pre-quantized.
    """
    # ``or ""`` (rather than a .get default) also covers a key that is present
    # but set to None, which would otherwise raise on ``.startswith``.
    q = model.get("quantization") or ""
    name = (model.get("name") or "").lower()
    # str.startswith accepts a tuple of prefixes and checks them in one call.
    return "nvfp4" in name or q.startswith(PREQUANTIZED_PREFIXES)
def params_b(model): def params_b(model):
+5
View File
@@ -502,6 +502,11 @@ async def _direct_fallback(
) )
except asyncio.TimeoutError: except asyncio.TimeoutError:
return {"error": f"web_fetch: timed out fetching {url}", "exit_code": 1} return {"error": f"web_fetch: timed out fetching {url}", "exit_code": 1}
except Exception as e:
# Direct URL fetches can hit bot protection / auth walls
# (e.g. eBay 403). Treat that as a tool failure the model can
# reason around, not an uncaught chat-stream 500.
return {"error": f"web_fetch: {url}: {e}", "exit_code": 1}
err = result.get("error") err = result.get("error")
text = (result.get("content") or "").strip() text = (result.get("content") or "").strip()
title = result.get("title") or "" title = result.get("title") or ""
+253 -48
View File
@@ -27,6 +27,56 @@ import spinnerModule from './spinner.js';
// ── Error diagnosis ── // ── Error diagnosis ──
/**
 * Open the Cookbook on its Dependencies tab and, when a package name is
 * given, scroll to the matching dependency row and flash it briefly.
 *
 * Opening goes through `window.cookbookModule.open` when that function
 * exists; otherwise the `#tool-cookbook-btn` element is clicked. Rows are
 * looked up under `#cookbook-deps-list`; while none are present the lookup
 * is retried on a timer, up to a fixed number of attempts.
 *
 * @param {string} [pkgName=''] Package to highlight (case-insensitive).
 *   When empty, the tab is opened but no row is highlighted.
 * @returns {void}
 */
function _openCookbookDependencies(pkgName = '') {
  const RETRY_DELAY_MS = 100;
  const MAX_RETRIES = 45;
  const FLASH_MS = 1800;

  const cookbook = window.cookbookModule;
  if (cookbook && typeof cookbook.open === 'function') {
    cookbook.open({ tab: 'Dependencies' });
  } else {
    document.getElementById('tool-cookbook-btn')?.click();
  }

  const wanted = String(pkgName || '').toLowerCase();

  const rowName = (row) => (row.dataset.pkgName || '').toLowerCase();
  const rowPip = (row) => (row.dataset.depPip || '').toLowerCase();

  const tryHighlight = (attempt = 0) => {
    const modal = document.getElementById('cookbook-modal');
    const tab = modal?.querySelector('.cookbook-tab[data-backend="Dependencies"]');
    if (tab && !tab.classList.contains('active')) tab.click();

    const rows = [...document.querySelectorAll('#cookbook-deps-list [data-pkg-name]')];
    if (!rows.length) {
      // The list may not be rendered yet; poll until rows appear or we give up.
      if (attempt < MAX_RETRIES) setTimeout(() => tryHighlight(attempt + 1), RETRY_DELAY_MS);
      return;
    }
    if (!wanted) return;

    // Prefer the row whose name is exactly the requested package, so a
    // loosely-matching row earlier in the list cannot win over it.
    const exact = rows.find((r) => rowName(r) === wanted);
    const row = exact || rows.find((r) => {
      const name = rowName(r);
      // `wanted.includes('')` is always true, so a row with an empty
      // data-pkg-name must not be allowed to match through that test.
      return rowPip(r).includes(wanted) || (name !== '' && wanted.includes(name));
    });

    if (row) {
      row.scrollIntoView({ block: 'center' });
      row.classList.add('cookbook-pkg-flash');
      setTimeout(() => row.classList.remove('cookbook-pkg-flash'), FLASH_MS);
    }
  };

  tryHighlight();
}
/**
 * Ask the enclosing `.cookbook-task` element to open its serve editor by
 * dispatching a bubbling `cookbook:edit-serve` CustomEvent on it.
 *
 * Does nothing when `panel` is missing, has no `closest` method, or has no
 * `.cookbook-task` ancestor.
 *
 * @param {Element|null|undefined} panel Element inside the task card.
 * @param {Object|null} [fields=null] Passed through as `detail.fields`.
 * @returns {void}
 */
function _openServeEditFromDiagnosis(panel, fields = null) {
  const taskEl = panel?.closest?.('.cookbook-task');
  if (taskEl) {
    const editRequest = new CustomEvent('cookbook:edit-serve', {
      bubbles: true,
      detail: { fields },
    });
    taskEl.dispatchEvent(editRequest);
  }
}
/**
 * Request the serve editor for the task containing `panel`, passing a fixed
 * set of fields that selects the `llamacpp` backend with an empty GPU list.
 *
 * @param {Element|null|undefined} panel Element inside the task card.
 * @returns {void}
 */
function _openCpuServeEdit(panel) {
  // NOTE(review): how the editor interprets these fields (including
  // `_forceBackend`) is not visible here — confirm against the
  // `cookbook:edit-serve` listener.
  const cpuFields = {
    backend: 'llamacpp',
    gpus: '',
    tp: '1',
    gpu_mem: '0.80',
    _forceBackend: true,
  };
  _openServeEditFromDiagnosis(panel, cpuFields);
}
// Infer the gated base repo that single-file checkpoints need configs from // Infer the gated base repo that single-file checkpoints need configs from
function _inferBaseRepo(text) { function _inferBaseRepo(text) {
if (!text) return null; if (!text) return null;
@@ -218,6 +268,7 @@ export const ERROR_PATTERNS = [
pattern: /vllm.*command not found|No module named vllm/i, pattern: /vllm.*command not found|No module named vllm/i,
message: 'vLLM is not installed or not in PATH.', message: 'vLLM is not installed or not in PATH.',
fixes: [ fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
{ label: 'Check environment is set', action: (panel) => { { label: 'Check environment is set', action: (panel) => {
const el = panel.querySelector('[data-field="env_type"]'); const el = panel.querySelector('[data-field="env_type"]');
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; } if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
@@ -226,11 +277,21 @@ export const ERROR_PATTERNS = [
}, },
{ {
pattern: /sglang.*command not found|No module named sglang|SGLang is not installed/i, pattern: /sglang.*command not found|No module named sglang|SGLang is not installed/i,
message: 'SGLang is not installed or not in PATH. Open Cookbook → Dependencies and install sglang on this server.', message: 'SGLang is not installed or not in PATH.',
fixes: [ fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') }, { label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') },
], ],
}, },
{
pattern: /No accelerator \(CUDA, XPU, HPU, NPU, MUSA, MPS\) is available|Triton is not supported on current platform/i,
message: 'SGLang needs a visible GPU/accelerator on this server.',
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
fixes: [
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{ {
pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i, pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i,
message: 'FlashInfer version mismatch.', message: 'FlashInfer version mismatch.',
@@ -241,8 +302,12 @@ export const ERROR_PATTERNS = [
}, },
{ {
pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i, pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i,
message: 'CUDA not available in this environment.', message: 'vLLM needs a visible CUDA/ROCm GPU.',
fixes: [], suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
fixes: [
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
}, },
{ {
pattern: /Engine core initialization failed/i, pattern: /Engine core initialization failed/i,
@@ -295,17 +360,20 @@ export const ERROR_PATTERNS = [
}, },
{ {
pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i, pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
message: 'vLLM/Transformers kernel package mismatch.', message: 'Transformers/kernels package mismatch.',
fixes: [ fixes: [
{ label: 'Update vLLM/Transformers/kernels', action: (panel) => { { label: 'Repair kernel package', action: (panel) => {
const taskEl = panel.closest('.cookbook-task'); const taskEl = panel.closest('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null; const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const host = task?.remoteHost || ''; const host = task?.remoteHost || '';
const prefix = _buildEnvPrefix(); const prefix = _buildEnvPrefix();
const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels'; const pipCmd = prefix
? prefix + ' python3 -m pip install --user --break-system-packages "kernels<0.15"'
: 'python3 -m pip install --user --break-system-packages "kernels<0.15"';
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd; const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
_launchServeTask('update-vllm-stack', 'pip-update', cmd); _launchServeTask('repair-kernels', 'pip-update', cmd);
}}, }},
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
], ],
}, },
{ {
@@ -319,13 +387,24 @@ export const ERROR_PATTERNS = [
pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i, pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"', message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
fixes: [ fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
{ label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') }, { label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') },
], ],
}, },
{
pattern: /CUDA Toolkit not found|Unable to find cudart library|missing:\s*CUDA_CUDART/i,
message: 'llama.cpp found nvcc, but the CUDA runtime library is missing.',
suggestion: 'Suggested action: relaunch with the updated runner so llama.cpp builds CPU-only, or install a complete CUDA toolkit/runtime on this server for GPU llama.cpp.',
fixes: [
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
],
},
{ {
pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i, pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i,
message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.', message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.',
fixes: [ fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('diffusers') },
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') }, { label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') },
], ],
}, },
@@ -402,10 +481,32 @@ export function _diagnose(text) {
return null; return null;
} }
// Build the Markdown "troubleshooting bundle" that the diagnosis copy button
// puts on the clipboard.
//   task           - task record, or null/undefined to omit the Task section
//   diagnosis      - object whose `message` is reported ('(none)' if absent)
//   sourceText     - captured output; trimmed and fenced as text when truthy
//   suggestionText - suggestion line; a leading "Suggested action:" is removed
// Returns the sections joined with newlines.
function _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText) {
  const out = ['## Odysseus Cookbook troubleshooting'];
  // Every section is preceded by a single blank line.
  const addSection = (...rows) => {
    out.push('', ...rows);
  };

  if (task) {
    const port = task.sshPort ? `:${task.sshPort}` : '';
    addSection(
      '### Task',
      `- ID: ${task.sessionId || task.id || 'unknown'}`,
      `- Type: ${task.type || 'unknown'}`,
      `- Status: ${task.status || 'unknown'}`,
      `- Model: ${task.payload?.repo_id || task.name || 'unknown'}`,
      `- Host: ${task.remoteHost || 'local'}${port}`,
    );
  }

  addSection('### Diagnosis', diagnosis?.message || '(none)');

  if (suggestionText) {
    // The heading already says "Suggested action", so drop the duplicate prefix.
    addSection('### Suggested action', suggestionText.replace(/^Suggested action:\s*/i, ''));
  }

  const launchCmd = task?.payload?._cmd || '';
  if (launchCmd) {
    addSection('### Launch command', '```bash', launchCmd, '```');
  }

  if (sourceText) {
    addSection('### Captured output', '```text', String(sourceText).trim(), '```');
  }

  return out.join('\n');
}
export function _showDiagnosis(panel, diagnosis, sourceText) { export function _showDiagnosis(panel, diagnosis, sourceText) {
if (panel._lastDiagMsg === diagnosis.message) return; const wasCollapsed = panel._lastDiagMsg === diagnosis.message && panel._diagCollapsed;
if (panel._diagDismissed === diagnosis.message) return; // stay dismissed until new error if (panel._diagDismissed === diagnosis.message) return;
panel._lastDiagMsg = diagnosis.message; panel._lastDiagMsg = diagnosis.message;
panel._diagCollapsed = !!wasCollapsed;
let diag = panel.querySelector('.cookbook-diagnosis'); let diag = panel.querySelector('.cookbook-diagnosis');
if (!diag) { if (!diag) {
@@ -417,57 +518,161 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
} }
diag.classList.remove('hidden'); diag.classList.remove('hidden');
diag.innerHTML = ''; diag.innerHTML = '';
const taskEl = panel?.closest?.('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const fixes = [...(diagnosis.fixes || [])];
if (task?.type === 'serve' && task.payload?._cmd && !fixes.some(f => f.label === 'Edit serve')) {
fixes.push({ label: 'Edit serve', action: (p) => _openServeEditFromDiagnosis(p) });
}
const suggestionText = diagnosis.suggestion || (fixes.length
? `Suggested action: ${fixes[0].label}.`
: 'Suggested action: copy the error and adjust the serve settings.');
const header = document.createElement('div'); const header = document.createElement('div');
header.style.cssText = 'display:flex;align-items:center;justify-content:space-between;'; header.className = 'cookbook-diag-header';
const msg = document.createElement('div'); const fold = document.createElement('button');
msg.className = 'cookbook-diag-message'; fold.className = 'cookbook-diag-fold';
msg.textContent = diagnosis.message; fold.type = 'button';
header.appendChild(msg); fold.innerHTML = '<span class="cookbook-diag-chevron">▾</span><span>Error message:</span>';
header.appendChild(fold);
const copy = document.createElement('button');
copy.className = 'cookbook-diag-copy';
copy.type = 'button';
copy.title = 'Copy troubleshooting bundle';
copy.setAttribute('aria-label', 'Copy troubleshooting bundle');
copy.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
copy.addEventListener('click', (e) => {
e.stopPropagation();
_copyText(_diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText));
copy.classList.add('copied');
copy.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.6" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg>';
setTimeout(() => {
if (!copy.isConnected) return;
copy.classList.remove('copied');
copy.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
}, 1200);
});
header.appendChild(copy);
const dismiss = document.createElement('button'); const dismiss = document.createElement('button');
dismiss.className = 'close-btn'; dismiss.className = 'cookbook-diag-dismiss';
dismiss.style.cssText = 'width:16px;height:16px;font-size:9px;flex-shrink:0;'; dismiss.type = 'button';
dismiss.textContent = '\u2715'; dismiss.title = 'Dismiss error';
dismiss.addEventListener('click', () => { panel._diagDismissed = diagnosis.message; _clearDiagnosis(panel); }); dismiss.setAttribute('aria-label', 'Dismiss error');
dismiss.textContent = '×';
dismiss.addEventListener('click', (e) => {
e.stopPropagation();
panel._diagDismissed = diagnosis.message;
_clearDiagnosis(panel);
});
header.appendChild(dismiss); header.appendChild(dismiss);
diag.appendChild(header); diag.appendChild(header);
if (diagnosis.fixes && diagnosis.fixes.length) { const body = document.createElement('div');
body.className = 'cookbook-diag-body';
body.classList.toggle('hidden', panel._diagCollapsed);
fold.querySelector('.cookbook-diag-chevron').textContent = panel._diagCollapsed ? '▸' : '▾';
const msg = document.createElement('div');
msg.className = 'cookbook-diag-message';
msg.textContent = diagnosis.message;
body.appendChild(msg);
const suggestion = document.createElement('div');
suggestion.className = 'cookbook-diag-suggestion';
suggestion.textContent = suggestionText;
body.appendChild(suggestion);
fold.addEventListener('click', (e) => {
e.stopPropagation();
panel._diagCollapsed = !panel._diagCollapsed;
body.classList.toggle('hidden', panel._diagCollapsed);
fold.querySelector('.cookbook-diag-chevron').textContent = panel._diagCollapsed ? '▸' : '▾';
});
diag.appendChild(body);
const runFix = async (fix, button, busyLabel = fix.label, onStart = null, onDone = null) => {
if (!fix || !button || button.dataset.busy) return;
button.dataset.busy = '1';
const _orig = button.textContent;
const wp = spinnerModule.createWhirlpool(12);
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
button.textContent = '';
button.appendChild(wp.element);
const _lbl = document.createElement('span');
_lbl.textContent = busyLabel;
_lbl.style.verticalAlign = 'middle';
button.appendChild(_lbl);
try {
if (typeof onStart === 'function') onStart();
await fix.action(panel, sourceText);
} catch (err) {
console.error('[cookbook] diagnosis fix failed', err);
} finally {
if (button.isConnected) {
try { wp.destroy(); } catch {}
button.textContent = _orig;
delete button.dataset.busy;
}
if (typeof onDone === 'function') onDone();
}
};
if (fixes.length) {
const row = document.createElement('div'); const row = document.createElement('div');
row.className = 'cookbook-diag-fixes'; row.className = 'cookbook-diag-fixes';
for (const fix of diagnosis.fixes) {
const btn = document.createElement('button'); if (fixes.length <= 3) {
btn.className = 'cookbook-btn cookbook-diag-btn'; for (const fix of fixes) {
btn.textContent = fix.label; const btn = document.createElement('button');
btn.addEventListener('click', async () => { btn.className = 'cookbook-btn cookbook-diag-btn';
if (btn.dataset.busy) return; btn.type = 'button';
btn.dataset.busy = '1'; btn.textContent = fix.label;
// Spinner feedback while the fix runs (kill + relaunch takes a moment). btn.addEventListener('click', (e) => {
const _orig = btn.textContent; e.stopPropagation();
const wp = spinnerModule.createWhirlpool(12); runFix(fix, btn);
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;'; });
btn.textContent = ''; row.appendChild(btn);
btn.appendChild(wp.element); }
const _lbl = document.createElement('span'); body.appendChild(row);
_lbl.textContent = _orig; return;
_lbl.style.verticalAlign = 'middle';
btn.appendChild(_lbl);
try {
await fix.action(panel, sourceText);
} catch (e) {
console.error('[cookbook] diagnosis fix failed', e);
} finally {
// Retries animate the whole card away (button goes with it). For fixes
// that leave the card in place, restore the label.
if (btn.isConnected) { try { wp.destroy(); } catch {} btn.textContent = _orig; delete btn.dataset.busy; }
}
});
row.appendChild(btn);
} }
diag.appendChild(row);
const wrap = document.createElement('div');
wrap.className = 'cookbook-diag-actions';
const trigger = document.createElement('button');
trigger.className = 'cookbook-btn cookbook-diag-action-trigger';
trigger.type = 'button';
trigger.textContent = 'Actions';
trigger.appendChild(document.createTextNode(' ▾'));
wrap.appendChild(trigger);
const menu = document.createElement('div');
menu.className = 'dropdown cookbook-diag-menu hidden';
for (const fix of fixes) {
const item = document.createElement('button');
item.type = 'button';
item.textContent = fix.label;
item.addEventListener('click', async (e) => {
e.stopPropagation();
if (item.dataset.busy || trigger.dataset.busy) return;
item.dataset.busy = '1';
await runFix(fix, trigger, fix.label, () => menu.classList.add('hidden'), () => delete item.dataset.busy);
});
menu.appendChild(item);
}
wrap.appendChild(menu);
trigger.addEventListener('click', (e) => {
e.stopPropagation();
if (trigger.dataset.busy) return;
document.querySelectorAll('.cookbook-diag-menu').forEach(m => {
if (m !== menu) m.classList.add('hidden');
});
menu.classList.toggle('hidden');
});
row.appendChild(wrap);
body.appendChild(row);
} }
} }
+2
View File
@@ -193,6 +193,8 @@ export function _renderGpuToggles(system) {
if (quantSel) { if (quantSel) {
if (count <= 1) { if (count <= 1) {
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
} else if (String(system?.backend || '').toLowerCase() === 'rocm') {
quantSel.value = 'Q4_K_M'; // ROCm default stays GGUF/local-safe; AWQ is explicit only
} else { } else {
quantSel.value = 'AWQ-4bit'; // Multi-GPU -> AWQ for vLLM quantSel.value = 'AWQ-4bit'; // Multi-GPU -> AWQ for vLLM
} }
+54 -28
View File
@@ -260,12 +260,31 @@ export function _detectBackend(model) {
const q = (model.quant || '').toUpperCase(); const q = (model.quant || '').toUpperCase();
const sysBackend = String(_hwfitCache?.system?.backend || '').toLowerCase(); const sysBackend = String(_hwfitCache?.system?.backend || '').toLowerCase();
const isRocm = sysBackend === 'rocm'; const isRocm = sysBackend === 'rocm';
const isAppleSilicon = ['metal', 'mps', 'apple'].includes(sysBackend);
const _nm = `${model.repo_id || ''} ${model.path || ''} ${model.name || ''}`.toLowerCase();
if (!isAppleSilicon && (/\bmlx\b|mlx-|_mlx/i.test(_nm) || q.startsWith('MLX'))) {
return { backend: 'unsupported', label: 'Unsupported' };
}
const isAwqLike = /^AWQ|^GPTQ|^NVFP4/.test(q) || q === 'FP8' || /\b(awq|gptq|fp8|nvfp4)\b/i.test(_nm);
const isGgufLike = model.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf');
// Image gen models → diffusers // Image gen models → diffusers
if (model.is_image_gen || model.is_diffusion || model._tag === 'image') { if (model.is_image_gen || model.is_diffusion || model._tag === 'image') {
return { backend: 'diffusers', label: 'Diffusers' }; return { backend: 'diffusers', label: 'Diffusers' };
} }
// AWQ / GPTQ / FP8 are safetensors GPU-serving formats. Never route them
// through llama.cpp/Ollama just because the host is Mac/Windows; those engines
// need GGUF. The UI will warn/block on Metal where vLLM/SGLang aren't viable.
if (isAwqLike) {
return { backend: 'vllm', label: 'vLLM' };
}
// GGUF → llama.cpp/Ollama-compatible.
if (isGgufLike) {
return { backend: 'llamacpp', label: 'llama.cpp' };
}
// Windows → default to llama.cpp (no vLLM support on Windows) // Windows → default to llama.cpp (no vLLM support on Windows)
if (_isWindows()) { if (_isWindows()) {
return { backend: 'llamacpp', label: 'llama.cpp' }; return { backend: 'llamacpp', label: 'llama.cpp' };
@@ -278,19 +297,6 @@ export function _detectBackend(model) {
return { backend: 'llamacpp', label: 'llama.cpp' }; return { backend: 'llamacpp', label: 'llama.cpp' };
} }
// AWQ / GPTQ / FP8 → vLLM
if (/^AWQ|^GPTQ/.test(q) || q === 'FP8') {
return { backend: 'vllm', label: 'vLLM' };
}
// GGUF → llama.cpp. Match the quant tag OR a gguf hint in the repo/path/name:
// a raw .gguf file often has no quant field, which made it fall through to the
// vLLM default below.
const _nm = `${model.repo_id || ''} ${model.path || ''} ${model.name || ''}`.toLowerCase();
if (model.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf')) {
return { backend: 'llamacpp', label: 'llama.cpp' };
}
// ROCm/AMD machines should not blindly default HF safetensors models to // ROCm/AMD machines should not blindly default HF safetensors models to
// vLLM. SGLang is the safer OpenAI-compatible default for plain HF text // vLLM. SGLang is the safer OpenAI-compatible default for plain HF text
// repos there; llama.cpp still wins above whenever the model is GGUF. // repos there; llama.cpp still wins above whenever the model is GGUF.
@@ -1020,6 +1026,16 @@ function _wireTabEvents(body) {
// Download input // Download input
const dlBtn = document.getElementById('cookbook-dl-btn'); const dlBtn = document.getElementById('cookbook-dl-btn');
const dlInput = document.getElementById('cookbook-dl-repo'); const dlInput = document.getElementById('cookbook-dl-repo');
const dlCardToggle = document.getElementById('cookbook-download-card-toggle');
const dlCardBody = document.getElementById('cookbook-download-card-body');
const dlCardArrow = document.getElementById('cookbook-download-card-arrow');
if (dlCardToggle && dlCardBody) {
dlCardToggle.addEventListener('click', () => {
const isOpen = dlCardBody.style.display !== 'none';
dlCardBody.style.display = isOpen ? 'none' : 'block';
if (dlCardArrow) dlCardArrow.style.transform = isOpen ? 'rotate(0deg)' : 'rotate(90deg)';
});
}
if (dlBtn && dlInput) { if (dlBtn && dlInput) {
function _stripHfUrl(input) { function _stripHfUrl(input) {
let repo = input.trim(); let repo = input.trim();
@@ -1099,8 +1115,12 @@ function _wireTabEvents(body) {
if (hfToggle && hfList) { if (hfToggle && hfList) {
let _loaded = false; let _loaded = false;
// Per-server VRAM cache so we don't re-probe on every expand // Per-server VRAM cache so we don't re-probe on every expand
const _vramCache = {}; const _hwCache = {};
async function _getSelectedServerVram() { function _hfModelLooksAwqLike(m) {
const text = `${m?.repo_id || ''} ${(m?.tags || []).join(' ')}`.toLowerCase();
return /\b(awq|gptq|fp8|4bit|int4)\b/.test(text);
}
async function _getSelectedServerHw() {
// Prefer the "What Fits" dropdown (the main control that shows hardware); // Prefer the "What Fits" dropdown (the main control that shows hardware);
// fall back to the download dropdown. This is the server the list ranks for. // fall back to the download dropdown. This is the server the list ranks for.
const dlSrv = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server'); const dlSrv = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
@@ -1117,7 +1137,7 @@ function _wireTabEvents(body) {
} }
} }
const cacheKey = host || 'local'; const cacheKey = host || 'local';
if (_vramCache[cacheKey] !== undefined) return _vramCache[cacheKey]; if (_hwCache[cacheKey]) return _hwCache[cacheKey];
// Fetch system info for this server from hwfit // Fetch system info for this server from hwfit
try { try {
const qp = new URLSearchParams(); const qp = new URLSearchParams();
@@ -1127,13 +1147,13 @@ function _wireTabEvents(body) {
const r = await fetch(`/api/hwfit/system?${qp}`); const r = await fetch(`/api/hwfit/system?${qp}`);
if (r.ok) { if (r.ok) {
const sys = await r.json(); const sys = await r.json();
const v = sys?.gpu_vram_gb || 0; const hw = { vram: sys?.gpu_vram_gb || 0, backend: String(sys?.backend || '').toLowerCase() };
_vramCache[cacheKey] = v; _hwCache[cacheKey] = hw;
return v; return hw;
} }
} catch {} } catch {}
_vramCache[cacheKey] = 0; _hwCache[cacheKey] = { vram: 0, backend: '' };
return 0; return _hwCache[cacheKey];
} }
async function _loadLatest() { async function _loadLatest() {
// Match the Dependencies loader: whirlpool spinner + text label so the // Match the Dependencies loader: whirlpool spinner + text label so the
@@ -1152,7 +1172,8 @@ function _wireTabEvents(body) {
} catch { } catch {
hfList.innerHTML = '<div class="hwfit-loading">Scanning models…</div>'; hfList.innerHTML = '<div class="hwfit-loading">Scanning models…</div>';
} }
const vram = await _getSelectedServerVram(); const hwInfo = await _getSelectedServerHw();
const vram = hwInfo.vram || 0;
try { try {
let lastErr = ''; let lastErr = '';
const _fetchLatest = async (v) => { const _fetchLatest = async (v) => {
@@ -1168,6 +1189,9 @@ function _wireTabEvents(body) {
if (!models.length && vram > 0) { if (!models.length && vram > 0) {
models = await _fetchLatest(0); models = await _fetchLatest(0);
} }
if (['rocm', 'metal', 'mps', 'apple', 'generic', 'cpu'].includes(hwInfo.backend)) {
models = models.filter(m => !_hfModelLooksAwqLike(m));
}
if (!models.length) { if (!models.length) {
// Distinguish "the HF API failed" from "nothing matched" so an outage // Distinguish "the HF API failed" from "nothing matched" so an outage
// doesn't masquerade as no-fitting-models. // doesn't masquerade as no-fitting-models.
@@ -1351,10 +1375,12 @@ function _renderRecipes() {
// Search group // Search group
html += '<div class="cookbook-group" data-backend-group="Search" style="flex:0 0 auto;">'; html += '<div class="cookbook-group" data-backend-group="Search" style="flex:0 0 auto;">';
html += '<div class="admin-card" style="display:flex;flex-direction:column;overflow:hidden;">'; html += '<div class="admin-card" style="display:flex;flex-direction:column;overflow:hidden;">';
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">'; html += '<button type="button" id="cookbook-download-card-toggle" style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;width:100%;background:transparent;border:0;padding:0;color:inherit;text-align:left;cursor:pointer;">';
html += '<h2 style="margin:0;padding:0;line-height:1;">Download</h2>'; html += '<h2 style="margin:0;padding:0;line-height:1;">Download</h2>';
html += '</div>'; html += '<span id="cookbook-download-card-arrow" style="margin-left:auto;display:inline-block;transition:transform 0.15s;font-size:13px;line-height:1;">\u25B8</span>';
html += '<p class="memory-desc doclib-desc" style="margin-top:6px;">Download from <a href="https://huggingface.co/models" target="_blank" rel="noopener" style="color:var(--accent,var(--red));text-decoration:none;"><svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:1px;"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/><polyline points="15 3 21 3 21 9"/><line x1="10" y1="14" x2="21" y2="3"/></svg>HuggingFace</a> by pasting model link, or download directly in the Scan section below.</p>'; html += '</button>';
html += '<div id="cookbook-download-card-body" style="display:none;">';
html += '<p class="memory-desc doclib-desc" style="margin-top:6px;">Download directly from Scan, or paste a HuggingFace model link.</p>';
html += '<div class="hwfit-container" id="hwfit-container">'; html += '<div class="hwfit-container" id="hwfit-container">';
// Section 1: Settings // Section 1: Settings
@@ -1383,7 +1409,7 @@ function _renderRecipes() {
// silently sending downloads to the wrong server. An empty selection means Local; the user // silently sending downloads to the wrong server. An empty selection means Local; the user
// chooses a remote server explicitly via the dropdown. // chooses a remote server explicitly via the dropdown.
// Download input // Manual download input
html += `<div style="margin-top:7px;margin-bottom:2px;display:flex;gap:4px;align-items:center;">`; html += `<div style="margin-top:7px;margin-bottom:2px;display:flex;gap:4px;align-items:center;">`;
if (_es.servers.length > 1) { if (_es.servers.length > 1) {
html += `<select class="cookbook-field-input hwfit-dl-server" id="hwfit-dl-server" style="height:28px;position:relative;top:0px;">`; html += `<select class="cookbook-field-input hwfit-dl-server" id="hwfit-dl-server" style="height:28px;position:relative;top:0px;">`;
@@ -1399,7 +1425,7 @@ function _renderRecipes() {
html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`; html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`;
html += `</div>`; html += `</div>`;
// Latest HF models that fit — collapsible card list // Latest HF models that fit — collapsible card list
html += `<div style="margin-top:2px;position:relative;top:-8px;">`; html += `<div style="margin-top:5px;position:relative;top:-3px;">`;
html += `<div style="display:flex;gap:4px;align-items:center;">`; html += `<div style="display:flex;gap:4px;align-items:center;">`;
html += `<button type="button" class="memory-toolbar-btn" id="cookbook-hf-latest-toggle" style="flex:1;text-align:left;height:26px;display:flex;align-items:center;gap:6px;border-radius:4px;">`; html += `<button type="button" class="memory-toolbar-btn" id="cookbook-hf-latest-toggle" style="flex:1;text-align:left;height:26px;display:flex;align-items:center;gap:6px;border-radius:4px;">`;
html += `<span id="cookbook-hf-latest-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;">\u25B8</span>`; html += `<span id="cookbook-hf-latest-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;">\u25B8</span>`;
@@ -1411,7 +1437,7 @@ function _renderRecipes() {
html += `</div>`; html += `</div>`;
// Search section // Search section
html += '</div></div></div>'; html += '</div></div></div></div>';
html += '<div class="cookbook-group" data-backend-group="Search">'; html += '<div class="cookbook-group" data-backend-group="Search">';
html += '<div class="admin-card" style="flex:1;display:flex;flex-direction:column;overflow:hidden;">'; html += '<div class="admin-card" style="flex:1;display:flex;flex-direction:column;overflow:hidden;">';
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">'; html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">';
+3
View File
@@ -86,6 +86,9 @@ function _ggufIncludePattern(model, source) {
function _missingGgufMessage(model) { function _missingGgufMessage(model) {
const name = model?.name || 'this model'; const name = model?.name || 'this model';
if (/\bnvfp4\b/i.test(name)) {
return `${name} is an NVIDIA NVFP4 checkpoint, not a GGUF download. Pick the base model row with an Unsloth GGUF source, or paste the GGUF repo directly.`;
}
return `No GGUF source is configured for ${name}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`; return `No GGUF source is configured for ${name}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`;
} }
+173 -44
View File
@@ -34,12 +34,106 @@ function _taskBadge(task) {
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status }; return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status };
} }
// Decide whether a task card may be cleared from the list.
// Never clearable: a missing task, a running task, a serve task that is
// ready (status 'ready' or _serveReady set), or a finished download that has
// no payload._dep flag. Otherwise clearable only in a terminal status:
// done, stopped, error, crashed or failed.
function _canClearTask(task) {
  if (!task) return false;

  const { status, type } = task;
  const liveServe = type === 'serve' && (status === 'ready' || task._serveReady);
  const finishedPlainDownload = type === 'download' && status === 'done' && !task.payload?._dep;
  if (status === 'running' || liveServe || finishedPlainDownload) return false;

  switch (status) {
    case 'done':
    case 'stopped':
    case 'error':
    case 'crashed':
    case 'failed':
      return true;
    default:
      return false;
  }
}
// Text shown on a task's clear pill. The label is the same for every task;
// `task` is accepted but not consulted.
function _clearPillLabel(task) {
  const label = 'clear';
  return label;
}
function _shouldOfferCrashReport(task) { function _shouldOfferCrashReport(task) {
if (!task) return false; if (!task) return false;
if (task._unreachable && task.type === 'serve') return true; if (task._unreachable && task.type === 'serve') return true;
return ['error', 'crashed', 'failed'].includes(task.status); return ['error', 'crashed', 'failed'].includes(task.status);
} }
// True when the task's repo id or name mentions AWQ, GPTQ or FP8 as a whole
// word AND its launch command or captured output mentions llama-server,
// llama_cpp.server, ollama or ggml_cuda_enable_unified_memory.
// All matching is done on lower-cased text.
function _serveTaskLooksAwqOnLocalBackend(task, outputText = '') {
  const identity = [task?.payload?.repo_id || '', task?.name || ''].join(' ').toLowerCase();
  const evidence = [task?.payload?._cmd || '', outputText || ''].join(' ').toLowerCase();

  const quantFormat = /\b(awq|gptq|fp8)\b/;
  if (!quantFormat.test(identity)) return false;

  const localBackend = /(llama-server|llama_cpp\.server|ollama|ggml_cuda_enable_unified_memory)/;
  return localBackend.test(evidence);
}
// True when the task's repo id or name mentions AWQ, GPTQ or FP8 as a whole
// word AND the captured output contains one of the "no usable accelerator"
// phrases: no accelerator, no cuda runtime, failed to infer device type,
// triton is not supported, or 0 active driver.
function _serveTaskLooksAwqWithoutUsableAccelerator(task, outputText = '') {
  const identity = [task?.payload?.repo_id || '', task?.name || ''].join(' ').toLowerCase();
  const log = String(outputText || '').toLowerCase();

  const quantFormat = /\b(awq|gptq|fp8)\b/;
  if (!quantFormat.test(identity)) return false;

  const noAccelerator = /(no accelerator|no cuda runtime|failed to infer device type|triton is not supported|0 active driver)/i;
  return noAccelerator.test(log);
}
// Open the Cookbook "Search" tab and pre-fill it so the user can look for a
// GGUF build of the task's model. The search term is the last path segment
// of the repo id (or task name) with any AWQ/GPTQ/FP8/4bit/8bit/Int4/Int8
// suffix and trailing '-'/'_' removed; the quant selector is set to Q4_K_M.
// The DOM work runs from an 80 ms timer and is not awaited by the caller.
// NOTE(review): the delay presumably gives the modal time to render — confirm.
async function _openDownloadForGgufTask(task) {
  const raw = task?.payload?.repo_id || task?.name || '';
  const leaf = String(raw).split('/').pop();
  const trimmed = leaf
    .replace(/[-_](?:AWQ|GPTQ|FP8|4bit|8bit|Int4|Int8).*$/i, '')
    .replace(/[-_]+$/g, '');
  // If stripping left nothing, fall back to the untouched leaf, then to raw.
  const modelName = trimmed || leaf || raw;

  const cookbook = window.cookbookModule;
  const canOpenDirectly = Boolean(cookbook) && typeof cookbook.open === 'function';
  if (canOpenDirectly) {
    cookbook.open({ tab: 'Search' });
  } else {
    document.getElementById('tool-cookbook-btn')?.click();
  }

  const prefillSearch = async () => {
    const modal = document.getElementById('cookbook-modal');
    const searchTab = modal?.querySelector('.cookbook-tab[data-backend="Search"]');
    if (searchTab && !searchTab.classList.contains('active')) {
      searchTab.click();
    }

    const searchBox = document.getElementById('hwfit-search');
    if (searchBox) {
      searchBox.value = modelName;
      searchBox.dispatchEvent(new Event('input', { bubbles: true }));
      searchBox.focus();
    }

    const quantSelect = document.getElementById('hwfit-quant');
    if (quantSelect) {
      quantSelect.value = 'Q4_K_M';
      quantSelect.dispatchEvent(new Event('change', { bubbles: true }));
    }

    // Best effort: a failed import or fetch call is deliberately ignored.
    try {
      const hwfit = await import('./cookbook-hwfit.js');
      if (typeof hwfit._hwfitFetch === 'function') {
        hwfit._hwfitFetch(true);
      }
    } catch {}
  };

  setTimeout(prefillSearch, 80);
}
// Build a diagnosis object ({ message, suggestion, fixes }) for a serve task
// that ended in a terminal state, or return null when none applies.
//   task       - task object; only type 'serve' with status stopped/error/
//                crashed/failed is diagnosed.
//   outputText - captured output; falls back to task.output when empty.
// Precedence: AWQ-on-local-backend, AWQ-without-accelerator, _diagnose(out),
// then a generic fallback message.
function _terminalServeDiagnosis(task, outputText) {
  const out = String(outputText || task?.output || '');
  if (!task) return null;
  if (task.type !== 'serve') return null;
  if (!['stopped', 'error', 'crashed', 'failed'].includes(task.status)) return null;
  if (!out.trim()) return null;

  const editServeFix = () => ({ label: 'Edit serve', action: (panel) => _openServeEditForTask(task) });
  const ggufAndEditFixes = () => [
    { label: 'Find GGUF download', action: () => _openDownloadForGgufTask(task) },
    editServeFix(),
  ];

  if (_serveTaskLooksAwqOnLocalBackend(task, out)) {
    return {
      message: 'AWQ/GPTQ/FP8 cannot be served through llama.cpp/Ollama unified-memory mode.',
      suggestion: 'Suggested action: use vLLM/SGLang on a compatible CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama/unified-memory serving.',
      fixes: ggufAndEditFixes(),
    };
  }

  if (_serveTaskLooksAwqWithoutUsableAccelerator(task, out)) {
    return {
      message: 'AWQ/GPTQ/FP8 needs a working vLLM/SGLang accelerator path; this server did not expose one.',
      suggestion: 'Suggested action: choose a CUDA/ROCm server where vLLM/SGLang can see the GPU, or download a GGUF version and serve it with llama.cpp/Ollama.',
      fixes: ggufAndEditFixes(),
    };
  }

  const knownDiagnosis = _diagnose(out);
  if (knownDiagnosis) return knownDiagnosis;

  // Generic fallback: word it differently when the log points at a llama.cpp build.
  const mentionsLlamaBuild = /Native llama-server not found|building llama-server|llama\.cpp/i.test(out);
  let message;
  let suggestion;
  if (mentionsLlamaBuild) {
    message = 'llama.cpp build stopped before the server became reachable.';
    suggestion = 'Suggested action: copy the troubleshooting bundle, then edit serve settings. For the quickest local/CPU path, use Ollama or a prebuilt llama-server; source builds can take several minutes and fail if build dependencies are incomplete.';
  } else {
    message = 'Serve stopped before the model became reachable.';
    suggestion = 'Suggested action: copy the troubleshooting bundle, then edit serve settings or relaunch with a CPU/backend fallback.';
  }
  return { message, suggestion, fixes: [editServeFix()] };
}
function _redactCrashReportText(text) { function _redactCrashReportText(text) {
if (!text) return ''; if (!text) return '';
return String(text) return String(text)
@@ -173,6 +267,23 @@ export function _parseServePhase(snapshot) {
if (/Ollama API ready on port\s+\d+/i.test(flat)) { if (/Ollama API ready on port\s+\d+/i.test(flat)) {
return { phase: 'ready', status: 'ready' }; return { phase: 'ready', status: 'ready' };
} }
const llamaBuildMatches = [...flat.matchAll(/\[\s*(\d{1,3})%\]\s*(?:Building|Linking)/gi)];
if (llamaBuildMatches.length) {
const pct = Math.min(100, parseInt(llamaBuildMatches[llamaBuildMatches.length - 1][1], 10));
return { phase: `building llama.cpp ${pct}%`, status: 'running', pct };
}
if (/Native llama-server not found|building from source/i.test(flat)) {
if (/Cloning into ['"]?llama\.cpp/i.test(flat) && !/Receiving objects:\s*100%/i.test(flat)) {
return { phase: 'cloning llama.cpp', status: 'running' };
}
if (/Configuring incomplete|CMake Error/i.test(flat)) {
return {};
}
if (/CMAKE_BUILD_TYPE|Detecting CXX|Found Threads|Including CPU backend|CUDA nvcc found|building llama-server/i.test(flat)) {
return { phase: 'configuring llama.cpp', status: 'running' };
}
return { phase: 'building llama.cpp', status: 'running' };
}
// HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up // HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up
if (/(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*\d{3}/.test(flat)) { if (/(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*\d{3}/.test(flat)) {
return { phase: 'idle', status: 'ready' }; return { phase: 'idle', status: 'ready' };
@@ -341,8 +452,24 @@ async function _startQueuedDownload(task) {
// ── Task CRUD ── // ── Task CRUD ──
// True when a serve task shows evidence that its server came up: either the
// task carries a truthy _serveReady flag, or its output contains one of the
// readiness markers listed below.
//   task - task object; may be null/undefined.
function _serveOutputLooksReady(task) {
  const log = String(task?.output || '');
  if (task?._serveReady) return true;
  const readyMarkers = [
    /Application startup complete/i,
    /Ollama API ready on port\s+\d+/i,
    // Access-log line for a GET/POST answered with a 2xx status.
    /(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*2\d\d/i,
  ];
  return readyMarkers.some((marker) => marker.test(log));
}
// Return the task as it should be shown: a serve task stored as 'done' whose
// output never looked ready is returned as a copy with status 'error'.
// Every other value (including non-objects) is returned unchanged.
function _normalizeTaskForDisplay(task) {
  const isTaskObject = Boolean(task) && typeof task === 'object';
  if (!isTaskObject) return task;
  const claimsServeDone = task.type === 'serve' && task.status === 'done';
  if (claimsServeDone && !_serveOutputLooksReady(task)) {
    // Copy rather than mutate so the caller's object is left untouched.
    return { ...task, status: 'error' };
  }
  return task;
}
export function _loadTasks() { export function _loadTasks() {
try { return JSON.parse(localStorage.getItem(TASKS_KEY)) || []; } try { return (JSON.parse(localStorage.getItem(TASKS_KEY)) || []).map(_normalizeTaskForDisplay); }
catch { return []; } catch { return []; }
} }
@@ -876,7 +1003,7 @@ export async function _serveAutoFix(panel, envVar) {
// Edit button, but optionally with a modified command (used by the diagnosis // Edit button, but optionally with a modified command (used by the diagnosis
// "Retry with X" buttons so a retry lands in the editable Serve panel with the // "Retry with X" buttons so a retry lands in the editable Serve panel with the
// adjusted setting, instead of blindly relaunching). // adjusted setting, instead of blindly relaunching).
async function _openServeEditForTask(task, cmdOverride) { async function _openServeEditForTask(task, cmdOverride, fieldOverrides = null) {
const repo = task.payload?.repo_id; const repo = task.payload?.repo_id;
if (!repo) { uiModule.showToast('No model info on this task'); return; } if (!repo) { uiModule.showToast('No model info on this task'); return; }
const cmd = cmdOverride || task.payload?._cmd; const cmd = cmdOverride || task.payload?._cmd;
@@ -884,6 +1011,9 @@ async function _openServeEditForTask(task, cmdOverride) {
let fields = cmdOverride let fields = cmdOverride
? _parseServeCmdToFields(cmd) ? _parseServeCmdToFields(cmd)
: (task.payload?._fields || (cmd ? _parseServeCmdToFields(cmd) : null)); : (task.payload?._fields || (cmd ? _parseServeCmdToFields(cmd) : null));
if (fieldOverrides && typeof fieldOverrides === 'object') {
fields = { ...(fields || {}), ...fieldOverrides };
}
// Switch the active server to the one this serve ran on (mirrors _openEdit). // Switch the active server to the one this serve ran on (mirrors _openEdit).
const _tHost = task.remoteHost || ''; const _tHost = task.remoteHost || '';
_envState.remoteHost = _tHost; _envState.remoteHost = _tHost;
@@ -1352,8 +1482,8 @@ export function _renderRunningTab() {
const host = btn.dataset.clearServer; const host = btn.dataset.clearServer;
if (!await window.styledConfirm(`Clear finished tasks on ${_serverName(host)}?`, { confirmText: 'Clear' })) return; if (!await window.styledConfirm(`Clear finished tasks on ${_serverName(host)}?`, { confirmText: 'Clear' })) return;
const allTasks = _loadTasks(); const allTasks = _loadTasks();
const toRemove = allTasks.filter(t => (t.remoteHost || '') === host && t.status !== 'running'); const toRemove = allTasks.filter(t => (t.remoteHost || '') === host && _canClearTask(t));
const remaining = allTasks.filter(t => (t.remoteHost || '') !== host || t.status === 'running'); const remaining = allTasks.filter(t => (t.remoteHost || '') !== host || !_canClearTask(t));
_saveTasks(remaining); _saveTasks(remaining);
// Fade/slide each finished card out (same exit as the per-card clear) // Fade/slide each finished card out (same exit as the per-card clear)
// instead of yanking them instantly. // instead of yanking them instantly.
@@ -1443,16 +1573,19 @@ export function _renderRunningTab() {
const _bdg = _taskBadge(task); const _bdg = _taskBadge(task);
badge.textContent = _bdg.text; badge.textContent = _bdg.text;
badge.className = 'cookbook-task-status' + (_bdg.cls ? ' ' + _bdg.cls : ''); badge.className = 'cookbook-task-status' + (_bdg.cls ? ' ' + _bdg.cls : '');
badge.style.display = isDone ? 'none' : ''; // hidden — type chip carries it badge.style.display = '';
} }
// Indicator: spinning wave while running, green check when finished. // Indicator: spinning wave while running, green check when finished.
const wave = el.querySelector('.cookbook-task-wave'); const wave = el.querySelector('.cookbook-task-wave');
if (wave) wave.style.display = task.status === 'running' ? '' : 'none'; if (wave) wave.style.display = task.status === 'running' ? '' : 'none';
// Model downloads (which have a Serve → button) don't get a clear pill —
// pressing Serve clears them. Dep installs / serve tasks keep it.
const check = el.querySelector('.cookbook-task-check'); const check = el.querySelector('.cookbook-task-check');
const _showClear = isDone && !(task.type === 'download' && !task.payload?._dep); if (check) {
if (check) check.style.display = _showClear ? '' : 'none'; check.style.display = _canClearTask(task) ? '' : 'none';
const label = check.querySelector('.cookbook-task-done-label');
if (label) label.textContent = _clearPillLabel(task);
}
const terminalDiag = _terminalServeDiagnosis(task, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
if (terminalDiag) _showDiagnosis(el, terminalDiag, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
} }
if (!task) { if (!task) {
if (el._uptimeInterval) { clearInterval(el._uptimeInterval); el._uptimeInterval = null; } if (el._uptimeInterval) { clearInterval(el._uptimeInterval); el._uptimeInterval = null; }
@@ -1476,11 +1609,8 @@ export function _renderRunningTab() {
<div class="cookbook-task-header"> <div class="cookbook-task-header">
<span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span> <span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span>
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(task.name)}</span> <span class="cookbook-task-name">${modelLogo(task.name)}${esc(task.name)}</span>
<span class="cookbook-task-status ${_bdg.cls}" style="display:${task.status === 'done' ? 'none' : ''}"${_bdgTitle}>${esc(_bdg.text)}</span> <span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
${task.type === 'serve' && task.payload?._cmd ? '<button class="cookbook-task-edit-btn" title="Edit settings &amp; relaunch"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>' : ''} <span class="cookbook-task-status ${_bdg.cls}"${_bdgTitle}>${esc(_bdg.text)}</span>
${task.type === 'serve' && task.payload?._cmd ? '<button class="cookbook-task-save-btn" title="Save preset"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M19 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11l5 5v11a2 2 0 0 1-2 2z"/><polyline points="17 21 17 13 7 13 7 21"/><polyline points="7 3 7 8 15 8"/></svg></button>' : ''}
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${(task.status === 'done' && !(task.type === 'download' && !task.payload?._dep)) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">done</span><span class="cookbook-task-clear-label">clear</span></span></span>
${task.type === 'download' && !task.payload?._dep && task.status === 'done' ? `<span class="cookbook-task-status cookbook-task-done">finished</span>` : ''}
<button class="cookbook-task-menu-btn" title="Actions">&#8942;</button> <button class="cookbook-task-menu-btn" title="Actions">&#8942;</button>
</div> </div>
<div class="cookbook-task-sub"><span class="cookbook-task-session">${esc(task.sessionId)}</span><span class="cookbook-task-uptime" style="display:${((task.type === 'serve' || task.type === 'download') && task.status === 'running') ? '' : 'none'}"></span></div> <div class="cookbook-task-sub"><span class="cookbook-task-session">${esc(task.sessionId)}</span><span class="cookbook-task-uptime" style="display:${((task.type === 'serve' || task.type === 'download') && task.status === 'running') ? '' : 'none'}"></span></div>
@@ -1490,6 +1620,9 @@ export function _renderRunningTab() {
const _waveEl = el.querySelector('.cookbook-task-wave'); const _waveEl = el.querySelector('.cookbook-task-wave');
if (_waveEl && task.status === 'running') _registerWaveEl(_waveEl); if (_waveEl && task.status === 'running') _registerWaveEl(_waveEl);
const terminalDiag = _terminalServeDiagnosis(task, task.output || '');
if (terminalDiag) _showDiagnosis(el, terminalDiag, task.output || '');
const _uptimeEl = el.querySelector('.cookbook-task-uptime'); const _uptimeEl = el.querySelector('.cookbook-task-uptime');
if (_uptimeEl && (task.type === 'serve' || task.type === 'download') && task.status === 'running') { if (_uptimeEl && (task.type === 'serve' || task.type === 'download') && task.status === 'running') {
const _startedAt = task.ts || Date.now(); const _startedAt = task.ts || Date.now();
@@ -1506,35 +1639,12 @@ export function _renderRunningTab() {
} }
// Re-open the Serve panel for this model, pre-filled with the EXACT // Re-open the Serve panel for this model, pre-filled with the EXACT
// settings this instance launched with, and on the SERVER it runs on // settings this instance launched with, and on the SERVER it runs on.
// shared by the edit icon button and the ⋮ "Edit settings" menu item.
const _openEdit = () => _openServeEditForTask(task); const _openEdit = () => _openServeEditForTask(task);
const editBtn = el.querySelector('.cookbook-task-edit-btn'); el.addEventListener('cookbook:edit-serve', (e) => {
if (editBtn) { e.stopPropagation();
editBtn.addEventListener('click', (e) => { e.stopPropagation(); _openEdit(); }); _openServeEditForTask(task, null, e.detail?.fields || null);
} });
// Wire save icon button
const saveBtn = el.querySelector('.cookbook-task-save-btn');
if (saveBtn) {
saveBtn.addEventListener('click', async (e) => {
e.stopPropagation();
// Tell them it's already saved up front (often true now that working
// configs auto-save) instead of after they've typed a name.
if (_loadPresets().some(p => p.cmd === task.payload?._cmd)) {
uiModule.showToast('Already saved');
return;
}
const label = (await uiModule.styledPrompt('Name this config so you can recall it later.', {
title: 'Save Config', defaultValue: task.name, placeholder: 'e.g. 8-bit, fast', confirmText: 'Save',
}) || '').trim();
if (!label) return;
if (!_saveTaskAsPreset(task, label)) { uiModule.showToast('Already saved'); return; }
saveBtn.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="2.5" stroke-linecap="round"><polyline points="20 6 9 17 4 12"/></svg>';
uiModule.showToast(`Saved "${label}"`);
setTimeout(() => { saveBtn.style.display = 'none'; }, 1500);
});
}
// Finished download → an explicit "Serve →" button jumps straight to the // Finished download → an explicit "Serve →" button jumps straight to the
// Serve tab with this model pre-selected (on the server it downloaded to). // Serve tab with this model pre-selected (on the server it downloaded to).
@@ -2018,12 +2128,31 @@ async function _reconnectTask(el, task) {
if (badge) { badge.textContent = _statusLabel('error', task.type); badge.className = 'cookbook-task-status cookbook-task-error'; } if (badge) { badge.textContent = _statusLabel('error', task.type); badge.className = 'cookbook-task-status cookbook-task-error'; }
_showCookbookNotif(true); _showCookbookNotif(true);
} else { } else {
const looksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED') && (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('Application startup complete') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK')); const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED')
if (!lastOutput.trim() || (task.type === 'download' && !looksSuccessful)) { && (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput });
const looksSuccessful = task.type === 'download' ? downloadLooksSuccessful : serveLooksReady;
if (!lastOutput.trim() || !looksSuccessful) {
_updateTask(task.sessionId, { status: 'crashed' }); _updateTask(task.sessionId, { status: 'crashed' });
el.dataset.status = 'crashed'; el.dataset.status = 'crashed';
const badge = el.querySelector('.cookbook-task-status'); const badge = el.querySelector('.cookbook-task-status');
if (badge) { badge.textContent = _statusLabel('crashed', task.type); badge.className = 'cookbook-task-status cookbook-task-crashed'; } if (badge) { badge.textContent = _statusLabel('crashed', task.type); badge.className = 'cookbook-task-status cookbook-task-crashed'; }
if (task.type === 'serve') {
const diag = _diagnose(lastOutput) || {
message: _serveTaskLooksAwqOnLocalBackend(task, lastOutput)
? 'AWQ/GPTQ/FP8 cannot be served through llama.cpp/Ollama unified-memory mode.'
: /Native llama-server not found|building llama-server|llama\.cpp/i.test(lastOutput)
? 'llama.cpp build stopped before the server became reachable.'
: 'Serve stopped before the model became reachable.',
suggestion: _serveTaskLooksAwqOnLocalBackend(task, lastOutput)
? 'Suggested action: use vLLM/SGLang on a compatible CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama/unified-memory serving.'
: /Native llama-server not found|building llama-server|llama\.cpp/i.test(lastOutput)
? 'Suggested action: copy the troubleshooting bundle, then edit serve settings. For the quickest local/CPU path, use Ollama or a prebuilt llama-server; source builds can take several minutes and fail if build dependencies are incomplete.'
: 'Suggested action: copy the troubleshooting bundle, then edit serve settings or relaunch with a CPU/backend fallback.',
fixes: [{ label: 'Edit serve', action: (panel) => _openServeEditForTask(task) }],
};
_showDiagnosis(el, diag, lastOutput);
}
_showCookbookNotif(true); _showCookbookNotif(true);
} else { } else {
_updateTask(task.sessionId, { status: 'done' }); _updateTask(task.sessionId, { status: 'done' });
+59 -9
View File
@@ -41,6 +41,48 @@ const SERVE_STATE_KEY = 'cookbook-serve-state';
let _cachedAllModels = []; let _cachedAllModels = [];
// Heuristic: true when the model looks like an AWQ/GPTQ/FP8 quantization,
// judged from model.quant or from an awq/gptq/fp8 tag in the repo id, name
// or path.
//   model - model descriptor (quant, repo_id, name, path are read); may be null.
//   repo  - repo id string; may be empty/undefined.
function _repoLooksAwqLike(model, repo) {
  const q = String(model?.quant || '').toUpperCase();
  const n = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  // Explicit alphanumeric boundaries instead of `\b`: `\b` counts "_" as a
  // word character, so "model_awq" or "llama-fp8_dynamic" would go undetected.
  const quantTag = /(?:^|[^a-z0-9])(?:awq|gptq|fp8)(?![a-z0-9])/;
  return /^AWQ|^GPTQ/.test(q) || q === 'FP8' || quantTag.test(n);
}
// Heuristic: true when the model looks like a GGUF build: an is_gguf flag,
// a quant starting with Q2..Q8 or IQ (or exactly "GGUF"), or "gguf" anywhere
// in the repo id, name or path.
//   model - model descriptor; may be null.
//   repo  - repo id string; may be empty/undefined.
function _repoLooksGgufLike(model, repo) {
  if (model?.is_gguf) return true;
  const quant = String(model?.quant || '').toUpperCase();
  if (/^(?:Q[2-8]|IQ)/.test(quant) || quant === 'GGUF') return true;
  const haystack = [repo || '', model?.repo_id || '', model?.name || '', model?.path || '']
    .join(' ')
    .toLowerCase();
  return haystack.includes('gguf');
}
// Return a { title, body } warning when the chosen backend cannot serve the
// model's format, or null when the combination looks fine.
//   model   - model descriptor passed through to the format heuristics.
//   repo    - repo id string.
//   backend - backend key; 'llamacpp', 'ollama', 'vllm' and 'sglang' are checked.
//   fields  - serve form fields; only unified_mem is read.
function _serveBackendWarning(model, repo, backend, fields = {}) {
  const awqLike = _repoLooksAwqLike(model, repo);
  const ggufLike = _repoLooksGgufLike(model, repo);
  const isGgufBackend = backend === 'llamacpp' || backend === 'ollama';
  const isSafetensorsBackend = backend === 'vllm' || backend === 'sglang';

  if (awqLike) {
    if (isGgufBackend) {
      return {
        title: 'AWQ needs vLLM or SGLang',
        body: 'This model looks like AWQ/GPTQ/FP8 safetensors. llama.cpp and Ollama need GGUF files, so this backend cannot serve it. Choose vLLM/SGLang on a CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama.',
      };
    }
    if (_isMetal() && isSafetensorsBackend) {
      return {
        title: 'AWQ is not a unified-memory path',
        body: 'This model looks like AWQ/GPTQ/FP8 safetensors. AWQ is for vLLM/SGLang on CUDA/ROCm-style GPU servers, not local unified-memory llama.cpp/Ollama serving. For unified memory, download a GGUF model and use llama.cpp/Ollama.',
      };
    }
    if (fields.unified_mem) {
      return {
        title: 'AWQ is not a unified-memory path',
        body: 'This model looks like AWQ/GPTQ/FP8 safetensors, but unified-memory local serving expects GGUF. Use vLLM/SGLang on a compatible GPU server, or download a GGUF version for llama.cpp/Ollama.',
      };
    }
  }

  if (ggufLike && isSafetensorsBackend) {
    return {
      title: 'GGUF needs llama.cpp or Ollama',
      body: 'This model looks like GGUF. vLLM/SGLang expect HuggingFace safetensors-style repos. Choose llama.cpp/Ollama for GGUF, or download a safetensors model for vLLM/SGLang.',
    };
  }
  return null;
}
function _hasOwn(obj, key) { function _hasOwn(obj, key) {
return Object.prototype.hasOwnProperty.call(obj || {}, key); return Object.prototype.hasOwnProperty.call(obj || {}, key);
} }
@@ -324,12 +366,6 @@ function _rerenderCachedModels() {
c.style.alignItems = ''; c.style.alignItems = '';
}); });
// Capture grid height
const _tb = list.closest('.admin-card')?.querySelector('.memory-toolbar');
const _tbH = _tb ? _tb.offsetHeight : 0;
list.style.minHeight = (list.offsetHeight + _tbH) + 'px';
list.style.maxHeight = (list.offsetHeight + _tbH) + 'px';
const shortName = repo.split('/').pop(); const shortName = repo.split('/').pop();
const _es = _envState; const _es = _envState;
// The venv set per-server in Settings (server.envPath). Used as the venv // The venv set per-server in Settings (server.envPath). Used as the venv
@@ -350,8 +386,13 @@ function _rerenderCachedModels() {
? _byRepo[repo] ? _byRepo[repo]
: (_lastUsed || (_isLegacyFlat ? _allSs : {})); : (_lastUsed || (_isLegacyFlat ? _allSs : {}));
const detectedBackend = _detectBackend(m).backend; const detectedBackend = _detectBackend(m).backend;
const defaultBackend = detectedBackend; const _allowedBackends = new Set(_isWindows()
const savedMatchesBackend = (ss.backend || 'vllm') === detectedBackend; ? ['llamacpp']
: (_isMetal() ? ['llamacpp', 'ollama'] : ['vllm', 'sglang', 'llamacpp', 'ollama', 'diffusers']));
const defaultBackend = (ss._forceBackend && ss.backend && _allowedBackends.has(ss.backend))
? ss.backend
: detectedBackend;
const savedMatchesBackend = !!ss._forceBackend || (ss.backend || 'vllm') === detectedBackend;
const sv = (k, def) => (ss[k] !== undefined && savedMatchesBackend) ? ss[k] : def; const sv = (k, def) => (ss[k] !== undefined && savedMatchesBackend) ? ss[k] : def;
const defaultTp = defaultBackend === 'llamacpp' ? '1' : sv('tp', '1'); const defaultTp = defaultBackend === 'llamacpp' ? '1' : sv('tp', '1');
const detectedGpuIds = _allGpuIds(_getGpuToggleTotal?.()); const detectedGpuIds = _allGpuIds(_getGpuToggleTotal?.());
@@ -1200,7 +1241,16 @@ function _rerenderCachedModels() {
if (el.type === 'checkbox') serveState[el.dataset.field] = el.checked; if (el.type === 'checkbox') serveState[el.dataset.field] = el.checked;
else serveState[el.dataset.field] = el.value; else serveState[el.dataset.field] = el.value;
}); });
serveState.backend = (_detectBackend(m).backend) || serveState.backend || 'vllm'; serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
const backendWarning = _serveBackendWarning(m, repo, serveState.backend, serveState);
if (backendWarning) {
await window.styledConfirm(backendWarning.body, {
title: backendWarning.title,
confirmText: 'Edit settings',
cancelText: 'Close',
});
return;
}
// Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at // Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at
// the root so per-model state doesn't leak between models. // the root so per-model state doesn't leak between models.
try { try {
+4 -2
View File
@@ -2253,8 +2253,9 @@ function _renderActivityEntry(entry) {
const hue = _categoryHue(entry.taskName, entry.kind); const hue = _categoryHue(entry.taskName, entry.kind);
// CSS vars feed the colored title + accent stripe. // CSS vars feed the colored title + accent stripe.
const styleVars = `--cat-hue:${hue};`; const styleVars = `--cat-hue:${hue};`;
const _runningPlaceholder = /^(Starting…|Starting\.\.\.|_Running…_|_Running\.\.\._|_Queued\b)/i.test((entry.result || '').trim());
const hasResult = !!(entry.result && entry.result.trim() && entry.status !== 'running' && entry.status !== 'queued'); const hasResult = !!(entry.result && entry.result.trim() && entry.status !== 'running' && entry.status !== 'queued');
const hasRunningProgress = !!(entry.result && entry.result.trim() && (entry.status === 'running' || entry.status === 'queued')); const hasRunningProgress = !!(entry.result && entry.result.trim() && !_runningPlaceholder && (entry.status === 'running' || entry.status === 'queued'));
// "Open in chat" only makes sense for runs whose result is a real assistant // "Open in chat" only makes sense for runs whose result is a real assistant
// message (Prompt / Research tasks). Action/event runs are just log lines // message (Prompt / Research tasks). Action/event runs are just log lines
// (e.g. "No recent emails", "Tidied N memories") — for those, replace the // (e.g. "No recent emails", "Tidied N memories") — for those, replace the
@@ -2299,9 +2300,10 @@ function _renderActivityEntry(entry) {
let rightHtml; let rightHtml;
if (_isRunning) { if (_isRunning) {
const isQueued = entry.status === 'queued'; const isQueued = entry.status === 'queued';
const label = isQueued ? 'Queued' : 'Running';
// Initial elapsed for the first paint; the 1s interval below keeps it live. // Initial elapsed for the first paint; the 1s interval below keeps it live.
const startMs = entry.ts ? new Date(entry.ts).getTime() : Date.now(); const startMs = entry.ts ? new Date(entry.ts).getTime() : Date.now();
const stale = !isQueued && (Date.now() - startMs) > 30 * 60 * 1000;
const label = isQueued ? 'Queued' : stale ? 'Still running' : 'Running';
const elapsedInit = isQueued ? '' : `<span class="task-log-running-elapsed" data-since="${startMs}">${_fmtElapsed(Date.now() - startMs)}</span>`; const elapsedInit = isQueued ? '' : `<span class="task-log-running-elapsed" data-since="${startMs}">${_fmtElapsed(Date.now() - startMs)}</span>`;
const forceBtn = isQueued && entry.taskId ? `<button class="task-log-force-run" type="button" title="Start now in parallel, bypassing the queue" style="border:0;background:transparent;box-shadow:none;margin-left:5px;padding:0;width:12px;height:12px;display:inline-flex;align-items:center;justify-content:center;font-size:10px;line-height:1;color:inherit;opacity:.8;"><svg width="9" height="9" viewBox="0 0 24 24" fill="currentColor" style="display:block;"><polygon points="6 4 20 12 6 20 6 4"/></svg></button>` : ''; const forceBtn = isQueued && entry.taskId ? `<button class="task-log-force-run" type="button" title="Start now in parallel, bypassing the queue" style="border:0;background:transparent;box-shadow:none;margin-left:5px;padding:0;width:12px;height:12px;display:inline-flex;align-items:center;justify-content:center;font-size:10px;line-height:1;color:inherit;opacity:.8;"><svg width="9" height="9" viewBox="0 0 24 24" fill="currentColor" style="display:block;"><polygon points="6 4 20 12 6 20 6 4"/></svg></button>` : '';
const stopBtn = entry.taskId ? `<button class="task-log-stop" type="button" title="Stop this task"><svg width="9" height="9" viewBox="0 0 24 24" fill="currentColor"><rect x="6" y="6" width="12" height="12" rx="1"/></svg></button>` : ''; const stopBtn = entry.taskId ? `<button class="task-log-stop" type="button" title="Stop this task"><svg width="9" height="9" viewBox="0 0 24 24" fill="currentColor"><rect x="6" y="6" width="12" height="12" rx="1"/></svg></button>` : '';
+138 -21
View File
@@ -5363,19 +5363,20 @@ body.bg-pattern-sparkles {
#compare-model-overlay .modal-header h4 { #compare-model-overlay .modal-header h4 {
pointer-events: none; pointer-events: none;
} }
/* Compare modal sizes to content the global .modal-content max-height /* Compare model selector: keep manually-resized/tiny windows contained.
+ .modal-body overflow combo makes BOTH the outer card and the inner Picker dropdowns are appended to document.body, so the card itself can
body scrollable, so even when the content fits the viewport you get clip and scroll without cropping the dropdown list. */
a stray vertical scrollbar. Drop the cap and disable inner scroll
here; if the viewport is genuinely tiny the modal still won't exceed
it because it's centered and the parent .modal flex layout shrinks. */
#compare-model-overlay .modal-content { #compare-model-overlay .modal-content {
max-height: none; display: flex;
overflow: visible; flex-direction: column;
max-height: min(720px, calc(100dvh - 48px));
overflow: hidden;
min-height: 180px;
} }
#compare-model-overlay .modal-body { #compare-model-overlay .modal-body {
overflow: visible; overflow: auto;
flex: 0 0 auto; flex: 1 1 auto;
min-height: 0;
} }
.vis-hint { .vis-hint {
font-size: 10px; font-size: 10px;
@@ -6955,6 +6956,8 @@ pre { background: var(--code-bg, var(--hl-bg, #282c34)) !important; }
.compare-mode-tabs { .compare-mode-tabs {
display: flex; display: flex;
gap: 4px; gap: 4px;
flex-wrap: wrap;
min-width: 0;
} }
/* Type tabs match Mode toggles 1:1 (same flex column layout, same metrics) */ /* Type tabs match Mode toggles 1:1 (same flex column layout, same metrics) */
.compare-mode-tab { .compare-mode-tab {
@@ -19015,7 +19018,7 @@ body.gallery-selecting .gallery-dl-btn,
align-items: center; align-items: center;
gap: 3px; gap: 3px;
position: relative; position: relative;
top: 2px; top: 0;
cursor: pointer; cursor: pointer;
padding: 1px 6px 1px 4px; padding: 1px 6px 1px 4px;
border-radius: 9px; border-radius: 9px;
@@ -19024,22 +19027,17 @@ body.gallery-selecting .gallery-dl-btn,
} }
.cookbook-task-check svg { flex-shrink: 0; } .cookbook-task-check svg { flex-shrink: 0; }
.cookbook-task-check:hover { background: color-mix(in srgb, var(--red, #ff5555) 18%, transparent); } .cookbook-task-check:hover { background: color-mix(in srgb, var(--red, #ff5555) 18%, transparent); }
/* Shows "done" (green) normally; on hover the icon + label swap to a red / /* Terminal task clear pill. */
"clear" to reveal it's a dismiss action. */
.cookbook-task-done-label, .cookbook-task-done-label,
.cookbook-task-clear-label { .cookbook-task-clear-label {
font-size: 9px; font-size: 9px;
line-height: 1; line-height: 1;
text-transform: lowercase; text-transform: lowercase;
} }
.cookbook-task-done-label { color: var(--green, #50fa7b); } .cookbook-task-done-label { color: var(--red, #ff5555); }
.cookbook-task-clear-label { display: none; color: var(--red, #ff5555); } .cookbook-task-clear-label { display: none; }
.cookbook-task-check:hover .cookbook-task-done-label { display: none; } .cookbook-task-check-ico { display: none; }
.cookbook-task-check:hover .cookbook-task-clear-label { display: inline; } .cookbook-task-clear-ico { display: inline; }
/* Default: show the green check. On hover: swap to a red ✕ to signal "clear". */
.cookbook-task-clear-ico { display: none; }
.cookbook-task-check:hover .cookbook-task-check-ico { display: none; }
.cookbook-task-check:hover .cookbook-task-clear-ico { display: inline; }
/* "Serve" button on a finished download green pill matching the "running" / /* "Serve" button on a finished download green pill matching the "running" /
finished badge (it sits next to the green FINISHED chip + check). */ finished badge (it sits next to the green FINISHED chip + check). */
.cookbook-task-serve-btn { .cookbook-task-serve-btn {
@@ -19583,17 +19581,136 @@ body.gallery-selecting .gallery-dl-btn,
border: 1px solid color-mix(in srgb, var(--color-error) 30%, transparent); border: 1px solid color-mix(in srgb, var(--color-error) 30%, transparent);
border-radius: 6px; border-radius: 6px;
} }
/* Header row of the cookbook diagnostics ("diag") block: a flex row with
   vertically centered children separated by a 7px gap. */
.cookbook-diag-header {
display: flex;
align-items: center;
gap: 7px;
position: relative;
/* Shift the row up 4px; the equal negative bottom margin pulls whatever
   follows up by the same amount so no extra gap is left behind. */
top: -4px;
margin-bottom: -4px;
}
/* Fold control: stripped of padding, border, and background so it renders as
   bold 11px error-colored text with a pointer cursor.
   NOTE(review): presumably a <button> inside .cookbook-diag-header that
   collapses/expands the diagnostics body; confirm against the markup. */
.cookbook-diag-fold {
display: inline-flex;
align-items: center;
gap: 5px;
padding: 0;
min-height: 0;
border: 0;
background: transparent;
color: var(--color-error);
/* The `font` shorthand resets size and weight, so it must come before the
   explicit font-size / font-weight overrides below. */
font: inherit;
font-size: 11px;
font-weight: 700;
cursor: pointer;
/* In a flex row, an auto right margin absorbs the free space and pushes the
   following siblings to the far end. */
margin-right: auto;
}
/* Hover: keep the transparent background and error color, and only dim the
   control slightly.
   NOTE(review): the repeated background/color look like overrides of a
   generic button hover rule defined elsewhere; confirm. */
.cookbook-diag-fold:hover {
background: transparent;
color: var(--color-error);
opacity: 0.85;
}
/* Chevron glyph: inline-block with a fixed 10px width, so the space it
   occupies does not depend on which glyph is rendered.
   NOTE(review): presumably the glyph is swapped between collapsed and
   expanded states by script; confirm. */
.cookbook-diag-chevron {
display: inline-block;
width: 10px;
font-size: 10px;
}
/* Copy control: borderless, background-less 18px icon button in the muted
   foreground color, with its content centered on both axes. */
.cookbook-diag-copy {
border: 0;
background: transparent;
color: var(--fg-muted);
padding: 0 2px;
width: 18px;
height: 18px;
/* min-height is pinned to the same value as height.
   NOTE(review): likely guards against a larger global button min-height;
   confirm. */
min-height: 18px;
cursor: pointer;
display: inline-flex;
align-items: center;
justify-content: center;
}
/* Hover: brighten from the muted to the regular foreground color while
   keeping the background transparent. */
.cookbook-diag-copy:hover {
background: transparent;
color: var(--fg);
}
/* "copied" state: green tint, falling back to #50fa7b when --green is unset.
   NOTE(review): the class is presumably toggled by script after a successful
   copy; confirm. */
.cookbook-diag-copy.copied {
color: var(--green, #50fa7b);
}
/* Render the icon as a block-level box rather than an inline one. */
.cookbook-diag-copy svg {
display: block;
}
/* Dismiss control: borderless, background-less 16x18px button in the muted
   foreground color, with a centered 13px glyph. */
.cookbook-diag-dismiss {
border: 0;
background: transparent;
color: var(--fg-muted);
padding: 0;
width: 16px;
height: 18px;
min-height: 18px;
line-height: 16px;
font-size: 13px;
cursor: pointer;
display: inline-flex;
align-items: center;
justify-content: center;
/* Nudge the control up 2px without affecting the layout of its siblings. */
position: relative;
top: -2px;
}
/* Hover: switch to the error color; the background stays transparent. */
.cookbook-diag-dismiss:hover {
background: transparent;
color: var(--color-error);
}
/* Diagnostics body: 7px of space above it.
   NOTE(review): presumably the collapsible content placed directly below
   .cookbook-diag-header; confirm against the markup. */
.cookbook-diag-body {
margin-top: 7px;
}
.cookbook-diag-message { .cookbook-diag-message {
font-size: 12px; font-size: 12px;
font-weight: 600; font-weight: 600;
color: var(--color-error); color: var(--color-error);
margin-bottom: 4px;
margin-left: 2px;
user-select: text;
}
.cookbook-diag-suggestion {
font-size: 11px;
line-height: 1.35;
color: var(--fg-muted);
margin-bottom: 8px; margin-bottom: 8px;
margin-left: 2px;
user-select: text;
} }
.cookbook-diag-fixes { .cookbook-diag-fixes {
display: flex; display: flex;
flex-wrap: wrap; flex-wrap: wrap;
gap: 6px; gap: 6px;
} }
/* Actions wrapper: an inline-level flex container. `position: relative`
   makes it the containing block for absolutely positioned descendants.
   NOTE(review): presumably this anchors .cookbook-diag-menu; confirm against
   the markup. */
.cookbook-diag-actions {
position: relative;
display: inline-flex;
}
/* Action trigger: compact 11px control on the panel background with a
   border mixed from 40% error color and 60% transparent. */
.cookbook-diag-action-trigger {
font-size: 11px;
padding: 4px 10px;
min-height: 24px;
background: var(--panel);
border: 1px solid color-mix(in srgb, var(--color-error) 40%, transparent);
color: var(--fg);
}
/* Hover: full-strength error-colored border plus a faint (12%) error tint as
   the background. */
.cookbook-diag-action-trigger:hover {
border-color: var(--color-error);
background: color-mix(in srgb, var(--color-error) 12%, transparent);
}
/* Action menu: absolutely positioned, aligned to the left edge of its
   containing block and placed 4px below that block's bottom edge.
   NOTE(review): the containing block is presumably .cookbook-diag-actions;
   confirm against the markup. */
.cookbook-diag-menu {
position: absolute;
left: 0;
top: calc(100% + 4px);
min-width: 180px;
z-index: 80;
}
/* Menu entries: span the full menu width, left-aligned, on a single line.
   NOTE(review): justify-content only applies if these buttons are flex (or
   grid) containers via a rule defined elsewhere; confirm. */
.cookbook-diag-menu button {
width: 100%;
justify-content: flex-start;
text-align: left;
white-space: nowrap;
}
.cookbook-diag-btn { .cookbook-diag-btn {
font-size: 11px; font-size: 11px;
padding: 4px 10px; padding: 4px 10px;