Cookbook launch and gallery upload fixes

This commit is contained in:
pewdiepie-archdaemon
2026-06-22 01:49:15 +00:00
parent 75f04bc088
commit 92daf4e560
15 changed files with 1047 additions and 135 deletions
+13
View File
@@ -2446,6 +2446,17 @@ def setup_cookbook_routes() -> APIRouter:
disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
incoming_removed = data.get("removedTasks") if isinstance(data.get("removedTasks"), dict) else {}
disk_removed = on_disk.get("removedTasks") if isinstance(on_disk, dict) and isinstance(on_disk.get("removedTasks"), dict) else {}
removed_tasks = {**disk_removed, **incoming_removed}
data["removedTasks"] = removed_tasks
removed_ids = set(removed_tasks.keys())
if removed_ids:
incoming_tasks = [
t for t in incoming_tasks
if not (isinstance(t, dict) and t.get("sessionId") in removed_ids)
]
data["tasks"] = incoming_tasks
# Anti-poisoning guard: a stale browser tab can keep POSTing a
# download task as status='done' from before the strict-finish
# fix landed, undoing any server-side correction. For each
@@ -2483,6 +2494,8 @@ def setup_cookbook_routes() -> APIRouter:
sid = t.get("sessionId")
if not sid or sid in incoming_ids:
continue # client's version wins
if sid in removed_ids:
continue # intentional cross-device clear/remove
ts = t.get("ts") or 0
if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS:
preserved.append(t)
+58 -4
View File
@@ -714,6 +714,16 @@ def _effective_endpoint_kind(ep: Any, base_url: str) -> str:
return "auto"
def _is_loading_model_response(resp: Any) -> bool:
if getattr(resp, "status_code", None) != 503:
return False
try:
body = resp.text or ""
except Exception:
body = ""
return "loading model" in body.lower()
def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]:
"""Probe a base URL's /models endpoint and return list of model IDs.
@@ -778,6 +788,9 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
models.append(_e)
return [m for m in models if _is_chat_model(m)]
except httpx.HTTPStatusError as e:
if e.response is not None and _is_loading_model_response(e.response):
logger.info(f"Endpoint still loading model at {url}")
return []
if api_key:
status = e.response.status_code if e.response is not None else "unknown"
logger.warning(f"Failed to probe {url} with API key: HTTP {status}")
@@ -827,6 +840,15 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
or "ollama" in (parsed_base.hostname or "").lower()
)
def _is_loading_model_response(r) -> bool:
if getattr(r, "status_code", None) != 503:
return False
try:
body = r.text or ""
except Exception:
body = ""
return "loading model" in body.lower()
def _result_from_response(r) -> Dict[str, Any]:
if 300 <= r.status_code < 400:
loc = r.headers.get("location", "")
@@ -843,6 +865,13 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
"status_code": r.status_code,
"error": None,
}
if _is_loading_model_response(r):
return {
"reachable": True,
"loading": True,
"status_code": r.status_code,
"error": "Loading model",
}
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
last_error: Optional[str] = None
@@ -1427,7 +1456,7 @@ def setup_model_routes(model_discovery):
t0 = _time.time()
ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
entry["latency_ms"] = round((_time.time() - t0) * 1000)
entry["status"] = "online" if ping.get("reachable") or cached_count else "offline"
entry["status"] = "loading" if ping.get("loading") else ("online" if ping.get("reachable") or cached_count else "offline")
entry["error"] = ping.get("error")
entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
except Exception as e:
@@ -1606,7 +1635,32 @@ def setup_model_routes(model_discovery):
ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5
ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout)
if ping.get("reachable"):
status = "empty"
status = "loading" if ping.get("loading") else "empty"
if ping.get("loading"):
base = _normalize_base(r.base_url)
kind = _effective_endpoint_kind(r, base)
results.append({
"id": r.id,
"name": r.name,
"base_url": r.base_url,
"has_key": bool(r.api_key),
"api_key_fingerprint": _api_key_fingerprint(r.api_key),
"is_enabled": r.is_enabled,
"models": visible,
"pinned_models": pinned,
"hidden_count": len(hidden),
"online": True,
"status": status,
"ping_error": (ping or {}).get("error") if ping else None,
"model_type": getattr(r, "model_type", None) or "llm",
"supports_tools": getattr(r, "supports_tools", None),
"endpoint_kind": kind,
"category": _classify_endpoint(base, kind),
"model_refresh_mode": _endpoint_refresh_mode(r, kind),
"model_refresh_interval": getattr(r, "model_refresh_interval", None),
"model_refresh_timeout": getattr(r, "model_refresh_timeout", None),
})
continue
# Best-effort: if the probe came back reachable, try
# to populate cached_models in the background so the
# NEXT picker load shows "online" instead of "empty".
@@ -1859,7 +1913,7 @@ def setup_model_routes(model_discovery):
"models": _merge_model_ids(model_ids, _pinned),
"pinned_models": _pinned,
"online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
"status": "online" if (model_ids or _pinned) else ("empty" if ping.get("reachable") else "offline"),
"status": "online" if (model_ids or _pinned) else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None,
"endpoint_kind": requested_kind,
"category": _classify_endpoint(base_url, requested_kind),
@@ -1888,7 +1942,7 @@ def setup_model_routes(model_discovery):
return {
"base_url": base_url,
"online": bool(models) or bool(ping.get("reachable")),
"status": "online" if models else ("empty" if ping.get("reachable") else "offline"),
"status": "online" if models else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None,
"models": models,
"count": len(models),
+2 -2
View File
@@ -1108,7 +1108,7 @@ def setup_shell_routes() -> APIRouter:
{
"name": "llama_cpp",
"pip": "llama-cpp-python[server]",
"desc": "Serve GGUF models via llama.cpp",
"desc": "Great for single-GPU or CPU inference with GGUF models",
"category": "LLM",
"target": "remote",
# Build-toolchain prereqs. Cookbook's launch bootstrap
@@ -1129,7 +1129,7 @@ def setup_shell_routes() -> APIRouter:
{
"name": "vllm",
"pip": "vllm",
"desc": "High-throughput LLM serving engine",
"desc": "Great for high-throughput multi-GPU inference",
"category": "LLM",
"target": "remote",
},
+76 -3
View File
@@ -3,11 +3,16 @@ import os
import time
import json
import asyncio
import shutil
import uuid
from pathlib import Path
from fastapi import APIRouter, Request, File, UploadFile, HTTPException
from typing import List
import logging
from core.middleware import require_admin
from core.database import SessionLocal, GalleryImage
from src.auth_helpers import effective_user
from src.constants import GENERATED_IMAGES_DIR
from src.upload_handler import count_recent_uploads
logger = logging.getLogger(__name__)
@@ -50,6 +55,69 @@ def setup_upload_routes(upload_handler):
raise HTTPException(404, "File not found")
raise HTTPException(404, "File not found")
def _promote_chat_image_to_gallery(meta: dict, owner: str | None) -> str | None:
    """Make chat-uploaded images visible in Gallery without changing chat storage.

    The uploaded file is copied into ``GENERATED_IMAGES_DIR`` and a
    ``GalleryImage`` row is recorded for the copy. When an active gallery row
    with the same file hash (and the same owner, if one is given) already
    exists, that row's id is returned and nothing is copied.

    Args:
        meta: Upload metadata dict. The keys read here are ``name``, ``mime``,
            ``path``, ``hash``, ``width``, ``height`` and ``size``.
        owner: Owner to record on the gallery row, or ``None``.

    Returns:
        The gallery image id (existing or newly created), or ``None`` when the
        upload is not an image, its file is missing on disk, or the promotion
        fails. Failures are logged and never raised to the caller.
    """
    is_image_file = getattr(upload_handler, "is_image_file", None)
    if not callable(is_image_file):
        return None
    if not is_image_file(meta.get("name", ""), meta.get("mime", "")):
        return None
    source_path = meta.get("path")
    if not source_path or not os.path.isfile(source_path):
        return None

    db = SessionLocal()
    # Path of the gallery copy once we start writing it, so a later failure
    # (copy error, DB add/commit error) does not leave an orphaned file behind.
    dest_path = None
    try:
        file_hash = meta.get("hash")
        if file_hash:
            q = db.query(GalleryImage).filter(
                GalleryImage.file_hash == file_hash,
                GalleryImage.is_active == True,  # noqa: E712
            )
            if owner:
                q = q.filter(GalleryImage.owner == owner)
            existing = q.first()
            if existing:
                return existing.id

        image_dir = Path(GENERATED_IMAGES_DIR)
        image_dir.mkdir(parents=True, exist_ok=True)

        # Prefer the extension from the original file name; fall back to the
        # MIME type, and finally to ".png".
        ext = Path(meta.get("name") or source_path).suffix.lower()
        if ext not in {".png", ".jpg", ".jpeg", ".webp", ".gif"}:
            mime_ext = {
                "image/png": ".png",
                "image/jpeg": ".jpg",
                "image/jpg": ".jpg",
                "image/webp": ".webp",
                "image/gif": ".gif",
            }.get(meta.get("mime", ""))
            ext = mime_ext or ".png"

        filename = f"{uuid.uuid4().hex[:12]}{ext}"
        dest_path = image_dir / filename
        shutil.copy2(source_path, dest_path)

        image_id = str(uuid.uuid4())
        db.add(GalleryImage(
            id=image_id,
            filename=filename,
            prompt=meta.get("name") or "Chat upload",
            model="chat-upload",
            owner=owner,
            file_hash=file_hash,
            width=meta.get("width"),
            height=meta.get("height"),
            file_size=meta.get("size"),
        ))
        db.commit()
        return image_id
    except Exception as e:
        # Remove the (possibly partial) gallery copy: without a committed row
        # nothing references it, so it would just leak disk space.
        if dest_path is not None:
            try:
                dest_path.unlink(missing_ok=True)
            except OSError as cleanup_err:
                logger.warning(
                    "Failed to remove orphaned gallery copy %s: %s",
                    dest_path,
                    cleanup_err,
                )
        db.rollback()
        logger.warning("Failed to add chat image upload to gallery: %s", e)
        return None
    finally:
        db.close()
@router.post("")
async def api_upload(request: Request, files: List[UploadFile] = File(...)):
@@ -78,8 +146,10 @@ def setup_upload_routes(upload_handler):
for u in files:
try:
meta = upload_handler.save_upload(u, client_ip, owner=effective_user(request))
out.append({
owner = effective_user(request)
meta = upload_handler.save_upload(u, client_ip, owner=owner)
gallery_id = _promote_chat_image_to_gallery(meta, owner)
item = {
"id": meta["id"],
"name": meta["name"],
"mime": meta["mime"],
@@ -89,7 +159,10 @@ def setup_upload_routes(upload_handler):
"width": meta.get("width"),
"height": meta.get("height"),
"is_duplicate": meta.get("is_duplicate", False)
})
}
if gallery_id:
item["gallery_id"] = gallery_id
out.append(item)
except HTTPException:
raise
except Exception as e:
+4 -1
View File
@@ -907,7 +907,10 @@ def _anthropic_rejects_temperature(model: str) -> bool:
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
# Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
_THINKING_MODEL_PATTERNS = (
"qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax",
"m2-reap", "gemma", "stepfun", "step-3", "step3",
)
def _supports_thinking(model: str) -> bool:
"""Check if model supports structured thinking output."""
+124 -2
View File
@@ -39,6 +39,10 @@ _XML_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>",
re.IGNORECASE,
)
# Unclosed wrapper: an opening <tool_call>/<function_call> tag (optionally
# namespace-prefixed) with no matching close tag. Group 1 captures everything
# from after the tag to the end of the string (\Z).
_XML_OPEN_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z",
re.IGNORECASE,
)
_XML_INVOKE_RE = re.compile(
r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>',
re.IGNORECASE,
@@ -47,6 +51,24 @@ _XML_PARAM_RE = re.compile(
r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>',
re.IGNORECASE,
)
# Direct tool tag: <name>body</name>, where the closing tag must repeat the
# opening name (backreference \1). Group 1 is the tag name, group 2 the body
# (matched non-greedily, so the first matching close tag ends it).
_XML_DIRECT_TOOL_RE = re.compile(
r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
re.IGNORECASE,
)
# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
# <tool▁calls▁begin> ... <tool▁calls▁end>
# <tool▁call▁begin>tool_name<tool▁sep>{...}<tool▁call▁end>
# These can leak as text through llama.cpp/Ollama-style endpoints when the
# engine does not return structured OpenAI tool_calls.
# One StepFun call: group 1 is the tool name (letters, digits, "_", "." and
# "-" allowed after the first character), group 2 is the argument body found
# between the <tool▁sep> and <tool▁call▁end> tokens.
_STEPFUN_TOOL_CALL_RE = re.compile(
r"<tool▁call▁begin>\s*([A-Za-z_][\w.-]*)\s*<tool▁sep>\s*([\s\S]*?)\s*<tool▁call▁end>",
re.IGNORECASE,
)
# The outer <tool▁calls▁begin> / <tool▁calls▁end> wrapper tokens on their own
# (an optional leading "/" is tolerated); they carry no payload.
_STEPFUN_TOOL_CALLS_WRAPPER_RE = re.compile(
r"</?tool▁calls▁(?:begin|end)>",
re.IGNORECASE,
)
# Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
# {tool => 'tool_name', args => '<param>value</param>'}
@@ -446,6 +468,76 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
return function_call_to_tool_block(tool_name, json.dumps(params))
# Argument name that receives a tool body given as raw text rather than as a
# JSON object. Tools not listed here receive the body under "content".
_RAW_BODY_ARG_NAMES = {
    "web_search": "query",
    "web_fetch": "url",
    "bash": "command",
    "python": "code",
    "read_file": "path",
    "write_file": "path",
}


def _tool_body_to_params(mapped: str, body: str) -> dict:
    """Convert a tool-call body into an argument dict for the tool ``mapped``.

    A body that parses as a JSON object is used as-is. Anything else -- text
    that is not JSON, or JSON that is not an object (``2024``, ``true``,
    ``"quoted"``, ``[...]``) -- is treated as raw text and stored under the
    tool's primary argument name. Previously a non-object JSON body produced
    an empty dict, which silently dropped e.g. a purely numeric search query.
    """
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    return {_RAW_BODY_ARG_NAMES.get(mapped, "content"): body}


def _tool_block_from_name_and_body(tool_name: str, raw_body: str) -> Optional[ToolBlock]:
    """Resolve ``tool_name`` to a known tool and build its ToolBlock.

    Returns None when the name is neither an alias in ``_TOOL_NAME_MAP`` nor a
    member of ``TOOL_TAGS``, or when the body is empty after stripping.
    """
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = raw_body.strip()
    if not body:
        return None
    params = _tool_body_to_params(mapped, body)
    # Imported lazily, as the original parsers did.
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))


def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
    """Parse direct XML tool tags inside <tool_call>.

    Some local models emit:
        <tool_call><web_search>query</web_search></tool_call>
    instead of the invoke/parameter shape:
        <tool_call><invoke name="web_search"><parameter name="query">query</parameter></invoke></tool_call>

    Keep this as an adapter to the canonical function-call converter so aliases
    and per-tool argument formatting stay in one place.

    Args:
        tool_match: Regex match whose group 1 is the tag name and group 2 the
            tag body.

    Returns:
        The converted ToolBlock, or None for structural tags (``invoke``,
        ``parameter``, ``tool_call``, ``function_call``), unknown tool names,
        or an empty body.
    """
    tool_name = tool_match.group(1).lower().replace("-", "_")
    # Structural tags are handled by the invoke/parameter parser, not here.
    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
        return None
    return _tool_block_from_name_and_body(tool_name, tool_match.group(2))


def _parse_stepfun_tool_call(call_match) -> Optional[ToolBlock]:
    """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.

    Args:
        call_match: Regex match whose group 1 is the tool name and group 2 the
            argument body.

    Returns:
        The converted ToolBlock, or None for unknown tool names or an empty
        body.
    """
    # Normalise "-" and "." to "_" before resolving the tool name.
    tool_name = call_match.group(1).lower().replace("-", "_").replace(".", "_")
    return _tool_block_from_name_and_body(tool_name, call_match.group(2))
def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
"""Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style)."""
# Extract tool name
@@ -511,8 +603,9 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models)
3. XML-style <tool_call>/<invoke> blocks
4. <tool_code> blocks (MiniMax-M2.5 style)
5. DeepSeek DSML markup (normalized to <invoke> first)
6. Non-native local model fallback: prose mentioning web_search followed by
5. StepFun Step-3 native <toolcallbegin> tokens
6. DeepSeek DSML markup (normalized to <invoke> first)
7. Non-native local model fallback: prose mentioning web_search followed by
bare JSON args, e.g. {"query":"...", "time_filter":"week"}
`skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code
@@ -567,12 +660,38 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# Pattern 3: XML-style <tool_call>/<invoke> blocks
if not blocks:
for step_call in _STEPFUN_TOOL_CALL_RE.finditer(text):
block = _parse_stepfun_tool_call(step_call)
if block:
blocks.append(block)
if blocks:
return blocks
# Try wrapped: <tool_call><invoke ...>...</invoke></tool_call>
for m in _XML_TOOL_CALL_RE.finditer(text):
for inv in _XML_INVOKE_RE.finditer(m.group(1)):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if not blocks:
for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Some local models stream an opening <tool_call> wrapper and a
# complete inner tool tag, but forget the closing </tool_call>.
if not blocks:
for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
body = m.group(1)
for inv in _XML_INVOKE_RE.finditer(body):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if blocks:
break
for direct in _XML_DIRECT_TOOL_RE.finditer(body):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Try bare <invoke> without wrapper
if not blocks:
for inv in _XML_INVOKE_RE.finditer(text):
@@ -614,7 +733,10 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
text = _normalize_dsml(text)
cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
cleaned = _TOOL_CALL_RE.sub('', cleaned)
cleaned = _STEPFUN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _STEPFUN_TOOL_CALLS_WRAPPER_RE.sub('', cleaned)
cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _TOOL_CODE_RE.sub('', cleaned)
if not skip_fenced:
raw_web_json = _parse_raw_web_json_lookup(cleaned)
+93 -19
View File
@@ -230,14 +230,30 @@ export function _isMetal() {
}
/**
 * Return true when the model name contains one of the StepFun Step-3 markers
 * ("stepfun", "step-3", "step3", "step_3"), compared case-insensitively.
 * A missing or empty name yields false.
 */
function _isStepFunStepModel(modelName) {
  const lowered = (modelName || '').toLowerCase();
  const markers = ['stepfun', 'step-3', 'step3', 'step_3'];
  return markers.some((marker) => lowered.includes(marker));
}

/** Detect model-specific vLLM optimizations */
function _detectModelOptimizations(modelName) {
const n = (modelName || '').toLowerCase();
const opts = { envVars: [], flags: [], tips: [] };
// StepFun Step-3.x MoE models. Their tokenizer defines the Step tool-call
// and thinking tags; vLLM/SGLang need the step3p5 parser instead of generic
// Hermes/XML guesses, and the MoE backend should default to expert parallel.
if (_isStepFunStepModel(modelName)) {
opts.flags.push('--enable-expert-parallel');
opts.tips.push('StepFun Step-3 MoE: expert parallel');
opts.tips.push('StepFun parser: step3p5 for native tool calls and reasoning tags');
}
// Qwen3.5 MoE models — MoE-specific env vars + expert-parallel.
// The --reasoning-parser flag is added uniformly below via
// _detectReasoningParser, no longer hardcoded here.
if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
else if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4');
opts.flags.push('--enable-expert-parallel');
opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels');
@@ -312,6 +328,9 @@ function _detectModelOptimizations(modelName) {
*/
export function _detectReasoningParser(modelName) {
const n = (modelName || '').toLowerCase();
// StepFun Step-3.x uses Step's native <think> / tool-call tokens. vLLM
// registers this parser as step3p5.
if (_isStepFunStepModel(modelName)) return 'step3p5';
// MiniMax M3 — newer vLLM nightly/parser builds use minimax_m3. This must
// be checked before the M2.x rule and before the generic MiniMax tool parser.
if (n.includes('minimax') && /\bm3\b/.test(n)) return 'minimax_m3';
@@ -348,6 +367,7 @@ export function _detectReasoningParser(modelName) {
*/
export function _detectToolParser(modelName) {
const n = (modelName || '').toLowerCase();
if (_isStepFunStepModel(modelName)) return 'step3p5';
if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder';
if (n.includes('qwen3')) return 'qwen3_xml';
if (n.includes('qwen')) return 'hermes'; // Qwen2.5 / Qwen2 / Qwen1.5
@@ -601,6 +621,13 @@ export function _buildServeCmd(f, modelName, backend) {
if (f.dtype && f.dtype !== 'auto') cmd += ` --dtype ${f.dtype}`;
if (f.max_seqs && f.max_seqs.toString().trim()) cmd += ` --max-running-requests ${f.max_seqs.toString().trim()}`;
if (f.trust_remote) cmd += ' --trust-remote-code';
if (f.auto_tool) cmd += ` --enable-auto-tool-choice --tool-call-parser ${_detectToolParser(modelName)}`;
if (f.expert_parallel) cmd += ' --enable-expert-parallel';
if (f.reasoning_parser) {
const rp = typeof f.reasoning_parser === 'string' && f.reasoning_parser !== 'true'
? f.reasoning_parser : (f._reasoning_parser_value || _detectReasoningParser(modelName) || '');
if (rp) cmd += ` --reasoning-parser ${rp}`;
}
if (!f.prefix_cache) cmd += ' --disable-radix-cache';
if (f.enforce_eager) cmd += ' --disable-cuda-graph';
} else if (backend === 'llamacpp') {
@@ -909,10 +936,10 @@ async function _fetchDependencies() {
// matches the engine you're configuring. Unknown packages get no
// icon (the name alone is fine for librosa, hf_transfer, etc.).
const _DEP_GLYPHS = {
vllm: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M3 4l7 16 7-16"/><path d="M14 4l4 9 3-9"/></svg>',
sglang: '<svg width="13" height="13" viewBox="0 0 24 24" fill="currentColor" stroke="none" aria-hidden="true"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>',
llama_cpp: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="9"/><path d="M8 12h8M12 8v8"/></svg>',
ollama: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M6 10a6 6 0 0 1 12 0v4a4 4 0 0 1-8 0v-1"/><circle cx="10" cy="9" r="1"/><circle cx="14" cy="9" r="1"/></svg>',
vllm: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M3 4l7 16 7-16"/><path d="M14 4l4 9 3-9"/></svg>',
sglang: '<span aria-hidden="true" style="display:block;width:13px;height:13px;background:currentColor;-webkit-mask:url(/static/icons/sglang-mark.png) center/contain no-repeat;mask:url(/static/icons/sglang-mark.png) center/contain no-repeat;"></span>',
llama_cpp: '<svg width="13" height="13" viewBox="0 0 600 600" fill="none" aria-hidden="true"><path d="M600 392L504.249 558L504.137 557.929C487.252 584.069 458.193 600 426.864 600H120L240 392H600Z" fill="currentColor"/><path d="M240 392H0L199.602 46.0254C216.032 17.5463 246.411 0 279.29 0H466.154L240 392Z" fill="currentColor"/></svg>',
ollama: '<img src="/static/icons/ollama-mark-crop.png" alt="" aria-hidden="true" width="13" height="13" style="display:block;width:13px;height:13px;object-fit:contain;" />',
diffusers: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="4"/><path d="M12 2v3M12 19v3M2 12h3M19 12h3M5 5l2 2M17 17l2 2M5 19l2-2M17 7l2-2"/></svg>',
};
const _depGlyphHtml = (name) => {
@@ -2138,7 +2165,10 @@ function _wireTabEvents(body) {
}
}
const shortName = repo.split('/').pop();
_retryDownload(shortName, payload);
const displayName = payload.include
? `${shortName} · ${_ggufQuantFromPath(String(payload.include).replace(/\*/g, '')) || String(payload.include).replace(/\*/g, '').replace(/\.gguf$/i, '')}`
: shortName;
_retryDownload(displayName, payload);
dlInput.value = '';
};
dlBtn.addEventListener('click', triggerDownload);
@@ -2179,18 +2209,13 @@ function _wireTabEvents(body) {
const folded = dlFoldBody.classList.contains('is-folded');
_setFolded(!folded);
});
// Auto-fold on any downward scroll inside the cookbook modal,
// and auto-expand when the user scrolls all the way back to the
// top of whichever scroller they're in. The chevron ▸ still
// toggles manually.
// Auto-fold on any downward scroll inside the cookbook modal. Do not
// auto-expand on upward/top scroll — once the user collapses Download,
// it should stay collapsed until the header is clicked again.
const _maybeFold = () => {
if (dlFoldBody.classList.contains('is-folded')) return;
_setFolded(true, /* persist */ false);
};
const _maybeExpand = () => {
if (!dlFoldBody.classList.contains('is-folded')) return;
_setFolded(false, /* persist */ false);
};
// Capture phase so scrolls on nested scrollers (.hwfit-list,
// .cookbook-body, .modal-content) all hit us.
const _modal = dlFold.closest('#cookbook-modal') || document;
@@ -2205,7 +2230,6 @@ function _wireTabEvents(body) {
const y = tgt.scrollTop;
const prev = _lastY.get(tgt) || 0;
if (y > prev) _maybeFold();
else if (y <= 0) _maybeExpand();
_lastY.set(tgt, y);
}, true);
}
@@ -2621,10 +2645,10 @@ function _renderRecipes() {
html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, qwen2.5:14b, or HF URL" style="flex:1;min-width:0;" />`;
html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`;
html += `</div>`;
html += `<div id="cookbook-dl-gguf-row" style="display:none;margin-top:1px;gap:5px;align-items:center;font-size:11px;">`;
html += `<span style="opacity:0.65;flex-shrink:0;">GGUF</span>`;
html += `<select class="cookbook-field-input" id="cookbook-dl-gguf-quant" style="height:28px;min-width:118px;flex:0 0 auto;"></select>`;
html += `<span id="cookbook-dl-gguf-note" style="opacity:0.55;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;"></span>`;
html += `<div id="cookbook-dl-gguf-row" class="cookbook-dl-gguf-row" style="display:none;">`;
html += `<span class="cookbook-dl-gguf-label">GGUF</span>`;
html += `<select class="cookbook-field-input" id="cookbook-dl-gguf-quant"></select>`;
html += `<span id="cookbook-dl-gguf-note"></span>`;
html += `</div>`;
// Ollama-library browse used to live here as its own collapsible dropdown,
// but that duplicated the Engine filter (which already has Ollama). The
@@ -3047,6 +3071,56 @@ export function isVisible() {
return !modal.classList.contains('hidden');
}
// Guards for _refreshSharedCookbookState: at most one sync in flight, and no
// more than one attempt per 1500 ms.
let _sharedSyncInFlight = false;
let _sharedSyncLast = 0;

/**
 * Pull shared cookbook state from the server and refresh the visible tab.
 *
 * Does nothing when the modal is hidden, a sync is already running, or the
 * previous attempt started less than 1500 ms ago. After a successful sync the
 * stored env state is merged into `_envState`; the Running tab is re-rendered,
 * and the Settings tab is rebuilt only when focus is not inside
 * `.cookbook-settings-stack`.
 *
 * @param {string} [reason] Label included in the warning logged on failure.
 */
async function _refreshSharedCookbookState(reason = '') {
  if (!isVisible() || _sharedSyncInFlight) return;
  const startedAt = Date.now();
  const sinceLast = startedAt - _sharedSyncLast;
  if (sinceLast < 1500) return;
  _sharedSyncInFlight = true;
  _sharedSyncLast = startedAt;
  try {
    const synced = await _syncFromServer();
    if (synced) {
      try {
        Object.assign(_envState, _readStoredEnvState());
      } catch {}
      const modalEl = document.getElementById('cookbook-modal');
      const backend = modalEl?.querySelector('.cookbook-tab.active')?.dataset?.backend || '';
      switch (backend) {
        case 'Running':
          _renderRunningTab();
          break;
        case 'Settings': {
          const focused = document.activeElement;
          const editing = focused && focused.closest && focused.closest('.cookbook-settings-stack');
          if (!editing) {
            _renderRecipes();
            const settingsTab = document.querySelector('#cookbook-modal .cookbook-tab[data-backend="Settings"]');
            if (settingsTab) settingsTab.click();
          }
          break;
        }
        default:
          break;
      }
    }
  } catch (err) {
    console.warn('[cookbook] shared state refresh failed', reason, err);
  } finally {
    _sharedSyncInFlight = false;
  }
}
// On a 'cookbook:state-synced' event: merge the stored env state and, if the
// modal is open on the Running tab, re-render that tab.
document.addEventListener('cookbook:state-synced', () => {
  try {
    Object.assign(_envState, _readStoredEnvState());
  } catch {}
  if (!isVisible()) return;
  const activeEl = document.querySelector('#cookbook-modal .cookbook-tab.active');
  const backend = activeEl?.dataset?.backend || '';
  if (backend === 'Running') _renderRunningTab();
});

// Re-sync when the window regains focus or the page becomes visible again.
window.addEventListener('focus', () => {
  _refreshSharedCookbookState('focus');
});
document.addEventListener('visibilitychange', () => {
  if (document.visibilityState !== 'visible') return;
  _refreshSharedCookbookState('visible');
});

// Every 5 s, poll only while the modal is open on the Running tab.
setInterval(() => {
  if (!isVisible()) return;
  const activeEl = document.querySelector('#cookbook-modal .cookbook-tab.active');
  const backend = activeEl?.dataset?.backend || '';
  if (backend !== 'Running') return;
  _refreshSharedCookbookState('active-poll');
}, 5000);
// Close button
document.addEventListener('DOMContentLoaded', () => {
const closeBtn = document.getElementById('close-cookbook-modal');
+20 -3
View File
@@ -85,6 +85,22 @@ function _ggufIncludePattern(model, source) {
return '*.gguf';
}
/**
 * Derive a short display label from a GGUF include pattern.
 *
 * "*" wildcards are removed first. If the last path segment or its parent
 * directory contains a quantization tag (e.g. Q4_K_M, IQ4_XS, Q8_0), that tag
 * is returned upper-cased without a leading "UD-". Otherwise the file name is
 * returned without its ".gguf" suffix and "-NNNNN-of-NNNNN" shard suffix.
 */
function _ggufDisplayPartFromInclude(include) {
  const stripped = String(include || '').replace(/\*/g, '');
  const segments = stripped.split('/').filter(Boolean);
  const fileName = segments.length ? segments[segments.length - 1] : stripped;
  const dirName = segments.length > 1 ? segments[segments.length - 2] : '';
  const quantRe = /\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_[MLS]|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i;
  const hit = quantRe.exec(`${dirName} ${fileName}`);
  if (!hit) {
    return fileName.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
  }
  return hit[0].toUpperCase().replace(/^UD-/, '');
}
/**
 * Build the task name for a download: "<shortName> · <part>" when the payload
 * has an include pattern that yields a display part, else just shortName.
 */
function _downloadTaskName(shortName, payload) {
  const include = payload?.include || '';
  if (!include) return shortName;
  const part = _ggufDisplayPartFromInclude(include);
  if (!part) return shortName;
  return `${shortName} · ${part}`;
}
function _missingGgufMessage(model) {
const name = model?.name || 'this model';
if (/\bnvfp4\b/i.test(name)) {
@@ -519,6 +535,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
}
const shortName = (model.name || repo).split('/').pop();
const taskName = _downloadTaskName(shortName, payload);
const targetHost = host || 'local';
const tasks = _loadTasks();
@@ -576,7 +593,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
if (activeOnHost) {
const queueId = `queue-${Date.now().toString(36)}`;
const allTasks = _loadTasks();
allTasks.push({ id: queueId, sessionId: queueId, name: shortName, type: 'download', status: 'queued', output: '', ts: Date.now(), payload, remoteHost: host });
allTasks.push({ id: queueId, sessionId: queueId, name: taskName, type: 'download', status: 'queued', output: '', ts: Date.now(), payload, remoteHost: host });
_saveTasks(allTasks);
_renderRunningTab();
uiModule.showToast(`Queued ${shortName} — waiting for current download`);
@@ -601,8 +618,8 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
uiModule.showToast('Download failed: ' + (data.error || ''), 9000);
return;
}
_addTask(data.session_id, shortName, 'download', payload);
uiModule.showToast(`Downloading ${shortName}...`);
_addTask(data.session_id, taskName, 'download', payload);
uiModule.showToast(`Downloading ${taskName}...`);
} catch (e) {
uiModule.showToast('Download failed: ' + e.message, 9000);
}
+144 -29
View File
@@ -38,6 +38,47 @@ function _taskBadge(task) {
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status };
}
/**
 * Derive a short display label from a GGUF file path.
 *
 * If the file name or its parent directory contains a quantization tag
 * (e.g. Q4_K_M, IQ4_XS, Q8_0), that tag is returned upper-cased without a
 * leading "UD-". Otherwise the file name is returned without its ".gguf"
 * suffix and "-NNNNN-of-NNNNN" shard suffix.
 */
function _ggufDisplayPartFromPath(path) {
  const segments = String(path || '').split('/').filter(Boolean);
  const count = segments.length;
  const fileName = count ? segments[count - 1] : '';
  const dirName = count > 1 ? segments[count - 2] : '';
  const quantRe = /\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_[MLS]|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i;
  const hit = quantRe.exec(`${dirName} ${fileName}`);
  if (hit) {
    return hit[0].toUpperCase().replace(/^UD-/, '');
  }
  return fileName.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
/**
 * Display name for a download task: appends " · <part>" derived from the
 * task's include pattern, unless there is no include pattern or the name
 * already contains " · ".
 */
function _downloadDisplayName(name, task) {
  const include = task?.payload?.include || '';
  if (!include) return name;
  const alreadyTagged = String(name || '').includes(' · ');
  if (alreadyTagged) return name;
  const part = _ggufDisplayPartFromPath(include.replace(/\*/g, ''));
  if (!part) return name;
  return `${name} · ${part}`;
}
/**
 * Display name for a task card.
 *
 * Download tasks delegate to _downloadDisplayName. Serve tasks get
 * " · <part>" appended from their gguf_file (read from payload._fields first,
 * then payload) unless the name already contains " · ". Every other task type
 * returns the trimmed name unchanged.
 */
function _taskDisplayName(task) {
  const label = String(task?.name || '').trim();
  switch (task?.type) {
    case 'download':
      return _downloadDisplayName(label, task);
    case 'serve':
      break;
    default:
      return label;
  }
  const ggufPath = task?.payload?._fields?.gguf_file || task?.payload?.gguf_file || '';
  if (!ggufPath || label.includes(' · ')) return label;
  const part = _ggufDisplayPartFromPath(ggufPath);
  if (!part) return label;
  return `${label} · ${part}`;
}
/**
 * True when the task is a download with status "done" or "completed" and has
 * either a payload.repo_id or a name.
 */
function _canLaunchDownloadedTask(task) {
  if (task?.type !== 'download') return false;
  const finished = ['done', 'completed'].includes(task.status || '');
  if (!finished) return false;
  return Boolean(task.payload?.repo_id || task.name);
}
/**
 * Serve-form overrides for a download task: forces the llamacpp backend and
 * passes the task's trimmed include pattern as the preferred GGUF include.
 * Returns null when the task has no include pattern.
 */
function _downloadServeFields(task) {
  const pattern = String(task?.payload?.include || '').trim();
  return pattern
    ? { backend: 'llamacpp', _forceBackend: true, _preferredGgufInclude: pattern }
    : null;
}
// A download task whose tmux output still shows an active per-shard line
// (e.g. "model-00012-of-00082.safetensors: 56%|") is NOT actually finished —
// the cookbook just lost track. The clear pill becomes a "reconnect" affordance
@@ -282,6 +323,40 @@ let _detectToolParser;
let _detectModelOptimizations;
let _buildServeCmd;
/**
 * Work out which server a task belongs to.
 *
 * The server is looked up by the task's saved server key first, then by its
 * host via _serverByVal, then by host among _envState.servers. The key is the
 * resolved server's key when a server was found (falling back to the saved
 * key if _serverKey is unavailable); otherwise the saved key, the host, or
 * 'local', in that order.
 *
 * @returns {{host: string, server: (object|null), key: string}}
 */
function _taskServerSelection(task) {
  const host = task?.remoteHost || task?.payload?.remote_host || '';
  const savedKey = task?.remoteServerKey || task?.payload?.remote_server_key || '';

  let server = null;
  if (savedKey) {
    server = _serverByVal(savedKey) || null;
  }
  if (!server && host) {
    server = _serverByVal(host) || _envState.servers.find((s) => s.host === host) || null;
  }

  let key;
  if (server) {
    key = _serverKey ? _serverKey(server) : savedKey;
  } else {
    key = savedKey || host || 'local';
  }
  return { host, server, key };
}
// Make the task's server the active one: updates _envState (host, server key,
// env, envPath, platform) and points the four hwfit server <select> elements
// at the matching option. Returns the { host, server, key } selection.
function _selectTaskServer(task) {
  const { host, server, key } = _taskServerSelection(task);

  _envState.remoteHost = host;
  _envState.remoteServerKey = key === 'local' ? '' : key;
  // With a resolved profile copy its environment; with no host at all reset to
  // defaults. A host that has no known profile leaves the environment as-is.
  if (server || !host) {
    _envState.env = server?.env || 'none';
    _envState.envPath = server?.envPath || '';
    _envState.platform = server?.platform || '';
  }

  const wanted = key || host || 'local';
  const selects = document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server');
  for (const sel of selects) {
    if (!sel || sel.tagName !== 'SELECT') continue;
    const values = Array.from(sel.options, opt => opt.value);
    if (values.includes(wanted)) {
      sel.value = wanted;
    } else if (host && values.includes(host)) {
      sel.value = host;
    } else {
      sel.value = host ? wanted : 'local';
    }
  }
  return { host, server, key };
}
// When a new action is started (download / dependency / serve), this holds the
// new task's id so the next render collapses every other card and leaves only
// the new one open. Consumed (cleared) by _renderRunningTab.
@@ -654,16 +729,31 @@ function _loadPrunedTasks() {
const _REMOVED_KEY = 'cookbook-removed-tasks';
const _TOMBSTONE_TTL_MS = 24 * 3600 * 1000;
// Read the removed-task tombstone map ({ sessionId: removedAtMs }) from
// localStorage. Entries older than _TOMBSTONE_TTL_MS are dropped, and the
// pruned map is written back only when something was actually removed.
// Returns {} when the stored value is missing or cannot be read/parsed.
function _loadTombstones() {
  try {
    const tomb = JSON.parse(localStorage.getItem(_REMOVED_KEY)) || {};
    const now = Date.now();
    let changed = false;
    for (const k in tomb) {
      if (now - tomb[k] > _TOMBSTONE_TTL_MS) {
        delete tomb[k];
        changed = true;
      }
    }
    if (changed) localStorage.setItem(_REMOVED_KEY, JSON.stringify(tomb));
    return tomb;
  } catch {
    return {};
  }
}
// Persist the tombstone map under _REMOVED_KEY; a falsy map is stored as {}.
function _saveTombstones(tomb) {
  const serialized = JSON.stringify(tomb || {});
  localStorage.setItem(_REMOVED_KEY, serialized);
}
// Record that the task with the given session id was intentionally removed.
// Stamps the id with the current time, drops entries older than
// _TOMBSTONE_TTL_MS, and persists the map once through _saveTombstones.
// Does nothing for a falsy id.
function _tombstoneTask(id) {
  if (!id) return;
  const tomb = _loadTombstones();
  const now = Date.now();
  tomb[id] = now;
  for (const k in tomb) {
    if (now - tomb[k] > _TOMBSTONE_TTL_MS) delete tomb[k];
  }
  _saveTombstones(tomb);
}
function _isTombstoned(id) {
const ts = _loadTombstones()[id];
@@ -1098,6 +1188,7 @@ function _syncToServer() {
if (!_envState || !Array.isArray(_envState.servers) || _envState.servers.length === 0) return;
const state = {
tasks: _loadTasks(),
removedTasks: _loadTombstones(),
presets: _loadPresets(),
env: _envState,
serveState: null,
@@ -1146,9 +1237,16 @@ export async function _syncFromServer() {
const localTasks = _loadTasks();
const serverTasks = state.tasks || [];
const serverTombstones = (state.removedTasks && typeof state.removedTasks === 'object') ? state.removedTasks : {};
const localTombstones = _loadTombstones();
const mergedTombstones = { ...serverTombstones, ...localTombstones };
for (const [id, ts] of Object.entries(serverTombstones)) {
if (localTombstones[id] == null || Number(ts) > Number(localTombstones[id])) mergedTombstones[id] = ts;
}
_saveTombstones(mergedTombstones);
const localIds = new Set(localTasks.map(t => t.sessionId));
const merged = [...localTasks];
const merged = localTasks.filter(t => !_isTombstoned(t.sessionId));
for (const t of serverTasks) {
if (!localIds.has(t.sessionId) && !_isTombstoned(t.sessionId)) {
merged.push(t);
@@ -1165,6 +1263,18 @@ export async function _syncFromServer() {
const { remoteHost: _rh, env: _e, envPath: _ep, platform: _pf, ...settings } = state.env;
delete settings.hfToken;
Object.assign(_envState, settings);
const selected = (_envState.remoteServerKey && _serverByVal?.(_envState.remoteServerKey))
|| (_envState.remoteHost ? (_envState.servers || []).find(s => s.host === _envState.remoteHost) : null);
if (selected) {
_envState.env = selected.env || 'none';
_envState.envPath = selected.envPath || '';
_envState.platform = selected.platform || '';
} else if (!_envState.remoteHost) {
const local = (_envState.servers || []).find(s => !s.host || s.host === 'local');
_envState.env = local?.env || 'none';
_envState.envPath = local?.envPath || '';
_envState.platform = local?.platform || '';
}
const { hfToken, ...safeState } = _envState;
localStorage.setItem('cookbook-last-state', JSON.stringify(safeState));
}
@@ -1174,6 +1284,7 @@ export async function _syncFromServer() {
if (state.serveState) {
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify(state.serveState));
}
document.dispatchEvent(new CustomEvent('cookbook:state-synced', { detail: state }));
return true;
} catch { return false; }
}
@@ -1332,17 +1443,11 @@ async function _openServeEditForTask(task, cmdOverride, fieldOverrides = null) {
if (fieldOverrides && typeof fieldOverrides === 'object') {
fields = { ...(fields || {}), ...fieldOverrides };
}
// Switch the active server to the one this serve ran on (mirrors _openEdit).
const _tHost = task.remoteHost || '';
_envState.remoteHost = _tHost;
const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost)
|| _envState.servers.find(s => s.host === _tHost);
if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; }
else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; }
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
if (!sel || sel.tagName !== 'SELECT') return;
sel.value = _tHost || 'local';
});
fields = { ...(fields || {}), _replaceTaskId: task.sessionId };
// Switch the active server to the exact profile this serve ran on. The
// dropdown stores stable srv: keys, not raw host strings, so preserving only
// task.remoteHost can relaunch against the local container by accident.
_selectTaskServer(task);
try {
const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo, fields);
@@ -1553,6 +1658,20 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local');
const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local');
const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || '');
const _replaceTaskId = fields?._replaceTaskId || '';
if (_replaceTaskId) {
try {
const _old = _loadTasks().find(t => t.sessionId === _replaceTaskId);
if (_old && _old.type === 'serve') {
await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _tmuxGracefulKill(_old) }),
});
_removeTask(_old.sessionId);
}
} catch {}
}
// Replace any serve already targeting this same host:port — you can't run two
// servers on one port, so re-serving (or retrying) should stop & remove the
@@ -1750,7 +1869,7 @@ export function _renderRunningTab() {
'<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">' +
'<h2 style="margin:0;padding:0;line-height:1;">Active <span id="running-count" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal">' + activeCount + '</span></h2>' +
'</div>' +
'<p class="memory-desc doclib-desc" style="margin-top:6px;">Active downloads and serving processes.</p>' +
'<p class="memory-desc doclib-desc" style="margin-top:6px;">Active downloads, installs and model launches.</p>' +
'</div>';
const firstGroup = body.querySelector('.cookbook-group');
if (firstGroup) body.insertBefore(group, firstGroup);
@@ -1863,6 +1982,7 @@ export function _renderRunningTab() {
return;
}
if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return;
toRemove.forEach(t => _tombstoneTask(t.sessionId));
const remaining = allTasks.filter(t => _taskServerKey(t) !== host || !_canClearTask(t));
_saveTasks(remaining);
// Fade/slide each finished card out (same exit as the per-card clear)
@@ -2000,11 +2120,12 @@ export function _renderRunningTab() {
const _bdg = _taskBadge(task);
const _bdgTitle = (task._unreachable && task.status === 'running') ? ' title="Server not responding — it may have crashed"' : '';
const displayName = _taskDisplayName(task);
el.innerHTML = `
<div class="cookbook-task-header">
<span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span>
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(task.name)}</span>
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(displayName)}</span>
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span>${_canLaunchDownloadedTask(task) ? '<button type="button" class="cookbook-task-serve-btn" title="Open in Launch"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg><span>Launch</span></button>' : ''}<span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
<button type="button" class="cookbook-task-start-now" title="Start this queued download now" style="display:${(task.type === 'download' && task.status === 'queued') ? '' : 'none'}"><svg width="11" height="11" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="8 5 19 12 8 19 8 5"/></svg><span>start now</span></button>
<span class="cookbook-task-status ${_bdg.cls}"${_bdgTitle}>${esc(_bdg.text)}</span>
<button class="cookbook-task-menu-btn" title="Actions">&#8942;</button>
@@ -2076,19 +2197,11 @@ export function _renderRunningTab() {
e.stopPropagation();
const repo = task.payload?.repo_id || task.name;
if (!repo) { uiModule.showToast('No model info on this task'); return; }
// Point the active server at the one it downloaded to.
const _tHost = task.remoteHost || '';
_envState.remoteHost = _tHost;
const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost)
|| _envState.servers.find(s => s.host === _tHost);
if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; }
else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; }
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
if (sel && sel.tagName === 'SELECT') sel.value = _tHost || 'local';
});
// Point the active server at the exact profile it downloaded to.
_selectTaskServer(task);
try {
const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo);
await openServePanelForRepo(repo, _downloadServeFields(task));
// Serving it supersedes the finished download — clear the card from
// the Running tab (smooth exit) now that we've jumped to Serve.
_animateOutThenRemove(el, task.sessionId);
@@ -3558,7 +3671,9 @@ async function _probeEndpointUntilOnline(epId, host, port) {
try {
// Hit the probe endpoint — it re-probes server-side and updates
// cached_models. We consume (and discard) the SSE stream.
await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).then(r => r.text()).catch(() => {});
const probeRes = await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).catch(() => null);
if (probeRes && probeRes.status === 404) return;
if (probeRes) await probeRes.text().catch(() => {});
const eps = await fetch('/api/model-endpoints', { credentials: 'same-origin' }).then(r => r.json()).catch(() => []);
const ep = (eps || []).find(e => e.id === epId);
if (ep && (ep.models || []).length) {
+183 -14
View File
@@ -477,7 +477,9 @@ function _estimateLlamaContextFit(model, fields, modelCtxMax, modelWeightsGb = 0
}
function _selectedServeTarget(panel) {
const select = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
const select = panel?.querySelector?.('#hwfit-server-select')
|| document.getElementById('hwfit-server-select')
|| document.getElementById('hwfit-dl-server');
const servers = Array.isArray(_envState.servers) ? _envState.servers : [];
let host = _envState.remoteHost || '';
let server = host ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host)) : null;
@@ -643,6 +645,122 @@ function _ggufFileLabel(file) {
return `${quant}${base}${size || split ? ` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`;
}
// Short display tag for the GGUF file a serve task uses.
//
// Prefers the `quant` recorded on the model's matching GGUF file entry;
// otherwise looks for a llama.cpp-style quantization label in the file name
// and its parent directory. The tag is upper-cased with any leading "UD-"
// removed. When no label is found, returns the file name without ".gguf" and
// without a trailing "-NNNNN-of-NNNNN" shard suffix. Returns '' for an empty
// path.
function _ggufTaskDisplayPart(model, relPath) {
  const rel = String(relPath || '');
  if (!rel) return '';
  const file = _ggufFilesForModel(model).find(f => f.rel_path === rel);
  if (file?.quant) return String(file.quant).toUpperCase().replace(/^UD-/, '');
  const parts = rel.split('/').filter(Boolean);
  const base = parts[parts.length - 1] || '';
  const parent = parts.length > 1 ? parts[parts.length - 2] : '';
  const text = `${parent} ${base}`;
  // "_K_XL" has to be spelled out: "_" is a word character, so the shorter
  // alternatives cannot end on a \b in the middle of "Q4_K_XL" and the whole
  // match would otherwise fail for "UD-Q4_K_XL" style names.
  const quant = text.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_(?:XL|[MLS])|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i);
  if (quant) return quant[0].toUpperCase().replace(/^UD-/, '');
  return base.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
// Name to store on a newly launched serve task. For the llamacpp and ollama
// backends the selected GGUF file's tag is appended as " · <tag>" unless the
// name already contains that exact suffix; other backends keep the trimmed
// short name.
function _serveTaskDisplayName(shortName, model, fields) {
  const label = String(shortName || '').trim();
  const engine = String(fields?.backend || '').toLowerCase();
  const usesGguf = engine === 'llamacpp' || engine === 'ollama';
  if (!usesGguf) return label;
  const tag = _ggufTaskDisplayPart(model, fields?.gguf_file);
  if (!tag || label.includes(` · ${tag}`)) return label;
  return `${label} · ${tag}`;
}
// Normalize a GGUF path that is meant to be relative to a model directory.
//
// Backslashes become forward slashes and leading slashes are stripped.
// Returns '' (meaning "reject") when the result is empty, contains a NUL
// character, or has any ".." path segment, so the path cannot climb out of
// the directory it is later joined to. Otherwise returns the normalized path.
function _safeGgufRelPath(relPath) {
  const rel = String(relPath || '').replace(/\\/g, '/').replace(/^\/+/, '');
  if (!rel || rel.includes('\0')) return '';
  // Check every segment, not just leading/middle positions: "a/.." climbs
  // back to the parent directory just like "../a" does.
  if (rel.split('/').includes('..')) return '';
  return rel;
}
// Show a modal asking what to delete from a repo that has several GGUF files.
//
// repo  - repo id, shown in the dialog message.
// files - GGUF file entries; each is copied with its rel_path passed through
//         _safeGgufRelPath, and entries whose sanitized path is empty are
//         dropped from the list.
//
// Returns a Promise that resolves to:
//   null                               - Cancel, backdrop click, or Escape
//   { mode: 'repo' }                   - "Whole repo"
//   { mode: 'files', files: [...] }    - "Delete selected" with >= 1 box ticked
function _ggufDeleteChoice(repo, files) {
return new Promise(resolve => {
// The overlay element is built once, appended to <body>, and reused on
// later calls (looked up by id).
let overlay = document.getElementById('cookbook-gguf-delete-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'cookbook-gguf-delete-overlay';
overlay.className = 'modal hidden';
overlay.innerHTML =
'<div class="modal-content styled-confirm-box cookbook-gguf-delete-box" role="dialog" aria-modal="true" aria-labelledby="cookbook-gguf-delete-title">' +
'<div class="modal-header"><h4 id="cookbook-gguf-delete-title">Delete GGUF files</h4></div>' +
'<div class="modal-body">' +
'<p id="cookbook-gguf-delete-msg"></p>' +
'<div id="cookbook-gguf-delete-list" class="cookbook-gguf-delete-list"></div>' +
'</div>' +
'<div class="modal-footer cookbook-gguf-delete-actions">' +
'<button type="button" id="cookbook-gguf-delete-cancel" class="confirm-btn confirm-btn-secondary">Cancel</button>' +
'<button type="button" id="cookbook-gguf-delete-repo" class="confirm-btn confirm-btn-secondary">Whole repo</button>' +
'<button type="button" id="cookbook-gguf-delete-selected" class="confirm-btn confirm-btn-danger">Delete selected</button>' +
'</div>' +
'</div>';
document.body.appendChild(overlay);
}
const safeFiles = files
.map(f => ({ ...f, rel_path: _safeGgufRelPath(f.rel_path) }))
.filter(f => f.rel_path);
const msg = overlay.querySelector('#cookbook-gguf-delete-msg');
const list = overlay.querySelector('#cookbook-gguf-delete-list');
const cancelBtn = overlay.querySelector('#cookbook-gguf-delete-cancel');
const repoBtn = overlay.querySelector('#cookbook-gguf-delete-repo');
const selectedBtn = overlay.querySelector('#cookbook-gguf-delete-selected');
// Remembered so focus can be handed back when the dialog closes.
const prevFocus = document.activeElement;
msg.textContent = `${repo} has multiple GGUF files. Pick what to delete.`;
// Each checkbox's value is the file's index in safeFiles, not its path.
list.innerHTML = safeFiles.map((file, idx) => {
const label = esc ? esc(_ggufFileLabel(file)) : _ggufFileLabel(file);
const rel = esc ? esc(file.rel_path) : file.rel_path;
return `<label class="cookbook-gguf-delete-row">
<input class="cookbook-gguf-delete-cb" type="checkbox" value="${idx}">
<span class="cookbook-gguf-delete-main">${label}</span>
<span class="cookbook-gguf-delete-path">${rel}</span>
</label>`;
}).join('');
// Hide the overlay, detach every listener added below (the overlay element
// is reused, so they must not pile up), restore focus, and settle the promise.
function cleanup(result) {
overlay.classList.add('hidden');
overlay.style.display = 'none';
cancelBtn.removeEventListener('click', onCancel);
repoBtn.removeEventListener('click', onRepo);
selectedBtn.removeEventListener('click', onSelected);
overlay.removeEventListener('click', onBackdrop);
document.removeEventListener('keydown', onKey);
try { prevFocus && prevFocus.focus && prevFocus.focus(); } catch {}
resolve(result);
}
function onCancel() { cleanup(null); }
function onRepo() { cleanup({ mode: 'repo' }); }
function onSelected() {
const selected = [...list.querySelectorAll('input[type="checkbox"]:checked')]
.map(input => safeFiles[Number(input.value)])
.filter(Boolean);
// Nothing ticked: show a toast and keep the dialog open.
if (!selected.length) {
uiModule.showToast?.('Select at least one GGUF file.');
return;
}
cleanup({ mode: 'files', files: selected });
}
// Only a click on the overlay element itself (not its children) cancels.
function onBackdrop(e) { if (e.target === overlay) cleanup(null); }
function onKey(e) {
if (e.key === 'Escape') {
e.preventDefault();
e.stopPropagation();
cleanup(null);
}
}
cancelBtn.addEventListener('click', onCancel);
repoBtn.addEventListener('click', onRepo);
selectedBtn.addEventListener('click', onSelected);
overlay.addEventListener('click', onBackdrop);
document.addEventListener('keydown', onKey);
overlay.classList.remove('hidden');
overlay.style.display = '';
selectedBtn.focus();
});
}
function _shellPathExpr(path) {
const s = String(path || '');
if (s === '~') return '${HOME}';
@@ -729,7 +847,7 @@ function _rerenderCachedModels() {
? ` <span class="cookbook-serve-downloading-pill${_isDlActive ? '' : ' is-stalled'}" title="${_isDlActive ? 'Download in progress' : 'Download stalled — retry to resume'}">${_isDlActive ? 'downloading' : 'stalled'}</span>`
: '';
const _favoritePill = _isFavorite ? ' <span class="memory-cat-badge memory-cat-pinned cookbook-serve-fav-badge">pinned</span>' : '';
html += `<div class="memory-item-title"${_mc ? ` style="color:${_mc}"` : ''}>${modelLogo(m.repo_id)}${esc(shortName)}${_favoritePill}${hfLink ? ` <a href="${esc(hfLink)}" target="_blank" rel="noopener" class="cookbook-hf-link">HF ↗</a>` : ''}${_runningPill}${_downloadingPill}</div>`;
html += `<div class="memory-item-title cookbook-serve-title"${_mc ? ` style="color:${_mc}"` : ''}><span class="cookbook-serve-title-name">${modelLogo(m.repo_id)}${esc(shortName)}</span>${_favoritePill}${hfLink ? ` <a href="${esc(hfLink)}" target="_blank" rel="noopener" class="cookbook-hf-link">HF ↗</a>` : ''}${_runningPill}${_downloadingPill}</div>`;
html += `<div class="memory-item-meta" style="font-size:10px;opacity:0.4;margin-top:2px;">${metaParts.join(' \u00b7 ')}</div>`;
html += `</div>`;
const _bk = _detectBackend(m).backend;
@@ -962,6 +1080,11 @@ function _rerenderCachedModels() {
const _isMiniMaxM3 = _isMiniMaxM3Model({ ...m, repo_id: repo });
const _isMiniMaxM2 = _isMiniMaxM2Model({ ...m, repo_id: repo });
const _isMiniMaxMSeries = _isMiniMaxM3 || _isMiniMaxM2;
const _toolParserDefault = _detectToolParser(repo);
const _isStepFunStep = _toolParserDefault === 'step3p5';
const _nativeToolDefault = _isMiniMaxMSeries || _isStepFunStep;
const _reasoningDefault = _isMiniMaxMSeries || _isStepFunStep;
const _expertParallelDefault = _isMiniMaxMSeries || _isStepFunStep;
const svm = (k, def) => (_modelSs && _hasOwn(_modelSs, k)) ? _modelSs[k] : def;
const _serveTarget = _selectedServeTarget();
const _backendChoices = _backendChoicesForTarget(_serveTarget);
@@ -993,8 +1116,15 @@ function _rerenderCachedModels() {
const _l = (name, tip) => `<span>${name}<span class="hwfit-hint" title="${tip}">?</span></span>`;
const _ggufChoices = _runnableGgufFiles(m);
const _savedGguf = String(sv('gguf_file', '') || '');
const _preferredGgufInclude = String(sv('_preferredGgufInclude', '') || '').replace(/\*/g, '').toLowerCase();
const _preferredGguf = _preferredGgufInclude
? (_ggufChoices.find(f => String(f.rel_path || '').toLowerCase().includes(_preferredGgufInclude))
|| _ggufChoices.find(f => String(f.name || '').toLowerCase().includes(_preferredGgufInclude)))
: null;
const _defaultGguf = _ggufChoices.some(f => f.rel_path === _savedGguf)
? _savedGguf
: (_preferredGguf?.rel_path || '')
? _preferredGguf.rel_path
: (_ggufChoices[0]?.rel_path || '');
const _ggufOptions = _ggufChoices.map(f =>
`<option value="${esc(f.rel_path)}"${f.rel_path === _defaultGguf ? ' selected' : ''}>${esc(_ggufFileLabel(f))}</option>`
@@ -1026,6 +1156,10 @@ function _rerenderCachedModels() {
+ `</div>`;
let panelHtml = `<div class="hwfit-serve-panel">`;
const _replaceTaskId = String(sv('_replaceTaskId', '') || '');
if (_replaceTaskId) {
panelHtml += `<input type="hidden" class="hwfit-sf" data-field="_replaceTaskId" value="${esc(_replaceTaskId)}" />`;
}
// Runtime-readiness note pinned at the top of the serve area so the
// user sees "vLLM ready on …" before scrolling into the configure
// form. Hidden until the readiness probe returns. The × button
@@ -1202,20 +1336,20 @@ function _rerenderCachedModels() {
const _rp_name = _rp_flag ? _rp_flag.split(' ')[1] : '';
panelHtml += `<div class="hwfit-serve-checks hwfit-backend-vllm hwfit-backend-sglang">`;
panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="trust_remote"${sv('trust_remote',_isMiniMaxMSeries)?' checked':''} /> Trust Remote Code${_h('Allow model to run custom code from HuggingFace')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="auto_tool"${sv('auto_tool',_isMiniMaxMSeries)?' checked':''} /> Auto Tool Choice${_h('Enable function/tool calling for agent mode')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-backend-sglang"><input type="checkbox" class="hwfit-sf" data-field="auto_tool"${sv('auto_tool',_nativeToolDefault)?' checked':''} /> Auto Tool Choice${_h('Enable function/tool calling for agent mode')}</label>`;
// Always-render the Reasoning Parser, Expert Parallel, and MoE Env
// checkboxes — the model-family detection above is a hint, not a
// hard gate. User asked to keep these visible regardless so that
// a borderline-undetected MoE/reasoning model can still toggle
// them without dropping back to the raw command box.
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="reasoning_parser" data-parser="${_rp_name || ''}"${sv('reasoning_parser',_isMiniMaxMSeries)?' checked':''} /> Reasoning Parser${_rp_name ? ` <span class="hwfit-parser-tag">${_rp_name}</span>` : ''}${_h('Splits <think> tokens into a separate channel. The tag (when shown) is the auto-detected parser; edit the command if you need a different one.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-backend-sglang"><input type="checkbox" class="hwfit-sf" data-field="reasoning_parser" data-parser="${_rp_name || ''}"${sv('reasoning_parser',_reasoningDefault)?' checked':''} /> Reasoning Parser${_rp_name ? ` <span class="hwfit-parser-tag">${_rp_name}</span>` : ''}${_h('Splits <think> tokens into a separate channel. The tag (when shown) is the auto-detected parser; edit the command if you need a different one.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="enforce_eager"${sv('enforce_eager',false)?' checked':''} /> Enforce Eager${_h('Disable CUDA graphs. Slower but uses less memory')}</label>`;
panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="prefix_cache"${sv('prefix_cache',false)?' checked':''} /> Prefix Caching${_h('Cache shared prompt prefixes across requests')}</label>`;
// Inline the previously-second vLLM checks row so Expert Parallel /
// Speculative / MoE Env sit next to Prefix Caching with no gap. All
// three are vLLM-only — class-gated so they hide on SGLang. Always
// render so the user can flip them on for any MoE model.
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel"${sv('expert_parallel',_isMiniMaxMSeries)?' checked':''} /> Expert Parallel${_h('MoE: shard expert layers across GPUs. Helps for MiniMax M-series, Qwen3 A3B/A10B/A22B MoE, DeepSeek V3+/R1. Ignored / wasteful on dense models.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-backend-sglang"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel"${sv('expert_parallel',_expertParallelDefault)?' checked':''} /> Expert Parallel${_h('MoE: shard expert layers across GPUs. Helps for MiniMax M-series, StepFun Step-3, Qwen3 A3B/A10B/A22B MoE, DeepSeek V3+/R1. Ignored / wasteful on dense models.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="language_model_only"${sv('language_model_only',_isMiniMaxM3)?' checked':''} /> Language Model Only${_h('vLLM --language-model-only. Needed by MiniMax M3 text serving when the repo also contains VL components.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="disable_custom_all_reduce"${sv('disable_custom_all_reduce',_isMiniMaxM3)?' checked':''} /> Disable Custom All Reduce${_h('vLLM --disable-custom-all-reduce. Useful for some 8-GPU/nightly configurations.')}</label>`;
{
@@ -2870,11 +3004,11 @@ function _rerenderCachedModels() {
// preflight and let the launch silently fall to CPU.
let _hwGpus = [];
try {
const _gh = (_selectedServeTarget.host || '').trim();
const _gh = (launchTarget.host || '').trim();
const _gp = new URLSearchParams();
if (_gh) {
_gp.set('host', _gh);
const _sp = (_serverByVal?.(_selectedServeTarget.serverKey || _gh) || {}).port;
const _sp = (_serverByVal?.(launchTarget.serverKey || _gh) || {}).port;
if (_sp) _gp.set('ssh_port', _sp);
}
const _gr = await fetch('/api/cookbook/gpus' + (_gp.toString() ? '?' + _gp : ''), { credentials: 'same-origin' });
@@ -3069,6 +3203,7 @@ function _rerenderCachedModels() {
try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {}
const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {};
const _saved = { ...serveState, _forceBackend: true };
delete _saved._replaceTaskId;
byRepo[repo] = _saved;
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved }));
} catch {}
@@ -3127,7 +3262,8 @@ function _rerenderCachedModels() {
await _withSpinner(_launchBtn, async () => {
// Pass the exact form values so the running task can be re-opened
// in the Serve panel pre-filled with these settings (Edit button).
await _launchServeTask(shortName, repo, launchCmd, serveState, serveHost, { serverKey: serveServerKey, serverName: serveServerName });
const taskDisplayName = _serveTaskDisplayName(shortName, m, serveState);
await _launchServeTask(taskDisplayName, repo, launchCmd, serveState, serveHost, { serverKey: serveServerKey, serverName: serveServerName });
});
} finally {
_envState.env = origEnv;
@@ -3188,7 +3324,6 @@ function _resolveCacheHost() {
}
async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = null) {
if (!skipConfirm && !(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) return;
const m = model || _cachedAllModels.find(x => x.repo_id === repo);
// Delete the EXACT on-disk path the scan reported. Models in a custom
// model dir live at <path>/<repo>; HF-cache models at
@@ -3204,13 +3339,32 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
} else {
target = `~/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}`;
}
let deleteChoice = { mode: 'repo' };
const ggufFiles = _ggufFilesForModel(m);
if (!skipConfirm) {
if (ggufFiles.length > 1) {
deleteChoice = await _ggufDeleteChoice(repo, ggufFiles);
if (!deleteChoice) return;
} else if (!(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) {
return;
}
}
const host = _resolveCacheHost();
let cmd;
if (_isWindows()) {
const winTarget = target.startsWith('~')
? target.replace(/^~/, '$env:USERPROFILE').replace(/\//g, '\\')
: target.replace(/\//g, '\\');
cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`;
if (deleteChoice.mode === 'files') {
const targets = deleteChoice.files
.map(f => _safeGgufRelPath(f.rel_path))
.filter(Boolean)
.map(rel => `${winTarget}\\${rel.replace(/\//g, '\\')}`);
if (!targets.length) return;
cmd = targets.map(p => `Remove-Item -Force "${p.replace(/"/g, '\\"')}" -ErrorAction SilentlyContinue`).join('; ');
} else {
cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`;
}
if (host) {
const pf = _sshPrefix(_getPort(host));
cmd = `ssh ${pf}${host} "powershell -Command \\"${cmd}\\""`;
@@ -3219,7 +3373,16 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
// $HOME expands inside double quotes; ~ would not, so normalize the
// fallback. Quoting also handles spaces in custom model-dir paths.
const unixTarget = target.startsWith('~') ? target.replace(/^~/, '$HOME') : target;
cmd = `rm -rf "${unixTarget}"`;
if (deleteChoice.mode === 'files') {
const targets = deleteChoice.files
.map(f => _safeGgufRelPath(f.rel_path))
.filter(Boolean)
.map(rel => `${target.replace(/\/+$/, '')}/${rel}`);
if (!targets.length) return;
cmd = `rm -f ${targets.map(p => _shellPathExpr(p)).join(' ')} && find ${_shellPathExpr(target)} -type d -empty -delete`;
} else {
cmd = `rm -rf "${unixTarget}"`;
}
if (host) cmd = _sshCmd(host, cmd, _getPort(host));
}
// Deleting a large model (tens/hundreds of GB) can take a while, especially
@@ -3244,7 +3407,13 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
body: JSON.stringify({ command: cmd }),
});
if (!res.ok) { uiModule.showError(`Delete failed (${res.status})`); return; }
if (itemEl) {
if (deleteChoice.mode === 'files') {
if (m && Array.isArray(m.gguf_files)) {
const removed = new Set(deleteChoice.files.map(f => _safeGgufRelPath(f.rel_path)));
m.gguf_files = m.gguf_files.filter(f => !removed.has(_safeGgufRelPath(f.rel_path)));
}
await _fetchCachedModels(false);
} else if (itemEl) {
itemEl.querySelector('.cookbook-delete-overlay')?.remove();
itemEl.style.transition = 'opacity 0.24s ease, transform 0.24s ease, max-height 0.28s ease, padding 0.28s ease, margin 0.28s ease';
itemEl.style.maxHeight = `${Math.max(itemEl.getBoundingClientRect().height, itemEl.scrollHeight)}px`;
@@ -3258,9 +3427,9 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
requestAnimationFrame(() => { itemEl.style.maxHeight = '0'; });
await new Promise(resolve => setTimeout(resolve, 300));
if (itemEl.parentElement) itemEl.remove();
// Drop from the in-memory list so a re-render/filter doesn't resurrect it.
_cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo);
}
// Drop from the in-memory list so a re-render/filter doesn't resurrect it.
_cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo);
} catch (e) {
uiModule.showError('Delete failed: ' + (e && e.message ? e.message : e));
} finally {
+39 -19
View File
@@ -77,6 +77,7 @@ function _handlePickerKeydown(e, listEl, itemSelector, closeFn) {
// Dependencies injected via initModelPicker()
let _deps = null;
let _autoSelectingDefault = false;
let _defaultChatPickInFlight = false;
function _modelExists(modelId, url) {
if (!modelId || !window.modelsModule || !window.modelsModule.getCachedItems) return false;
@@ -91,6 +92,43 @@ function _modelExists(modelId, url) {
});
}
/**
 * Pick a model for a not-yet-created chat when nothing has been selected.
 *
 * Does nothing when the picker has no injected deps, another pick is already
 * in flight, a session is current, or the pending chat already names a model.
 * Otherwise it asks the server for the configured default
 * (`/api/default-chat`) and, if none is configured, falls back to the first
 * online cached endpoint that lists at least one model.
 *
 * The "nothing selected" preconditions are re-checked after the network
 * round-trip so a model or session chosen while the request was pending is
 * never overwritten by the default.
 *
 * @returns {Promise<void>}
 */
async function _ensureDefaultPendingChat() {
    if (!_deps || _defaultChatPickInFlight) return;
    // True only while there is no current session and no pending model.
    const stillNeedsDefault = () => {
        if (_deps.getCurrentSessionId && _deps.getCurrentSessionId()) return false;
        const pending = _deps.getPendingChat && _deps.getPendingChat();
        return !(pending && pending.modelId);
    };
    if (!stillNeedsDefault()) return;
    _defaultChatPickInFlight = true;
    try {
        let dc = null;
        try {
            const res = await fetch(`${API_BASE}/api/default-chat`, { credentials: 'same-origin' });
            if (res.ok) dc = await res.json();
        } catch (_) {
            // Best effort: a failed lookup just means "no configured default".
        }
        // State may have changed while the request was pending; never clobber
        // a selection made in the meantime.
        if (!stillNeedsDefault()) return;
        if (dc && dc.endpoint_url && dc.model) {
            _deps.setPendingChat({
                url: dc.endpoint_url,
                modelId: dc.model,
                endpointId: dc.endpoint_id || '',
            });
            try { window.__odysseusDefaultChat = dc; } catch (_) {}
            updateModelPicker();
            return;
        }
        // No configured default: preserve the old convenience fallback.
        if (window.modelsModule && window.modelsModule.getCachedItems) {
            // Callers fire this without awaiting, so a throw here would surface
            // as an unhandled rejection; tolerate a missing cache instead.
            const items = window.modelsModule.getCachedItems() || [];
            const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length));
            if (first) {
                const models = (first.models || []).concat(first.models_extra || []);
                _deps.setPendingChat({ url: first.url, modelId: models[0], endpointId: first.endpoint_id });
                updateModelPicker();
            }
        }
    } finally {
        _defaultChatPickInFlight = false;
    }
}
/**
* Initialize the model picker dropdown.
* @param {Object} deps
@@ -710,25 +748,7 @@ export function updateModelPicker() {
}
}
if (!modelId && !_autoSelectingDefault && window.modelsModule && window.modelsModule.getCachedItems) {
const items = window.modelsModule.getCachedItems();
const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length));
if (first) {
const models = (first.models || []).concat(first.models_extra || []);
modelId = models[0];
if (!currentSessionId) {
_deps.setPendingChat({ url: first.url, modelId, endpointId: first.endpoint_id });
} else {
if (s) { s.model = modelId; s.endpoint_url = first.url; }
_autoSelectingDefault = true;
const fd = new FormData();
fd.append('model', modelId);
fd.append('endpoint_url', first.url || '');
if (first.endpoint_id) fd.append('endpoint_id', first.endpoint_id);
fetch(`${API_BASE}/api/session/${currentSessionId}`, { method: 'PATCH', body: fd })
.catch(() => {})
.finally(() => { _autoSelectingDefault = false; });
}
}
_ensureDefaultPendingChat();
}
const displayName = modelId ? modelId.split('/').pop() : 'Select model';
+2 -18
View File
@@ -1896,10 +1896,6 @@ function _renderNotes() {
${_hasItems(note) ? `<div class="note-cl-quickadd"><input type="text" class="note-cl-quickadd-input" placeholder="+ Add item" data-note-id="${note.id}" /></div>` : ''}
${reminderTagHtml}
${noteTags.length ? `<div class="note-card-label">${noteTags.map(t => `<button type="button" class="note-card-label-chip" data-note-label-filter="${_esc(t)}" title="Filter #${_esc(t)}">#${_esc(t)}</button>`).join(' ')}</div>` : ''}
${note.agent_session_id ? `<button class="note-agent-tag" data-note-id="${note.id}" data-session-id="${_esc(note.agent_session_id)}" title="Open the agent's chat for this note">
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
<span>Agent</span>
</button>` : ''}
<div class="note-card-actions">
<div class="note-card-colors">${colorDots}</div>
<span style="flex:1"></span>
@@ -2304,16 +2300,6 @@ function _bindCardEvents(body) {
_openNoteCornerMenu(btn);
});
});
// Agent tag — opens the chat session the agent ran for this note.
body.querySelectorAll('.note-agent-tag').forEach(tag => {
tag.addEventListener('click', (e) => {
e.preventDefault();
e.stopPropagation();
const sid = tag.dataset.sessionId;
const _sm = window.sessionModule;
if (sid && _sm && _sm.selectSession) { closePanel(); _sm.selectSession(sid); }
});
});
body.querySelectorAll('.note-card-label-chip').forEach(chip => {
chip.addEventListener('click', (e) => {
e.preventDefault();
@@ -4383,18 +4369,16 @@ function _openTodoAgentMenu(btn) {
const noteId = btn.dataset.noteId;
const idx = parseInt(btn.dataset.idx);
const sid = btn.dataset.sessionId || '';
const title = btn.dataset.agentTitle || 'Agent chat';
const menu = document.createElement('div');
menu.className = 'note-corner-menu-dropdown note-agent-item-menu';
menu.innerHTML = `
<div class="ncm-title">${_esc(title)}</div>
${sid ? `<button type="button" class="ncm-item" data-act="open">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M15 3h6v6"/><path d="M10 14L21 3"/><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/></svg>
<span>Open this agent chat</span>
<span>Open</span>
</button>` : ''}
<button type="button" class="ncm-item" data-act="run">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
<span>${sid ? 'Run again for this todo' : 'Start agent for this todo'}</span>
<span>${sid ? 'Run again' : 'Run Agent'}</span>
</button>`;
_positionNoteMenu(menu, btn);
const openBtn = menu.querySelector('[data-act="open"]');
+165 -21
View File
@@ -5324,6 +5324,84 @@ body.bg-pattern-sparkles {
.confirm-btn-primary:hover { filter:brightness(1.15); }
.confirm-btn-danger { background:var(--color-danger); color:#fff; border-color:transparent; }
.confirm-btn-danger:hover { background:var(--color-error); }
/* Styles for the cookbook GGUF delete dialog (selectors prefixed
   cookbook-gguf-delete). */
/* Backdrop: fixed to the full viewport, dimmed and blurred. The !important
   flags force positioning, stacking and pointer events over competing rules. */
#cookbook-gguf-delete-overlay {
background:rgba(0,0,0,0.5);
backdrop-filter:blur(4px);
pointer-events:auto !important;
z-index:99999 !important;
position:fixed !important;
inset:0 !important;
}
/* Dialog box: 560px wide, capped at 92% of the viewport width. */
.cookbook-gguf-delete-box {
width:560px;
max-width:92vw;
}
/* File list: vertical stack that scrolls once it exceeds 42% of the
   viewport height. */
.cookbook-gguf-delete-list {
display:flex;
flex-direction:column;
gap:6px;
max-height:42vh;
overflow:auto;
padding:2px 2px 4px;
}
/* One selectable row: an 18px first column (presumably for the checkbox -
   confirm against the markup) plus a flexible second column. minmax(0,1fr)
   lets that column shrink below its content width so the ellipsis rules
   further down can take effect. */
.cookbook-gguf-delete-row {
display:grid;
grid-template-columns:18px minmax(0,1fr);
gap:7px 8px;
align-items:start;
padding:7px 8px;
border:1px solid var(--border);
border-radius:7px;
background:color-mix(in srgb, var(--panel, var(--bg)) 92%, var(--fg) 8%);
cursor:pointer;
}
/* Hover: blend the border toward the primary accent colour. */
.cookbook-gguf-delete-row:hover {
border-color:color-mix(in srgb, var(--accent-primary, var(--fg)) 45%, var(--border));
}
/* Checkbox with native appearance removed, drawn as an 8px circle.
   box-sizing:content-box means the 1px border is added outside the 8px. */
.cookbook-gguf-delete-cb {
-webkit-appearance:none;
appearance:none;
width:8px !important;
height:8px !important;
min-width:8px;
min-height:8px;
padding:0;
margin:4px 0 0;
border:1px solid var(--border);
border-radius:50%;
background:transparent;
box-sizing:content-box;
cursor:pointer;
transition:background 0.15s, border-color 0.15s, transform 0.12s;
}
/* Hover: accent-coloured border and a slight scale-up. */
.cookbook-gguf-delete-cb:hover {
border-color:var(--accent, var(--red));
transform:scale(1.12);
}
/* Checked: fill the circle with the accent colour. */
.cookbook-gguf-delete-cb:checked {
background:var(--accent, var(--red));
border-color:var(--accent, var(--red));
}
/* Both text lines stay on a single line and truncate with an ellipsis. */
.cookbook-gguf-delete-main,
.cookbook-gguf-delete-path {
min-width:0;
overflow:hidden;
text-overflow:ellipsis;
white-space:nowrap;
}
/* Primary text line of a row. */
.cookbook-gguf-delete-main {
font-size:0.86rem;
color:var(--fg);
}
/* Secondary line: pinned to the second grid column, smaller and dimmed. */
.cookbook-gguf-delete-path {
grid-column:2;
margin-top:-2px;
font-size:0.74rem;
opacity:0.58;
}
/* Allow the action buttons to wrap onto a new line when space runs out. */
.cookbook-gguf-delete-actions {
flex-wrap:wrap;
}
/* Styled prompt — text-input dialog (used in place of window.prompt) */
#styled-prompt-overlay {
background:rgba(0,0,0,0.5);
@@ -19222,6 +19300,18 @@ body.gallery-selecting .gallery-dl-btn,
background: color-mix(in srgb, var(--red) 20%, transparent);
}
.cookbook-gpu-kill:disabled { opacity: 0.4; cursor: wait; }
/* Title row: horizontal flex line with vertically centred items.
   min-width: 0 lets the row shrink below its content width inside a flex
   parent. */
.cookbook-serve-title {
display: flex;
align-items: center;
gap: 4px;
min-width: 0;
}
/* Name inside the title row: kept on one line and truncated with an
   ellipsis when it does not fit. */
.cookbook-serve-title-name {
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.cookbook-hf-link {
font-size: 9px;
text-decoration: none;
@@ -19234,6 +19324,7 @@ body.gallery-selecting .gallery-dl-btn,
vertical-align: 1px;
letter-spacing: 0.3px;
font-weight: 600;
flex-shrink: 0;
}
.cookbook-hf-link:hover {
opacity: 0.8;
@@ -19626,6 +19717,9 @@ body.gallery-selecting .gallery-dl-btn,
position: relative;
top: -2px;
}
/* Offsets the element 3px upward. `top` only applies to positioned elements.
   NOTE(review): assumes `position` is supplied by another rule that also
   matches this element - confirm. */
.cookbook-dep-reinstall {
top: -3px;
}
.cookbook-dep-rebuild:hover {
background: color-mix(in srgb, var(--accent, var(--red)) 18%, transparent);
color: var(--accent, var(--red));
@@ -20619,6 +20713,11 @@ body.gallery-selecting .gallery-dl-btn,
}
.cookbook-task[data-status="done"] .cookbook-task-check-ico { display: inline; }
.cookbook-task[data-status="done"] .cookbook-task-clear-ico { display: none; }
/* Viewports up to 820px wide: set the task check's `top` offset to 2px. */
@media (max-width: 820px) {
.cookbook-task-check {
top: 2px;
}
}
.cookbook-task-start-now {
display: inline-flex;
align-items: center;
@@ -20652,24 +20751,30 @@ body.gallery-selecting .gallery-dl-btn,
/* "Serve" button on a finished download — green pill matching the "running" /
finished badge (it sits next to the green FINISHED chip + check). */
.cookbook-task-serve-btn {
font-size: 9px;
font-weight: 600;
padding: 1px 6px;
border: none;
border-radius: 3px;
line-height: 16px;
display: inline-flex;
align-items: center;
gap: 3px;
padding: 1px 6px 1px 4px;
border: 0;
border-radius: 9px;
line-height: 1;
flex-shrink: 0;
cursor: pointer;
font-family: inherit;
background: color-mix(in srgb, var(--green, #50fa7b) 20%, transparent);
font-size: 9px;
text-transform: lowercase;
background: transparent;
color: var(--green, #50fa7b);
position: relative;
top: -2px;
margin-right: 2px;
appearance: none;
-webkit-appearance: none;
-moz-appearance: none;
transition: background 0.15s;
}
.cookbook-task-serve-btn:hover { background: color-mix(in srgb, var(--green, #50fa7b) 32%, transparent); }
.cookbook-task-serve-btn svg { flex-shrink: 0; }
.cookbook-task-serve-btn:hover { background: color-mix(in srgb, var(--green, #50fa7b) 16%, transparent); }
.cookbook-task-sub {
padding: 1px 10px 4px;
line-height: 1;
@@ -21448,6 +21553,31 @@ body.gallery-selecting .gallery-dl-btn,
.cookbook-dl-btn:hover {
opacity: 0.9;
}
/* GGUF row in the download form: right-aligned, vertically centred items,
   nudged 2px upward.
   NOTE(review): gap / align-items / justify-content need a flex or grid
   container and no `display` is set here - presumably it comes from another
   class on the same element; confirm. */
.cookbook-dl-gguf-row {
margin-top: -1px;
gap: 5px;
align-items: center;
justify-content: flex-end;
font-size: 11px;
position: relative;
top: -2px;
}
/* Dimmed label that never shrinks. */
.cookbook-dl-gguf-label {
opacity: 0.65;
flex-shrink: 0;
}
/* Quant control: fixed 28px height, at least 118px wide, and excluded from
   flex growing and shrinking. */
#cookbook-dl-gguf-quant {
height: 28px;
min-width: 118px;
flex: 0 0 auto;
}
/* Note text: dimmed, single line, truncated with an ellipsis at 240px. */
#cookbook-dl-gguf-note {
opacity: 0.55;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
max-width: 240px;
}
/* HF link in search panel */
.hwfit-panel-hf-link {
@@ -31920,24 +32050,34 @@ body.notes-drag-mode .note-card-pin svg {
.note-corner-menu-dropdown .ncm-item:hover {
background: color-mix(in srgb, var(--fg) 8%, transparent);
}
/* "Agent" tag on a note that has a linked agent chat session */
.note-agent-tag {
align-self: flex-start;
.note-checkbox-agent {
display: inline-flex;
align-items: center;
gap: 5px;
background: color-mix(in srgb, var(--accent, var(--red)) 14%, transparent);
border: 1px solid color-mix(in srgb, var(--accent, var(--red)) 35%, transparent);
justify-content: center;
width: 14px;
height: 14px;
padding: 0;
margin: 0 1px;
border: 0;
background: transparent;
color: var(--accent, var(--red));
border-radius: 999px;
padding: 3px 10px 3px 8px;
font-size: 11px;
font-weight: 600;
box-shadow: none;
cursor: pointer;
margin-top: 2px;
transition: background 0.12s;
opacity: 0;
transition: opacity 0.12s, color 0.12s;
}
.note-checkbox:hover .note-checkbox-agent { opacity: 0.55; }
.note-checkbox-agent:hover {
background: transparent;
opacity: 1 !important;
}
.note-checkbox-agent.is-agent-stream-complete {
color: #50fa7b;
opacity: 0.9;
}
.note-checkbox-agent svg {
display: block;
}
.note-agent-tag:hover { background: color-mix(in srgb, var(--accent, var(--red)) 24%, transparent); }
.note-card {
/* Same tint that .doclib-card uses so a default (uncolored) note
@@ -36414,6 +36554,10 @@ body.research-panel-view #research-divider { display:none; }
.research-setting {
display:flex; flex-direction:column; flex:1; min-width:90px;
}
/* Shift down by 3px every .research-setting that is one of the last three
   children of its parent within .research-settings-row. */
.research-settings-row .research-setting:nth-last-child(-n + 3) {
position: relative;
top: 3px;
}
.research-setting-label {
font-size:9px; text-transform:uppercase; letter-spacing:0.5px;
opacity:0.5; margin-bottom:2px;
@@ -221,6 +221,60 @@ def test_skip_fenced_still_recovers_xml_invoke_markup():
assert "latest python release" in blocks[0].content
def test_stepfun_native_tool_tokens_are_executed_even_when_fenced_fallback_is_skipped():
    """A fully wrapped native tool call with a JSON payload parses with skip_fenced=True.

    The call must come back as a single ``web_search`` block, and stripping
    the tool markup must leave no text behind.
    """
    payload = '{"query":"Sweden news today"}'
    single_call = "<tool▁call▁begin>web_search<tool▁sep>" + payload + "<tool▁call▁end>"
    model_output = "<tool▁calls▁begin>" + single_call + "<tool▁calls▁end>"

    parsed = parse_tool_blocks(model_output, skip_fenced=True)
    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today" in only_block.content

    assert strip_tool_blocks(model_output, skip_fenced=True) == ""
def test_stepfun_native_tool_tokens_accept_plain_web_query():
    """A native tool call whose payload is a bare query string (not JSON) still parses."""
    query = "Sweden news today"
    model_output = "".join(
        ["<tool▁call▁begin>web_search<tool▁sep>", query, "<tool▁call▁end>"]
    )

    parsed = parse_tool_blocks(model_output, skip_fenced=True)
    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today" in only_block.content
def test_skip_fenced_still_recovers_direct_xml_tool_markup():
    """``<tool_call><web_search>…`` markup after prose is parsed and stripped with skip_fenced=True."""
    prose = "I'll search now.\n"
    markup = "<tool_call><web_search>News in Sweden today 2026-06-22</web_search></tool_call>"
    model_output = prose + markup

    parsed = parse_tool_blocks(model_output, skip_fenced=True)
    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "News in Sweden today 2026-06-22" in only_block.content

    # Only the leading prose survives once the tool markup is removed.
    assert strip_tool_blocks(model_output, skip_fenced=True) == "I'll search now."
def test_skip_fenced_recovers_direct_xml_tool_markup_with_unclosed_wrapper():
    """A ``<tool_call>`` wrapper that is never closed must not hide the inner tool element."""
    model_output = "\n".join(
        [
            "I'll search now.",
            "<tool_call>",
            "<web_search>",
            "Sweden news today 2026-06-22",
            "</web_search>",
        ]
    )

    parsed = parse_tool_blocks(model_output, skip_fenced=True)
    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today 2026-06-22" in only_block.content

    # Only the leading prose survives once the tool markup is removed.
    assert strip_tool_blocks(model_output, skip_fenced=True) == "I'll search now."
def test_skip_fenced_still_recovers_dsml_markup():
dsml = (
"Let me search for that.\n"
+70
View File
@@ -19,7 +19,12 @@ from pathlib import Path
import pytest
from fastapi import APIRouter
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
import core.database as cdb
from core.database import GalleryImage
from src.upload_handler import count_recent_uploads, UploadHandler
import routes.upload_routes as up
@@ -82,6 +87,10 @@ def _files(n):
return [types.SimpleNamespace(filename=f"f{i}.txt") for i in range(n)]
def _image_upload(name="photo.png", content=b"not really png but enough for route metadata"):
return types.SimpleNamespace(filename=name, file=io.BytesIO(content))
@pytest.fixture(autouse=True)
def _reset_router(monkeypatch):
# Module-level router accumulates routes across setup calls; reset it.
@@ -163,3 +172,64 @@ def test_six_file_batch_is_not_rate_limited(tmp_path):
assert meta and meta.get("id")
saved += 1
assert saved == 6
async def test_chat_image_upload_is_added_to_gallery(tmp_path, monkeypatch):
    """An image sent through the upload route yields a matching GalleryImage row and a file on disk."""
    # NOTE(review): coroutine test - presumably collected via the suite's
    # asyncio plugin/auto mode; confirm against the pytest configuration.
    db_url = f"sqlite:///{tmp_path / 'gallery.db'}"
    engine = create_engine(
        db_url,
        connect_args={"check_same_thread": False},
        poolclass=NullPool,
    )
    cdb.Base.metadata.create_all(engine)
    session_factory = sessionmaker(bind=engine, autoflush=False, autocommit=False)
    images_dir = tmp_path / "generated_images"

    # Point the route module at the throwaway database and image directory.
    monkeypatch.setattr(up, "SessionLocal", session_factory)
    monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(images_dir))

    handler = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads"))
    up.setup_upload_routes(handler)
    upload_endpoint = _endpoint(up.router)

    response = await upload_endpoint(_request(user="alice"), [_image_upload()])
    first_file = response["files"][0]
    assert first_file["gallery_id"]

    session = session_factory()
    try:
        lookup = session.query(GalleryImage).filter(GalleryImage.id == first_file["gallery_id"])
        row = lookup.one()
        assert row.owner == "alice"
        assert row.model == "chat-upload"
        assert row.prompt == "photo.png"
        assert row.file_hash == first_file["hash"]
        assert (images_dir / row.filename).exists()
    finally:
        session.close()
async def test_non_image_chat_upload_is_not_added_to_gallery(tmp_path, monkeypatch):
    """A plain-text upload gets no ``gallery_id`` and leaves the gallery table empty."""
    # NOTE(review): coroutine test - presumably collected via the suite's
    # asyncio plugin/auto mode; confirm against the pytest configuration.
    db_url = f"sqlite:///{tmp_path / 'gallery.db'}"
    engine = create_engine(
        db_url,
        connect_args={"check_same_thread": False},
        poolclass=NullPool,
    )
    cdb.Base.metadata.create_all(engine)
    session_factory = sessionmaker(bind=engine, autoflush=False, autocommit=False)

    # Point the route module at the throwaway database and image directory.
    monkeypatch.setattr(up, "SessionLocal", session_factory)
    monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(tmp_path / "generated_images"))

    handler = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads"))
    up.setup_upload_routes(handler)
    upload_endpoint = _endpoint(up.router)

    text_upload = types.SimpleNamespace(
        filename="notes.txt",
        file=io.BytesIO(b"plain text upload"),
    )
    response = await upload_endpoint(_request(user="alice"), [text_upload])
    assert "gallery_id" not in response["files"][0]

    session = session_factory()
    try:
        assert session.query(GalleryImage).count() == 0
    finally:
        session.close()