Cookbook launch and gallery upload fixes

This commit is contained in:
pewdiepie-archdaemon
2026-06-22 01:49:15 +00:00
parent 75f04bc088
commit 92daf4e560
15 changed files with 1047 additions and 135 deletions
+13
View File
@@ -2446,6 +2446,17 @@ def setup_cookbook_routes() -> APIRouter:
disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else [] disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else [] incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
incoming_removed = data.get("removedTasks") if isinstance(data.get("removedTasks"), dict) else {}
disk_removed = on_disk.get("removedTasks") if isinstance(on_disk, dict) and isinstance(on_disk.get("removedTasks"), dict) else {}
removed_tasks = {**disk_removed, **incoming_removed}
data["removedTasks"] = removed_tasks
removed_ids = set(removed_tasks.keys())
if removed_ids:
incoming_tasks = [
t for t in incoming_tasks
if not (isinstance(t, dict) and t.get("sessionId") in removed_ids)
]
data["tasks"] = incoming_tasks
# Anti-poisoning guard: a stale browser tab can keep POSTing a # Anti-poisoning guard: a stale browser tab can keep POSTing a
# download task as status='done' from before the strict-finish # download task as status='done' from before the strict-finish
# fix landed, undoing any server-side correction. For each # fix landed, undoing any server-side correction. For each
@@ -2483,6 +2494,8 @@ def setup_cookbook_routes() -> APIRouter:
sid = t.get("sessionId") sid = t.get("sessionId")
if not sid or sid in incoming_ids: if not sid or sid in incoming_ids:
continue # client's version wins continue # client's version wins
if sid in removed_ids:
continue # intentional cross-device clear/remove
ts = t.get("ts") or 0 ts = t.get("ts") or 0
if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS: if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS:
preserved.append(t) preserved.append(t)
+58 -4
View File
@@ -714,6 +714,16 @@ def _effective_endpoint_kind(ep: Any, base_url: str) -> str:
return "auto" return "auto"
def _is_loading_model_response(resp: Any) -> bool:
    """Return True when *resp* is an HTTP 503 whose body mentions "loading model".

    The status is read with ``getattr`` so objects without a ``status_code``
    attribute simply yield False. The body match is case-insensitive, and a
    body that cannot be read is treated as empty.
    """
    if getattr(resp, "status_code", None) == 503:
        try:
            text = resp.text or ""
        except Exception:
            # Reading the body failed; fall back to "no body".
            text = ""
        return "loading model" in text.lower()
    return False
def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]: def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]:
"""Probe a base URL's /models endpoint and return list of model IDs. """Probe a base URL's /models endpoint and return list of model IDs.
@@ -778,6 +788,9 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
models.append(_e) models.append(_e)
return [m for m in models if _is_chat_model(m)] return [m for m in models if _is_chat_model(m)]
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
if e.response is not None and _is_loading_model_response(e.response):
logger.info(f"Endpoint still loading model at {url}")
return []
if api_key: if api_key:
status = e.response.status_code if e.response is not None else "unknown" status = e.response.status_code if e.response is not None else "unknown"
logger.warning(f"Failed to probe {url} with API key: HTTP {status}") logger.warning(f"Failed to probe {url} with API key: HTTP {status}")
@@ -827,6 +840,15 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
or "ollama" in (parsed_base.hostname or "").lower() or "ollama" in (parsed_base.hostname or "").lower()
) )
def _is_loading_model_response(r) -> bool:
    """Tell whether response *r* is a 503 reporting that a model is still loading.

    Anything other than status 503 (including objects with no
    ``status_code``) is False. The body is compared case-insensitively
    against "loading model"; an unreadable body counts as empty.
    """
    is_503 = getattr(r, "status_code", None) == 503
    if not is_503:
        return False
    try:
        payload = r.text or ""
    except Exception:
        # Body access raised; treat the response as having no text.
        payload = ""
    lowered = payload.lower()
    return "loading model" in lowered
def _result_from_response(r) -> Dict[str, Any]: def _result_from_response(r) -> Dict[str, Any]:
if 300 <= r.status_code < 400: if 300 <= r.status_code < 400:
loc = r.headers.get("location", "") loc = r.headers.get("location", "")
@@ -843,6 +865,13 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
"status_code": r.status_code, "status_code": r.status_code,
"error": None, "error": None,
} }
if _is_loading_model_response(r):
return {
"reachable": True,
"loading": True,
"status_code": r.status_code,
"error": "Loading model",
}
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"} return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
last_error: Optional[str] = None last_error: Optional[str] = None
@@ -1427,7 +1456,7 @@ def setup_model_routes(model_discovery):
t0 = _time.time() t0 = _time.time()
ping = _ping_endpoint(base, ep.api_key, timeout=1.5) ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
entry["latency_ms"] = round((_time.time() - t0) * 1000) entry["latency_ms"] = round((_time.time() - t0) * 1000)
entry["status"] = "online" if ping.get("reachable") or cached_count else "offline" entry["status"] = "loading" if ping.get("loading") else ("online" if ping.get("reachable") or cached_count else "offline")
entry["error"] = ping.get("error") entry["error"] = ping.get("error")
entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0) entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
except Exception as e: except Exception as e:
@@ -1606,7 +1635,32 @@ def setup_model_routes(model_discovery):
ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5 ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5
ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout) ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout)
if ping.get("reachable"): if ping.get("reachable"):
status = "empty" status = "loading" if ping.get("loading") else "empty"
if ping.get("loading"):
base = _normalize_base(r.base_url)
kind = _effective_endpoint_kind(r, base)
results.append({
"id": r.id,
"name": r.name,
"base_url": r.base_url,
"has_key": bool(r.api_key),
"api_key_fingerprint": _api_key_fingerprint(r.api_key),
"is_enabled": r.is_enabled,
"models": visible,
"pinned_models": pinned,
"hidden_count": len(hidden),
"online": True,
"status": status,
"ping_error": (ping or {}).get("error") if ping else None,
"model_type": getattr(r, "model_type", None) or "llm",
"supports_tools": getattr(r, "supports_tools", None),
"endpoint_kind": kind,
"category": _classify_endpoint(base, kind),
"model_refresh_mode": _endpoint_refresh_mode(r, kind),
"model_refresh_interval": getattr(r, "model_refresh_interval", None),
"model_refresh_timeout": getattr(r, "model_refresh_timeout", None),
})
continue
# Best-effort: if the probe came back reachable, try # Best-effort: if the probe came back reachable, try
# to populate cached_models in the background so the # to populate cached_models in the background so the
# NEXT picker load shows "online" instead of "empty". # NEXT picker load shows "online" instead of "empty".
@@ -1859,7 +1913,7 @@ def setup_model_routes(model_discovery):
"models": _merge_model_ids(model_ids, _pinned), "models": _merge_model_ids(model_ids, _pinned),
"pinned_models": _pinned, "pinned_models": _pinned,
"online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")), "online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
"status": "online" if (model_ids or _pinned) else ("empty" if ping.get("reachable") else "offline"), "status": "online" if (model_ids or _pinned) else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None, "ping_error": ping.get("error") if ping else None,
"endpoint_kind": requested_kind, "endpoint_kind": requested_kind,
"category": _classify_endpoint(base_url, requested_kind), "category": _classify_endpoint(base_url, requested_kind),
@@ -1888,7 +1942,7 @@ def setup_model_routes(model_discovery):
return { return {
"base_url": base_url, "base_url": base_url,
"online": bool(models) or bool(ping.get("reachable")), "online": bool(models) or bool(ping.get("reachable")),
"status": "online" if models else ("empty" if ping.get("reachable") else "offline"), "status": "online" if models else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None, "ping_error": ping.get("error") if ping else None,
"models": models, "models": models,
"count": len(models), "count": len(models),
+2 -2
View File
@@ -1108,7 +1108,7 @@ def setup_shell_routes() -> APIRouter:
{ {
"name": "llama_cpp", "name": "llama_cpp",
"pip": "llama-cpp-python[server]", "pip": "llama-cpp-python[server]",
"desc": "Serve GGUF models via llama.cpp", "desc": "Great for single-GPU or CPU inference with GGUF models",
"category": "LLM", "category": "LLM",
"target": "remote", "target": "remote",
# Build-toolchain prereqs. Cookbook's launch bootstrap # Build-toolchain prereqs. Cookbook's launch bootstrap
@@ -1129,7 +1129,7 @@ def setup_shell_routes() -> APIRouter:
{ {
"name": "vllm", "name": "vllm",
"pip": "vllm", "pip": "vllm",
"desc": "High-throughput LLM serving engine", "desc": "Great for high-throughput multi-GPU inference",
"category": "LLM", "category": "LLM",
"target": "remote", "target": "remote",
}, },
+76 -3
View File
@@ -3,11 +3,16 @@ import os
import time import time
import json import json
import asyncio import asyncio
import shutil
import uuid
from pathlib import Path
from fastapi import APIRouter, Request, File, UploadFile, HTTPException from fastapi import APIRouter, Request, File, UploadFile, HTTPException
from typing import List from typing import List
import logging import logging
from core.middleware import require_admin from core.middleware import require_admin
from core.database import SessionLocal, GalleryImage
from src.auth_helpers import effective_user from src.auth_helpers import effective_user
from src.constants import GENERATED_IMAGES_DIR
from src.upload_handler import count_recent_uploads from src.upload_handler import count_recent_uploads
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -50,6 +55,69 @@ def setup_upload_routes(upload_handler):
raise HTTPException(404, "File not found") raise HTTPException(404, "File not found")
raise HTTPException(404, "File not found") raise HTTPException(404, "File not found")
def _promote_chat_image_to_gallery(meta: dict, owner: str | None) -> str | None:
    """Make chat-uploaded images visible in Gallery without changing chat storage.

    The uploaded file is copied (not moved) into ``GENERATED_IMAGES_DIR`` and a
    ``GalleryImage`` row is created for the copy.

    Args:
        meta: Upload metadata. Reads ``name``, ``mime``, ``path``, ``hash``,
            ``width``, ``height`` and ``size``.
        owner: Owner recorded on the gallery row; when truthy it also scopes
            the duplicate lookup to that owner.

    Returns:
        The gallery image id (an existing one when an active row with the same
        file hash is found), or None when the upload is not an image, the
        source file is missing, or anything fails. This function never raises
        for copy/DB errors; they are logged as warnings.
    """
    is_image_file = getattr(upload_handler, "is_image_file", None)
    if not callable(is_image_file):
        return None
    if not is_image_file(meta.get("name", ""), meta.get("mime", "")):
        return None

    source_path = meta.get("path")
    if not source_path or not os.path.isfile(source_path):
        return None

    db = SessionLocal()
    # Tracks the gallery copy so a later failure can remove it instead of
    # leaving an orphaned file with no database row.
    dest_path = None
    try:
        file_hash = meta.get("hash")
        if file_hash:
            q = db.query(GalleryImage).filter(
                GalleryImage.file_hash == file_hash,
                GalleryImage.is_active == True,  # noqa: E712
            )
            if owner:
                q = q.filter(GalleryImage.owner == owner)
            existing = q.first()
            if existing:
                return existing.id

        image_dir = Path(GENERATED_IMAGES_DIR)
        image_dir.mkdir(parents=True, exist_ok=True)

        # Prefer the original extension; otherwise derive one from the MIME
        # type, defaulting to .png.
        ext = Path(meta.get("name") or source_path).suffix.lower()
        if ext not in {".png", ".jpg", ".jpeg", ".webp", ".gif"}:
            mime_ext = {
                "image/png": ".png",
                "image/jpeg": ".jpg",
                "image/jpg": ".jpg",
                "image/webp": ".webp",
                "image/gif": ".gif",
            }.get(meta.get("mime", ""))
            ext = mime_ext or ".png"

        filename = f"{uuid.uuid4().hex[:12]}{ext}"
        dest_path = image_dir / filename
        shutil.copy2(source_path, dest_path)

        image_id = str(uuid.uuid4())
        db.add(GalleryImage(
            id=image_id,
            filename=filename,
            prompt=meta.get("name") or "Chat upload",
            model="chat-upload",
            owner=owner,
            file_hash=file_hash,
            width=meta.get("width"),
            height=meta.get("height"),
            file_size=meta.get("size"),
        ))
        db.commit()
        return image_id
    except Exception as e:
        # Remove a (possibly partial) copy first so a failed insert/commit
        # does not leak files into the gallery directory.
        if dest_path is not None:
            try:
                dest_path.unlink(missing_ok=True)
            except OSError as cleanup_err:
                logger.warning(
                    "Failed to remove orphaned gallery copy %s: %s",
                    dest_path,
                    cleanup_err,
                )
        db.rollback()
        logger.warning("Failed to add chat image upload to gallery: %s", e)
        return None
    finally:
        db.close()
@router.post("") @router.post("")
async def api_upload(request: Request, files: List[UploadFile] = File(...)): async def api_upload(request: Request, files: List[UploadFile] = File(...)):
@@ -78,8 +146,10 @@ def setup_upload_routes(upload_handler):
for u in files: for u in files:
try: try:
meta = upload_handler.save_upload(u, client_ip, owner=effective_user(request)) owner = effective_user(request)
out.append({ meta = upload_handler.save_upload(u, client_ip, owner=owner)
gallery_id = _promote_chat_image_to_gallery(meta, owner)
item = {
"id": meta["id"], "id": meta["id"],
"name": meta["name"], "name": meta["name"],
"mime": meta["mime"], "mime": meta["mime"],
@@ -89,7 +159,10 @@ def setup_upload_routes(upload_handler):
"width": meta.get("width"), "width": meta.get("width"),
"height": meta.get("height"), "height": meta.get("height"),
"is_duplicate": meta.get("is_duplicate", False) "is_duplicate": meta.get("is_duplicate", False)
}) }
if gallery_id:
item["gallery_id"] = gallery_id
out.append(item)
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
+4 -1
View File
@@ -907,7 +907,10 @@ def _anthropic_rejects_temperature(model: str) -> bool:
return (int(match.group(1)), int(match.group(2))) >= (4, 7) return (int(match.group(1)), int(match.group(2))) >= (4, 7)
# Models that support structured thinking — may output </think> without opening tag # Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma") _THINKING_MODEL_PATTERNS = (
"qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax",
"m2-reap", "gemma", "stepfun", "step-3", "step3",
)
def _supports_thinking(model: str) -> bool: def _supports_thinking(model: str) -> bool:
"""Check if model supports structured thinking output.""" """Check if model supports structured thinking output."""
+124 -2
View File
@@ -39,6 +39,10 @@ _XML_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>", r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>",
re.IGNORECASE, re.IGNORECASE,
) )
# Opening <tool_call>/<function_call> tag (optionally namespace-prefixed,
# case-insensitive) followed by everything up to the end of the text (\Z).
# Group 1 is the remainder after the tag and any leading whitespace. No
# closing tag is required, so the match always runs to the end of the input.
_XML_OPEN_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z",
re.IGNORECASE,
)
_XML_INVOKE_RE = re.compile( _XML_INVOKE_RE = re.compile(
r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>', r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>',
re.IGNORECASE, re.IGNORECASE,
@@ -47,6 +51,24 @@ _XML_PARAM_RE = re.compile(
r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>', r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>',
re.IGNORECASE, re.IGNORECASE,
) )
# Generic <name>body</name> element. Group 1 is the tag name (a letter or
# underscore followed by word characters or hyphens); group 2 is the
# non-greedy body. The closing tag must repeat the opening name via the \1
# backreference; matching is case-insensitive.
_XML_DIRECT_TOOL_RE = re.compile(
r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
re.IGNORECASE,
)
# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
# <tool▁calls▁begin> ... <tool▁calls▁end>
# <tool▁call▁begin>tool_name<tool▁sep>{...}<tool▁call▁end>
# These can leak as text through llama.cpp/Ollama-style endpoints when the
# engine does not return structured OpenAI tool_calls.
_STEPFUN_TOOL_CALL_RE = re.compile(
r"<tool▁call▁begin>\s*([A-Za-z_][\w.-]*)\s*<tool▁sep>\s*([\s\S]*?)\s*<tool▁call▁end>",
re.IGNORECASE,
)
_STEPFUN_TOOL_CALLS_WRAPPER_RE = re.compile(
r"</?tool▁calls▁(?:begin|end)>",
re.IGNORECASE,
)
# Pattern 4: <tool_code> blocks (MiniMax-M2.5 style) # Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
# {tool => 'tool_name', args => '<param>value</param>'} # {tool => 'tool_name', args => '<param>value</param>'}
@@ -446,6 +468,76 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
return function_call_to_tool_block(tool_name, json.dumps(params)) return function_call_to_tool_block(tool_name, json.dumps(params))
# Argument name that a bare (non-JSON-object) tool body is assigned to, per
# canonical tool name. Tools not listed here receive the body as "content".
_BARE_BODY_ARG_KEYS = {
    "web_search": "query",
    "web_fetch": "url",
    "bash": "command",
    "python": "code",
    "read_file": "path",
    "write_file": "path",
}


def _params_from_tool_body(mapped: str, body: str) -> dict:
    """Turn the text inside a tool tag into an arguments dict for *mapped*.

    A JSON object is used as-is. Anything else is treated as the tool's single
    primary argument (see ``_BARE_BODY_ARG_KEYS``). That includes bodies that
    are valid JSON but not an object, such as a bare number, ``true`` or a
    quoted string: these must not collapse to an empty argument set,
    otherwise e.g. a numeric search query would be dropped.
    """
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    # A JSON-quoted string carries its payload inside the quotes; every other
    # non-object value is passed through as the raw body text.
    text = parsed if isinstance(parsed, str) and parsed.strip() else body
    return {_BARE_BODY_ARG_KEYS.get(mapped, "content"): text}


def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
    """Parse direct XML tool tags inside <tool_call>.

    Some local models emit:
    <tool_call><web_search>query</web_search></tool_call>
    instead of the invoke/parameter shape:
    <tool_call><invoke name="web_search"><parameter name="query">query</parameter></invoke></tool_call>

    Keep this as an adapter to the canonical function-call converter so aliases
    and per-tool argument formatting stay in one place.

    Args:
        tool_match: Regex match whose group 1 is the tag name and group 2 the
            tag body.

    Returns:
        The converted tool block, or None when the tag is a structural
        wrapper, names an unknown tool, or has an empty body.
    """
    tool_name = tool_match.group(1).lower().replace("-", "_")
    # Structural tags are handled by the invoke/parameter parsers, not here.
    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
        return None
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = tool_match.group(2).strip()
    if not body:
        return None
    params = _params_from_tool_body(mapped, body)
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))


def _parse_stepfun_tool_call(call_match) -> Optional[ToolBlock]:
    """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.

    Args:
        call_match: Regex match whose group 1 is the tool name and group 2 the
            argument payload that follows the separator token.

    Returns:
        The converted tool block, or None when the tool name is unknown or the
        payload is empty.
    """
    tool_name = call_match.group(1).lower().replace("-", "_").replace(".", "_")
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = call_match.group(2).strip()
    if not body:
        return None
    params = _params_from_tool_body(mapped, body)
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]: def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
"""Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style).""" """Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style)."""
# Extract tool name # Extract tool name
@@ -511,8 +603,9 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models) 2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models)
3. XML-style <tool_call>/<invoke> blocks 3. XML-style <tool_call>/<invoke> blocks
4. <tool_code> blocks (MiniMax-M2.5 style) 4. <tool_code> blocks (MiniMax-M2.5 style)
5. DeepSeek DSML markup (normalized to <invoke> first) 5. StepFun Step-3 native <tool▁call▁begin> tokens
6. Non-native local model fallback: prose mentioning web_search followed by 6. DeepSeek DSML markup (normalized to <invoke> first)
7. Non-native local model fallback: prose mentioning web_search followed by
bare JSON args, e.g. {"query":"...", "time_filter":"week"} bare JSON args, e.g. {"query":"...", "time_filter":"week"}
`skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code
@@ -567,12 +660,38 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# Pattern 3: XML-style <tool_call>/<invoke> blocks # Pattern 3: XML-style <tool_call>/<invoke> blocks
if not blocks: if not blocks:
for step_call in _STEPFUN_TOOL_CALL_RE.finditer(text):
block = _parse_stepfun_tool_call(step_call)
if block:
blocks.append(block)
if blocks:
return blocks
# Try wrapped: <tool_call><invoke ...>...</invoke></tool_call> # Try wrapped: <tool_call><invoke ...>...</invoke></tool_call>
for m in _XML_TOOL_CALL_RE.finditer(text): for m in _XML_TOOL_CALL_RE.finditer(text):
for inv in _XML_INVOKE_RE.finditer(m.group(1)): for inv in _XML_INVOKE_RE.finditer(m.group(1)):
block = _parse_xml_invoke(inv) block = _parse_xml_invoke(inv)
if block: if block:
blocks.append(block) blocks.append(block)
if not blocks:
for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Some local models stream an opening <tool_call> wrapper and a
# complete inner tool tag, but forget the closing </tool_call>.
if not blocks:
for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
body = m.group(1)
for inv in _XML_INVOKE_RE.finditer(body):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if blocks:
break
for direct in _XML_DIRECT_TOOL_RE.finditer(body):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Try bare <invoke> without wrapper # Try bare <invoke> without wrapper
if not blocks: if not blocks:
for inv in _XML_INVOKE_RE.finditer(text): for inv in _XML_INVOKE_RE.finditer(text):
@@ -614,7 +733,10 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
text = _normalize_dsml(text) text = _normalize_dsml(text)
cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text) cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
cleaned = _TOOL_CALL_RE.sub('', cleaned) cleaned = _TOOL_CALL_RE.sub('', cleaned)
cleaned = _STEPFUN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _STEPFUN_TOOL_CALLS_WRAPPER_RE.sub('', cleaned)
cleaned = _XML_TOOL_CALL_RE.sub('', cleaned) cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _TOOL_CODE_RE.sub('', cleaned) cleaned = _TOOL_CODE_RE.sub('', cleaned)
if not skip_fenced: if not skip_fenced:
raw_web_json = _parse_raw_web_json_lookup(cleaned) raw_web_json = _parse_raw_web_json_lookup(cleaned)
+93 -19
View File
@@ -230,14 +230,30 @@ export function _isMetal() {
} }
/** Detect model-specific vLLM optimizations */ /** Detect model-specific vLLM optimizations */
/**
 * Report whether a model name looks like a StepFun Step-3.x model.
 * The check is a case-insensitive substring match; a missing name is
 * treated as the empty string.
 */
function _isStepFunStepModel(modelName) {
  const lowered = (modelName || '').toLowerCase();
  const markers = ['stepfun', 'step-3', 'step3', 'step_3'];
  return markers.some((marker) => lowered.includes(marker));
}
function _detectModelOptimizations(modelName) { function _detectModelOptimizations(modelName) {
const n = (modelName || '').toLowerCase(); const n = (modelName || '').toLowerCase();
const opts = { envVars: [], flags: [], tips: [] }; const opts = { envVars: [], flags: [], tips: [] };
// StepFun Step-3.x MoE models. Their tokenizer defines the Step tool-call
// and thinking tags; vLLM/SGLang need the step3p5 parser instead of generic
// Hermes/XML guesses, and the MoE backend should default to expert parallel.
if (_isStepFunStepModel(modelName)) {
opts.flags.push('--enable-expert-parallel');
opts.tips.push('StepFun Step-3 MoE: expert parallel');
opts.tips.push('StepFun parser: step3p5 for native tool calls and reasoning tags');
}
// Qwen3.5 MoE models — MoE-specific env vars + expert-parallel. // Qwen3.5 MoE models — MoE-specific env vars + expert-parallel.
// The --reasoning-parser flag is added uniformly below via // The --reasoning-parser flag is added uniformly below via
// _detectReasoningParser, no longer hardcoded here. // _detectReasoningParser, no longer hardcoded here.
if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) { else if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4'); opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4');
opts.flags.push('--enable-expert-parallel'); opts.flags.push('--enable-expert-parallel');
opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels'); opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels');
@@ -312,6 +328,9 @@ function _detectModelOptimizations(modelName) {
*/ */
export function _detectReasoningParser(modelName) { export function _detectReasoningParser(modelName) {
const n = (modelName || '').toLowerCase(); const n = (modelName || '').toLowerCase();
// StepFun Step-3.x uses Step's native <think> / tool-call tokens. vLLM
// registers this parser as step3p5.
if (_isStepFunStepModel(modelName)) return 'step3p5';
// MiniMax M3 — newer vLLM nightly/parser builds use minimax_m3. This must // MiniMax M3 — newer vLLM nightly/parser builds use minimax_m3. This must
// be checked before the M2.x rule and before the generic MiniMax tool parser. // be checked before the M2.x rule and before the generic MiniMax tool parser.
if (n.includes('minimax') && /\bm3\b/.test(n)) return 'minimax_m3'; if (n.includes('minimax') && /\bm3\b/.test(n)) return 'minimax_m3';
@@ -348,6 +367,7 @@ export function _detectReasoningParser(modelName) {
*/ */
export function _detectToolParser(modelName) { export function _detectToolParser(modelName) {
const n = (modelName || '').toLowerCase(); const n = (modelName || '').toLowerCase();
if (_isStepFunStepModel(modelName)) return 'step3p5';
if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder'; if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder';
if (n.includes('qwen3')) return 'qwen3_xml'; if (n.includes('qwen3')) return 'qwen3_xml';
if (n.includes('qwen')) return 'hermes'; // Qwen2.5 / Qwen2 / Qwen1.5 if (n.includes('qwen')) return 'hermes'; // Qwen2.5 / Qwen2 / Qwen1.5
@@ -601,6 +621,13 @@ export function _buildServeCmd(f, modelName, backend) {
if (f.dtype && f.dtype !== 'auto') cmd += ` --dtype ${f.dtype}`; if (f.dtype && f.dtype !== 'auto') cmd += ` --dtype ${f.dtype}`;
if (f.max_seqs && f.max_seqs.toString().trim()) cmd += ` --max-running-requests ${f.max_seqs.toString().trim()}`; if (f.max_seqs && f.max_seqs.toString().trim()) cmd += ` --max-running-requests ${f.max_seqs.toString().trim()}`;
if (f.trust_remote) cmd += ' --trust-remote-code'; if (f.trust_remote) cmd += ' --trust-remote-code';
if (f.auto_tool) cmd += ` --enable-auto-tool-choice --tool-call-parser ${_detectToolParser(modelName)}`;
if (f.expert_parallel) cmd += ' --enable-expert-parallel';
if (f.reasoning_parser) {
const rp = typeof f.reasoning_parser === 'string' && f.reasoning_parser !== 'true'
? f.reasoning_parser : (f._reasoning_parser_value || _detectReasoningParser(modelName) || '');
if (rp) cmd += ` --reasoning-parser ${rp}`;
}
if (!f.prefix_cache) cmd += ' --disable-radix-cache'; if (!f.prefix_cache) cmd += ' --disable-radix-cache';
if (f.enforce_eager) cmd += ' --disable-cuda-graph'; if (f.enforce_eager) cmd += ' --disable-cuda-graph';
} else if (backend === 'llamacpp') { } else if (backend === 'llamacpp') {
@@ -909,10 +936,10 @@ async function _fetchDependencies() {
// matches the engine you're configuring. Unknown packages get no // matches the engine you're configuring. Unknown packages get no
// icon (the name alone is fine for librosa, hf_transfer, etc.). // icon (the name alone is fine for librosa, hf_transfer, etc.).
const _DEP_GLYPHS = { const _DEP_GLYPHS = {
vllm: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M3 4l7 16 7-16"/><path d="M14 4l4 9 3-9"/></svg>', vllm: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M3 4l7 16 7-16"/><path d="M14 4l4 9 3-9"/></svg>',
sglang: '<svg width="13" height="13" viewBox="0 0 24 24" fill="currentColor" stroke="none" aria-hidden="true"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>', sglang: '<span aria-hidden="true" style="display:block;width:13px;height:13px;background:currentColor;-webkit-mask:url(/static/icons/sglang-mark.png) center/contain no-repeat;mask:url(/static/icons/sglang-mark.png) center/contain no-repeat;"></span>',
llama_cpp: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="9"/><path d="M8 12h8M12 8v8"/></svg>', llama_cpp: '<svg width="13" height="13" viewBox="0 0 600 600" fill="none" aria-hidden="true"><path d="M600 392L504.249 558L504.137 557.929C487.252 584.069 458.193 600 426.864 600H120L240 392H600Z" fill="currentColor"/><path d="M240 392H0L199.602 46.0254C216.032 17.5463 246.411 0 279.29 0H466.154L240 392Z" fill="currentColor"/></svg>',
ollama: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M6 10a6 6 0 0 1 12 0v4a4 4 0 0 1-8 0v-1"/><circle cx="10" cy="9" r="1"/><circle cx="14" cy="9" r="1"/></svg>', ollama: '<img src="/static/icons/ollama-mark-crop.png" alt="" aria-hidden="true" width="13" height="13" style="display:block;width:13px;height:13px;object-fit:contain;" />',
diffusers: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="4"/><path d="M12 2v3M12 19v3M2 12h3M19 12h3M5 5l2 2M17 17l2 2M5 19l2-2M17 7l2-2"/></svg>', diffusers: '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="4"/><path d="M12 2v3M12 19v3M2 12h3M19 12h3M5 5l2 2M17 17l2 2M5 19l2-2M17 7l2-2"/></svg>',
}; };
const _depGlyphHtml = (name) => { const _depGlyphHtml = (name) => {
@@ -2138,7 +2165,10 @@ function _wireTabEvents(body) {
} }
} }
const shortName = repo.split('/').pop(); const shortName = repo.split('/').pop();
_retryDownload(shortName, payload); const displayName = payload.include
? `${shortName} · ${_ggufQuantFromPath(String(payload.include).replace(/\*/g, '')) || String(payload.include).replace(/\*/g, '').replace(/\.gguf$/i, '')}`
: shortName;
_retryDownload(displayName, payload);
dlInput.value = ''; dlInput.value = '';
}; };
dlBtn.addEventListener('click', triggerDownload); dlBtn.addEventListener('click', triggerDownload);
@@ -2179,18 +2209,13 @@ function _wireTabEvents(body) {
const folded = dlFoldBody.classList.contains('is-folded'); const folded = dlFoldBody.classList.contains('is-folded');
_setFolded(!folded); _setFolded(!folded);
}); });
// Auto-fold on any downward scroll inside the cookbook modal, // Auto-fold on any downward scroll inside the cookbook modal. Do not
// and auto-expand when the user scrolls all the way back to the // auto-expand on upward/top scroll — once the user collapses Download,
// top of whichever scroller they're in. The chevron ▸ still // it should stay collapsed until the header is clicked again.
// toggles manually.
const _maybeFold = () => { const _maybeFold = () => {
if (dlFoldBody.classList.contains('is-folded')) return; if (dlFoldBody.classList.contains('is-folded')) return;
_setFolded(true, /* persist */ false); _setFolded(true, /* persist */ false);
}; };
const _maybeExpand = () => {
if (!dlFoldBody.classList.contains('is-folded')) return;
_setFolded(false, /* persist */ false);
};
// Capture phase so scrolls on nested scrollers (.hwfit-list, // Capture phase so scrolls on nested scrollers (.hwfit-list,
// .cookbook-body, .modal-content) all hit us. // .cookbook-body, .modal-content) all hit us.
const _modal = dlFold.closest('#cookbook-modal') || document; const _modal = dlFold.closest('#cookbook-modal') || document;
@@ -2205,7 +2230,6 @@ function _wireTabEvents(body) {
const y = tgt.scrollTop; const y = tgt.scrollTop;
const prev = _lastY.get(tgt) || 0; const prev = _lastY.get(tgt) || 0;
if (y > prev) _maybeFold(); if (y > prev) _maybeFold();
else if (y <= 0) _maybeExpand();
_lastY.set(tgt, y); _lastY.set(tgt, y);
}, true); }, true);
} }
@@ -2621,10 +2645,10 @@ function _renderRecipes() {
html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, qwen2.5:14b, or HF URL" style="flex:1;min-width:0;" />`; html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, qwen2.5:14b, or HF URL" style="flex:1;min-width:0;" />`;
html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`; html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`;
html += `</div>`; html += `</div>`;
html += `<div id="cookbook-dl-gguf-row" style="display:none;margin-top:1px;gap:5px;align-items:center;font-size:11px;">`; html += `<div id="cookbook-dl-gguf-row" class="cookbook-dl-gguf-row" style="display:none;">`;
html += `<span style="opacity:0.65;flex-shrink:0;">GGUF</span>`; html += `<span class="cookbook-dl-gguf-label">GGUF</span>`;
html += `<select class="cookbook-field-input" id="cookbook-dl-gguf-quant" style="height:28px;min-width:118px;flex:0 0 auto;"></select>`; html += `<select class="cookbook-field-input" id="cookbook-dl-gguf-quant"></select>`;
html += `<span id="cookbook-dl-gguf-note" style="opacity:0.55;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;"></span>`; html += `<span id="cookbook-dl-gguf-note"></span>`;
html += `</div>`; html += `</div>`;
// Ollama-library browse used to live here as its own collapsible dropdown, // Ollama-library browse used to live here as its own collapsible dropdown,
// but that duplicated the Engine filter (which already has Ollama). The // but that duplicated the Engine filter (which already has Ollama). The
@@ -3047,6 +3071,56 @@ export function isVisible() {
return !modal.classList.contains('hidden'); return !modal.classList.contains('hidden');
} }
// Guard state for _refreshSharedCookbookState: whether a sync is currently
// running, and the Date.now() value recorded when the last one started.
let _sharedSyncInFlight = false;
let _sharedSyncLast = 0;

// Re-pull the shared cookbook state from the server and repaint the active tab.
// Does nothing when isVisible() is false, when a previous refresh is still in
// flight, or when the last refresh started less than 1500 ms ago.
// `reason` is only used to label the warning logged on failure.
async function _refreshSharedCookbookState(reason = '') {
    if (!isVisible()) return;
    if (_sharedSyncInFlight) return;
    const startedAt = Date.now();
    if (startedAt - _sharedSyncLast < 1500) return;
    _sharedSyncInFlight = true;
    _sharedSyncLast = startedAt;
    try {
        const synced = await _syncFromServer();
        if (!synced) return;
        // Best effort: a failure to read the stored env state must not stop the repaint.
        try {
            Object.assign(_envState, _readStoredEnvState());
        } catch {}
        const root = document.getElementById('cookbook-modal');
        const backend = root?.querySelector('.cookbook-tab.active')?.dataset?.backend || '';
        switch (backend) {
            case 'Running':
                _renderRunningTab();
                break;
            case 'Settings': {
                // Skip the re-render while focus sits inside the settings stack.
                const focused = document.activeElement;
                const isEditing = focused && focused.closest && focused.closest('.cookbook-settings-stack');
                if (isEditing) break;
                _renderRecipes();
                const settingsTab = document.querySelector('#cookbook-modal .cookbook-tab[data-backend="Settings"]');
                if (settingsTab) settingsTab.click();
                break;
            }
            default:
                break;
        }
    } catch (err) {
        console.warn('[cookbook] shared state refresh failed', reason, err);
    } finally {
        _sharedSyncInFlight = false;
    }
}
// After every sync, reload the stored env state and repaint the Running tab
// when it is the active tab of a visible cookbook modal.
document.addEventListener('cookbook:state-synced', function onCookbookStateSynced() {
    try {
        Object.assign(_envState, _readStoredEnvState());
    } catch {}
    if (!isVisible()) return;
    const backend = document.querySelector('#cookbook-modal .cookbook-tab.active')?.dataset?.backend || '';
    if (backend !== 'Running') return;
    _renderRunningTab();
});

// Refresh when the window regains focus.
window.addEventListener('focus', function onWindowFocus() {
    _refreshSharedCookbookState('focus');
});

// Refresh when the document becomes visible again.
document.addEventListener('visibilitychange', function onVisibilityChange() {
    if (document.visibilityState !== 'visible') return;
    _refreshSharedCookbookState('visible');
});

// Poll every 5 seconds, but only while the Running tab is the active tab of a
// visible modal.
setInterval(function pollActiveRunningTab() {
    if (!isVisible()) return;
    const backend = document.querySelector('#cookbook-modal .cookbook-tab.active')?.dataset?.backend || '';
    if (backend !== 'Running') return;
    _refreshSharedCookbookState('active-poll');
}, 5000);
// Close button // Close button
document.addEventListener('DOMContentLoaded', () => { document.addEventListener('DOMContentLoaded', () => {
const closeBtn = document.getElementById('close-cookbook-modal'); const closeBtn = document.getElementById('close-cookbook-modal');
+20 -3
View File
@@ -85,6 +85,22 @@ function _ggufIncludePattern(model, source) {
return '*.gguf'; return '*.gguf';
} }
// Derive a short display label (normally the quant tag) from a download
// `include` glob such as "*Q4_K_M*" or "UD-Q4_K_XL/*.gguf".
//
// The glob's "*" characters are dropped, then the last path segment and its
// parent directory are searched for a quantization tag. A leading "UD-" prefix
// is stripped from the tag and the result is upper-cased. When no tag is found
// the file name is returned without its ".gguf" extension and without a
// "-00001-of-00003" shard suffix (this may be an empty string).
function _ggufDisplayPartFromInclude(include) {
    const clean = String(include || '').replace(/\*/g, '');
    const parts = clean.split('/').filter(Boolean);
    const file = parts[parts.length - 1] || clean;
    const dir = parts.length > 1 ? parts[parts.length - 2] : '';
    // `_K_XL` is listed explicitly: without it "Q4_K_XL" matched no alternative
    // (the trailing \b cannot sit between "K" and "_"), so names like
    // "UD-Q4_K_XL" fell through to the raw-filename fallback.
    const quant = `${dir} ${file}`.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_(?:XL|[MLS])|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i);
    if (quant) return quant[0].toUpperCase().replace(/^UD-/, '');
    return file.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
// Build the task name for a download: the short model name, followed by
// " · <part>" when the payload carries an `include` pattern that yields a
// non-empty display part.
function _downloadTaskName(shortName, payload) {
    const pattern = payload?.include || '';
    if (!pattern) return shortName;
    const suffix = _ggufDisplayPartFromInclude(pattern);
    if (!suffix) return shortName;
    return `${shortName} · ${suffix}`;
}
function _missingGgufMessage(model) { function _missingGgufMessage(model) {
const name = model?.name || 'this model'; const name = model?.name || 'this model';
if (/\bnvfp4\b/i.test(name)) { if (/\bnvfp4\b/i.test(name)) {
@@ -519,6 +535,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
} }
const shortName = (model.name || repo).split('/').pop(); const shortName = (model.name || repo).split('/').pop();
const taskName = _downloadTaskName(shortName, payload);
const targetHost = host || 'local'; const targetHost = host || 'local';
const tasks = _loadTasks(); const tasks = _loadTasks();
@@ -576,7 +593,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
if (activeOnHost) { if (activeOnHost) {
const queueId = `queue-${Date.now().toString(36)}`; const queueId = `queue-${Date.now().toString(36)}`;
const allTasks = _loadTasks(); const allTasks = _loadTasks();
allTasks.push({ id: queueId, sessionId: queueId, name: shortName, type: 'download', status: 'queued', output: '', ts: Date.now(), payload, remoteHost: host }); allTasks.push({ id: queueId, sessionId: queueId, name: taskName, type: 'download', status: 'queued', output: '', ts: Date.now(), payload, remoteHost: host });
_saveTasks(allTasks); _saveTasks(allTasks);
_renderRunningTab(); _renderRunningTab();
uiModule.showToast(`Queued ${shortName} — waiting for current download`); uiModule.showToast(`Queued ${shortName} — waiting for current download`);
@@ -601,8 +618,8 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
uiModule.showToast('Download failed: ' + (data.error || ''), 9000); uiModule.showToast('Download failed: ' + (data.error || ''), 9000);
return; return;
} }
_addTask(data.session_id, shortName, 'download', payload); _addTask(data.session_id, taskName, 'download', payload);
uiModule.showToast(`Downloading ${shortName}...`); uiModule.showToast(`Downloading ${taskName}...`);
} catch (e) { } catch (e) {
uiModule.showToast('Download failed: ' + e.message, 9000); uiModule.showToast('Download failed: ' + e.message, 9000);
} }
+144 -29
View File
@@ -38,6 +38,47 @@ function _taskBadge(task) {
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status }; return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status };
} }
// Derive a short display label (normally the quant tag) from a GGUF path.
//
// The last path segment and its parent directory are searched for a
// quantization tag. A leading "UD-" prefix is stripped from the tag and the
// result is upper-cased. When no tag is found the file name is returned
// without its ".gguf" extension and without a "-00001-of-00003" shard suffix
// (this may be an empty string).
function _ggufDisplayPartFromPath(path) {
    const parts = String(path || '').split('/').filter(Boolean);
    const file = parts[parts.length - 1] || '';
    const dir = parts.length > 1 ? parts[parts.length - 2] : '';
    const text = `${dir} ${file}`;
    // `_K_XL` is listed explicitly: without it "Q4_K_XL" matched no alternative
    // (the trailing \b cannot sit between "K" and "_"), so names like
    // "UD-Q4_K_XL" fell through to the raw-filename fallback.
    const quant = text.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_(?:XL|[MLS])|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i);
    if (quant) return quant[0].toUpperCase().replace(/^UD-/, '');
    return file.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
// Return the name to show for a download task: `name` followed by
// " · <part>" derived from the task payload's `include` pattern.
// `name` is returned unchanged when there is no include pattern, when it
// already contains " · ", or when no display part can be derived.
function _downloadDisplayName(name, task) {
    // Coerce to a string first: `include` comes from a persisted payload, and a
    // non-string value (e.g. an array of patterns) would otherwise throw on
    // `.replace` while the task card is being rendered.
    const include = String(task?.payload?.include || '');
    if (!include || String(name || '').includes(' · ')) return name;
    const part = _ggufDisplayPartFromPath(include.replace(/\*/g, ''));
    return part ? `${name} · ${part}` : name;
}
// Compute the label shown on a task card.
// - download tasks delegate to _downloadDisplayName
// - serve tasks get " · <part>" appended when a GGUF file is recorded on the
//   payload (payload._fields.gguf_file or payload.gguf_file) and the name does
//   not already contain " · "
// - every other task type just uses the trimmed name
function _taskDisplayName(task) {
    const label = String(task?.name || '').trim();
    switch (task?.type) {
        case 'download':
            return _downloadDisplayName(label, task);
        case 'serve': {
            const payload = task?.payload;
            const ggufPath = payload?._fields?.gguf_file || payload?.gguf_file || '';
            const alreadyTagged = label.includes(' · ');
            if (!ggufPath || alreadyTagged) return label;
            const quantPart = _ggufDisplayPartFromPath(ggufPath);
            return quantPart ? `${label} · ${quantPart}` : label;
        }
        default:
            return label;
    }
}
// True when the task is a download whose status is 'done' or 'completed' and
// that carries something to launch (payload.repo_id or a name).
function _canLaunchDownloadedTask(task) {
    if (task?.type !== 'download') return false;
    const finished = ['done', 'completed'].includes(task.status || '');
    if (!finished) return false;
    return Boolean(task.payload?.repo_id || task.name);
}
// Build the serve-panel field overrides for a finished download.
// Returns null when the task payload has no (non-blank) `include` pattern;
// otherwise forces the llamacpp backend and passes the pattern along as the
// preferred GGUF include.
function _downloadServeFields(task) {
    const pattern = String(task?.payload?.include || '').trim();
    return pattern
        ? { backend: 'llamacpp', _forceBackend: true, _preferredGgufInclude: pattern }
        : null;
}
// A download task whose tmux output still shows an active per-shard line // A download task whose tmux output still shows an active per-shard line
// (e.g. "model-00012-of-00082.safetensors: 56%|") is NOT actually finished — // (e.g. "model-00012-of-00082.safetensors: 56%|") is NOT actually finished —
// the cookbook just lost track. The clear pill becomes a "reconnect" affordance // the cookbook just lost track. The clear pill becomes a "reconnect" affordance
@@ -282,6 +323,40 @@ let _detectToolParser;
let _detectModelOptimizations; let _detectModelOptimizations;
let _buildServeCmd; let _buildServeCmd;
// Resolve which server profile a task belongs to.
//
// Reads the host from task.remoteHost / task.payload.remote_host and the saved
// profile key from task.remoteServerKey / task.payload.remote_server_key, then
// looks the profile up by saved key, by host via _serverByVal, and finally by
// scanning _envState.servers for a matching host.
//
// Returns { host, server, key }:
//   host   - the task's host string ('' when none is recorded)
//   server - the matched profile object, or null
//   key    - _serverKey(server) when a profile matched (savedKey if _serverKey
//            is unavailable); otherwise savedKey, else host, else 'local'
function _taskServerSelection(task) {
    const host = task?.remoteHost || task?.payload?.remote_host || '';
    const savedKey = task?.remoteServerKey || task?.payload?.remote_server_key || '';
    // The server list may be missing or malformed (e.g. before the first sync);
    // treat that as "no profiles" instead of throwing on `.find`.
    const servers = Array.isArray(_envState?.servers) ? _envState.servers : [];
    const server = (savedKey ? _serverByVal(savedKey) : null)
        || (host ? _serverByVal(host) : null)
        || (host ? servers.find(s => s && s.host === host) : null)
        || null;
    const key = server ? (_serverKey ? _serverKey(server) : savedKey) : (savedKey || (host || 'local'));
    return { host, server, key };
}
// Make the task's server the active one: update _envState (host, server key and
// the env / envPath / platform of the matched profile) and point the server
// dropdowns at it. Returns the resolved { host, server, key }.
function _selectTaskServer(task) {
    const selection = _taskServerSelection(task);
    const { host, server, key } = selection;

    _envState.remoteHost = host;
    _envState.remoteServerKey = key === 'local' ? '' : key;

    // With a matched profile copy its environment; with no host at all reset to
    // the defaults. A host without a matching profile leaves the env untouched.
    if (server || !host) {
        const profile = server || {};
        _envState.env = profile.env || 'none';
        _envState.envPath = profile.envPath || '';
        _envState.platform = profile.platform || '';
    }

    const wanted = key || (host || 'local');
    const dropdowns = document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server');
    for (const sel of dropdowns) {
        if (!sel || sel.tagName !== 'SELECT') continue;
        const values = Array.from(sel.options, opt => opt.value);
        if (values.includes(wanted)) {
            sel.value = wanted;
        } else if (host && values.includes(host)) {
            // Fall back to an option keyed by the raw host string.
            sel.value = host;
        } else {
            sel.value = host ? wanted : 'local';
        }
    }
    return { host, server, key };
}
// When a new action is started (download / dependency / serve), this holds the // When a new action is started (download / dependency / serve), this holds the
// new task's id so the next render collapses every other card and leaves only // new task's id so the next render collapses every other card and leaves only
// the new one open. Consumed (cleared) by _renderRunningTab. // the new one open. Consumed (cleared) by _renderRunningTab.
@@ -654,16 +729,31 @@ function _loadPrunedTasks() {
const _REMOVED_KEY = 'cookbook-removed-tasks'; const _REMOVED_KEY = 'cookbook-removed-tasks';
const _TOMBSTONE_TTL_MS = 24 * 3600 * 1000; const _TOMBSTONE_TTL_MS = 24 * 3600 * 1000;
function _loadTombstones() { function _loadTombstones() {
try { return JSON.parse(localStorage.getItem(_REMOVED_KEY)) || {}; } try {
const tomb = JSON.parse(localStorage.getItem(_REMOVED_KEY)) || {};
const now = Date.now();
let changed = false;
for (const k in tomb) {
if (now - tomb[k] > _TOMBSTONE_TTL_MS) {
delete tomb[k];
changed = true;
}
}
if (changed) localStorage.setItem(_REMOVED_KEY, JSON.stringify(tomb));
return tomb;
}
catch { return {}; } catch { return {}; }
} }
// Persist the tombstone map to localStorage under _REMOVED_KEY.
// A falsy argument is stored as an empty object.
function _saveTombstones(tomb) {
    const serialized = JSON.stringify(tomb || {});
    localStorage.setItem(_REMOVED_KEY, serialized);
}
function _tombstoneTask(id) { function _tombstoneTask(id) {
if (!id) return; if (!id) return;
const tomb = _loadTombstones(); const tomb = _loadTombstones();
const now = Date.now(); const now = Date.now();
tomb[id] = now; tomb[id] = now;
for (const k in tomb) { if (now - tomb[k] > _TOMBSTONE_TTL_MS) delete tomb[k]; } for (const k in tomb) { if (now - tomb[k] > _TOMBSTONE_TTL_MS) delete tomb[k]; }
localStorage.setItem(_REMOVED_KEY, JSON.stringify(tomb)); _saveTombstones(tomb);
} }
function _isTombstoned(id) { function _isTombstoned(id) {
const ts = _loadTombstones()[id]; const ts = _loadTombstones()[id];
@@ -1098,6 +1188,7 @@ function _syncToServer() {
if (!_envState || !Array.isArray(_envState.servers) || _envState.servers.length === 0) return; if (!_envState || !Array.isArray(_envState.servers) || _envState.servers.length === 0) return;
const state = { const state = {
tasks: _loadTasks(), tasks: _loadTasks(),
removedTasks: _loadTombstones(),
presets: _loadPresets(), presets: _loadPresets(),
env: _envState, env: _envState,
serveState: null, serveState: null,
@@ -1146,9 +1237,16 @@ export async function _syncFromServer() {
const localTasks = _loadTasks(); const localTasks = _loadTasks();
const serverTasks = state.tasks || []; const serverTasks = state.tasks || [];
const serverTombstones = (state.removedTasks && typeof state.removedTasks === 'object') ? state.removedTasks : {};
const localTombstones = _loadTombstones();
const mergedTombstones = { ...serverTombstones, ...localTombstones };
for (const [id, ts] of Object.entries(serverTombstones)) {
if (localTombstones[id] == null || Number(ts) > Number(localTombstones[id])) mergedTombstones[id] = ts;
}
_saveTombstones(mergedTombstones);
const localIds = new Set(localTasks.map(t => t.sessionId)); const localIds = new Set(localTasks.map(t => t.sessionId));
const merged = [...localTasks]; const merged = localTasks.filter(t => !_isTombstoned(t.sessionId));
for (const t of serverTasks) { for (const t of serverTasks) {
if (!localIds.has(t.sessionId) && !_isTombstoned(t.sessionId)) { if (!localIds.has(t.sessionId) && !_isTombstoned(t.sessionId)) {
merged.push(t); merged.push(t);
@@ -1165,6 +1263,18 @@ export async function _syncFromServer() {
const { remoteHost: _rh, env: _e, envPath: _ep, platform: _pf, ...settings } = state.env; const { remoteHost: _rh, env: _e, envPath: _ep, platform: _pf, ...settings } = state.env;
delete settings.hfToken; delete settings.hfToken;
Object.assign(_envState, settings); Object.assign(_envState, settings);
const selected = (_envState.remoteServerKey && _serverByVal?.(_envState.remoteServerKey))
|| (_envState.remoteHost ? (_envState.servers || []).find(s => s.host === _envState.remoteHost) : null);
if (selected) {
_envState.env = selected.env || 'none';
_envState.envPath = selected.envPath || '';
_envState.platform = selected.platform || '';
} else if (!_envState.remoteHost) {
const local = (_envState.servers || []).find(s => !s.host || s.host === 'local');
_envState.env = local?.env || 'none';
_envState.envPath = local?.envPath || '';
_envState.platform = local?.platform || '';
}
const { hfToken, ...safeState } = _envState; const { hfToken, ...safeState } = _envState;
localStorage.setItem('cookbook-last-state', JSON.stringify(safeState)); localStorage.setItem('cookbook-last-state', JSON.stringify(safeState));
} }
@@ -1174,6 +1284,7 @@ export async function _syncFromServer() {
if (state.serveState) { if (state.serveState) {
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify(state.serveState)); localStorage.setItem(SERVE_STATE_KEY, JSON.stringify(state.serveState));
} }
document.dispatchEvent(new CustomEvent('cookbook:state-synced', { detail: state }));
return true; return true;
} catch { return false; } } catch { return false; }
} }
@@ -1332,17 +1443,11 @@ async function _openServeEditForTask(task, cmdOverride, fieldOverrides = null) {
if (fieldOverrides && typeof fieldOverrides === 'object') { if (fieldOverrides && typeof fieldOverrides === 'object') {
fields = { ...(fields || {}), ...fieldOverrides }; fields = { ...(fields || {}), ...fieldOverrides };
} }
// Switch the active server to the one this serve ran on (mirrors _openEdit). fields = { ...(fields || {}), _replaceTaskId: task.sessionId };
const _tHost = task.remoteHost || ''; // Switch the active server to the exact profile this serve ran on. The
_envState.remoteHost = _tHost; // dropdown stores stable srv: keys, not raw host strings, so preserving only
const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost) // task.remoteHost can relaunch against the local container by accident.
|| _envState.servers.find(s => s.host === _tHost); _selectTaskServer(task);
if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; }
else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; }
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
if (!sel || sel.tagName !== 'SELECT') return;
sel.value = _tHost || 'local';
});
try { try {
const { openServePanelForRepo } = await import('./cookbookServe.js'); const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo, fields); await openServePanelForRepo(repo, fields);
@@ -1553,6 +1658,20 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local'); const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local');
const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local'); const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local');
const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || ''); const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || '');
const _replaceTaskId = fields?._replaceTaskId || '';
if (_replaceTaskId) {
try {
const _old = _loadTasks().find(t => t.sessionId === _replaceTaskId);
if (_old && _old.type === 'serve') {
await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _tmuxGracefulKill(_old) }),
});
_removeTask(_old.sessionId);
}
} catch {}
}
// Replace any serve already targeting this same host:port — you can't run two // Replace any serve already targeting this same host:port — you can't run two
// servers on one port, so re-serving (or retrying) should stop & remove the // servers on one port, so re-serving (or retrying) should stop & remove the
@@ -1750,7 +1869,7 @@ export function _renderRunningTab() {
'<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">' + '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">' +
'<h2 style="margin:0;padding:0;line-height:1;">Active <span id="running-count" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal">' + activeCount + '</span></h2>' + '<h2 style="margin:0;padding:0;line-height:1;">Active <span id="running-count" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal">' + activeCount + '</span></h2>' +
'</div>' + '</div>' +
'<p class="memory-desc doclib-desc" style="margin-top:6px;">Active downloads and serving processes.</p>' + '<p class="memory-desc doclib-desc" style="margin-top:6px;">Active downloads, installs and model launches.</p>' +
'</div>'; '</div>';
const firstGroup = body.querySelector('.cookbook-group'); const firstGroup = body.querySelector('.cookbook-group');
if (firstGroup) body.insertBefore(group, firstGroup); if (firstGroup) body.insertBefore(group, firstGroup);
@@ -1863,6 +1982,7 @@ export function _renderRunningTab() {
return; return;
} }
if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return; if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return;
toRemove.forEach(t => _tombstoneTask(t.sessionId));
const remaining = allTasks.filter(t => _taskServerKey(t) !== host || !_canClearTask(t)); const remaining = allTasks.filter(t => _taskServerKey(t) !== host || !_canClearTask(t));
_saveTasks(remaining); _saveTasks(remaining);
// Fade/slide each finished card out (same exit as the per-card clear) // Fade/slide each finished card out (same exit as the per-card clear)
@@ -2000,11 +2120,12 @@ export function _renderRunningTab() {
const _bdg = _taskBadge(task); const _bdg = _taskBadge(task);
const _bdgTitle = (task._unreachable && task.status === 'running') ? ' title="Server not responding — it may have crashed"' : ''; const _bdgTitle = (task._unreachable && task.status === 'running') ? ' title="Server not responding — it may have crashed"' : '';
const displayName = _taskDisplayName(task);
el.innerHTML = ` el.innerHTML = `
<div class="cookbook-task-header"> <div class="cookbook-task-header">
<span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span> <span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span>
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(task.name)}</span> <span class="cookbook-task-name">${modelLogo(task.name)}${esc(displayName)}</span>
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span> <span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span>${_canLaunchDownloadedTask(task) ? '<button type="button" class="cookbook-task-serve-btn" title="Open in Launch"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg><span>Launch</span></button>' : ''}<span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? 
'' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
<button type="button" class="cookbook-task-start-now" title="Start this queued download now" style="display:${(task.type === 'download' && task.status === 'queued') ? '' : 'none'}"><svg width="11" height="11" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="8 5 19 12 8 19 8 5"/></svg><span>start now</span></button> <button type="button" class="cookbook-task-start-now" title="Start this queued download now" style="display:${(task.type === 'download' && task.status === 'queued') ? '' : 'none'}"><svg width="11" height="11" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="8 5 19 12 8 19 8 5"/></svg><span>start now</span></button>
<span class="cookbook-task-status ${_bdg.cls}"${_bdgTitle}>${esc(_bdg.text)}</span> <span class="cookbook-task-status ${_bdg.cls}"${_bdgTitle}>${esc(_bdg.text)}</span>
<button class="cookbook-task-menu-btn" title="Actions">&#8942;</button> <button class="cookbook-task-menu-btn" title="Actions">&#8942;</button>
@@ -2076,19 +2197,11 @@ export function _renderRunningTab() {
e.stopPropagation(); e.stopPropagation();
const repo = task.payload?.repo_id || task.name; const repo = task.payload?.repo_id || task.name;
if (!repo) { uiModule.showToast('No model info on this task'); return; } if (!repo) { uiModule.showToast('No model info on this task'); return; }
// Point the active server at the one it downloaded to. // Point the active server at the exact profile it downloaded to.
const _tHost = task.remoteHost || ''; _selectTaskServer(task);
_envState.remoteHost = _tHost;
const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost)
|| _envState.servers.find(s => s.host === _tHost);
if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; }
else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; }
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
if (sel && sel.tagName === 'SELECT') sel.value = _tHost || 'local';
});
try { try {
const { openServePanelForRepo } = await import('./cookbookServe.js'); const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo); await openServePanelForRepo(repo, _downloadServeFields(task));
// Serving it supersedes the finished download — clear the card from // Serving it supersedes the finished download — clear the card from
// the Running tab (smooth exit) now that we've jumped to Serve. // the Running tab (smooth exit) now that we've jumped to Serve.
_animateOutThenRemove(el, task.sessionId); _animateOutThenRemove(el, task.sessionId);
@@ -3558,7 +3671,9 @@ async function _probeEndpointUntilOnline(epId, host, port) {
try { try {
// Hit the probe endpoint — it re-probes server-side and updates // Hit the probe endpoint — it re-probes server-side and updates
// cached_models. We consume (and discard) the SSE stream. // cached_models. We consume (and discard) the SSE stream.
await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).then(r => r.text()).catch(() => {}); const probeRes = await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).catch(() => null);
if (probeRes && probeRes.status === 404) return;
if (probeRes) await probeRes.text().catch(() => {});
const eps = await fetch('/api/model-endpoints', { credentials: 'same-origin' }).then(r => r.json()).catch(() => []); const eps = await fetch('/api/model-endpoints', { credentials: 'same-origin' }).then(r => r.json()).catch(() => []);
const ep = (eps || []).find(e => e.id === epId); const ep = (eps || []).find(e => e.id === epId);
if (ep && (ep.models || []).length) { if (ep && (ep.models || []).length) {
+183 -14
View File
@@ -477,7 +477,9 @@ function _estimateLlamaContextFit(model, fields, modelCtxMax, modelWeightsGb = 0
} }
function _selectedServeTarget(panel) { function _selectedServeTarget(panel) {
const select = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server'); const select = panel?.querySelector?.('#hwfit-server-select')
|| document.getElementById('hwfit-server-select')
|| document.getElementById('hwfit-dl-server');
const servers = Array.isArray(_envState.servers) ? _envState.servers : []; const servers = Array.isArray(_envState.servers) ? _envState.servers : [];
let host = _envState.remoteHost || ''; let host = _envState.remoteHost || '';
let server = host ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host)) : null; let server = host ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host)) : null;
@@ -643,6 +645,122 @@ function _ggufFileLabel(file) {
return `${quant}${base}${size || split ? ` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`; return `${quant}${base}${size || split ? ` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`;
} }
// Derive the short label (normally the quant tag) shown after a serve task's
// name for the GGUF file at `relPath`.
//
// Returns '' for an empty path. Otherwise prefers the `quant` recorded on the
// matching entry of _ggufFilesForModel(model); failing that, searches the file
// name and its parent directory for a quantization tag. A leading "UD-" is
// stripped and the tag upper-cased. When no tag is found the file name is
// returned without ".gguf" and without a "-00001-of-00003" shard suffix.
function _ggufTaskDisplayPart(model, relPath) {
    const rel = String(relPath || '');
    if (!rel) return '';
    const file = _ggufFilesForModel(model).find(f => f.rel_path === rel);
    if (file?.quant) return String(file.quant).toUpperCase().replace(/^UD-/, '');
    const parts = rel.split('/').filter(Boolean);
    const base = parts[parts.length - 1] || '';
    const parent = parts.length > 1 ? parts[parts.length - 2] : '';
    const text = `${parent} ${base}`;
    // `_K_XL` is listed explicitly: without it "Q4_K_XL" matched no alternative
    // (the trailing \b cannot sit between "K" and "_"), so names like
    // "UD-Q4_K_XL" fell through to the raw-filename fallback.
    const quant = text.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_(?:XL|[MLS])|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i);
    if (quant) return quant[0].toUpperCase().replace(/^UD-/, '');
    return base.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
// Build the task name for a serve launch. For the llamacpp and ollama backends
// the GGUF display part of fields.gguf_file is appended as " · <part>" unless
// the name already contains that exact suffix; every other backend just gets
// the trimmed short name.
function _serveTaskDisplayName(shortName, model, fields) {
    const label = String(shortName || '').trim();
    const engine = String(fields?.backend || '').toLowerCase();
    if (!['llamacpp', 'ollama'].includes(engine)) return label;
    const quantPart = _ggufTaskDisplayPart(model, fields?.gguf_file);
    if (!quantPart) return label;
    const suffix = ` · ${quantPart}`;
    return label.includes(suffix) ? label : `${label}${suffix}`;
}
// Normalize a repo-relative GGUF path and reject anything that could escape
// the repo directory.
//
// Backslashes become "/", leading slashes are stripped. Returns '' (meaning
// "unsafe / unusable") when the result is empty, contains a NUL byte, or has
// any ".." path segment; otherwise returns the normalized path.
function _safeGgufRelPath(relPath) {
    const rel = String(relPath || '').replace(/\\/g, '/').replace(/^\/+/, '');
    if (!rel || rel.includes('\0')) return '';
    // Check every segment, not just leading/middle ones: a trailing ".."
    // ("sub/..") also resolves outside the intended file and must be refused.
    if (rel.split('/').includes('..')) return '';
    return rel;
}
// Show a modal asking what to delete from a repo that has several GGUF files.
//
// `repo` is used only in the message text; `files` is an array of file entries
// carrying a `rel_path` (labels come from _ggufFileLabel).
//
// Returns a Promise that resolves with:
//   null                              - Cancel, backdrop click, or Escape
//   { mode: 'repo' }                  - "Whole repo"
//   { mode: 'files', files: [...] }   - "Delete selected" with >= 1 box checked
function _ggufDeleteChoice(repo, files) {
return new Promise(resolve => {
// The overlay is built once, appended to <body>, and reused on later calls.
let overlay = document.getElementById('cookbook-gguf-delete-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'cookbook-gguf-delete-overlay';
overlay.className = 'modal hidden';
overlay.innerHTML =
'<div class="modal-content styled-confirm-box cookbook-gguf-delete-box" role="dialog" aria-modal="true" aria-labelledby="cookbook-gguf-delete-title">' +
'<div class="modal-header"><h4 id="cookbook-gguf-delete-title">Delete GGUF files</h4></div>' +
'<div class="modal-body">' +
'<p id="cookbook-gguf-delete-msg"></p>' +
'<div id="cookbook-gguf-delete-list" class="cookbook-gguf-delete-list"></div>' +
'</div>' +
'<div class="modal-footer cookbook-gguf-delete-actions">' +
'<button type="button" id="cookbook-gguf-delete-cancel" class="confirm-btn confirm-btn-secondary">Cancel</button>' +
'<button type="button" id="cookbook-gguf-delete-repo" class="confirm-btn confirm-btn-secondary">Whole repo</button>' +
'<button type="button" id="cookbook-gguf-delete-selected" class="confirm-btn confirm-btn-danger">Delete selected</button>' +
'</div>' +
'</div>';
document.body.appendChild(overlay);
}
// Normalize every rel_path and drop entries _safeGgufRelPath rejects (it
// returns '' for those), so only vetted paths can be offered or returned.
const safeFiles = files
.map(f => ({ ...f, rel_path: _safeGgufRelPath(f.rel_path) }))
.filter(f => f.rel_path);
const msg = overlay.querySelector('#cookbook-gguf-delete-msg');
const list = overlay.querySelector('#cookbook-gguf-delete-list');
const cancelBtn = overlay.querySelector('#cookbook-gguf-delete-cancel');
const repoBtn = overlay.querySelector('#cookbook-gguf-delete-repo');
const selectedBtn = overlay.querySelector('#cookbook-gguf-delete-selected');
// Remembered so focus can be handed back when the dialog closes.
const prevFocus = document.activeElement;
msg.textContent = `${repo} has multiple GGUF files. Pick what to delete.`;
// One checkbox row per file; the checkbox value is the index into safeFiles.
list.innerHTML = safeFiles.map((file, idx) => {
// NOTE(review): when `esc` is falsy the label/path go into innerHTML
// unescaped — confirm `esc` is always available in this module.
const label = esc ? esc(_ggufFileLabel(file)) : _ggufFileLabel(file);
const rel = esc ? esc(file.rel_path) : file.rel_path;
return `<label class="cookbook-gguf-delete-row">
<input class="cookbook-gguf-delete-cb" type="checkbox" value="${idx}">
<span class="cookbook-gguf-delete-main">${label}</span>
<span class="cookbook-gguf-delete-path">${rel}</span>
</label>`;
}).join('');
// Hide the overlay, detach every listener added below (the overlay element
// is reused, so they must not accumulate), restore focus, then settle.
function cleanup(result) {
overlay.classList.add('hidden');
overlay.style.display = 'none';
cancelBtn.removeEventListener('click', onCancel);
repoBtn.removeEventListener('click', onRepo);
selectedBtn.removeEventListener('click', onSelected);
overlay.removeEventListener('click', onBackdrop);
document.removeEventListener('keydown', onKey);
try { prevFocus && prevFocus.focus && prevFocus.focus(); } catch {}
resolve(result);
}
function onCancel() { cleanup(null); }
function onRepo() { cleanup({ mode: 'repo' }); }
function onSelected() {
// Map checked indices back to their file entries.
const selected = [...list.querySelectorAll('input[type="checkbox"]:checked')]
.map(input => safeFiles[Number(input.value)])
.filter(Boolean);
// Nothing checked: show a toast and keep the dialog open.
if (!selected.length) {
uiModule.showToast?.('Select at least one GGUF file.');
return;
}
cleanup({ mode: 'files', files: selected });
}
// Only a click on the overlay itself (not on the dialog box) cancels.
function onBackdrop(e) { if (e.target === overlay) cleanup(null); }
function onKey(e) {
if (e.key === 'Escape') {
e.preventDefault();
e.stopPropagation();
cleanup(null);
}
}
cancelBtn.addEventListener('click', onCancel);
repoBtn.addEventListener('click', onRepo);
selectedBtn.addEventListener('click', onSelected);
overlay.addEventListener('click', onBackdrop);
document.addEventListener('keydown', onKey);
// Show the dialog; clearing the inline display undoes cleanup()'s 'none'.
overlay.classList.remove('hidden');
overlay.style.display = '';
selectedBtn.focus();
});
}
function _shellPathExpr(path) { function _shellPathExpr(path) {
const s = String(path || ''); const s = String(path || '');
if (s === '~') return '${HOME}'; if (s === '~') return '${HOME}';
@@ -729,7 +847,7 @@ function _rerenderCachedModels() {
? ` <span class="cookbook-serve-downloading-pill${_isDlActive ? '' : ' is-stalled'}" title="${_isDlActive ? 'Download in progress' : 'Download stalled — retry to resume'}">${_isDlActive ? 'downloading' : 'stalled'}</span>` ? ` <span class="cookbook-serve-downloading-pill${_isDlActive ? '' : ' is-stalled'}" title="${_isDlActive ? 'Download in progress' : 'Download stalled — retry to resume'}">${_isDlActive ? 'downloading' : 'stalled'}</span>`
: ''; : '';
const _favoritePill = _isFavorite ? ' <span class="memory-cat-badge memory-cat-pinned cookbook-serve-fav-badge">pinned</span>' : ''; const _favoritePill = _isFavorite ? ' <span class="memory-cat-badge memory-cat-pinned cookbook-serve-fav-badge">pinned</span>' : '';
html += `<div class="memory-item-title"${_mc ? ` style="color:${_mc}"` : ''}>${modelLogo(m.repo_id)}${esc(shortName)}${_favoritePill}${hfLink ? ` <a href="${esc(hfLink)}" target="_blank" rel="noopener" class="cookbook-hf-link">HF ↗</a>` : ''}${_runningPill}${_downloadingPill}</div>`; html += `<div class="memory-item-title cookbook-serve-title"${_mc ? ` style="color:${_mc}"` : ''}><span class="cookbook-serve-title-name">${modelLogo(m.repo_id)}${esc(shortName)}</span>${_favoritePill}${hfLink ? ` <a href="${esc(hfLink)}" target="_blank" rel="noopener" class="cookbook-hf-link">HF ↗</a>` : ''}${_runningPill}${_downloadingPill}</div>`;
html += `<div class="memory-item-meta" style="font-size:10px;opacity:0.4;margin-top:2px;">${metaParts.join(' \u00b7 ')}</div>`; html += `<div class="memory-item-meta" style="font-size:10px;opacity:0.4;margin-top:2px;">${metaParts.join(' \u00b7 ')}</div>`;
html += `</div>`; html += `</div>`;
const _bk = _detectBackend(m).backend; const _bk = _detectBackend(m).backend;
@@ -962,6 +1080,11 @@ function _rerenderCachedModels() {
const _isMiniMaxM3 = _isMiniMaxM3Model({ ...m, repo_id: repo }); const _isMiniMaxM3 = _isMiniMaxM3Model({ ...m, repo_id: repo });
const _isMiniMaxM2 = _isMiniMaxM2Model({ ...m, repo_id: repo }); const _isMiniMaxM2 = _isMiniMaxM2Model({ ...m, repo_id: repo });
const _isMiniMaxMSeries = _isMiniMaxM3 || _isMiniMaxM2; const _isMiniMaxMSeries = _isMiniMaxM3 || _isMiniMaxM2;
const _toolParserDefault = _detectToolParser(repo);
const _isStepFunStep = _toolParserDefault === 'step3p5';
const _nativeToolDefault = _isMiniMaxMSeries || _isStepFunStep;
const _reasoningDefault = _isMiniMaxMSeries || _isStepFunStep;
const _expertParallelDefault = _isMiniMaxMSeries || _isStepFunStep;
const svm = (k, def) => (_modelSs && _hasOwn(_modelSs, k)) ? _modelSs[k] : def; const svm = (k, def) => (_modelSs && _hasOwn(_modelSs, k)) ? _modelSs[k] : def;
const _serveTarget = _selectedServeTarget(); const _serveTarget = _selectedServeTarget();
const _backendChoices = _backendChoicesForTarget(_serveTarget); const _backendChoices = _backendChoicesForTarget(_serveTarget);
@@ -993,8 +1116,15 @@ function _rerenderCachedModels() {
const _l = (name, tip) => `<span>${name}<span class="hwfit-hint" title="${tip}">?</span></span>`; const _l = (name, tip) => `<span>${name}<span class="hwfit-hint" title="${tip}">?</span></span>`;
const _ggufChoices = _runnableGgufFiles(m); const _ggufChoices = _runnableGgufFiles(m);
const _savedGguf = String(sv('gguf_file', '') || ''); const _savedGguf = String(sv('gguf_file', '') || '');
const _preferredGgufInclude = String(sv('_preferredGgufInclude', '') || '').replace(/\*/g, '').toLowerCase();
const _preferredGguf = _preferredGgufInclude
? (_ggufChoices.find(f => String(f.rel_path || '').toLowerCase().includes(_preferredGgufInclude))
|| _ggufChoices.find(f => String(f.name || '').toLowerCase().includes(_preferredGgufInclude)))
: null;
const _defaultGguf = _ggufChoices.some(f => f.rel_path === _savedGguf) const _defaultGguf = _ggufChoices.some(f => f.rel_path === _savedGguf)
? _savedGguf ? _savedGguf
: (_preferredGguf?.rel_path || '')
? _preferredGguf.rel_path
: (_ggufChoices[0]?.rel_path || ''); : (_ggufChoices[0]?.rel_path || '');
const _ggufOptions = _ggufChoices.map(f => const _ggufOptions = _ggufChoices.map(f =>
`<option value="${esc(f.rel_path)}"${f.rel_path === _defaultGguf ? ' selected' : ''}>${esc(_ggufFileLabel(f))}</option>` `<option value="${esc(f.rel_path)}"${f.rel_path === _defaultGguf ? ' selected' : ''}>${esc(_ggufFileLabel(f))}</option>`
@@ -1026,6 +1156,10 @@ function _rerenderCachedModels() {
+ `</div>`; + `</div>`;
let panelHtml = `<div class="hwfit-serve-panel">`; let panelHtml = `<div class="hwfit-serve-panel">`;
const _replaceTaskId = String(sv('_replaceTaskId', '') || '');
if (_replaceTaskId) {
panelHtml += `<input type="hidden" class="hwfit-sf" data-field="_replaceTaskId" value="${esc(_replaceTaskId)}" />`;
}
// Runtime-readiness note pinned at the top of the serve area so the // Runtime-readiness note pinned at the top of the serve area so the
// user sees "vLLM ready on …" before scrolling into the configure // user sees "vLLM ready on …" before scrolling into the configure
// form. Hidden until the readiness probe returns. The × button // form. Hidden until the readiness probe returns. The × button
@@ -1202,20 +1336,20 @@ function _rerenderCachedModels() {
const _rp_name = _rp_flag ? _rp_flag.split(' ')[1] : ''; const _rp_name = _rp_flag ? _rp_flag.split(' ')[1] : '';
panelHtml += `<div class="hwfit-serve-checks hwfit-backend-vllm hwfit-backend-sglang">`; panelHtml += `<div class="hwfit-serve-checks hwfit-backend-vllm hwfit-backend-sglang">`;
panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="trust_remote"${sv('trust_remote',_isMiniMaxMSeries)?' checked':''} /> Trust Remote Code${_h('Allow model to run custom code from HuggingFace')}</label>`; panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="trust_remote"${sv('trust_remote',_isMiniMaxMSeries)?' checked':''} /> Trust Remote Code${_h('Allow model to run custom code from HuggingFace')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="auto_tool"${sv('auto_tool',_isMiniMaxMSeries)?' checked':''} /> Auto Tool Choice${_h('Enable function/tool calling for agent mode')}</label>`; panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-backend-sglang"><input type="checkbox" class="hwfit-sf" data-field="auto_tool"${sv('auto_tool',_nativeToolDefault)?' checked':''} /> Auto Tool Choice${_h('Enable function/tool calling for agent mode')}</label>`;
// Always-render the Reasoning Parser, Expert Parallel, and MoE Env // Always-render the Reasoning Parser, Expert Parallel, and MoE Env
// checkboxes — the model-family detection above is a hint, not a // checkboxes — the model-family detection above is a hint, not a
// hard gate. User asked to keep these visible regardless so that // hard gate. User asked to keep these visible regardless so that
// a borderline-undetected MoE/reasoning model can still toggle // a borderline-undetected MoE/reasoning model can still toggle
// them without dropping back to the raw command box. // them without dropping back to the raw command box.
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="reasoning_parser" data-parser="${_rp_name || ''}"${sv('reasoning_parser',_isMiniMaxMSeries)?' checked':''} /> Reasoning Parser${_rp_name ? ` <span class="hwfit-parser-tag">${_rp_name}</span>` : ''}${_h('Splits <think> tokens into a separate channel. The tag (when shown) is the auto-detected parser; edit the command if you need a different one.')}</label>`; panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-backend-sglang"><input type="checkbox" class="hwfit-sf" data-field="reasoning_parser" data-parser="${_rp_name || ''}"${sv('reasoning_parser',_reasoningDefault)?' checked':''} /> Reasoning Parser${_rp_name ? ` <span class="hwfit-parser-tag">${_rp_name}</span>` : ''}${_h('Splits <think> tokens into a separate channel. The tag (when shown) is the auto-detected parser; edit the command if you need a different one.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="enforce_eager"${sv('enforce_eager',false)?' checked':''} /> Enforce Eager${_h('Disable CUDA graphs. Slower but uses less memory')}</label>`; panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="enforce_eager"${sv('enforce_eager',false)?' checked':''} /> Enforce Eager${_h('Disable CUDA graphs. Slower but uses less memory')}</label>`;
panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="prefix_cache"${sv('prefix_cache',false)?' checked':''} /> Prefix Caching${_h('Cache shared prompt prefixes across requests')}</label>`; panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="prefix_cache"${sv('prefix_cache',false)?' checked':''} /> Prefix Caching${_h('Cache shared prompt prefixes across requests')}</label>`;
// Inline the previously-second vLLM checks row so Expert Parallel / // Inline the previously-second vLLM checks row so Expert Parallel /
// Speculative / MoE Env sit next to Prefix Caching with no gap. All // Speculative / MoE Env sit next to Prefix Caching with no gap. All
// three are vLLM-only — class-gated so they hide on SGLang. Always // three are vLLM-only — class-gated so they hide on SGLang. Always
// render so the user can flip them on for any MoE model. // render so the user can flip them on for any MoE model.
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel"${sv('expert_parallel',_isMiniMaxMSeries)?' checked':''} /> Expert Parallel${_h('MoE: shard expert layers across GPUs. Helps for MiniMax M-series, Qwen3 A3B/A10B/A22B MoE, DeepSeek V3+/R1. Ignored / wasteful on dense models.')}</label>`; panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm hwfit-backend-sglang"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel"${sv('expert_parallel',_expertParallelDefault)?' checked':''} /> Expert Parallel${_h('MoE: shard expert layers across GPUs. Helps for MiniMax M-series, StepFun Step-3, Qwen3 A3B/A10B/A22B MoE, DeepSeek V3+/R1. Ignored / wasteful on dense models.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="language_model_only"${sv('language_model_only',_isMiniMaxM3)?' checked':''} /> Language Model Only${_h('vLLM --language-model-only. Needed by MiniMax M3 text serving when the repo also contains VL components.')}</label>`; panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="language_model_only"${sv('language_model_only',_isMiniMaxM3)?' checked':''} /> Language Model Only${_h('vLLM --language-model-only. Needed by MiniMax M3 text serving when the repo also contains VL components.')}</label>`;
panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="disable_custom_all_reduce"${sv('disable_custom_all_reduce',_isMiniMaxM3)?' checked':''} /> Disable Custom All Reduce${_h('vLLM --disable-custom-all-reduce. Useful for some 8-GPU/nightly configurations.')}</label>`; panelHtml += `<label class="hwfit-sf-cb hwfit-backend-vllm"><input type="checkbox" class="hwfit-sf" data-field="disable_custom_all_reduce"${sv('disable_custom_all_reduce',_isMiniMaxM3)?' checked':''} /> Disable Custom All Reduce${_h('vLLM --disable-custom-all-reduce. Useful for some 8-GPU/nightly configurations.')}</label>`;
{ {
@@ -2870,11 +3004,11 @@ function _rerenderCachedModels() {
// preflight and let the launch silently fall to CPU. // preflight and let the launch silently fall to CPU.
let _hwGpus = []; let _hwGpus = [];
try { try {
const _gh = (_selectedServeTarget.host || '').trim(); const _gh = (launchTarget.host || '').trim();
const _gp = new URLSearchParams(); const _gp = new URLSearchParams();
if (_gh) { if (_gh) {
_gp.set('host', _gh); _gp.set('host', _gh);
const _sp = (_serverByVal?.(_selectedServeTarget.serverKey || _gh) || {}).port; const _sp = (_serverByVal?.(launchTarget.serverKey || _gh) || {}).port;
if (_sp) _gp.set('ssh_port', _sp); if (_sp) _gp.set('ssh_port', _sp);
} }
const _gr = await fetch('/api/cookbook/gpus' + (_gp.toString() ? '?' + _gp : ''), { credentials: 'same-origin' }); const _gr = await fetch('/api/cookbook/gpus' + (_gp.toString() ? '?' + _gp : ''), { credentials: 'same-origin' });
@@ -3069,6 +3203,7 @@ function _rerenderCachedModels() {
try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {} try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {}
const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {}; const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {};
const _saved = { ...serveState, _forceBackend: true }; const _saved = { ...serveState, _forceBackend: true };
delete _saved._replaceTaskId;
byRepo[repo] = _saved; byRepo[repo] = _saved;
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved })); localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved }));
} catch {} } catch {}
@@ -3127,7 +3262,8 @@ function _rerenderCachedModels() {
await _withSpinner(_launchBtn, async () => { await _withSpinner(_launchBtn, async () => {
// Pass the exact form values so the running task can be re-opened // Pass the exact form values so the running task can be re-opened
// in the Serve panel pre-filled with these settings (Edit button). // in the Serve panel pre-filled with these settings (Edit button).
await _launchServeTask(shortName, repo, launchCmd, serveState, serveHost, { serverKey: serveServerKey, serverName: serveServerName }); const taskDisplayName = _serveTaskDisplayName(shortName, m, serveState);
await _launchServeTask(taskDisplayName, repo, launchCmd, serveState, serveHost, { serverKey: serveServerKey, serverName: serveServerName });
}); });
} finally { } finally {
_envState.env = origEnv; _envState.env = origEnv;
@@ -3188,7 +3324,6 @@ function _resolveCacheHost() {
} }
async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = null) { async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = null) {
if (!skipConfirm && !(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) return;
const m = model || _cachedAllModels.find(x => x.repo_id === repo); const m = model || _cachedAllModels.find(x => x.repo_id === repo);
// Delete the EXACT on-disk path the scan reported. Models in a custom // Delete the EXACT on-disk path the scan reported. Models in a custom
// model dir live at <path>/<repo>; HF-cache models at // model dir live at <path>/<repo>; HF-cache models at
@@ -3204,13 +3339,32 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
} else { } else {
target = `~/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}`; target = `~/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}`;
} }
let deleteChoice = { mode: 'repo' };
const ggufFiles = _ggufFilesForModel(m);
if (!skipConfirm) {
if (ggufFiles.length > 1) {
deleteChoice = await _ggufDeleteChoice(repo, ggufFiles);
if (!deleteChoice) return;
} else if (!(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) {
return;
}
}
const host = _resolveCacheHost(); const host = _resolveCacheHost();
let cmd; let cmd;
if (_isWindows()) { if (_isWindows()) {
const winTarget = target.startsWith('~') const winTarget = target.startsWith('~')
? target.replace(/^~/, '$env:USERPROFILE').replace(/\//g, '\\') ? target.replace(/^~/, '$env:USERPROFILE').replace(/\//g, '\\')
: target.replace(/\//g, '\\'); : target.replace(/\//g, '\\');
cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`; if (deleteChoice.mode === 'files') {
const targets = deleteChoice.files
.map(f => _safeGgufRelPath(f.rel_path))
.filter(Boolean)
.map(rel => `${winTarget}\\${rel.replace(/\//g, '\\')}`);
if (!targets.length) return;
cmd = targets.map(p => `Remove-Item -Force "${p.replace(/"/g, '\\"')}" -ErrorAction SilentlyContinue`).join('; ');
} else {
cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`;
}
if (host) { if (host) {
const pf = _sshPrefix(_getPort(host)); const pf = _sshPrefix(_getPort(host));
cmd = `ssh ${pf}${host} "powershell -Command \\"${cmd}\\""`; cmd = `ssh ${pf}${host} "powershell -Command \\"${cmd}\\""`;
@@ -3219,7 +3373,16 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
// $HOME expands inside double quotes; ~ would not, so normalize the // $HOME expands inside double quotes; ~ would not, so normalize the
// fallback. Quoting also handles spaces in custom model-dir paths. // fallback. Quoting also handles spaces in custom model-dir paths.
const unixTarget = target.startsWith('~') ? target.replace(/^~/, '$HOME') : target; const unixTarget = target.startsWith('~') ? target.replace(/^~/, '$HOME') : target;
cmd = `rm -rf "${unixTarget}"`; if (deleteChoice.mode === 'files') {
const targets = deleteChoice.files
.map(f => _safeGgufRelPath(f.rel_path))
.filter(Boolean)
.map(rel => `${target.replace(/\/+$/, '')}/${rel}`);
if (!targets.length) return;
cmd = `rm -f ${targets.map(p => _shellPathExpr(p)).join(' ')} && find ${_shellPathExpr(target)} -type d -empty -delete`;
} else {
cmd = `rm -rf "${unixTarget}"`;
}
if (host) cmd = _sshCmd(host, cmd, _getPort(host)); if (host) cmd = _sshCmd(host, cmd, _getPort(host));
} }
// Deleting a large model (tens/hundreds of GB) can take a while, especially // Deleting a large model (tens/hundreds of GB) can take a while, especially
@@ -3244,7 +3407,13 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
body: JSON.stringify({ command: cmd }), body: JSON.stringify({ command: cmd }),
}); });
if (!res.ok) { uiModule.showError(`Delete failed (${res.status})`); return; } if (!res.ok) { uiModule.showError(`Delete failed (${res.status})`); return; }
if (itemEl) { if (deleteChoice.mode === 'files') {
if (m && Array.isArray(m.gguf_files)) {
const removed = new Set(deleteChoice.files.map(f => _safeGgufRelPath(f.rel_path)));
m.gguf_files = m.gguf_files.filter(f => !removed.has(_safeGgufRelPath(f.rel_path)));
}
await _fetchCachedModels(false);
} else if (itemEl) {
itemEl.querySelector('.cookbook-delete-overlay')?.remove(); itemEl.querySelector('.cookbook-delete-overlay')?.remove();
itemEl.style.transition = 'opacity 0.24s ease, transform 0.24s ease, max-height 0.28s ease, padding 0.28s ease, margin 0.28s ease'; itemEl.style.transition = 'opacity 0.24s ease, transform 0.24s ease, max-height 0.28s ease, padding 0.28s ease, margin 0.28s ease';
itemEl.style.maxHeight = `${Math.max(itemEl.getBoundingClientRect().height, itemEl.scrollHeight)}px`; itemEl.style.maxHeight = `${Math.max(itemEl.getBoundingClientRect().height, itemEl.scrollHeight)}px`;
@@ -3258,9 +3427,9 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
requestAnimationFrame(() => { itemEl.style.maxHeight = '0'; }); requestAnimationFrame(() => { itemEl.style.maxHeight = '0'; });
await new Promise(resolve => setTimeout(resolve, 300)); await new Promise(resolve => setTimeout(resolve, 300));
if (itemEl.parentElement) itemEl.remove(); if (itemEl.parentElement) itemEl.remove();
// Drop from the in-memory list so a re-render/filter doesn't resurrect it.
_cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo);
} }
// Drop from the in-memory list so a re-render/filter doesn't resurrect it.
_cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo);
} catch (e) { } catch (e) {
uiModule.showError('Delete failed: ' + (e && e.message ? e.message : e)); uiModule.showError('Delete failed: ' + (e && e.message ? e.message : e));
} finally { } finally {
+39 -19
View File
@@ -77,6 +77,7 @@ function _handlePickerKeydown(e, listEl, itemSelector, closeFn) {
// Dependencies injected via initModelPicker() // Dependencies injected via initModelPicker()
let _deps = null; let _deps = null;
let _autoSelectingDefault = false; let _autoSelectingDefault = false;
let _defaultChatPickInFlight = false;
function _modelExists(modelId, url) { function _modelExists(modelId, url) {
if (!modelId || !window.modelsModule || !window.modelsModule.getCachedItems) return false; if (!modelId || !window.modelsModule || !window.modelsModule.getCachedItems) return false;
@@ -91,6 +92,43 @@ function _modelExists(modelId, url) {
}); });
} }
/**
 * Pick a model for a not-yet-created chat when nothing is selected.
 *
 * Does nothing when the picker has no dependencies yet, when another pick is
 * still in flight, when a session is active, or when the pending chat already
 * has a model. Otherwise it asks the server for the configured default chat
 * (`/api/default-chat`) and, if none is configured, falls back to the first
 * online cached endpoint that lists at least one model.
 *
 * @returns {Promise<void>}
 */
async function _ensureDefaultPendingChat() {
  if (!_deps || _defaultChatPickInFlight) return;
  // True while there is neither an active session nor a pending model.
  const stillNeedsDefault = () => {
    if (_deps.getCurrentSessionId && _deps.getCurrentSessionId()) return false;
    const pending = _deps.getPendingChat && _deps.getPendingChat();
    return !(pending && pending.modelId);
  };
  if (!stillNeedsDefault()) return;
  _defaultChatPickInFlight = true;
  try {
    let dc = null;
    try {
      const res = await fetch(`${API_BASE}/api/default-chat`, { credentials: 'same-origin' });
      if (res.ok) dc = await res.json();
    } catch (_) {
      // Best effort: a failed lookup simply falls through to the fallback.
    }
    // The request is asynchronous: a session or model may have been chosen
    // while it was in flight, and that choice must not be overwritten.
    if (!stillNeedsDefault()) return;
    if (dc && dc.endpoint_url && dc.model) {
      _deps.setPendingChat({
        url: dc.endpoint_url,
        modelId: dc.model,
        endpointId: dc.endpoint_id || '',
      });
      try { window.__odysseusDefaultChat = dc; } catch (_) {}
      updateModelPicker();
      return;
    }
    // No configured default: preserve the old convenience fallback.
    if (window.modelsModule && window.modelsModule.getCachedItems) {
      const items = window.modelsModule.getCachedItems() || [];
      const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length));
      if (first) {
        const models = (first.models || []).concat(first.models_extra || []);
        _deps.setPendingChat({ url: first.url, modelId: models[0], endpointId: first.endpoint_id });
        updateModelPicker();
      }
    }
  } finally {
    // Always release the guard so later calls can try again.
    _defaultChatPickInFlight = false;
  }
}
/** /**
* Initialize the model picker dropdown. * Initialize the model picker dropdown.
* @param {Object} deps * @param {Object} deps
@@ -710,25 +748,7 @@ export function updateModelPicker() {
} }
} }
if (!modelId && !_autoSelectingDefault && window.modelsModule && window.modelsModule.getCachedItems) { if (!modelId && !_autoSelectingDefault && window.modelsModule && window.modelsModule.getCachedItems) {
const items = window.modelsModule.getCachedItems(); _ensureDefaultPendingChat();
const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length));
if (first) {
const models = (first.models || []).concat(first.models_extra || []);
modelId = models[0];
if (!currentSessionId) {
_deps.setPendingChat({ url: first.url, modelId, endpointId: first.endpoint_id });
} else {
if (s) { s.model = modelId; s.endpoint_url = first.url; }
_autoSelectingDefault = true;
const fd = new FormData();
fd.append('model', modelId);
fd.append('endpoint_url', first.url || '');
if (first.endpoint_id) fd.append('endpoint_id', first.endpoint_id);
fetch(`${API_BASE}/api/session/${currentSessionId}`, { method: 'PATCH', body: fd })
.catch(() => {})
.finally(() => { _autoSelectingDefault = false; });
}
}
} }
const displayName = modelId ? modelId.split('/').pop() : 'Select model'; const displayName = modelId ? modelId.split('/').pop() : 'Select model';
+2 -18
View File
@@ -1896,10 +1896,6 @@ function _renderNotes() {
${_hasItems(note) ? `<div class="note-cl-quickadd"><input type="text" class="note-cl-quickadd-input" placeholder="+ Add item" data-note-id="${note.id}" /></div>` : ''} ${_hasItems(note) ? `<div class="note-cl-quickadd"><input type="text" class="note-cl-quickadd-input" placeholder="+ Add item" data-note-id="${note.id}" /></div>` : ''}
${reminderTagHtml} ${reminderTagHtml}
${noteTags.length ? `<div class="note-card-label">${noteTags.map(t => `<button type="button" class="note-card-label-chip" data-note-label-filter="${_esc(t)}" title="Filter #${_esc(t)}">#${_esc(t)}</button>`).join(' ')}</div>` : ''} ${noteTags.length ? `<div class="note-card-label">${noteTags.map(t => `<button type="button" class="note-card-label-chip" data-note-label-filter="${_esc(t)}" title="Filter #${_esc(t)}">#${_esc(t)}</button>`).join(' ')}</div>` : ''}
${note.agent_session_id ? `<button class="note-agent-tag" data-note-id="${note.id}" data-session-id="${_esc(note.agent_session_id)}" title="Open the agent's chat for this note">
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
<span>Agent</span>
</button>` : ''}
<div class="note-card-actions"> <div class="note-card-actions">
<div class="note-card-colors">${colorDots}</div> <div class="note-card-colors">${colorDots}</div>
<span style="flex:1"></span> <span style="flex:1"></span>
@@ -2304,16 +2300,6 @@ function _bindCardEvents(body) {
_openNoteCornerMenu(btn); _openNoteCornerMenu(btn);
}); });
}); });
// Agent tag — opens the chat session the agent ran for this note.
body.querySelectorAll('.note-agent-tag').forEach(tag => {
tag.addEventListener('click', (e) => {
e.preventDefault();
e.stopPropagation();
const sid = tag.dataset.sessionId;
const _sm = window.sessionModule;
if (sid && _sm && _sm.selectSession) { closePanel(); _sm.selectSession(sid); }
});
});
body.querySelectorAll('.note-card-label-chip').forEach(chip => { body.querySelectorAll('.note-card-label-chip').forEach(chip => {
chip.addEventListener('click', (e) => { chip.addEventListener('click', (e) => {
e.preventDefault(); e.preventDefault();
@@ -4383,18 +4369,16 @@ function _openTodoAgentMenu(btn) {
const noteId = btn.dataset.noteId; const noteId = btn.dataset.noteId;
const idx = parseInt(btn.dataset.idx); const idx = parseInt(btn.dataset.idx);
const sid = btn.dataset.sessionId || ''; const sid = btn.dataset.sessionId || '';
const title = btn.dataset.agentTitle || 'Agent chat';
const menu = document.createElement('div'); const menu = document.createElement('div');
menu.className = 'note-corner-menu-dropdown note-agent-item-menu'; menu.className = 'note-corner-menu-dropdown note-agent-item-menu';
menu.innerHTML = ` menu.innerHTML = `
<div class="ncm-title">${_esc(title)}</div>
${sid ? `<button type="button" class="ncm-item" data-act="open"> ${sid ? `<button type="button" class="ncm-item" data-act="open">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M15 3h6v6"/><path d="M10 14L21 3"/><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/></svg> <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M15 3h6v6"/><path d="M10 14L21 3"/><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/></svg>
<span>Open this agent chat</span> <span>Open</span>
</button>` : ''} </button>` : ''}
<button type="button" class="ncm-item" data-act="run"> <button type="button" class="ncm-item" data-act="run">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg> <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
<span>${sid ? 'Run again for this todo' : 'Start agent for this todo'}</span> <span>${sid ? 'Run again' : 'Run Agent'}</span>
</button>`; </button>`;
_positionNoteMenu(menu, btn); _positionNoteMenu(menu, btn);
const openBtn = menu.querySelector('[data-act="open"]'); const openBtn = menu.querySelector('[data-act="open"]');
+165 -21
View File
@@ -5324,6 +5324,84 @@ body.bg-pattern-sparkles {
.confirm-btn-primary:hover { filter:brightness(1.15); } .confirm-btn-primary:hover { filter:brightness(1.15); }
.confirm-btn-danger { background:var(--color-danger); color:#fff; border-color:transparent; } .confirm-btn-danger { background:var(--color-danger); color:#fff; border-color:transparent; }
.confirm-btn-danger:hover { background:var(--color-error); } .confirm-btn-danger:hover { background:var(--color-error); }
/* GGUF delete dialog: fixed, full-viewport backdrop (dimmed + blurred) that
   is forced above other content via !important z-index/position. */
#cookbook-gguf-delete-overlay {
background:rgba(0,0,0,0.5);
backdrop-filter:blur(4px);
pointer-events:auto !important;
z-index:99999 !important;
position:fixed !important;
inset:0 !important;
}
/* Dialog box: 560px wide, capped at 92% of the viewport width. */
.cookbook-gguf-delete-box {
width:560px;
max-width:92vw;
}
/* File list: vertical stack that scrolls once it exceeds 42vh. */
.cookbook-gguf-delete-list {
display:flex;
flex-direction:column;
gap:6px;
max-height:42vh;
overflow:auto;
padding:2px 2px 4px;
}
/* One selectable file row: an 18px column followed by a flexible column
   (minmax(0,1fr) lets the text column shrink so it can be truncated). */
.cookbook-gguf-delete-row {
display:grid;
grid-template-columns:18px minmax(0,1fr);
gap:7px 8px;
align-items:start;
padding:7px 8px;
border:1px solid var(--border);
border-radius:7px;
background:color-mix(in srgb, var(--panel, var(--bg)) 92%, var(--fg) 8%);
cursor:pointer;
}
/* Hover: tint the row border towards the primary accent colour. */
.cookbook-gguf-delete-row:hover {
border-color:color-mix(in srgb, var(--accent-primary, var(--fg)) 45%, var(--border));
}
/* Custom checkbox: native appearance removed and drawn as a small 8px
   circle; size is forced with !important. */
.cookbook-gguf-delete-cb {
-webkit-appearance:none;
appearance:none;
width:8px !important;
height:8px !important;
min-width:8px;
min-height:8px;
padding:0;
margin:4px 0 0;
border:1px solid var(--border);
border-radius:50%;
background:transparent;
box-sizing:content-box;
cursor:pointer;
transition:background 0.15s, border-color 0.15s, transform 0.12s;
}
/* Hover: accent-coloured border and a slight scale-up. */
.cookbook-gguf-delete-cb:hover {
border-color:var(--accent, var(--red));
transform:scale(1.12);
}
/* Checked: the circle is filled with the accent colour. */
.cookbook-gguf-delete-cb:checked {
background:var(--accent, var(--red));
border-color:var(--accent, var(--red));
}
/* Label and path text: kept on one line and truncated with an ellipsis. */
.cookbook-gguf-delete-main,
.cookbook-gguf-delete-path {
min-width:0;
overflow:hidden;
text-overflow:ellipsis;
white-space:nowrap;
}
/* Primary line of a row (the file label). */
.cookbook-gguf-delete-main {
font-size:0.86rem;
color:var(--fg);
}
/* Secondary line (the relative path): placed in the second grid column,
   smaller and dimmed. */
.cookbook-gguf-delete-path {
grid-column:2;
margin-top:-2px;
font-size:0.74rem;
opacity:0.58;
}
/* Footer buttons may wrap onto multiple lines. */
.cookbook-gguf-delete-actions {
flex-wrap:wrap;
}
/* Styled prompt — text-input dialog (used in place of window.prompt) */ /* Styled prompt — text-input dialog (used in place of window.prompt) */
#styled-prompt-overlay { #styled-prompt-overlay {
background:rgba(0,0,0,0.5); background:rgba(0,0,0,0.5);
@@ -19222,6 +19300,18 @@ body.gallery-selecting .gallery-dl-btn,
background: color-mix(in srgb, var(--red) 20%, transparent); background: color-mix(in srgb, var(--red) 20%, transparent);
} }
.cookbook-gpu-kill:disabled { opacity: 0.4; cursor: wait; } .cookbook-gpu-kill:disabled { opacity: 0.4; cursor: wait; }
/* Horizontal flex row with vertically centred children and a 4px gap.
   min-width:0 lets the row shrink inside its own parent instead of forcing
   it wider. */
.cookbook-serve-title {
display: flex;
align-items: center;
gap: 4px;
min-width: 0;
}
/* Text is kept on one line and truncated with an ellipsis when the row is
   too narrow; min-width:0 permits the shrinking that makes truncation
   possible. */
.cookbook-serve-title-name {
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.cookbook-hf-link { .cookbook-hf-link {
font-size: 9px; font-size: 9px;
text-decoration: none; text-decoration: none;
@@ -19234,6 +19324,7 @@ body.gallery-selecting .gallery-dl-btn,
vertical-align: 1px; vertical-align: 1px;
letter-spacing: 0.3px; letter-spacing: 0.3px;
font-weight: 600; font-weight: 600;
flex-shrink: 0;
} }
.cookbook-hf-link:hover { .cookbook-hf-link:hover {
opacity: 0.8; opacity: 0.8;
@@ -19626,6 +19717,9 @@ body.gallery-selecting .gallery-dl-btn,
position: relative; position: relative;
top: -2px; top: -2px;
} }
/* Nudges the element 3px upward.
   NOTE(review): `top` only applies to positioned elements; the position is
   presumably set by another rule that also matches this element — confirm. */
.cookbook-dep-reinstall {
top: -3px;
}
.cookbook-dep-rebuild:hover { .cookbook-dep-rebuild:hover {
background: color-mix(in srgb, var(--accent, var(--red)) 18%, transparent); background: color-mix(in srgb, var(--accent, var(--red)) 18%, transparent);
color: var(--accent, var(--red)); color: var(--accent, var(--red));
@@ -20619,6 +20713,11 @@ body.gallery-selecting .gallery-dl-btn,
} }
.cookbook-task[data-status="done"] .cookbook-task-check-ico { display: inline; } .cookbook-task[data-status="done"] .cookbook-task-check-ico { display: inline; }
.cookbook-task[data-status="done"] .cookbook-task-clear-ico { display: none; } .cookbook-task[data-status="done"] .cookbook-task-clear-ico { display: none; }
/* Viewports up to 820px wide: set the top offset of .cookbook-task-check to
   2px. NOTE(review): takes effect only if the element is positioned by
   another rule — confirm. */
@media (max-width: 820px) {
.cookbook-task-check {
top: 2px;
}
}
.cookbook-task-start-now { .cookbook-task-start-now {
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
@@ -20652,24 +20751,30 @@ body.gallery-selecting .gallery-dl-btn,
/* "Serve" button on a finished download green pill matching the "running" / /* "Serve" button on a finished download green pill matching the "running" /
finished badge (it sits next to the green FINISHED chip + check). */ finished badge (it sits next to the green FINISHED chip + check). */
.cookbook-task-serve-btn { .cookbook-task-serve-btn {
font-size: 9px; display: inline-flex;
font-weight: 600; align-items: center;
padding: 1px 6px; gap: 3px;
border: none; padding: 1px 6px 1px 4px;
border-radius: 3px; border: 0;
line-height: 16px; border-radius: 9px;
line-height: 1;
flex-shrink: 0; flex-shrink: 0;
cursor: pointer; cursor: pointer;
font-family: inherit; font-family: inherit;
background: color-mix(in srgb, var(--green, #50fa7b) 20%, transparent); font-size: 9px;
text-transform: lowercase;
background: transparent;
color: var(--green, #50fa7b); color: var(--green, #50fa7b);
position: relative; position: relative;
top: -2px; top: -2px;
margin-right: 2px;
appearance: none; appearance: none;
-webkit-appearance: none; -webkit-appearance: none;
-moz-appearance: none; -moz-appearance: none;
transition: background 0.15s;
} }
.cookbook-task-serve-btn:hover { background: color-mix(in srgb, var(--green, #50fa7b) 32%, transparent); } .cookbook-task-serve-btn svg { flex-shrink: 0; }
.cookbook-task-serve-btn:hover { background: color-mix(in srgb, var(--green, #50fa7b) 16%, transparent); }
.cookbook-task-sub { .cookbook-task-sub {
padding: 1px 10px 4px; padding: 1px 10px 4px;
line-height: 1; line-height: 1;
@@ -21448,6 +21553,31 @@ body.gallery-selecting .gallery-dl-btn,
.cookbook-dl-btn:hover { .cookbook-dl-btn:hover {
opacity: 0.9; opacity: 0.9;
} }
/* Row: children are vertically centred and packed toward the end of the row
   with a 5px gap; the row itself is shifted up (-1px margin plus a -2px
   relative offset).
   NOTE(review): gap/align-items/justify-content require a flex or grid
   container; the display value is presumably set by another rule — confirm. */
.cookbook-dl-gguf-row {
margin-top: -1px;
gap: 5px;
align-items: center;
justify-content: flex-end;
font-size: 11px;
position: relative;
top: -2px;
}
/* Dimmed label that keeps its natural width (never shrinks). */
.cookbook-dl-gguf-label {
opacity: 0.65;
flex-shrink: 0;
}
/* 28px tall, at least 118px wide, and excluded from flex growing/shrinking
   (flex: 0 0 auto). */
#cookbook-dl-gguf-quant {
height: 28px;
min-width: 118px;
flex: 0 0 auto;
}
/* Dimmed single-line text, truncated with an ellipsis beyond 240px. */
#cookbook-dl-gguf-note {
opacity: 0.55;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
max-width: 240px;
}
/* HF link in search panel */ /* HF link in search panel */
.hwfit-panel-hf-link { .hwfit-panel-hf-link {
@@ -31920,24 +32050,34 @@ body.notes-drag-mode .note-card-pin svg {
.note-corner-menu-dropdown .ncm-item:hover { .note-corner-menu-dropdown .ncm-item:hover {
background: color-mix(in srgb, var(--fg) 8%, transparent); background: color-mix(in srgb, var(--fg) 8%, transparent);
} }
/* "Agent" tag on a note that has a linked agent chat session */ .note-checkbox-agent {
.note-agent-tag {
align-self: flex-start;
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
gap: 5px; justify-content: center;
background: color-mix(in srgb, var(--accent, var(--red)) 14%, transparent); width: 14px;
border: 1px solid color-mix(in srgb, var(--accent, var(--red)) 35%, transparent); height: 14px;
padding: 0;
margin: 0 1px;
border: 0;
background: transparent;
color: var(--accent, var(--red)); color: var(--accent, var(--red));
border-radius: 999px; box-shadow: none;
padding: 3px 10px 3px 8px;
font-size: 11px;
font-weight: 600;
cursor: pointer; cursor: pointer;
margin-top: 2px; opacity: 0;
transition: background 0.12s; transition: opacity 0.12s, color 0.12s;
}
/* While a .note-checkbox ancestor is hovered, show the agent control at 55%
   opacity. */
.note-checkbox:hover .note-checkbox-agent { opacity: 0.55; }
/* Hovering the control itself: fully opaque. !important makes this win over
   the other opacity rules for this element; background stays transparent. */
.note-checkbox-agent:hover {
background: transparent;
opacity: 1 !important;
}
/* State class .is-agent-stream-complete: green (#50fa7b) at 90% opacity. */
.note-checkbox-agent.is-agent-stream-complete {
color: #50fa7b;
opacity: 0.9;
}
.note-checkbox-agent svg {
display: block;
} }
.note-agent-tag:hover { background: color-mix(in srgb, var(--accent, var(--red)) 24%, transparent); }
.note-card { .note-card {
/* Same tint that .doclib-card uses so a default (uncolored) note /* Same tint that .doclib-card uses so a default (uncolored) note
@@ -36414,6 +36554,10 @@ body.research-panel-view #research-divider { display:none; }
.research-setting { .research-setting {
display:flex; flex-direction:column; flex:1; min-width:90px; display:flex; flex-direction:column; flex:1; min-width:90px;
} }
/* Matches .research-setting elements that are among the last three children
   of their parent inside .research-settings-row, and shifts them 3px down
   via a relative offset (layout space is unchanged). */
.research-settings-row .research-setting:nth-last-child(-n + 3) {
position: relative;
top: 3px;
}
.research-setting-label { .research-setting-label {
font-size:9px; text-transform:uppercase; letter-spacing:0.5px; font-size:9px; text-transform:uppercase; letter-spacing:0.5px;
opacity:0.5; margin-bottom:2px; opacity:0.5; margin-bottom:2px;
@@ -221,6 +221,60 @@ def test_skip_fenced_still_recovers_xml_invoke_markup():
assert "latest python release" in blocks[0].content assert "latest python release" in blocks[0].content
def test_stepfun_native_tool_tokens_are_executed_even_when_fenced_fallback_is_skipped():
    """Native tool-call tokens wrapping a JSON payload parse with skip_fenced=True.

    Exactly one ``web_search`` block must be produced, its content must carry
    the query text, and stripping the same input must leave an empty string.
    """
    token_stream = "".join(
        [
            "<tool▁calls▁begin>",
            "<tool▁call▁begin>web_search<tool▁sep>",
            '{"query":"Sweden news today"}',
            "<tool▁call▁end>",
            "<tool▁calls▁end>",
        ]
    )

    parsed = parse_tool_blocks(token_stream, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today" in only_block.content
    # The whole input is tool markup, so nothing should survive stripping.
    assert strip_tool_blocks(token_stream, skip_fenced=True) == ""
def test_stepfun_native_tool_tokens_accept_plain_web_query():
    """A bare (non-JSON) query between native tool-call tokens still yields one web_search block."""
    token_stream = "".join(
        [
            "<tool▁call▁begin>web_search<tool▁sep>",
            "Sweden news today",
            "<tool▁call▁end>",
        ]
    )

    parsed = parse_tool_blocks(token_stream, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today" in only_block.content
def test_skip_fenced_still_recovers_direct_xml_tool_markup():
    """``<tool_call><web_search>…`` markup after prose is parsed and stripped with skip_fenced=True.

    One ``web_search`` block is expected, and stripping must leave only the
    leading prose sentence (without its trailing newline).
    """
    model_output = "".join(
        [
            "I'll search now.\n",
            "<tool_call><web_search>News in Sweden today 2026-06-22</web_search></tool_call>",
        ]
    )

    parsed = parse_tool_blocks(model_output, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "News in Sweden today 2026-06-22" in only_block.content
    assert strip_tool_blocks(model_output, skip_fenced=True) == "I'll search now."
def test_skip_fenced_recovers_direct_xml_tool_markup_with_unclosed_wrapper():
    """Multi-line ``<web_search>`` markup is recovered even when ``<tool_call>`` is never closed.

    The input deliberately omits ``</tool_call>``; one ``web_search`` block is
    still expected and stripping must leave only the leading prose sentence.
    """
    model_output = "".join(
        [
            "I'll search now.\n",
            "<tool_call>\n",
            "<web_search>\n",
            "Sweden news today 2026-06-22\n",
            "</web_search>",
        ]
    )

    parsed = parse_tool_blocks(model_output, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today 2026-06-22" in only_block.content
    assert strip_tool_blocks(model_output, skip_fenced=True) == "I'll search now."
def test_skip_fenced_still_recovers_dsml_markup(): def test_skip_fenced_still_recovers_dsml_markup():
dsml = ( dsml = (
"Let me search for that.\n" "Let me search for that.\n"
+70
View File
@@ -19,7 +19,12 @@ from pathlib import Path
import pytest import pytest
from fastapi import APIRouter from fastapi import APIRouter
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
import core.database as cdb
from core.database import GalleryImage
from src.upload_handler import count_recent_uploads, UploadHandler from src.upload_handler import count_recent_uploads, UploadHandler
import routes.upload_routes as up import routes.upload_routes as up
@@ -82,6 +87,10 @@ def _files(n):
return [types.SimpleNamespace(filename=f"f{i}.txt") for i in range(n)] return [types.SimpleNamespace(filename=f"f{i}.txt") for i in range(n)]
def _image_upload(name="photo.png", content=b"not really png but enough for route metadata"):
return types.SimpleNamespace(filename=name, file=io.BytesIO(content))
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def _reset_router(monkeypatch): def _reset_router(monkeypatch):
# Module-level router accumulates routes across setup calls; reset it. # Module-level router accumulates routes across setup calls; reset it.
@@ -163,3 +172,64 @@ def test_six_file_batch_is_not_rate_limited(tmp_path):
assert meta and meta.get("id") assert meta and meta.get("id")
saved += 1 saved += 1
assert saved == 6 assert saved == 6
async def test_chat_image_upload_is_added_to_gallery(tmp_path, monkeypatch):
    """Uploading an image through the upload endpoint also creates a gallery row.

    The route module's ``SessionLocal`` and ``GENERATED_IMAGES_DIR`` are
    redirected to a throwaway SQLite database and directory under ``tmp_path``.
    The resulting ``GalleryImage`` must be owned by the uploader, tagged with
    model ``chat-upload``, use the original filename as prompt, carry the
    upload's hash, and point at a file that exists in the gallery directory.
    """
    # NOTE(review): no async marker is applied here; this presumably relies on
    # the project's pytest async configuration — confirm.
    db_url = f"sqlite:///{tmp_path / 'gallery.db'}"
    test_engine = create_engine(
        db_url,
        connect_args={"check_same_thread": False},
        poolclass=NullPool,
    )
    cdb.Base.metadata.create_all(test_engine)
    TestingSession = sessionmaker(bind=test_engine, autoflush=False, autocommit=False)

    gallery_dir = tmp_path / "generated_images"
    monkeypatch.setattr(up, "SessionLocal", TestingSession)
    monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(gallery_dir))

    handler = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads"))
    up.setup_upload_routes(handler)
    upload_endpoint = _endpoint(up.router)

    response = await upload_endpoint(_request(user="alice"), [_image_upload()])
    uploaded = response["files"][0]
    assert uploaded["gallery_id"]

    session = TestingSession()
    try:
        lookup = session.query(GalleryImage).filter(GalleryImage.id == uploaded["gallery_id"])
        image = lookup.one()
        assert image.owner == "alice"
        assert image.model == "chat-upload"
        assert image.prompt == "photo.png"
        assert image.file_hash == uploaded["hash"]
        assert (gallery_dir / image.filename).exists()
    finally:
        session.close()
async def test_non_image_chat_upload_is_not_added_to_gallery(tmp_path, monkeypatch):
    """Uploading a plain-text file must not create any gallery row.

    Uses the same throwaway SQLite database and gallery directory redirection
    as the image case; the response entry must have no ``gallery_id`` key and
    the ``GalleryImage`` table must stay empty.
    """
    # NOTE(review): no async marker is applied here; this presumably relies on
    # the project's pytest async configuration — confirm.
    db_url = f"sqlite:///{tmp_path / 'gallery.db'}"
    test_engine = create_engine(
        db_url,
        connect_args={"check_same_thread": False},
        poolclass=NullPool,
    )
    cdb.Base.metadata.create_all(test_engine)
    TestingSession = sessionmaker(bind=test_engine, autoflush=False, autocommit=False)

    monkeypatch.setattr(up, "SessionLocal", TestingSession)
    monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(tmp_path / "generated_images"))

    handler = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads"))
    up.setup_upload_routes(handler)
    upload_endpoint = _endpoint(up.router)

    text_upload = types.SimpleNamespace(
        filename="notes.txt",
        file=io.BytesIO(b"plain text upload"),
    )
    response = await upload_endpoint(_request(user="alice"), [text_upload])
    assert "gallery_id" not in response["files"][0]

    session = TestingSession()
    try:
        assert session.query(GalleryImage).count() == 0
    finally:
        session.close()