From 92daf4e56049a00aa1b8aa43c8f4afc37b27bc80 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Mon, 22 Jun 2026 01:49:15 +0000 Subject: [PATCH] Cookbook launch and gallery upload fixes --- routes/cookbook_routes.py | 13 ++ routes/model_routes.py | 62 +++++- routes/shell_routes.py | 4 +- routes/upload_routes.py | 79 ++++++- src/llm_core.py | 5 +- src/tool_parsing.py | 126 ++++++++++- static/js/cookbook.js | 112 ++++++++-- static/js/cookbookDownload.js | 23 +- static/js/cookbookRunning.js | 173 ++++++++++++--- static/js/cookbookServe.js | 197 ++++++++++++++++-- static/js/modelPicker.js | 58 ++++-- static/js/notes.js | 20 +- static/style.css | 186 +++++++++++++++-- ..._example_not_executed_for_native_models.py | 54 +++++ tests/test_upload_multifile.py | 70 +++++++ 15 files changed, 1047 insertions(+), 135 deletions(-) diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index 3308d26ae..89f15543d 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -2446,6 +2446,17 @@ def setup_cookbook_routes() -> APIRouter: disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else [] incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else [] + incoming_removed = data.get("removedTasks") if isinstance(data.get("removedTasks"), dict) else {} + disk_removed = on_disk.get("removedTasks") if isinstance(on_disk, dict) and isinstance(on_disk.get("removedTasks"), dict) else {} + removed_tasks = {**disk_removed, **incoming_removed} + data["removedTasks"] = removed_tasks + removed_ids = set(removed_tasks.keys()) + if removed_ids: + incoming_tasks = [ + t for t in incoming_tasks + if not (isinstance(t, dict) and t.get("sessionId") in removed_ids) + ] + data["tasks"] = incoming_tasks # Anti-poisoning guard: a stale browser tab can keep POSTing a # download task as status='done' from before the strict-finish # fix landed, undoing any server-side correction. For each @@ -2483,6 +2494,8 @@ def setup_cookbook_routes() -> APIRouter: sid = t.get("sessionId") if not sid or sid in incoming_ids: continue # client's version wins + if sid in removed_ids: + continue # intentional cross-device clear/remove ts = t.get("ts") or 0 if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS: preserved.append(t) diff --git a/routes/model_routes.py b/routes/model_routes.py index 69e882d60..89636b310 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -714,6 +714,16 @@ def _effective_endpoint_kind(ep: Any, base_url: str) -> str: return "auto" +def _is_loading_model_response(resp: Any) -> bool: + if getattr(resp, "status_code", None) != 503: + return False + try: + body = resp.text or "" + except Exception: + body = "" + return "loading model" in body.lower() + + def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]: """Probe a base URL's /models endpoint and return list of model IDs. @@ -778,6 +788,9 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis models.append(_e) return [m for m in models if _is_chat_model(m)] except httpx.HTTPStatusError as e: + if e.response is not None and _is_loading_model_response(e.response): + logger.info(f"Endpoint still loading model at {url}") + return [] if api_key: status = e.response.status_code if e.response is not None else "unknown" logger.warning(f"Failed to probe {url} with API key: HTTP {status}") @@ -827,6 +840,15 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> or "ollama" in (parsed_base.hostname or "").lower() ) + def _is_loading_model_response(r) -> bool: + if getattr(r, "status_code", None) != 503: + return False + try: + body = r.text or "" + except Exception: + body = "" + return "loading model" in body.lower() + def _result_from_response(r) -> Dict[str, Any]: if 300 <= r.status_code < 400: loc = r.headers.get("location", "") @@ -843,6 +865,13 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> "status_code": r.status_code, "error": None, } + if _is_loading_model_response(r): + return { + "reachable": True, + "loading": True, + "status_code": r.status_code, + "error": "Loading model", + } return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"} last_error: Optional[str] = None @@ -1427,7 +1456,7 @@ def setup_model_routes(model_discovery): t0 = _time.time() ping = _ping_endpoint(base, ep.api_key, timeout=1.5) entry["latency_ms"] = round((_time.time() - t0) * 1000) - entry["status"] = "online" if ping.get("reachable") or cached_count else "offline" + entry["status"] = "loading" if ping.get("loading") else ("online" if ping.get("reachable") or cached_count else "offline") entry["error"] = ping.get("error") entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0) except Exception as e: @@ -1606,7 +1635,32 @@ def setup_model_routes(model_discovery): ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5 ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout) if ping.get("reachable"): - status = "empty" + status = "loading" if ping.get("loading") else "empty" + if ping.get("loading"): + base = _normalize_base(r.base_url) + kind = _effective_endpoint_kind(r, base) + results.append({ + "id": r.id, + "name": r.name, + "base_url": r.base_url, + "has_key": bool(r.api_key), + "api_key_fingerprint": _api_key_fingerprint(r.api_key), + "is_enabled": r.is_enabled, + "models": visible, + "pinned_models": pinned, + "hidden_count": len(hidden), + "online": True, + "status": status, + "ping_error": (ping or {}).get("error") if ping else None, + "model_type": getattr(r, "model_type", None) or "llm", + "supports_tools": getattr(r, "supports_tools", None), + "endpoint_kind": kind, + "category": _classify_endpoint(base, kind), + "model_refresh_mode": _endpoint_refresh_mode(r, kind), + "model_refresh_interval": getattr(r, "model_refresh_interval", None), + "model_refresh_timeout": getattr(r, "model_refresh_timeout", None), + }) + continue # Best-effort: if the probe came back reachable, try # to populate cached_models in the background so the # NEXT picker load shows "online" instead of "empty". @@ -1859,7 +1913,7 @@ def setup_model_routes(model_discovery): "models": _merge_model_ids(model_ids, _pinned), "pinned_models": _pinned, "online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")), - "status": "online" if (model_ids or _pinned) else ("empty" if ping.get("reachable") else "offline"), + "status": "online" if (model_ids or _pinned) else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")), "ping_error": ping.get("error") if ping else None, "endpoint_kind": requested_kind, "category": _classify_endpoint(base_url, requested_kind), @@ -1888,7 +1942,7 @@ def setup_model_routes(model_discovery): return { "base_url": base_url, "online": bool(models) or bool(ping.get("reachable")), - "status": "online" if models else ("empty" if ping.get("reachable") else "offline"), + "status": "online" if models else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")), "ping_error": ping.get("error") if ping else None, "models": models, "count": len(models), diff --git a/routes/shell_routes.py b/routes/shell_routes.py index 52b39e3bb..d133b9254 100644 --- a/routes/shell_routes.py +++ b/routes/shell_routes.py @@ -1108,7 +1108,7 @@ def setup_shell_routes() -> APIRouter: { "name": "llama_cpp", "pip": "llama-cpp-python[server]", - "desc": "Serve GGUF models via llama.cpp", + "desc": "Great for single-GPU or CPU inference with GGUF models", "category": "LLM", "target": "remote", # Build-toolchain prereqs. Cookbook's launch bootstrap @@ -1129,7 +1129,7 @@ def setup_shell_routes() -> APIRouter: { "name": "vllm", "pip": "vllm", - "desc": "High-throughput LLM serving engine", + "desc": "Great for high-throughput multi-GPU inference", "category": "LLM", "target": "remote", }, diff --git a/routes/upload_routes.py b/routes/upload_routes.py index 1e197dd49..8b8f2d292 100644 --- a/routes/upload_routes.py +++ b/routes/upload_routes.py @@ -3,11 +3,16 @@ import os import time import json import asyncio +import shutil +import uuid +from pathlib import Path from fastapi import APIRouter, Request, File, UploadFile, HTTPException from typing import List import logging from core.middleware import require_admin +from core.database import SessionLocal, GalleryImage from src.auth_helpers import effective_user +from src.constants import GENERATED_IMAGES_DIR from src.upload_handler import count_recent_uploads logger = logging.getLogger(__name__) @@ -50,6 +55,69 @@ def setup_upload_routes(upload_handler): raise HTTPException(404, "File not found") raise HTTPException(404, "File not found") + + def _promote_chat_image_to_gallery(meta: dict, owner: str | None) -> str | None: + """Make chat-uploaded images visible in Gallery without changing chat storage.""" + is_image_file = getattr(upload_handler, "is_image_file", None) + if not callable(is_image_file): + return None + if not is_image_file(meta.get("name", ""), meta.get("mime", "")): + return None + + source_path = meta.get("path") + if not source_path or not os.path.isfile(source_path): + return None + + db = SessionLocal() + try: + file_hash = meta.get("hash") + if file_hash: + q = db.query(GalleryImage).filter( + GalleryImage.file_hash == file_hash, + GalleryImage.is_active == True, # noqa: E712 + ) + if owner: + q = q.filter(GalleryImage.owner == owner) + existing = q.first() + if existing: + return existing.id + + image_dir = Path(GENERATED_IMAGES_DIR) + image_dir.mkdir(parents=True, exist_ok=True) + ext = Path(meta.get("name") or source_path).suffix.lower() + if ext not in {".png", ".jpg", ".jpeg", ".webp", ".gif"}: + mime_ext = { + "image/png": ".png", + "image/jpeg": ".jpg", + "image/jpg": ".jpg", + "image/webp": ".webp", + "image/gif": ".gif", + }.get(meta.get("mime", "")) + ext = mime_ext or ".png" + filename = f"{uuid.uuid4().hex[:12]}{ext}" + dest_path = image_dir / filename + shutil.copy2(source_path, dest_path) + + image_id = str(uuid.uuid4()) + db.add(GalleryImage( + id=image_id, + filename=filename, + prompt=meta.get("name") or "Chat upload", + model="chat-upload", + owner=owner, + file_hash=file_hash, + width=meta.get("width"), + height=meta.get("height"), + file_size=meta.get("size"), + )) + db.commit() + return image_id + except Exception as e: + db.rollback() + logger.warning("Failed to add chat image upload to gallery: %s", e) + return None + finally: + db.close() @router.post("") async def api_upload(request: Request, files: List[UploadFile] = File(...)): @@ -78,8 +146,10 @@ def setup_upload_routes(upload_handler): for u in files: try: - meta = upload_handler.save_upload(u, client_ip, owner=effective_user(request)) - out.append({ + owner = effective_user(request) + meta = upload_handler.save_upload(u, client_ip, owner=owner) + gallery_id = _promote_chat_image_to_gallery(meta, owner) + item = { "id": meta["id"], "name": meta["name"], "mime": meta["mime"], @@ -89,7 +159,10 @@ def setup_upload_routes(upload_handler): "width": meta.get("width"), "height": meta.get("height"), "is_duplicate": meta.get("is_duplicate", False) - }) + } + if gallery_id: + item["gallery_id"] = gallery_id + out.append(item) except HTTPException: raise except Exception as e: diff --git a/src/llm_core.py b/src/llm_core.py index 9981e41e2..20a4b544c 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -907,7 +907,10 @@ def _anthropic_rejects_temperature(model: str) -> bool: return (int(match.group(1)), int(match.group(2))) >= (4, 7) # Models that support structured thinking — may output without opening tag -_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma") +_THINKING_MODEL_PATTERNS = ( + "qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", + "m2-reap", "gemma", "stepfun", "step-3", "step3", +) def _supports_thinking(model: str) -> bool: """Check if model supports structured thinking output.""" diff --git a/src/tool_parsing.py b/src/tool_parsing.py index c9548cce9..7a0638973 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -39,6 +39,10 @@ _XML_TOOL_CALL_RE = re.compile( r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)", re.IGNORECASE, ) +_XML_OPEN_TOOL_CALL_RE = re.compile( + r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z", + re.IGNORECASE, +) _XML_INVOKE_RE = re.compile( r'\s*([\s\S]*?)', re.IGNORECASE, @@ -47,6 +51,24 @@ _XML_PARAM_RE = re.compile( r'([\s\S]*?)', re.IGNORECASE, ) +_XML_DIRECT_TOOL_RE = re.compile( + r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)", + re.IGNORECASE, +) + +# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines: +# <|tool▁calls▁begin|> ... <|tool▁calls▁end|> +# <|tool▁call▁begin|>tool_name<|tool▁sep|>{...}<|tool▁call▁end|> +# These can leak as text through llama.cpp/Ollama-style endpoints when the +# engine does not return structured OpenAI tool_calls. +_STEPFUN_TOOL_CALL_RE = re.compile( + r"<|tool▁call▁begin|>\s*([A-Za-z_][\w.-]*)\s*<|tool▁sep|>\s*([\s\S]*?)\s*<|tool▁call▁end|>", + re.IGNORECASE, +) +_STEPFUN_TOOL_CALLS_WRAPPER_RE = re.compile( + r"", + re.IGNORECASE, +) # Pattern 4: blocks (MiniMax-M2.5 style) # {tool => 'tool_name', args => 'value'} @@ -446,6 +468,76 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]: return function_call_to_tool_block(tool_name, json.dumps(params)) +def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]: + """Parse direct XML tool tags inside . + + Some local models emit: + query + instead of the invoke/parameter shape: + query + Keep this as an adapter to the canonical function-call converter so aliases + and per-tool argument formatting stay in one place. + """ + tool_name = tool_match.group(1).lower().replace("-", "_") + if tool_name in {"invoke", "parameter", "tool_call", "function_call"}: + return None + mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None) + if not mapped: + return None + body = tool_match.group(2).strip() + if not body: + return None + try: + params = json.loads(body) + if not isinstance(params, dict): + params = {} + except json.JSONDecodeError: + if mapped == "web_search": + params = {"query": body} + elif mapped == "web_fetch": + params = {"url": body} + elif mapped == "bash": + params = {"command": body} + elif mapped == "python": + params = {"code": body} + elif mapped in ("read_file", "write_file"): + params = {"path": body} + else: + params = {"content": body} + from src.tool_schemas import function_call_to_tool_block + return function_call_to_tool_block(mapped, json.dumps(params)) + + +def _parse_stepfun_tool_call(call_match) -> Optional[ToolBlock]: + """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.""" + tool_name = call_match.group(1).lower().replace("-", "_").replace(".", "_") + mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None) + if not mapped: + return None + body = call_match.group(2).strip() + if not body: + return None + try: + params = json.loads(body) + if not isinstance(params, dict): + params = {} + except json.JSONDecodeError: + if mapped == "web_search": + params = {"query": body} + elif mapped == "web_fetch": + params = {"url": body} + elif mapped == "bash": + params = {"command": body} + elif mapped == "python": + params = {"code": body} + elif mapped in ("read_file", "write_file"): + params = {"path": body} + else: + params = {"content": body} + from src.tool_schemas import function_call_to_tool_block + return function_call_to_tool_block(mapped, json.dumps(params)) + + def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]: """Parse a {tool => 'name', args => '...'} block (MiniMax style).""" # Extract tool name @@ -511,8 +603,9 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: 2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models) 3. XML-style / blocks 4. blocks (MiniMax-M2.5 style) - 5. DeepSeek DSML markup (normalized to first) - 6. Non-native local model fallback: prose mentioning web_search followed by + 5. StepFun Step-3 native <|tool▁call▁begin|> tokens + 6. DeepSeek DSML markup (normalized to first) + 7. Non-native local model fallback: prose mentioning web_search followed by bare JSON args, e.g. {"query":"...", "time_filter":"week"} `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code @@ -567,12 +660,38 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: # Pattern 3: XML-style / blocks if not blocks: + for step_call in _STEPFUN_TOOL_CALL_RE.finditer(text): + block = _parse_stepfun_tool_call(step_call) + if block: + blocks.append(block) + if blocks: + return blocks # Try wrapped: ... for m in _XML_TOOL_CALL_RE.finditer(text): for inv in _XML_INVOKE_RE.finditer(m.group(1)): block = _parse_xml_invoke(inv) if block: blocks.append(block) + if not blocks: + for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)): + block = _parse_xml_direct_tool(direct) + if block: + blocks.append(block) + # Some local models stream an opening wrapper and a + # complete inner tool tag, but forget the closing . + if not blocks: + for m in _XML_OPEN_TOOL_CALL_RE.finditer(text): + body = m.group(1) + for inv in _XML_INVOKE_RE.finditer(body): + block = _parse_xml_invoke(inv) + if block: + blocks.append(block) + if blocks: + break + for direct in _XML_DIRECT_TOOL_RE.finditer(body): + block = _parse_xml_direct_tool(direct) + if block: + blocks.append(block) # Try bare without wrapper if not blocks: for inv in _XML_INVOKE_RE.finditer(text): @@ -614,7 +733,10 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str: text = _normalize_dsml(text) cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text) cleaned = _TOOL_CALL_RE.sub('', cleaned) + cleaned = _STEPFUN_TOOL_CALL_RE.sub('', cleaned) + cleaned = _STEPFUN_TOOL_CALLS_WRAPPER_RE.sub('', cleaned) cleaned = _XML_TOOL_CALL_RE.sub('', cleaned) + cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned) cleaned = _TOOL_CODE_RE.sub('', cleaned) if not skip_fenced: raw_web_json = _parse_raw_web_json_lookup(cleaned) diff --git a/static/js/cookbook.js b/static/js/cookbook.js index b778c1df9..43a3ad5d0 100644 --- a/static/js/cookbook.js +++ b/static/js/cookbook.js @@ -230,14 +230,30 @@ export function _isMetal() { } /** Detect model-specific vLLM optimizations */ +function _isStepFunStepModel(modelName) { + const n = (modelName || '').toLowerCase(); + return n.includes('stepfun') + || n.includes('step-3') + || n.includes('step3') + || n.includes('step_3'); +} + function _detectModelOptimizations(modelName) { const n = (modelName || '').toLowerCase(); const opts = { envVars: [], flags: [], tips: [] }; + // StepFun Step-3.x MoE models. Their tokenizer defines the Step tool-call + // and thinking tags; vLLM/SGLang need the step3p5 parser instead of generic + // Hermes/XML guesses, and the MoE backend should default to expert parallel. + if (_isStepFunStepModel(modelName)) { + opts.flags.push('--enable-expert-parallel'); + opts.tips.push('StepFun Step-3 MoE: expert parallel'); + opts.tips.push('StepFun parser: step3p5 for native tool calls and reasoning tags'); + } // Qwen3.5 MoE models — MoE-specific env vars + expert-parallel. // The --reasoning-parser flag is added uniformly below via // _detectReasoningParser, no longer hardcoded here. - if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) { + else if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) { opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4'); opts.flags.push('--enable-expert-parallel'); opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels'); @@ -312,6 +328,9 @@ function _detectModelOptimizations(modelName) { */ export function _detectReasoningParser(modelName) { const n = (modelName || '').toLowerCase(); + // StepFun Step-3.x uses Step's native / tool-call tokens. vLLM + // registers this parser as step3p5. + if (_isStepFunStepModel(modelName)) return 'step3p5'; // MiniMax M3 — newer vLLM nightly/parser builds use minimax_m3. This must // be checked before the M2.x rule and before the generic MiniMax tool parser. if (n.includes('minimax') && /\bm3\b/.test(n)) return 'minimax_m3'; @@ -348,6 +367,7 @@ export function _detectReasoningParser(modelName) { */ export function _detectToolParser(modelName) { const n = (modelName || '').toLowerCase(); + if (_isStepFunStepModel(modelName)) return 'step3p5'; if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder'; if (n.includes('qwen3')) return 'qwen3_xml'; if (n.includes('qwen')) return 'hermes'; // Qwen2.5 / Qwen2 / Qwen1.5 @@ -601,6 +621,13 @@ export function _buildServeCmd(f, modelName, backend) { if (f.dtype && f.dtype !== 'auto') cmd += ` --dtype ${f.dtype}`; if (f.max_seqs && f.max_seqs.toString().trim()) cmd += ` --max-running-requests ${f.max_seqs.toString().trim()}`; if (f.trust_remote) cmd += ' --trust-remote-code'; + if (f.auto_tool) cmd += ` --enable-auto-tool-choice --tool-call-parser ${_detectToolParser(modelName)}`; + if (f.expert_parallel) cmd += ' --enable-expert-parallel'; + if (f.reasoning_parser) { + const rp = typeof f.reasoning_parser === 'string' && f.reasoning_parser !== 'true' + ? f.reasoning_parser : (f._reasoning_parser_value || _detectReasoningParser(modelName) || ''); + if (rp) cmd += ` --reasoning-parser ${rp}`; + } if (!f.prefix_cache) cmd += ' --disable-radix-cache'; if (f.enforce_eager) cmd += ' --disable-cuda-graph'; } else if (backend === 'llamacpp') { @@ -909,10 +936,10 @@ async function _fetchDependencies() { // matches the engine you're configuring. Unknown packages get no // icon (the name alone is fine for librosa, hf_transfer, etc.). const _DEP_GLYPHS = { - vllm: '', - sglang: '', - llama_cpp: '', - ollama: '', + vllm: '', + sglang: '', + llama_cpp: '', + ollama: '', diffusers: '', }; const _depGlyphHtml = (name) => { @@ -2138,7 +2165,10 @@ function _wireTabEvents(body) { } } const shortName = repo.split('/').pop(); - _retryDownload(shortName, payload); + const displayName = payload.include + ? `${shortName} · ${_ggufQuantFromPath(String(payload.include).replace(/\*/g, '')) || String(payload.include).replace(/\*/g, '').replace(/\.gguf$/i, '')}` + : shortName; + _retryDownload(displayName, payload); dlInput.value = ''; }; dlBtn.addEventListener('click', triggerDownload); @@ -2179,18 +2209,13 @@ function _wireTabEvents(body) { const folded = dlFoldBody.classList.contains('is-folded'); _setFolded(!folded); }); - // Auto-fold on any downward scroll inside the cookbook modal, - // and auto-expand when the user scrolls all the way back to the - // top of whichever scroller they're in. The chevron ▸ still - // toggles manually. + // Auto-fold on any downward scroll inside the cookbook modal. Do not + // auto-expand on upward/top scroll — once the user collapses Download, + // it should stay collapsed until the header is clicked again. const _maybeFold = () => { if (dlFoldBody.classList.contains('is-folded')) return; _setFolded(true, /* persist */ false); }; - const _maybeExpand = () => { - if (!dlFoldBody.classList.contains('is-folded')) return; - _setFolded(false, /* persist */ false); - }; // Capture phase so scrolls on nested scrollers (.hwfit-list, // .cookbook-body, .modal-content) all hit us. const _modal = dlFold.closest('#cookbook-modal') || document; @@ -2205,7 +2230,6 @@ function _wireTabEvents(body) { const y = tgt.scrollTop; const prev = _lastY.get(tgt) || 0; if (y > prev) _maybeFold(); - else if (y <= 0) _maybeExpand(); _lastY.set(tgt, y); }, true); } @@ -2621,10 +2645,10 @@ function _renderRecipes() { html += ``; html += ``; html += ``; - html += `'; const firstGroup = body.querySelector('.cookbook-group'); if (firstGroup) body.insertBefore(group, firstGroup); @@ -1863,6 +1982,7 @@ export function _renderRunningTab() { return; } if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return; + toRemove.forEach(t => _tombstoneTask(t.sessionId)); const remaining = allTasks.filter(t => _taskServerKey(t) !== host || !_canClearTask(t)); _saveTasks(remaining); // Fade/slide each finished card out (same exit as the per-card clear) @@ -2000,11 +2120,12 @@ export function _renderRunningTab() { const _bdg = _taskBadge(task); const _bdgTitle = (task._unreachable && task.status === 'running') ? ' title="Server not responding — it may have crashed"' : ''; + const displayName = _taskDisplayName(task); el.innerHTML = `
${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)} - ${modelLogo(task.name)}${esc(task.name)} - ${esc(_clearPillLabel(task))}clear + ${modelLogo(task.name)}${esc(displayName)} + ${_canLaunchDownloadedTask(task) ? '' : ''}${esc(_clearPillLabel(task))}clear ${esc(_bdg.text)} @@ -2076,19 +2197,11 @@ export function _renderRunningTab() { e.stopPropagation(); const repo = task.payload?.repo_id || task.name; if (!repo) { uiModule.showToast('No model info on this task'); return; } - // Point the active server at the one it downloaded to. - const _tHost = task.remoteHost || ''; - _envState.remoteHost = _tHost; - const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost) - || _envState.servers.find(s => s.host === _tHost); - if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; } - else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; } - document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => { - if (sel && sel.tagName === 'SELECT') sel.value = _tHost || 'local'; - }); + // Point the active server at the exact profile it downloaded to. + _selectTaskServer(task); try { const { openServePanelForRepo } = await import('./cookbookServe.js'); - await openServePanelForRepo(repo); + await openServePanelForRepo(repo, _downloadServeFields(task)); // Serving it supersedes the finished download — clear the card from // the Running tab (smooth exit) now that we've jumped to Serve. _animateOutThenRemove(el, task.sessionId); @@ -3558,7 +3671,9 @@ async function _probeEndpointUntilOnline(epId, host, port) { try { // Hit the probe endpoint — it re-probes server-side and updates // cached_models. We consume (and discard) the SSE stream. - await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).then(r => r.text()).catch(() => {}); + const probeRes = await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).catch(() => null); + if (probeRes && probeRes.status === 404) return; + if (probeRes) await probeRes.text().catch(() => {}); const eps = await fetch('/api/model-endpoints', { credentials: 'same-origin' }).then(r => r.json()).catch(() => []); const ep = (eps || []).find(e => e.id === epId); if (ep && (ep.models || []).length) { diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js index a22c73af3..5ebeb0aaf 100644 --- a/static/js/cookbookServe.js +++ b/static/js/cookbookServe.js @@ -477,7 +477,9 @@ function _estimateLlamaContextFit(model, fields, modelCtxMax, modelWeightsGb = 0 } function _selectedServeTarget(panel) { - const select = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server'); + const select = panel?.querySelector?.('#hwfit-server-select') + || document.getElementById('hwfit-server-select') + || document.getElementById('hwfit-dl-server'); const servers = Array.isArray(_envState.servers) ? _envState.servers : []; let host = _envState.remoteHost || ''; let server = host ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host)) : null; @@ -643,6 +645,122 @@ function _ggufFileLabel(file) { return `${quant}${base}${size || split ? ` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`; } +function _ggufTaskDisplayPart(model, relPath) { + const rel = String(relPath || ''); + if (!rel) return ''; + const file = _ggufFilesForModel(model).find(f => f.rel_path === rel); + if (file?.quant) return String(file.quant).toUpperCase().replace(/^UD-/, ''); + const parts = rel.split('/').filter(Boolean); + const base = parts[parts.length - 1] || ''; + const parent = parts.length > 1 ? parts[parts.length - 2] : ''; + const text = `${parent} ${base}`; + const quant = text.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_[MLS]|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i); + if (quant) return quant[0].toUpperCase().replace(/^UD-/, ''); + return base.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, ''); +} + +function _serveTaskDisplayName(shortName, model, fields) { + const name = String(shortName || '').trim(); + const backend = String(fields?.backend || '').toLowerCase(); + if (backend !== 'llamacpp' && backend !== 'ollama') return name; + const part = _ggufTaskDisplayPart(model, fields?.gguf_file); + return part && !name.includes(` · ${part}`) ? `${name} · ${part}` : name; +} + +function _safeGgufRelPath(relPath) { + const rel = String(relPath || '').replace(/\\/g, '/').replace(/^\/+/, ''); + if (!rel || rel.startsWith('../') || rel.includes('/../') || rel === '..') return ''; + if (rel.includes('\0')) return ''; + return rel; +} + +function _ggufDeleteChoice(repo, files) { + return new Promise(resolve => { + let overlay = document.getElementById('cookbook-gguf-delete-overlay'); + if (!overlay) { + overlay = document.createElement('div'); + overlay.id = 'cookbook-gguf-delete-overlay'; + overlay.className = 'modal hidden'; + overlay.innerHTML = + ''; + document.body.appendChild(overlay); + } + + const safeFiles = files + .map(f => ({ ...f, rel_path: _safeGgufRelPath(f.rel_path) })) + .filter(f => f.rel_path); + const msg = overlay.querySelector('#cookbook-gguf-delete-msg'); + const list = overlay.querySelector('#cookbook-gguf-delete-list'); + const cancelBtn = overlay.querySelector('#cookbook-gguf-delete-cancel'); + const repoBtn = overlay.querySelector('#cookbook-gguf-delete-repo'); + const selectedBtn = overlay.querySelector('#cookbook-gguf-delete-selected'); + const prevFocus = document.activeElement; + + msg.textContent = `${repo} has multiple GGUF files. Pick what to delete.`; + list.innerHTML = safeFiles.map((file, idx) => { + const label = esc ? esc(_ggufFileLabel(file)) : _ggufFileLabel(file); + const rel = esc ? esc(file.rel_path) : file.rel_path; + return ``; + }).join(''); + + function cleanup(result) { + overlay.classList.add('hidden'); + overlay.style.display = 'none'; + cancelBtn.removeEventListener('click', onCancel); + repoBtn.removeEventListener('click', onRepo); + selectedBtn.removeEventListener('click', onSelected); + overlay.removeEventListener('click', onBackdrop); + document.removeEventListener('keydown', onKey); + try { prevFocus && prevFocus.focus && prevFocus.focus(); } catch {} + resolve(result); + } + function onCancel() { cleanup(null); } + function onRepo() { cleanup({ mode: 'repo' }); } + function onSelected() { + const selected = [...list.querySelectorAll('input[type="checkbox"]:checked')] + .map(input => safeFiles[Number(input.value)]) + .filter(Boolean); + if (!selected.length) { + uiModule.showToast?.('Select at least one GGUF file.'); + return; + } + cleanup({ mode: 'files', files: selected }); + } + function onBackdrop(e) { if (e.target === overlay) cleanup(null); } + function onKey(e) { + if (e.key === 'Escape') { + e.preventDefault(); + e.stopPropagation(); + cleanup(null); + } + } + + cancelBtn.addEventListener('click', onCancel); + repoBtn.addEventListener('click', onRepo); + selectedBtn.addEventListener('click', onSelected); + overlay.addEventListener('click', onBackdrop); + document.addEventListener('keydown', onKey); + overlay.classList.remove('hidden'); + overlay.style.display = ''; + selectedBtn.focus(); + }); +} + function _shellPathExpr(path) { const s = String(path || ''); if (s === '~') return '${HOME}'; @@ -729,7 +847,7 @@ function _rerenderCachedModels() { ? ` ${_isDlActive ? 'downloading' : 'stalled'}` : ''; const _favoritePill = _isFavorite ? ' pinned' : ''; - html += `
${modelLogo(m.repo_id)}${esc(shortName)}${_favoritePill}${hfLink ? ` HF ↗` : ''}${_runningPill}${_downloadingPill}
`; + html += `
${modelLogo(m.repo_id)}${esc(shortName)}${_favoritePill}${hfLink ? ` HF ↗` : ''}${_runningPill}${_downloadingPill}
`; html += `
${metaParts.join(' \u00b7 ')}
`; html += `
`; const _bk = _detectBackend(m).backend; @@ -962,6 +1080,11 @@ function _rerenderCachedModels() { const _isMiniMaxM3 = _isMiniMaxM3Model({ ...m, repo_id: repo }); const _isMiniMaxM2 = _isMiniMaxM2Model({ ...m, repo_id: repo }); const _isMiniMaxMSeries = _isMiniMaxM3 || _isMiniMaxM2; + const _toolParserDefault = _detectToolParser(repo); + const _isStepFunStep = _toolParserDefault === 'step3p5'; + const _nativeToolDefault = _isMiniMaxMSeries || _isStepFunStep; + const _reasoningDefault = _isMiniMaxMSeries || _isStepFunStep; + const _expertParallelDefault = _isMiniMaxMSeries || _isStepFunStep; const svm = (k, def) => (_modelSs && _hasOwn(_modelSs, k)) ? _modelSs[k] : def; const _serveTarget = _selectedServeTarget(); const _backendChoices = _backendChoicesForTarget(_serveTarget); @@ -993,8 +1116,15 @@ function _rerenderCachedModels() { const _l = (name, tip) => `${name}?`; const _ggufChoices = _runnableGgufFiles(m); const _savedGguf = String(sv('gguf_file', '') || ''); + const _preferredGgufInclude = String(sv('_preferredGgufInclude', '') || '').replace(/\*/g, '').toLowerCase(); + const _preferredGguf = _preferredGgufInclude + ? (_ggufChoices.find(f => String(f.rel_path || '').toLowerCase().includes(_preferredGgufInclude)) + || _ggufChoices.find(f => String(f.name || '').toLowerCase().includes(_preferredGgufInclude))) + : null; const _defaultGguf = _ggufChoices.some(f => f.rel_path === _savedGguf) ? _savedGguf + : (_preferredGguf?.rel_path || '') + ? _preferredGguf.rel_path : (_ggufChoices[0]?.rel_path || ''); const _ggufOptions = _ggufChoices.map(f => `` @@ -1026,6 +1156,10 @@ function _rerenderCachedModels() { + ``; let panelHtml = `
`; + const _replaceTaskId = String(sv('_replaceTaskId', '') || ''); + if (_replaceTaskId) { + panelHtml += ``; + } // Runtime-readiness note pinned at the top of the serve area so the // user sees "vLLM ready on …" before scrolling into the configure // form. Hidden until the readiness probe returns. The × button @@ -1202,20 +1336,20 @@ function _rerenderCachedModels() { const _rp_name = _rp_flag ? _rp_flag.split(' ')[1] : ''; panelHtml += `
`; panelHtml += ``; - panelHtml += ``; + panelHtml += ``; // Always-render the Reasoning Parser, Expert Parallel, and MoE Env // checkboxes — the model-family detection above is a hint, not a // hard gate. User asked to keep these visible regardless so that // a borderline-undetected MoE/reasoning model can still toggle // them without dropping back to the raw command box. - panelHtml += ``; + panelHtml += ``; panelHtml += ``; panelHtml += ``; // Inline the previously-second vLLM checks row so Expert Parallel / // Speculative / MoE Env sit next to Prefix Caching with no gap. All // three are vLLM-only — class-gated so they hide on SGLang. Always // render so the user can flip them on for any MoE model. - panelHtml += ``; + panelHtml += ``; panelHtml += ``; panelHtml += ``; { @@ -2870,11 +3004,11 @@ function _rerenderCachedModels() { // preflight and let the launch silently fall to CPU. let _hwGpus = []; try { - const _gh = (_selectedServeTarget.host || '').trim(); + const _gh = (launchTarget.host || '').trim(); const _gp = new URLSearchParams(); if (_gh) { _gp.set('host', _gh); - const _sp = (_serverByVal?.(_selectedServeTarget.serverKey || _gh) || {}).port; + const _sp = (_serverByVal?.(launchTarget.serverKey || _gh) || {}).port; if (_sp) _gp.set('ssh_port', _sp); } const _gr = await fetch('/api/cookbook/gpus' + (_gp.toString() ? '?' + _gp : ''), { credentials: 'same-origin' }); @@ -3069,6 +3203,7 @@ function _rerenderCachedModels() { try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {} const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {}; const _saved = { ...serveState, _forceBackend: true }; + delete _saved._replaceTaskId; byRepo[repo] = _saved; localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved })); } catch {} @@ -3127,7 +3262,8 @@ function _rerenderCachedModels() { await _withSpinner(_launchBtn, async () => { // Pass the exact form values so the running task can be re-opened // in the Serve panel pre-filled with these settings (Edit button). - await _launchServeTask(shortName, repo, launchCmd, serveState, serveHost, { serverKey: serveServerKey, serverName: serveServerName }); + const taskDisplayName = _serveTaskDisplayName(shortName, m, serveState); + await _launchServeTask(taskDisplayName, repo, launchCmd, serveState, serveHost, { serverKey: serveServerKey, serverName: serveServerName }); }); } finally { _envState.env = origEnv; @@ -3188,7 +3324,6 @@ function _resolveCacheHost() { } async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = null) { - if (!skipConfirm && !(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) return; const m = model || _cachedAllModels.find(x => x.repo_id === repo); // Delete the EXACT on-disk path the scan reported. Models in a custom // model dir live at /; HF-cache models at @@ -3204,13 +3339,32 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul } else { target = `~/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}`; } + let deleteChoice = { mode: 'repo' }; + const ggufFiles = _ggufFilesForModel(m); + if (!skipConfirm) { + if (ggufFiles.length > 1) { + deleteChoice = await _ggufDeleteChoice(repo, ggufFiles); + if (!deleteChoice) return; + } else if (!(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) { + return; + } + } const host = _resolveCacheHost(); let cmd; if (_isWindows()) { const winTarget = target.startsWith('~') ? target.replace(/^~/, '$env:USERPROFILE').replace(/\//g, '\\') : target.replace(/\//g, '\\'); - cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`; + if (deleteChoice.mode === 'files') { + const targets = deleteChoice.files + .map(f => _safeGgufRelPath(f.rel_path)) + .filter(Boolean) + .map(rel => `${winTarget}\\${rel.replace(/\//g, '\\')}`); + if (!targets.length) return; + cmd = targets.map(p => `Remove-Item -Force "${p.replace(/"/g, '\\"')}" -ErrorAction SilentlyContinue`).join('; '); + } else { + cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`; + } if (host) { const pf = _sshPrefix(_getPort(host)); cmd = `ssh ${pf}${host} "powershell -Command \\"${cmd}\\""`; @@ -3219,7 +3373,16 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul // $HOME expands inside double quotes; ~ would not, so normalize the // fallback. Quoting also handles spaces in custom model-dir paths. const unixTarget = target.startsWith('~') ? target.replace(/^~/, '$HOME') : target; - cmd = `rm -rf "${unixTarget}"`; + if (deleteChoice.mode === 'files') { + const targets = deleteChoice.files + .map(f => _safeGgufRelPath(f.rel_path)) + .filter(Boolean) + .map(rel => `${target.replace(/\/+$/, '')}/${rel}`); + if (!targets.length) return; + cmd = `rm -f ${targets.map(p => _shellPathExpr(p)).join(' ')} && find ${_shellPathExpr(target)} -type d -empty -delete`; + } else { + cmd = `rm -rf "${unixTarget}"`; + } if (host) cmd = _sshCmd(host, cmd, _getPort(host)); } // Deleting a large model (tens/hundreds of GB) can take a while, especially @@ -3244,7 +3407,13 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul body: JSON.stringify({ command: cmd }), }); if (!res.ok) { uiModule.showError(`Delete failed (${res.status})`); return; } - if (itemEl) { + if (deleteChoice.mode === 'files') { + if (m && Array.isArray(m.gguf_files)) { + const removed = new Set(deleteChoice.files.map(f => _safeGgufRelPath(f.rel_path))); + m.gguf_files = m.gguf_files.filter(f => !removed.has(_safeGgufRelPath(f.rel_path))); + } + await _fetchCachedModels(false); + } else if (itemEl) { itemEl.querySelector('.cookbook-delete-overlay')?.remove(); itemEl.style.transition = 'opacity 0.24s ease, transform 0.24s ease, max-height 0.28s ease, padding 0.28s ease, margin 0.28s ease'; itemEl.style.maxHeight = `${Math.max(itemEl.getBoundingClientRect().height, itemEl.scrollHeight)}px`; @@ -3258,9 +3427,9 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul requestAnimationFrame(() => { itemEl.style.maxHeight = '0'; }); await new Promise(resolve => setTimeout(resolve, 300)); if (itemEl.parentElement) itemEl.remove(); + // Drop from the in-memory list so a re-render/filter doesn't resurrect it. + _cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo); } - // Drop from the in-memory list so a re-render/filter doesn't resurrect it. - _cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo); } catch (e) { uiModule.showError('Delete failed: ' + (e && e.message ? e.message : e)); } finally { diff --git a/static/js/modelPicker.js b/static/js/modelPicker.js index 5538bf278..12fb3479e 100644 --- a/static/js/modelPicker.js +++ b/static/js/modelPicker.js @@ -77,6 +77,7 @@ function _handlePickerKeydown(e, listEl, itemSelector, closeFn) { // Dependencies injected via initModelPicker() let _deps = null; let _autoSelectingDefault = false; +let _defaultChatPickInFlight = false; function _modelExists(modelId, url) { if (!modelId || !window.modelsModule || !window.modelsModule.getCachedItems) return false; @@ -91,6 +92,43 @@ function _modelExists(modelId, url) { }); } +async function _ensureDefaultPendingChat() { + if (!_deps || _defaultChatPickInFlight) return; + if (_deps.getCurrentSessionId && _deps.getCurrentSessionId()) return; + const pending = _deps.getPendingChat && _deps.getPendingChat(); + if (pending && pending.modelId) return; + _defaultChatPickInFlight = true; + try { + let dc = null; + try { + const res = await fetch(`${API_BASE}/api/default-chat`, { credentials: 'same-origin' }); + if (res.ok) dc = await res.json(); + } catch (_) {} + if (dc && dc.endpoint_url && dc.model) { + _deps.setPendingChat({ + url: dc.endpoint_url, + modelId: dc.model, + endpointId: dc.endpoint_id || '', + }); + try { window.__odysseusDefaultChat = dc; } catch (_) {} + updateModelPicker(); + return; + } + // No configured default: preserve the old convenience fallback. + if (window.modelsModule && window.modelsModule.getCachedItems) { + const items = window.modelsModule.getCachedItems(); + const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length)); + if (first) { + const models = (first.models || []).concat(first.models_extra || []); + _deps.setPendingChat({ url: first.url, modelId: models[0], endpointId: first.endpoint_id }); + updateModelPicker(); + } + } + } finally { + _defaultChatPickInFlight = false; + } +} + /** * Initialize the model picker dropdown. * @param {Object} deps @@ -710,25 +748,7 @@ export function updateModelPicker() { } } if (!modelId && !_autoSelectingDefault && window.modelsModule && window.modelsModule.getCachedItems) { - const items = window.modelsModule.getCachedItems(); - const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length)); - if (first) { - const models = (first.models || []).concat(first.models_extra || []); - modelId = models[0]; - if (!currentSessionId) { - _deps.setPendingChat({ url: first.url, modelId, endpointId: first.endpoint_id }); - } else { - if (s) { s.model = modelId; s.endpoint_url = first.url; } - _autoSelectingDefault = true; - const fd = new FormData(); - fd.append('model', modelId); - fd.append('endpoint_url', first.url || ''); - if (first.endpoint_id) fd.append('endpoint_id', first.endpoint_id); - fetch(`${API_BASE}/api/session/${currentSessionId}`, { method: 'PATCH', body: fd }) - .catch(() => {}) - .finally(() => { _autoSelectingDefault = false; }); - } - } + _ensureDefaultPendingChat(); } const displayName = modelId ? modelId.split('/').pop() : 'Select model'; diff --git a/static/js/notes.js b/static/js/notes.js index fa754b771..9758f3608 100644 --- a/static/js/notes.js +++ b/static/js/notes.js @@ -1896,10 +1896,6 @@ function _renderNotes() { ${_hasItems(note) ? `
` : ''} ${reminderTagHtml} ${noteTags.length ? `
${noteTags.map(t => ``).join(' ')}
` : ''} - ${note.agent_session_id ? `` : ''}
${colorDots}
@@ -2304,16 +2300,6 @@ function _bindCardEvents(body) { _openNoteCornerMenu(btn); }); }); - // Agent tag — opens the chat session the agent ran for this note. - body.querySelectorAll('.note-agent-tag').forEach(tag => { - tag.addEventListener('click', (e) => { - e.preventDefault(); - e.stopPropagation(); - const sid = tag.dataset.sessionId; - const _sm = window.sessionModule; - if (sid && _sm && _sm.selectSession) { closePanel(); _sm.selectSession(sid); } - }); - }); body.querySelectorAll('.note-card-label-chip').forEach(chip => { chip.addEventListener('click', (e) => { e.preventDefault(); @@ -4383,18 +4369,16 @@ function _openTodoAgentMenu(btn) { const noteId = btn.dataset.noteId; const idx = parseInt(btn.dataset.idx); const sid = btn.dataset.sessionId || ''; - const title = btn.dataset.agentTitle || 'Agent chat'; const menu = document.createElement('div'); menu.className = 'note-corner-menu-dropdown note-agent-item-menu'; menu.innerHTML = ` -
${_esc(title)}
${sid ? `` : ''} `; _positionNoteMenu(menu, btn); const openBtn = menu.querySelector('[data-act="open"]'); diff --git a/static/style.css b/static/style.css index 44ef5e6c9..97dec8b08 100644 --- a/static/style.css +++ b/static/style.css @@ -5324,6 +5324,84 @@ body.bg-pattern-sparkles { .confirm-btn-primary:hover { filter:brightness(1.15); } .confirm-btn-danger { background:var(--color-danger); color:#fff; border-color:transparent; } .confirm-btn-danger:hover { background:var(--color-error); } + #cookbook-gguf-delete-overlay { + background:rgba(0,0,0,0.5); + backdrop-filter:blur(4px); + pointer-events:auto !important; + z-index:99999 !important; + position:fixed !important; + inset:0 !important; + } + .cookbook-gguf-delete-box { + width:560px; + max-width:92vw; + } + .cookbook-gguf-delete-list { + display:flex; + flex-direction:column; + gap:6px; + max-height:42vh; + overflow:auto; + padding:2px 2px 4px; + } + .cookbook-gguf-delete-row { + display:grid; + grid-template-columns:18px minmax(0,1fr); + gap:7px 8px; + align-items:start; + padding:7px 8px; + border:1px solid var(--border); + border-radius:7px; + background:color-mix(in srgb, var(--panel, var(--bg)) 92%, var(--fg) 8%); + cursor:pointer; + } + .cookbook-gguf-delete-row:hover { + border-color:color-mix(in srgb, var(--accent-primary, var(--fg)) 45%, var(--border)); + } + .cookbook-gguf-delete-cb { + -webkit-appearance:none; + appearance:none; + width:8px !important; + height:8px !important; + min-width:8px; + min-height:8px; + padding:0; + margin:4px 0 0; + border:1px solid var(--border); + border-radius:50%; + background:transparent; + box-sizing:content-box; + cursor:pointer; + transition:background 0.15s, border-color 0.15s, transform 0.12s; + } + .cookbook-gguf-delete-cb:hover { + border-color:var(--accent, var(--red)); + transform:scale(1.12); + } + .cookbook-gguf-delete-cb:checked { + background:var(--accent, var(--red)); + border-color:var(--accent, var(--red)); + } + .cookbook-gguf-delete-main, + .cookbook-gguf-delete-path { + min-width:0; + overflow:hidden; + text-overflow:ellipsis; + white-space:nowrap; + } + .cookbook-gguf-delete-main { + font-size:0.86rem; + color:var(--fg); + } + .cookbook-gguf-delete-path { + grid-column:2; + margin-top:-2px; + font-size:0.74rem; + opacity:0.58; + } + .cookbook-gguf-delete-actions { + flex-wrap:wrap; + } /* Styled prompt — text-input dialog (used in place of window.prompt) */ #styled-prompt-overlay { background:rgba(0,0,0,0.5); @@ -19222,6 +19300,18 @@ body.gallery-selecting .gallery-dl-btn, background: color-mix(in srgb, var(--red) 20%, transparent); } .cookbook-gpu-kill:disabled { opacity: 0.4; cursor: wait; } +.cookbook-serve-title { + display: flex; + align-items: center; + gap: 4px; + min-width: 0; +} +.cookbook-serve-title-name { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} .cookbook-hf-link { font-size: 9px; text-decoration: none; @@ -19234,6 +19324,7 @@ body.gallery-selecting .gallery-dl-btn, vertical-align: 1px; letter-spacing: 0.3px; font-weight: 600; + flex-shrink: 0; } .cookbook-hf-link:hover { opacity: 0.8; @@ -19626,6 +19717,9 @@ body.gallery-selecting .gallery-dl-btn, position: relative; top: -2px; } +.cookbook-dep-reinstall { + top: -3px; +} .cookbook-dep-rebuild:hover { background: color-mix(in srgb, var(--accent, var(--red)) 18%, transparent); color: var(--accent, var(--red)); @@ -20619,6 +20713,11 @@ body.gallery-selecting .gallery-dl-btn, } .cookbook-task[data-status="done"] .cookbook-task-check-ico { display: inline; } .cookbook-task[data-status="done"] .cookbook-task-clear-ico { display: none; } +@media (max-width: 820px) { + .cookbook-task-check { + top: 2px; + } +} .cookbook-task-start-now { display: inline-flex; align-items: center; @@ -20652,24 +20751,30 @@ body.gallery-selecting .gallery-dl-btn, /* "Serve" button on a finished download — green pill matching the "running" / finished badge (it sits next to the green FINISHED chip + check). */ .cookbook-task-serve-btn { - font-size: 9px; - font-weight: 600; - padding: 1px 6px; - border: none; - border-radius: 3px; - line-height: 16px; + display: inline-flex; + align-items: center; + gap: 3px; + padding: 1px 6px 1px 4px; + border: 0; + border-radius: 9px; + line-height: 1; flex-shrink: 0; cursor: pointer; font-family: inherit; - background: color-mix(in srgb, var(--green, #50fa7b) 20%, transparent); + font-size: 9px; + text-transform: lowercase; + background: transparent; color: var(--green, #50fa7b); position: relative; top: -2px; + margin-right: 2px; appearance: none; -webkit-appearance: none; -moz-appearance: none; + transition: background 0.15s; } -.cookbook-task-serve-btn:hover { background: color-mix(in srgb, var(--green, #50fa7b) 32%, transparent); } +.cookbook-task-serve-btn svg { flex-shrink: 0; } +.cookbook-task-serve-btn:hover { background: color-mix(in srgb, var(--green, #50fa7b) 16%, transparent); } .cookbook-task-sub { padding: 1px 10px 4px; line-height: 1; @@ -21448,6 +21553,31 @@ body.gallery-selecting .gallery-dl-btn, .cookbook-dl-btn:hover { opacity: 0.9; } +.cookbook-dl-gguf-row { + margin-top: -1px; + gap: 5px; + align-items: center; + justify-content: flex-end; + font-size: 11px; + position: relative; + top: -2px; +} +.cookbook-dl-gguf-label { + opacity: 0.65; + flex-shrink: 0; +} +#cookbook-dl-gguf-quant { + height: 28px; + min-width: 118px; + flex: 0 0 auto; +} +#cookbook-dl-gguf-note { + opacity: 0.55; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 240px; +} /* HF link in search panel */ .hwfit-panel-hf-link { @@ -31920,24 +32050,34 @@ body.notes-drag-mode .note-card-pin svg { .note-corner-menu-dropdown .ncm-item:hover { background: color-mix(in srgb, var(--fg) 8%, transparent); } -/* "Agent" tag on a note that has a linked agent chat session */ -.note-agent-tag { - align-self: flex-start; +.note-checkbox-agent { display: inline-flex; align-items: center; - gap: 5px; - background: color-mix(in srgb, var(--accent, var(--red)) 14%, transparent); - border: 1px solid color-mix(in srgb, var(--accent, var(--red)) 35%, transparent); + justify-content: center; + width: 14px; + height: 14px; + padding: 0; + margin: 0 1px; + border: 0; + background: transparent; color: var(--accent, var(--red)); - border-radius: 999px; - padding: 3px 10px 3px 8px; - font-size: 11px; - font-weight: 600; + box-shadow: none; cursor: pointer; - margin-top: 2px; - transition: background 0.12s; + opacity: 0; + transition: opacity 0.12s, color 0.12s; +} +.note-checkbox:hover .note-checkbox-agent { opacity: 0.55; } +.note-checkbox-agent:hover { + background: transparent; + opacity: 1 !important; +} +.note-checkbox-agent.is-agent-stream-complete { + color: #50fa7b; + opacity: 0.9; +} +.note-checkbox-agent svg { + display: block; } -.note-agent-tag:hover { background: color-mix(in srgb, var(--accent, var(--red)) 24%, transparent); } .note-card { /* Same tint that .doclib-card uses so a default (uncolored) note @@ -36414,6 +36554,10 @@ body.research-panel-view #research-divider { display:none; } .research-setting { display:flex; flex-direction:column; flex:1; min-width:90px; } +.research-settings-row .research-setting:nth-last-child(-n + 3) { + position: relative; + top: 3px; +} .research-setting-label { font-size:9px; text-transform:uppercase; letter-spacing:0.5px; opacity:0.5; margin-bottom:2px; diff --git a/tests/test_fenced_example_not_executed_for_native_models.py b/tests/test_fenced_example_not_executed_for_native_models.py index 2b69ebc5b..9cac7ab8d 100644 --- a/tests/test_fenced_example_not_executed_for_native_models.py +++ b/tests/test_fenced_example_not_executed_for_native_models.py @@ -221,6 +221,60 @@ def test_skip_fenced_still_recovers_xml_invoke_markup(): assert "latest python release" in blocks[0].content +def test_stepfun_native_tool_tokens_are_executed_even_when_fenced_fallback_is_skipped(): + leaked = ( + "<|tool▁calls▁begin|>" + "<|tool▁call▁begin|>web_search<|tool▁sep|>" + '{"query":"Sweden news today"}' + "<|tool▁call▁end|>" + "<|tool▁calls▁end|>" + ) + blocks = parse_tool_blocks(leaked, skip_fenced=True) + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert "Sweden news today" in blocks[0].content + assert strip_tool_blocks(leaked, skip_fenced=True) == "" + + +def test_stepfun_native_tool_tokens_accept_plain_web_query(): + leaked = ( + "<|tool▁call▁begin|>web_search<|tool▁sep|>" + "Sweden news today" + "<|tool▁call▁end|>" + ) + blocks = parse_tool_blocks(leaked, skip_fenced=True) + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert "Sweden news today" in blocks[0].content + + +def test_skip_fenced_still_recovers_direct_xml_tool_markup(): + leaked = ( + "I'll search now.\n" + "News in Sweden today 2026-06-22" + ) + blocks = parse_tool_blocks(leaked, skip_fenced=True) + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert "News in Sweden today 2026-06-22" in blocks[0].content + assert strip_tool_blocks(leaked, skip_fenced=True) == "I'll search now." + + +def test_skip_fenced_recovers_direct_xml_tool_markup_with_unclosed_wrapper(): + leaked = ( + "I'll search now.\n" + "\n" + "\n" + "Sweden news today 2026-06-22\n" + "" + ) + blocks = parse_tool_blocks(leaked, skip_fenced=True) + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert "Sweden news today 2026-06-22" in blocks[0].content + assert strip_tool_blocks(leaked, skip_fenced=True) == "I'll search now." + + def test_skip_fenced_still_recovers_dsml_markup(): dsml = ( "Let me search for that.\n" diff --git a/tests/test_upload_multifile.py b/tests/test_upload_multifile.py index ef2e43596..2e40948e6 100644 --- a/tests/test_upload_multifile.py +++ b/tests/test_upload_multifile.py @@ -19,7 +19,12 @@ from pathlib import Path import pytest from fastapi import APIRouter +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import NullPool +import core.database as cdb +from core.database import GalleryImage from src.upload_handler import count_recent_uploads, UploadHandler import routes.upload_routes as up @@ -82,6 +87,10 @@ def _files(n): return [types.SimpleNamespace(filename=f"f{i}.txt") for i in range(n)] +def _image_upload(name="photo.png", content=b"not really png but enough for route metadata"): + return types.SimpleNamespace(filename=name, file=io.BytesIO(content)) + + @pytest.fixture(autouse=True) def _reset_router(monkeypatch): # Module-level router accumulates routes across setup calls; reset it. @@ -163,3 +172,64 @@ def test_six_file_batch_is_not_rate_limited(tmp_path): assert meta and meta.get("id") saved += 1 assert saved == 6 + + +async def test_chat_image_upload_is_added_to_gallery(tmp_path, monkeypatch): + engine = create_engine( + f"sqlite:///{tmp_path / 'gallery.db'}", + connect_args={"check_same_thread": False}, + poolclass=NullPool, + ) + cdb.Base.metadata.create_all(engine) + TestingSession = sessionmaker(bind=engine, autoflush=False, autocommit=False) + gallery_dir = tmp_path / "generated_images" + + monkeypatch.setattr(up, "SessionLocal", TestingSession) + monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(gallery_dir)) + + h = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads")) + up.setup_upload_routes(h) + endpoint = _endpoint(up.router) + + result = await endpoint(_request(user="alice"), [_image_upload()]) + uploaded = result["files"][0] + + assert uploaded["gallery_id"] + db = TestingSession() + try: + image = db.query(GalleryImage).filter(GalleryImage.id == uploaded["gallery_id"]).one() + assert image.owner == "alice" + assert image.model == "chat-upload" + assert image.prompt == "photo.png" + assert image.file_hash == uploaded["hash"] + assert (gallery_dir / image.filename).exists() + finally: + db.close() + + +async def test_non_image_chat_upload_is_not_added_to_gallery(tmp_path, monkeypatch): + engine = create_engine( + f"sqlite:///{tmp_path / 'gallery.db'}", + connect_args={"check_same_thread": False}, + poolclass=NullPool, + ) + cdb.Base.metadata.create_all(engine) + TestingSession = sessionmaker(bind=engine, autoflush=False, autocommit=False) + monkeypatch.setattr(up, "SessionLocal", TestingSession) + monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(tmp_path / "generated_images")) + + h = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads")) + up.setup_upload_routes(h) + endpoint = _endpoint(up.router) + + result = await endpoint(_request(user="alice"), [types.SimpleNamespace( + filename="notes.txt", + file=io.BytesIO(b"plain text upload"), + )]) + + assert "gallery_id" not in result["files"][0] + db = TestingSession() + try: + assert db.query(GalleryImage).count() == 0 + finally: + db.close()