Cookbook model workflow fixes

This commit is contained in:
pewdiepie-archdaemon
2026-06-21 11:02:35 +00:00
parent 8c46172e87
commit c504214925
38 changed files with 3042 additions and 459 deletions
+33 -4
View File
@@ -22,6 +22,31 @@ from fastapi import HTTPException
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Matches a message that *starts* with a greeting / filler word. Whatever
# follows the opener is captured as ``tail`` for further inspection.
_CASUAL_OPENING_RE = re.compile(
    r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
    r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
    re.IGNORECASE,
)
# Keywords that signal a real request even when the message opens casually.
_CASUAL_BLOCKLIST_RE = re.compile(
    r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
    r"download|model|email|document|doc|note|calendar|task|search|web|research|"
    r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
    re.IGNORECASE,
)
# Word tokens in the tail. ``\w`` is Unicode-aware for ``str`` patterns, so
# non-Latin words are counted too (an ASCII-only class would count them as
# zero words and misclassify a real request as casual).
_CASUAL_TAIL_WORD_RE = re.compile(r"[\w'-]+")


def _is_casual_low_signal(text: str) -> bool:
    """Return True for short greetings/slang that should not pull memory, skills, RAG, or docs.

    A message qualifies only when all of the following hold:

    * it begins with one of the casual openers in ``_CASUAL_OPENING_RE``;
    * the remainder (the "tail") contains none of the task keywords in
      ``_CASUAL_BLOCKLIST_RE``;
    * the tail has at most two word tokens.

    ``None`` and empty input are treated as not casual. Multi-line messages
    never match because the opener pattern's ``.*`` does not span newlines.
    """
    stripped = str(text or "").strip()
    opener = _CASUAL_OPENING_RE.match(stripped)
    if opener is None:
        return False
    tail = opener.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(tail):
        return False
    return len(_CASUAL_TAIL_WORD_RE.findall(tail)) <= 2
# ── Data containers ────────────────────────────────────────────────────── # # ── Data containers ────────────────────────────────────────────────────── #
@@ -579,6 +604,7 @@ async def build_chat_context(
# Resolve user prefs # Resolve user prefs
user = get_current_user(request) user = get_current_user(request)
uprefs = load_prefs_for_user(user) uprefs = load_prefs_for_user(user)
casual_low_signal = _is_casual_low_signal(message)
# Memory enabled? # Memory enabled?
mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True) mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True)
@@ -588,6 +614,9 @@ async def build_chat_context(
if not allow_tool_preprocessing: if not allow_tool_preprocessing:
mem_enabled = False mem_enabled = False
skills_enabled = False skills_enabled = False
if casual_low_signal:
mem_enabled = False
skills_enabled = False
logger.debug( logger.debug(
"Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)", "Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"), mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
@@ -603,11 +632,11 @@ async def build_chat_context(
# Use RAG? # Use RAG?
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
if incognito or not allow_tool_preprocessing or is_research_spinoff: if incognito or not allow_tool_preprocessing or is_research_spinoff or casual_low_signal:
use_rag_val = False use_rag_val = False
# If pre-fetched search context was provided (compare mode), skip live web search # If pre-fetched search context was provided (compare mode), skip live web search
skip_web = bool(search_context) or not allow_tool_preprocessing skip_web = bool(search_context) or not allow_tool_preprocessing or casual_low_signal
# Build context preface # Build context preface
# The stream path uses enhanced_message (with CoT/preprocessing applied), # The stream path uses enhanced_message (with CoT/preprocessing applied),
@@ -626,7 +655,7 @@ async def build_chat_context(
incognito=incognito, incognito=incognito,
use_skills=skills_enabled, use_skills=skills_enabled,
) )
if use_rag is not None or is_research_spinoff: if use_rag is not None or is_research_spinoff or casual_low_signal:
_preface_kwargs["use_rag"] = use_rag_val _preface_kwargs["use_rag"] = use_rag_val
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs) preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
@@ -634,7 +663,7 @@ async def build_chat_context(
used_memories = getattr(chat_processor, '_last_used_memories', []) used_memories = getattr(chat_processor, '_last_used_memories', [])
# Inject pre-fetched search context (compare mode) # Inject pre-fetched search context (compare mode)
if search_context and allow_tool_preprocessing: if search_context and allow_tool_preprocessing and not casual_low_signal:
preface.append(untrusted_context_message("prefetched search context", search_context)) preface.append(untrusted_context_message("prefetched search context", search_context))
# YouTube transcripts # YouTube transcripts
+10 -1
View File
@@ -826,7 +826,11 @@ def setup_chat_routes(
from src.settings import get_setting from src.settings import get_setting
_global_disabled = get_setting("disabled_tools", []) _global_disabled = get_setting("disabled_tools", [])
if _global_disabled and isinstance(_global_disabled, list): if _global_disabled and isinstance(_global_disabled, list):
disabled_tools.update(_global_disabled) explicit_web_allowed = allow_web_search is not None and str(allow_web_search).lower() == "true"
if explicit_web_allowed:
disabled_tools.update(t for t in _global_disabled if t not in {"web_search", "web_fetch"})
else:
disabled_tools.update(_global_disabled)
# Light auto-escalation: the user is in chat mode and just expressed a # Light auto-escalation: the user is in chat mode and just expressed a
# notes/calendar/email intent. Grant the relevant managers but withhold # notes/calendar/email intent. Grant the relevant managers but withhold
@@ -1256,6 +1260,10 @@ def setup_chat_routes(
_max_rounds = _DEFAULT_ROUNDS _max_rounds = _DEFAULT_ROUNDS
_max_rounds = max(1, min(_max_rounds, 200)) _max_rounds = max(1, min(_max_rounds, 200))
_forced_tools = None
if allow_web_search is not None and str(allow_web_search).lower() == "true":
_forced_tools = {"web_search", "web_fetch"}
async for chunk in stream_agent_loop( async for chunk in stream_agent_loop(
sess.endpoint_url, sess.endpoint_url,
sess.model, sess.model,
@@ -1277,6 +1285,7 @@ def setup_chat_routes(
plan_mode=plan_mode, plan_mode=plan_mode,
approved_plan=approved_plan or None, approved_plan=approved_plan or None,
workspace=workspace or None, workspace=workspace or None,
forced_tools=_forced_tools,
): ):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"): if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try: try:
+16 -3
View File
@@ -964,18 +964,31 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
runner_lines.append(' fi # end _odysseus_have_prebuilt guard') runner_lines.append(' fi # end _odysseus_have_prebuilt guard')
def _llama_cpp_rebuild_cmd() -> str: def _llama_cpp_rebuild_cmd(update_source: bool = False) -> str:
"""Shell command that clears the Cookbook-managed llama.cpp build. """Shell command that clears the Cookbook-managed llama.cpp build.
Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build*`` Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build*``
directory so the next llama.cpp serve recompiles from source, picking up a directory so the next llama.cpp serve recompiles from source, picking up a
CUDA or HIP toolchain if one is now available. The serve bootstrap only CUDA or HIP toolchain if one is now available. The serve bootstrap only
builds when ``llama-server`` is missing from PATH, so without this an builds when ``llama-server`` is missing from PATH, so without this an
existing CPU-only build is reused forever. It deliberately installs and existing CPU-only build is reused forever. When ``update_source`` is true,
downloads nothing; the rebuild itself happens on the next serve. the command also fast-forwards the Cookbook-managed ``~/llama.cpp`` checkout
if it exists. The rebuild itself happens on the next serve.
""" """
update_cmd = ''
if update_source:
update_cmd = (
'if [ -d "$HOME/llama.cpp/.git" ]; then '
'git -C "$HOME/llama.cpp" pull --ff-only --depth 1 || '
'echo "[odysseus] WARNING: llama.cpp source update failed; clearing cached build anyway."; '
'elif command -v git >/dev/null 2>&1; then '
'git clone --depth 1 https://github.com/ggml-org/llama.cpp "$HOME/llama.cpp" || '
'echo "[odysseus] WARNING: llama.cpp clone failed; clearing cached build anyway."; '
'fi && '
)
return ( return (
'mkdir -p "$HOME/bin" && ' 'mkdir -p "$HOME/bin" && '
f'{update_cmd}'
'rm -f "$HOME/bin/llama-server" && ' 'rm -f "$HOME/bin/llama-server" && '
'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && ' 'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && '
'echo "[odysseus] Cleared the cached llama.cpp build. ' 'echo "[odysseus] Cleared the cached llama.cpp build. '
+201 -11
View File
@@ -273,6 +273,78 @@ def setup_cookbook_routes() -> APIRouter:
def _load_stored_hf_token() -> str: def _load_stored_hf_token() -> str:
return load_stored_hf_token(state_path=_cookbook_state_path) return load_stored_hf_token(state_path=_cookbook_state_path)
def _normalize_minimax_m3_vllm_cmd(cmd: str) -> str:
    """Patch MiniMax M3 vLLM launches into the known-good local form.

    The browser form can be stale or omit advanced-only fields. MiniMax M3
    is sensitive to several flags: using the HF repo id with block-size 128
    fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
    system nvcc. Normalize server-side before writing the tmux runner.

    Commands that are empty, are not ``vllm serve`` launches, do not mention
    MiniMax M3, cannot be tokenized by ``shlex``, or have no model argument
    after ``serve`` are returned unchanged. Otherwise the command is
    re-serialized with ``shlex.join`` after forcing the required env vars
    and flags.

    Only ``KEY=value`` tokens at the very start of the command are treated
    as environment assignments. Later tokens of the same shape are ordinary
    argument values (e.g. ``--limit-mm-per-prompt image=0``) and must stay
    where they are.
    """
    if not cmd or "vllm serve" not in cmd or not re.search(r"minimax.*m3", cmd, re.I):
        return cmd
    try:
        parts = shlex.split(cmd)
    except ValueError:
        return cmd

    env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    # Split off the leading run of env assignments; everything after the
    # first non-assignment token is the command body.
    split_at = 0
    while split_at < len(parts) and env_re.match(parts[split_at]):
        split_at += 1
    env_parts = parts[:split_at]
    body = parts[split_at:]

    try:
        serve_i = body.index("serve")
    except ValueError:
        return cmd
    if serve_i + 1 >= len(body):
        return cmd

    repo_id = "cyankiwi/MiniMax-M3-AWQ-INT4"
    # NOTE(review): host-specific absolute path and pinned snapshot hash;
    # this substitution is only meaningful on the machine that has this
    # cache layout — confirm before relying on it elsewhere.
    snapshot = (
        "/home/pewds/.cache/huggingface/hub/"
        "models--cyankiwi--MiniMax-M3-AWQ-INT4/"
        "snapshots/4082acbbec1236d21828d55b6bb0fe02ade4ab5b"
    )
    if body[serve_i + 1] == repo_id:
        body[serve_i + 1] = snapshot

    # Tokens before ``serve`` may still carry assignments (``env K=V vllm``),
    # so look there as well when deciding whether a variable is already set.
    pre_serve = tuple(body[:serve_i])

    def add_env(key: str, value: str) -> None:
        """Add ``key=value`` unless the command already sets ``key``."""
        prefix = f"{key}="
        if not any(p.startswith(prefix) for p in (*env_parts, *pre_serve)):
            env_parts.append(f"{key}={value}")

    def has_flag(flag: str) -> bool:
        """Report whether ``flag`` appears as ``flag`` or ``flag=...``."""
        return any(p == flag or p.startswith(flag + "=") for p in body)

    def set_flag(flag: str, value: str) -> None:
        """Force ``flag`` to ``value``, overwriting the first occurrence or appending."""
        for i, part in enumerate(body):
            if part == flag:
                if i + 1 < len(body):
                    body[i + 1] = value
                else:
                    body.append(value)
                return
            if part.startswith(flag + "="):
                body[i] = f"{flag}={value}"
                return
        body.extend([flag, value])

    def add_bool(flag: str) -> None:
        """Append the boolean ``flag`` if it is not already present."""
        if not has_flag(flag):
            body.append(flag)

    add_env("VLLM_TARGET_DEVICE", "cuda")
    add_env("VLLM_USE_FLASHINFER_SAMPLER", "0")
    set_flag("--served-model-name", repo_id)
    set_flag("--tool-call-parser", "minimax_m3")
    set_flag("--reasoning-parser", "minimax_m3")
    set_flag("--attention-backend", "TRITON_ATTN")
    set_flag("--block-size", "128")
    add_bool("--language-model-only")
    add_bool("--disable-custom-all-reduce")
    add_bool("--enable-expert-parallel")
    return shlex.join(env_parts + body)
def _cookbook_ssh_dir() -> Path: def _cookbook_ssh_dir() -> Path:
# The Docker image keeps cookbook keys under /app/.ssh; that path only # The Docker image keeps cookbook keys under /app/.ssh; that path only
# exists inside the container. On Windows (and any non-container host) # exists inside the container. On Windows (and any non-container host)
@@ -1249,6 +1321,7 @@ def setup_cookbook_routes() -> APIRouter:
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400). # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
req.cmd = _validate_serve_cmd(req.cmd) or "" req.cmd = _validate_serve_cmd(req.cmd) or ""
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or "" req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
req.cmd = _normalize_minimax_m3_vllm_cmd(req.cmd)
req.cmd = _venv_safe_local_pip_install_cmd( req.cmd = _venv_safe_local_pip_install_cmd(
req.cmd, req.cmd,
local=not bool(req.remote_host), local=not bool(req.remote_host),
@@ -1579,6 +1652,96 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' echo "ERROR: vLLM is not installed."') runner_lines.append(' echo "ERROR: vLLM is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi') runner_lines.append('fi')
runner_lines.append(f"ODYSSEUS_SERVE_CMD='{_bash_squote(req.cmd)}'")
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
runner_lines.append(' ODYSSEUS_VLLM_HELP_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print("vllm serve --help")')
runner_lines.append('else:')
runner_lines.append(' print(shlex.join(parts[:serve_i + 1] + ["--help"]))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' ODYSSEUS_VLLM_SUPPORTS_SWAP=0')
runner_lines.append(' if eval "$ODYSSEUS_VLLM_HELP_CMD" 2>&1 | grep -q -- "--swap-space"; then ODYSSEUS_VLLM_SUPPORTS_SWAP=1; fi')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" = "1" ] && ! printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="${ODYSSEUS_SERVE_CMD} --swap-space 0"')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" != "1" ]; then')
runner_lines.append(' if printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('out = []')
runner_lines.append('skip = False')
runner_lines.append('for part in parts:')
runner_lines.append(' if skip:')
runner_lines.append(' skip = False')
runner_lines.append(' continue')
runner_lines.append(' if part == "--swap-space":')
runner_lines.append(' skip = True')
runner_lines.append(' continue')
runner_lines.append(' if part.startswith("--swap-space="):')
runner_lines.append(' continue')
runner_lines.append(' out.append(part)')
runner_lines.append('print(shlex.join(out))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' fi')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('patch = r"""import inspect, sys')
runner_lines.append('from vllm.engine.arg_utils import EngineArgs, AsyncEngineArgs')
runner_lines.append('def _odysseus_swap0(cls):')
runner_lines.append(' params = list(inspect.signature(cls).parameters)')
runner_lines.append(' if "swap_space" not in params:')
runner_lines.append(' return')
runner_lines.append(' idx = params.index("swap_space")')
runner_lines.append(' defaults = list(cls.__init__.__defaults__ or ())')
runner_lines.append(' if idx < len(defaults):')
runner_lines.append(' defaults[idx] = 0')
runner_lines.append(' cls.__init__.__defaults__ = tuple(defaults)')
runner_lines.append(' fields = getattr(cls, "__dataclass_fields__", {})')
runner_lines.append(' if "swap_space" in fields:')
runner_lines.append(' fields["swap_space"].default = 0')
runner_lines.append('_odysseus_swap0(EngineArgs)')
runner_lines.append('_odysseus_swap0(AsyncEngineArgs)')
runner_lines.append('try:')
runner_lines.append(' from vllm.config import CacheConfig')
runner_lines.append(' CacheConfig.swap_space = 0')
runner_lines.append('except Exception:')
runner_lines.append(' pass')
runner_lines.append('_orig_create_engine_config = EngineArgs.create_engine_config')
runner_lines.append('def _odysseus_create_engine_config(self, *args, **kwargs):')
runner_lines.append(' self.swap_space = 0')
runner_lines.append(' return _orig_create_engine_config(self, *args, **kwargs)')
runner_lines.append('EngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('AsyncEngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('from vllm.entrypoints.cli.main import main')
runner_lines.append('sys.exit(main())"""')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('else:')
runner_lines.append(' exe_i = serve_i - 1')
runner_lines.append(' exe = parts[exe_i] if exe_i >= 0 else "vllm"')
runner_lines.append(' py = "python3"')
runner_lines.append(' if exe.endswith("/bin/vllm"):')
runner_lines.append(' py = exe[:-len("/bin/vllm")] + "/bin/python"')
runner_lines.append(' parts[exe_i:serve_i] = [py, "-c", patch]')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' echo "[odysseus] Patched vLLM internal swap_space default to 0 for this runtime."')
runner_lines.append('fi')
elif "sglang.launch_server" in req.cmd: elif "sglang.launch_server" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! command -v sglang &>/dev/null; then') runner_lines.append('if ! command -v sglang &>/dev/null; then')
@@ -1620,7 +1783,10 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines, runner_lines,
keep_shell_open=not local_windows, keep_shell_open=not local_windows,
) )
runner_lines.append(req.cmd) if "vllm serve" in req.cmd:
runner_lines.append('eval "$ODYSSEUS_SERVE_CMD"')
else:
runner_lines.append(req.cmd)
if local_windows: if local_windows:
# Detached background process — no interactive shell to keep open. # Detached background process — no interactive shell to keep open.
# Print the exit marker the status poller looks for, then stop. # Print the exit marker the status poller looks for, then stop.
@@ -2418,16 +2584,14 @@ def setup_cookbook_routes() -> APIRouter:
# Add 30% headroom for KV cache, activations, etc. # Add 30% headroom for KV cache, activations, etc.
needed_vram = (est_vram * 1.3) if est_vram else None needed_vram = (est_vram * 1.3) if est_vram else None
if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb: if vram_gb > 0:
continue if needed_vram is None:
# Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no # The "trending models that fit" list must be conservative:
# "NB" in the repo id, so the regex above can't extract their # if we cannot estimate size from the repo id/tags, do not
# param count. Previously we dropped them entirely, which made # present it as runnable on this hardware.
# brand-new flagship releases silently vanish from this list even continue
# on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already if needed_vram > vram_gb:
# filtered by _is_excluded(), so what falls through here is continue
# overwhelmingly full models — keep them, just without a size
# badge (the frontend handles needed_vram_gb=null gracefully).
out.append({ out.append({
"repo_id": repo_id, "repo_id": repo_id,
@@ -2624,6 +2788,32 @@ def setup_cookbook_routes() -> APIRouter:
except Exception as e: except Exception as e:
logger.warning(f"orphan sweep: state write failed: {e}") logger.warning(f"orphan sweep: state write failed: {e}")
@router.get("/api/cookbook/hf-gguf-files")
async def hf_gguf_files(repo_id: str, owner: str = Depends(require_user)):
    """List GGUF files in a HuggingFace repo for the direct-download picker.

    Queries the Hugging Face model-info endpoint for ``repo_id`` (after
    validation), sending the stored HF token as a bearer header when one is
    configured. Returns ``{"ok": True, "repo_id": ..., "files": [...]}`` with
    every sibling filename ending in ``.gguf`` (case-insensitive), or
    ``{"ok": False, "files": [], "error": ...}`` when the request fails or
    the API answers with a non-200 status.
    """
    import httpx

    repo_id = _validate_repo_id(repo_id)
    url = f"https://huggingface.co/api/models/{repo_id}"
    try:
        headers = {}
        token = _load_stored_hf_token()
        if token:
            headers["Authorization"] = f"Bearer {token}"
        async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
            resp = await client.get(url, headers=headers)
        if resp.status_code != 200:
            return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
        data = resp.json()
    except Exception as e:
        # Best-effort endpoint: surface the failure to the picker instead of a 500.
        return {"ok": False, "files": [], "error": str(e)}

    # Be defensive about the payload shape: a non-object body, a null/non-list
    # ``siblings`` value, or a non-dict entry yields no files rather than an
    # unhandled exception outside the try block above.
    siblings = data.get("siblings") if isinstance(data, dict) else None
    files = []
    for entry in siblings if isinstance(siblings, list) else []:
        if not isinstance(entry, dict):
            continue
        name = str(entry.get("rfilename") or "")
        if name.lower().endswith(".gguf"):
            files.append(name)
    return {"ok": True, "repo_id": repo_id, "files": files}
# In-memory cache for the Ollama library scrape. ollama.com is a public # In-memory cache for the Ollama library scrape. ollama.com is a public
# site, but it doesn't expose a stable JSON listing — we fetch the HTML # site, but it doesn't expose a stable JSON listing — we fetch the HTML
# search page and regex out the model cards. Cached for 1 h so a busy # search page and regex out the model cards. Cached for 1 h so a busy
+45 -8
View File
@@ -1109,22 +1109,30 @@ def _list_attachments_from_msg(msg):
return attachments return attachments
idx = 0 idx = 0
for part in msg.walk(): for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", "")) cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type() ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
# Skip text/html body parts (only consider real attachments) # Skip text/html body parts (only consider real attachments)
if ct in ("text/plain", "text/html") and "attachment" not in cd: if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue continue
filename = part.get_filename() filename = part.get_filename()
if filename: if filename:
filename = _decode_header(filename) filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else: else:
# Inline images, etc. - generate a name # Inline images, etc. - generate a name
ext = ct.split("/")[-1] if "/" in ct else "bin" ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}" filename = f"attachment_{idx}.{ext}"
payload = part.get_payload(decode=True) payload = part.get_payload(decode=True)
size = len(payload) if payload else 0 if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
size = len(payload) if payload is not None else 0
attachments.append({ attachments.append({
"index": idx, "index": idx,
"filename": filename, "filename": filename,
@@ -1136,29 +1144,58 @@ def _list_attachments_from_msg(msg):
return attachments return attachments
def _is_likely_signature_image_attachment(att: dict) -> bool:
    """Match the reader's inline signature/logo image filter.

    ``att`` is an attachment descriptor with ``filename`` and ``size`` keys
    (``None`` is tolerated). Only image filenames are considered. An image is
    flagged when its name looks auto-generated (``image001.png``), looks like
    a signature/logo/footer/banner asset, or when its size is strictly
    between 0 and 30 KiB.
    """
    info = att or {}
    name = str(info.get("filename") or "").lower()
    if re.search(r"\.(png|jpe?g|gif|bmp|svg|webp)$", name) is None:
        return False

    byte_count = int(info.get("size") or 0)
    telltale_names = (
        r"^image\d{3,}\.(png|jpe?g|gif)$",
        r"^(signature|logo|sig|footer|banner)[-_\d]*\.(png|jpe?g|gif|svg)$",
    )
    if any(re.search(pattern, name) for pattern in telltale_names):
        return True
    # Small images with ordinary names are assumed to be decorative.
    return 0 < byte_count < 30 * 1024
def _has_visible_attachments(msg) -> bool:
    """Return True only for attachments the reader will render as chips.

    An attachment counts as visible when it is not classified as a likely
    signature/logo image; a message with no attachments yields False.
    """
    for candidate in _list_attachments_from_msg(msg):
        if not _is_likely_signature_image_attachment(candidate):
            return True
    return False
def _extract_attachment_to_disk(msg, index, target_dir): def _extract_attachment_to_disk(msg, index, target_dir):
"""Extract a specific attachment to disk and return the file path.""" """Extract a specific attachment to disk and return the file path."""
if not msg.is_multipart(): if not msg.is_multipart():
return None return None
idx = 0 idx = 0
for part in msg.walk(): for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", "")) cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type() ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
if ct in ("text/plain", "text/html") and "attachment" not in cd: if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue continue
if idx == index: if idx == index:
filename = part.get_filename() filename = part.get_filename()
if filename: if filename:
filename = _decode_header(filename) filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else: else:
ext = ct.split("/")[-1] if "/" in ct else "bin" ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}" filename = f"attachment_{idx}.{ext}"
# Sanitize # Sanitize
safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip() safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip()
payload = part.get_payload(decode=True) payload = part.get_payload(decode=True)
if not payload: if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
if payload is None:
return None return None
target_dir.mkdir(parents=True, exist_ok=True) target_dir.mkdir(parents=True, exist_ok=True)
filepath = target_dir / safe_name filepath = target_dir / safe_name
+165 -39
View File
@@ -44,7 +44,7 @@ from routes.email_helpers import (
_send_smtp_message, _smtp_security_mode, _send_smtp_message, _smtp_security_mode,
_IMAP_TIMEOUT_SECONDS, _open_imap_connection, _IMAP_TIMEOUT_SECONDS, _open_imap_connection,
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder, _imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
_extract_attachment_text, _list_attachments_from_msg, _extract_attachment_text, _list_attachments_from_msg, _has_visible_attachments, _is_likely_signature_image_attachment,
_extract_attachment_to_disk, _extract_html, _extract_text, _extract_attachment_to_disk, _extract_html, _extract_text,
_fetch_sender_thread_context, _pre_retrieve_context, _fetch_sender_thread_context, _pre_retrieve_context,
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS, _EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
@@ -58,6 +58,7 @@ from routes.email_pollers import _start_poller
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
ODYSSEUS_MAIL_ORIGIN = "odysseus-ui" ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
EMAIL_READ_ATTACHMENT_VERSION = 2
def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]: def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
@@ -244,6 +245,21 @@ def _imap_uid_fetch(conn, uid_set: str | bytes, query: str):
return conn.uid("FETCH", _uid_bytes(uid_set), query) return conn.uid("FETCH", _uid_bytes(uid_set), query)
def _imap_search_quote(value: str) -> str:
    """Return ``value`` as an IMAP quoted string for use in a SEARCH key.

    Backslashes and double quotes are escaped. CR, LF and NUL are removed
    first: they are not permitted inside an IMAP quoted string, and values
    taken from folded mail headers (e.g. a Message-ID split across lines)
    can contain them. ``None`` and other falsy values yield ``""``.
    """
    text = re.sub(r"[\r\n\x00]", "", str(value or ""))
    return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"'
def _message_id_chain(*values: str) -> list[str]:
    """Collect the ``<...>`` message-id tokens from the given header values.

    Values are scanned in argument order; falsy values are skipped. Each id
    is returned once, at the position of its first occurrence.
    """
    found = (
        message_id
        for header_value in values
        for message_id in re.findall(r"<[^>]+>", header_value or "")
    )
    # dict preserves insertion order, giving an order-stable de-duplication.
    return list(dict.fromkeys(found))
def _uid_from_fetch_meta(meta_b: bytes) -> str: def _uid_from_fetch_meta(meta_b: bytes) -> str:
m = re.search(rb"\bUID\s+(\d+)\b", meta_b) m = re.search(rb"\bUID\s+(\d+)\b", meta_b)
return m.group(1).decode() if m else "" return m.group(1).decode() if m else ""
@@ -1003,6 +1019,65 @@ def setup_email_routes():
except Exception: except Exception:
pass pass
def _related_thread_attachments_sync(
    folder: str,
    account_id: str | None,
    owner: str,
    current_uid: str,
    current_message_id: str,
    in_reply_to: str,
    references: str,
    limit: int = 12,
) -> list[dict]:
    """Return visible attachments from referenced messages in this folder.

    Message-IDs are taken from ``references`` then ``in_reply_to`` (excluding
    the current message's own id). The last ten ids are searched newest
    first, and up to three matching UIDs per id are fetched. Each non
    signature-like attachment is returned as a copy of its descriptor
    enriched with ``source_*`` keys describing the message it came from.

    At most ``limit`` attachments are returned. Any failure is logged at
    debug level and whatever was collected so far is returned.
    """
    current_mid = (current_message_id or "").strip()
    wanted_ids = [
        mid
        for mid in _message_id_chain(references, in_reply_to)
        if mid and mid != current_mid
    ]
    if not wanted_ids:
        return []

    related: list[dict] = []
    try:
        with _imap(account_id, owner=owner) as conn:
            conn.select(_q(folder), readonly=True)
            # Search newest referenced messages first; cap work so opening
            # a long thread stays bounded.
            for mid in reversed(wanted_ids[-10:]):
                if len(related) >= limit:
                    break
                status, data = _imap_uid_search(conn, f'(HEADER Message-ID {_imap_search_quote(mid)})')
                if status != "OK" or not data or not data[0]:
                    continue
                for uid_b in reversed(data[0].split()[-3:]):
                    # Re-check the cap before every fetch: without this, a
                    # further UID for the same Message-ID would cost a full
                    # body download and could push the result past ``limit``.
                    if len(related) >= limit:
                        break
                    source_uid = uid_b.decode(errors="ignore")
                    if not source_uid or source_uid == str(current_uid):
                        continue
                    st2, msg_data = _imap_uid_fetch(conn, source_uid, "(BODY.PEEK[])")
                    if st2 != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                        continue
                    msg = email_mod.message_from_bytes(msg_data[0][1])
                    source_from = _decode_header(msg.get("From", ""))
                    source_subject = _decode_header(msg.get("Subject", ""))
                    source_date = msg.get("Date", "")
                    for att in _list_attachments_from_msg(msg):
                        if _is_likely_signature_image_attachment(att):
                            continue
                        enriched = dict(att)
                        enriched.update({
                            "source_uid": source_uid,
                            "source_folder": folder,
                            "source_message_id": (msg.get("Message-ID") or "").strip(),
                            "source_from": source_from,
                            "source_subject": source_subject,
                            "source_date": source_date,
                        })
                        related.append(enriched)
                        if len(related) >= limit:
                            break
    except Exception as e:
        # Best-effort enrichment: never let a lookup failure break the read.
        logger.debug("related thread attachment lookup failed uid=%s: %s", current_uid, e)
    return related
@router.get("/list") @router.get("/list")
async def list_emails( async def list_emails(
folder: str = Query("INBOX"), folder: str = Query("INBOX"),
@@ -1273,6 +1348,17 @@ def setup_email_routes():
sender_name, sender_addr = email.utils.parseaddr(sender) sender_name, sender_addr = email.utils.parseaddr(sender)
parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None
attachments = _list_attachments_from_msg(msg) attachments = _list_attachments_from_msg(msg)
related_attachments = []
if not _has_visible_attachments(msg):
related_attachments = _related_thread_attachments_sync(
folder,
account_id,
owner,
uid,
message_id,
in_reply_to,
references,
)
if mark_seen: if mark_seen:
# Set \Seen in a separate readwrite session so concurrent reads # Set \Seen in a separate readwrite session so concurrent reads
@@ -1381,6 +1467,8 @@ def setup_email_routes():
"body": body, "body": body,
"body_html": body_html, "body_html": body_html,
"attachments": attachments, "attachments": attachments,
"related_attachments": related_attachments,
"attachment_version": EMAIL_READ_ATTACHMENT_VERSION,
"cached_summary": cached_summary, "cached_summary": cached_summary,
"cached_ai_reply": cached_ai_reply, "cached_ai_reply": cached_ai_reply,
"boundaries": cached_boundaries, "boundaries": cached_boundaries,
@@ -1411,6 +1499,12 @@ def setup_email_routes():
"""Read email body. Cached for 30m, sync IMAP work runs in a thread.""" """Read email body. Cached for 30m, sync IMAP work runs in a thread."""
ck = _read_cache_key(account_id, folder, uid, owner=owner) ck = _read_cache_key(account_id, folder, uid, owner=owner)
cached = _read_cache_get(ck) cached = _read_cache_get(ck)
if cached is not None:
# Older cached read responses lack the thread-attachment fallback.
# Fetch once so replies that reference prior attachments can show
# those files without waiting for cache expiry.
if cached.get("attachment_version") != EMAIL_READ_ATTACHMENT_VERSION:
cached = None
if cached is not None: if cached is not None:
if mark_seen: if mark_seen:
try: try:
@@ -1599,6 +1693,65 @@ def setup_email_routes():
return None return None
doc_session_id = _resolve_doc_session() doc_session_id = _resolve_doc_session()
def _create_markdown_doc(content: str, summary: str):
    """Persist ``content`` as a new active markdown document and return its id.

    Every row currently flagged ``is_active`` is switched off first, then a
    ``Document`` plus its first ``DocumentVersion`` (version 1, source
    ``"upload"``) are inserted and committed. ``title`` and
    ``doc_session_id`` are taken from the enclosing scope.

    Args:
        content: Markdown text stored on both the document and its version.
        summary: Short description recorded on the initial version.

    Returns:
        The freshly generated document id (a UUID4 string).
    """
    from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV

    doc_id = str(uuid.uuid4())
    ver_id = str(uuid.uuid4())
    session = _SL()
    try:
        # Only one document may be active at a time: clear the flag on all
        # currently active rows before inserting the new one.
        active_rows = session.query(_Doc).filter(_Doc.is_active == True)
        active_rows.update({"is_active": False})

        new_document = _Doc(
            id=doc_id,
            session_id=doc_session_id,
            title=title,
            language="markdown",
            current_content=content,
            version_count=1,
            is_active=True,
        )
        session.add(new_document)

        first_version = _DV(
            id=ver_id,
            document_id=doc_id,
            version_number=1,
            content=content,
            summary=summary,
            source="upload",
        )
        session.add(first_version)

        session.commit()
    finally:
        # Always hand the session back, whether or not the commit succeeded.
        session.close()

    # Tagging happens only after the row has been committed.
    _tag_doc_with_source(doc_id)
    return doc_id
def _attached_email_markdown(path):
    """Render an attached email file as a markdown summary.

    Args:
        path: Object exposing ``read_bytes()`` for the attached message.

    Returns:
        Markdown text with a title line, the non-empty From/To/Cc/Date
        headers, a "Body" section and, when the attached message carries
        attachments of its own, an "Attachments" bullet list. An empty file
        or a parse failure yields a short placeholder document instead.
    """
    payload = path.read_bytes()
    if not payload:
        return f"# Attached email: {base}\n\n_(empty email attachment)_"
    try:
        parsed = email_mod.message_from_bytes(payload)
    except Exception as e:
        return f"# Attached email: {base}\n\nCould not parse this email attachment: {e}"

    # Fall back to the attachment's own name when the message has no subject.
    subject = _decode_header(parsed.get("Subject", "")) or base
    header_rows = (
        ("From", _decode_header(parsed.get("From", ""))),
        ("To", _decode_header(parsed.get("To", ""))),
        ("Cc", _decode_header(parsed.get("Cc", ""))),
        ("Date", parsed.get("Date", "")),
    )
    body_text = _extract_text(parsed).strip()
    nested_attachments = _list_attachments_from_msg(parsed)

    out = [f"# Attached email: {subject}", ""]
    # Headers that are missing or empty are omitted entirely.
    out += [f"**{label}:** {value}" for label, value in header_rows if value]
    out += ["", "## Body", "", body_text or "_(no readable body)_"]

    if nested_attachments:
        out += ["", "## Attachments", ""]
        for item in nested_attachments:
            byte_count = int(item.get("size") or 0)
            if byte_count < 1024:
                size_label = f"{byte_count} B"
            else:
                size_label = f"{round(byte_count / 1024)} KB"
            name = item.get("filename") or f"attachment_{item.get('index', '')}"
            ctype = item.get("content_type") or "application/octet-stream"
            out.append(f"- {name} ({ctype}, {size_label})")

    return "\n".join(out).strip()
# ── PDF path (existing) ──────────────────────────────────── # ── PDF path (existing) ────────────────────────────────────
if ext == ".pdf": if ext == ".pdf":
import shutil as _shutil import shutil as _shutil
@@ -1645,6 +1798,15 @@ def setup_email_routes():
_tag_doc_with_source(doc_id) _tag_doc_with_source(doc_id)
return {"doc_id": doc_id, "filename": filepath.name} return {"doc_id": doc_id, "filename": filepath.name}
# ── Attached email (.eml / message/rfc822) ────────────────
if ext == ".eml":
try:
content = _attached_email_markdown(filepath)
except Exception as e:
return {"error": f"Failed to read email attachment: {e}", "filename": base}
doc_id = _create_markdown_doc(content, "Imported attached email")
return {"doc_id": doc_id, "filename": filepath.name}
# ── DOCX path: extract text → markdown document ─────────── # ── DOCX path: extract text → markdown document ───────────
if ext == ".docx": if ext == ".docx":
try: try:
@@ -1682,25 +1844,7 @@ def setup_email_routes():
lines.append("") lines.append("")
content = "\n".join(lines).strip() or f"_(empty {base})_" content = "\n".join(lines).strip() or f"_(empty {base})_"
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV doc_id = _create_markdown_doc(content, "Imported from DOCX")
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from DOCX", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
return {"doc_id": doc_id, "filename": filepath.name} return {"doc_id": doc_id, "filename": filepath.name}
# ── Plain text / markdown ──────────────────────────────── # ── Plain text / markdown ────────────────────────────────
@@ -1709,25 +1853,7 @@ def setup_email_routes():
content = filepath.read_text(encoding="utf-8", errors="replace") content = filepath.read_text(encoding="utf-8", errors="replace")
except Exception as e: except Exception as e:
return {"error": f"Failed to read text file: {e}", "filename": base} return {"error": f"Failed to read text file: {e}", "filename": base}
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV doc_id = _create_markdown_doc(content, "Imported from email attachment")
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from email attachment", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
return {"doc_id": doc_id, "filename": filepath.name} return {"doc_id": doc_id, "filename": filepath.name}
return {"error": f"Unsupported attachment type: {ext}", "filename": base} return {"error": f"Unsupported attachment type: {ext}", "filename": base}
+94 -2
View File
@@ -1,8 +1,13 @@
import json
import os
import re import re
import shlex
import subprocess
from copy import deepcopy from copy import deepcopy
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from core.platform_compat import run_ssh_command
from routes._validators import validate_remote_host, validate_ssh_port from routes._validators import validate_remote_host, validate_ssh_port
@@ -107,6 +112,73 @@ def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_v
return system return system
def _run_model_probe(host: str, ssh_port: str, cmd: str) -> str:
try:
if host:
r = run_ssh_command(
host,
ssh_port or None,
cmd,
timeout=15,
connect_timeout=5,
strict_host_key_checking=False,
text=True,
)
else:
r = subprocess.run(["bash", "-lc", cmd], capture_output=True, text=True, timeout=15)
if r.returncode == 0:
return (r.stdout or "").strip()
except Exception:
return ""
return ""
def _inspect_model_path(model_path: str, host: str = "", ssh_port: str = "") -> dict:
"""Read lightweight metadata from a local or SSH-visible HF model folder."""
path = (model_path or "").strip()
if not path or path.startswith(("http://", "https://")):
return {}
if not (path.startswith("/") or path.startswith("~")):
return {}
qpath = shlex.quote(path)
qconfig = shlex.quote(os.path.join(path, "config.json"))
out = {}
exists = _run_model_probe(host, ssh_port, f"test -d {qpath} && printf found || printf missing")
if exists != "found":
target = host or "local container"
out["model_probe_error"] = f"Model path is not visible on {target}: {path}"
return out
raw_config = _run_model_probe(host, ssh_port, f"test -f {qconfig} && sed -n '1,240p' {qconfig}")
if raw_config:
try:
cfg = json.loads(raw_config)
except Exception:
cfg = {}
for key in ("context_length", "max_position_embeddings", "n_ctx_train", "model_max_length", "max_seq_len"):
value = cfg.get(key)
if isinstance(value, (int, float)) and value > 0:
out["model_ctx_max"] = int(value)
break
else:
out["model_probe_error"] = f"config.json not found in model path: {path}"
size_cmd = (
f"find {qpath} -type f \\( -name '*.safetensors' -o -name '*.bin' -o -name '*.gguf' \\) "
"-printf '%s\\n' 2>/dev/null | awk '{s+=$1} END {if (s>0) printf \"%.6f\", s/1073741824}'"
)
weights = _run_model_probe(host, ssh_port, size_cmd)
try:
weights_gb = float(weights)
except Exception:
weights_gb = 0.0
if weights_gb > 0:
out["model_weights_gb"] = round(weights_gb, 3)
elif "model_probe_error" not in out:
out["model_probe_error"] = f"No model weight files found in: {path}"
return out
def setup_hwfit_routes(): def setup_hwfit_routes():
router = APIRouter(prefix="/api/hwfit", tags=["hwfit"]) router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
@@ -235,7 +307,7 @@ def setup_hwfit_routes():
return {"system": system, "models": results} return {"system": system, "models": results}
@router.get("/profiles") @router.get("/profiles")
def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""): def get_serve_profiles(model: str = "", model_path: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
"""Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model` """Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
against the detected hardware on `host` (or local). Returns concrete against the detected hardware on `host` (or local). Returns concrete
flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply. flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
@@ -272,8 +344,16 @@ def setup_hwfit_routes():
if nn and (nn == want or want.endswith(nn) or nn.endswith(want)): if nn and (nn == want or want.endswith(nn) or nn.endswith(want)):
m = entry m = entry
break break
path_meta = _inspect_model_path(model_path or model, host=host, ssh_port=ssh_port)
if m is None: if m is None:
return {"system": system, "profiles": [], "error": "model not in catalog"} return {
"system": system,
"profiles": [],
"error": "model not in catalog",
"model_ctx_max": int(path_meta.get("model_ctx_max") or 0),
"model_weights_gb": float(path_meta.get("model_weights_gb") or 0),
"model_probe_error": path_meta.get("model_probe_error") or "",
}
# Surface the model's trained context limit so the serve UI can clamp a # Surface the model's trained context limit so the serve UI can clamp a
# user-typed context down to it (asking for ctx > n_ctx_train overflows # user-typed context down to it (asking for ctx > n_ctx_train overflows
# and, with a quantized KV cache, can crash the GPU). # and, with a quantized KV cache, can crash the GPU).
@@ -283,6 +363,16 @@ def setup_hwfit_routes():
if isinstance(v, (int, float)) and v > 0: if isinstance(v, (int, float)) and v > 0:
model_ctx_max = int(v) model_ctx_max = int(v)
break break
path_ctx_max = int(path_meta.get("model_ctx_max") or 0)
if path_ctx_max > 0:
model_ctx_max = max(model_ctx_max, path_ctx_max)
model_weights_gb = float(path_meta.get("model_weights_gb") or 0)
if model_weights_gb <= 0:
for k in ("min_vram_gb", "required_gb", "size_gb", "recommended_ram_gb", "min_ram_gb"):
v = m.get(k)
if isinstance(v, (int, float)) and v > 0:
model_weights_gb = float(v)
break
return { return {
"system": system, "system": system,
"profiles": compute_serve_profiles( "profiles": compute_serve_profiles(
@@ -291,6 +381,8 @@ def setup_hwfit_routes():
serve_quant=(serve_quant or None), serve_quant=(serve_quant or None),
), ),
"model_ctx_max": model_ctx_max, "model_ctx_max": model_ctx_max,
"model_weights_gb": model_weights_gb,
"model_probe_error": path_meta.get("model_probe_error") or "",
} }
@router.get("/image-models") @router.get("/image-models")
+9 -2
View File
@@ -1064,9 +1064,11 @@ def setup_model_routes(model_discovery):
except Exception: except Exception:
return 0.0 return 0.0
def _failure_delay(fails: int) -> float: def _failure_delay(fails: int, *, empty_local: bool = False) -> float:
if fails <= 0: if fails <= 0:
return 0.0 return 0.0
if empty_local:
return min(5.0 * (2 ** max(0, fails - 1)), 30.0)
return min(_REFRESH_FAILURE_BASE * (2 ** max(0, fails - 1)), _REFRESH_FAILURE_MAX) return min(_REFRESH_FAILURE_BASE * (2 ** max(0, fails - 1)), _REFRESH_FAILURE_MAX)
def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]: def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]:
@@ -1097,7 +1099,12 @@ def setup_model_routes(model_discovery):
fails = int(state.get("fail_count") or 0) fails = int(state.get("fail_count") or 0)
if fails and not force: if fails and not force:
last_failure = float(state.get("last_failure") or 0.0) last_failure = float(state.get("last_failure") or 0.0)
if now - last_failure < _failure_delay(fails): empty_local = (
not cached
and category == "local"
and str(getattr(ep, "id", "") or "").startswith("local-")
)
if now - last_failure < _failure_delay(fails, empty_local=empty_local):
return False, info return False, info
if cached and not force: if cached and not force:
interval = _endpoint_refresh_interval(ep, category) interval = _endpoint_refresh_interval(ep, category)
+53 -5
View File
@@ -330,6 +330,9 @@ def add_user_install_bins_to_path():
candidates.append(os.path.join(site.USER_BASE, 'bin')) candidates.append(os.path.join(site.USER_BASE, 'bin'))
except Exception: except Exception:
pass pass
candidates.append(os.path.expanduser('~/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build-vulkan/bin'))
candidates.append(os.path.expanduser('~/.local/bin')) candidates.append(os.path.expanduser('~/.local/bin'))
parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else [] parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
changed = False changed = False
@@ -1188,6 +1191,7 @@ def setup_shell_routes() -> APIRouter:
# venv over SSH so a remote `pip install` actually reflects here. # venv over SSH so a remote `pip install` actually reflects here.
remote_status: dict = {} remote_status: dict = {}
remote_details: dict = {} remote_details: dict = {}
remote_probe_error = ""
remote_names = [ remote_names = [
p["name"] p["name"]
for p in packages for p in packages
@@ -1226,8 +1230,34 @@ def setup_shell_routes() -> APIRouter:
break break
except ValueError as e: except ValueError as e:
raise HTTPException(400, str(e)) raise HTTPException(400, str(e))
except Exception: except Exception as e:
remote_status = {} remote_status = {}
remote_probe_error = f"SSH package probe failed: {str(e)[:160]}"
if "llama_cpp" in remote_names:
try:
inner = (
'export PATH="$HOME/.local/bin:$HOME/bin:'
'$HOME/llama.cpp/build/bin:$HOME/llama.cpp/build-vulkan/bin:$PATH"; '
"command -v llama-server 2>/dev/null || true"
)
argv = _ssh_base_argv(host, ssh_port) + [inner]
proc = await asyncio.create_subprocess_exec(
*argv,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
out, _err = await asyncio.wait_for(proc.communicate(), timeout=8)
llama_server_path = out.decode("utf-8", errors="replace").strip().splitlines()
llama_server_path = llama_server_path[-1].strip() if llama_server_path else ""
if llama_server_path:
remote_status["llama_cpp"] = True
probe = remote_details.setdefault("llama_cpp", {})
if isinstance(probe, dict):
probe.setdefault("binaries", {})["llama-server"] = llama_server_path
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH llama-server probe failed: {str(e)[:160]}"
pass
# Union of system_names + every package's system_prereqs. Probing # Union of system_names + every package's system_prereqs. Probing
# the prereqs alongside the main system deps in a single SSH call # the prereqs alongside the main system deps in a single SSH call
# avoids a second round-trip per Cookbook → Dependencies refresh. # avoids a second round-trip per Cookbook → Dependencies refresh.
@@ -1272,7 +1302,9 @@ def setup_shell_routes() -> APIRouter:
target_os_id = _os_id_from_release("\n".join(_osrel_lines)) target_os_id = _os_id_from_release("\n".join(_osrel_lines))
except ValueError as e: except ValueError as e:
raise HTTPException(400, str(e)) raise HTTPException(400, str(e))
except Exception: except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH system probe failed: {str(e)[:160]}"
pass pass
elif not host: elif not host:
# Local target — probe in-process so the inline install command # Local target — probe in-process so the inline install command
@@ -1290,7 +1322,12 @@ def setup_shell_routes() -> APIRouter:
on_remote = bool(host and pkg.get("target") == "remote") on_remote = bool(host and pkg.get("target") == "remote")
probe = None probe = None
if on_remote: if on_remote:
pkg["installed"] = bool(remote_status.get(pkg["name"], False)) if remote_probe_error and pkg["name"] not in remote_status:
pkg["installed"] = None
pkg["probe_error"] = remote_probe_error
pkg["status_note"] = remote_probe_error
else:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
probe = remote_details.get(pkg["name"]) probe = remote_details.get(pkg["name"])
if isinstance(probe, dict): if isinstance(probe, dict):
pkg["details"] = probe pkg["details"] = probe
@@ -1353,9 +1390,19 @@ def setup_shell_routes() -> APIRouter:
# reads "ready" green while inference runs at 3 tok/s on GPU # reads "ready" green while inference runs at 3 tok/s on GPU
# silicon — actively misleading. # silicon — actively misleading.
if pkg["name"] == "llama_cpp" and pkg.get("installed"): if pkg["name"] == "llama_cpp" and pkg.get("installed"):
_native_llama_server = bool(
isinstance(probe, dict)
and isinstance(probe.get("binaries"), dict)
and probe["binaries"].get("llama-server")
)
_gpu_capable = False _gpu_capable = False
_has_nvidia_target = False _has_nvidia_target = False
if on_remote and host: if _native_llama_server:
# Native llama-server is the launcher path Cookbook now
# prefers. Do not mark this as a CPU-only Python wheel just
# because llama-cpp-python is absent from the selected venv.
_gpu_capable = True
elif on_remote and host:
try: try:
# Activate the configured venv FIRST so the probe # Activate the configured venv FIRST so the probe
# runs against the same python the launch script # runs against the same python the launch script
@@ -1609,7 +1656,8 @@ def setup_shell_routes() -> APIRouter:
return {"ok": False, "error": f"Unsupported engine: {engine}"} return {"ok": False, "error": f"Unsupported engine: {engine}"}
host = str(body.get("remote_host") or "").strip() host = str(body.get("remote_host") or "").strip()
ssh_port = body.get("ssh_port") ssh_port = body.get("ssh_port")
cmd = _llama_cpp_rebuild_cmd() update_source = bool(body.get("update_source"))
cmd = _llama_cpp_rebuild_cmd(update_source=update_source)
try: try:
argv = ( argv = (
(_ssh_base_argv(host, ssh_port) + [cmd]) (_ssh_base_argv(host, ssh_port) + [cmd])
+174 -8
View File
@@ -751,6 +751,17 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
_LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE) _LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
# Matches text that begins (after optional whitespace) with a greeting, slang
# or thanks token, case-insensitively. The opener must end on a word boundary;
# everything after it is captured in the named group "tail" so that
# _is_casual_low_signal can inspect what follows the opener.
_CASUAL_OPENING_RE = re.compile(
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
re.IGNORECASE,
)
# Whole-word, case-insensitive keywords (serving engines, documents, email,
# files, settings, ...). _is_casual_low_signal searches the opener's tail with
# this pattern and refuses to classify the message as casual when it matches.
_CASUAL_BLOCKLIST_RE = re.compile(
r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
r"download|model|email|document|doc|note|calendar|task|search|web|research|"
r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
re.IGNORECASE,
)
_EXPLICIT_CONTINUATION_RE = re.compile( _EXPLICIT_CONTINUATION_RE = re.compile(
r"^\s*(?:" r"^\s*(?:"
r"yes|y|yeah|yep|ok|okay|sure|do it|go ahead|continue|carry on|" r"yes|y|yeah|yep|ok|okay|sure|do it|go ahead|continue|carry on|"
@@ -760,6 +771,17 @@ _EXPLICIT_CONTINUATION_RE = re.compile(
r")\s*[.!?]*\s*$", r")\s*[.!?]*\s*$",
re.IGNORECASE, re.IGNORECASE,
) )
# Whole-word, case-insensitive retry / failure phrasing ("try again",
# "crashed", "failed", ...). _is_contextual_retry_continuation searches the
# latest user text with this pattern as its first gate.
_RETRY_CONTINUATION_RE = re.compile(
r"\b(?:try again|retry|again|rerun|re-run|run it again|launch it again|"
r"start it again|failed|fails?|died|crashed|broke|insta|instantly)\b",
re.IGNORECASE,
)
# Whole-word, case-insensitive Cookbook / model-serving vocabulary (engines,
# actions, model family names). _is_contextual_retry_continuation searches the
# recent conversation context with this pattern as its second gate.
_COOKBOOK_CONTEXT_RE = re.compile(
r"\b(?:cookbook|serve|serving|served|launch|start|preset|vllm|sglang|"
r"llama\.?cpp|ollama|download|cached models?|model servers?|running models?|"
r"gpu box|ajax|qwen|gemma|llama|mistral|minimax)\b",
re.IGNORECASE,
)
def _is_explicit_continuation(text: str) -> bool: def _is_explicit_continuation(text: str) -> bool:
@@ -767,6 +789,37 @@ def _is_explicit_continuation(text: str) -> bool:
return bool(_EXPLICIT_CONTINUATION_RE.match(str(text or "").strip())) return bool(_EXPLICIT_CONTINUATION_RE.match(str(text or "").strip()))
def _is_casual_low_signal(text: str) -> bool:
    """Report whether ``text`` is a short greeting/slang turn.

    Such turns should not inherit stale context. The text qualifies when it
    starts with a casual opener, the remainder contains no blocklisted
    keyword, and the remainder is at most two words long.
    """
    opener = _CASUAL_OPENING_RE.match(str(text or "").strip())
    if opener is None:
        return False

    remainder = opener.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(remainder) is not None:
        return False

    # A brief form of address after the opener is fine ("hey man", "yo dude",
    # "sup <name>") and is deliberately not matched by name. A longer remainder
    # is more likely a real request that deserves normal context and tooling.
    word_count = len(re.findall(r"[A-Za-z0-9_'-]+", remainder))
    return word_count <= 2
def _is_contextual_retry_continuation(messages: List[Dict], text: str) -> bool:
    """Decide whether a "try again / it failed" turn continues active tool work.

    After a Cookbook launch the newest user turn often says little more than
    "try again it failed", while the model, host and command details sit one
    or two turns earlier. The check is kept narrow on purpose so that ordinary
    chat does not pick up stale Cookbook context.

    Args:
        messages: Conversation history handed to the recent-context helper.
        text: The latest user message.

    Returns:
        True only when ``text`` contains retry/failure wording AND the recent
        context (up to 5 user turns, 1200 characters) mentions Cookbook or
        model-serving vocabulary.
    """
    latest = str(text or "").strip()
    if latest and _RETRY_CONTINUATION_RE.search(latest):
        recent = _recent_context_for_retrieval(messages, max_user=5, max_chars=1200)
        return _COOKBOOK_CONTEXT_RE.search(recent) is not None
    return False
def _assistant_requested_followup(messages: List[Dict]) -> bool: def _assistant_requested_followup(messages: List[Dict]) -> bool:
"""True when the previous assistant turn asked for missing task details. """True when the previous assistant turn asked for missing task details.
@@ -808,11 +861,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
which domain rule packs get appended to the system prompt. which domain rule packs get appended to the system prompt.
""" """
text = str(last_user or "").strip() text = str(last_user or "").strip()
continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages) retry_continuation = _is_contextual_retry_continuation(messages, text)
continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages) or retry_continuation
retrieval_query = _recent_context_for_retrieval(messages) if continuation else text retrieval_query = _recent_context_for_retrieval(messages) if continuation else text
q = retrieval_query.lower() q = retrieval_query.lower()
if not text or bool(_LOW_SIGNAL_RE.match(text)): if not text or bool(_LOW_SIGNAL_RE.match(text)) or _is_casual_low_signal(text):
return { return {
"low_signal": True, "low_signal": True,
"continuation": False, "continuation": False,
@@ -907,6 +961,7 @@ def _build_system_prompt(
compact: bool = False, compact: bool = False,
owner: Optional[str] = None, owner: Optional[str] = None,
suppress_local_context: bool = False, suppress_local_context: bool = False,
suppress_skills: bool = False,
active_email: Optional[Dict[str, str]] = None, active_email: Optional[Dict[str, str]] = None,
) -> List[Dict]: ) -> List[Dict]:
"""Build agent system prompt, inject MCP/document context, merge consecutive system msgs.""" """Build agent system prompt, inject MCP/document context, merge consecutive system msgs."""
@@ -924,7 +979,7 @@ def _build_system_prompt(
_ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest() _ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
except Exception: except Exception:
_ov_sig = "" _ov_sig = ""
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context) cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context, suppress_skills)
if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document: if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
agent_prompt = _cached_base_prompt agent_prompt = _cached_base_prompt
# Skill index is user-editable (name + description), so it must never # Skill index is user-editable (name + description), so it must never
@@ -934,6 +989,7 @@ def _build_system_prompt(
disabled_tools, mcp_mgr, needs_admin, relevant_tools, disabled_tools, mcp_mgr, needs_admin, relevant_tools,
mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner, mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
suppress_local_context=suppress_local_context, suppress_local_context=suppress_local_context,
suppress_skills=suppress_skills,
) )
else: else:
agent_prompt, _skill_index_block = _build_base_prompt( agent_prompt, _skill_index_block = _build_base_prompt(
@@ -945,6 +1001,7 @@ def _build_system_prompt(
compact=compact, compact=compact,
owner=owner, owner=owner,
suppress_local_context=suppress_local_context, suppress_local_context=suppress_local_context,
suppress_skills=suppress_skills,
) )
if not active_document: if not active_document:
_cached_base_prompt = agent_prompt _cached_base_prompt = agent_prompt
@@ -1228,7 +1285,7 @@ def _build_system_prompt(
# few. If the teacher wrote a procedure for "open my X chat" last # few. If the teacher wrote a procedure for "open my X chat" last
# time the student failed, this is where the student finds it # time the student failed, this is where the student finds it
# before deciding which tool to call. # before deciding which tool to call.
if not suppress_local_context: if not suppress_local_context and not suppress_skills:
try: try:
last_user = _extract_last_user_message(messages) last_user = _extract_last_user_message(messages)
# Respect the user's skills-enabled toggle (mirrors memory_enabled). # Respect the user's skills-enabled toggle (mirrors memory_enabled).
@@ -1395,6 +1452,7 @@ def _build_base_prompt(
compact: bool = False, compact: bool = False,
owner: Optional[str] = None, owner: Optional[str] = None,
suppress_local_context: bool = False, suppress_local_context: bool = False,
suppress_skills: bool = False,
): ):
"""Build the agent prompt with only relevant tools included. """Build the agent prompt with only relevant tools included.
@@ -1447,7 +1505,7 @@ def _build_base_prompt(
# The caller wraps it in untrusted_context_message and ships it as a # The caller wraps it in untrusted_context_message and ships it as a
# user-role message — same treatment as the matched-skills block. # user-role message — same treatment as the matched-skills block.
skill_index_block = "" skill_index_block = ""
if not suppress_local_context: if not suppress_local_context and not suppress_skills:
try: try:
from services.memory.skills import SkillsManager from services.memory.skills import SkillsManager
from src.constants import DATA_DIR from src.constants import DATA_DIR
@@ -1866,6 +1924,7 @@ async def stream_agent_loop(
approved_plan: Optional[str] = None, approved_plan: Optional[str] = None,
tool_policy: Optional[ToolPolicy] = None, tool_policy: Optional[ToolPolicy] = None,
workspace: Optional[str] = None, workspace: Optional[str] = None,
forced_tools: Optional[Set[str]] = None,
_is_teacher_run: bool = False, _is_teacher_run: bool = False,
) -> AsyncGenerator[str, None]: ) -> AsyncGenerator[str, None]:
"""Streaming agent loop generator. """Streaming agent loop generator.
@@ -1905,6 +1964,18 @@ async def stream_agent_loop(
_needs_admin = _detect_admin_intent(messages) _needs_admin = _detect_admin_intent(messages)
_last_user = _extract_last_user_message(messages) _last_user = _extract_last_user_message(messages)
_intent = _classify_agent_request(messages, _last_user) _intent = _classify_agent_request(messages, _last_user)
_low_signal_turn = bool(_intent.get("low_signal"))
_casual_low_signal_turn = _is_casual_low_signal(_last_user)
_direct_low_signal = (
_low_signal_turn
and not bool(_intent.get("continuation"))
and not plan_mode
and not approved_plan
and (_casual_low_signal_turn or active_document is None)
and (_casual_low_signal_turn or not active_email)
and (_casual_low_signal_turn or not workspace)
and not forced_tools
)
# Tool retrieval uses the latest message by default. It may inherit recent # Tool retrieval uses the latest message by default. It may inherit recent
# user turns only for explicit continuations ("yes", "do it", "1"). # user turns only for explicit continuations ("yes", "do it", "1").
_retrieval_query = str(_intent.get("retrieval_query") or _last_user) _retrieval_query = str(_intent.get("retrieval_query") or _last_user)
@@ -1912,11 +1983,86 @@ async def stream_agent_loop(
"[agent-intent] latest=%r continuation=%s low_signal=%s domains=%s retrieval_query=%r", "[agent-intent] latest=%r continuation=%s low_signal=%s domains=%s retrieval_query=%r",
_last_user[:120], _last_user[:120],
bool(_intent.get("continuation")), bool(_intent.get("continuation")),
bool(_intent.get("low_signal")), _low_signal_turn,
sorted(_intent.get("domains") or []), sorted(_intent.get("domains") or []),
_retrieval_query[:200], _retrieval_query[:200],
) )
_mcp_disabled_map = _load_mcp_disabled_map() if mcp_mgr else {} _mcp_disabled_map = _load_mcp_disabled_map() if mcp_mgr else {}
if _direct_low_signal:
logger.info("[agent] direct low-signal reply path for latest=%r", _last_user[:80])
direct_messages = [{"role": "user", "content": _last_user}]
direct_response = ""
direct_start = time.time()
direct_actual_model = model
real_input_tokens = 0
real_output_tokens = 0
try:
async for chunk in stream_llm_with_fallback(
[(endpoint_url, model, headers)] + list(fallbacks or []),
direct_messages,
temperature=temperature,
max_tokens=min(max_tokens or 128, 128),
prompt_type=None,
tools=None,
timeout=int(get_setting("agent_stream_timeout_seconds", 300) or 300),
session_id=session_id,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
data = json.loads(chunk[6:])
except json.JSONDecodeError:
yield chunk
continue
if data.get("type") == "usage":
usage = data.get("data", {}) or {}
direct_actual_model = usage.get("model") or direct_actual_model
real_input_tokens += usage.get("input_tokens", 0) or 0
real_output_tokens += usage.get("output_tokens", 0) or 0
continue
if data.get("type") == "model_actual":
direct_actual_model = data.get("model") or direct_actual_model
data["requested_model"] = model
yield f"data: {json.dumps(data)}\n\n"
continue
if data.get("type") == "fallback":
direct_actual_model = data.get("answered_by") or direct_actual_model
yield chunk
continue
if "delta" in data:
if not data.get("thinking"):
direct_response += data.get("delta", "")
yield chunk
continue
yield chunk
elif chunk.startswith("event: "):
yield chunk
except Exception as _direct_err:
logger.warning("[agent] direct low-signal path failed: %s", _direct_err)
fallback = "Hey."
direct_response += fallback
yield f"data: {json.dumps({'delta': fallback})}\n\n"
if not direct_response.strip():
fallback = "Hey."
direct_response = fallback
yield f"data: {json.dumps({'delta': fallback})}\n\n"
duration = time.time() - direct_start
metrics = {
"model": direct_actual_model,
"requested_model": model,
"input_tokens": real_input_tokens or estimate_tokens(direct_messages),
"output_tokens": real_output_tokens or max(len(direct_response) // 4, 1),
"total_time": round(duration, 2),
"response_time": round(duration, 2),
"agent_rounds": 0,
"tool_calls": 0,
"direct_low_signal": True,
}
yield f"data: {json.dumps({'type': 'metrics', 'data': metrics})}\n\n"
yield "data: [DONE]\n\n"
return
if plan_mode and mcp_mgr: if plan_mode and mcp_mgr:
# Allow read-only MCP tools to investigate, block write/unknown ones: # Allow read-only MCP tools to investigate, block write/unknown ones:
# hide them from the schemas AND reject them at runtime by qualified name. # hide them from the schemas AND reject them at runtime by qualified name.
@@ -1932,7 +2078,7 @@ async def stream_agent_loop(
_t1 = time.time() _t1 = time.time()
if _relevant_tools: if _relevant_tools:
logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)") logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")): if not guide_only and not _relevant_tools and _low_signal_turn:
from src.tool_index import ALWAYS_AVAILABLE from src.tool_index import ALWAYS_AVAILABLE
if workspace: if workspace:
# An active workspace IS the file-work signal: a vague "look at the # An active workspace IS the file-work signal: a vague "look at the
@@ -2023,6 +2169,15 @@ async def stream_agent_loop(
if _relevant_tools is not None and active_document is not None: if _relevant_tools is not None and active_document is not None:
_relevant_tools.update({"edit_document", "update_document", "suggest_document"}) _relevant_tools.update({"edit_document", "update_document", "suggest_document"})
# Per-request UI toggles are stronger than retrieval. If the user turns on
# Search, the model must see the search tools even when the latest text is a
# typo or otherwise low-signal for tool RAG.
if not guide_only and forced_tools:
if _relevant_tools is None:
from src.tool_index import ALWAYS_AVAILABLE
_relevant_tools = set(ALWAYS_AVAILABLE)
_relevant_tools.update(t for t in forced_tools if t not in disabled_tools)
# The skill index injected by _build_system_prompt tells the model to # The skill index injected by _build_system_prompt tells the model to
# call `manage_skills action=view`, and Jaccard-matched skills are pasted # call `manage_skills action=view`, and Jaccard-matched skills are pasted
# into the prompt as procedures to follow — but neither path goes through # into the prompt as procedures to follow — but neither path goes through
@@ -2030,7 +2185,7 @@ async def stream_agent_loop(
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas # (grep, read_file, ...) that aren't in its schema list. Keep the schemas
# in lockstep: manage_skills is callable whenever any skill is indexed, # in lockstep: manage_skills is callable whenever any skill is indexed,
# and a matched skill's declared requires_toolsets ride along with it. # and a matched skill's declared requires_toolsets ride along with it.
if not guide_only and _relevant_tools is not None: if not guide_only and _relevant_tools is not None and not _low_signal_turn:
try: try:
from services.memory.skills import SkillsManager from services.memory.skills import SkillsManager
from src.constants import DATA_DIR from src.constants import DATA_DIR
@@ -2147,6 +2302,7 @@ async def stream_agent_loop(
compact=_compact_agent_prompt, compact=_compact_agent_prompt,
owner=owner, owner=owner,
suppress_local_context=guide_only, suppress_local_context=guide_only,
suppress_skills=_low_signal_turn,
active_email=active_email, active_email=active_email,
) )
if plan_mode and not guide_only: if plan_mode and not guide_only:
@@ -2753,6 +2909,15 @@ async def stream_agent_loop(
_intent_nudge_count += 1 _intent_nudge_count += 1
_matched_phrase = _intent_match.group(0).strip() _matched_phrase = _intent_match.group(0).strip()
logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}") logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
_lower_phrase = _matched_phrase.lower()
_cookbook_log_hint = ""
if any(_word in _lower_phrase for _word in ("log", "logs", "output", "tail", "status")):
_cookbook_log_hint = (
" If this is about a Cookbook/model serve, the concrete calls are: "
"`list_served_models` first, then `tail_serve_output` with the "
"session_id from the serve/list result. Never answer with "
"\"check logs\" when those tools are available."
)
messages.append({ messages.append({
"role": "system", "role": "system",
"content": ( "content": (
@@ -2761,6 +2926,7 @@ async def stream_agent_loop(
"see you announced the action but didn't run it, which " "see you announced the action but didn't run it, which "
"is the most frustrating thing you can do. " "is the most frustrating thing you can do. "
"DO IT NOW: emit the actual function call this turn. " "DO IT NOW: emit the actual function call this turn. "
f"{_cookbook_log_hint}"
"If you decided not to do it after all, say so plainly in " "If you decided not to do it after all, say so plainly in "
"one sentence instead of restating the plan." "one sentence instead of restating the plan."
), ),
+33 -11
View File
@@ -7,6 +7,7 @@ from src.constants import MAX_OUTPUT_CHARS
class WebSearchTool: class WebSearchTool:
async def execute(self, content: str, ctx: dict) -> dict: async def execute(self, content: str, ctx: dict) -> dict:
from src.search import comprehensive_web_search from src.search import comprehensive_web_search
progress_cb = ctx.get("progress_cb") if isinstance(ctx, dict) else None
raw = content.strip() raw = content.strip()
query = raw query = raw
time_filter = None time_filter = None
@@ -37,18 +38,39 @@ class WebSearchTool:
elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"): elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"):
time_filter = "week" time_filter = "week"
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
text, sources = await asyncio.wait_for( if progress_cb:
loop.run_in_executor( await progress_cb({
None, "elapsed_s": 0,
lambda: comprehensive_web_search( "tail": f"Searching web for: {query[:160]}",
query, })
max_pages=max_pages, try:
time_filter=time_filter, text, sources = await asyncio.wait_for(
return_sources=True, loop.run_in_executor(
None,
lambda: comprehensive_web_search(
query,
max_pages=max_pages,
time_filter=time_filter,
return_sources=True,
),
), ),
), timeout=30,
timeout=30, )
) except asyncio.TimeoutError:
return {
"error": f"web_search timed out after 30s: {query[:200]}",
"exit_code": 1,
}
except Exception as e:
return {
"error": f"web_search failed: {type(e).__name__}: {str(e) or 'no details'}",
"exit_code": 1,
}
if progress_cb:
await progress_cb({
"elapsed_s": 30,
"tail": "Search completed; preparing sources.",
})
output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text
if sources: if sources:
output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->" output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
+50 -48
View File
@@ -76,8 +76,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
import json import json
import re import re
from src.constants import DATA_DIR from src.constants import DATA_DIR
from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async_with_fallback
from src.llm_core import llm_call_async
from src.memory import MemoryManager from src.memory import MemoryManager
manager = MemoryManager(DATA_DIR) manager = MemoryManager(DATA_DIR)
@@ -116,10 +115,9 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
if len(group_memories) < 2: if len(group_memories) < 2:
return False return False
url, model, headers = resolve_endpoint("utility", owner=group_owner or None) from src.task_endpoint import resolve_task_candidates
if not url or not model: candidates = resolve_task_candidates(owner=group_owner or None)
url, model, headers = resolve_endpoint("default", owner=group_owner or None) if not candidates:
if not url or not model:
return False return False
try: try:
@@ -147,13 +145,11 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
"\"drop\":[{\"id\":\"existing id\",\"reason\":\"short reason\"}]}\n\n" "\"drop\":[{\"id\":\"existing id\",\"reason\":\"short reason\"}]}\n\n"
f"MEMORIES:\n{json.dumps(items, ensure_ascii=False)}" f"MEMORIES:\n{json.dumps(items, ensure_ascii=False)}"
) )
raw = await llm_call_async( raw = await llm_call_async_with_fallback(
url=url, candidates,
model=model,
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
temperature=0.0, temperature=0.0,
max_tokens=4096, max_tokens=4096,
headers=headers,
timeout=120, timeout=120,
) )
from src.text_helpers import strip_think from src.text_helpers import strip_think
@@ -604,8 +600,7 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
try: try:
from datetime import timedelta from datetime import timedelta
from core.database import SessionLocal, CalendarEvent from core.database import SessionLocal, CalendarEvent
from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async_with_fallback
from src.llm_core import llm_call_async
import re as _re, json as _json import re as _re, json as _json
db = SessionLocal() db = SessionLocal()
@@ -620,10 +615,9 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
if not events: if not events:
return "No upcoming events to classify", True return "No upcoming events to classify", True
llm_url, llm_model, llm_headers = resolve_endpoint("utility", owner=owner) from src.task_endpoint import resolve_task_candidates
if not llm_url: llm_candidates = resolve_task_candidates(owner=owner)
llm_url, llm_model, llm_headers = resolve_endpoint("default", owner=owner) llm_available = bool(llm_candidates)
llm_available = bool(llm_url and llm_model)
# Pull user memories so the LLM has personal context (relationships, # Pull user memories so the LLM has personal context (relationships,
# job, hobbies). Helps it know e.g. "<name> is your spouse" so their # job, hobbies). Helps it know e.g. "<name> is your spouse" so their
@@ -699,11 +693,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
f"EVENTS: {_json.dumps(items)}" f"EVENTS: {_json.dumps(items)}"
) )
try: try:
raw = await llm_call_async( raw = await llm_call_async_with_fallback(
url=llm_url, model=llm_model, llm_candidates,
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
temperature=0.1, max_tokens=16384, temperature=0.1, max_tokens=16384,
headers=llm_headers, timeout=180, timeout=180,
) )
from src.text_helpers import strip_think as _st from src.text_helpers import strip_think as _st
raw = _st(raw or "", prose=False, prompt_echo=False) raw = _st(raw or "", prose=False, prompt_echo=False)
@@ -810,8 +804,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
import asyncio as _aio import asyncio as _aio
from datetime import datetime as _dt, timedelta as _td from datetime import datetime as _dt, timedelta as _td
from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async_with_fallback
from src.llm_core import llm_call_async
# 1. Pull recent UIDs + From headers cheaply (header-only fetch). # 1. Pull recent UIDs + From headers cheaply (header-only fetch).
def _pull_headers(): def _pull_headers():
@@ -891,11 +884,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
if not eligible: if not eligible:
return "All sender sigs already cached (or no eligible senders)", True return "All sender sigs already cached (or no eligible senders)", True
url, model, headers = resolve_endpoint("utility", owner=owner) from src.task_endpoint import resolve_task_candidates
if not url or not model: candidates = resolve_task_candidates(owner=owner)
url, model, headers = resolve_endpoint("default", owner=owner) if not candidates:
if not url or not model:
return "No LLM endpoint available", False return "No LLM endpoint available", False
model = candidates[0][1]
analyzed = 0 analyzed = 0
no_sig = 0 no_sig = 0
@@ -949,11 +942,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
) )
try: try:
raw = await llm_call_async( raw = await llm_call_async_with_fallback(
url=url, model=model, candidates,
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
temperature=0.0, max_tokens=600, temperature=0.0, max_tokens=600,
headers=headers, timeout=60, timeout=60,
) )
from src.text_helpers import strip_think as _st from src.text_helpers import strip_think as _st
sig = _st(raw or "", prose=False, prompt_echo=False).strip() sig = _st(raw or "", prose=False, prompt_echo=False).strip()
@@ -1137,7 +1130,6 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
from services.memory.skills import SkillsManager from services.memory.skills import SkillsManager
from src.constants import DATA_DIR from src.constants import DATA_DIR
from routes.skills_routes import _run_skill_test_once, _skill_test_task from routes.skills_routes import _run_skill_test_once, _skill_test_task
from src.endpoint_resolver import resolve_endpoint
# #3 SCOPE GUARD: refuse to run on a None/empty owner — otherwise # #3 SCOPE GUARD: refuse to run on a None/empty owner — otherwise
# `sm.load(owner=None)` returns every user's skills and we'd cross- # `sm.load(owner=None)` returns every user's skills and we'd cross-
@@ -1152,27 +1144,40 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
if not names: if not names:
raise TaskNoop("no skills to test") raise TaskNoop("no skills to test")
url, model, headers = resolve_endpoint("default", owner=owner) from src.task_endpoint import resolve_task_candidates
if not url or not model: candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No Default/Utility model configured — set one in Settings.", False return "No Default/Utility model configured — set one in Settings.", False
# #2 NO SILENT MODEL SWAP: if the configured model isn't served by the # #2 NO SILENT MODEL SWAP: if the configured model isn't served by the
# endpoint, try a basename match — but fail loudly instead of grabbing # endpoint, try a basename match — but fail loudly instead of grabbing
# `avail[0]` which could be an embedding-only model and produce 36 # `avail[0]` which could be an embedding-only model and produce 36
# garbage transcripts → 36 'unknown' verdicts with no hint why. # garbage transcripts → 36 'unknown' verdicts with no hint why.
url, model, headers = candidates[0]
try: try:
from src.llm_core import list_model_ids from src.llm_core import list_model_ids
avail = list_model_ids(url, headers=headers) import os as _os
if avail and model not in avail:
import os as _os selected = None
base = _os.path.basename((model or "").rstrip("/")) mismatch_notes = []
m = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None) for cand_url, cand_model, cand_headers in candidates:
if m: avail = list_model_ids(cand_url, headers=cand_headers)
model = m if not avail or cand_model in avail:
else: selected = (cand_url, cand_model, cand_headers)
return (f"Default model '{model}' not served by endpoint {url}. " break
f"Available: {', '.join(avail[:8])}{'' if len(avail) > 8 else ''}. " base = _os.path.basename((cand_model or "").rstrip("/"))
"Set a valid Default model in Settings."), False matched = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
if matched:
selected = (cand_url, matched, cand_headers)
break
mismatch_notes.append(
f"{cand_model} not served by {cand_url}; available: "
f"{', '.join(avail[:8])}{'...' if len(avail) > 8 else ''}"
)
if selected:
url, model, headers = selected
elif mismatch_notes:
return "No configured task fallback model is served. " + " | ".join(mismatch_notes[:3]), False
except Exception as _e: except Exception as _e:
logger.warning(f"test_skills model resolve check failed (continuing): {_e}") logger.warning(f"test_skills model resolve check failed (continuing): {_e}")
@@ -1483,7 +1488,6 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
from pathlib import Path as _P from pathlib import Path as _P
from core.database import SessionLocal as _SL, EmailAccount as _EA from core.database import SessionLocal as _SL, EmailAccount as _EA
from routes.email_helpers import _imap_connect, _decode_header from routes.email_helpers import _imap_connect, _decode_header
from src.endpoint_resolver import resolve_endpoint, resolve_utility_fallback_candidates
from src.llm_core import llm_call_async_with_fallback from src.llm_core import llm_call_async_with_fallback
# Per-owner state file so multi-user runs don't clobber each other's # Per-owner state file so multi-user runs don't clobber each other's
@@ -1505,12 +1509,10 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
# ── 1. Resolve LLM candidates (utility primary + utility fallbacks; fall # ── 1. Resolve LLM candidates (utility primary + utility fallbacks; fall
# through to default chat as a last resort). # through to default chat as a last resort).
url, model, headers = resolve_endpoint("utility", owner=owner) from src.task_endpoint import resolve_task_candidates
if not url or not model: candidates = resolve_task_candidates(owner=owner)
url, model, headers = resolve_endpoint("default", owner=owner) if not candidates:
if not url or not model:
return "No LLM endpoint available", False return "No LLM endpoint available", False
candidates = [(url, model, headers)] + resolve_utility_fallback_candidates(owner=owner)
# ── 2. Enumerate enabled accounts. Match this task's owner AND fall # ── 2. Enumerate enabled accounts. Match this task's owner AND fall
# back to the legacy "unowned account whose imap_user / from_address # back to the legacy "unowned account whose imap_user / from_address
+3
View File
@@ -396,6 +396,9 @@ def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list:
settings = load_settings() settings = load_settings()
utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip() utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip()
if not utility_ep: if not utility_ep:
utility_chain = get_user_setting("utility_model_fallbacks", owner or "", settings.get("utility_model_fallbacks") or []) or []
if utility_chain:
return _resolve_fallback_candidates("utility_model_fallbacks", owner=owner)
return _resolve_fallback_candidates("default_model_fallbacks", owner=owner) return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
except Exception: except Exception:
pass pass
+2
View File
@@ -2130,6 +2130,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
yield _stream_delta_event(reasoning, thinking=True) yield _stream_delta_event(reasoning, thinking=True)
content = delta.get("content") or "" content = delta.get("content") or ""
if content: if content:
content = re.sub(r"<mm:think(\s+[^>]*)?>", r"<think\1>", content, flags=re.IGNORECASE)
content = re.sub(r"</mm:think>", "</think>", content, flags=re.IGNORECASE)
stripped = content.lstrip() stripped = content.lstrip()
# gpt-oss harmony format (<|channel|>analysis/final): route via the harmony # gpt-oss harmony format (<|channel|>analysis/final): route via the harmony
# stream router. Sticky once the first marker appears — distinct from the # stream router. Sticky once the first marker appears — distinct from the
+64 -2
View File
@@ -1,6 +1,11 @@
"""Shared resolver for background-task AI endpoint (auto-naming, memory, sorting).""" """Shared resolver for background-task AI endpoints."""
from src.endpoint_resolver import resolve_endpoint from src.endpoint_resolver import (
resolve_chat_fallback_candidates,
resolve_endpoint,
resolve_utility_fallback_candidates,
)
from src.llm_core import llm_call_async_with_fallback
def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None): def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None):
@@ -11,3 +16,60 @@ def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_heade
endpoint cannot be resolved. endpoint cannot be resolved.
""" """
return resolve_endpoint("task", fallback_url, fallback_model, fallback_headers, owner=owner) return resolve_endpoint("task", fallback_url, fallback_model, fallback_headers, owner=owner)
def resolve_task_candidates(
    fallback_url=None,
    fallback_model=None,
    fallback_headers=None,
    owner=None,
):
    """Build the ordered, de-duplicated LLM candidate list for background tasks.

    Sources are consulted in this priority order:
        1. configured Background Tasks endpoint/model, or the caller fallback
        2. Utility endpoint/model
        3. Default endpoint/model
        4. Utility fallback chain
        5. Default fallback chain

    Returns:
        A list of ``(url, model, headers)`` tuples. Entries with a falsy URL
        or model are skipped, only the first occurrence of each
        ``(url, model)`` pair is kept, and falsy headers become ``{}``.
    """
    ordered = []

    def _add_unique(url, model, headers):
        # An entry is only usable when both the URL and the model are set.
        if not (url and model):
            return
        pair = (url, model)
        # Keep the earliest (highest-priority) occurrence of each pair.
        for known_url, known_model, _unused in ordered:
            if (known_url, known_model) == pair:
                return
        ordered.append((url, model, headers if headers else {}))

    # Single-endpoint sources, highest priority first.
    _add_unique(
        *resolve_task_endpoint(
            fallback_url, fallback_model, fallback_headers, owner=owner
        )
    )
    for endpoint_kind in ("utility", "default"):
        _add_unique(*resolve_endpoint(endpoint_kind, owner=owner))

    # Fallback chains: utility first, then the default/chat chain.
    for chain_resolver in (
        resolve_utility_fallback_candidates,
        resolve_chat_fallback_candidates,
    ):
        for url, model, headers in chain_resolver(owner=owner):
            _add_unique(url, model, headers)

    return ordered
async def task_llm_call_async(
    messages,
    *,
    fallback_url=None,
    fallback_model=None,
    fallback_headers=None,
    owner=None,
    **kwargs,
):
    """Run an LLM call through the shared background-task candidate chain.

    Args:
        messages: Chat messages forwarded to the LLM call as ``messages=``.
        fallback_url: Caller-supplied endpoint URL passed to the resolver.
        fallback_model: Caller-supplied model passed to the resolver.
        fallback_headers: Caller-supplied headers passed to the resolver.
        owner: Owner whose settings the resolver should consult.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``llm_call_async_with_fallback``.

    Returns:
        Whatever ``llm_call_async_with_fallback`` returns for the chain.

    Raises:
        RuntimeError: If no candidate endpoint could be resolved.
    """
    chain = resolve_task_candidates(
        fallback_url=fallback_url,
        fallback_model=fallback_model,
        fallback_headers=fallback_headers,
        owner=owner,
    )
    if chain:
        return await llm_call_async_with_fallback(chain, messages=messages, **kwargs)
    # Nothing resolved: fail loudly rather than silently yielding no output.
    raise RuntimeError("No LLM endpoint available for background task")
+32 -13
View File
@@ -833,6 +833,14 @@ class TaskScheduler:
owner=task.owner, owner=task.owner,
body=run.result if output == "notification" else None, body=run.result if output == "notification" else None,
) )
elif run.status == "error":
self.add_notification(
task.name,
"error",
task_id,
owner=task.owner,
body=run.error or run.result,
)
# Log result to the assistant chat so all task activity is visible. # Log result to the assistant chat so all task activity is visible.
# Skip skipped/error rows — user shouldn't see "skipped: …" noise # Skip skipped/error rows — user shouldn't see "skipped: …" noise
@@ -1406,12 +1414,18 @@ class TaskScheduler:
) )
except Exception as e: except Exception as e:
logger.warning(f"Agent loop failed for task '{task.name}', falling back to simple call: {e}") logger.warning(f"Agent loop failed for task '{task.name}', falling back to simple call: {e}")
from src.llm_core import llm_call_async from src.task_endpoint import task_llm_call_async
messages = [ messages = [
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": task.prompt}, {"role": "user", "content": task.prompt},
] ]
result = await llm_call_async(url=endpoint_url, model=model, messages=messages, timeout=120) result = await task_llm_call_async(
messages,
fallback_url=endpoint_url,
fallback_model=model,
owner=task.owner,
timeout=120,
)
# Strip the model's chain-of-thought before saving/delivering. Task # Strip the model's chain-of-thought before saving/delivering. Task
# output is LLM-only, so prose=True (which also removes untagged # output is LLM-only, so prose=True (which also removes untagged
@@ -1636,13 +1650,17 @@ class TaskScheduler:
# Honor per-task max_steps (defense against runaway agent loops). # Honor per-task max_steps (defense against runaway agent loops).
# Falls back to 20 if not set — the historical default. # Falls back to 20 if not set — the historical default.
_task_max_rounds = task.max_steps if task.max_steps and task.max_steps > 0 else 20 _task_max_rounds = task.max_steps if task.max_steps and task.max_steps > 0 else 20
# Tasks are background workloads they share the Utility model's # Tasks are background workloads: use the shared task fallback chain
# fallback chain (Settings → Utility Model → Fallbacks). A downed # behind the primary endpoint so a downed primary won't silently yield
# primary endpoint won't silently yield `(no output)` — same recipe # `(no output)`.
# chat uses but with the utility list (`utility_model_fallbacks`).
try: try:
from src.endpoint_resolver import resolve_utility_fallback_candidates from src.task_endpoint import resolve_task_candidates
_task_fallbacks = resolve_utility_fallback_candidates(owner=task.owner or None) _task_fallbacks = resolve_task_candidates(
fallback_url=endpoint_url,
fallback_model=model,
fallback_headers=headers,
owner=task.owner or None,
)[1:]
except Exception: except Exception:
_task_fallbacks = [] _task_fallbacks = []
async for event_str in stream_agent_loop( async for event_str in stream_agent_loop(
@@ -1679,21 +1697,22 @@ class TaskScheduler:
# asking it to summarize what it did. Guarantees output. # asking it to summarize what it did. Guarantees output.
if not full_text.strip(): if not full_text.strip():
try: try:
from src.llm_core import llm_call_async_with_fallback from src.task_endpoint import task_llm_call_async
from src.endpoint_resolver import resolve_utility_fallback_candidates
grace_context = "You ran out of steps. " grace_context = "You ran out of steps. "
if tool_results: if tool_results:
grace_context += "Here's what your tools returned:\n" + "\n".join(tool_results[-5:]) grace_context += "Here's what your tools returned:\n" + "\n".join(tool_results[-5:])
else: else:
grace_context += "No tool results were captured." grace_context += "No tool results were captured."
grace_context += "\n\nSummarize what you accomplished and what's still pending. Be concise." grace_context += "\n\nSummarize what you accomplished and what's still pending. Be concise."
_grace_candidates = [(endpoint_url, model, headers)] + resolve_utility_fallback_candidates(owner=task.owner or None) full_text = await task_llm_call_async(
full_text = await llm_call_async_with_fallback(
_grace_candidates,
messages=[ messages=[
{"role": "system", "content": system_content}, {"role": "system", "content": system_content},
{"role": "user", "content": grace_context}, {"role": "user", "content": grace_context},
], ],
fallback_url=endpoint_url,
fallback_model=model,
fallback_headers=headers,
owner=task.owner or None,
timeout=30, timeout=30,
) )
full_text = (full_text or "").strip() full_text = (full_text or "").strip()
+26 -5
View File
@@ -1119,8 +1119,8 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
_ALIASES = { _ALIASES = {
"shell": ["bash"], "shell": ["bash"],
"terminal": ["bash"], "terminal": ["bash"],
"search": ["web_search"], "search": ["web_search", "web_fetch"],
"web": ["web_search"], "web": ["web_search", "web_fetch"],
"browser": ["builtin_browser"], "browser": ["builtin_browser"],
"documents": ["create_document", "edit_document", "update_document", "suggest_document"], "documents": ["create_document", "edit_document", "update_document", "suggest_document"],
"doc": ["create_document", "edit_document", "update_document", "suggest_document"], "doc": ["create_document", "edit_document", "update_document", "suggest_document"],
@@ -1132,7 +1132,7 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
"notes": ["manage_notes"], "notes": ["manage_notes"],
"calendar": ["manage_calendar"], "calendar": ["manage_calendar"],
"email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"], "email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
"research": ["web_search"], # research is a per-request flag, not a tool — closest analog "research": ["web_search", "web_fetch"], # research is a per-request flag, not a tool — closest analog
} }
if action == "list_tools": if action == "list_tools":
@@ -2714,13 +2714,25 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
endpoint_added=endpoint_added, endpoint_id=endpoint_id or "", endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
) )
note = "" if registered else " (state-write failed — task may not show in UI)" note = "" if registered else " (state-write failed — task may not show in UI)"
where = host or "local"
log_path = f"/tmp/odysseus-tmux/{sid}.log"
return { return {
"output": f"Serving {repo_id} (session: {sid}){note}", "output": (
f"Serving {repo_id} on {where} (session: {sid}){note}\n"
f"Next required check: call list_served_models. If this task is not ready, "
f"call tail_serve_output with session_id={sid} and tail=400 before answering. "
f"Do not tell the user to check logs; you have the log tool."
),
"session_id": sid, "session_id": sid,
"task_type": "serve", "task_type": "serve",
"phase": "running", "phase": "running",
"host": host, "host": host,
"endpoint_id": endpoint_id, "endpoint_id": endpoint_id,
"log_path": log_path,
"next_tools": [
{"name": "list_served_models", "arguments": {}},
{"name": "tail_serve_output", "arguments": {"session_id": sid, "tail": 400}},
],
"exit_code": 0, "exit_code": 0,
} }
# FastAPI HTTPException puts the message under `detail`, not `error`. # FastAPI HTTPException puts the message under `detail`, not `error`.
@@ -3057,8 +3069,17 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
MAX_CHARS = 8000 MAX_CHARS = 8000
if len(output_text) > MAX_CHARS: if len(output_text) > MAX_CHARS:
output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:] output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:]
if not output_text:
output_text = (
f"No log output captured yet for {session_id} on {host_label}. "
"This usually means the tmux wrapper has started but the model process "
"has not printed anything yet. Do not stop here: call list_served_models "
"again to check whether it is still loading, ready, or crashed; if it is "
"still not ready, call tail_serve_output again with a larger tail after "
"the next status check."
)
return { return {
"output": output_text or "(empty pane)", "output": output_text,
"session_id": session_id, "session_id": session_id,
"host": host_label, "host": host_label,
"tail_lines": tail, "tail_lines": tail,
Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

+7 -2
View File
@@ -879,7 +879,7 @@
<span class="grow">Library</span> <span class="grow">Library</span>
<button type="button" class="list-item-plus-btn" id="library-new-doc-btn" title="New document"> <button type="button" class="list-item-plus-btn" id="library-new-doc-btn" title="New document">
<svg class="list-item-plus-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="width:11px;height:11px;"><line x1="12" y1="5" x2="12" y2="19"/><line x1="5" y1="12" x2="19" y2="12"/></svg> <svg class="list-item-plus-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="width:11px;height:11px;"><line x1="12" y1="5" x2="12" y2="19"/><line x1="5" y1="12" x2="19" y2="12"/></svg>
<span class="list-item-plus-label">new</span> <span class="list-item-plus-label">document</span>
</button> </button>
</div> </div>
<div class="list-item" id="tool-notes-btn"> <div class="list-item" id="tool-notes-btn">
@@ -1005,7 +1005,12 @@
<button type="button" class="model-picker-btn" id="model-picker-btn" title="Switch model"><span id="model-picker-label">Select model</span> <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 15 12 9 18 15"/></svg></button> <button type="button" class="model-picker-btn" id="model-picker-btn" title="Switch model"><span id="model-picker-label">Select model</span> <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 15 12 9 18 15"/></svg></button>
<div class="model-picker-menu hidden" id="model-picker-menu"> <div class="model-picker-menu hidden" id="model-picker-menu">
<div class="model-picker-search-row"> <div class="model-picker-search-row">
<input type="text" id="model-picker-search" placeholder="Search models..." autocomplete="off" aria-label="Search models"> <div class="model-picker-search-wrap">
<input type="text" id="model-picker-search" placeholder="Search models..." autocomplete="off" aria-label="Search models">
<button type="button" class="model-picker-refresh-btn" id="model-picker-refresh-btn" title="Refresh model picker" aria-label="Refresh model picker">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polyline points="23 4 23 10 17 10"/><path d="M20.49 15a9 9 0 1 1-2.12-9.36L23 10"/></svg>
</button>
</div>
<button type="button" class="model-picker-action-btn primary" id="model-picker-add-models-btn" title="Add model endpoints" aria-label="Add model endpoints"> <button type="button" class="model-picker-action-btn primary" id="model-picker-add-models-btn" title="Add model endpoints" aria-label="Add model endpoints">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><path d="M12 5v14"/><path d="M5 12h14"/></svg> <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><path d="M12 5v14"/><path d="M5 12h14"/></svg>
</button> </button>
+13 -11
View File
@@ -1164,6 +1164,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
} }
function _replyAfterClosedThinking(text) { function _replyAfterClosedThinking(text) {
text = markdownModule.normalizeThinkingMarkup(text || '');
const closeRe = /<\/(?:think(?:ing)?|thought)>|<channel\|>/gi; const closeRe = /<\/(?:think(?:ing)?|thought)>|<channel\|>/gi;
let match = null; let match = null;
let last = null; let last = null;
@@ -1174,7 +1175,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
// Direct render helper for streaming text // Direct render helper for streaming text
_renderStream = () => { _renderStream = () => {
let dt = stripToolBlocks(roundText); let dt = markdownModule.normalizeThinkingMarkup(stripToolBlocks(roundText));
const bodyEl = roundHolder.querySelector('.body'); const bodyEl = roundHolder.querySelector('.body');
const contentEl = _ensureStreamLayout(bodyEl); const contentEl = _ensureStreamLayout(bodyEl);
@@ -1466,12 +1467,13 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
// 1. Normal: <think>...no closing tag yet // 1. Normal: <think>...no closing tag yet
// 2. Malformed: <think></think>\n...text but no second </think> yet // 2. Malformed: <think></think>\n...text but no second </think> yet
// 3. Qwen3.5: "Thinking Process:" without <think> tags // 3. Qwen3.5: "Thinking Process:" without <think> tags
let hasUnclosedThink = markdownModule.hasUnclosedThinkTag(roundText); const normalizedRoundText = markdownModule.normalizeThinkingMarkup(roundText);
let hasUnclosedThink = markdownModule.hasUnclosedThinkTag(normalizedRoundText);
// Detect non-tag thinking patterns: "Thinking:", "Thinking Process:", Gemma-style reasoning // Detect non-tag thinking patterns: "Thinking:", "Thinking Process:", Gemma-style reasoning
// These patterns don't use <think> tags, so we simulate unclosed thinking during streaming // These patterns don't use <think> tags, so we simulate unclosed thinking during streaming
const _replyPrefixes = ['Hey', 'Hi ', 'Hi!', 'Hello', 'Sure', 'Yes', 'No ', 'No,', 'Yo', 'OK', 'Here', 'Absolutely', 'Of course', 'Great', 'Alright', 'Thanks', 'Welcome', 'Good ', "I'm happy", "I'd be"]; const _replyPrefixes = ['Hey', 'Hi ', 'Hi!', 'Hello', 'Sure', 'Yes', 'No ', 'No,', 'Yo', 'OK', 'Here', 'Absolutely', 'Of course', 'Great', 'Alright', 'Thanks', 'Welcome', 'Good ', "I'm happy", "I'd be"];
if (!hasUnclosedThink && !/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i.test(roundText)) { if (!hasUnclosedThink && !/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i.test(normalizedRoundText)) {
const _trimmedRT = roundText.trimStart(); const _trimmedRT = normalizedRoundText.trimStart();
const _isReasoning = markdownModule.startsWithReasoningPrefix(_trimmedRT); const _isReasoning = markdownModule.startsWithReasoningPrefix(_trimmedRT);
if (_isReasoning) { if (_isReasoning) {
// Check if we can see a reply boundary yet (newline then reply pattern) // Check if we can see a reply boundary yet (newline then reply pattern)
@@ -1496,9 +1498,9 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
} }
} }
} }
if (!hasUnclosedThink && /^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i.test(roundText)) { if (!hasUnclosedThink && /^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i.test(normalizedRoundText)) {
// Empty <think></think> — the model likely put thinking outside the tags // Empty <think></think> — the model likely put thinking outside the tags
const afterEmpty = roundText.replace(/^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i, '').trim(); const afterEmpty = normalizedRoundText.replace(/^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i, '').trim();
const closeTags = (afterEmpty.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length; const closeTags = (afterEmpty.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length;
if (closeTags === 0 && afterEmpty.length > 0) { if (closeTags === 0 && afterEmpty.length > 0) {
hasUnclosedThink = true; // still waiting for real closing tag hasUnclosedThink = true; // still waiting for real closing tag
@@ -1508,10 +1510,10 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
// Only applies when there's a second </think> later (model leaked thinking outside tags) // Only applies when there's a second </think> later (model leaked thinking outside tags)
// Do NOT trigger if the text after </think> contains tool calls (that's real content) // Do NOT trigger if the text after </think> contains tool calls (that's real content)
if (!hasUnclosedThink && isThinking) { if (!hasUnclosedThink && isThinking) {
const _thinkMatch = roundText.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i); const _thinkMatch = normalizedRoundText.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i);
const _thinkLen = _thinkMatch ? _thinkMatch[1].trim().length : 0; const _thinkLen = _thinkMatch ? _thinkMatch[1].trim().length : 0;
if (_thinkLen < 20) { if (_thinkLen < 20) {
const _afterClose = roundText.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i, '').trim(); const _afterClose = normalizedRoundText.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i, '').trim();
// Only keep waiting if there's trailing text that looks like thinking (not tool calls) // Only keep waiting if there's trailing text that looks like thinking (not tool calls)
const _hasToolCall = /```(?:bash|python|web_search|read_file|write_file|create_document|edit_document|manage_|generate_image)/i.test(_afterClose); const _hasToolCall = /```(?:bash|python|web_search|read_file|write_file|create_document|edit_document|manage_|generate_image)/i.test(_afterClose);
const _hasOrphanClose = /<\/(?:think(?:ing)?|thought)>/i.test(_afterClose); const _hasOrphanClose = /<\/(?:think(?:ing)?|thought)>/i.test(_afterClose);
@@ -1572,7 +1574,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
} else if (hasUnclosedThink && isThinking) { } else if (hasUnclosedThink && isThinking) {
if (_liveThinkInner) { if (_liveThinkInner) {
// Extract raw thinking text (strip known thinking wrappers and prefixes) // Extract raw thinking text (strip known thinking wrappers and prefixes)
var thinkText = roundText var thinkText = markdownModule.normalizeThinkingMarkup(roundText)
.replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '') .replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '')
.replace(/<\|channel>thought\s*\n?/gi, '') .replace(/<\|channel>thought\s*\n?/gi, '')
.replace(/<\|channel>response\s*\n?/gi, '') .replace(/<\|channel>response\s*\n?/gi, '')
@@ -2045,7 +2047,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
if (!roundFinalized) { if (!roundFinalized) {
roundFinalized = true; roundFinalized = true;
if (spinner && spinner.element) spinner.destroy(); if (spinner && spinner.element) spinner.destroy();
const dt = stripToolBlocks(roundText); const dt = markdownModule.normalizeThinkingMarkup(stripToolBlocks(roundText));
if (dt.trim()) { if (dt.trim()) {
var _body3 = roundHolder.querySelector('.body'); var _body3 = roundHolder.querySelector('.body');
var _contentEl3 = _ensureStreamLayout(_body3); var _contentEl3 = _ensureStreamLayout(_body3);
@@ -3405,7 +3407,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
}; };
const renderDelta = () => { const renderDelta = () => {
const dt = stripToolBlocks(roundText); const dt = markdownModule.normalizeThinkingMarkup(stripToolBlocks(roundText));
contentDiv.innerHTML = markdownModule.mdToHtml(markdownModule.squashOutsideCode(dt)); contentDiv.innerHTML = markdownModule.mdToHtml(markdownModule.squashOutsideCode(dt));
uiModule.scrollHistory(); uiModule.scrollHistory();
}; };
+73 -17
View File
@@ -73,6 +73,45 @@ function isCompareActive() {
return state.isActive; return state.isActive;
} }
/**
 * Return the noun phrase (with its leading space) that follows "Comparing"
 * in the compare header, selected by the current `state._compareMode`.
 * Any mode without a dedicated entry falls back to ' models'.
 */
function _compareModeLabel() {
  const labelsByMode = {
    search: ' search providers',
    agent: ' agents',
    research: ' research models',
  };
  const label = labelsByMode[state._compareMode];
  return label || ' models';
}
/**
 * Persist the chat/agent toolbar mode and mirror it onto the toolbar DOM.
 *
 * @param {string} mode - 'agent' selects agent mode; any other value is
 *   treated as 'chat'.
 * @param {boolean} [syncModeTools] - when true, also show or hide every
 *   `[data-mode-tool]` element to match the mode. Defaults to
 *   `!state.isActive`.
 */
function _setToolbarMode(mode, syncModeTools = !state.isActive) {
  const isAgent = mode === 'agent';
  const target = isAgent ? 'agent' : 'chat';

  // Write the normalized mode back into the stored toggle state.
  const saved = Storage.loadToggleState();
  saved.mode = target;
  Storage.saveToggleState(saved);

  const agentBtn = document.getElementById('mode-agent-btn');
  const chatBtn = document.getElementById('mode-chat-btn');

  // The buttons are only updated as a pair; if either is missing, skip both.
  if (agentBtn && chatBtn) {
    agentBtn.classList.toggle('active', isAgent);
    chatBtn.classList.toggle('active', !isAgent);
    agentBtn.setAttribute('aria-pressed', isAgent ? 'true' : 'false');
    chatBtn.setAttribute('aria-pressed', isAgent ? 'false' : 'true');
  }

  // Prefer the toggle wrapping one of the buttons, else the first in the page.
  const modeToggle = agentBtn?.closest('.mode-toggle')
    || chatBtn?.closest('.mode-toggle')
    || document.querySelector('.mode-toggle');
  if (modeToggle) {
    for (const cls of ['mode-chat', 'mode-right']) {
      modeToggle.classList.toggle(cls, !isAgent);
    }
  }

  if (syncModeTools) {
    const display = isAgent ? '' : 'none';
    document.querySelectorAll('[data-mode-tool]').forEach((el) => {
      el.style.display = display;
    });
  }
}
/**
 * Apply a toolbar mode click to an active compare session: record the mode on
 * `state`, update the toolbar buttons (leaving the mode-tool buttons alone),
 * rewrite the compare header label, and re-render the eval prompt picker.
 * Does nothing when compare is not active.
 *
 * @param {string} mode - 'agent' selects agent; any other value becomes 'chat'.
 */
function _syncCompareModeFromToolbar(mode) {
  if (!state.isActive) return;

  const nextMode = mode === 'agent' ? 'agent' : 'chat';
  state._compareMode = nextMode;
  _setToolbarMode(nextMode, false);

  const label = document.querySelector('.compare-header-label');
  if (label) {
    const modeText = _compareModeLabel();
    const blindSuffix = state._blindMode ? ' (blind)' : '';
    label.textContent = 'Comparing' + modeText + blindSuffix + ' · ' + state._timeout + 's timeout';
  }

  // The eval picker wrapper exposes its renderer as `_renderItems`; call it
  // only when the wrapper exists and has one.
  const evalWrap = document.getElementById('cmp-eval-wrap');
  if (!evalWrap || typeof evalWrap._renderItems !== 'function') return;
  evalWrap._renderItems();
}
// ──────────────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────────────
// ── closeCompare ── // ── closeCompare ──
// ──────────────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────────────
@@ -170,12 +209,7 @@ async function deactivate(teardown) {
}); });
// Restore agent/chat mode to what it was before compare // Restore agent/chat mode to what it was before compare
const _ts = Storage.loadToggleState(); _setToolbarMode(state._savedMode, true);
_ts.mode = state._savedMode;
Storage.saveToggleState(_ts);
const _ab2 = document.getElementById('mode-agent-btn'), _cb2 = document.getElementById('mode-chat-btn');
if (_ab2 && _cb2) { _ab2.classList.toggle('active', state._savedMode === 'agent'); _cb2.classList.toggle('active', state._savedMode === 'chat'); }
document.querySelectorAll('[data-mode-tool]').forEach(b => { b.style.display = state._savedMode === 'agent' ? '' : 'none'; });
// Delete unsaved sessions, then reload // Delete unsaved sessions, then reload
if (teardown) { if (teardown) {
@@ -258,19 +292,30 @@ async function _buildCompareUI() {
if (el) state._savedIndicatorDisplay[id] = el.style.display; if (el) state._savedIndicatorDisplay[id] = el.style.display;
}); });
// 5. Save current mode and lock to the right one for this compare type // 5. Save current mode and seed the toolbar for this compare type.
const _toggleState = Storage.loadToggleState(); const _toggleState = Storage.loadToggleState();
state._savedMode = _toggleState.mode || 'chat'; state._savedMode = _toggleState.mode || 'chat';
const _targetMode = (state._compareMode === 'agent') ? 'agent' : 'chat'; const _targetMode = (state._compareMode === 'agent') ? 'agent' : 'chat';
_toggleState.mode = _targetMode; _setToolbarMode(_targetMode, false);
Storage.saveToggleState(_toggleState);
const _ab = document.getElementById('mode-agent-btn'), _cb = document.getElementById('mode-chat-btn'); const _ab = document.getElementById('mode-agent-btn'), _cb = document.getElementById('mode-chat-btn');
let _modeCleanup = null;
const _onCompareModeClick = (ev) => {
ev.stopPropagation();
ev.stopImmediatePropagation();
_syncCompareModeFromToolbar(ev.currentTarget === _ab ? 'agent' : 'chat');
};
if (_ab && _cb) { if (_ab && _cb) {
_ab.classList.toggle('active', _targetMode === 'agent'); _ab.addEventListener('click', _onCompareModeClick, true);
_cb.classList.toggle('active', _targetMode === 'chat'); _cb.addEventListener('click', _onCompareModeClick, true);
_modeCleanup = document.createElement('span');
_modeCleanup.style.display = 'none';
_modeCleanup._cleanup = () => {
_ab.removeEventListener('click', _onCompareModeClick, true);
_cb.removeEventListener('click', _onCompareModeClick, true);
};
} }
const _modeToggle = document.querySelector('.mode-toggle'); const _modeToggle = document.querySelector('.mode-toggle');
if (_modeToggle) { _modeToggle.style.pointerEvents = 'none'; _modeToggle.style.opacity = '0.4'; } if (_modeToggle) { _modeToggle.style.pointerEvents = ''; _modeToggle.style.opacity = ''; }
// 6. Force tool toggles per compare mode // 6. Force tool toggles per compare mode
disableToolToggles(); disableToolToggles();
@@ -289,6 +334,7 @@ async function _buildCompareUI() {
// 7. Hide existing chat container children (preserves event listeners) // 7. Hide existing chat container children (preserves event listeners)
const container = document.getElementById('chat-container'); const container = document.getElementById('chat-container');
state._compareElements = []; state._compareElements = [];
if (_modeCleanup) state._compareElements.push(_modeCleanup);
Array.from(container.children).forEach(child => { Array.from(container.children).forEach(child => {
if (child.style.display === 'none') return; if (child.style.display === 'none') return;
child.dataset.cmpHidden = '1'; child.dataset.cmpHidden = '1';
@@ -302,9 +348,9 @@ async function _buildCompareUI() {
headerBar.className = 'compare-header-bar'; headerBar.className = 'compare-header-bar';
headerBar.style.cssText = 'display:flex;align-items:center;justify-content:space-between;padding:6px 10px;flex-shrink:0;'; headerBar.style.cssText = 'display:flex;align-items:center;justify-content:space-between;padding:6px 10px;flex-shrink:0;';
const headerLabel = document.createElement('span'); const headerLabel = document.createElement('span');
headerLabel.className = 'compare-header-label';
headerLabel.style.cssText = 'font-size:10px;font-weight:400;color:var(--fg);white-space:nowrap;overflow:hidden;text-overflow:ellipsis;min-width:0;'; headerLabel.style.cssText = 'font-size:10px;font-weight:400;color:var(--fg);white-space:nowrap;overflow:hidden;text-overflow:ellipsis;min-width:0;';
const _modeLabel = ({ search: ' search providers', agent: ' agents', research: ' research models' }[state._compareMode] || ' models'); headerLabel.textContent = 'Comparing' + _compareModeLabel() + (state._blindMode ? ' (blind)' : '') + ' · ' + state._timeout + 's timeout';
headerLabel.textContent = 'Comparing' + _modeLabel + (state._blindMode ? ' (blind)' : '') + ' · ' + state._timeout + 's timeout';
// Left side: the Compare tool icon (two side-by-side panes, matching the // Left side: the Compare tool icon (two side-by-side panes, matching the
// rail/sidebar icon) + the label. Other tool headers carry their icon; this // rail/sidebar icon) + the label. Other tool headers carry their icon; this
// one was missing it. // one was missing it.
@@ -475,7 +521,7 @@ async function _buildCompareUI() {
} }
const msgTA = document.getElementById('message'); const msgTA = document.getElementById('message');
if (msgTA) { if (msgTA) {
msgTA.placeholder = 'Enter prompt for all models...'; msgTA.placeholder = window.matchMedia('(max-width: 767px)').matches ? '' : 'Enter prompt for all models...';
requestAnimationFrame(() => msgTA.focus()); requestAnimationFrame(() => msgTA.focus());
} }
@@ -891,8 +937,7 @@ async function _executeCompare(message) {
let sharedSearchContext = null; let sharedSearchContext = null;
let sharedSearchSources = null; let sharedSearchSources = null;
const webChk = document.getElementById('web-toggle'); const webChk = document.getElementById('web-toggle');
const toggleState = Storage.loadToggleState(); const isAgentMode = state._compareMode === 'agent';
const isAgentMode = (toggleState.mode || 'chat') === 'agent';
const webOn = webChk && webChk.checked; const webOn = webChk && webChk.checked;
// In agent mode, web_search is a tool (handled per-pane); in chat mode, pre-search and share // In agent mode, web_search is a tool (handled per-pane); in chat mode, pre-search and share
if (webOn && !isAgentMode) { if (webOn && !isAgentMode) {
@@ -1198,6 +1243,15 @@ function _setupEvalPicker() {
function _renderItems() { function _renderItems() {
const mode = state._compareMode || 'chat'; const mode = state._compareMode || 'chat';
const label = btn.querySelector('.cmp-eval-label');
if (label) {
label.textContent = ({
agent: 'Agent prompts',
chat: 'Chat prompts',
search: 'Search prompts',
research: 'Research prompts'
}[mode] || 'Eval prompts');
}
// research/html aren't first-class compare types — fall back gracefully // research/html aren't first-class compare types — fall back gracefully
const key = EVAL_PROMPTS[mode] ? mode const key = EVAL_PROMPTS[mode] ? mode
: (mode === 'research' ? 'search' : 'chat'); : (mode === 'research' ? 'search' : 'chat');
@@ -1258,8 +1312,10 @@ function _setupEvalPicker() {
}; };
document.addEventListener('click', _onDocClick); document.addEventListener('click', _onDocClick);
_renderItems();
wrap.appendChild(btn); wrap.appendChild(btn);
wrap.appendChild(menu); wrap.appendChild(menu);
wrap._renderItems = _renderItems;
inputTop.appendChild(wrap); inputTop.appendChild(wrap);
// Expected-answer chip — placed above the chat-input-bar (outside it), so // Expected-answer chip — placed above the chat-input-bar (outside it), so
+27 -4
View File
@@ -551,23 +551,46 @@ async function streamToPane(paneIdx, sessionId, message, aiMsgEl, opts) {
footer.className = 'msg-footer'; footer.className = 'msg-footer';
const span = document.createElement('span'); const span = document.createElement('span');
span.className = 'response-metrics'; span.className = 'response-metrics';
let text = metrics.output_tokens + ' tokens | ' + metrics.tokens_per_second + ' tok/s'; const outputTokens = metrics.output_tokens;
const responseTime = metrics.response_time ?? metrics.total_time;
const explicitTps = metrics.tokens_per_second ?? metrics.gen_tps ?? metrics.tps;
const numericOutput = Number(outputTokens);
const numericTime = Number(responseTime);
const numericTps = Number(explicitTps);
const derivedTps = Number.isFinite(numericTps)
? numericTps
: (Number.isFinite(numericOutput) && Number.isFinite(numericTime) && numericTime > 0)
? numericOutput / numericTime
: null;
const tpsLabel = derivedTps != null
? (derivedTps >= 100 ? String(Math.round(derivedTps)) : derivedTps.toFixed(2).replace(/\.?0+$/, ''))
: null;
const parts = [];
if (outputTokens != null && outputTokens !== 'undefined') {
parts.push(outputTokens + ' tokens');
}
if (tpsLabel != null) {
parts.push(tpsLabel + ' tok/s');
}
if (responseTime != null && responseTime !== 'undefined' && parts.length === 0) {
parts.push(responseTime + 's');
}
// Add per-request cost and cost per 1000 // Add per-request cost and cost per 1000
const _model = metrics.model || (state._selectedModels[paneIdx] && state._selectedModels[paneIdx].model) || ''; const _model = metrics.model || (state._selectedModels[paneIdx] && state._selectedModels[paneIdx].model) || '';
const _cost = getModelCost(_model, metrics.input_tokens || 0, metrics.output_tokens || 0); const _cost = getModelCost(_model, metrics.input_tokens || 0, metrics.output_tokens || 0);
// Build the metrics span with optional cost and context // Build the metrics span with optional cost and context
span.textContent = text; span.textContent = parts.join(' | ');
if (_cost !== null) { if (_cost !== null) {
const _cost1k = _cost * 1000; const _cost1k = _cost * 1000;
const costSpan = document.createElement('span'); const costSpan = document.createElement('span');
costSpan.style.color = 'var(--color-success, #4caf50)'; costSpan.style.color = 'var(--color-success, #4caf50)';
costSpan.title = 'Estimated cost per 1,000 responses like this one'; costSpan.title = 'Estimated cost per 1,000 responses like this one';
costSpan.textContent = ' | $' + (_cost1k < 1 ? _cost1k.toFixed(2) : _cost1k.toFixed(0)) + '/1k'; costSpan.textContent = (span.textContent ? ' | ' : '') + '$' + (_cost1k < 1 ? _cost1k.toFixed(2) : _cost1k.toFixed(0)) + '/1k';
span.appendChild(costSpan); span.appendChild(costSpan);
} }
if (metrics.context_percent > 0) { if (metrics.context_percent > 0) {
const ctx = document.createElement('span'); const ctx = document.createElement('span');
ctx.textContent = ' | ' + metrics.context_percent + '% ctx'; ctx.textContent = (span.textContent ? ' | ' : '') + metrics.context_percent + '% ctx';
if (metrics.context_percent >= 85) ctx.style.color = 'var(--color-error)'; if (metrics.context_percent >= 85) ctx.style.color = 'var(--color-error)';
else if (metrics.context_percent >= 70) ctx.style.color = '#ff9900'; else if (metrics.context_percent >= 70) ctx.style.color = '#ff9900';
span.appendChild(ctx); span.appendChild(ctx);
+1 -1
View File
@@ -181,7 +181,7 @@ function handleVote(winnerIdx) {
let html = ''; let html = '';
const caret = ' <span class="pane-title-caret">&#x25BE;</span>'; const caret = ' <span class="pane-title-caret">&#x25BE;</span>';
if (isWinner) html = '<span style="color:var(--red);margin-right:4px;">&#x2605;</span><strong>' + escapeHtml(name) + '</strong> <span style="color:var(--red);font-size:0.82em;font-weight:800;text-transform:uppercase;letter-spacing:1px;position:relative;top:-2px;">Winner!</span>' + caret; if (isWinner) html = '<span style="color:var(--green, #50fa7b);margin-right:4px;">&#x2605;</span><strong>' + escapeHtml(name) + '</strong> <span style="color:var(--green, #50fa7b);font-size:0.82em;font-weight:800;text-transform:uppercase;letter-spacing:1px;position:relative;top:0;">Winner!</span>' + caret;
else if (isTie) html = '<span style="opacity:0.5;margin-right:4px;">=</span><strong>' + escapeHtml(name) + '</strong>' + caret; else if (isTie) html = '<span style="opacity:0.5;margin-right:4px;">=</span><strong>' + escapeHtml(name) + '</strong>' + caret;
else html = '<strong>' + escapeHtml(name) + '</strong>' + caret; else html = '<strong>' + escapeHtml(name) + '</strong>' + caret;
el.innerHTML = html; el.innerHTML = html;
-12
View File
@@ -1421,23 +1421,11 @@ export function _expandModelRow(row, modelData) {
const dlSource = _downloadSourceRepo(modelData, backend); const dlSource = _downloadSourceRepo(modelData, backend);
const hfUrl = `https://huggingface.co/${dlSource.repo}`; const hfUrl = `https://huggingface.co/${dlSource.repo}`;
// Official vendor recipe deep-links. These point to vLLM / SGLang's curated
// hardware-specific launch-command pages. They 404 for uncatalogued models \u2014
// a known tradeoff; user just gets the vendor's "model not found" page.
const _recipeRepo = modelData.name || '';
const _vllmUrl = _recipeRepo ? `https://recipes.vllm.ai/${_recipeRepo}` : '';
const _sglangUrl = _recipeRepo ? `https://docs.sglang.io/cookbook/autoregressive/${_recipeRepo}${_sglangHashFor(modelData)}` : '';
let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`; let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
html += `<div class="hwfit-panel-header">`; html += `<div class="hwfit-panel-header">`;
html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`; html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`;
html += `<span class="hwfit-panel-badge">${esc(label)}</span>`; html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`; html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`;
if (backend === 'vllm' && _vllmUrl) {
html += `<a href="${esc(_vllmUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="vLLM official recipe (curated launch command). 404s if this model isn't in vLLM's recipes catalog.">vLLM \u2197</a>`;
}
if (backend === 'sglang' && _sglangUrl) {
html += `<a href="${esc(_sglangUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="SGLang cookbook (hash pre-filled with your detected hardware). 404s if this model isn't in SGLang's cookbook catalog.">SGLang \u2197</a>`;
}
html += `</div>`; html += `</div>`;
html += `<div class="hwfit-panel-actions">`; html += `<div class="hwfit-panel-actions">`;
html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`; html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;
+242 -53
View File
@@ -267,6 +267,10 @@ function _detectModelOptimizations(modelName) {
else if (n.includes('minimax')) { else if (n.includes('minimax')) {
opts.flags.push('--enable-expert-parallel'); opts.flags.push('--enable-expert-parallel');
opts.tips.push('MoE expert parallel for MiniMax'); opts.tips.push('MoE expert parallel for MiniMax');
if (/\bm3\b/.test(n)) {
opts.kvCacheDtype = 'fp8';
opts.tips.push('MiniMax M3 defaults: fp8 KV cache, block-size 128, TRITON attention');
}
} }
// Reasoning parser — applies independently of MoE detection. Without this // Reasoning parser — applies independently of MoE detection. Without this
// flag, models like MiniMax-M2.x, DeepSeek-R1, Qwen3 reasoning, GLM-4.x, // flag, models like MiniMax-M2.x, DeepSeek-R1, Qwen3 reasoning, GLM-4.x,
@@ -308,6 +312,9 @@ function _detectModelOptimizations(modelName) {
*/ */
export function _detectReasoningParser(modelName) { export function _detectReasoningParser(modelName) {
const n = (modelName || '').toLowerCase(); const n = (modelName || '').toLowerCase();
// MiniMax M3 — newer vLLM nightly/parser builds use minimax_m3. This must
// be checked before the M2.x rule and before the generic MiniMax tool parser.
if (n.includes('minimax') && /\bm3\b/.test(n)) return 'minimax_m3';
// MiniMax M2 / M2.5 / M2.7 — released with a dedicated parser. Catch M2 // MiniMax M2 / M2.5 / M2.7 — released with a dedicated parser. Catch M2
// before plain "minimax" so M2.x doesn't fall through to a wrong parser. // before plain "minimax" so M2.x doesn't fall through to a wrong parser.
if (n.includes('minimax') && n.match(/\bm2(?:\.\d)?\b/)) return 'minimax_m2'; if (n.includes('minimax') && n.match(/\bm2(?:\.\d)?\b/)) return 'minimax_m2';
@@ -349,6 +356,7 @@ export function _detectToolParser(modelName) {
if (n.includes('mistral') || n.includes('mixtral')) return 'mistral'; if (n.includes('mistral') || n.includes('mixtral')) return 'mistral';
if (n.includes('deepseek-v3')) return 'deepseek_v3'; if (n.includes('deepseek-v3')) return 'deepseek_v3';
if (n.includes('deepseek')) return 'deepseek_v3'; if (n.includes('deepseek')) return 'deepseek_v3';
if (n.includes('minimax') && /\bm3\b/.test(n)) return 'minimax_m3';
if (n.includes('minimax') && n.includes('m2')) return 'minimax_m2'; if (n.includes('minimax') && n.includes('m2')) return 'minimax_m2';
if (n.includes('minimax')) return 'minimax'; if (n.includes('minimax')) return 'minimax';
if (n.includes('gemma')) return 'pythonic'; if (n.includes('gemma')) return 'pythonic';
@@ -376,7 +384,9 @@ export function _detectBackend(model) {
return { backend: 'unsupported', label: 'Unsupported' }; return { backend: 'unsupported', label: 'Unsupported' };
} }
const isAwqLike = /^AWQ|^GPTQ|^NVFP4/.test(q) || ['FP8', 'FP4', 'MXFP4', 'NF4', 'INT4', 'INT8', 'W4A16', 'W8A8', 'W8A16'].includes(q) || /\b(awq|gptq|fp8|fp4|nvfp4|mxfp4|nf4|int4|int8|w4a16|w8a8|w8a16)\b/i.test(_nm); const isAwqLike = /^AWQ|^GPTQ|^NVFP4/.test(q) || ['FP8', 'FP4', 'MXFP4', 'NF4', 'INT4', 'INT8', 'W4A16', 'W8A8', 'W8A16'].includes(q) || /\b(awq|gptq|fp8|fp4|nvfp4|mxfp4|nf4|int4|int8|w4a16|w8a8|w8a16)\b/i.test(_nm);
const isGgufLike = model.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf'); const hasGgufFile = Array.isArray(model.gguf_files)
&& model.gguf_files.some(f => f && typeof f.rel_path === 'string' && /\.gguf$/i.test(f.rel_path));
const isGgufLike = model.is_gguf || hasGgufFile || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf');
// Image gen models → diffusers // Image gen models → diffusers
if (model.is_image_gen || model.is_diffusion || model._tag === 'image') { if (model.is_image_gen || model.is_diffusion || model._tag === 'image') {
@@ -495,14 +505,22 @@ function _buildEnvPrefixWindows() {
return parts.join('; ') + ';'; return parts.join('; ') + ';';
} }
/**
 * Reduce a user-supplied venv path to the venv root directory.
 *
 * Surrounding whitespace and trailing slashes are removed. If what remains
 * ends in `/bin/<name>` where <name> is `activate`, `python`, `python3`,
 * `python3.N`, `vllm`, `pip` or `pip3` (case-insensitive), that suffix is
 * stripped as well.
 *
 * @param {*} path - path-like value; falsy values are treated as empty.
 * @returns {string} the venv root, or '' when nothing is left after trimming.
 */
function _venvRootFromPath(path) {
  const raw = (path || '').toString();
  const root = raw.trim().replace(/\/+$/, '');
  if (root === '') return '';
  return root.replace(/\/bin\/(?:activate|python(?:3(?:\.\d+)?)?|vllm|pip(?:3)?)$/i, '');
}
export function _buildServeCmd(f, modelName, backend) { export function _buildServeCmd(f, modelName, backend) {
// When a venv is configured on the chosen server, use the venv's binaries // When a venv is configured on the chosen server, use the venv's binaries
// by absolute path. Bare `vllm` / `python3` relies on PATH, and SSH non- // by absolute path. Bare `vllm` / `python3` relies on PATH, and SSH non-
// interactive sessions often leave a user-site install (~/.local/bin/vllm) // interactive sessions often leave a user-site install (~/.local/bin/vllm)
// ahead of the venv's bin, so the WRONG vllm gets launched even with the // ahead of the venv's bin, so the WRONG vllm gets launched even with the
// venv activated. Absolute path sidesteps the whole PATH question. // venv activated. Absolute path sidesteps the whole PATH question.
const _isVenv = _envState.env === 'venv' && _envState.envPath; const _formVenv = (f.venv ?? '').toString().trim();
const _venvBin = _isVenv ? (_envState.envPath.replace(/\/+$/, '') + '/bin/') : ''; const _activeVenvPath = _venvRootFromPath(_formVenv || (_envState.env === 'venv' ? (_envState.envPath || '') : ''));
const _venvBin = _activeVenvPath ? (_activeVenvPath + '/bin/') : '';
const _vllmBin = _venvBin ? `${_venvBin}vllm` : 'vllm'; const _vllmBin = _venvBin ? `${_venvBin}vllm` : 'vllm';
const _py3Bin = _venvBin ? `${_venvBin}python3` : 'python3'; const _py3Bin = _venvBin ? `${_venvBin}python3` : 'python3';
let cmd = ''; let cmd = '';
@@ -524,21 +542,26 @@ export function _buildServeCmd(f, modelName, backend) {
cmd += 'VLLM_USE_DEEP_GEMM=0 VLLM_USE_FLASHINFER_MOE_FP16=1 OMP_NUM_THREADS=4 '; cmd += 'VLLM_USE_DEEP_GEMM=0 VLLM_USE_FLASHINFER_MOE_FP16=1 OMP_NUM_THREADS=4 ';
} }
} }
// Pinned attention backend (Attention field). Empty = let vLLM pick.
const _attn = (f.vllm_attn_backend ?? '').toString().trim();
if (_attn) cmd += `VLLM_ATTENTION_BACKEND=${_attn} `;
// Free-text "Env" field — verbatim KEY=VAL pairs (space-separated). // Free-text "Env" field — verbatim KEY=VAL pairs (space-separated).
// Collapse any pasted newlines/tabs so the backend allowlist (which // Collapse any pasted newlines/tabs so the backend allowlist (which
// rejects \n / \r) doesn't trip on a multi-line paste from a model card. // rejects \n / \r) doesn't trip on a multi-line paste from a model card.
const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim(); const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
if (_extraEnv) cmd += _extraEnv + ' '; if (_extraEnv) cmd += _extraEnv + ' ';
cmd += `${_vllmBin} serve ${modelName} --host 0.0.0.0 --port ${f.port || '8000'}`; cmd += `${_vllmBin} serve ${modelName} --host 0.0.0.0 --port ${f.port || '8000'}`;
const _servedModelName = (f.served_model_name ?? '').toString().trim();
if (_servedModelName) cmd += ` --served-model-name ${_servedModelName}`;
// Pinned attention backend (Attention field). Empty = let vLLM pick.
const _attn = (f.vllm_attn_backend ?? '').toString().trim();
if (_attn) cmd += ` --attention-backend ${_attn}`;
const _gemma4ChatTemplate = _gemma4ThinkingChatTemplateArg(modelName); const _gemma4ChatTemplate = _gemma4ThinkingChatTemplateArg(modelName);
if (_gemma4ChatTemplate) cmd += ` --chat-template ${_gemma4ChatTemplate}`; if (_gemma4ChatTemplate) cmd += ` --chat-template ${_gemma4ChatTemplate}`;
cmd += ` --tensor-parallel-size ${f.tp || '1'}`; cmd += ` --tensor-parallel-size ${f.tp || '1'}`;
const _blockSize = (f.vllm_block_size ?? '').toString().trim();
if (/^\d+$/.test(_blockSize)) cmd += ` --block-size ${_blockSize}`;
cmd += ` --max-model-len ${f.ctx || '8192'}`; cmd += ` --max-model-len ${f.ctx || '8192'}`;
cmd += ` --gpu-memory-utilization ${f.gpu_mem || '0.90'}`; cmd += ` --gpu-memory-utilization ${f.gpu_mem || '0.90'}`;
if (f.swap && f.swap !== '0') cmd += ` --swap-space ${f.swap}`; const _swapRaw = (f.swap ?? '').toString().trim().toLowerCase();
if (_swapRaw && !['0', 'off', 'none', 'false'].includes(_swapRaw)) cmd += ` --swap-space ${_swapRaw}`;
cmd += ` --dtype ${f.dtype || 'auto'}`; cmd += ` --dtype ${f.dtype || 'auto'}`;
const _kv = (f.vllm_kv_cache_dtype ?? '').toString().trim(); const _kv = (f.vllm_kv_cache_dtype ?? '').toString().trim();
if (_kv === 'fp8') cmd += ' --kv-cache-dtype fp8'; if (_kv === 'fp8') cmd += ' --kv-cache-dtype fp8';
@@ -548,10 +571,12 @@ export function _buildServeCmd(f, modelName, backend) {
if (f.prefix_cache) cmd += ' --enable-prefix-caching'; if (f.prefix_cache) cmd += ' --enable-prefix-caching';
if (f.auto_tool) cmd += ` --enable-auto-tool-choice --tool-call-parser ${_detectToolParser(modelName)}`; if (f.auto_tool) cmd += ` --enable-auto-tool-choice --tool-call-parser ${_detectToolParser(modelName)}`;
if (f.expert_parallel) cmd += ' --enable-expert-parallel'; if (f.expert_parallel) cmd += ' --enable-expert-parallel';
if (f.language_model_only) cmd += ' --language-model-only';
if (f.disable_custom_all_reduce) cmd += ' --disable-custom-all-reduce';
if (f.reasoning_parser) { if (f.reasoning_parser) {
const rp = typeof f.reasoning_parser === 'string' && f.reasoning_parser !== 'true' const rp = typeof f.reasoning_parser === 'string' && f.reasoning_parser !== 'true'
? f.reasoning_parser : (f._reasoning_parser_value || 'qwen3'); ? f.reasoning_parser : (f._reasoning_parser_value || _detectReasoningParser(modelName) || '');
cmd += ` --reasoning-parser ${rp}`; if (rp) cmd += ` --reasoning-parser ${rp}`;
} }
if (f.speculative) { if (f.speculative) {
const _specMethod = (f.spec_method || 'mtp').trim() || 'mtp'; const _specMethod = (f.spec_method || 'mtp').trim() || 'mtp';
@@ -590,9 +615,11 @@ export function _buildServeCmd(f, modelName, backend) {
// The Inference mode pill (GPU/CPU) above gates this — when the user picks // The Inference mode pill (GPU/CPU) above gates this — when the user picks
// CPU, force ngl=0 here so all downstream flag-suppression fires // CPU, force ngl=0 here so all downstream flag-suppression fires
// consistently regardless of what the (now-hidden) ngl input shows. // consistently regardless of what the (now-hidden) ngl input shows.
if (String(f.llama_mode || '').toLowerCase() === 'cpu') { const _llamaMode = String(f.llama_mode || '').toLowerCase();
if (_llamaMode === 'unified') f.unified_mem = true;
if (_llamaMode === 'cpu') {
f.ngl = '0'; f.ngl = '0';
} else if (String(f.llama_mode || '').toLowerCase() === 'gpu' && (!f.ngl || String(f.ngl).trim() === '0')) { } else if (['gpu', 'unified'].includes(_llamaMode) && (!f.ngl || String(f.ngl).trim() === '0')) {
f.ngl = '99'; f.ngl = '99';
} }
const _cpuOnly = String(f.ngl).trim() === '0'; const _cpuOnly = String(f.ngl).trim() === '0';
@@ -616,7 +643,8 @@ export function _buildServeCmd(f, modelName, backend) {
})(); })();
if (f.unified_mem && !_cpuOnly && _isWindows() && _isCudaTarget) cmd += `$env:GGML_CUDA_ENABLE_UNIFIED_MEMORY="1"; `; if (f.unified_mem && !_cpuOnly && _isWindows() && _isCudaTarget) cmd += `$env:GGML_CUDA_ENABLE_UNIFIED_MEMORY="1"; `;
if (_isWindows() && !_cpuOnly) cmd += _gpuEnvPrefix(gpuId, true); if (_isWindows() && !_cpuOnly) cmd += _gpuEnvPrefix(gpuId, true);
const modelArg = `"${ggufPath}"`; const needsGgufPrelude = /^\$\(\{\s*find\s/.test(String(ggufPath || ''));
const modelArg = needsGgufPrelude ? '"$MODEL_FILE"' : `"${ggufPath}"`;
// Prefer native llama-server. The backend bootstrap resolves/builds the // Prefer native llama-server. The backend bootstrap resolves/builds the
// right binary (Vulkan/HIP/CUDA/Metal/CPU), so keep the generated command // right binary (Vulkan/HIP/CUDA/Metal/CPU), so keep the generated command
// as a validator-safe binary + args with no shell chaining. // as a validator-safe binary + args with no shell chaining.
@@ -692,6 +720,9 @@ export function _buildServeCmd(f, modelName, backend) {
} else { } else {
cmd += `${lcPrefix}llama-server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} -ngl ${f.ngl || '99'} -c ${f.ctx || '8192'}${_lcExtra}`; cmd += `${lcPrefix}llama-server --model ${modelArg} --host 0.0.0.0 --port ${f.port || '8080'} -ngl ${f.ngl || '99'} -c ${f.ctx || '8192'}${_lcExtra}`;
} }
if (needsGgufPrelude) {
cmd = `MODEL_FILE=${ggufPath} && { [ -n "$MODEL_FILE" ] && [ -f "$MODEL_FILE" ]; } || { echo "ERROR: No GGUF found on this host"; exit 1; } && ${cmd}`;
}
} else if (backend === 'ollama') { } else if (backend === 'ollama') {
const ollamaPort = f.port || '11434'; const ollamaPort = f.port || '11434';
// GGUF + Ollama: delegate to the iGPU-bound ollama-test container via // GGUF + Ollama: delegate to the iGPU-bound ollama-test container via
@@ -860,7 +891,7 @@ async function _fetchDependencies() {
const _statusTag = (pkg, isLocal, isSystemDep, winBlocked) => { const _statusTag = (pkg, isLocal, isSystemDep, winBlocked) => {
if (winBlocked) return `<span class="cookbook-dep-tag cookbook-dep-na">N/A</span>`; if (winBlocked) return `<span class="cookbook-dep-tag cookbook-dep-na">N/A</span>`;
if (pkg.installed && isSystemDep) return `<span class="cookbook-dep-tag cookbook-dep-installed" title="Found on selected server">Installed</span>`; if (pkg.installed && isSystemDep) return `<span class="cookbook-dep-tag cookbook-dep-installed" title="Found on selected server">Installed</span>`;
if (pkg.installed && pkg.pip_update_available === false) { if (pkg.installed && pkg.pip_update_available === false && pkg.name !== 'llama_cpp') {
const tip = esc(pkg.update_note || pkg.status_note || 'Found externally; update outside Odysseus.'); const tip = esc(pkg.update_note || pkg.status_note || 'Found externally; update outside Odysseus.');
return `<span class="cookbook-dep-tag cookbook-dep-installed" title="${tip}">Installed</span>`; return `<span class="cookbook-dep-tag cookbook-dep-installed" title="${tip}">Installed</span>`;
} }
@@ -902,9 +933,7 @@ async function _fetchDependencies() {
// diagnosis-style `_launchServeTask` with `pip install --force-reinstall` // diagnosis-style `_launchServeTask` with `pip install --force-reinstall`
// so the user can watch the pip install in the Running tab. // so the user can watch the pip install in the Running tab.
let _rebuildBtn = ''; let _rebuildBtn = '';
if (pkg.name === 'llama_cpp') { if (pkg.name === 'vllm' && pkg.installed) {
_rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild" id="cookbook-rebuild-engine" title="Clear the cached llama.cpp build so the next serve recompiles from source (use after installing a CUDA/ROCm toolkit to turn a CPU-only build into a GPU build).">Rebuild</button>`;
} else if (pkg.name === 'vllm' && pkg.installed) {
_rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="vllm" title="Force-reinstall vLLM (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`; _rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="vllm" title="Force-reinstall vLLM (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`;
} else if (pkg.name === 'sglang' && pkg.installed) { } else if (pkg.name === 'sglang' && pkg.installed) {
_rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="sglang" title="Force-reinstall SGLang (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`; _rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="sglang" title="Force-reinstall SGLang (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`;
@@ -1410,12 +1439,59 @@ async function _fetchDependencies() {
}); });
// Ask the backend to clear the cached llama.cpp (llama-server) build.
//
// The target is whatever `_envState.remoteHost` holds after the selection in
// the #hwfit-deps-server dropdown (when present) has been applied; an empty
// host means "this server".
//
//   updateSource - when true, the request sets `update_source` so the backend
//                  also updates the llama.cpp checkout before clearing.
//   statusEl     - optional element (e.g. the clicked button) that is disabled
//                  and relabelled while the request runs, then restored.
//
// The user must confirm first. The outcome is reported through a toast;
// nothing is returned.
async function _rebuildLlamaCpp(updateSource = false, statusEl = null) {
  const serverSelect = document.getElementById('hwfit-deps-server');
  if (serverSelect) _applyServerSelection(serverSelect.value);

  const host = _envState.remoteHost || '';
  const where = host || 'this server';
  const verb = updateSource ? 'Update' : 'Rebuild';

  let action;
  let detail;
  if (updateSource) {
    action = 'Update llama.cpp source and rebuild';
    detail = 'This fast-forwards the Cookbook-managed ~/llama.cpp checkout when possible, then clears the cached llama-server build. The next launch recompiles or installs the latest matching prebuilt.';
  } else {
    action = 'Rebuild llama.cpp engine';
    detail = 'This clears the cached llama-server build. The next launch recompiles or installs a matching prebuilt.';
  }
  const confirmed = confirm(`${action} on ${where}?\n\n${detail}`);
  if (!confirmed) return;

  // Remember the label so the finally-block can put it back.
  const oldText = statusEl?.textContent;
  if (statusEl) {
    statusEl.disabled = true;
    statusEl.textContent = updateSource ? 'Updating...' : 'Clearing...';
  }

  try {
    // `undefined` fields are dropped by JSON.stringify, so a local target
    // sends neither remote_host nor ssh_port.
    const requestBody = {
      engine: 'llamacpp',
      remote_host: host || undefined,
      ssh_port: _getPort(host) || undefined,
      update_source: !!updateSource,
    };
    const res = await fetch('/api/cookbook/rebuild-engine', {
      method: 'POST',
      credentials: 'same-origin',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    // A non-JSON body is treated as an empty object rather than an error.
    const data = await res.json().catch(() => ({}));
    if (res.ok && data.ok) {
      uiModule.showToast(`${updateSource ? 'Updated source and cleared' : 'Cleared'} llama.cpp build on ${where}. Re-launch the serve task to rebuild.`);
    } else {
      const reason = data.detail || data.error || `HTTP ${res.status}`;
      uiModule.showToast(`${verb} failed: ` + String(reason).slice(0, 300), {
        duration: 20000,
        action: 'OK',
        onAction: () => {},
      });
    }
  } catch (err) {
    uiModule.showToast(`${verb} failed: ` + err.message);
  } finally {
    if (statusEl) {
      statusEl.disabled = false;
      statusEl.textContent = oldText;
    }
  }
}
window._cookbookRebuildLlamaCpp = _rebuildLlamaCpp;
// Wire the installed-package menu.
function _showDepMenu(anchor) { function _showDepMenu(anchor) {
document.querySelectorAll('.cookbook-dep-menu').forEach(d => d.remove()); document.querySelectorAll('.cookbook-dep-menu').forEach(d => d.remove());
const row = anchor.closest('.cookbook-dep-row'); const row = anchor.closest('.cookbook-dep-row');
if (!row) return; if (!row) return;
const pipName = row.dataset.depPip; const pipName = row.dataset.depPip;
const rowPkgName = row.dataset.pkgName || '';
const pkgName = row.querySelector('.memory-item-title')?.textContent || pipName; const pkgName = row.querySelector('.memory-item-title')?.textContent || pipName;
const isLocalOnly = row.dataset.depTarget === 'local'; const isLocalOnly = row.dataset.depTarget === 'local';
const dropdown = document.createElement('div'); const dropdown = document.createElement('div');
@@ -1436,6 +1512,29 @@ async function _fetchDependencies() {
await _installDep(pipName, pkgName, isLocalOnly, true, null); await _installDep(pipName, pkgName, isLocalOnly, true, null);
}); });
dropdown.appendChild(it); dropdown.appendChild(it);
if (rowPkgName === 'llama_cpp') {
const rebuildIco = '<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 12a9 9 0 1 1-2.64-6.36"/><path d="M21 3v6h-6"/></svg>';
const rebuild = document.createElement('div');
rebuild.className = 'dropdown-item-compact';
rebuild.innerHTML = `<span class="dropdown-icon">${rebuildIco}</span><span>Rebuild</span>`;
rebuild.title = 'Clear the cached llama-server build so the next launch rebuilds it.';
rebuild.addEventListener('click', async (e) => {
e.stopPropagation();
dropdown.remove();
await _rebuildLlamaCpp(false, null);
});
dropdown.appendChild(rebuild);
const source = document.createElement('div');
source.className = 'dropdown-item-compact';
source.innerHTML = `<span class="dropdown-icon">${upIco}</span><span>Update source + rebuild</span>`;
source.title = 'Fast-forward ~/llama.cpp when possible, then clear the cached build.';
source.addEventListener('click', async (e) => {
e.stopPropagation();
dropdown.remove();
await _rebuildLlamaCpp(true, null);
});
dropdown.appendChild(source);
}
document.body.appendChild(dropdown); document.body.appendChild(dropdown);
const close = (ev) => { const close = (ev) => {
if (!dropdown.contains(ev.target) && ev.target !== anchor && !anchor.contains(ev.target)) { if (!dropdown.contains(ev.target) && ev.target !== anchor && !anchor.contains(ev.target)) {
@@ -1698,33 +1797,7 @@ function _wireTabEvents(body) {
if (sel) _applyServerSelection(sel.value); if (sel) _applyServerSelection(sel.value);
const host = _envState.remoteHost || ''; const host = _envState.remoteHost || '';
const where = host || 'this server'; const where = host || 'this server';
if (!confirm(`Rebuild the llama.cpp engine on ${where}?\n\nThis clears the cached llama-server build so the next serve recompiles from source (with CUDA/HIP if a toolchain is present). It does not download or install anything.`)) return; if (window._cookbookRebuildLlamaCpp) await window._cookbookRebuildLlamaCpp(false, rebuildBtn);
const _label = rebuildBtn.textContent;
rebuildBtn.disabled = true;
rebuildBtn.textContent = 'Clearing...';
try {
const res = await fetch('/api/cookbook/rebuild-engine', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
engine: 'llamacpp',
remote_host: host || undefined,
ssh_port: _getPort(host) || undefined,
}),
});
const data = await res.json().catch(() => ({}));
if (!res.ok || !data.ok) {
const reason = data.detail || data.error || `HTTP ${res.status}`;
uiModule.showToast('Rebuild failed: ' + String(reason).slice(0, 200));
} else {
uiModule.showToast(`Cleared llama.cpp build on ${where}. Re-launch the serve task to rebuild with GPU support.`);
}
} catch (err) {
uiModule.showToast('Rebuild failed: ' + err.message);
} finally {
rebuildBtn.disabled = false;
rebuildBtn.textContent = _label;
}
}); });
} }
@@ -1835,6 +1908,9 @@ function _wireTabEvents(body) {
// Download input // Download input
const dlBtn = document.getElementById('cookbook-dl-btn'); const dlBtn = document.getElementById('cookbook-dl-btn');
const dlInput = document.getElementById('cookbook-dl-repo'); const dlInput = document.getElementById('cookbook-dl-repo');
const dlGgufRow = document.getElementById('cookbook-dl-gguf-row');
const dlGgufQuant = document.getElementById('cookbook-dl-gguf-quant');
const dlGgufNote = document.getElementById('cookbook-dl-gguf-note');
const dlCardToggle = document.getElementById('cookbook-download-card-toggle'); const dlCardToggle = document.getElementById('cookbook-download-card-toggle');
const dlCardBody = document.getElementById('cookbook-download-card-body'); const dlCardBody = document.getElementById('cookbook-download-card-body');
const dlCardArrow = document.getElementById('cookbook-download-card-arrow'); const dlCardArrow = document.getElementById('cookbook-download-card-arrow');
@@ -1868,6 +1944,87 @@ function _wireTabEvents(body) {
if (hfMatch) repo = hfMatch[1]; if (hfMatch) repo = hfMatch[1];
return repo; return repo;
} }
// Locate the quantization token in a GGUF repo path, keeping the exact
// spelling (case) used in the repo.
//
// A first path segment that is itself a quant name (e.g. `Q4_K_M/...`,
// `UD-Q4_K_XL/...`) wins; otherwise the token is searched for in the file
// name, delimited by `-`, `_`, `.` or the string edges. Anything after a `?`
// (URL query) is ignored. Returns '' when no token is found.
function _ggufQuantTokenFromPath(path) {
  const clean = String(path || '').split('?')[0];
  const parts = clean.split('/').filter(Boolean);
  const dir = parts.length > 1 ? parts[0] : '';
  const file = parts[parts.length - 1] || clean;
  const dirQuant = dir.match(/^(?:I?Q\d(?:_[A-Z0-9]+){0,3}|UD-[A-Z0-9_]+)$/i);
  if (dirQuant) return dirQuant[0];
  const fileQuant = file.match(/(?:^|[-_.\/])((?:I?Q\d(?:_[A-Z0-9]+){0,3})|(?:UD-[A-Z0-9_]+))(?=(?:[-_.]|\.gguf|$))/i);
  return fileQuant ? fileQuant[1] : '';
}

// Canonical (upper-case) quant label for a GGUF repo path, e.g. 'Q4_K_M',
// 'IQ4_XS', 'UD-Q4_K_XL'. Returns '' when the path carries no quant token.
function _ggufQuantFromPath(path) {
  return _ggufQuantTokenFromPath(path).toUpperCase();
}

// Build the include glob that selects every file of `quant` (a canonical
// upper-case label) out of `files` (repo-relative paths).
// Returns '' when no file carries that quant.
//
// The glob is written with the spelling found in the repo rather than the
// upper-cased label, because glob matching is typically case-sensitive and
// many repos use lower-case names (`...-q4_k_m.gguf`). When the quant is only
// expressed by the directory (`Q8_0/model-00001-of-00002.gguf`) the file
// names do not contain the token, so the whole directory is selected instead.
function _ggufIncludeForQuant(files, quant) {
  const matches = files.filter(f => _ggufQuantFromPath(f) === quant);
  if (!matches.length) return '';
  const dirs = Array.from(new Set(matches.map(f => f.includes('/') ? f.split('/').slice(0, -1).join('/') : '')));
  const prefix = dirs.length === 1 && dirs[0] ? `${dirs[0]}/` : '';
  const spellings = Array.from(new Set(matches.map(_ggufQuantTokenFromPath)));
  if (spellings.length !== 1) {
    // Mixed spellings cannot be covered by one plain glob; keep the
    // canonical label as the best effort.
    return `${prefix}*${quant}*.gguf`;
  }
  const token = spellings[0];
  // With a single shared directory the glob is anchored to it, so the token
  // has to appear in the remainder (the file name) for `*token*` to match.
  const tokenCoversAll = matches.every(f => f.slice(prefix.length).includes(token));
  if (tokenCoversAll || !prefix) return `${prefix}*${token}*.gguf`;
  // All matches live in one directory under the quant-named top-level
  // directory, so the directory alone already isolates this quant.
  return `${prefix}*.gguf`;
}
// Collapse the GGUF quant picker and forget which repo it was populated for.
// `message` (default '') is written into the note element, e.g. to explain
// why no picker is shown. Each element is optional; missing ones are skipped.
function _hideGgufPicker(message = '') {
  const picker = dlGgufQuant;
  if (picker) {
    // Clearing dataset.repo also makes the picker's old selection ineligible
    // for a later download of the same repo string.
    picker.dataset.repo = '';
    picker.innerHTML = '';
  }
  if (dlGgufNote) {
    dlGgufNote.textContent = message;
  }
  if (dlGgufRow) {
    dlGgufRow.style.display = 'none';
  }
}
// Populate the GGUF quant picker for the repo typed into the download box.
//
// `rawValue` is the raw input text (repo id or HF URL). Ollama-style names,
// inputs that already pin a file/tag (an include pattern), and anything that
// is not a plain `org/repo` id hide the picker instead of scanning.
//
// Resolves to true when the picker ends up with a usable first option, and
// to false otherwise (picker elements missing, nothing to scan, no quants
// found, request failed, or the result was superseded by a newer scan).
async function _scanGgufRepo(rawValue) {
  if (!dlGgufRow || !dlGgufQuant || !dlGgufNote) return false;
  const rawRepo = _stripHfUrl(rawValue || '');
  const ollamaName = _ollamaName(rawRepo);
  const fileSplit = !ollamaName ? _splitRepoFile(rawRepo) : null;
  const split = ollamaName ? { repo: ollamaName, include: null } : (fileSplit || _splitRepoTag(rawRepo));
  const repo = split.repo || '';
  if (ollamaName || split.include || !/^[^\s/]+\/[^\s/]+$/.test(repo)) {
    _hideGgufPicker();
    return false;
  }
  dlGgufRow.style.display = 'flex';
  dlGgufQuant.innerHTML = '<option value="">Scanning...</option>';
  // dataset.repo doubles as the "current request" marker: whichever scan
  // wrote it last owns the picker.
  dlGgufQuant.dataset.repo = repo;
  dlGgufNote.textContent = '';
  try {
    const res = await fetch(`/api/cookbook/hf-gguf-files?repo_id=${encodeURIComponent(repo)}`, { credentials: 'same-origin' });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const data = await res.json();
    if (!data.ok) throw new Error(data.error || 'scan failed');
    // The input changed while we were waiting; leave the picker to the
    // newer scan.
    if (dlGgufQuant.dataset.repo !== repo) return false;
    const files = (data.files || [])
      .map(s => String(s || ''))
      .filter(name => /\.gguf$/i.test(name));
    // Group the GGUF files by their quant label; files without a
    // recognisable quant are left out of the picker.
    const byQuant = new Map();
    files.forEach(name => {
      const quant = _ggufQuantFromPath(name);
      if (!quant) return;
      if (!byQuant.has(quant)) byQuant.set(quant, []);
      byQuant.get(quant).push(name);
    });
    if (!byQuant.size) {
      _hideGgufPicker('No GGUF quants found');
      return false;
    }
    // Order by the leading bit-width digit (Q2 < Q4 < Q8); labels without
    // one (e.g. UD-*) sort last, ties alphabetically.
    const quantRank = q => {
      const m = q.match(/^I?Q(\d)/i);
      return m ? Number(m[1]) : 99;
    };
    const quants = Array.from(byQuant.keys()).sort((a, b) => quantRank(a) - quantRank(b) || a.localeCompare(b));
    dlGgufQuant.innerHTML = quants.map(q => {
      const include = _ggufIncludeForQuant(files, q);
      const count = byQuant.get(q).length;
      return `<option value="${esc(include)}">${esc(q)} (${count})</option>`;
    }).join('');
    const first = dlGgufQuant.options[0];
    dlGgufNote.textContent = first ? first.value : '';
    return !!(first && first.value);
  } catch (err) {
    // A superseded scan must not touch the picker: hiding it here would also
    // clear dataset.repo and make the newer in-flight scan discard its own
    // (valid) result.
    if (dlGgufQuant.dataset.repo !== repo) return false;
    _hideGgufPicker(`GGUF scan failed: ${err.message || err}`);
    return false;
  }
}
// Split `org/repo:tag` (Ollama/llama.cpp style) into repo + include-glob. // Split `org/repo:tag` (Ollama/llama.cpp style) into repo + include-glob.
// The `:tag` picks a specific GGUF quantization file from the repo. // The `:tag` picks a specific GGUF quantization file from the repo.
function _splitRepoTag(raw) { function _splitRepoTag(raw) {
@@ -1902,7 +2059,7 @@ function _wireTabEvents(body) {
} }
return null; return null;
} }
const triggerDownload = () => { const triggerDownload = async () => {
const rawRepo = _stripHfUrl(dlInput.value); const rawRepo = _stripHfUrl(dlInput.value);
if (!rawRepo) return; if (!rawRepo) return;
const ollamaName = _ollamaName(rawRepo); const ollamaName = _ollamaName(rawRepo);
@@ -1914,6 +2071,9 @@ function _wireTabEvents(body) {
const { repo, include: autoInclude } = ollamaName const { repo, include: autoInclude } = ollamaName
? { repo: ollamaName, include: null } ? { repo: ollamaName, include: null }
: (_fileSplit || _splitRepoTag(rawRepo)); : (_fileSplit || _splitRepoTag(rawRepo));
let pickerInclude = (!ollamaName && !_fileSplit && !autoInclude && dlGgufQuant?.dataset.repo === repo)
? (dlGgufQuant.value || '')
: '';
// HuggingFace repo IDs must be `org/model`. A bare model name would 404 // HuggingFace repo IDs must be `org/model`. A bare model name would 404
// at snapshot_download time with a raw traceback, so reject it up front. // at snapshot_download time with a raw traceback, so reject it up front.
// Ollama names (single-segment with a tag) skip this check — they go // Ollama names (single-segment with a tag) skip this check — they go
@@ -1923,6 +2083,25 @@ function _wireTabEvents(body) {
dlInput.focus(); dlInput.focus();
return; return;
} }
const looksGgufRepo = !ollamaName && !_fileSplit && !autoInclude && /\bgguf\b/i.test(repo);
if (looksGgufRepo && !pickerInclude) {
const oldText = dlBtn.textContent;
dlBtn.disabled = true;
dlBtn.textContent = 'Scanning...';
try {
const found = await _scanGgufRepo(rawRepo);
pickerInclude = (found && dlGgufQuant?.dataset.repo === repo) ? (dlGgufQuant.value || '') : '';
} finally {
dlBtn.disabled = false;
dlBtn.textContent = oldText;
}
if (!pickerInclude) {
uiModule.showToast('Pick a GGUF quant first. Odysseus will not download the whole GGUF repo without an include pattern.');
return;
}
uiModule.showToast('Pick the GGUF quant, then press Download again.');
return;
}
// Resolve the host straight from THIS window's server dropdown, by index // Resolve the host straight from THIS window's server dropdown, by index
// into the (consistent) servers list. We deliberately don't use // into the (consistent) servers list. We deliberately don't use
// _envState.remoteHost — there can be multiple copies of the cookbook // _envState.remoteHost — there can be multiple copies of the cookbook
@@ -1939,7 +2118,7 @@ function _wireTabEvents(body) {
let envPath = host ? (_hsrv.envPath || '') : _envState.envPath; let envPath = host ? (_hsrv.envPath || '') : _envState.envPath;
const payload = { repo_id: repo }; const payload = { repo_id: repo };
if (ollamaName) payload.backend = 'ollama'; if (ollamaName) payload.backend = 'ollama';
if (autoInclude) payload.include = autoInclude; if (autoInclude || pickerInclude) payload.include = autoInclude || pickerInclude;
if (_envState.hfToken && !ollamaName) payload.hf_token = _envState.hfToken; if (_envState.hfToken && !ollamaName) payload.hf_token = _envState.hfToken;
if (host) { payload.remote_host = host; const _sp3 = _getPort(host); if (_sp3) payload.ssh_port = _sp3; } if (host) { payload.remote_host = host; const _sp3 = _getPort(host); if (_sp3) payload.ssh_port = _sp3; }
const srvPlatform = _getPlatform(host); const srvPlatform = _getPlatform(host);
@@ -1966,6 +2145,16 @@ function _wireTabEvents(body) {
dlInput.addEventListener('keydown', (e) => { dlInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter') triggerDownload(); if (e.key === 'Enter') triggerDownload();
}); });
// Keep the GGUF quant picker in sync with the repo input.
// Typing triggers a debounced scan (350 ms after the last keystroke).
let _ggufScanTimer = null;
const _scheduleGgufScan = () => {
  clearTimeout(_ggufScanTimer);
  _ggufScanTimer = setTimeout(() => _scanGgufRepo(dlInput.value), 350);
};
dlInput.addEventListener('input', _scheduleGgufScan);
// Leaving the field scans right away; cancel any pending debounced scan so
// the same repo is not fetched a second time when the timer fires.
dlInput.addEventListener('blur', () => {
  clearTimeout(_ggufScanTimer);
  _scanGgufRepo(dlInput.value);
});
// Mirror the selected include pattern in the note next to the picker.
dlGgufQuant?.addEventListener('change', () => {
  if (dlGgufNote) dlGgufNote.textContent = dlGgufQuant.value || '';
});
} }
// Latest HF models that fit — collapsible card list // Latest HF models that fit — collapsible card list
@@ -2095,12 +2284,6 @@ function _wireTabEvents(body) {
return data.models || []; return data.models || [];
}; };
let models = await _fetchLatest(vram); let models = await _fetchLatest(vram);
// If the VRAM filter wiped everything out (often a flaky/zero hardware
// probe for a remote server — a huge-VRAM box should fit MORE, not
// fewer), fall back to the unfiltered trending list so something shows.
if (!models.length && vram > 0) {
models = await _fetchLatest(0);
}
if (['rocm', 'metal', 'mps', 'apple', 'generic', 'cpu'].includes(hwInfo.backend)) { if (['rocm', 'metal', 'mps', 'apple', 'generic', 'cpu'].includes(hwInfo.backend)) {
models = models.filter(m => !_hfModelLooksAwqLike(m)); models = models.filter(m => !_hfModelLooksAwqLike(m));
} }
@@ -2438,6 +2621,11 @@ function _renderRecipes() {
html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, qwen2.5:14b, or HF URL" style="flex:1;min-width:0;" />`; html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, qwen2.5:14b, or HF URL" style="flex:1;min-width:0;" />`;
html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`; html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`;
html += `</div>`; html += `</div>`;
html += `<div id="cookbook-dl-gguf-row" style="display:none;margin-top:1px;gap:5px;align-items:center;font-size:11px;">`;
html += `<span style="opacity:0.65;flex-shrink:0;">GGUF</span>`;
html += `<select class="cookbook-field-input" id="cookbook-dl-gguf-quant" style="height:28px;min-width:118px;flex:0 0 auto;"></select>`;
html += `<span id="cookbook-dl-gguf-note" style="opacity:0.55;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;"></span>`;
html += `</div>`;
// Ollama-library browse used to live here as its own collapsible dropdown, // Ollama-library browse used to live here as its own collapsible dropdown,
// but that duplicated the Engine filter (which already has Ollama). The // but that duplicated the Engine filter (which already has Ollama). The
// standalone UI is gone — to find Ollama models, set Engine = Ollama in // standalone UI is gone — to find Ollama models, set Engine = Ollama in
@@ -2454,7 +2642,7 @@ function _renderRecipes() {
html += `<span id="cookbook-hf-latest-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;opacity:0.6;font-size:11px;">\u25B8</span>`; html += `<span id="cookbook-hf-latest-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;opacity:0.6;font-size:11px;">\u25B8</span>`;
html += `</button>`; html += `</button>`;
html += `</div>`; html += `</div>`;
html += `<div id="cookbook-hf-latest-list" style="display:none;margin-top:4px;max-height:320px;overflow-y:auto;flex-direction:column;gap:4px;"></div>`; html += `<div id="cookbook-hf-latest-list" style="display:none;margin-top:4px;max-height:320px;overflow-y:auto;overscroll-behavior:contain;flex-direction:column;gap:4px;"></div>`;
html += `</div>`; html += `</div>`;
html += `</div>`; // /#cookbook-dl-tab-fold-body (whole Download card body) html += `</div>`; // /#cookbook-dl-tab-fold-body (whole Download card body)
@@ -2884,6 +3072,7 @@ const shared = {
_getPort, _getPort,
_sshPrefix, _sshPrefix,
_serverByVal, _serverByVal,
_serverKey,
_selectedServer, _selectedServer,
_getPlatform, _getPlatform,
_isWindows, _isWindows,
+52 -27
View File
@@ -27,6 +27,9 @@ function _statusLabel(status, type) {
// "cookbook-task-status" ('' = the neutral loading style). // "cookbook-task-status" ('' = the neutral loading style).
function _taskBadge(task) { function _taskBadge(task) {
if (task._unreachable && task.status === 'running') return { text: 'unreachable', cls: 'cookbook-task-error' }; if (task._unreachable && task.status === 'running') return { text: 'unreachable', cls: 'cookbook-task-error' };
if (task.type === 'download' && task.status === 'running') {
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-downloading' };
}
if (task.type === 'serve' && task.status === 'running' && task.progress) { if (task.type === 'serve' && task.status === 'running' && task.progress) {
// Same green "running" pill — just with dynamic phase text, so it doesn't // Same green "running" pill — just with dynamic phase text, so it doesn't
// read as a different status while the server is coming up. // read as a different status while the server is coming up.
@@ -52,13 +55,13 @@ function _downloadOutputLooksActive(task) {
function _canClearTask(task) { function _canClearTask(task) {
if (!task || task.status === 'running') return false; if (!task || task.status === 'running') return false;
if (task.type === 'serve' && (task.status === 'ready' || task._serveReady)) return false; if (task.type === 'serve' && (task.status === 'ready' || (task._serveReady && !['stopped', 'error', 'crashed', 'failed', 'completed'].includes(task.status)))) return false;
// If the tmux output still shows an in-flight download, the task isn't // If the tmux output still shows an in-flight download, the task isn't
// actually finished — hide the clear/check pill so it doesn't show on a // actually finished — hide the clear/check pill so it doesn't show on a
// task that's still doing work. (The next render will reflect this and // task that's still doing work. (The next render will reflect this and
// ideally the self-heal flips status back to running.) // ideally the self-heal flips status back to running.)
if (_downloadOutputLooksActive(task)) return false; if (_downloadOutputLooksActive(task)) return false;
return ['done', 'stopped', 'error', 'crashed', 'failed'].includes(task.status); return ['done', 'completed', 'stopped', 'error', 'crashed', 'failed'].includes(task.status);
} }
function _clearPillLabel(task) { function _clearPillLabel(task) {
@@ -66,6 +69,13 @@ function _clearPillLabel(task) {
return 'clear'; return 'clear';
} }
// Reduce a venv-related path to the environment's root directory.
// Surrounding whitespace and trailing slashes are dropped, then a trailing
// "/bin/<activate|python|python3[.N]|vllm|pip|pip3>" segment (case-insensitive)
// is removed. Empty or missing input yields ''.
function _venvRootFromPath(path) {
  const cleaned = (path || '').toString().trim().replace(/\/+$/, '');
  return cleaned
    ? cleaned.replace(/\/bin\/(?:activate|python(?:3(?:\.\d+)?)?|vllm|pip(?:3)?)$/i, '')
    : '';
}
// A pip dependency/driver install (payload._dep) reports success with the // A pip dependency/driver install (payload._dep) reports success with the
// runner's "=== Process exited with code 0 ===" sentinel and pip's // runner's "=== Process exited with code 0 ===" sentinel and pip's
// "Successfully installed" line — never the HuggingFace download markers // "Successfully installed" line — never the HuggingFace download markers
@@ -263,6 +273,7 @@ let _copyText;
let _persistEnvState; let _persistEnvState;
let _refreshDependencies; let _refreshDependencies;
let _serverByVal; let _serverByVal;
let _serverKey;
let _selectedServer; let _selectedServer;
let modelLogo; let modelLogo;
let esc; let esc;
@@ -688,8 +699,10 @@ export function _saveTasks(tasks) {
export function _addTask(sessionId, name, type, payload) { export function _addTask(sessionId, name, type, payload) {
let tasks = _loadTasks(); let tasks = _loadTasks();
const remoteHost = (payload && payload.remote_host) || _envState.remoteHost || ''; const remoteHost = (payload && payload.remote_host) || _envState.remoteHost || '';
const sshPort = (payload && payload.ssh_port) || _getPort(remoteHost) || ''; const remoteServerKey = (payload && payload.remote_server_key) || '';
const platform = (payload && payload.platform) || _getPlatform(remoteHost) || ''; const remoteServerName = (payload && payload.remote_server_name) || '';
const sshPort = (payload && payload.ssh_port) || _getPort(remoteServerKey || remoteHost) || '';
const platform = (payload && payload.platform) || _getPlatform(remoteServerKey || remoteHost) || '';
// Serving a model supersedes its finished download — clear the matching // Serving a model supersedes its finished download — clear the matching
// finished download card (covers serving directly from the Serve tab, not just // finished download card (covers serving directly from the Serve tab, not just
// via the download card's "Serve →" button). // via the download card's "Serve →" button).
@@ -704,7 +717,7 @@ export function _addTask(sessionId, name, type, payload) {
return !(key && t.type === 'download' && t.status === 'queued' && _downloadDedupeKey(t) === key); return !(key && t.type === 'download' && t.status === 'queued' && _downloadDedupeKey(t) === key);
}); });
} }
const task = _stripTaskSecrets({ id: sessionId, sessionId, name, type, status: 'running', output: '', ts: Date.now(), payload: payload || null, remoteHost, sshPort, platform }); const task = _stripTaskSecrets({ id: sessionId, sessionId, name, type, status: 'running', output: '', ts: Date.now(), payload: payload || null, remoteHost, remoteServerKey, remoteServerName, sshPort, platform });
tasks.push(task); tasks.push(task);
_saveTasks(tasks); _saveTasks(tasks);
// New action → collapse all other cards, leave only this one open. // New action → collapse all other cards, leave only this one open.
@@ -1520,14 +1533,18 @@ function _parseServeCmdToFields(cmd) {
return fields; return fields;
} }
export async function _launchServeTask(shortName, repo, cmd, fields, hostOverride) { export async function _launchServeTask(shortName, repo, cmd, fields, hostOverride, targetMeta = null) {
// Host resolution mirrors the download path: when the caller passes an explicit // Host resolution mirrors the download path: when the caller passes an explicit
// host (resolved from the dropdown the user actually picked), use it and look // host (resolved from the dropdown the user actually picked), use it and look
// up that server's port/platform from the shared servers list. Only fall back // up that server's port/platform from the shared servers list. Only fall back
// to _envState.remoteHost for legacy callers (diagnosis/pip-update). // to _envState.remoteHost for legacy callers (diagnosis/pip-update).
const _host = (hostOverride !== undefined) ? (hostOverride || '') : (_envState.remoteHost || ''); const _host = (hostOverride !== undefined) ? (hostOverride || '') : (_envState.remoteHost || '');
const _hsrv = _serverByVal(_envState.remoteServerKey || _host) const _targetKey = targetMeta?.serverKey || '';
const _hsrv = (_targetKey && _targetKey !== 'local' ? _serverByVal(_targetKey) : null)
|| (hostOverride === undefined ? _serverByVal(_envState.remoteServerKey || _host) : null)
|| _envState.servers.find(s => s.host === _host) || {}; || _envState.servers.find(s => s.host === _host) || {};
const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local');
const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local');
const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || ''); const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || '');
// Replace any serve already targeting this same host:port — you can't run two // Replace any serve already targeting this same host:port — you can't run two
@@ -1572,7 +1589,7 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
} }
} else { } else {
if (_envState.env === 'venv' && _envState.envPath) { if (_envState.env === 'venv' && _envState.envPath) {
const p = _envState.envPath; const p = _venvRootFromPath(_envState.envPath);
envPrefix = 'source ' + (p.endsWith('/bin/activate') ? p : p + '/bin/activate'); envPrefix = 'source ' + (p.endsWith('/bin/activate') ? p : p + '/bin/activate');
} else if (_envState.env === 'conda' && _envState.envPath) { } else if (_envState.env === 'conda' && _envState.envPath) {
envPrefix = 'eval "$(conda shell.bash hook)" && conda activate ' + _envState.envPath; envPrefix = 'eval "$(conda shell.bash hook)" && conda activate ' + _envState.envPath;
@@ -1583,7 +1600,7 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
repo_id: repo, repo_id: repo,
cmd: cmd, cmd: cmd,
remote_host: _host || undefined, remote_host: _host || undefined,
ssh_port: _getPort(_host) || undefined, ssh_port: _getPort(_serverMetaKey || _host) || undefined,
env_prefix: envPrefix || undefined, env_prefix: envPrefix || undefined,
hf_token: _envState.hfToken || undefined, hf_token: _envState.hfToken || undefined,
gpus: _envState.gpus || undefined, gpus: _envState.gpus || undefined,
@@ -1607,11 +1624,11 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
return; return;
} }
const _sp = _getPort(_host); const _sp = _getPort(_serverMetaKey || _host);
// _fields = the exact structured serve-form values used for this launch, // _fields = the exact structured serve-form values used for this launch,
// so the "Edit / relaunch" button can re-open the Serve panel pre-filled // so the "Edit / relaunch" button can re-open the Serve panel pre-filled
// with these precise settings (not just the last-used-for-repo state). // with these precise settings (not just the last-used-for-repo state).
const payload = { repo_id: repo, remote_host: _host || undefined, ssh_port: _sp || undefined, _cmd: cmd, _fields: fields || undefined, _env: _usedEnv, _envPath: _usedEnvPath, _gpus: _usedGpus }; const payload = { repo_id: repo, remote_host: _host || undefined, remote_server_key: _serverMetaKey || undefined, remote_server_name: _serverMetaName || undefined, ssh_port: _sp || undefined, _cmd: cmd, _fields: fields || undefined, _env: _usedEnv, _envPath: _usedEnvPath, _gpus: _usedGpus };
_addTask(data.session_id, shortName, 'serve', payload); _addTask(data.session_id, shortName, 'serve', payload);
uiModule.showToast(`Serving ${shortName}...`); uiModule.showToast(`Serving ${shortName}...`);
// Auto-register may have enabled an existing (offline) endpoint for this // Auto-register may have enabled an existing (offline) endpoint for this
@@ -1760,16 +1777,25 @@ export function _renderRunningTab() {
} }
// Group tasks by server // Group tasks by server
const _serverName = (host) => { const _taskServerKey = (task) => task?.remoteServerKey || task?.remoteHost || '';
if (!host) return 'Local'; const _serverName = (keyOrTask) => {
const srv = _serverByVal(_envState.remoteServerKey || host) if (keyOrTask && typeof keyOrTask === 'object') {
|| _envState.servers.find(s => s.host === host); const task = keyOrTask;
return srv?.name || host; if (task.remoteServerName) return task.remoteServerName;
const srv = task.remoteServerKey ? _serverByVal(task.remoteServerKey) : null;
if (srv?.name) return srv.name;
if (!task.remoteHost) return 'Local';
return (_envState.servers.find(s => s.host === task.remoteHost)?.name) || task.remoteHost;
}
const key = keyOrTask || '';
if (!key || key === 'local') return 'Local';
const srv = _serverByVal(key);
return srv?.name || key;
}; };
const serverGroups = {}; const serverGroups = {};
for (const t of tasks) { for (const t of tasks) {
const key = t.remoteHost || ''; const key = _taskServerKey(t);
if (!serverGroups[key]) serverGroups[key] = { name: _serverName(key), serve: [], download: [] }; if (!serverGroups[key]) serverGroups[key] = { name: _serverName(t), serve: [], download: [] };
serverGroups[key][t.type === 'serve' ? 'serve' : 'download'].push(t); serverGroups[key][t.type === 'serve' ? 'serve' : 'download'].push(t);
} }
@@ -1816,12 +1842,12 @@ export function _renderRunningTab() {
e.stopPropagation(); // don't toggle the section collapse (was an inline onclick, blocked by CSP) e.stopPropagation(); // don't toggle the section collapse (was an inline onclick, blocked by CSP)
const host = btn.dataset.clearServer; const host = btn.dataset.clearServer;
const allTasks = _loadTasks(); const allTasks = _loadTasks();
const toRemove = allTasks.filter(t => (t.remoteHost || '') === host && _canClearTask(t)); const toRemove = allTasks.filter(t => _taskServerKey(t) === host && _canClearTask(t));
// Bail with a clear message instead of silently doing nothing when // Bail with a clear message instead of silently doing nothing when
// every task on this server is still running (nothing finished to // every task on this server is still running (nothing finished to
// clear yet) — the previous behavior looked like the button was dead. // clear yet) — the previous behavior looked like the button was dead.
if (!toRemove.length) { if (!toRemove.length) {
const stillRunning = allTasks.filter(t => (t.remoteHost || '') === host && t.status === 'running').length; const stillRunning = allTasks.filter(t => _taskServerKey(t) === host && t.status === 'running').length;
const _msg = stillRunning const _msg = stillRunning
? `No finished tasks on ${_serverName(host)}${stillRunning} still running. Stop them first to clear.` ? `No finished tasks on ${_serverName(host)}${stillRunning} still running. Stop them first to clear.`
: `No finished tasks on ${_serverName(host)}.`; : `No finished tasks on ${_serverName(host)}.`;
@@ -1830,7 +1856,7 @@ export function _renderRunningTab() {
return; return;
} }
if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return; if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return;
const remaining = allTasks.filter(t => (t.remoteHost || '') !== host || !_canClearTask(t)); const remaining = allTasks.filter(t => _taskServerKey(t) !== host || !_canClearTask(t));
_saveTasks(remaining); _saveTasks(remaining);
// Fade/slide each finished card out (same exit as the per-card clear) // Fade/slide each finished card out (same exit as the per-card clear)
// instead of yanking them instantly. // instead of yanking them instantly.
@@ -1864,7 +1890,7 @@ export function _renderRunningTab() {
btn.addEventListener('click', async (e) => { btn.addEventListener('click', async (e) => {
e.stopPropagation(); // don't toggle the section collapse e.stopPropagation(); // don't toggle the section collapse
const host = btn.dataset.stopServer; const host = btn.dataset.stopServer;
const running = _loadTasks().filter(t => (t.remoteHost || '') === host && t.status === 'running'); const running = _loadTasks().filter(t => _taskServerKey(t) === host && t.status === 'running');
if (!running.length) { uiModule.showToast(`Nothing running on ${_serverName(host)}`); return; } if (!running.length) { uiModule.showToast(`Nothing running on ${_serverName(host)}`); return; }
if (!await window.styledConfirm(`Stop ${running.length} running task${running.length > 1 ? 's' : ''} on ${_serverName(host)}?`, { confirmText: 'Stop all' })) return; if (!await window.styledConfirm(`Stop ${running.length} running task${running.length > 1 ? 's' : ''} on ${_serverName(host)}?`, { confirmText: 'Stop all' })) return;
// Mark every task as user-stopped BEFORE firing the kills so that the // Mark every task as user-stopped BEFORE firing the kills so that the
@@ -2177,9 +2203,6 @@ export function _renderRunningTab() {
if (task.status !== 'running' && task.status !== 'queued') { if (task.status !== 'running' && task.status !== 'queued') {
items.push({ group: 'run', label: 'Reconnect tmux', action: 'reconnect' }); items.push({ group: 'run', label: 'Reconnect tmux', action: 'reconnect' });
} }
if (task.status === 'running') {
items.push({ group: 'run', label: 'Stop', action: 'stop', danger: true });
}
items.push({ group: 'run', label: 'Restart', action: 'retry' }); items.push({ group: 'run', label: 'Restart', action: 'retry' });
// ── Edit section ──────────────────────────────────────────── // ── Edit section ────────────────────────────────────────────
// Merged "Edit & relaunch" — opens the structured serve panel // Merged "Edit & relaunch" — opens the structured serve panel
@@ -2539,7 +2562,7 @@ export function _renderRunningTab() {
}); });
// Route to the right server section body // Route to the right server section body
const serverBodyId = `server-body-${(task.remoteHost || 'local').replace(/[^a-zA-Z0-9-]/g, '_')}`; const serverBodyId = `server-body-${(_taskServerKey(task) || 'local').replace(/[^a-zA-Z0-9-]/g, '_')}`;
const targetBody = document.getElementById(serverBodyId); const targetBody = document.getElementById(serverBodyId);
if (targetBody) targetBody.appendChild(el); if (targetBody) targetBody.appendChild(el);
else group.appendChild(el); else group.appendChild(el);
@@ -3393,7 +3416,8 @@ function _refreshServerDots() {
let tasks; let tasks;
try { tasks = _loadTasks(); } catch { return; } try { tasks = _loadTasks(); } catch { return; }
const byKey = {}; const byKey = {};
for (const t of tasks) { (byKey[t.remoteHost || ''] = byKey[t.remoteHost || ''] || []).push(t); } const _taskServerKeyForDot = (task) => task?.remoteServerKey || task?.remoteHost || '';
for (const t of tasks) { (byKey[_taskServerKeyForDot(t)] = byKey[_taskServerKeyForDot(t)] || []).push(t); }
document.querySelectorAll('.cookbook-section-header').forEach(header => { document.querySelectorAll('.cookbook-section-header').forEach(header => {
const dot = header.querySelector('.cookbook-srv-status'); const dot = header.querySelector('.cookbook-srv-status');
if (!dot) return; if (!dot) return;
@@ -3798,6 +3822,7 @@ export function initRunning(shared) {
_persistEnvState = shared._persistEnvState; _persistEnvState = shared._persistEnvState;
_refreshDependencies = shared._refreshDependencies; _refreshDependencies = shared._refreshDependencies;
_serverByVal = shared._serverByVal; _serverByVal = shared._serverByVal;
_serverKey = shared._serverKey;
_selectedServer = shared._selectedServer; _selectedServer = shared._selectedServer;
modelLogo = shared.modelLogo; modelLogo = shared.modelLogo;
esc = shared.esc; esc = shared.esc;
+824 -111
View File
File diff suppressed because it is too large Load Diff
+155 -4
View File
@@ -24,6 +24,7 @@ import * as Modals from './modalManager.js';
let _autoDetectDebounce = null; let _autoDetectDebounce = null;
let _autoTitleDebounce = null; let _autoTitleDebounce = null;
let _autoSaveDebounce = null; let _autoSaveDebounce = null;
let _lastAutoSaveErrorAt = 0;
let _animationInProgress = false; let _animationInProgress = false;
let _animationCancel = null; // function to cancel current animation let _animationCancel = null; // function to cancel current animation
let _htmlPreviewActive = false; // true when inline HTML preview iframe is showing let _htmlPreviewActive = false; // true when inline HTML preview iframe is showing
@@ -153,6 +154,20 @@ import * as Modals from './modalManager.js';
addDocToTabs, addDocToTabs,
syncDocIndicator: _syncDocIndicator, syncDocIndicator: _syncDocIndicator,
}); });
const sidebarNewDocBtn = document.getElementById('library-new-doc-btn');
if (sidebarNewDocBtn && !sidebarNewDocBtn.dataset.docNewWired) {
sidebarNewDocBtn.dataset.docNewWired = '1';
sidebarNewDocBtn.addEventListener('click', async (e) => {
e.preventDefault();
e.stopPropagation();
try {
await newDocument();
} catch (err) {
console.error('Failed to create document from sidebar button:', err);
if (uiModule) uiModule.showError('Failed to create document');
}
});
}
_maybeOpenDocFromHash(); _maybeOpenDocFromHash();
window.addEventListener('hashchange', _maybeOpenDocFromHash); window.addEventListener('hashchange', _maybeOpenDocFromHash);
} }
@@ -2685,6 +2700,104 @@ import * as Modals from './modalManager.js';
await _uploadComposeFiles(files); await _uploadComposeFiles(files);
} }
// True when `file` looks like an image: either its MIME type starts with
// "image/" (compared case-insensitively) or its name ends in one of the
// listed image extensions. Missing files are never images.
function _isMarkdownImageFile(file) {
  if (!file) return false;
  const mime = (file.type || '').toLowerCase();
  return mime.startsWith('image/')
    || /\.(avif|bmp|gif|jpe?g|png|svg|webp)$/i.test(file.name || '');
}
// Derive Markdown alt text from an uploaded file's name.
//   name: the file name (may be empty/undefined).
//   returns: the name without its last extension, with characters that would
//            break `![alt](url)` replaced by spaces and whitespace collapsed;
//            falls back to 'image' when nothing usable remains.
function _markdownImageAlt(name) {
  const base = String(name || 'image').replace(/\.[^.]+$/, '').trim() || 'image';
  // Brackets would terminate the alt text early, newlines would split the
  // image across lines, and a backslash directly before the closing "]"
  // would escape it — so all of them are neutralised to spaces.
  return base.replace(/[\[\]\\\n\r]/g, ' ').replace(/\s+/g, ' ').trim() || 'image';
}
// Lower-cased language of the active document. Prefers the `language` stored
// on the active entry in `docs`; otherwise falls back to the current value of
// the #doc-language-select element, and finally to ''.
function _activeDocLanguage() {
  const current = activeDocId && docs.get(activeDocId);
  let lang = current && current.language;
  if (!lang) lang = document.getElementById('doc-language-select')?.value;
  return (lang || '').toLowerCase();
}
// Follow-up work after the textarea content was changed programmatically:
// refresh line numbers and the #doc-editor-code layer right away, then re-arm
// the highlight (80 ms), auto-title (600 ms) and silent autosave (800 ms)
// debounce timers.
function _scheduleMarkdownImageAutosave(ta) {
  updateLineNumbers(ta.value);

  const mirror = document.getElementById('doc-editor-code');
  // The code layer is left alone while its dataset.hasDiff flag is set.
  if (mirror && !mirror.dataset.hasDiff) {
    mirror.textContent = `${ta.value}\n`;
    mirror.style.minHeight = `${ta.scrollHeight}px`;
  }

  clearTimeout(_hlDebounce);
  _hlDebounce = setTimeout(syncHighlighting, 80);

  clearTimeout(_autoTitleDebounce);
  // Read the textarea when the timer fires, not now, so later edits are seen.
  _autoTitleDebounce = setTimeout(() => autoTitleFromContent(ta.value), 600);

  clearTimeout(_autoSaveDebounce);
  _autoSaveDebounce = setTimeout(() => {
    saveDocument({ silent: true });
  }, 800);
}
// Insert one Markdown image line per uploaded file at the textarea's current
// selection (replacing it), padding with a newline on either side when the
// neighbouring text does not already supply one. Entries without an id are
// skipped. Afterwards the caret is placed after the inserted text and the
// autosave / preview refresh is triggered.
function _insertMarkdownImages(uploadedFiles) {
  const editor = document.getElementById('doc-editor-textarea');
  if (!editor) return;
  if (!Array.isArray(uploadedFiles) || !uploadedFiles.length) return;

  const selStart = editor.selectionStart || 0;
  const selEnd = editor.selectionEnd || selStart;
  const head = editor.value.slice(0, selStart);
  const tail = editor.value.slice(selEnd);

  const snippets = uploadedFiles.flatMap((file) => {
    const id = encodeURIComponent(file.id || file.file_id || '');
    const alt = _markdownImageAlt(file.name || file.filename);
    return id ? [`![${alt}](/api/upload/${id})`] : [];
  });
  if (!snippets.length) return;

  // Keep the images on their own lines relative to the surrounding text.
  const lead = head && !head.endsWith('\n') ? '\n' : '';
  const trail = tail && !tail.startsWith('\n') ? '\n' : '';
  const block = lead + snippets.join('\n\n') + trail;

  _replaceRange(editor, selStart, selEnd, block);

  const caret = selStart + block.length;
  editor.selectionStart = caret;
  editor.selectionEnd = caret;
  editor.focus();

  _scheduleMarkdownImageAutosave(editor);
  _refreshMarkdownPreviewIfVisible(activeDocId, editor.value);
}
// Upload the image files among `files` to /api/upload and insert the returned
// entries into the editor as Markdown images. Shows an error (when uiModule is
// available) if no image was given, if the active document is not markdown,
// or if the upload fails; shows a toast on success.
async function _uploadMarkdownImages(files) {
  const notifyError = (msg) => {
    if (uiModule) uiModule.showError(msg);
  };

  const images = Array.from(files || []).filter(_isMarkdownImageFile);
  if (images.length === 0) {
    notifyError('Choose an image file');
    return;
  }
  if (_activeDocLanguage() !== 'markdown') {
    notifyError('Switch the document to markdown before inserting images');
    return;
  }

  const form = new FormData();
  for (const image of images) form.append('files', image);

  try {
    const res = await fetch(`${API_BASE}/api/upload`, {
      method: 'POST',
      credentials: 'same-origin',
      body: form,
    });

    // The body may not be JSON (e.g. on an error page); treat that as "no data".
    let payload = null;
    try {
      payload = await res.json();
    } catch (_) {}

    if (!res.ok) {
      const reason = payload && (payload.error || payload.detail);
      throw new Error(reason || `HTTP ${res.status}`);
    }

    const uploaded = Array.isArray(payload?.files) ? payload.files : [];
    if (uploaded.length === 0) throw new Error('No uploaded files returned');

    _insertMarkdownImages(uploaded);
    if (uiModule) uiModule.showToast(images.length === 1 ? 'Image inserted' : 'Images inserted');
  } catch (err) {
    console.error('Failed to insert markdown image:', err);
    notifyError('Failed to insert image');
  }
}
// Change handler for the hidden markdown image <input type="file">.
//   e: the change event; e.target is the file input.
// Hands the chosen files to _uploadMarkdownImages and resets the input so
// that picking the same file again fires another change event.
async function _handleMarkdownImageUpload(e) {
  const input = e.target;
  // Snapshot into a plain array BEFORE resetting the input: `input.files` is a
  // FileList owned by the element, and some browser engines empty that same
  // object when `value` is cleared, which would leave nothing to upload.
  const files = Array.from(input.files || []);
  input.value = '';
  await _uploadMarkdownImages(files);
}
function _renderComposeAttachments() { function _renderComposeAttachments() {
const container = document.getElementById('doc-email-compose-atts'); const container = document.getElementById('doc-email-compose-atts');
if (!container) return; if (!container) return;
@@ -3751,9 +3864,12 @@ import * as Modals from './modalManager.js';
const res = await fetch(`${API_BASE}/api/document`, { const res = await fetch(`${API_BASE}/api/document`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
credentials: 'same-origin',
body: JSON.stringify({ session_id: sessionId, title: '', content }), body: JSON.stringify({ session_id: sessionId, title: '', content }),
}); });
if (!res.ok) throw new Error(`Document create failed: HTTP ${res.status}`);
const doc = await res.json(); const doc = await res.json();
if (!doc || !doc.id) throw new Error('Document create failed: missing id');
addDocToTabs(doc, sessionId); addDocToTabs(doc, sessionId);
// Set the content into the map so switchToDoc preserves it // Set the content into the map so switchToDoc preserves it
const d = docs.get(doc.id); const d = docs.get(doc.id);
@@ -3980,6 +4096,7 @@ import * as Modals from './modalManager.js';
<input type="hidden" id="doc-email-source-folder" /> <input type="hidden" id="doc-email-source-folder" />
<input type="file" id="doc-email-file-input" multiple style="display:none" /> <input type="file" id="doc-email-file-input" multiple style="display:none" />
</div> </div>
<input type="file" id="doc-md-image-input" accept="image/*" multiple style="display:none" />
<div class="doc-md-toolbar" id="doc-md-toolbar" style="display:none"> <div class="doc-md-toolbar" id="doc-md-toolbar" style="display:none">
<div class="md-toolbar-items" id="md-toolbar-items"> <div class="md-toolbar-items" id="md-toolbar-items">
<span class="md-view-toggle" id="doc-md-view-toggle" style="display:none" role="group" aria-label="Edit or preview"> <span class="md-view-toggle" id="doc-md-view-toggle" style="display:none" role="group" aria-label="Edit or preview">
@@ -4002,7 +4119,7 @@ import * as Modals from './modalManager.js';
<button type="button" class="md-dd-toggle" data-dd="list" title="List"><span style="font-variant-numeric:tabular-nums;">1.</span><svg width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button> <button type="button" class="md-dd-toggle" data-dd="list" title="List"><span style="font-variant-numeric:tabular-nums;">1.</span><svg width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button>
<span class="md-toolbar-sep"></span> <span class="md-toolbar-sep"></span>
<button type="button" data-md="link" title="Link"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M10 13a5 5 0 0 0 7.54.54l3-3a5 5 0 0 0-7.07-7.07l-1.72 1.71"/><path d="M14 11a5 5 0 0 0-7.54-.54l-3 3a5 5 0 0 0 7.07 7.07l1.71-1.71"/></svg></button> <button type="button" data-md="link" title="Link"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M10 13a5 5 0 0 0 7.54.54l3-3a5 5 0 0 0-7.07-7.07l-1.72 1.71"/><path d="M14 11a5 5 0 0 0-7.54-.54l-3 3a5 5 0 0 0 7.07 7.07l1.71-1.71"/></svg></button>
<button type="button" id="md-toolbar-attach-btn" class="md-toolbar-attach-btn" title="Attach files"><svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg></button> <button type="button" id="md-toolbar-attach-btn" class="md-toolbar-attach-btn" title="Insert image"><svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg></button>
<button type="button" class="md-dd-toggle md-toolbar-email-hide" data-dd="code" title="Code">\`<svg width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button> <button type="button" class="md-dd-toggle md-toolbar-email-hide" data-dd="code" title="Code">\`<svg width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button>
<button type="button" data-md="hr" title="Horizontal rule"></button> <button type="button" data-md="hr" title="Horizontal rule"></button>
<span class="md-toolbar-sep"></span> <span class="md-toolbar-sep"></span>
@@ -4601,9 +4718,14 @@ import * as Modals from './modalManager.js';
document.getElementById('doc-email-file-input')?.click(); document.getElementById('doc-email-file-input')?.click();
}); });
document.getElementById('md-toolbar-attach-btn')?.addEventListener('click', () => { document.getElementById('md-toolbar-attach-btn')?.addEventListener('click', () => {
document.getElementById('doc-email-file-input')?.click(); if (_activeDocLanguage() === 'email') {
document.getElementById('doc-email-file-input')?.click();
} else {
document.getElementById('doc-md-image-input')?.click();
}
}); });
document.getElementById('doc-email-file-input')?.addEventListener('change', _handleAttachUpload); document.getElementById('doc-email-file-input')?.addEventListener('change', _handleAttachUpload);
document.getElementById('doc-md-image-input')?.addEventListener('change', _handleMarkdownImageUpload);
// Cc/Bcc toggle // Cc/Bcc toggle
document.getElementById('doc-email-show-cc')?.addEventListener('click', () => { document.getElementById('doc-email-show-cc')?.addEventListener('click', () => {
@@ -4839,6 +4961,26 @@ import * as Modals from './modalManager.js';
clearTimeout(_autoSaveDebounce); clearTimeout(_autoSaveDebounce);
_autoSaveDebounce = setTimeout(() => { saveDocument({ silent: true }); }, 2000); _autoSaveDebounce = setTimeout(() => { saveDocument({ silent: true }); }, 2000);
}); });
ta.addEventListener('paste', (e) => {
if (_activeDocLanguage() !== 'markdown') return;
const files = Array.from(e.clipboardData?.files || []).filter(_isMarkdownImageFile);
if (!files.length) return;
e.preventDefault();
_uploadMarkdownImages(files);
});
ta.addEventListener('dragover', (e) => {
if (_activeDocLanguage() !== 'markdown') return;
const items = Array.from(e.dataTransfer?.items || []);
if (!items.some(item => item.kind === 'file' && /^image\//i.test(item.type || ''))) return;
e.preventDefault();
});
ta.addEventListener('drop', (e) => {
if (_activeDocLanguage() !== 'markdown') return;
const files = Array.from(e.dataTransfer?.files || []).filter(_isMarkdownImageFile);
if (!files.length) return;
e.preventDefault();
_uploadMarkdownImages(files);
});
ta.addEventListener('scroll', () => { ta.addEventListener('scroll', () => {
const code = document.getElementById('doc-editor-code'); const code = document.getElementById('doc-editor-code');
if (code) code.style.minHeight = ta.scrollHeight + 'px'; if (code) code.style.minHeight = ta.scrollHeight + 'px';
@@ -5547,7 +5689,7 @@ import * as Modals from './modalManager.js';
// any dropdown that just opened. Preventing the default mousedown keeps the // any dropdown that just opened. Preventing the default mousedown keeps the
// textarea focused, so formatting hits the live selection and menus stay up. // textarea focused, so formatting hits the live selection and menus stay up.
toolbar.addEventListener('mousedown', (e) => { toolbar.addEventListener('mousedown', (e) => {
if (e.target.closest('[data-md], .md-dd-toggle, .emoji-picker-btn')) e.preventDefault(); if (e.target.closest('[data-md], .md-dd-toggle, .emoji-picker-btn, .md-toolbar-attach-btn')) e.preventDefault();
}); });
toolbar.addEventListener('click', (e) => { toolbar.addEventListener('click', (e) => {
@@ -5975,6 +6117,7 @@ import * as Modals from './modalManager.js';
const res = await fetch(`${API_BASE}/api/document`, { const res = await fetch(`${API_BASE}/api/document`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
credentials: 'same-origin',
body: JSON.stringify({ body: JSON.stringify({
session_id: sessionId, session_id: sessionId,
title: '', title: '',
@@ -5982,7 +6125,9 @@ import * as Modals from './modalManager.js';
language: 'markdown', language: 'markdown',
}), }),
}); });
if (!res.ok) throw new Error(`Document create failed: HTTP ${res.status}`);
const doc = await res.json(); const doc = await res.json();
if (!doc || !doc.id) throw new Error('Document create failed: missing id');
addDocToTabs(doc, sessionId); addDocToTabs(doc, sessionId);
if (!isOpen) openPanel(); if (!isOpen) openPanel();
// Re-enable editor if it was in empty state // Re-enable editor if it was in empty state
@@ -8265,8 +8410,10 @@ import * as Modals from './modalManager.js';
const res = await fetch(`${API_BASE}/api/document/${activeDocId}`, { const res = await fetch(`${API_BASE}/api/document/${activeDocId}`, {
method: 'PUT', method: 'PUT',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
credentials: 'same-origin',
body: JSON.stringify({ content: textarea.value }), body: JSON.stringify({ content: textarea.value }),
}); });
if (!res.ok) throw new Error(`Document save failed: HTTP ${res.status}`);
const doc = await res.json(); const doc = await res.json();
const badge = document.getElementById('doc-version-badge'); const badge = document.getElementById('doc-version-badge');
if (badge) { const _v = doc.version_count || 1; badge.textContent = `v${_v}`; badge.style.display = _v > 1 ? '' : 'none'; } if (badge) { const _v = doc.version_count || 1; badge.textContent = `v${_v}`; badge.style.display = _v > 1 ? '' : 'none'; }
@@ -8279,7 +8426,11 @@ import * as Modals from './modalManager.js';
if (!silent && uiModule) uiModule.showToast('Document saved'); if (!silent && uiModule) uiModule.showToast('Document saved');
} catch (e) { } catch (e) {
console.error('Failed to save document:', e); console.error('Failed to save document:', e);
if (!silent && uiModule) uiModule.showError('Failed to save document'); const now = Date.now();
if (uiModule && (!silent || now - _lastAutoSaveErrorAt > 10000)) {
uiModule.showError(silent ? 'Autosave failed' : 'Failed to save document');
_lastAutoSaveErrorAt = now;
}
} }
} }
+39 -12
View File
@@ -4574,11 +4574,12 @@ function _wireAttachmentHandlers(reader, folder) {
const uid = openBtn.dataset.openUid; const uid = openBtn.dataset.openUid;
const index = openBtn.dataset.openIndex; const index = openBtn.dataset.openIndex;
const name = openBtn.dataset.openName || `attachment-${index}`; const name = openBtn.dataset.openName || `attachment-${index}`;
const sourceFolder = openBtn.dataset.openFolder || useFolder;
if (!uid || index == null) return; if (!uid || index == null) return;
const orig = openBtn.style.opacity; const orig = openBtn.style.opacity;
openBtn.style.opacity = '0.4'; openBtn.style.opacity = '0.4';
try { try {
const folderQs = encodeURIComponent(useFolder); const folderQs = encodeURIComponent(sourceFolder);
const res = await fetch( const res = await fetch(
`${API_BASE}/api/email/attachment-as-doc/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${folderQs}${_acct()}`, `${API_BASE}/api/email/attachment-as-doc/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${folderQs}${_acct()}`,
{ method: 'POST', credentials: 'same-origin' } { method: 'POST', credentials: 'same-origin' }
@@ -4632,8 +4633,9 @@ function _wireAttachmentHandlers(reader, folder) {
const uid = chip.dataset.attUid; const uid = chip.dataset.attUid;
const index = chip.dataset.attIndex; const index = chip.dataset.attIndex;
const name = chip.dataset.attName || `attachment-${index}`; const name = chip.dataset.attName || `attachment-${index}`;
const sourceFolder = chip.dataset.attFolder || useFolder;
if (!uid || index == null) return; if (!uid || index == null) return;
const url = `${API_BASE}/api/email/attachment/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${encodeURIComponent(useFolder)}${_acct()}`; const url = `${API_BASE}/api/email/attachment/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${encodeURIComponent(sourceFolder)}${_acct()}`;
if (_isMobileUA) { if (_isMobileUA) {
window.open(url, '_blank'); window.open(url, '_blank');
return; return;
@@ -4712,25 +4714,50 @@ function _isLikelySignatureImage(a) {
// Build the attachments header+chips HTML for an email read response. Pulled // Build the attachments header+chips HTML for an email read response. Pulled
// out so both the initial-open and the swap-reader paths can render it. // out so both the initial-open and the swap-reader paths can render it.
function _buildAttsHtmlFor(uid, data) { function _buildAttsHtmlFor(uid, data) {
if (!data || !data.attachments || !data.attachments.length) return ''; if (!data) return '';
const _OPENABLE_RE = /\.(pdf|docx|txt|md|markdown)$/i; const _OPENABLE_RE = /\.(pdf|docx|txt|md|markdown|eml)$/i;
const visible = data.attachments.filter(a => !_isLikelySignatureImage(a)); const currentAttachments = Array.isArray(data.attachments) ? data.attachments : [];
if (!visible.length) return ''; const relatedAttachments = Array.isArray(data.related_attachments) ? data.related_attachments : [];
const chips = visible.map(a => { if (!currentAttachments.length && !relatedAttachments.length) return '';
const visible = currentAttachments.filter(a => !_isLikelySignatureImage(a));
const hidden = currentAttachments.filter(a => _isLikelySignatureImage(a));
const related = relatedAttachments.filter(a => !_isLikelySignatureImage(a));
const renderChip = (a, extraClass = '') => {
const openable = _OPENABLE_RE.test(a.filename || ''); const openable = _OPENABLE_RE.test(a.filename || '');
const chipUid = a.source_uid || a.uid || uid;
const chipFolder = a.source_folder || data.folder || state._libFolder || 'INBOX';
const openBtn = openable const openBtn = openable
? `<span class="email-attachment-open" title="Open in document editor" data-open-uid="${_esc(uid)}" data-open-index="${a.index}" data-open-name="${_esc(a.filename)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="8" y1="13" x2="16" y2="13"/><line x1="8" y1="17" x2="16" y2="17"/><line x1="8" y1="9" x2="10" y2="9"/></svg><span class="email-attachment-open-label">Open</span></span>` ? `<span class="email-attachment-open" title="Open in document editor" data-open-uid="${_esc(chipUid)}" data-open-index="${a.index}" data-open-name="${_esc(a.filename)}" data-open-folder="${_esc(chipFolder)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="8" y1="13" x2="16" y2="13"/><line x1="8" y1="17" x2="16" y2="17"/><line x1="8" y1="9" x2="10" y2="9"/></svg><span class="email-attachment-open-label">Open</span></span>`
: ''; : '';
return `<button type="button" class="email-attachment-chip" data-att-uid="${_esc(uid)}" data-att-index="${a.index}" data-att-name="${_esc(a.filename)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg><span>${_esc(a.filename)}</span><span class="att-size">${Math.round((a.size||0)/1024)} KB</span>${openBtn}</button>`; return `<button type="button" class="email-attachment-chip${extraClass}" data-att-uid="${_esc(chipUid)}" data-att-index="${a.index}" data-att-name="${_esc(a.filename)}" data-att-folder="${_esc(chipFolder)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg><span>${_esc(a.filename)}</span><span class="att-size">${Math.round((a.size||0)/1024)} KB</span>${openBtn}</button>`;
}).join(''); };
const chips = visible.map(a => renderChip(a)).join('');
const hiddenChips = hidden.map(a => renderChip(a, ' email-attachment-chip-muted')).join('');
const relatedChips = related.map(a => renderChip(a, ' email-attachment-chip-related')).join('');
const visibleSection = visible.length
? '<div class="email-reader-atts">' + chips + '</div>'
: '';
const relatedSection = related.length
? '<div class="email-reader-atts-hidden-note">From earlier in this thread</div><div class="email-reader-atts email-reader-atts-related">' + relatedChips + '</div>'
: '';
const hiddenSection = hidden.length
? '<div class="email-reader-atts-hidden-note">Filtered inline images / signature files</div><div class="email-reader-atts email-reader-atts-hidden">' + hiddenChips + '</div>'
: '';
const label = visible.length
? `Attachments (${visible.length + related.length})`
: related.length
? `Thread attachments (${related.length})`
: `Hidden inline attachments (${hidden.length})`;
return ( return (
'<div class="email-reader-atts-wrap collapsed">' '<div class="email-reader-atts-wrap collapsed">'
+ '<div class="email-reader-atts-header email-summary-toggle" role="button" tabindex="0">' + '<div class="email-reader-atts-header email-summary-toggle" role="button" tabindex="0">'
+ '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>' + '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>'
+ `<span>Attachments (${data.attachments.length})</span>` + `<span>${label}</span>`
+ '<svg class="email-summary-chevron" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="margin-left:auto;transition:transform .15s ease;"><polyline points="6 9 12 15 18 9"/></svg>' + '<svg class="email-summary-chevron" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="margin-left:auto;transition:transform .15s ease;"><polyline points="6 9 12 15 18 9"/></svg>'
+ '</div>' + '</div>'
+ '<div class="email-reader-atts">' + chips + '</div>' + visibleSection
+ relatedSection
+ hiddenSection
+ '</div>' + '</div>'
); );
} }
+22 -3
View File
@@ -36,6 +36,14 @@ function linkHtml(text, url) {
return `<a href="${escapeHtml(safeUrl)}" target="_blank" rel="noopener noreferrer">${safeText}</a>`; return `<a href="${escapeHtml(safeUrl)}" target="_blank" rel="noopener noreferrer">${safeText}</a>`;
} }
/**
 * Build the HTML for a markdown image.
 *
 * The URL is first passed through safeLinkUrl(). If that yields an empty
 * value, or a value beginning with '#', no <img> is produced and only the
 * HTML-escaped alt text is returned.
 *
 * @param {string} alt   Alternative text (may be empty or undefined).
 * @param {string} url   Raw image URL taken from the markdown source.
 * @param {string} [title] Optional title; emitted as a title="" attribute.
 * @returns {string} An <img> tag with lazy loading and async decoding, or the
 *   escaped alt text when the URL is rejected.
 */
function imageHtml(alt, url, title) {
  const src = safeLinkUrl(url);
  const altText = escapeHtml(alt || '');
  if (!src || src.startsWith('#')) {
    return altText;
  }
  let titleAttr = '';
  if (title) {
    titleAttr = ` title="${escapeHtml(title)}"`;
  }
  return `<img src="${escapeHtml(src)}" alt="${altText}"${titleAttr} loading="lazy" decoding="async">`;
}
function _isModelEndpointUrl(rawUrl) { function _isModelEndpointUrl(rawUrl) {
try { try {
const parsed = new URL(String(rawUrl || ''), window.location.origin); const parsed = new URL(String(rawUrl || ''), window.location.origin);
@@ -146,7 +154,7 @@ function sanitizeAllowedHtml(html) {
* Check if text has unclosed think tag * Check if text has unclosed think tag
*/ */
export function hasUnclosedThinkTag(text) { export function hasUnclosedThinkTag(text) {
text = text || ''; text = normalizeThinkingMarkup(text || '');
const openCount = const openCount =
(text.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi) || []).length (text.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi) || []).length
+ (text.match(/<\|channel>thought/gi) || []).length; + (text.match(/<\|channel>thought/gi) || []).length;
@@ -163,6 +171,10 @@ export function startsWithReasoningPrefix(text) {
export function normalizeThinkingMarkup(text) { export function normalizeThinkingMarkup(text) {
if (!text) return text; if (!text) return text;
let normalized = text; let normalized = text;
// MiniMax M-series can emit namespaced reasoning tags like
// <mm:think>...</mm:think>. Normalize them into the shared thinking parser.
normalized = normalized.replace(/<mm:think(\s+[^>]*)?>/gi, (_m, attrs = '') => `<think${attrs || ''}>`);
normalized = normalized.replace(/<\/mm:think>/gi, '</think>');
normalized = normalized.replace(/<thought(\s+[^>]*)?>/gi, (_m, attrs = '') => `<think${attrs || ''}>`); normalized = normalized.replace(/<thought(\s+[^>]*)?>/gi, (_m, attrs = '') => `<think${attrs || ''}>`);
normalized = normalized.replace(/<\/thought>/gi, '</think>'); normalized = normalized.replace(/<\/thought>/gi, '</think>');
normalized = normalized.replace(/<\|channel>thought\s*\n?([\s\S]*?)<channel\|>\s*/gi, (_m, content = '') => { normalized = normalized.replace(/<\|channel>thought\s*\n?([\s\S]*?)<channel\|>\s*/gi, (_m, content = '') => {
@@ -535,6 +547,12 @@ export function mdToHtml(src, opts) {
'$1[#$2](#$2)', '$1[#$2](#$2)',
); );
// Convert markdown images before links so ![alt](url) does not become
// literal "!" plus a normal link.
s = s.replace(/!\[([^\]\n]*)\]\(([^)\s]+)(?:\s+"([^"]*)")?\)/g, (match, alt, url, title) => {
return imageHtml(alt, url, title);
});
// Convert markdown links [text](url) to clickable links // Convert markdown links [text](url) to clickable links
// Internal #hash links navigate in-page; external links open in new tab // Internal #hash links navigate in-page; external links open in new tab
s = s.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => { s = s.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => {
@@ -573,8 +591,9 @@ export function mdToHtml(src, opts) {
return placeholder; return placeholder;
}); });
// ALSO preserve <a> tags the same way (they're now in the HTML from markdown conversion) // ALSO preserve <a>/<img> tags the same way (they're now in the HTML from
s = s.replace(/<a\s+[^>]*>.*?<\/a>/gi, (match) => { // markdown conversion)
s = s.replace(/<(?:a\s+[^>]*>.*?<\/a|img\s+[^>]*?)>/gi, (match) => {
const placeholder = `___ALLOWED_HTML_${allowedHtmlBlocks.length}___`; const placeholder = `___ALLOWED_HTML_${allowedHtmlBlocks.length}___`;
allowedHtmlBlocks.push(sanitizeAllowedHtml(match)); allowedHtmlBlocks.push(sanitizeAllowedHtml(match));
return placeholder; return placeholder;
+21
View File
@@ -112,6 +112,7 @@ function _initModelPickerDropdown() {
const search = document.getElementById('model-picker-search'); const search = document.getElementById('model-picker-search');
const listEl = document.getElementById('model-picker-list'); const listEl = document.getElementById('model-picker-list');
const searchRow = menu ? menu.querySelector('.model-picker-search-row') : null; const searchRow = menu ? menu.querySelector('.model-picker-search-row') : null;
const refreshBtn = document.getElementById('model-picker-refresh-btn');
if (!wrap || !btn || !menu || !search || !listEl) return; if (!wrap || !btn || !menu || !search || !listEl) return;
function _close() { function _close() {
@@ -608,6 +609,26 @@ function _initModelPickerDropdown() {
search.addEventListener('input', () => _populate(search.value)); search.addEventListener('input', () => _populate(search.value));
search.addEventListener('click', (e) => e.stopPropagation()); search.addEventListener('click', (e) => e.stopPropagation());
// Optional refresh button: re-fetch the model list and re-probe local
// endpoints on demand. The button is disabled and given the `spinning` class
// for the duration of the refresh so it cannot be triggered twice.
if (refreshBtn) {
  refreshBtn.addEventListener('click', async (e) => {
    // Stop the click from bubbling to ancestor handlers (presumably the ones
    // that close the picker menu — same treatment as the search input).
    e.stopPropagation();
    refreshBtn.disabled = true;
    refreshBtn.classList.add('spinning');
    try {
      if (window.modelsModule && window.modelsModule.refreshModels) {
        await window.modelsModule.refreshModels(true);
      }
      await _refreshLocalProbe();
      // Only rebuild the list when the menu is currently visible.
      if (!menu.classList.contains('hidden')) _populate(search.value || '');
      updateModelPicker();
    } catch (err) {
      // Keep the cause in the console; the toast alone gives no way to
      // diagnose why the refresh failed.
      console.error('Model refresh failed:', err);
      uiModule.showToast('Model refresh failed');
    } finally {
      // Always restore the button, whether the refresh succeeded or not.
      refreshBtn.disabled = false;
      refreshBtn.classList.remove('spinning');
    }
  });
}
search.addEventListener('keydown', (e) => { search.addEventListener('keydown', (e) => {
_handlePickerKeydown(e, listEl, '.model-switch-item', _close); _handlePickerKeydown(e, listEl, '.model-switch-item', _close);
}); });
+22 -3
View File
@@ -17,9 +17,16 @@ let _tasksFetched = false; // first-fetch sentinel — `false` → show loadin
let _escHandler = null; let _escHandler = null;
let _viewingRuns = null; // task id when viewing run history let _viewingRuns = null; // task id when viewing run history
let _clockInterval = null; let _clockInterval = null;
let _taskFailurePending = false;
const DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']; const DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'];
/**
 * Record whether a task failure is waiting to be looked at and mirror that
 * state onto the tasks entry points as the `task-failure-pending` class.
 *
 * @param {*} active Truthy to mark a failure as pending, falsy to clear it.
 */
function _setTaskFailurePending(active) {
  _taskFailurePending = Boolean(active);
  // Either element may be absent from the page; optional chaining skips it.
  for (const elementId of ['tool-tasks-btn', 'rail-tasks']) {
    document.getElementById(elementId)?.classList.toggle('task-failure-pending', _taskFailurePending);
  }
}
// ---- API ---- // ---- API ----
async function _fetchTasks() { async function _fetchTasks() {
@@ -2238,6 +2245,9 @@ function _renderActivityEntry(entry) {
status = _classifyResult(entry.result); status = _classifyResult(entry.result);
} }
const statusDot = `<span class="task-log-status task-log-status-${status}" title="${status}"></span>`; const statusDot = `<span class="task-log-status task-log-status-${status}" title="${status}"></span>`;
const failedTag = status === 'error'
? '<span class="task-log-failed-tag">(failed)</span>'
: '';
// Render the result through markdown so code blocks, lists, links look right. // Render the result through markdown so code blocks, lists, links look right.
let resultHtml; let resultHtml;
const _isRunning = entry.status === 'running' || entry.status === 'queued'; const _isRunning = entry.status === 'running' || entry.status === 'queued';
@@ -2361,7 +2371,7 @@ function _renderActivityEntry(entry) {
<div class="task-log-row-head"> <div class="task-log-row-head">
${statusDot} ${statusDot}
<span class="task-log-task-icon">${_taskIcon({ action: entry.action, task_type: entry.kind })}</span> <span class="task-log-task-icon">${_taskIcon({ action: entry.action, task_type: entry.kind })}</span>
<span class="task-log-name">${_escHtml(entry.taskName)}</span>${_taskAiMark(entry)} <span class="task-log-name">${_escHtml(entry.taskName)}</span>${failedTag}${_taskAiMark(entry)}
${repeatBadge} ${repeatBadge}
<span style="flex:1"></span> <span style="flex:1"></span>
${rightHtml} ${rightHtml}
@@ -2502,8 +2512,11 @@ function _renderMainView() {
export function openTasks(focusId, opts) { export function openTasks(focusId, opts) {
const o = opts || {}; const o = opts || {};
const openActivityForFailure = _taskFailurePending && !focusId && o.filter === undefined;
_setTaskFailurePending(false);
if (_open) { if (_open) {
// Already open — just focus the requested task / apply filter. // Already open — just focus the requested task / apply filter.
if (openActivityForFailure) _switchTab('activity');
if (o.filter !== undefined) { _taskFilter = o.filter; _renderList(); } if (o.filter !== undefined) { _taskFilter = o.filter; _renderList(); }
if (focusId) _focusTask(focusId); if (focusId) _focusTask(focusId);
return; return;
@@ -2610,7 +2623,7 @@ export function openTasks(focusId, opts) {
// of an empty modal-body that fills in after the fetch resolves — that delay // of an empty modal-body that fills in after the fetch resolves — that delay
// was visible as a "flicker" right after opening. // was visible as a "flicker" right after opening.
_activeTab = 'tasks'; _activeTab = 'tasks';
_switchTab('tasks'); _switchTab(openActivityForFailure ? 'activity' : 'tasks');
_fetchTasks().then(() => { _fetchTasks().then(() => {
// Re-render so the list swaps the Loading row for real cards. // Re-render so the list swaps the Loading row for real cards.
_renderList(); _renderList();
@@ -2704,7 +2717,13 @@ async function _pollTaskNotifications() {
const msg = `Task ${ok ? 'finished' : 'failed'}: ${n.task_name}`; const msg = `Task ${ok ? 'finished' : 'failed'}: ${n.task_name}`;
if (!uiModule) continue; if (!uiModule) continue;
if (ok) uiModule.showToast(msg, { duration: 5000 }); if (ok) uiModule.showToast(msg, { duration: 5000 });
else uiModule.showError(msg); else {
_setTaskFailurePending(true);
uiModule.showError(msg);
if (_open && document.querySelector('.tasks-tab.active[data-tab="activity"]')) {
_renderActivityView();
}
}
} }
} catch (e) { } catch (e) {
// Silently ignore — server may be unreachable // Silently ignore — server may be unreachable
+483 -37
View File
@@ -2676,6 +2676,15 @@ body.bg-pattern-sparkles {
.mode-toggle.mode-right::before { .mode-toggle.mode-right::before {
transform: translateX(100%); transform: translateX(100%);
} }
/* Three-segment variant of the mode toggle: the ::before pseudo-element
   (presumably the sliding highlight) is one third of the toggle's width. */
.mode-toggle.mode-toggle-three::before {
width: 33.3333%;
}
/* translateX percentages are relative to the pseudo-element's own width, so
   100% moves it onto the second segment and 200% onto the third. */
.mode-toggle.mode-toggle-three.mode-mid::before {
transform: translateX(100%);
}
.mode-toggle.mode-toggle-three.mode-third::before {
transform: translateX(200%);
}
#mode-agent-btn { #mode-agent-btn {
border-radius: 10px 0 0 10px; border-radius: 10px 0 0 10px;
} }
@@ -2715,6 +2724,15 @@ body.bg-pattern-sparkles {
.mode-toggle-btn + .mode-toggle-btn { .mode-toggle-btn + .mode-toggle-btn {
border-left: none; border-left: none;
} }
/* Viewports up to 768px wide: shorten the "unified" button label in the
   three-segment toggle. The original label text is collapsed with
   font-size: 0 and the abbreviation is drawn by ::after at 11px. */
@media (max-width: 768px) {
.mode-toggle.mode-toggle-three [data-llama-mode="unified"] > span {
font-size: 0;
}
.mode-toggle.mode-toggle-three [data-llama-mode="unified"] > span::after {
content: 'Unif';
font-size: 11px;
}
}
/* Message count badge in the chat-meta header (next to the title). /* Message count badge in the chat-meta header (next to the title).
Auto-hides when empty so a brand-new chat doesn't show "0 msgs". */ Auto-hides when empty so a brand-new chat doesn't show "0 msgs". */
.chat-meta-count { .chat-meta-count {
@@ -2822,10 +2840,14 @@ body.bg-pattern-sparkles {
transform: translateX(10px) scale(0.88); transform: translateX(10px) scale(0.88);
pointer-events: none; pointer-events: none;
} }
.model-picker-search-wrap {
position: relative;
min-width: 0;
}
.model-picker-menu input[type="text"] { .model-picker-menu input[type="text"] {
width: 100%; width: 100%;
box-sizing: border-box; box-sizing: border-box;
padding: 6px 8px; padding: 6px 30px 6px 8px;
font-size: 0.82em; font-size: 0.82em;
border: 1px solid var(--border); border: 1px solid var(--border);
border-radius: 4px; border-radius: 4px;
@@ -2842,6 +2864,37 @@ body.bg-pattern-sparkles {
.model-picker-menu input[type="text"]::placeholder { .model-picker-menu input[type="text"]::placeholder {
color: color-mix(in srgb, var(--fg) 30%, transparent); color: color-mix(in srgb, var(--fg) 30%, transparent);
} }
/* Refresh button for the model picker: a 22x22 borderless icon button pinned
   4px from the right edge of its positioned ancestor and vertically centred
   with top: 50% + translateY(-50%). */
.model-picker-refresh-btn {
appearance: none;
position: absolute;
top: 50%;
right: 4px;
transform: translateY(-50%);
width: 22px;
height: 22px;
border: 0;
border-radius: 4px;
background: transparent;
color: color-mix(in srgb, var(--fg) 54%, transparent);
display: inline-flex;
align-items: center;
justify-content: center;
padding: 0;
cursor: pointer;
}
/* Hover and keyboard focus share one treatment: full-strength foreground
   colour plus a faint background tint. The default outline is removed, so the
   tint is the only focus indicator. */
.model-picker-refresh-btn:hover,
.model-picker-refresh-btn:focus-visible {
color: var(--fg);
background: color-mix(in srgb, var(--fg) 8%, transparent);
outline: none;
}
/* Disabled state: slightly dimmed, no pointer cursor. */
.model-picker-refresh-btn:disabled {
opacity: 0.7;
cursor: default;
}
/* While the `spinning` class is present the icon rotates continuously. The
   model-picker-refresh-spin keyframes are not part of this rule and must be
   defined elsewhere in the stylesheet. */
.model-picker-refresh-btn.spinning svg {
animation: model-picker-refresh-spin 0.75s linear infinite;
}
.model-picker-action-btn { .model-picker-action-btn {
appearance: none; appearance: none;
display: inline-flex; display: inline-flex;
@@ -4102,7 +4155,7 @@ body.bg-pattern-sparkles {
.sidebar { .sidebar {
position: fixed !important; position: fixed !important;
top: 0; bottom: 0; left: 0; top: 0; bottom: 0; left: 0;
z-index: 200; z-index: 400;
width: 80% !important; width: 80% !important;
max-width: 340px; max-width: 340px;
box-shadow: 4px 0 20px rgba(0,0,0,0.5); box-shadow: 4px 0 20px rgba(0,0,0,0.5);
@@ -4133,7 +4186,7 @@ body.bg-pattern-sparkles {
#sidebar-backdrop { #sidebar-backdrop {
position: fixed; position: fixed;
inset: 0; inset: 0;
z-index: 199; z-index: 390;
background: rgba(0,0,0,0.4); background: rgba(0,0,0,0.4);
opacity: 0; opacity: 0;
pointer-events: none; pointer-events: none;
@@ -6882,7 +6935,7 @@ pre { background: var(--code-bg, var(--hl-bg, #282c34)) !important; }
inside .chat-input-top, matching .model-picker-wrap's slot. */ inside .chat-input-top, matching .model-picker-wrap's slot. */
.chat-input-top > .cmp-eval-wrap { .chat-input-top > .cmp-eval-wrap {
position: absolute; position: absolute;
top: 0; right: 0; top: -2px; right: 0;
z-index: 2; z-index: 2;
} }
.cmp-eval-btn { .cmp-eval-btn {
@@ -7224,14 +7277,16 @@ pre { background: var(--code-bg, var(--hl-bg, #282c34)) !important; }
width: 100%; order: 99; margin-top: -4px; padding-bottom: 2px; padding-left: 2px; width: 100%; order: 99; margin-top: -4px; padding-bottom: 2px; padding-left: 2px;
} }
.pane-finish-badge { .pane-finish-badge {
font-weight: 600; color: var(--red); font-weight: 600; color: var(--green, #50fa7b);
position: relative;
top: 2px;
} }
.compare-pane.winner .pane-header { .compare-pane.winner .pane-header {
background: color-mix(in srgb, var(--red) 12%, transparent); background: color-mix(in srgb, var(--green, #50fa7b) 12%, transparent);
border-bottom-color: color-mix(in srgb, var(--red) 30%, var(--border)); border-bottom-color: color-mix(in srgb, var(--green, #50fa7b) 30%, var(--border));
} }
.compare-pane.winner .pane-title { .compare-pane.winner .pane-title {
color: var(--red); color: var(--green, #50fa7b);
} }
.compare-pane.loser .pane-header { .compare-pane.loser .pane-header {
opacity: 0.5; opacity: 0.5;
@@ -7501,6 +7556,33 @@ pre { background: var(--code-bg, var(--hl-bg, #282c34)) !important; }
padding: 14px 8px 10px 8px !important; padding: 14px 8px 10px 8px !important;
min-height: 44px; min-height: 44px;
} }
/* Compare pane header layout: items align to the top with a 3px gap between
   wrapped rows (assumes .pane-header is a wrapping flex container — confirm
   against the base rule). */
.compare-pane .pane-header {
align-items: flex-start;
row-gap: 3px;
}
/* The title button claims a full row of its own (flex-basis 100%, no grow or
   shrink) and is ordered first. */
.compare-pane .pane-title-btn {
flex: 0 0 100%;
width: 100%;
order: 0;
padding-right: 2px;
}
/* Timer, finish badge and actions are ordered after the title. */
.compare-pane .pane-timer,
.compare-pane .pane-finish-badge,
.compare-pane .pane-actions {
order: 1;
}
/* For 4-, 5- and 6-column grids, reset the timer's width, order, top margin
   and padding (presumably overriding a rule elsewhere that places the timer on
   its own full-width row for those column counts). */
.compare-grid[data-cols="4"] .compare-pane .pane-timer,
.compare-grid[data-cols="5"] .compare-pane .pane-timer,
.compare-grid[data-cols="6"] .compare-pane .pane-timer {
width: auto;
order: 1;
margin-top: 0;
padding-bottom: 0;
padding-left: 0;
}
/* The auto left margin pushes the actions to the right end of their row. */
.compare-pane .pane-actions {
margin-left: auto;
}
/* Mode tabs: icons only, centered */ /* Mode tabs: icons only, centered */
.compare-mode-tab span { display: none; } .compare-mode-tab span { display: none; }
.compare-mode-tabs { justify-content: center; } .compare-mode-tabs { justify-content: center; }
@@ -18869,7 +18951,7 @@ body.gallery-selecting .gallery-dl-btn,
top: -6px; top: -6px;
} }
#serve-bulk-bar #serve-bulk-cancel { #serve-bulk-bar #serve-bulk-cancel {
top: 0 !important; top: -2px !important;
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
justify-content: center; justify-content: center;
@@ -19212,6 +19294,14 @@ body.gallery-selecting .gallery-dl-btn,
position: relative; position: relative;
top: -3px; top: -3px;
} }
@media (max-width: 768px) {
.cookbook-section-header .cookbook-clear-btn {
top: -3px;
}
.cookbook-task-check {
top: 4px;
}
}
/* "Stop all" sits just left of "Clear finished"; it carries the auto margin so /* "Stop all" sits just left of "Clear finished"; it carries the auto margin so
the pair is pushed together to the right of the section title. */ the pair is pushed together to the right of the section title. */
.cookbook-section-header .cookbook-stop-all-btn { .cookbook-section-header .cookbook-stop-all-btn {
@@ -19355,25 +19445,68 @@ body.gallery-selecting .gallery-dl-btn,
.cookbook-saved-save { .cookbook-saved-save {
padding: 0 10px; padding: 0 10px;
gap: 4px; gap: 4px;
background: var(--red); background: var(--bg);
color: #fff; color: var(--fg);
border-color: var(--red); border-color: var(--border);
font-weight: 600; font-weight: 600;
opacity: 1; opacity: 1;
border-top-right-radius: 0; border-top-right-radius: 0;
border-bottom-right-radius: 0; border-bottom-right-radius: 0;
} }
.cookbook-saved-save:hover { .cookbook-saved-save:hover {
background: color-mix(in srgb, var(--red) 80%, white); background: var(--border);
border-color: color-mix(in srgb, var(--red) 80%, white); border-color: var(--accent);
opacity: 1; opacity: 1;
} }
.cookbook-saved-arrow { .cookbook-saved-arrow {
padding: 0 6px; padding: 0 6px;
background: var(--bg);
color: var(--fg);
border-color: var(--border);
opacity: 1;
border-top-left-radius: 0; border-top-left-radius: 0;
border-bottom-left-radius: 0; border-bottom-left-radius: 0;
border-left: none; border-left: none;
} }
/* Favourite entries in the saved menu: 3px red left border plus a 4% red
   background tint. */
.cookbook-saved-menu .cookbook-saved-favorite {
border-left: 3px solid var(--red);
background: color-mix(in srgb, var(--red) 4%, transparent);
}
/* Favourite toggle button: a 20x20 icon button, muted and semi-transparent
   until hovered or active. The transparent 1px border reserves space without
   drawing anything. */
.cookbook-saved-fav-btn {
width: 20px;
height: 20px;
padding: 0;
border: 1px solid transparent;
border-radius: 6px;
background: none;
color: var(--fg-muted);
opacity: 0.55;
cursor: pointer;
display: inline-flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
}
/* Hovered and active buttons look the same: red icon, full opacity, light red
   background tint. */
.cookbook-saved-fav-btn:hover,
.cookbook-saved-fav-btn.active {
color: var(--red);
opacity: 1;
background: color-mix(in srgb, var(--red) 10%, transparent);
}
/* Keep the badge at its natural size inside a flex row. */
.cookbook-saved-fav-badge {
flex-shrink: 0;
}
/* Favourite model in the serve list: same red accent as the saved menu.
   !important forces the left border colour over any competing rule. */
.cookbook-serve-favorite-model {
border-left-color: var(--red) !important;
background: color-mix(in srgb, var(--red) 4%, transparent);
}
/* Inline favourite badge, nudged up 1px relative to the text baseline. */
.cookbook-serve-fav-badge {
display: inline-flex;
align-items: center;
vertical-align: 1px;
margin-left: 5px;
margin-right: 2px;
}
.cookbook-slot-saved { background: color-mix(in srgb, var(--accent) 10%, transparent); border-color: color-mix(in srgb, var(--accent) 30%, transparent); color: var(--accent); } .cookbook-slot-saved { background: color-mix(in srgb, var(--accent) 10%, transparent); border-color: color-mix(in srgb, var(--accent) 30%, transparent); color: var(--accent); }
.cookbook-slot-saved:hover { background: color-mix(in srgb, var(--accent) 20%, transparent); } .cookbook-slot-saved:hover { background: color-mix(in srgb, var(--accent) 20%, transparent); }
.cookbook-slot-btn.active { opacity: 1; background: var(--accent); color: #fff; border-color: var(--accent); .cookbook-slot-btn.active { opacity: 1; background: var(--accent); color: #fff; border-color: var(--accent);
@@ -19502,9 +19635,9 @@ body.gallery-selecting .gallery-dl-btn,
background: color-mix(in srgb, var(--green, #50fa7b) 18%, transparent); background: color-mix(in srgb, var(--green, #50fa7b) 18%, transparent);
color: var(--green, #50fa7b); color: var(--green, #50fa7b);
border: 1px solid color-mix(in srgb, var(--green, #50fa7b) 35%, transparent); border: 1px solid color-mix(in srgb, var(--green, #50fa7b) 35%, transparent);
/* Match the Install button + Installed split width so all three variants /* Match Install + Installed ▾ so all three variants align in mixed rows. */
align in a mixed row. */ width: 87.7px;
min-width: 75.85px; min-width: 87.7px;
padding: 0 10px; padding: 0 10px;
box-sizing: border-box; box-sizing: border-box;
} }
@@ -19527,9 +19660,8 @@ body.gallery-selecting .gallery-dl-btn,
font-weight: 500; font-weight: 500;
position: relative; position: relative;
top: -3px; top: -3px;
/* Width matches the measured Installed split button (75.85px) so a row of width: 87.7px;
mixed Install / Installed deps lines up. */ min-width: 87.7px;
min-width: 75.85px;
padding: 0 10px; padding: 0 10px;
/* Strip the native button box so it's the same height as the sibling tags /* Strip the native button box so it's the same height as the sibling tags
(Firefox renders <button> taller otherwise); height comes from .cookbook-dep-tag. */ (Firefox renders <button> taller otherwise); height comes from .cookbook-dep-tag. */
@@ -19574,6 +19706,8 @@ body.gallery-selecting .gallery-dl-btn,
opens the actions menu (Update). Replaces the old button. */ opens the actions menu (Update). Replaces the old button. */
.cookbook-dep-installed-btn { .cookbook-dep-installed-btn {
padding: 0; padding: 0;
width: 87.7px;
min-width: 87.7px;
cursor: pointer; cursor: pointer;
font-family: inherit; font-family: inherit;
overflow: hidden; overflow: hidden;
@@ -19619,8 +19753,20 @@ body.gallery-selecting .gallery-dl-btn,
padding now (16px 8px) per follow-up tweak, brings the title back padding now (16px 8px) per follow-up tweak, brings the title back
over the actual Save button instead of overshooting it. */ over the actual Save button instead of overshooting it. */
.hwfit-serve-row label:has(> .cookbook-serve-slots) { .hwfit-serve-row label:has(> .cookbook-serve-slots) {
grid-column: -2 / -1;
justify-self: end;
text-align: right; text-align: right;
padding-right: 8px; padding-right: 0;
}
.hwfit-serve-row label:has(> .cookbook-serve-slots) > span {
display: inline-block;
position: relative;
left: -33px;
}
.hwfit-serve-preset-row {
display: flex;
justify-content: flex-end;
margin: 0 0 6px;
} }
/* Expanded serve panel make sure it can be scrolled past when it /* Expanded serve panel make sure it can be scrolled past when it
grows taller than the visible viewport. Caps panel height to viewport grows taller than the visible viewport. Caps panel height to viewport
@@ -19635,6 +19781,10 @@ body.gallery-selecting .gallery-dl-btn,
.hwfit-cached-item .hwfit-serve-panel { .hwfit-cached-item .hwfit-serve-panel {
max-height: calc(100svh - 120px); max-height: calc(100svh - 120px);
} }
/* Preset row variant: content pushed to the end of the main axis, row pulled up
   2px (negative top margin) with a 6px bottom margin.
   NOTE(review): this rule appears to sit inside a block whose opening at-rule is
   outside the visible hunk — confirm which condition it applies under. */
.hwfit-serve-preset-row {
justify-content: flex-end;
margin: -2px 0 6px;
}
} }
.hwfit-serve-row label { .hwfit-serve-row label {
font-size: 10px; font-size: 10px;
@@ -19642,6 +19792,114 @@ body.gallery-selecting .gallery-dl-btn,
white-space: nowrap; white-space: nowrap;
letter-spacing: 0.3px; letter-spacing: 0.3px;
} }
/* Context label in the core serve row: spans two grid columns, sizes to its
   content (no min/max width constraint) and aligns to the start of its cell. */
.hwfit-serve-row-core .hwfit-context-label {
grid-column: span 2;
width: auto;
min-width: 0;
max-width: none;
justify-self: start;
}
/* Fixed 100px wrapper around the context field. position: relative makes it a
   containing block for absolutely positioned descendants (presumably
   .hwfit-context-calc-btn — confirm against the markup).
   NOTE(review): align-items has no effect while display is block; it only
   matters where another rule switches this element to a flex/grid display. */
.hwfit-context-control {
display: block;
position: relative;
align-items: center;
margin-top: 2px;
width: 100px;
}
/* Context input fills the wrapper; 40px of right padding is reserved inside the
   field (presumably so typed text does not run under the calc button). */
.hwfit-context-control .hwfit-sf[data-field="ctx"] {
min-width: 0;
width: 100%;
padding-right: 40px;
}
/* Per-field horizontal nudges for labels in the core row. Relative positioning
   shifts each label visually without reflowing its neighbours. */
.hwfit-serve-row-core label:has(.hwfit-sf[data-field="max_seqs"]) {
position: relative;
left: 0;
}
.hwfit-serve-row-core label:has(.hwfit-sf[data-field="gpu_mem"]) {
position: relative;
left: -1px;
}
.hwfit-serve-row-core .hwfit-gpus-label {
position: relative;
left: -69px;
}
/* Context "calc" button: absolutely positioned 3px from the right edge and 4px
   above the top edge of its containing block, fixed at 34x28px. Its content is
   centered with inline-flex, kept on one line, and clipped if it overflows. */
.hwfit-context-calc-btn {
position: absolute;
right: 3px;
top: -4px;
width: 34px;
height: 28px;
min-width: 34px;
padding: 0;
font-size: 10px;
line-height: 1;
display: inline-flex;
align-items: center;
justify-content: center;
text-align: center;
overflow: hidden;
white-space: nowrap;
}
/* Spinners shown inside the button are forced to 12x12px; !important is used so
   these sizes win over other spinner sizing rules regardless of specificity. */
.hwfit-context-calc-btn .spinner-whirlpool {
width: 12px !important;
height: 12px !important;
margin: 0 auto !important;
}
.hwfit-context-calc-btn .ai-spinner-whirlpool {
width: 12px !important;
height: 12px !important;
}
/* Viewports 769px and wider: tweaks for the three-way llama mode toggle. */
@media (min-width: 769px) {
/* Raise the button caption by 6px.
   NOTE(review): `top` only takes effect on a positioned element — the
   position declaration for this span is not visible here; confirm. */
[data-llama-mode-toggle].mode-toggle.mode-toggle-three .mode-toggle-btn > span {
top: -6px !important;
}
/* Hide the "unified" button's own caption text by collapsing its font size... */
.mode-toggle.mode-toggle-three [data-llama-mode="unified"] > span {
font-size: 0;
}
/* ...and show the shortened caption 'Unif' at 11px through generated content. */
.mode-toggle.mode-toggle-three [data-llama-mode="unified"] > span::after {
content: 'Unif';
font-size: 11px;
}
}
/* Viewports 768px and narrower: lay the context field and its calc button out
   in normal flow instead of overlaying the button on the input. */
@media (max-width: 768px) {
/* Context label stacks its children vertically, aligned to the start edge. */
.hwfit-serve-row-core .hwfit-context-label {
display: flex;
flex-direction: column;
align-items: flex-start;
}
/* Env preset and extra env labels in the Advanced fold take the full grid row;
   !important overrides any other grid-column placement for these labels. */
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="vllm_env_preset"]),
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="extra_env"]) {
grid-column: 1 / -1 !important;
}
/* Wrapper becomes an inline flex row with a 4px gap and content-based width. */
.hwfit-context-control {
display: inline-flex;
align-items: center;
gap: 4px;
width: auto;
}
/* Input is a fixed 100px wide with only 6px of right padding. */
.hwfit-context-control .hwfit-sf[data-field="ctx"] {
width: 100px;
padding-right: 6px;
}
/* Button switches to relative positioning (stays in flow as a flex item with a
   fixed 34px basis) and is nudged 2px up and 2px right. */
.hwfit-context-calc-btn {
position: relative;
top: -2px;
left: 2px;
right: auto;
flex: 0 0 34px;
width: 34px;
min-width: 34px;
}
}
/* Small dimmed note line (9px, 60% opacity) shown as its own block. It is kept
   on a single line and truncated with an ellipsis when it overflows. */
.hwfit-auto-ctx-note {
display: block;
margin-top: 3px;
font-size: 9px;
opacity: 0.6;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.hwfit-serve-row label select, .hwfit-serve-row label select,
.hwfit-serve-row label input { .hwfit-serve-row label input {
display: block; display: block;
@@ -19917,23 +20175,66 @@ body.gallery-selecting .gallery-dl-btn,
.hwfit-serve-extra .hwfit-sf { .hwfit-serve-extra .hwfit-sf {
width: 100%; width: 100%;
} }
/* Collapsible wrapper for the serve command: 6px gap above, none below. */
.hwfit-serve-cmd-details {
margin: 6px 0 0;
}
/* Summary row styled as a bordered, muted 10px header. list-style: none removes
   the default disclosure marker; user-select: none keeps clicks from selecting
   the caption text. */
.hwfit-serve-cmd-summary {
display: flex;
align-items: center;
gap: 6px;
list-style: none;
cursor: pointer;
user-select: none;
padding: 6px 10px;
border: 1px solid var(--border);
border-radius: 5px;
background: color-mix(in srgb, var(--fg) 4%, transparent);
color: var(--fg-muted);
font-size: 10px;
letter-spacing: 0.3px;
}
/* WebKit-specific pseudo-element for the default disclosure marker: hidden. */
.hwfit-serve-cmd-summary::-webkit-details-marker {
display: none;
}
/* Custom chevron: a zero-size box whose borders draw a right-pointing triangle
   in the current text color, pushed to the far end by margin-left: auto. */
.hwfit-serve-cmd-summary::after {
content: '';
margin-left: auto;
width: 0;
height: 0;
border-left: 4px solid currentColor;
border-top: 3px solid transparent;
border-bottom: 3px solid transparent;
opacity: 0.6;
transform: rotate(0deg);
transition: transform 0.18s ease;
}
/* Open state: rotate the chevron a quarter turn (animated by the transition). */
.hwfit-serve-cmd-details[open] > .hwfit-serve-cmd-summary::after {
transform: rotate(90deg);
}
/* Open state: square off the summary's bottom corners (presumably so it joins
   the content rendered directly beneath it). */
.hwfit-serve-cmd-details[open] > .hwfit-serve-cmd-summary {
border-bottom-left-radius: 0;
border-bottom-right-radius: 0;
}
.hwfit-serve-cmd { .hwfit-serve-cmd {
margin: 6px 0; margin: 0;
padding: 8px 10px; padding: 8px 10px;
background: color-mix(in srgb, var(--fg) 4%, transparent); background: color-mix(in srgb, var(--fg) 4%, transparent);
border: 1px solid var(--border); border: 1px solid var(--border);
border-top: none;
border-radius: 4px; border-radius: 4px;
border-top-left-radius: 0;
border-top-right-radius: 0;
font-family: 'Berkeley Mono', 'SF Mono', 'Fira Code', monospace; font-family: 'Berkeley Mono', 'SF Mono', 'Fira Code', monospace;
font-size: 10px; font-size: 10px;
white-space: pre-wrap; white-space: pre;
word-break: break-all; word-break: normal;
width: 100%; width: 100%;
box-sizing: border-box; box-sizing: border-box;
resize: none; resize: none;
color: var(--fg); color: var(--fg);
line-height: 1.5; line-height: 1.5;
min-height: 36px; min-height: 112px;
overflow: hidden; overflow: auto;
} }
.hwfit-serve-actions { .hwfit-serve-actions {
display: flex; display: flex;
@@ -19946,6 +20247,10 @@ body.gallery-selecting .gallery-dl-btn,
font-size: 11px; font-size: 11px;
} }
.hwfit-serve-actions-spacer { flex: 1 1 auto; } .hwfit-serve-actions-spacer { flex: 1 1 auto; }
/* Slots control when placed inside the serve actions row: no margin, and
   stretched along the row's cross axis. */
.hwfit-serve-actions .cookbook-serve-slots {
margin: 0;
align-self: stretch;
}
.hwfit-serve-launch { .hwfit-serve-launch {
background: var(--accent-primary, var(--red)); background: var(--accent-primary, var(--red));
color: #fff; color: #fff;
@@ -20017,7 +20322,7 @@ body.gallery-selecting .gallery-dl-btn,
width: 32px; width: 32px;
height: 32px; height: 32px;
min-width: 32px; min-width: 32px;
top: -5px; top: -3px;
} }
.cookbook-task .cookbook-task-menu-btn:active { .cookbook-task .cookbook-task-menu-btn:active {
opacity: 1; opacity: 1;
@@ -20040,6 +20345,12 @@ body.gallery-selecting .gallery-dl-btn,
.hwfit-serve-cmd-wrap { .hwfit-serve-cmd-wrap {
position: relative; position: relative;
} }
/* Small muted caption (10px, slight letter spacing) with tight vertical margins. */
.hwfit-serve-cmd-title {
margin: 2px 0 3px;
font-size: 10px;
color: var(--fg-muted);
letter-spacing: 0.3px;
}
.hwfit-serve-cmd-wrap .hwfit-serve-cmd { .hwfit-serve-cmd-wrap .hwfit-serve-cmd {
/* Just enough breathing room so a cursor at line-end doesn't actually /* Just enough breathing room so a cursor at line-end doesn't actually
touch the Copy icon text otherwise uses the full width of the box. */ touch the Copy icon text otherwise uses the full width of the box. */
@@ -20171,6 +20482,7 @@ body.gallery-selecting .gallery-dl-btn,
} }
/* Status-driven left stripe via :has() — graceful fallback to neutral. */ /* Status-driven left stripe via :has() — graceful fallback to neutral. */
.cookbook-task:has(.cookbook-task-running) { border-left-color: var(--green, #50fa7b); } .cookbook-task:has(.cookbook-task-running) { border-left-color: var(--green, #50fa7b); }
/* Downloading: left stripe uses --color-accent, falling back to #00aaff. */
.cookbook-task:has(.cookbook-task-downloading) { border-left-color: var(--color-accent, #00aaff); }
.cookbook-task:has(.cookbook-task-done) { border-left-color: var(--green, #50fa7b); } .cookbook-task:has(.cookbook-task-done) { border-left-color: var(--green, #50fa7b); }
.cookbook-task:has(.cookbook-task-error) { border-left-color: var(--color-error, var(--warn, #f87171)); } .cookbook-task:has(.cookbook-task-error) { border-left-color: var(--color-error, var(--warn, #f87171)); }
.cookbook-task:has(.cookbook-task-queued) { border-left-color: var(--color-warning, #f0ad4e); } .cookbook-task:has(.cookbook-task-queued) { border-left-color: var(--color-warning, #f0ad4e); }
@@ -20222,6 +20534,10 @@ body.gallery-selecting .gallery-dl-btn,
background: color-mix(in srgb, var(--fg) 10%, transparent); background: color-mix(in srgb, var(--fg) 10%, transparent);
color: var(--fg-muted); color: var(--fg-muted);
} }
/* Type chip of a download task whose data-status is "running": accent-colored
   text on an 18% accent tint (--color-accent, falling back to #00aaff). */
.cookbook-task[data-type="download"][data-status="running"] .cookbook-task-type[data-type="download"] {
background: color-mix(in srgb, var(--color-accent, #00aaff) 18%, transparent);
color: var(--color-accent, #00aaff);
}
/* Finished state overrides the per-type colors so a completed download or /* Finished state overrides the per-type colors so a completed download or
serve task shows the same green FINISHED chip. */ serve task shows the same green FINISHED chip. */
.cookbook-task-type.cookbook-task-type-done { .cookbook-task-type.cookbook-task-type-done {
@@ -20390,6 +20706,7 @@ body.gallery-selecting .gallery-dl-btn,
line-height: 16px; line-height: 16px;
} }
.cookbook-task-running { background: color-mix(in srgb, var(--green, #50fa7b) 20%, transparent); color: var(--green, #50fa7b); } .cookbook-task-running { background: color-mix(in srgb, var(--green, #50fa7b) 20%, transparent); color: var(--green, #50fa7b); }
/* Downloading: accent text on a 20% accent tint (--color-accent, falling back to #00aaff). */
.cookbook-task-downloading { background: color-mix(in srgb, var(--color-accent, #00aaff) 20%, transparent); color: var(--color-accent, #00aaff); }
/* Stopping: same pill treatment as "running" but orange. */ /* Stopping: same pill treatment as "running" but orange. */
.cookbook-task-stopping { background: color-mix(in srgb, var(--orange, #ffb86c) 22%, transparent); color: var(--orange, #ffb86c); } .cookbook-task-stopping { background: color-mix(in srgb, var(--orange, #ffb86c) 22%, transparent); color: var(--orange, #ffb86c); }
.cookbook-task-done { background: color-mix(in srgb, var(--green) 15%, transparent); color: var(--green); } .cookbook-task-done { background: color-mix(in srgb, var(--green) 15%, transparent); color: var(--green); }
@@ -20434,6 +20751,16 @@ body.gallery-selecting .gallery-dl-btn,
.cookbook-task-header { .cookbook-task-header {
cursor: pointer; cursor: pointer;
} }
/* Serve tasks: inset the header 4px from the top and sides of the card and
   round its corners. */
.cookbook-task[data-type="serve"] .cookbook-task-header {
margin: 4px 4px 0;
border-radius: 6px;
}
/* Serve tasks: output wrapper gets the matching 4px inset on the sides and bottom. */
.cookbook-task[data-type="serve"] .cookbook-output-wrap {
margin: 0 4px 4px;
}
/* Serve tasks: output block uses the same 6px corner radius as the header. */
.cookbook-task[data-type="serve"] .cookbook-output-pre {
border-radius: 6px;
}
/* Env bar — match admin-card */ /* Env bar — match admin-card */
.cookbook-env-bar { .cookbook-env-bar {
@@ -20555,6 +20882,9 @@ body.gallery-selecting .gallery-dl-btn,
/* Mobile cookbook sizing — kept in line with calendar/library modals. */ /* Mobile cookbook sizing — kept in line with calendar/library modals. */
@media (max-width: 768px) { @media (max-width: 768px) {
/* Context label spans every grid column (first line to last line) here. */
.hwfit-serve-row-core .hwfit-context-label {
grid-column: 1 / -1;
}
/* The Speculative control (checkbox + method dropdown + token stepper) /* The Speculative control (checkbox + method dropdown + token stepper)
is too wide for a phone the stepper ran off the right edge of the is too wide for a phone the stepper ran off the right edge of the
modal. Let the group wrap onto its own line, take full width, and modal. Let the group wrap onto its own line, take full width, and
@@ -20562,10 +20892,19 @@ body.gallery-selecting .gallery-dl-btn,
.hwfit-spec-group { .hwfit-spec-group {
flex-wrap: wrap; flex-wrap: wrap;
flex-basis: 100%; flex-basis: 100%;
column-gap: 4px;
row-gap: 4px; row-gap: 4px;
} }
.hwfit-spec-group .hwfit-spec-method { min-width: 0; flex: 1 1 auto; } .hwfit-spec-group .hwfit-spec-method {
min-width: 0;
flex: 0 1 82px;
max-width: 82px;
}
.hwfit-numstep { flex: 0 0 auto; } .hwfit-numstep { flex: 0 0 auto; }
/* Inline help chip inside the speculative group: never grows or shrinks, with a
   2px left margin forced via !important.
   NOTE(review): presumably still inside the mobile media block opened earlier —
   the hunk boundary hides the nesting; confirm. */
.hwfit-spec-group .hwfit-help-chip-inline {
flex: 0 0 auto;
margin-left: 2px !important;
}
.cookbook-card-title { font-size: 13px; } .cookbook-card-title { font-size: 13px; }
.cookbook-card-desc { font-size: 12px; } .cookbook-card-desc { font-size: 12px; }
.cookbook-field-label { font-size: 12px; } .cookbook-field-label { font-size: 12px; }
@@ -22310,6 +22649,9 @@ body:not(.welcome-ready) #welcome-screen {
transform: scaleY(0.4) translateY(8px); transform: scaleY(0.4) translateY(8px);
} }
} }
/* One full clockwise turn; only the end state is declared, so the animation
   starts from the element's existing transform. */
@keyframes model-picker-refresh-spin {
to { transform: rotate(360deg); }
}
@keyframes modal-enter { @keyframes modal-enter {
from { from {
@@ -22401,6 +22743,25 @@ body:not(.welcome-ready) #welcome-screen {
} }
/* ── Tasks ── */ /* ── Tasks ── */
/* Failure indicator for the two Tasks entry points (#tool-tasks-btn and
   #rail-tasks): while .task-failure-pending is set, the control turns red
   (--red, falling back to #f87171). */
#tool-tasks-btn.task-failure-pending,
#rail-tasks.task-failure-pending {
color: var(--red, #f87171);
}
/* A 7px glowing red dot rendered as generated content. flex: 0 0 auto keeps it
   from being resized when the host is a flex container. */
#tool-tasks-btn.task-failure-pending::after,
#rail-tasks.task-failure-pending::after {
content: '';
width: 7px;
height: 7px;
border-radius: 999px;
background: var(--red, #f87171);
box-shadow: 0 0 7px var(--red, #f87171), 0 0 3px var(--red, #f87171);
flex: 0 0 auto;
}
/* On the rail item the dot is taken out of flow and placed 8px from the top and
   right edges of its containing block.
   NOTE(review): assumes #rail-tasks (or an ancestor) is positioned — confirm. */
#rail-tasks.task-failure-pending::after {
position: absolute;
right: 8px;
top: 8px;
}
.tasks-modal-content { max-width: 600px; width: min(600px, 92vw); background: var(--bg); font-size: 12px; } .tasks-modal-content { max-width: 600px; width: min(600px, 92vw); background: var(--bg); font-size: 12px; }
/* Tasks tabs reuse the .memory-tab look. The Brain window's tab bar is /* Tasks tabs reuse the .memory-tab look. The Brain window's tab bar is
@@ -22449,6 +22810,14 @@ body:not(.welcome-ready) #welcome-screen {
title still reads in dark mode. Lightness stays adaptive. */ title still reads in dark mode. Lightness stays adaptive. */
color: hsl(var(--cat-hue) 60% 60%); color: hsl(var(--cat-hue) 60% 60%);
} }
/* Compact bold red tag (10px, single line, never wraps), pulled 2px toward the
   preceding element by a negative left margin. */
.task-log-failed-tag {
color: var(--red, #f87171);
font-size: 10px;
font-weight: 700;
line-height: 1;
white-space: nowrap;
margin-left: -2px;
}
.task-log-task-icon { .task-log-task-icon {
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
@@ -23152,6 +23521,8 @@ input.settings-select::placeholder { color: color-mix(in srgb, var(--fg) 35%, tr
gap: 6px; gap: 6px;
padding-left: 12px; padding-left: 12px;
border-left: 2px solid color-mix(in srgb, var(--fg) 12%, transparent); border-left: 2px solid color-mix(in srgb, var(--fg) 12%, transparent);
max-width: 100%;
box-sizing: border-box;
} }
.settings-fallback-num { .settings-fallback-num {
font-size: 11px; font-size: 11px;
@@ -23159,7 +23530,24 @@ input.settings-select::placeholder { color: color-mix(in srgb, var(--fg) 35%, tr
min-width: 14px; min-width: 14px;
text-align: right; text-align: right;
} }
.settings-fallback-row .settings-select { flex: 1; min-width: 0; } .settings-fallback-row .settings-select {
width: 0;
min-width: 0;
min-inline-size: 0;
box-sizing: border-box;
overflow: hidden;
text-overflow: ellipsis;
}
/* First select in a fallback row: 128px preferred width, may shrink but not
   grow, and is capped at 34% of the row.
   NOTE(review): :first-of-type / :nth-of-type() count siblings by element type,
   not by the .settings-select class — this relies on both controls sharing the
   same tag and being the first two of that tag in the row; confirm in markup. */
.settings-fallback-row .settings-select:first-of-type {
flex: 0 1 128px;
width: 128px;
max-width: 34%;
}
/* Second select: zero basis with grow enabled, so it absorbs the remaining
   width of the row. */
.settings-fallback-row .settings-select:nth-of-type(2) {
flex: 1 1 0;
width: 0;
max-width: 100%;
}
/* Cookbook Serve Advanced fold wraps the rarely-touched tuning rows /* Cookbook Serve Advanced fold wraps the rarely-touched tuning rows
(KV/Attention/Swap/Env for vLLM, llama.cpp batch/cache/split, VRAM (KV/Attention/Swap/Env for vLLM, llama.cpp batch/cache/split, VRAM
monitor, speculative, extra args). Matches the existing .hwfit-panel- monitor, speculative, extra args). Matches the existing .hwfit-panel-
@@ -23240,6 +23628,40 @@ details.hwfit-serve-advanced > .hwfit-serve-row label select,
details.hwfit-serve-advanced > .hwfit-serve-row label input { details.hwfit-serve-advanced > .hwfit-serve-row label input {
margin-top: 1px; margin-top: 1px;
} }
/* Advanced fold: per-field horizontal offsets. Each label is shifted with
   relative positioning, which moves it visually without reflowing the row. */
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="vllm_attn_backend"]) {
position: relative;
left: -83px;
}
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="vllm_block_size"]) {
position: relative;
left: -51px;
}
/* Block-size field is 6px narrower than its label. */
details.hwfit-serve-advanced .hwfit-sf[data-field="vllm_block_size"] {
width: calc(100% - 6px);
}
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="swap"]) {
position: relative;
left: -45px;
}
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="vllm_kv_cache_dtype"]) {
position: relative;
left: 2px;
}
/* Viewports 768px and narrower: replace the large desktop offsets with small
   ones, and give each checkbox item in the checks group a full-width flex basis. */
@media (max-width: 768px) {
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="vllm_block_size"]) {
left: 1px;
}
details.hwfit-serve-advanced label:has(.hwfit-sf[data-field="swap"]) {
left: -3px;
}
details.hwfit-serve-advanced > .hwfit-serve-checks .hwfit-sf-cb {
flex: 1 1 100%;
}
}
/* KV cache dtype field is pinned to 60px wide. */
details.hwfit-serve-advanced .hwfit-sf[data-field="vllm_kv_cache_dtype"] {
width: 60px;
min-width: 60px;
}
details.hwfit-serve-advanced > .hwfit-serve-checks { details.hwfit-serve-advanced > .hwfit-serve-checks {
gap: 4px; gap: 4px;
row-gap: 4px; row-gap: 4px;
@@ -23253,6 +23675,9 @@ details.hwfit-serve-advanced > .hwfit-serve-checks.hwfit-backend-sglang,
details.hwfit-serve-advanced > .hwfit-serve-extra { details.hwfit-serve-advanced > .hwfit-serve-extra {
margin-top: -8px; margin-top: -8px;
} }
/* Pull the extra-args block up by 18px.
   NOTE(review): the rule just before this one appears to set margin-top: -8px
   through a selector list that also ends in this selector; at equal specificity
   this later declaration wins by source order — confirm the override is intended. */
details.hwfit-serve-advanced > .hwfit-serve-extra {
margin-top: -18px;
}
details.hwfit-serve-advanced > .hwfit-serve-row:last-of-type, details.hwfit-serve-advanced > .hwfit-serve-row:last-of-type,
details.hwfit-serve-advanced > .hwfit-serve-checks:last-of-type { details.hwfit-serve-advanced > .hwfit-serve-checks:last-of-type {
margin-bottom: 0; margin-bottom: 0;
@@ -23260,7 +23685,6 @@ details.hwfit-serve-advanced > .hwfit-serve-checks:last-of-type {
.settings-fallback-remove { .settings-fallback-remove {
flex-shrink: 0; flex-shrink: 0;
margin-right: 4px;
width: 32px; width: 32px;
height: 32px; height: 32px;
display: inline-flex; display: inline-flex;
@@ -29267,8 +29691,15 @@ button .spinner-whirlpool {
.email-reader-atts-wrap > .email-reader-atts { .email-reader-atts-wrap > .email-reader-atts {
border-bottom: none !important; border-bottom: none !important;
} }
.email-reader-atts-wrap.collapsed > .email-reader-atts { display: none; } .email-reader-atts-wrap.collapsed > .email-reader-atts,
.email-reader-atts-wrap.collapsed > .email-reader-atts-hidden-note { display: none; }
.email-reader-atts-wrap.collapsed .email-summary-chevron { transform: rotate(-90deg); } .email-reader-atts-wrap.collapsed .email-summary-chevron { transform: rotate(-90deg); }
/* Small muted note (10px, 65% opacity) with 14px side padding and 6px bottom
   padding. */
.email-reader-atts-hidden-note {
padding: 0 14px 6px;
font-size: 10px;
color: var(--fg-muted);
opacity: 0.65;
}
/* Quote fold = neutral full-width band (matches attachments header). */ /* Quote fold = neutral full-width band (matches attachments header). */
.email-quote-fold { .email-quote-fold {
@@ -30428,22 +30859,27 @@ body.doc-find-active mark.doc-find-mark.current {
attribute tooltips were slow / unreliable, so we just grow the chip. */ attribute tooltips were slow / unreliable, so we just grow the chip. */
max-width: 90vw; max-width: 90vw;
} }
.email-attachment-chip > span:not(.att-size) { .email-attachment-chip > span:not(.att-size):not(.email-attachment-open) {
overflow: hidden; text-overflow: ellipsis; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
flex: 1 1 auto; min-width: 0; flex: 1 1 auto; min-width: 0;
} }
.email-attachment-chip:hover > span:not(.att-size) { .email-attachment-chip:hover > span:not(.att-size):not(.email-attachment-open) {
overflow: visible; overflow: visible;
text-overflow: clip; text-overflow: clip;
} }
.email-attachment-chip .att-size { opacity: 0.5; font-size: 10px; flex-shrink: 0; } .email-attachment-chip .att-size { opacity: 0.5; font-size: 10px; flex-shrink: 0; }
/* Muted chip variant: dimmed to 65% opacity, restored to full opacity on hover. */
.email-attachment-chip-muted { opacity: 0.65; }
.email-attachment-chip-muted:hover { opacity: 1; }
/* Related-attachment chip: border is the normal border color mixed with 30% of
   the accent color. --accent is given a fallback chain (--accent-primary, then
   --red) because a var() with no fallback that resolves to nothing makes the
   whole border-color declaration invalid at computed-value time. */
.email-attachment-chip-related {
  border-color: color-mix(in srgb, var(--accent, var(--accent-primary, var(--red))) 30%, var(--border));
}
/* "Open in editor" launch icon same prominent style on desktop AND mobile /* "Open in editor" launch icon same prominent style on desktop AND mobile
(was 24px / dim / no border on desktop, easy to miss). Accent-tinted (was 24px / dim / no border on desktop, easy to miss). Accent-tinted
background + border makes it read as a real action. */ background + border makes it read as a real action. */
.email-attachment-open { .email-attachment-open {
display: inline-flex; align-items: center; gap: 4px; display: inline-flex; align-items: center; gap: 4px;
height: 22px; padding: 0 9px; border-radius: 11px; height: 22px; padding: 0 9px; border-radius: 999px;
margin-left: 6px; flex-shrink: 0; margin-left: 6px; flex: 0 0 auto;
font-size: 10px; font-weight: 500; letter-spacing: 0.02em; font-size: 10px; font-weight: 500; letter-spacing: 0.02em;
color: var(--accent-primary, var(--red)); color: var(--accent-primary, var(--red));
background: color-mix(in srgb, var(--accent-primary, var(--red)) 10%, transparent); background: color-mix(in srgb, var(--accent-primary, var(--red)) 10%, transparent);
@@ -30466,7 +30902,8 @@ body.doc-find-active mark.doc-find-mark.current {
display: none; display: none;
} }
.email-attachment-chip:not(:hover) .email-attachment-open { .email-attachment-chip:not(:hover) .email-attachment-open {
width: 22px; width: 28px;
min-width: 28px;
padding: 0; padding: 0;
justify-content: center; justify-content: center;
gap: 0; gap: 0;
@@ -35930,6 +36367,13 @@ body.research-panel-view #research-divider { display:none; }
font-size: 14px; font-size: 14px;
font-weight: 600; font-weight: 600;
letter-spacing: -0.03em; letter-spacing: -0.03em;
position: relative;
top: 2px;
}
.research-new-job > .doclib-desc {
position: relative;
top: 4px;
margin-bottom: 6px;
} }
@media (max-width: 600px) { @media (max-width: 600px) {
/* Keep the "Research" title visible on mobile (matches the Cookbook tab /* Keep the "Research" title visible on mobile (matches the Cookbook tab
@@ -36418,6 +36862,7 @@ body.research-panel-view #research-divider { display:none; }
} }
.research-section:not(.collapsed) > .research-section-header { border-bottom: 1px solid var(--border); } .research-section:not(.collapsed) > .research-section-header { border-bottom: 1px solid var(--border); }
.research-section-header:hover { background: color-mix(in srgb, var(--fg) 4%, transparent); } .research-section-header:hover { background: color-mix(in srgb, var(--fg) 4%, transparent); }
.research-section-header:has(.research-library-hint) { padding-bottom: 4px; }
.research-section-title { font-size: 14px; font-weight: 600; letter-spacing: -0.03em; } .research-section-title { font-size: 14px; font-weight: 600; letter-spacing: -0.03em; }
.research-section-chevron { flex-shrink: 0; opacity: 0.55; transition: transform 0.2s ease; } .research-section-chevron { flex-shrink: 0; opacity: 0.55; transition: transform 0.2s ease; }
.research-section.collapsed .research-section-chevron { transform: rotate(-90deg); } .research-section.collapsed .research-section-chevron { transform: rotate(-90deg); }
@@ -37065,7 +37510,8 @@ body.theme-frosted .modal {
.research-library-hint { .research-library-hint {
/* full-width line in the header, pulled up with negative MARGIN (collapses /* full-width line in the header, pulled up with negative MARGIN (collapses
the gap so it moves up without making the header taller). */ the gap so it moves up without making the header taller). */
width: 100%; flex-basis: 100%; margin: -22px 0 0; line-height: 1.2; width: 100%; flex-basis: 100%; margin: -26px 0 0; line-height: 1.2;
font-size: calc(1em - 2px); opacity: 0.55;
} }
.research-library-link { .research-library-link {
background: none; border: none; padding: 0; cursor: pointer; background: none; border: none; padding: 0; cursor: pointer;
+30
View File
@@ -16,6 +16,7 @@ from pathlib import Path
SRC = Path(__file__).resolve().parent.parent / "static/js/cookbook.js" SRC = Path(__file__).resolve().parent.parent / "static/js/cookbook.js"
SERVE_SRC = Path(__file__).resolve().parent.parent / "static/js/cookbookServe.js" SERVE_SRC = Path(__file__).resolve().parent.parent / "static/js/cookbookServe.js"
ROUTES_SRC = Path(__file__).resolve().parent.parent / "routes/cookbook_routes.py"
def test_cpu_only_drops_gpu_only_flags(): def test_cpu_only_drops_gpu_only_flags():
@@ -51,3 +52,32 @@ def test_windows_diffusers_uses_python_not_python3():
assert "const diffusersPy = _isWindows() ? 'python' : _py3Bin;" in text assert "const diffusersPy = _isWindows() ? 'python' : _py3Bin;" in text
assert "cmd += `${diffusersPy} scripts/diffusion_server.py" in text assert "cmd += `${diffusersPy} scripts/diffusion_server.py" in text
assert "cmd += `python3 scripts/diffusion_server.py" not in text assert "cmd += `python3 scripts/diffusion_server.py" not in text
def test_vllm_blank_swap_omits_swap_space_flag():
    """Pin the cookbook.js logic that skips ``--swap-space`` for blank/off values.

    Source-text test: reads the file at ``SRC`` and requires each JavaScript
    snippet below to appear verbatim.
    """
    source = SRC.read_text(encoding="utf-8")
    required_snippets = (
        "const _swapRaw = (f.swap ?? '').toString().trim().toLowerCase();",
        "['0', 'off', 'none', 'false'].includes(_swapRaw)",
        "if (_swapRaw && !['0', 'off', 'none', 'false'].includes(_swapRaw)) cmd += ` --swap-space ${_swapRaw}`;",
    )
    for snippet in required_snippets:
        assert snippet in source
def test_serve_preflight_uses_selected_server_not_stale_env_host():
    """Pin that cookbookServe.js derives its hosts from ``_selectedServeTarget``.

    Source-text test: reads the file at ``SERVE_SRC`` and requires each
    JavaScript snippet below to appear verbatim.
    """
    source = SERVE_SRC.read_text(encoding="utf-8")
    required_snippets = (
        "const _selectedServeTarget = (() => {",
        "const _hostStr = _selectedServeTarget.host || '';",
        "(t.remoteHost || '') === _hostStr",
        "const _probeHost = (_selectedServeTarget.host || '').trim();",
        "const _portHost = (_selectedServeTarget.host || '').trim();",
    )
    for snippet in required_snippets:
        assert snippet in source
def test_vllm_route_strips_swap_space_when_runtime_rejects_it():
    """Pin the ``--swap-space`` handling text in the cookbook routes module.

    Source-text test: reads the file at ``ROUTES_SRC`` and requires each log
    message / shell fragment below to appear verbatim.
    """
    source = ROUTES_SRC.read_text(encoding="utf-8")
    required_snippets = (
        "Removing --swap-space 0; off is represented by omitting the vLLM flag.",
        "vLLM serve does not support --swap-space; removing it",
        "ODYSSEUS_VLLM_HELP_CMD",
        "print(shlex.join(parts[:serve_i + 1] + [\"--help\"]))",
        "eval \"$ODYSSEUS_VLLM_HELP_CMD\" 2>&1 | grep -q -- \"--swap-space\"",
        "eval \"$ODYSSEUS_SERVE_CMD\"",
    )
    for snippet in required_snippets:
        assert snippet in source
@@ -126,6 +126,27 @@ def test_plain_reply_copy_text_is_unchanged(node_available):
assert out["content"] == raw assert out["content"] == raw
def test_minimax_namespaced_thinking_is_extracted(node_available):
    """A ``<mm:think>…</mm:think>`` block is split out from the visible reply."""
    reasoning = 'The user said "idk" - just casual.'
    reply = "Haha fair. Well, I'm here whenever you figure it out."

    result = _extract_thinking_blocks(f"<mm:think>{reasoning}</mm:think>{reply}")

    content = result["content"]
    assert result["thinkingBlocks"] == [reasoning]
    assert content == reply
    # No tag residue may survive in what the user sees.
    assert "mm:think" not in content
def test_minimax_orphan_closing_tag_drops_leaked_reasoning(node_available):
    """A lone leading ``</mm:think>`` is removed and yields no thinking blocks."""
    reply = "Hi! What can I do for you?"

    result = _extract_thinking_blocks("</mm:think>" + reply)

    content = result["content"]
    assert result["thinkingBlocks"] == []
    assert content == reply
    # The stray closing tag itself must not leak into the visible text.
    assert "mm:think" not in content
def test_thinking_only_message_yields_empty_content(node_available): def test_thinking_only_message_yields_empty_content(node_available):
# The copy handler falls back to the raw text in this case so the button # The copy handler falls back to the raw text in this case so the button
# still copies something for turns interrupted mid-thinking. # still copies something for turns interrupted mid-thinking.