Merge pull request #4701 from pewdiepie-archdaemon/sync-dev-from-main-20260622

chore(dev): sync main cookbook and model workflow fixes
This commit is contained in:
PewDiePie
2026-06-22 11:52:26 +09:00
committed by GitHub
65 changed files with 6072 additions and 846 deletions
+17
View File
@@ -20,6 +20,23 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
gosu \
&& rm -rf /var/lib/apt/lists/*
# Docker CLI (client only — daemon stays on the host via the
# /var/run/docker.sock mount). The Debian `docker.io` package ships
# dockerd but not the client binary on slim, so grab the static client
# tarball from download.docker.com instead.
ARG DOCKER_CLI_VERSION=27.5.1
RUN ARCH="$(dpkg --print-architecture)" \
&& case "$ARCH" in \
amd64) DARCH=x86_64 ;; \
arm64) DARCH=aarch64 ;; \
*) echo "unsupported arch $ARCH"; exit 1 ;; \
esac \
&& curl -fsSL "https://download.docker.com/linux/static/stable/${DARCH}/docker-${DOCKER_CLI_VERSION}.tgz" \
-o /tmp/docker.tgz \
&& tar -xzf /tmp/docker.tgz -C /tmp \
&& install -m 0755 /tmp/docker/docker /usr/local/bin/docker \
&& rm -rf /tmp/docker /tmp/docker.tgz
WORKDIR /app
# Install Python deps first (layer cache). Optional extras (PyMuPDF AGPL, etc.)
+9
View File
@@ -28,6 +28,14 @@ services:
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
# Docker socket — lets Cookbook launch commands like
# `docker exec ollama-rocm ollama show <tag>` reach the host's
# Docker daemon (and sibling containers like ollama-rocm /
# ollama-test). The in-container user needs to be in the
# socket's owning group — see `group_add`; the GID there must
# match the host's `docker` group. That GID is assigned when
# Docker is installed and varies by host (often 999 on Ubuntu;
# the fallback used here is 963), so check it with
# `getent group docker` and set DOCKER_GID if yours differs.
# Note: access to this socket is root-equivalent on the host.
- /var/run/docker.sock:/var/run/docker.sock
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
@@ -93,6 +101,7 @@ services:
- /dev/kfd
- /dev/dri
group_add:
- "${DOCKER_GID:-963}"
- video
- ${RENDER_GID:-render}
+10
View File
@@ -27,6 +27,16 @@ services:
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
# Docker socket — lets Cookbook launch commands like
# `docker exec ollama-rocm ollama show <tag>` reach the host's
# Docker daemon (and sibling containers like ollama-rocm /
# ollama-test). The in-container user needs to be in the
# socket's owning group — see `group_add` below; the GID there
# must match the host's `docker` group. That GID is assigned when
# Docker is installed and varies by host (often 999 on Ubuntu;
# the fallback used here is 963), so check it with
# `getent group docker` and set DOCKER_GID if yours differs.
# Note: access to this socket is root-equivalent on the host.
- /var/run/docker.sock:/var/run/docker.sock
group_add:
- "${DOCKER_GID:-963}"
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
+10
View File
@@ -16,6 +16,16 @@ services:
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
# Docker socket — lets Cookbook launch commands like
# `docker exec ollama-rocm ollama show <tag>` reach the host's
# Docker daemon (and sibling containers like ollama-rocm /
# ollama-test). The in-container user needs to be in the
# socket's owning group — see `group_add` below; the GID there
# must match the host's `docker` group. That GID is assigned when
# Docker is installed and varies by host (often 999 on Ubuntu;
# the fallback used here is 963), so check it with
# `getent group docker` and set DOCKER_GID if yours differs.
# Note: access to this socket is root-equivalent on the host.
- /var/run/docker.sock:/var/run/docker.sock
group_add:
- "${DOCKER_GID:-963}"
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
+24 -2
View File
@@ -26,6 +26,27 @@ if ! getent passwd "$PUID" >/dev/null 2>&1; then
useradd -u "$PUID" -g "$PGID" -M -s /bin/sh -d /app odysseus
fi
ODY_USER="$(getent passwd "$PUID" | cut -d: -f1)"
[ -z "$ODY_USER" ] && ODY_USER=odysseus
# Docker-socket group plumbing. When /var/run/docker.sock is bind-mounted
# (Cookbook uses docker exec to reach sibling containers), the socket is
# owned by root:<host docker gid>. Add the app user to that group and later
# call gosu by username so supplementary groups are retained.
DOCKER_SOCK="${DOCKER_SOCK:-/var/run/docker.sock}"
if [ -S "$DOCKER_SOCK" ]; then
SOCK_GID="$(stat -c '%g' "$DOCKER_SOCK" 2>/dev/null || echo '')"
if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
if ! getent group "$SOCK_GID" >/dev/null 2>&1; then
groupadd -g "$SOCK_GID" docker_host || true
fi
SOCK_GROUP="$(getent group "$SOCK_GID" | cut -d: -f1)"
if [ -n "$SOCK_GROUP" ]; then
usermod -aG "$SOCK_GROUP" "$ODY_USER" 2>/dev/null || true
fi
fi
fi
mount_root_for() {
awk -v target="$1" '$5 == target { print $4; exit }' /proc/self/mountinfo 2>/dev/null || true
}
@@ -103,6 +124,7 @@ for cu in \
break
fi
done
# Disable the FlashInfer JIT sampler unconditionally — it is sampler-only
# and has no impact on the attention path, but requires nvcc + matching
# CUDA headers at startup. Without this, vLLM crashes with "Could not find
@@ -116,9 +138,9 @@ export PATH="/app/.local/bin:$PATH"
# Run first-time setup as the app user so data/ files get the right ownership.
# setup.py is idempotent — skips auth.json / .env if they already exist.
# || true so a setup failure never prevents the container from starting.
"$GOSU_BIN" "$PUID:$PGID" "$PYTHON_BIN" /app/setup.py || true
"$GOSU_BIN" "$ODY_USER" "$PYTHON_BIN" /app/setup.py || true
# Drop root and run the actual app. `gosu` is preferred over `su` /
# `sudo` because it cleans up the process tree (no extra shell layer)
# so signals (SIGTERM from `docker stop`) reach uvicorn directly.
exec "$GOSU_BIN" "$PUID:$PGID" "$@"
exec "$GOSU_BIN" "$ODY_USER" "$@"
+33 -4
View File
@@ -22,6 +22,31 @@ from fastapi import HTTPException
logger = logging.getLogger(__name__)
# Casual openers: greetings, laughter, thanks and filler words.
# Regex alternation is ordered, so the longer "thank you" must come before
# ``thanks?``; otherwise "thank you so much" matches the bare "thank" and
# "you" is wrongly counted as part of the tail.
_CASUAL_OPENING_RE = re.compile(
    r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
    r"lol|lmao|haha+|hehe+|thank you|thanks?|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
    re.IGNORECASE,
)

# Topic words that mark a message as a real request even when it starts
# with a casual opener (e.g. "hey launch vllm").
_CASUAL_BLOCKLIST_RE = re.compile(
    r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
    r"download|model|email|document|doc|note|calendar|task|search|web|research|"
    r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
    re.IGNORECASE,
)


def _is_casual_low_signal(text: str) -> bool:
    """Return True for short greetings/slang that should not pull memory, skills, RAG, or docs.

    A message qualifies when it is a single line that starts with a casual
    opener, the remainder contains no blocklisted topic word, and the
    remainder is at most two words long. ``None`` and empty input are
    treated as not casual.
    """
    s = str(text or "").strip()
    m = _CASUAL_OPENING_RE.match(s)
    if not m:
        # Includes multi-line messages: ``.*`` does not cross a newline, so
        # anything with further lines after the opener never matches.
        return False
    tail = m.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(tail):
        return False
    # ``\w`` is Unicode-aware for str patterns, so non-ASCII words are
    # counted too. An ASCII-only class would see zero words in e.g. a
    # Cyrillic request and misclassify it as low-signal.
    tail_words = re.findall(r"[\w'-]+", tail)
    return len(tail_words) <= 2
# Strong references to in-flight fire-and-forget tasks scheduled from this
# module. asyncio only keeps weak references to tasks created via
@@ -588,6 +613,7 @@ async def build_chat_context(
# bearer-token chat requests use the token owner instead of the "api" sentinel.
user = effective_user(request)
uprefs = load_prefs_for_user(user)
casual_low_signal = _is_casual_low_signal(message)
# Memory enabled?
mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True)
@@ -597,6 +623,9 @@ async def build_chat_context(
if not allow_tool_preprocessing:
mem_enabled = False
skills_enabled = False
if casual_low_signal:
mem_enabled = False
skills_enabled = False
logger.debug(
"Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
@@ -612,11 +641,11 @@ async def build_chat_context(
# Use RAG?
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
if incognito or not allow_tool_preprocessing or is_research_spinoff:
if incognito or not allow_tool_preprocessing or is_research_spinoff or casual_low_signal:
use_rag_val = False
# If pre-fetched search context was provided (compare mode), skip live web search
skip_web = bool(search_context) or not allow_tool_preprocessing
skip_web = bool(search_context) or not allow_tool_preprocessing or casual_low_signal
# Build context preface
# The stream path uses enhanced_message (with CoT/preprocessing applied),
@@ -635,7 +664,7 @@ async def build_chat_context(
incognito=incognito,
use_skills=skills_enabled,
)
if use_rag is not None or is_research_spinoff:
if use_rag is not None or is_research_spinoff or casual_low_signal:
_preface_kwargs["use_rag"] = use_rag_val
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
@@ -643,7 +672,7 @@ async def build_chat_context(
used_memories = getattr(chat_processor, '_last_used_memories', [])
# Inject pre-fetched search context (compare mode)
if search_context and allow_tool_preprocessing:
if search_context and allow_tool_preprocessing and not casual_low_signal:
preface.append(untrusted_context_message("prefetched search context", search_context))
# YouTube transcripts
+10 -1
View File
@@ -829,7 +829,11 @@ def setup_chat_routes(
from src.settings import get_setting
_global_disabled = get_setting("disabled_tools", [])
if _global_disabled and isinstance(_global_disabled, list):
disabled_tools.update(_global_disabled)
explicit_web_allowed = allow_web_search is not None and str(allow_web_search).lower() == "true"
if explicit_web_allowed:
disabled_tools.update(t for t in _global_disabled if t not in {"web_search", "web_fetch"})
else:
disabled_tools.update(_global_disabled)
# Light auto-escalation: the user is in chat mode and just expressed a
# notes/calendar/email intent. Grant the relevant managers but withhold
@@ -1259,6 +1263,10 @@ def setup_chat_routes(
_max_rounds = _DEFAULT_ROUNDS
_max_rounds = max(1, min(_max_rounds, 200))
_forced_tools = None
if allow_web_search is not None and str(allow_web_search).lower() == "true":
_forced_tools = {"web_search", "web_fetch"}
async for chunk in stream_agent_loop(
sess.endpoint_url,
sess.model,
@@ -1280,6 +1288,7 @@ def setup_chat_routes(
plan_mode=plan_mode,
approved_plan=approved_plan or None,
workspace=workspace or None,
forced_tools=_forced_tools,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
+179 -17
View File
@@ -786,25 +786,149 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA configure and
fail with "CUDA Toolkit not found" instead of building with HIP.
"""
# Try a prebuilt binary from llama.cpp's GitHub releases FIRST — no
# cmake/build-essential/git/CUDA-headers needed at all. The from-source
# build below stays as a fallback (custom flags, esoteric arch, no
# internet, etc). 30 seconds vs 5+ minutes of compile, and removes
# every OS-package dep from the launch path. Sets _odysseus_have_prebuilt=1
# on success; the existing build-tier if/elif chain below is gated on
# that variable so we never compile twice or shadow the prebuilt symlink.
runner_lines.append(' _odysseus_have_prebuilt=""')
runner_lines.append(' _odysseus_arch="$(uname -m)"')
runner_lines.append(' _odysseus_prebuilt_url=""')
runner_lines.append(' if command -v curl >/dev/null 2>&1 && [ "$_odysseus_arch" = "x86_64" ]; then')
runner_lines.append(' _odysseus_pat=""')
runner_lines.append(' _odysseus_has_nv_inline() { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU "; }')
runner_lines.append(' _odysseus_has_vk_inline() { ldconfig -p 2>/dev/null | grep -q "libvulkan\\.so" || command -v vulkaninfo >/dev/null 2>&1 || [ -e /usr/lib/x86_64-linux-gnu/libvulkan.so.1 ]; }')
runner_lines.append(' _odysseus_has_vkdev_inline() { ls /dev/dri/renderD* >/dev/null 2>&1 || (lspci 2>/dev/null | grep -Ei \'VGA|3D|Display\' | grep -Eiq \'AMD|ATI|Radeon\'); }')
runner_lines.append(' if _odysseus_has_nv_inline; then')
runner_lines.append(' _odysseus_pat="ubuntu.*cuda"')
runner_lines.append(' elif _odysseus_has_vkdev_inline && _odysseus_has_vk_inline; then')
runner_lines.append(' _odysseus_pat="ubuntu.*vulkan"')
runner_lines.append(' else')
runner_lines.append(' _odysseus_pat="ubuntu-x64\\\\.zip"')
runner_lines.append(' fi')
runner_lines.append(' _odysseus_prebuilt_url="$(curl -fsSL --max-time 15 https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | grep \'"browser_download_url"\' | cut -d\'"\' -f4 | grep -iE "$_odysseus_pat" | grep -iv "arm\\|aarch64" | head -1)"')
runner_lines.append(' fi')
# Accept any of unzip / bsdtar / python3 -m zipfile as the extractor.
# python3 is essentially always present on modern Linux, so this lets
# the prebuilt path work on minimal Ubuntu installs that lack `unzip`.
runner_lines.append(' if [ -n "$_odysseus_prebuilt_url" ] && (command -v unzip >/dev/null 2>&1 || command -v bsdtar >/dev/null 2>&1 || command -v python3 >/dev/null 2>&1); then')
runner_lines.append(' echo "[odysseus] Found prebuilt llama-server: $_odysseus_prebuilt_url"')
runner_lines.append(' mkdir -p ~/bin "$HOME/.cache/odysseus/llama-cpp-prebuilt" && cd "$HOME/.cache/odysseus/llama-cpp-prebuilt"')
runner_lines.append(' rm -f llama-cpp.zip')
runner_lines.append(' if curl -fsSL --max-time 120 "$_odysseus_prebuilt_url" -o llama-cpp.zip && [ -s llama-cpp.zip ]; then')
runner_lines.append(' rm -rf build && mkdir -p build')
runner_lines.append(' if command -v unzip >/dev/null 2>&1; then unzip -qq -o llama-cpp.zip -d build; elif command -v bsdtar >/dev/null 2>&1; then bsdtar -xf llama-cpp.zip -C build; else python3 -c "import zipfile; zipfile.ZipFile(\\"llama-cpp.zip\\").extractall(\\"build\\")"; fi')
runner_lines.append(' _odysseus_extracted="$(find build -type f -name llama-server 2>/dev/null | head -1)"')
runner_lines.append(' if [ -n "$_odysseus_extracted" ]; then')
runner_lines.append(' chmod +x "$_odysseus_extracted"')
runner_lines.append(' ln -sf "$_odysseus_extracted" ~/bin/llama-server')
runner_lines.append(' _odysseus_libdir="$(dirname "$_odysseus_extracted")"')
runner_lines.append(' mkdir -p ~/.config && echo "export LD_LIBRARY_PATH=\\"$_odysseus_libdir:\\${LD_LIBRARY_PATH:-}\\"" > ~/.config/odysseus-llama-cpp-env')
runner_lines.append(' _odysseus_have_prebuilt=1')
runner_lines.append(' echo "[odysseus] Prebuilt llama-server installed at $_odysseus_extracted"')
runner_lines.append(' fi')
runner_lines.append(' fi')
runner_lines.append(' [ -z "$_odysseus_have_prebuilt" ] && echo "[odysseus] Prebuilt download/extract failed — falling back to from-source build."')
runner_lines.append(' elif [ -z "$_odysseus_prebuilt_url" ]; then')
runner_lines.append(' echo "[odysseus] No matching prebuilt llama-server for this host (arch=$_odysseus_arch) — will build from source."')
runner_lines.append(' fi')
runner_lines.append(' if [ -z "$_odysseus_have_prebuilt" ]; then')
# Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put it on PATH
# so cmake's CUDA configure can find it. We keep this after the ROCm/HIP
# check — a machine with both stacks should honor the native HIP toolchain on
# AMD hosts instead of accidentally preferring a stray nvcc wheel.
runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
runner_lines.append(' done')
# so cmake's CUDA configure can find it — BUT only when actual NVIDIA
# hardware is present. On AMD/Intel hosts the pip nvcc is a misleading
# leftover (no libcudart, no GPU it could target) and would otherwise
# send the build down the CUDA branch and fail with "CUDA Toolkit not
# found" instead of trying Vulkan.
runner_lines.append(' _odysseus_has_nvidia_hw() {')
runner_lines.append(' command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU " && return 0')
runner_lines.append(' ls /dev/nvidia* >/dev/null 2>&1 && return 0')
runner_lines.append(' lspci 2>/dev/null | grep -iE \'VGA|3D|Display\' | grep -iq nvidia && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
runner_lines.append(' if _odysseus_has_nvidia_hw; then')
runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
runner_lines.append(' done')
runner_lines.append(' fi')
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
# or HIP attempt) doesn't cause the next configure to reuse stale settings.
runner_lines.append(' mkdir -p ~/bin')
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
# Try to install cmake / build-essential / git automatically before the
# build, but ONLY via passwordless sudo (`sudo -n`) — interactive sudo
# would hang a tmux-backgrounded serve task waiting for a password. If
# sudo asks for a password the install is skipped silently and the
# diagnosis pattern (cookbook_routes.py / cookbook_helpers.py) surfaces
# an explicit "install cmake" suggestion in the Cookbook diagnosis
# toolbar after the inevitable build failure.
runner_lines.append(' _odysseus_apt_bootstrap() {')
runner_lines.append(' local _missing=""')
runner_lines.append(' command -v cmake >/dev/null 2>&1 || _missing="$_missing cmake"')
runner_lines.append(' command -v g++ >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || _missing="$_missing build-essential"')
runner_lines.append(' command -v git >/dev/null 2>&1 || _missing="$_missing git"')
runner_lines.append(' [ -z "$_missing" ] && return 0')
runner_lines.append(' if command -v apt-get >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] Auto-installing missing build deps via apt:$_missing"')
runner_lines.append(' sudo -n env DEBIAN_FRONTEND=noninteractive apt-get update -qq 2>&1 | tail -3')
runner_lines.append(' sudo -n env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends $_missing 2>&1 | tail -5 || true')
runner_lines.append(' elif command -v pacman >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] Auto-installing missing build deps via pacman:$_missing"')
runner_lines.append(' local _pacpkgs="$(echo "$_missing" | sed -e \'s/build-essential/base-devel/g\')"')
runner_lines.append(' sudo -n pacman -Sy --needed --noconfirm $_pacpkgs 2>&1 | tail -5 || true')
runner_lines.append(' elif command -v dnf >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] Auto-installing missing build deps via dnf:$_missing"')
runner_lines.append(' local _dnfpkgs="$(echo "$_missing" | sed -e \'s/build-essential/gcc gcc-c++ make/g\')"')
runner_lines.append(' sudo -n dnf install -y $_dnfpkgs 2>&1 | tail -5 || true')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: missing build deps ($_missing) — passwordless sudo is unavailable, cannot auto-install. Cookbook Diagnosis will explain the fix after the build fails."')
runner_lines.append(' fi')
runner_lines.append(' }')
runner_lines.append(' _odysseus_apt_bootstrap')
runner_lines.append(' _odysseus_missing_build_deps=""')
runner_lines.append(' command -v cmake >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps cmake"')
runner_lines.append(' command -v git >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps git"')
runner_lines.append(' command -v g++ >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps build-essential"')
runner_lines.append(' if [ -n "$_odysseus_missing_build_deps" ]; then')
runner_lines.append(' echo "ERROR: llama.cpp source build needs missing packages:$_odysseus_missing_build_deps"')
runner_lines.append(' if command -v apt-get >/dev/null 2>&1; then')
runner_lines.append(' echo "Install on this host: sudo apt-get update && sudo apt-get install -y cmake build-essential git"')
runner_lines.append(' elif command -v pacman >/dev/null 2>&1; then')
runner_lines.append(' echo "Install on this host: sudo pacman -Sy --needed cmake base-devel git"')
runner_lines.append(' elif command -v dnf >/dev/null 2>&1; then')
runner_lines.append(' echo "Install on this host: sudo dnf install -y cmake gcc gcc-c++ make git"')
runner_lines.append(' fi')
runner_lines.append(' echo "Alternative: install a native llama-server on PATH, then relaunch."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append(' fi')
runner_lines.append(' cd ~/llama.cpp')
runner_lines.append(' _odysseus_has_vulkan() {')
runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libvulkan\\.so\' && return 0')
runner_lines.append(' [ -e /usr/lib/libvulkan.so.1 ] && return 0')
runner_lines.append(' [ -e /usr/lib/x86_64-linux-gnu/libvulkan.so.1 ] && return 0')
runner_lines.append(' command -v vulkaninfo >/dev/null 2>&1 && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
runner_lines.append(' _odysseus_has_vulkan_device() {')
runner_lines.append(' ls /dev/dri/renderD* >/dev/null 2>&1 && return 0')
runner_lines.append(' lspci 2>/dev/null | grep -Ei \'VGA|3D|Display\' | grep -Eiq \'AMD|ATI|Radeon\' && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
# Backend preference: native ROCm/HIP > native CUDA > Vulkan > CPU.
# Vulkan is a portable fallback that works on AMD when ROCm isn't
# installed (e.g. Strix Halo) and on any vendor's discrete GPU, but
# it's ~30-40% slower than native HIP/CUDA for LLM inference — only
# pick it when no native toolchain is present.
runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
runner_lines.append(' rm -rf build')
runner_lines.append(' if command -v hipconfig &>/dev/null; then')
runner_lines.append(' export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"')
runner_lines.append(' export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"')
runner_lines.append(' fi')
runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' elif command -v nvcc &>/dev/null; then')
runner_lines.append(' elif command -v nvcc &>/dev/null && _odysseus_has_nvidia_hw; then')
runner_lines.append(' rm -rf build')
# nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete
# tooling can expose nvcc without shipping libcudart, causing cmake to fail
# mid-build with "CUDA runtime library not found". Check cudart explicitly
@@ -828,31 +952,50 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' fi')
runner_lines.append(' elif _odysseus_has_vulkan_device && _odysseus_has_vulkan; then')
runner_lines.append(' echo "[odysseus] Vulkan-capable GPU detected (no ROCm/CUDA toolchain installed) — building llama-server with Vulkan support..."')
runner_lines.append(' rm -rf build-vulkan')
runner_lines.append(' cmake -B build-vulkan -DCMAKE_BUILD_TYPE=Release -DGGML_VULKAN=ON && cmake --build build-vulkan -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build-vulkan/bin/llama-server ~/bin/llama-server')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA/Vulkan toolchain found — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
runner_lines.append(' echo "[odysseus] Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."')
runner_lines.append(' echo "[odysseus] Install Vulkan (libvulkan-dev) / ROCm for AMD GPUs or CUDA tooling for NVIDIA, then re-launch this serve task."')
runner_lines.append(' rm -rf build')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' fi')
runner_lines.append(' fi # end _odysseus_have_prebuilt guard')
def _llama_cpp_rebuild_cmd(update_source: bool = False) -> str:
    """Shell command that clears the Cookbook-managed llama.cpp build.

    Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build*``
    directories so the next llama.cpp serve recompiles from source, picking up
    a Vulkan, HIP, or CUDA toolchain if one is now available. The serve
    bootstrap only builds when ``llama-server`` is missing from PATH, so
    without this an existing CPU-only build is reused forever.

    Args:
        update_source: When true, the command first fast-forwards the
            Cookbook-managed ``~/llama.cpp`` checkout if it exists, or clones
            it when it does not and ``git`` is available. A failed update
            only prints a warning; the cached build is cleared either way.

    Returns:
        A single ``&&``-chained shell command string. The rebuild itself
        happens on the next serve, not when this command runs.
    """
    update_cmd = ''
    if update_source:
        update_cmd = (
            'if [ -d "$HOME/llama.cpp/.git" ]; then '
            # No ``--depth 1`` here: on a shallow clone it re-shallows the
            # history at the new tip, so the old HEAD is no longer an
            # ancestor and ``--ff-only`` refuses to merge. A plain pull
            # fetches only the commits added since the shallow boundary.
            'git -C "$HOME/llama.cpp" pull --ff-only || '
            'echo "[odysseus] WARNING: llama.cpp source update failed; clearing cached build anyway."; '
            'elif command -v git >/dev/null 2>&1; then '
            'git clone --depth 1 https://github.com/ggml-org/llama.cpp "$HOME/llama.cpp" || '
            'echo "[odysseus] WARNING: llama.cpp clone failed; clearing cached build anyway."; '
            'fi && '
        )
    return (
        'mkdir -p "$HOME/bin" && '
        f'{update_cmd}'
        'rm -f "$HOME/bin/llama-server" && '
        'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && '
        'echo "[odysseus] Cleared the cached llama.cpp build. '
        'Re-launch the serve task to rebuild llama-server from source '
        '(Vulkan, HIP, or CUDA will be used if a matching toolchain is now available)."'
    )
@@ -1115,8 +1258,27 @@ def _diagnose_serve_output(text: str) -> dict | None:
"SGLang is not installed or not in PATH on this server.",
[{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
),
# System build deps come BEFORE the generic llama.cpp catch-all so
# cmake / build-essential / git missing → a specific OS-package
# remediation instead of "install llama-cpp-python[server]" (which
# itself fails to compile when cmake is absent).
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
r"cmake: command not found|cmake.*not found.*[Cc]ould not",
"cmake is required to build llama.cpp from source but isn't installed on this server.",
[{"label": "install build deps for llama.cpp (apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git)", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler",
"A C/C++ compiler (build-essential) is required to build llama.cpp from source.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^git: command not found",
"git is required to clone the llama.cpp source tree.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'",
"llama.cpp / llama-cpp-python dependencies are missing.",
[{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
+335 -16
View File
@@ -189,8 +189,27 @@ def setup_cookbook_routes() -> APIRouter:
"SGLang is not installed or not in PATH on this server.",
[{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
),
# System build deps come BEFORE the generic llama.cpp catch-all
# so cmake / build-essential / git missing → a specific OS-package
# remediation instead of "install llama-cpp-python[server]" (which
# itself fails to compile when cmake is absent).
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
r"cmake: command not found|cmake.*not found.*[Cc]ould not",
"cmake is required to build llama.cpp from source but isn't installed on this server.",
[{"label": "install build deps for llama.cpp (apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git)", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler",
"A C/C++ compiler (build-essential) is required to build llama.cpp from source.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^git: command not found",
"git is required to clone the llama.cpp source tree.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'",
"llama.cpp / llama-cpp-python dependencies are missing.",
[{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
@@ -254,6 +273,79 @@ def setup_cookbook_routes() -> APIRouter:
def _load_stored_hf_token() -> str:
return load_stored_hf_token(state_path=_cookbook_state_path)
def _normalize_minimax_m3_vllm_cmd(cmd: str) -> str:
    """Patch MiniMax M3 vLLM launches into the known-good local form.

    The browser form can be stale or omit advanced-only fields. MiniMax M3
    is sensitive to several flags: using the HF repo id with block-size 128
    fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
    system nvcc. Normalize server-side before writing the tmux runner.

    Args:
        cmd: The serve command line as submitted.

    Returns:
        ``cmd`` unchanged when it is empty, is not a ``vllm serve`` command
        mentioning both "minimax" and "m3", cannot be tokenized, or has no
        model argument after ``serve``. Otherwise the command re-joined with
        ``shlex.join`` after forcing the required environment variables and
        flags.
    """
    cmd_lower = (cmd or "").lower()
    if not cmd or "vllm serve" not in cmd_lower or "minimax" not in cmd_lower or "m3" not in cmd_lower:
        return cmd
    try:
        parts = shlex.split(cmd)
    except ValueError:
        # Unbalanced quotes etc. — leave the command for later validation.
        return cmd
    if "serve" not in parts:
        return cmd

    # Only ``KEY=value`` tokens that come BEFORE ``serve`` are environment
    # assignments. Tokens after it are option values (for example
    # ``--limit-mm-per-prompt image=0``) and must stay next to their flag.
    env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    serve_at = parts.index("serve")
    prefix = parts[:serve_at]
    env_parts = [p for p in prefix if env_re.match(p)]
    body = [p for p in prefix if not env_re.match(p)] + parts[serve_at:]

    serve_i = body.index("serve")
    if serve_i + 1 >= len(body):
        return cmd

    repo_id = "cyankiwi/MiniMax-M3-AWQ-INT4"
    # NOTE(review): this snapshot path is specific to one user's home
    # directory and one snapshot revision; on any other host the swapped-in
    # path will not exist. Confirm whether it should be derived from the
    # Hugging Face cache location instead.
    snapshot = (
        "/home/pewds/.cache/huggingface/hub/"
        "models--cyankiwi--MiniMax-M3-AWQ-INT4/"
        "snapshots/4082acbbec1236d21828d55b6bb0fe02ade4ab5b"
    )
    if body[serve_i + 1] == repo_id:
        body[serve_i + 1] = snapshot

    def add_env(key: str, value: str) -> None:
        # Respect a value the caller already set for this variable.
        if not any(p.startswith(f"{key}=") for p in env_parts):
            env_parts.append(f"{key}={value}")

    def has_flag(flag: str) -> bool:
        return any(p == flag or p.startswith(flag + "=") for p in body)

    def set_flag(flag: str, value: str) -> None:
        # Overwrite the first occurrence in either ``--flag value`` or
        # ``--flag=value`` form; append the flag when it is absent.
        for i, part in enumerate(body):
            if part == flag:
                if i + 1 < len(body):
                    body[i + 1] = value
                else:
                    body.append(value)
                return
            if part.startswith(flag + "="):
                body[i] = f"{flag}={value}"
                return
        body.extend([flag, value])

    def add_bool(flag: str) -> None:
        if not has_flag(flag):
            body.append(flag)

    add_env("VLLM_TARGET_DEVICE", "cuda")
    add_env("VLLM_USE_FLASHINFER_SAMPLER", "0")
    set_flag("--served-model-name", repo_id)
    set_flag("--tool-call-parser", "minimax_m3")
    set_flag("--reasoning-parser", "minimax_m3")
    set_flag("--attention-backend", "TRITON_ATTN")
    set_flag("--block-size", "128")
    add_bool("--language-model-only")
    add_bool("--disable-custom-all-reduce")
    add_bool("--enable-expert-parallel")
    # NOTE(review): shlex.join quotes every token, so shell operators or
    # expansions in the original command (``&&``, ``~``, ``$VAR``) come out
    # as literals — presumably upstream validation rules those out; confirm.
    return shlex.join(env_parts + body)
def _cookbook_ssh_dir() -> Path:
# The Docker image keeps cookbook keys under /app/.ssh; that path only
# exists inside the container. On Windows (and any non-container host)
@@ -1230,6 +1322,7 @@ def setup_cookbook_routes() -> APIRouter:
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
req.cmd = _validate_serve_cmd(req.cmd) or ""
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
req.cmd = _normalize_minimax_m3_vllm_cmd(req.cmd)
req.cmd = _venv_safe_local_pip_install_cmd(
req.cmd,
local=not bool(req.remote_host),
@@ -1243,8 +1336,16 @@ def setup_cookbook_routes() -> APIRouter:
req.cmd = _pip_install_no_cache(req.cmd)
# Accept common aliases and enforce server extras for llama-cpp so
# `python -m llama_cpp.server` has all runtime dependencies.
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
# CRITICAL: the lookbehind / lookahead must also exclude `/` so
# the regex DOESN'T mangle a URL path like
# https://abetlen.github.io/llama-cpp-python/whl/cu124
# The previous regex turned that URL into
# https://abetlen.github.io/llama-cpp-python[server]/whl/cu124
# which pip then couldn't resolve → silent fallback to source
# build of the .tar.gz → CPU-only binary (because CMAKE_ARGS
# isn't set), defeating the entire purpose of the CUDA index.
req.cmd = re.sub(r"(?<![A-Za-z0-9_.\-/])llama_cpp(?![A-Za-z0-9_.\-/])", "llama-cpp-python[server]", req.cmd)
req.cmd = re.sub(r"(?<![A-Za-z0-9_.\-/])llama-cpp-python(?![\[/])", "llama-cpp-python[server]", req.cmd)
if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
# PEP-508-style package spec — letters, digits, `.-_` for the
@@ -1431,6 +1532,69 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' else')
_append_llama_cpp_linux_accel_build_lines(runner_lines)
runner_lines.append(' fi')
# Source the env file the prebuilt-download path writes so
# LD_LIBRARY_PATH includes the directory holding libllama.so
# and friends. No-op when prebuilt wasn't used.
runner_lines.append(' [ -r ~/.config/odysseus-llama-cpp-env ] && . ~/.config/odysseus-llama-cpp-env')
# Auto-upgrade pip llama-cpp-python to the CUDA-enabled
# wheel when (a) NVIDIA hardware is present and (b) the
# currently-installed wheel is CPU-only. Without this the
# user gets the Python server happily running at 3 tok/s
# because pip's default index ships CPU-only wheels.
# Forward-compat: cu124 wheels work on driver/runtime
# 12.4+ including the cu13.x line.
runner_lines.append(' if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU " && python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' if ! python3 -c "import llama_cpp; import sys; sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] NVIDIA detected but installed llama-cpp-python is CPU-only — reinstalling with CUDA wheel index for GPU offload..."')
runner_lines.append(' python3 -m pip install --user --break-system-packages --force-reinstall --no-cache-dir "llama-cpp-python[server]" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 2>&1 | tail -8 || echo "[odysseus] WARNING: CUDA wheel reinstall failed — Python server will stay CPU-only (slow). Manual fix: pip install --user --force-reinstall \'llama-cpp-python[server]\' --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124"')
runner_lines.append(' if python3 -c "import llama_cpp; import sys; sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] llama-cpp-python now supports GPU offload."')
runner_lines.append(' fi')
runner_lines.append(' fi')
runner_lines.append(' fi')
# SHORT-CIRCUIT before the build/pip fallback: if the
# native binary is missing but llama_cpp Python is already
# installed, drop a wrapper at ~/bin/llama-server that
# translates llama-server CLI args to llama_cpp.server's
# underscore-style flags. The user's serve command stays
# `llama-server ...` and "just works" — no build, no cmake,
# no second install. This is the path that unblocks every
# remote where pip-installed llama-cpp-python is already
# working but Cookbook used to insist on a native binary.
runner_lines.append(' if ! command -v llama-server >/dev/null 2>&1 && python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' mkdir -p ~/bin')
runner_lines.append(' cat > ~/bin/llama-server <<\'_ODY_LLAMA_SHIM_EOF\'')
runner_lines.append('#!/usr/bin/env bash')
runner_lines.append('# Auto-generated by Odysseus Cookbook: a `llama-server` lookalike')
runner_lines.append('# that translates the native CLI to `python -m llama_cpp.server`.')
runner_lines.append('# Lets cookbook-generated launch commands run unchanged on hosts')
runner_lines.append('# where only the pip llama-cpp-python package is installed.')
runner_lines.append('ARGS=()')
runner_lines.append('while [ $# -gt 0 ]; do')
runner_lines.append(' case "$1" in')
runner_lines.append(' -ngl|--gpu-layers|--n-gpu-layers) ARGS+=(--n_gpu_layers "$2"); shift 2 ;;')
runner_lines.append(' -c|--ctx-size) ARGS+=(--n_ctx "$2"); shift 2 ;;')
runner_lines.append(' -b|--batch-size) ARGS+=(--n_batch "$2"); shift 2 ;;')
runner_lines.append(' -ub|--ubatch-size) shift 2 ;; # llama-cpp-python has no separate ubatch')
runner_lines.append(' --flash-attn) ARGS+=(--flash_attn true); shift 2 ;;')
runner_lines.append(' --cache-type-k) ARGS+=(--type_k "$2"); shift 2 ;;')
runner_lines.append(' --cache-type-v) ARGS+=(--type_v "$2"); shift 2 ;;')
runner_lines.append(' --n-cpu-moe) ARGS+=(--n_cpu_moe "$2"); shift 2 ;;')
runner_lines.append(' --mmproj) ARGS+=(--clip_model_path "$2"); shift 2 ;;')
runner_lines.append(' --image-max-tokens) shift 2 ;; # native-only')
runner_lines.append(' --no-mmap) ARGS+=(--no_mmap true); shift ;;')
runner_lines.append(' --no-warmup) shift ;; # native-only')
runner_lines.append(' --chat-template) ARGS+=(--chat_format "$2"); shift 2 ;;')
runner_lines.append(' --fit|--split-mode|--tensor-split|--main-gpu|--parallel) shift 2 ;; # native-only')
runner_lines.append(' --mlock) ARGS+=(--use_mlock true); shift ;;')
runner_lines.append(' *) ARGS+=("$1"); shift ;;')
runner_lines.append(' esac')
runner_lines.append('done')
runner_lines.append('exec python3 -m llama_cpp.server "${ARGS[@]}"')
runner_lines.append('_ODY_LLAMA_SHIM_EOF')
runner_lines.append(' chmod +x ~/bin/llama-server')
runner_lines.append(' echo "[odysseus] Created llama-server shim → python -m llama_cpp.server (no native binary needed)"')
runner_lines.append(' fi')
runner_lines.append(' # If the native build failed, fall back to the Python bindings.')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."')
@@ -1494,6 +1658,96 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' echo "ERROR: vLLM is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi')
runner_lines.append(f"ODYSSEUS_SERVE_CMD='{_bash_squote(req.cmd)}'")
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
runner_lines.append(' ODYSSEUS_VLLM_HELP_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print("vllm serve --help")')
runner_lines.append('else:')
runner_lines.append(' print(shlex.join(parts[:serve_i + 1] + ["--help"]))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' ODYSSEUS_VLLM_SUPPORTS_SWAP=0')
runner_lines.append(' if eval "$ODYSSEUS_VLLM_HELP_CMD" 2>&1 | grep -q -- "--swap-space"; then ODYSSEUS_VLLM_SUPPORTS_SWAP=1; fi')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" = "1" ] && ! printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="${ODYSSEUS_SERVE_CMD} --swap-space 0"')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" != "1" ]; then')
runner_lines.append(' if printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('out = []')
runner_lines.append('skip = False')
runner_lines.append('for part in parts:')
runner_lines.append(' if skip:')
runner_lines.append(' skip = False')
runner_lines.append(' continue')
runner_lines.append(' if part == "--swap-space":')
runner_lines.append(' skip = True')
runner_lines.append(' continue')
runner_lines.append(' if part.startswith("--swap-space="):')
runner_lines.append(' continue')
runner_lines.append(' out.append(part)')
runner_lines.append('print(shlex.join(out))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' fi')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('patch = r"""import inspect, sys')
runner_lines.append('from vllm.engine.arg_utils import EngineArgs, AsyncEngineArgs')
runner_lines.append('def _odysseus_swap0(cls):')
runner_lines.append(' params = list(inspect.signature(cls).parameters)')
runner_lines.append(' if "swap_space" not in params:')
runner_lines.append(' return')
runner_lines.append(' idx = params.index("swap_space")')
runner_lines.append(' defaults = list(cls.__init__.__defaults__ or ())')
runner_lines.append(' if idx < len(defaults):')
runner_lines.append(' defaults[idx] = 0')
runner_lines.append(' cls.__init__.__defaults__ = tuple(defaults)')
runner_lines.append(' fields = getattr(cls, "__dataclass_fields__", {})')
runner_lines.append(' if "swap_space" in fields:')
runner_lines.append(' fields["swap_space"].default = 0')
runner_lines.append('_odysseus_swap0(EngineArgs)')
runner_lines.append('_odysseus_swap0(AsyncEngineArgs)')
runner_lines.append('try:')
runner_lines.append(' from vllm.config import CacheConfig')
runner_lines.append(' CacheConfig.swap_space = 0')
runner_lines.append('except Exception:')
runner_lines.append(' pass')
runner_lines.append('_orig_create_engine_config = EngineArgs.create_engine_config')
runner_lines.append('def _odysseus_create_engine_config(self, *args, **kwargs):')
runner_lines.append(' self.swap_space = 0')
runner_lines.append(' return _orig_create_engine_config(self, *args, **kwargs)')
runner_lines.append('EngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('AsyncEngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('from vllm.entrypoints.cli.main import main')
runner_lines.append('sys.exit(main())"""')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('else:')
runner_lines.append(' exe_i = serve_i - 1')
runner_lines.append(' exe = parts[exe_i] if exe_i >= 0 else "vllm"')
runner_lines.append(' py = "python3"')
runner_lines.append(' if exe.endswith("/bin/vllm"):')
runner_lines.append(' py = exe[:-len("/bin/vllm")] + "/bin/python"')
runner_lines.append(' parts[exe_i:serve_i] = [py, "-c", patch]')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' echo "[odysseus] Patched vLLM internal swap_space default to 0 for this runtime."')
runner_lines.append('fi')
elif "sglang.launch_server" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! command -v sglang &>/dev/null; then')
@@ -1535,7 +1789,10 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines,
keep_shell_open=not local_windows,
)
runner_lines.append(req.cmd)
if "vllm serve" in req.cmd:
runner_lines.append('eval "$ODYSSEUS_SERVE_CMD"')
else:
runner_lines.append(req.cmd)
if local_windows:
# Detached background process — no interactive shell to keep open.
# Print the exit marker the status poller looks for, then stop.
@@ -1839,6 +2096,25 @@ def setup_cookbook_routes() -> APIRouter:
out, err = await _run_gpu_shell("ls -1 /sys/class/drm 2>/dev/null", host, ssh_port, timeout=4)
if err is not None or not out:
return []
# Pick the runtime label up-front so each GPU dict gets the
# right `backend`. AMD silicon can be driven by ROCm/HIP (native)
# OR Vulkan (mesa RADV). Reporting "rocm" on a host where no
# ROCm toolchain is installed misleads the frontend env-var
# prefix logic — it would emit `HIP_VISIBLE_DEVICES=` for a
# Vulkan-only stack, which is a silent no-op at best.
rt_out, _ = await _run_gpu_shell(
'command -v rocminfo >/dev/null 2>&1 && echo rocm '
'|| (command -v hipconfig >/dev/null 2>&1 && echo rocm) '
'|| (command -v vulkaninfo >/dev/null 2>&1 && echo vulkan) '
'|| echo unknown',
host, ssh_port, timeout=4,
)
_amd_runtime = (rt_out or "").strip().splitlines()[-1:][0].strip() if rt_out else "rocm"
if _amd_runtime not in ("rocm", "vulkan"):
# Default to rocm so existing ROCm-installed hosts keep
# working; "unknown" only happens when neither toolchain is
# detected (e.g. minimal sysfs read on a fresh box).
_amd_runtime = "rocm"
gpus = []
for entry in out.split():
if not entry.startswith("card") or "-" in entry:
@@ -1882,7 +2158,7 @@ def setup_cookbook_routes() -> APIRouter:
"free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb,
"gtt_used_mb": gtt_used_mb,
"util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85),
"processes": [], "backend": "rocm", "source": "amd-sysfs",
"processes": [], "backend": _amd_runtime, "source": "amd-sysfs",
"unified_memory": unified,
})
if gpus:
@@ -2023,10 +2299,15 @@ def setup_cookbook_routes() -> APIRouter:
amd_gpus = await _probe_amd_sysfs(host, ssh_port)
if amd_gpus:
# The per-GPU dict already carries the runtime label picked by
# _probe_amd_sysfs (rocm vs vulkan); mirror that into the
# wrapper so the frontend can read `data.backend` directly
# without scanning the list.
_amd_wrap_backend = str(amd_gpus[0].get("backend") or "rocm")
return {
"ok": True,
"gpus": amd_gpus,
"backend": "rocm",
"backend": _amd_wrap_backend,
"source": "amd-sysfs",
"fallback_from": "nvidia-smi",
"nvidia_error": nvidia_error,
@@ -2166,6 +2447,17 @@ def setup_cookbook_routes() -> APIRouter:
disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
incoming_removed = data.get("removedTasks") if isinstance(data.get("removedTasks"), dict) else {}
disk_removed = on_disk.get("removedTasks") if isinstance(on_disk, dict) and isinstance(on_disk.get("removedTasks"), dict) else {}
removed_tasks = {**disk_removed, **incoming_removed}
data["removedTasks"] = removed_tasks
removed_ids = set(removed_tasks.keys())
if removed_ids:
incoming_tasks = [
t for t in incoming_tasks
if not (isinstance(t, dict) and t.get("sessionId") in removed_ids)
]
data["tasks"] = incoming_tasks
# Anti-poisoning guard: a stale browser tab can keep POSTing a
# download task as status='done' from before the strict-finish
# fix landed, undoing any server-side correction. For each
@@ -2203,6 +2495,8 @@ def setup_cookbook_routes() -> APIRouter:
sid = t.get("sessionId")
if not sid or sid in incoming_ids:
continue # client's version wins
if sid in removed_ids:
continue # intentional cross-device clear/remove
ts = t.get("ts") or 0
if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS:
preserved.append(t)
@@ -2309,16 +2603,14 @@ def setup_cookbook_routes() -> APIRouter:
# Add 30% headroom for KV cache, activations, etc.
needed_vram = (est_vram * 1.3) if est_vram else None
if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb:
continue
# Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no
# "NB" in the repo id, so the regex above can't extract their
# param count. Previously we dropped them entirely, which made
# brand-new flagship releases silently vanish from this list even
# on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already
# filtered by _is_excluded(), so what falls through here is
# overwhelmingly full models — keep them, just without a size
# badge (the frontend handles needed_vram_gb=null gracefully).
if vram_gb > 0:
if needed_vram is None:
# The "trending models that fit" list must be conservative:
# if we cannot estimate size from the repo id/tags, do not
# present it as runnable on this hardware.
continue
if needed_vram > vram_gb:
continue
out.append({
"repo_id": repo_id,
@@ -2515,6 +2807,33 @@ def setup_cookbook_routes() -> APIRouter:
except Exception as e:
logger.warning(f"orphan sweep: state write failed: {e}")
@router.get("/api/cookbook/hf-gguf-files")
async def hf_gguf_files(repo_id: str, owner: str = Depends(require_user)):
    """List GGUF files in a HuggingFace repo for the direct-download picker.

    Args:
        repo_id: HuggingFace repository id; passed through
            ``_validate_repo_id`` before it is used in the request URL.
        owner: Authenticated user resolved by ``require_user``.

    Returns:
        ``{"ok": True, "repo_id": ..., "files": [...]}`` with every sibling
        file name ending in ``.gguf`` (case-insensitive), or
        ``{"ok": False, "files": [], "error": ...}`` when the HF API call
        fails or answers with a non-200 status.
    """
    import httpx

    repo_id = _validate_repo_id(repo_id)
    url = f"https://huggingface.co/api/models/{repo_id}"
    try:
        headers = {}
        token = _load_stored_hf_token()
        if token:
            headers["Authorization"] = f"Bearer {token}"
        async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
            resp = await client.get(url, headers=headers)
        if resp.status_code != 200:
            return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
        data = resp.json()
    except Exception:
        # Log the validated id; the previous handler referenced an undefined
        # name and raised NameError instead of returning the error payload.
        logger.exception("HF GGUF file scan failed for %s", repo_id)
        return {"ok": False, "files": [], "error": "HF API request failed"}

    # Tolerate a missing / null / non-list "siblings" field rather than
    # failing after a successful request.
    siblings = data.get("siblings") if isinstance(data, dict) else None
    files = []
    for sibling in siblings if isinstance(siblings, list) else []:
        if not isinstance(sibling, dict):
            continue
        name = str(sibling.get("rfilename") or "")
        if name.lower().endswith(".gguf"):
            files.append(name)
    return {"ok": True, "repo_id": repo_id, "files": files}
# In-memory cache for the Ollama library scrape. ollama.com is a public
# site, but it doesn't expose a stable JSON listing — we fetch the HTML
# search page and regex out the model cards. Cached for 1 h so a busy
+45 -8
View File
@@ -1233,22 +1233,30 @@ def _list_attachments_from_msg(msg):
return attachments
idx = 0
for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
# Skip text/html body parts (only consider real attachments)
if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue
filename = part.get_filename()
if filename:
filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else:
# Inline images, etc. - generate a name
ext = ct.split("/")[-1] if "/" in ct else "bin"
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}"
payload = part.get_payload(decode=True)
size = len(payload) if payload else 0
if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
size = len(payload) if payload is not None else 0
attachments.append({
"index": idx,
"filename": filename,
@@ -1260,29 +1268,58 @@ def _list_attachments_from_msg(msg):
return attachments
def _is_likely_signature_image_attachment(att: dict) -> bool:
"""Match the reader's inline signature/logo image filter."""
filename = str((att or {}).get("filename") or "").lower()
if not re.search(r"\.(png|jpe?g|gif|bmp|svg|webp)$", filename):
return False
size = int((att or {}).get("size") or 0)
if re.search(r"^image\d{3,}\.(png|jpe?g|gif)$", filename):
return True
if re.search(r"^(signature|logo|sig|footer|banner)[-_\d]*\.(png|jpe?g|gif|svg)$", filename):
return True
return 0 < size < 30 * 1024
def _has_visible_attachments(msg) -> bool:
    """Return True only for attachments the reader will render as chips.

    That is, at least one attachment listed for ``msg`` is not filtered out
    as a likely signature/logo image.
    """
    for candidate in _list_attachments_from_msg(msg):
        if not _is_likely_signature_image_attachment(candidate):
            return True
    return False
def _extract_attachment_to_disk(msg, index, target_dir):
"""Extract a specific attachment to disk and return the file path."""
if not msg.is_multipart():
return None
idx = 0
for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue
if idx == index:
filename = part.get_filename()
if filename:
filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else:
ext = ct.split("/")[-1] if "/" in ct else "bin"
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}"
# Sanitize
safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip()
payload = part.get_payload(decode=True)
if not payload:
if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
if payload is None:
return None
target_dir.mkdir(parents=True, exist_ok=True)
filepath = target_dir / safe_name
+216 -39
View File
@@ -47,7 +47,7 @@ from routes.email_helpers import (
_IMAP_TIMEOUT_SECONDS, _open_imap_connection,
make_oauth_state, verify_oauth_state,
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
_extract_attachment_text, _list_attachments_from_msg,
_extract_attachment_text, _list_attachments_from_msg, _has_visible_attachments, _is_likely_signature_image_attachment,
_extract_attachment_to_disk, _extract_html, _extract_text,
_fetch_sender_thread_context, _pre_retrieve_context,
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
@@ -61,6 +61,7 @@ from routes.email_pollers import _start_poller
logger = logging.getLogger(__name__)
ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
EMAIL_READ_ATTACHMENT_VERSION = 2
def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
@@ -248,6 +249,21 @@ def _imap_uid_fetch(conn, uid_set: str | bytes, query: str):
return conn.uid("FETCH", _uid_bytes(uid_set), query)
def _imap_search_quote(value: str) -> str:
return '"' + str(value or "").replace("\\", "\\\\").replace('"', '\\"') + '"'
def _message_id_chain(*values: str) -> list[str]:
seen = set()
out = []
for value in values:
for mid in re.findall(r"<[^>]+>", value or ""):
if mid not in seen:
seen.add(mid)
out.append(mid)
return out
def _uid_from_fetch_meta(meta_b: bytes) -> str:
m = re.search(rb"\bUID\s+(\d+)\b", meta_b)
return m.group(1).decode() if m else ""
@@ -366,6 +382,21 @@ def _apply_odysseus_headers(msg, kind: str | None = None, ref_id: str | None = N
msg["X-Odysseus-Ref"] = re.sub(r"[^A-Za-z0-9_.:-]", "-", ref_id)[:128]
def _normalize_addr_field(field: str) -> str:
"""Strip the malformed-but-common trailing/leading commas and stray
whitespace from a To/Cc/Bcc string before it lands in the MIME header
or the SMTP envelope. Users often paste a single address with a
trailing comma (e.g. `felix@pewdiepie.com,`) and most MTAs reject the
resulting `To: felix@pewdiepie.com,` line as a syntax error. Collapse
any run of separator junk between addresses too."""
if not field:
return field
# Split on commas, drop empty tokens, rejoin with a single ', '.
parts = [p.strip() for p in field.split(",")]
parts = [p for p in parts if p]
return ", ".join(parts)
def _envelope_recipients(*fields: str) -> list:
"""Extract bare SMTP envelope addresses from one or more To/Cc/Bcc header
strings. A naive `field.split(",")` corrupts display names that contain a
@@ -994,6 +1025,65 @@ def setup_email_routes():
except Exception:
pass
def _related_thread_attachments_sync(
    folder: str,
    account_id: str | None,
    owner: str,
    current_uid: str,
    current_message_id: str,
    in_reply_to: str,
    references: str,
    limit: int = 12,
) -> list[dict]:
    """Return visible attachments from referenced messages in this folder.

    Message ids are taken from ``references`` and ``in_reply_to`` (minus the
    current message's own id). Each id is looked up in ``folder`` by its
    ``Message-ID`` header, and every attachment that is not a likely
    signature image is returned, enriched with ``source_*`` keys describing
    the message it came from.

    Work is bounded: at most the last 10 referenced ids, at most 3 UIDs per
    id, and never more than ``limit`` attachments in the result. The lookup
    is best-effort: any failure is logged at debug level and whatever was
    collected so far is returned.
    """
    wanted_ids = _message_id_chain(references, in_reply_to)
    current_mid = (current_message_id or "").strip()
    wanted_ids = [mid for mid in wanted_ids if mid and mid != current_mid]
    if not wanted_ids or limit <= 0:
        return []

    related: list[dict] = []
    try:
        with _imap(account_id, owner=owner) as conn:
            conn.select(_q(folder), readonly=True)
            # Search newest referenced messages first; cap work so opening
            # a long thread stays bounded.
            for mid in reversed(wanted_ids[-10:]):
                status, data = _imap_uid_search(conn, f'(HEADER Message-ID {_imap_search_quote(mid)})')
                if status != "OK" or not data or not data[0]:
                    continue
                for uid_b in reversed(data[0].split()[-3:]):
                    source_uid = uid_b.decode(errors="ignore")
                    if not source_uid or source_uid == str(current_uid):
                        continue
                    st2, msg_data = _imap_uid_fetch(conn, source_uid, "(BODY.PEEK[])")
                    if st2 != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                        continue
                    msg = email_mod.message_from_bytes(msg_data[0][1])
                    source_meta = {
                        "source_uid": source_uid,
                        "source_folder": folder,
                        "source_message_id": (msg.get("Message-ID") or "").strip(),
                        "source_from": _decode_header(msg.get("From", "")),
                        "source_subject": _decode_header(msg.get("Subject", "")),
                        "source_date": msg.get("Date", ""),
                    }
                    for att in _list_attachments_from_msg(msg):
                        if _is_likely_signature_image_attachment(att):
                            continue
                        related.append({**att, **source_meta})
                        # Stop the whole search at the cap so the result
                        # never exceeds `limit` and no further messages are
                        # fetched for nothing.
                        if len(related) >= limit:
                            return related
    except Exception as e:
        logger.debug("related thread attachment lookup failed uid=%s: %s", current_uid, e)
    return related
@router.get("/list")
async def list_emails(
folder: str = Query("INBOX"),
@@ -1264,6 +1354,17 @@ def setup_email_routes():
sender_name, sender_addr = email.utils.parseaddr(sender)
parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None
attachments = _list_attachments_from_msg(msg)
related_attachments = []
if not _has_visible_attachments(msg):
related_attachments = _related_thread_attachments_sync(
folder,
account_id,
owner,
uid,
message_id,
in_reply_to,
references,
)
if mark_seen:
# Set \Seen in a separate readwrite session so concurrent reads
@@ -1372,6 +1473,8 @@ def setup_email_routes():
"body": body,
"body_html": body_html,
"attachments": attachments,
"related_attachments": related_attachments,
"attachment_version": EMAIL_READ_ATTACHMENT_VERSION,
"cached_summary": cached_summary,
"cached_ai_reply": cached_ai_reply,
"boundaries": cached_boundaries,
@@ -1402,6 +1505,12 @@ def setup_email_routes():
"""Read email body. Cached for 30m, sync IMAP work runs in a thread."""
ck = _read_cache_key(account_id, folder, uid, owner=owner)
cached = _read_cache_get(ck)
if cached is not None:
# Older cached read responses lack the thread-attachment fallback.
# Fetch once so replies that reference prior attachments can show
# those files without waiting for cache expiry.
if cached.get("attachment_version") != EMAIL_READ_ATTACHMENT_VERSION:
cached = None
if cached is not None:
if mark_seen:
try:
@@ -1536,6 +1645,12 @@ def setup_email_routes():
return {"error": f"Attachment index {index} not found"}
from pathlib import Path as _Path
target_root = os.path.abspath(str(target_dir))
filepath_str = os.path.abspath(str(filepath))
if os.path.commonpath([target_root, filepath_str]) != target_root:
logger.warning("Rejected attachment path outside extraction dir: %s", filepath)
return {"error": "Invalid attachment path"}
filepath = _Path(filepath_str)
base = _Path(filepath).name
if base.startswith("."):
return {"error": "Invalid filename", "filename": base}
@@ -1590,6 +1705,65 @@ def setup_email_routes():
return None
doc_session_id = _resolve_doc_session()
def _create_markdown_doc(content: str, summary: str):
    """Store ``content`` as a new active markdown document and return its id.

    Deactivates the currently active documents, inserts the document with a
    first version (``source="upload"``, described by ``summary``), commits,
    then tags the new document with its source.
    """
    from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV

    doc_id = str(uuid.uuid4())
    ver_id = str(uuid.uuid4())
    session = _SL()
    try:
        active_docs = session.query(_Doc).filter(_Doc.is_active == True)  # noqa: E712 - SQL expression
        active_docs.update({"is_active": False})
        document = _Doc(
            id=doc_id,
            session_id=doc_session_id,
            title=title,
            language="markdown",
            current_content=content,
            version_count=1,
            is_active=True,
        )
        first_version = _DV(
            id=ver_id,
            document_id=doc_id,
            version_number=1,
            content=content,
            summary=summary,
            source="upload",
        )
        session.add(document)
        session.add(first_version)
        session.commit()
    finally:
        session.close()
    _tag_doc_with_source(doc_id)
    return doc_id
def _attached_email_markdown(raw_bytes: bytes):
    """Render an attached email (raw RFC 822 bytes) as a markdown document.

    The output has a title line with the subject (falling back to the
    attachment's file name), the From/To/Cc/Date headers that are present,
    a ``## Body`` section with the extracted text, and a ``## Attachments``
    list when the attached email itself carries attachments.

    Empty input and unparseable input each produce a short placeholder
    document instead of raising.
    """
    if not raw_bytes:
        return f"# Attached email: {base}\n\n_(empty email attachment)_"
    try:
        attached_msg = email_mod.message_from_bytes(raw_bytes)
    except Exception:
        logger.exception("Failed to parse attached email %s", base)
        return f"# Attached email: {base}\n\nCould not parse this email attachment."

    # Bytes produced by serialising a message/rfc822 MIME *part* start with
    # that part's own Content-* headers, so they parse back as a wrapper
    # whose single payload item is the real email. Unwrap it, otherwise
    # Subject/From/To/Cc/Date are read from the wrapper and come out empty.
    if (
        attached_msg.get_content_type() == "message/rfc822"
        and attached_msg.is_multipart()
        and not any(attached_msg.get(name) for name in ("Subject", "From", "To", "Date"))
    ):
        inner_parts = attached_msg.get_payload()
        if inner_parts and hasattr(inner_parts[0], "get_content_type"):
            attached_msg = inner_parts[0]

    attached_subject = _decode_header(attached_msg.get("Subject", "")) or base
    attached_from = _decode_header(attached_msg.get("From", ""))
    attached_to = _decode_header(attached_msg.get("To", ""))
    attached_cc = _decode_header(attached_msg.get("Cc", ""))
    attached_date = attached_msg.get("Date", "")
    attached_body = _extract_text(attached_msg).strip()
    attached_atts = _list_attachments_from_msg(attached_msg)

    lines = [f"# Attached email: {attached_subject}", ""]
    header_rows = (
        ("From", attached_from),
        ("To", attached_to),
        ("Cc", attached_cc),
        ("Date", attached_date),
    )
    # Only headers that are actually present get a line.
    lines.extend(f"**{label}:** {value}" for label, value in header_rows if value)
    lines.extend(["", "## Body", "", attached_body or "_(no readable body)_"])
    if attached_atts:
        lines.extend(["", "## Attachments", ""])
        for att in attached_atts:
            size = int(att.get("size") or 0)
            size_label = f"{size} B" if size < 1024 else f"{round(size / 1024)} KB"
            name = att.get("filename") or f"attachment_{att.get('index', '')}"
            ctype = att.get("content_type") or "application/octet-stream"
            lines.append(f"- {name} ({ctype}, {size_label})")
    return "\n".join(lines).strip()
# ── PDF path (existing) ────────────────────────────────────
if ext == ".pdf":
import shutil as _shutil
@@ -1636,6 +1810,39 @@ def setup_email_routes():
_tag_doc_with_source(doc_id)
return {"doc_id": doc_id, "filename": filepath.name}
# ── Attached email (.eml / message/rfc822) ────────────────
if ext == ".eml":
def _attachment_bytes_from_msg():
    """Return the raw bytes of the attachment at position ``index`` in ``msg``.

    ``msg`` and ``index`` come from the enclosing scope. Parts are visited
    with ``msg.walk()`` and counted with these rules:

    * multipart containers are skipped, except ``message/rfc822`` parts that
      are attachments (disposition contains "attachment", or a filename is set);
    * inline ``text/plain`` / ``text/html`` parts are skipped.

    Returns ``b""`` when ``msg`` is not multipart, when no part has the
    requested index, or when the payload cannot be read.
    """
    if not msg.is_multipart():
        return b""
    idx = 0
    for part in msg.walk():
        cd = str(part.get("Content-Disposition", ""))
        ct = part.get_content_type()
        is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
        if part.is_multipart() and not is_attached_email:
            continue
        # NOTE(review): this check is case-sensitive while the one above
        # lower-cases the header. Left as-is because the counting presumably
        # has to match the code that produced ``index`` — confirm before
        # changing.
        if ct in ("text/plain", "text/html") and "attachment" not in cd:
            continue
        if idx == index:
            payload = part.get_payload(decode=True)
            if payload is None and ct == "message/rfc822":
                # A parsed message/rfc822 part holds the enclosed message as
                # a one-element payload list, so decode=True yields None.
                # Serialize the enclosed message, not the wrapper part:
                # the wrapper only has MIME headers, so re-parsing it would
                # lose the attached email's Subject/From/To/Date.
                try:
                    enclosed = part.get_payload()
                    if isinstance(enclosed, list) and enclosed:
                        payload = enclosed[0].as_bytes()
                    else:
                        payload = part.as_bytes()
                except Exception:
                    payload = b""
            return payload or b""
        idx += 1
    return b""
try:
content = _attached_email_markdown(_attachment_bytes_from_msg())
except Exception:
logger.exception("Failed to read email attachment %s", base)
return {"error": "Failed to read email attachment", "filename": base}
doc_id = _create_markdown_doc(content, "Imported attached email")
return {"doc_id": doc_id, "filename": filepath.name}
# ── DOCX path: extract text → markdown document ───────────
if ext == ".docx":
try:
@@ -1673,25 +1880,7 @@ def setup_email_routes():
lines.append("")
content = "\n".join(lines).strip() or f"_(empty {base})_"
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from DOCX", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
doc_id = _create_markdown_doc(content, "Imported from DOCX")
return {"doc_id": doc_id, "filename": filepath.name}
# ── Plain text / markdown ────────────────────────────────
@@ -1700,25 +1889,7 @@ def setup_email_routes():
content = filepath.read_text(encoding="utf-8", errors="replace")
except Exception as e:
return {"error": f"Failed to read text file: {e}", "filename": base}
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from email attachment", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
doc_id = _create_markdown_doc(content, "Imported from email attachment")
return {"doc_id": doc_id, "filename": filepath.name}
return {"error": f"Unsupported attachment type: {ext}", "filename": base}
@@ -2027,6 +2198,9 @@ def setup_email_routes():
outer = MIMEMultipart("alternative")
body_container = outer
to = _normalize_addr_field(to or "")
cc = _normalize_addr_field(cc or "")
bcc = _normalize_addr_field(bcc or "")
outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
outer["To"] = to
if cc:
@@ -2302,6 +2476,9 @@ def setup_email_routes():
outer = MIMEMultipart("alternative")
body_container = outer
req.to = _normalize_addr_field(req.to or "")
req.cc = _normalize_addr_field(req.cc or "")
req.bcc = _normalize_addr_field(req.bcc or "")
outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
outer["To"] = req.to
if req.cc:
+111 -4
View File
@@ -1,8 +1,13 @@
import json
import os
import re
import shlex
import subprocess
from copy import deepcopy
from fastapi import APIRouter, HTTPException
from core.platform_compat import run_ssh_command
from routes._validators import validate_remote_host, validate_ssh_port
@@ -107,6 +112,73 @@ def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_v
return system
def _run_model_probe(host: str, ssh_port: str, cmd: str) -> str:
try:
if host:
r = run_ssh_command(
host,
ssh_port or None,
cmd,
timeout=15,
connect_timeout=5,
strict_host_key_checking=False,
text=True,
)
else:
r = subprocess.run(["bash", "-lc", cmd], capture_output=True, text=True, timeout=15)
if r.returncode == 0:
return (r.stdout or "").strip()
except Exception:
return ""
return ""
def _inspect_model_path(model_path: str, host: str = "", ssh_port: str = "") -> dict:
"""Read lightweight metadata from a local or SSH-visible HF model folder."""
path = (model_path or "").strip()
if not path or path.startswith(("http://", "https://")):
return {}
if not (path.startswith("/") or path.startswith("~")):
return {}
qpath = shlex.quote(path)
qconfig = shlex.quote(os.path.join(path, "config.json"))
out = {}
exists = _run_model_probe(host, ssh_port, f"test -d {qpath} && printf found || printf missing")
if exists != "found":
target = host or "local container"
out["model_probe_error"] = f"Model path is not visible on {target}: {path}"
return out
raw_config = _run_model_probe(host, ssh_port, f"test -f {qconfig} && sed -n '1,240p' {qconfig}")
if raw_config:
try:
cfg = json.loads(raw_config)
except Exception:
cfg = {}
for key in ("context_length", "max_position_embeddings", "n_ctx_train", "model_max_length", "max_seq_len"):
value = cfg.get(key)
if isinstance(value, (int, float)) and value > 0:
out["model_ctx_max"] = int(value)
break
else:
out["model_probe_error"] = f"config.json not found in model path: {path}"
size_cmd = (
f"find {qpath} -type f \\( -name '*.safetensors' -o -name '*.bin' -o -name '*.gguf' \\) "
"-printf '%s\\n' 2>/dev/null | awk '{s+=$1} END {if (s>0) printf \"%.6f\", s/1073741824}'"
)
weights = _run_model_probe(host, ssh_port, size_cmd)
try:
weights_gb = float(weights)
except Exception:
weights_gb = 0.0
if weights_gb > 0:
out["model_weights_gb"] = round(weights_gb, 3)
elif "model_probe_error" not in out:
out["model_probe_error"] = f"No model weight files found in: {path}"
return out
def setup_hwfit_routes():
router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
@@ -235,7 +307,7 @@ def setup_hwfit_routes():
return {"system": system, "models": results}
@router.get("/profiles")
def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
def get_serve_profiles(model: str = "", model_path: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
"""Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
against the detected hardware on `host` (or local). Returns concrete
flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
@@ -260,8 +332,23 @@ def setup_hwfit_routes():
# "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct".
s = (s or "").lower().strip()
s = s.split("/")[-1] # drop org prefix
s = re.sub(r"[-_.]?gguf$", "", s) # drop trailing gguf marker
s = re.sub(r"[-_.](q\d[^/]*|iq\d[^/]*|fp8|bf16|f16|awq[^/]*|gptq[^/]*)$", "", s)
for suffix in ("-gguf", "_gguf", ".gguf", "gguf"):
if s.endswith(suffix):
s = s[: -len(suffix)]
break
cut_at = None
for idx, ch in enumerate(s):
if ch not in "-_." or idx + 1 >= len(s):
continue
suffix = s[idx + 1:]
if (
suffix in {"fp8", "bf16", "f16"}
or suffix.startswith(("awq", "gptq", "iq"))
or (suffix.startswith("q") and len(suffix) > 1 and suffix[1].isdigit())
):
cut_at = idx
if cut_at is not None:
s = s[:cut_at]
return s
m = catalog.get(model)
@@ -272,8 +359,16 @@ def setup_hwfit_routes():
if nn and (nn == want or want.endswith(nn) or nn.endswith(want)):
m = entry
break
path_meta = _inspect_model_path(model_path or model, host=host, ssh_port=ssh_port)
if m is None:
return {"system": system, "profiles": [], "error": "model not in catalog"}
return {
"system": system,
"profiles": [],
"error": "model not in catalog",
"model_ctx_max": int(path_meta.get("model_ctx_max") or 0),
"model_weights_gb": float(path_meta.get("model_weights_gb") or 0),
"model_probe_error": path_meta.get("model_probe_error") or "",
}
# Surface the model's trained context limit so the serve UI can clamp a
# user-typed context down to it (asking for ctx > n_ctx_train overflows
# and, with a quantized KV cache, can crash the GPU).
@@ -283,6 +378,16 @@ def setup_hwfit_routes():
if isinstance(v, (int, float)) and v > 0:
model_ctx_max = int(v)
break
path_ctx_max = int(path_meta.get("model_ctx_max") or 0)
if path_ctx_max > 0:
model_ctx_max = max(model_ctx_max, path_ctx_max)
model_weights_gb = float(path_meta.get("model_weights_gb") or 0)
if model_weights_gb <= 0:
for k in ("min_vram_gb", "required_gb", "size_gb", "recommended_ram_gb", "min_ram_gb"):
v = m.get(k)
if isinstance(v, (int, float)) and v > 0:
model_weights_gb = float(v)
break
return {
"system": system,
"profiles": compute_serve_profiles(
@@ -291,6 +396,8 @@ def setup_hwfit_routes():
serve_quant=(serve_quant or None),
),
"model_ctx_max": model_ctx_max,
"model_weights_gb": model_weights_gb,
"model_probe_error": path_meta.get("model_probe_error") or "",
}
@router.get("/image-models")
+95 -16
View File
@@ -406,8 +406,11 @@ def _endpoint_refresh_timeout(ep: Any, category: str) -> float:
except Exception:
val = 0
if val > 0:
return float(max(1, min(30, val)))
return 2.5 if category == "local" else 2.0
return float(max(1, min(60, val)))
# llama.cpp and other local OpenAI-compatible servers can block briefly
# while warming/loading. A 2s local timeout makes working endpoints flicker
# offline before /v1/models is ready.
return 10.0 if category == "local" else 2.0
def _manual_refresh_timeout(ep: Any, category: str, requested: Any = None) -> float:
@@ -474,7 +477,7 @@ def _explicit_model_list_timeout(base_url: str, endpoint_kind: str = "auto", req
category = _classify_endpoint(base_url, kind)
if kind in ("api", "proxy") or category == "api":
return 30.0
return 3.0 if _is_ollama_base(base_url) else 2.0
return 15.0 if category == "local" else (3.0 if _is_ollama_base(base_url) else 2.0)
def _cached_model_ids(ep: Any) -> List[str]:
@@ -579,6 +582,18 @@ def _safe_build_headers(api_key: Optional[str], base_url: str) -> dict:
return {"Authorization": f"Bearer {api_key}"} if api_key else {}
def _redact_url_for_log(url: str) -> str:
"""Return a URL safe for logs by removing userinfo and query/fragment."""
try:
parsed = urlparse(url or "")
host = parsed.hostname or ""
if parsed.port:
host = f"{host}:{parsed.port}"
return urlunparse((parsed.scheme, host, parsed.path, "", "", ""))
except Exception:
return "<endpoint>"
def _is_discovery_only_provider(provider: str) -> bool:
return provider == "chatgpt-subscription"
@@ -711,6 +726,16 @@ def _effective_endpoint_kind(ep: Any, base_url: str) -> str:
return "auto"
def _is_loading_model_response(resp: Any) -> bool:
if getattr(resp, "status_code", None) != 503:
return False
try:
body = resp.text or ""
except Exception:
body = ""
return "loading model" in body.lower()
def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]:
"""Probe a base URL's /models endpoint and return list of model IDs.
@@ -775,11 +800,14 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
models.append(_e)
return [m for m in models if _is_chat_model(m)]
except httpx.HTTPStatusError as e:
if e.response is not None and _is_loading_model_response(e.response):
logger.info("Endpoint still loading model at %s", _redact_url_for_log(url))
return []
if api_key:
status = e.response.status_code if e.response is not None else "unknown"
logger.warning(f"Failed to probe {url} with API key: HTTP {status}")
logger.warning("Failed to probe %s with API key: HTTP %s", _redact_url_for_log(url), status)
return []
logger.warning(f"Failed to probe {url}: {e}")
logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
except Exception as e:
if api_key:
logger.warning(f"Failed to probe {url} with API key: {e}")
@@ -824,6 +852,15 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
or "ollama" in (parsed_base.hostname or "").lower()
)
def _is_loading_model_response(r) -> bool:
if getattr(r, "status_code", None) != 503:
return False
try:
body = r.text or ""
except Exception:
body = ""
return "loading model" in body.lower()
def _result_from_response(r) -> Dict[str, Any]:
if 300 <= r.status_code < 400:
loc = r.headers.get("location", "")
@@ -840,6 +877,13 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
"status_code": r.status_code,
"error": None,
}
if _is_loading_model_response(r):
return {
"reachable": True,
"loading": True,
"status_code": r.status_code,
"error": "Loading model",
}
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
last_error: Optional[str] = None
@@ -872,7 +916,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
if 400 <= sc < 500 and sc not in (401, 403):
models_url = _safe_build_models_url(base)
try:
r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
r2 = httpx.get(models_url, headers=headers,timeout=timeout, verify=llm_verify())
result2 = _result_from_response(r2)
if result2["reachable"]:
return result2
@@ -1056,9 +1100,11 @@ def setup_model_routes(model_discovery):
except Exception:
return 0.0
def _failure_delay(fails: int) -> float:
def _failure_delay(fails: int, *, empty_local: bool = False) -> float:
if fails <= 0:
return 0.0
if empty_local:
return min(5.0 * (2 ** max(0, fails - 1)), 30.0)
return min(_REFRESH_FAILURE_BASE * (2 ** max(0, fails - 1)), _REFRESH_FAILURE_MAX)
def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]:
@@ -1089,7 +1135,12 @@ def setup_model_routes(model_discovery):
fails = int(state.get("fail_count") or 0)
if fails and not force:
last_failure = float(state.get("last_failure") or 0.0)
if now - last_failure < _failure_delay(fails):
empty_local = (
not cached
and category == "local"
and str(getattr(ep, "id", "") or "").startswith("local-")
)
if now - last_failure < _failure_delay(fails, empty_local=empty_local):
return False, info
if cached and not force:
interval = _endpoint_refresh_interval(ep, category)
@@ -1404,7 +1455,7 @@ def setup_model_routes(model_discovery):
t0 = _time.time()
ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
entry["latency_ms"] = round((_time.time() - t0) * 1000)
entry["status"] = "online" if ping.get("reachable") or cached_count else "offline"
entry["status"] = "loading" if ping.get("loading") else ("online" if ping.get("reachable") or cached_count else "offline")
entry["error"] = ping.get("error")
entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
except Exception as e:
@@ -1578,9 +1629,37 @@ def setup_model_routes(model_discovery):
# "everything's already cached" path because this branch only
# runs for endpoints with an empty cached_models.
if not all_models and not pinned and r.is_enabled:
ping = _ping_endpoint(r.base_url, r.api_key, timeout=3.5)
base_for_ping = _normalize_base(r.base_url)
kind_for_ping = _effective_endpoint_kind(r, base_for_ping)
ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5
ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout)
if ping.get("reachable"):
status = "empty"
status = "loading" if ping.get("loading") else "empty"
if ping.get("loading"):
base = _normalize_base(r.base_url)
kind = _effective_endpoint_kind(r, base)
results.append({
"id": r.id,
"name": r.name,
"base_url": r.base_url,
"has_key": bool(r.api_key),
"api_key_fingerprint": _api_key_fingerprint(r.api_key),
"is_enabled": r.is_enabled,
"models": visible,
"pinned_models": pinned,
"hidden_count": len(hidden),
"online": True,
"status": status,
"ping_error": (ping or {}).get("error") if ping else None,
"model_type": getattr(r, "model_type", None) or "llm",
"supports_tools": getattr(r, "supports_tools", None),
"endpoint_kind": kind,
"category": _classify_endpoint(base, kind),
"model_refresh_mode": _endpoint_refresh_mode(r, kind),
"model_refresh_interval": getattr(r, "model_refresh_interval", None),
"model_refresh_timeout": getattr(r, "model_refresh_timeout", None),
})
continue
# Best-effort: if the probe came back reachable, try
# to populate cached_models in the background so the
# NEXT picker load shows "online" instead of "empty".
@@ -1588,7 +1667,7 @@ def setup_model_routes(model_discovery):
# "empty" status, and the existing background refresh
# path will eventually fill it in too.
try:
probed = _probe_endpoint(r.base_url, r.api_key, timeout=5)
probed = _probe_endpoint(r.base_url, r.api_key, timeout=max(5, int(ping_timeout)))
if probed:
r.cached_models = json.dumps(probed)
db.commit()
@@ -1766,7 +1845,7 @@ def setup_model_routes(model_discovery):
model_ids = _probe_endpoint(base_url, api_key.strip() or None, timeout=explicit_timeout) if should_probe else []
ping = {"reachable": False, "error": None}
if (should_probe or requested_kind in ("api", "proxy")) and not model_ids:
ping = _ping_endpoint(base_url, api_key.strip() or None, timeout=min(explicit_timeout, 2.0))
ping = _ping_endpoint(base_url, api_key.strip() or None, timeout=min(explicit_timeout, 10.0))
if require_model_list and not model_ids:
raise HTTPException(400, _model_endpoint_error_message(base_url, ping))
@@ -1833,7 +1912,7 @@ def setup_model_routes(model_discovery):
"models": _merge_model_ids(model_ids, _pinned),
"pinned_models": _pinned,
"online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
"status": "online" if (model_ids or _pinned) else ("empty" if ping.get("reachable") else "offline"),
"status": "online" if (model_ids or _pinned) else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None,
"endpoint_kind": requested_kind,
"category": _classify_endpoint(base_url, requested_kind),
@@ -1858,11 +1937,11 @@ def setup_model_routes(model_discovery):
configured_timeout = _parse_positive_int(model_refresh_timeout, minimum=1, maximum=60)
probe_timeout = _explicit_model_list_timeout(base_url, requested_kind, configured_timeout)
models = _probe_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=min(probe_timeout, 2.0))
ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=min(probe_timeout, 10.0))
return {
"base_url": base_url,
"online": bool(models) or bool(ping.get("reachable")),
"status": "online" if models else ("empty" if ping.get("reachable") else "offline"),
"status": "online" if models else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None,
"models": models,
"count": len(models),
+358 -9
View File
@@ -331,6 +331,9 @@ def add_user_install_bins_to_path():
candidates.append(os.path.join(site.USER_BASE, 'bin'))
except Exception:
pass
candidates.append(os.path.expanduser('~/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build-vulkan/bin'))
candidates.append(os.path.expanduser('~/.local/bin'))
parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
changed = False
@@ -962,12 +965,84 @@ def setup_shell_routes() -> APIRouter:
return StreamingResponse(generate(), media_type="text/event-stream")
def _os_id_from_release(text: str) -> str:
"""Map /etc/os-release contents to a canonical family for our matrix."""
if not text:
return ""
ids = []
for line in text.splitlines():
line = line.strip()
if line.startswith("ID=") or line.startswith("ID_LIKE="):
ids += line.split("=", 1)[1].strip().strip('"').split()
ids = [i.lower() for i in ids]
if any(x in ids for x in ("debian", "ubuntu", "linuxmint", "pop", "elementary")):
return "debian"
if any(x in ids for x in ("arch", "manjaro", "endeavouros", "cachyos", "garuda")):
return "arch"
if any(x in ids for x in ("fedora", "rhel", "centos", "rocky", "almalinux", "ol")):
return "fedora"
if "alpine" in ids:
return "alpine"
if any(x in ids for x in ("suse", "opensuse", "opensuse-leap", "opensuse-tumbleweed", "sles")):
return "suse"
return ""
# Lookup tables used by `_install_cmd_for_target` to turn a list of missing
# `system_prereqs` names into one OS-specific install command
# (`sudo apt install -y …` etc.). All three tables are keyed by the same OS
# family ids: "debian", "arch", "fedora", "alpine", "suse", "macos".
# Backend-specific extras (CUDA toolkit, ROCm, Vulkan headers) are added
# only when the detected backend needs them.
#
# `_PKG_NAMES`: canonical prereq name → per-OS list of real package names.
# An empty list means nothing is installed for that prereq on that OS.
_PKG_NAMES = {
# canonical-name → {os_id: [actual_pkg_names_on_this_os]}
"cmake": {"debian": ["cmake"], "arch": ["cmake"], "fedora": ["cmake"], "alpine": ["cmake"], "suse": ["cmake"], "macos": ["cmake"]},
"build-essential": {"debian": ["build-essential"], "arch": ["base-devel"], "fedora": ["gcc", "gcc-c++", "make"], "alpine": ["build-base"], "suse": ["gcc-c++", "make"], "macos": []},
"g++": {"debian": ["g++"], "arch": ["gcc"], "fedora": ["gcc-c++"], "alpine": ["g++"], "suse": ["gcc-c++"], "macos": []},
"gcc": {"debian": ["gcc"], "arch": ["gcc"], "fedora": ["gcc"], "alpine": ["gcc"], "suse": ["gcc"], "macos": []},
"make": {"debian": ["make"], "arch": ["make"], "fedora": ["make"], "alpine": ["make"], "suse": ["make"], "macos": []},
"git": {"debian": ["git"], "arch": ["git"], "fedora": ["git"], "alpine": ["git"], "suse": ["git"], "macos": ["git"]},
"tmux": {"debian": ["tmux"], "arch": ["tmux"], "fedora": ["tmux"], "alpine": ["tmux"], "suse": ["tmux"], "macos": ["tmux"]},
}
# `_BACKEND_EXTRAS`: lower-case backend name → per-OS list of extra packages
# appended after the prereq packages. Backends not listed here add nothing.
_BACKEND_EXTRAS = {
"cuda": {"debian": ["nvidia-cuda-toolkit"], "arch": ["cuda"], "fedora": ["cuda-toolkit"], "alpine": [], "suse": ["cuda"], "macos": []},
"rocm": {"debian": ["rocm-dev"], "arch": ["rocm-hip-sdk"], "fedora": ["rocm-devel"], "alpine": [], "suse": ["rocm-dev"], "macos": []},
"vulkan": {"debian": ["libvulkan-dev", "vulkan-tools"], "arch": ["vulkan-headers", "vulkan-tools"], "fedora": ["vulkan-headers", "vulkan-tools"], "alpine": ["vulkan-loader-dev", "vulkan-tools"], "suse": ["vulkan-devel", "vulkan-tools"], "macos": []},
}
# `_PKG_MGR`: OS family → install command template. `{pkgs}` is replaced
# with the space-separated package list. An OS family missing from this
# table gets no install command at all.
_PKG_MGR = {
"debian": "sudo apt install -y {pkgs}",
"arch": "sudo pacman -S --needed {pkgs}",
"fedora": "sudo dnf install -y {pkgs}",
"alpine": "sudo apk add {pkgs}",
"suse": "sudo zypper install -n {pkgs}",
"macos": "brew install {pkgs}",
}
def _install_cmd_for_target(os_id: str, backend: str, missing: list[str]) -> str:
"""Build a single OS+backend-aware install command for the missing prereqs."""
if not os_id or os_id not in _PKG_MGR:
return ""
pkgs: list[str] = []
seen: set[str] = set()
for m in missing:
for p in _PKG_NAMES.get(m, {}).get(os_id, []):
if p not in seen:
pkgs.append(p); seen.add(p)
# Add backend-specific extras only when the build would actually
# consume them (a CUDA toolkit isn't useful on a Vulkan box).
backend = (backend or "").lower()
for p in _BACKEND_EXTRAS.get(backend, {}).get(os_id, []):
if p not in seen:
pkgs.append(p); seen.add(p)
if not pkgs:
return ""
return _PKG_MGR[os_id].format(pkgs=" ".join(pkgs))
@router.get("/api/cookbook/packages")
async def list_packages(
request: Request,
host: str | None = None,
ssh_port: str | None = None,
venv: str | None = None,
backend: str | None = None,
):
"""Check which optional packages are installed.
@@ -1016,6 +1091,12 @@ def setup_shell_routes() -> APIRouter:
"kind": "system",
"install_hint": "Install Docker on the selected server and allow this user to run docker.",
},
# Note: cmake / gcc / git are not separate dependency rows —
# they're declared as `system_prereqs` on llama_cpp (and any
# other engine that compiles from source) so they appear as
# an inline status note on that engine's row instead of
# cluttering the panel with raw OS package names that aren't
# meaningful product-level dependencies on their own.
# ── LLM ── installs on GPU servers for model serving/downloading
{
"name": "hf_transfer",
@@ -1027,9 +1108,16 @@ def setup_shell_routes() -> APIRouter:
{
"name": "llama_cpp",
"pip": "llama-cpp-python[server]",
"desc": "Serve GGUF models via llama.cpp",
"desc": "Great for single-GPU or CPU inference with GGUF models",
"category": "LLM",
"target": "remote",
# Build-toolchain prereqs. Cookbook's launch bootstrap
# compiles llama-server from source when no prebuilt
# binary is present; without these the build aborts
# with `cmake: command not found`. Surfaced inline on
# this row so the user doesn't have to chase three
# separate OS-package rows.
"system_prereqs": ["cmake", "g++", "git"],
},
{
"name": "sglang",
@@ -1041,7 +1129,7 @@ def setup_shell_routes() -> APIRouter:
{
"name": "vllm",
"pip": "vllm",
"desc": "High-throughput LLM serving engine",
"desc": "Great for high-throughput multi-GPU inference",
"category": "LLM",
"target": "remote",
},
@@ -1104,6 +1192,7 @@ def setup_shell_routes() -> APIRouter:
# venv over SSH so a remote `pip install` actually reflects here.
remote_status: dict = {}
remote_details: dict = {}
remote_probe_error = ""
remote_names = [
p["name"]
for p in packages
@@ -1142,16 +1231,56 @@ def setup_shell_routes() -> APIRouter:
break
except ValueError as e:
raise HTTPException(400, str(e))
except Exception:
except Exception as e:
remote_status = {}
if host and remote_system_names:
remote_probe_error = f"SSH package probe failed: {str(e)[:160]}"
if "llama_cpp" in remote_names:
try:
inner = (
'export PATH="$HOME/.local/bin:$HOME/bin:'
'$HOME/llama.cpp/build/bin:$HOME/llama.cpp/build-vulkan/bin:$PATH"; '
"command -v llama-server 2>/dev/null || true"
)
argv = _ssh_base_argv(host, ssh_port) + [inner]
proc = await asyncio.create_subprocess_exec(
*argv,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
out, _err = await asyncio.wait_for(proc.communicate(), timeout=8)
llama_server_path = out.decode("utf-8", errors="replace").strip().splitlines()
llama_server_path = llama_server_path[-1].strip() if llama_server_path else ""
if llama_server_path:
remote_status["llama_cpp"] = True
probe = remote_details.setdefault("llama_cpp", {})
if isinstance(probe, dict):
probe.setdefault("binaries", {})["llama-server"] = llama_server_path
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH llama-server probe failed: {str(e)[:160]}"
pass
# Union of system_names + every package's system_prereqs. Probing
# the prereqs alongside the main system deps in a single SSH call
# avoids a second round-trip per Cookbook → Dependencies refresh.
prereq_names: set[str] = set()
for p in packages:
for pr in p.get("system_prereqs") or []:
prereq_names.add(str(pr))
all_system_names = list(set(remote_system_names) | prereq_names)
# Detect the target's OS family + read /etc/os-release in the same
# SSH round-trip as the prereq probe — used downstream to render a
# single OS-specific install command per row instead of dumping
# every distro's syntax onto the user.
target_os_id: str = ""
if host and all_system_names:
try:
checks = []
for name in remote_system_names:
for name in all_system_names:
qn = shlex.quote(name)
checks.append(
f"if command -v {qn} >/dev/null 2>&1; then echo {qn}=1; else echo {qn}=0; fi"
)
checks.append("echo '---OSREL---'; cat /etc/os-release 2>/dev/null || true")
inner = " ; ".join(checks)
argv = _ssh_base_argv(host, ssh_port) + [inner]
proc = await asyncio.create_subprocess_exec(
@@ -1161,20 +1290,45 @@ def setup_shell_routes() -> APIRouter:
)
out, _err = await asyncio.wait_for(proc.communicate(), timeout=12)
txt = out.decode("utf-8", errors="replace").strip()
_section, _osrel_lines = "probe", []
for line in txt.splitlines():
if line.strip() == "---OSREL---":
_section = "osrel"; continue
if _section == "osrel":
_osrel_lines.append(line)
continue
name, sep, value = line.strip().partition("=")
if sep and name in remote_system_names:
if sep and name in all_system_names:
remote_status[name] = value == "1"
target_os_id = _os_id_from_release("\n".join(_osrel_lines))
except ValueError as e:
raise HTTPException(400, str(e))
except Exception:
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH system probe failed: {str(e)[:160]}"
pass
elif not host:
# Local target — probe in-process so the inline install command
# still appears in the dep panel when the cookbook container
# itself is the selected server.
try:
with open("/etc/os-release", encoding="utf-8") as f:
target_os_id = _os_id_from_release(f.read())
except Exception:
target_os_id = ""
if sys.platform == "darwin":
target_os_id = "macos"
for pkg in packages:
on_remote = bool(host and pkg.get("target") == "remote")
probe = None
if on_remote:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
if remote_probe_error and pkg["name"] not in remote_status:
pkg["installed"] = None
pkg["probe_error"] = remote_probe_error
pkg["status_note"] = remote_probe_error
else:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
probe = remote_details.get(pkg["name"])
if isinstance(probe, dict):
pkg["details"] = probe
@@ -1230,6 +1384,104 @@ def setup_shell_routes() -> APIRouter:
# 500 the entire packages panel; report it as not usable.
pkg["installed"] = False
# llama_cpp partial-state probe: when the package is installed
# but the wheel was built CPU-only AND the target has NVIDIA
# hardware, mark the row as partial (yellow/orange) with a
# one-click upgrade to the CUDA wheel. Without this the row
# reads "ready" green while inference runs at 3 tok/s on GPU
# silicon — actively misleading.
if pkg["name"] == "llama_cpp" and pkg.get("installed"):
_native_llama_server = bool(
isinstance(probe, dict)
and isinstance(probe.get("binaries"), dict)
and probe["binaries"].get("llama-server")
)
_gpu_capable = False
_has_nvidia_target = False
if _native_llama_server:
# Native llama-server is the launcher path Cookbook now
# prefers. Do not mark this as a CPU-only Python wheel just
# because llama-cpp-python is absent from the selected venv.
_gpu_capable = True
elif on_remote and host:
try:
# Activate the configured venv FIRST so the probe
# runs against the same python the launch script
# would activate. Without this prefix, bare
# `python3` was checked — which can disagree with
# the venv's wheel (e.g. user-site has CUDA wheel
# but venv has CPU-only), and the dep panel then
# showed "ready" green while every launch fell to
# CPU.
_vp = _venv_activate_prefix(venv)
probe = (
f'{_vp}python3 -c "import llama_cpp; import sys; '
'sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" '
'&& echo llama_cpp_gpu=1 || echo llama_cpp_gpu=0; '
'command -v nvidia-smi >/dev/null 2>&1 '
'&& nvidia-smi -L 2>/dev/null | grep -q "GPU " '
'&& echo nvidia=1 || echo nvidia=0'
)
argv = _ssh_base_argv(host, ssh_port) + [probe]
proc = await asyncio.create_subprocess_exec(
*argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
)
out, _ = await asyncio.wait_for(proc.communicate(), timeout=8)
txt = out.decode("utf-8", errors="replace")
if "llama_cpp_gpu=1" in txt:
_gpu_capable = True
if "nvidia=1" in txt:
_has_nvidia_target = True
except Exception:
pass
else:
try:
import llama_cpp as _lcp # type: ignore
_gpu_capable = bool(_lcp.llama_supports_gpu_offload())
except Exception:
_gpu_capable = False
_has_nvidia_target = shutil.which("nvidia-smi") is not None
if (not _gpu_capable) and _has_nvidia_target:
pkg["partial"] = True
pkg["partial_reason"] = "Installed but CPU-only wheel — GPU detected on this target. Upgrade to a CUDA wheel for ~10× faster inference."
pkg["partial_action"] = "reinstall_llama_cpp_cuda"
# Attach per-package system_prereqs status. We probed each
# prereq name above; surface "Missing build deps: …" ONLY
# when the package itself is not installed — if the package
# works (e.g. llama-cpp-python already imports cleanly), the
# build toolchain is irrelevant and surfacing it as a red
# flag confuses users ("ready" + "missing" on the same row).
_prereqs = list(pkg.get("system_prereqs") or [])
if _prereqs:
if on_remote:
_pr_present = {n: bool(remote_status.get(n)) for n in _prereqs}
else:
_pr_present = {n: shutil.which(n) is not None for n in _prereqs}
pkg["system_prereqs_status"] = _pr_present
_missing = [n for n, ok in _pr_present.items() if not ok]
# Suppress the "missing build deps" hint when the package
# itself is installed — build deps are only relevant if
# the user would need to recompile from source.
if pkg.get("installed"):
_missing = []
if _missing:
# Build a target-specific install command from the
# (os_family, backend) matrix when we know both. Fall
# back to the multi-distro hint only when the target's
# OS can't be classified (e.g. ssh probe failed).
_resolved_os = target_os_id or "debian" # safest default
_cmd = _install_cmd_for_target(_resolved_os, backend or "", _missing)
if _cmd and target_os_id:
_hint = "Missing build deps for this target: " + ", ".join(_missing)
pkg["install_cmd_for_target"] = _cmd
pkg["install_cmd_os"] = target_os_id
pkg["install_cmd_backend"] = (backend or "").lower()
else:
_hint = "Missing build deps: " + ", ".join(_missing) + ". Install via apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git."
_existing_note = pkg.get("status_note") or ""
pkg["status_note"] = (_existing_note + "" + _hint) if _existing_note else _hint
pkg["build_deps_missing"] = _missing
if pkg.get("installed"):
update_status = _package_pip_update_status(pkg, probe)
pkg["pip_update_available"] = update_status.available
@@ -1289,6 +1541,102 @@ def setup_shell_routes() -> APIRouter:
return {"ok": True, "output": stdout.decode()[-200:]}
return {"ok": False, "error": stderr.decode()[-300:]}
@router.post("/api/cookbook/install-system-deps")
async def install_system_deps(request: Request):
    """Install OS-level system packages (cmake/build-essential/git/tmux)
    on a remote target or in the local container. Admin only.

    Request JSON fields read here:
      packages    -- list of package names. Only names in ``ALLOWED`` are
                     kept; anything outside that allowlist is dropped, so
                     the route can't be coerced into installing arbitrary
                     OS packages.
      remote_host -- optional. When set, the script is appended to
                     ``_ssh_base_argv(host, ssh_port)``; otherwise it runs
                     through local ``bash -lc``.
      ssh_port    -- optional, handed to ``_ssh_base_argv`` unchanged.

    Uses ``sudo -n`` (passwordless) so the call returns a clear
    "needs sudo password" error instead of hanging when interactive sudo
    is required.

    Returns:
        A dict with ``ok``. A run that completed also carries
        ``exit_code``, ``output`` (last 1000 chars of stdout) and
        ``error`` (``None`` on success).

    Raises:
        HTTPException: 400 when building the argv raises ``ValueError``.
    """
    _require_admin(request)
    body = await request.json()
    # A JSON array/scalar body or a non-list "packages" value would
    # otherwise surface as an unhandled AttributeError/TypeError (HTTP 500).
    if not isinstance(body, dict):
        body = {}
    raw = body.get("packages") or []
    if not isinstance(raw, (list, tuple)):
        raw = []
    host = (body.get("remote_host") or "").strip()
    ssh_port = body.get("ssh_port")

    # Names users can request — must match canonical names used in the
    # deps catalog's `system_prereqs` field and on the System rows.
    ALLOWED = {"cmake", "build-essential", "g++", "gcc", "git", "tmux", "make"}
    pkgs = [str(p).strip() for p in raw if str(p).strip() in ALLOWED]
    if not pkgs:
        return {"ok": False, "error": "no installable packages requested (allowlist: " + ", ".join(sorted(ALLOWED)) + ")"}

    # Re-map to the right package name per OS. apt/dpkg use the names
    # as-is; pacman has base-devel for build-essential, etc.
    def _apt(names):
        return list(names)

    def _pacman(names):
        return ["base-devel" if n == "build-essential" else n for n in names]

    def _dnf(names):
        out = []
        for n in names:
            if n == "build-essential":
                out += ["gcc", "gcc-c++", "make"]
            elif n == "g++":
                out += ["gcc-c++"]
            else:
                out.append(n)
        return out

    def _brew(names):
        return [n for n in names if n not in ("build-essential", "g++", "gcc", "make")]

    # Build a single shell snippet that detects the package manager and
    # runs the right install. Non-interactive sudo (-n) only — if sudo
    # asks for a password the script reports it instead of hanging.
    apt_pkgs = " ".join(shlex.quote(p) for p in _apt(pkgs))
    pac_pkgs = " ".join(shlex.quote(p) for p in _pacman(pkgs))
    dnf_pkgs = " ".join(shlex.quote(p) for p in _dnf(pkgs))
    brew_pkgs = " ".join(shlex.quote(p) for p in _brew(pkgs))

    # `_brew` drops the compiler-toolchain names entirely. When nothing is
    # left, a bare `brew install` would only fail with a usage error, so
    # report an actionable message instead.
    if brew_pkgs:
        brew_step = f' brew install {brew_pkgs}; '
    else:
        brew_step = (
            ' echo "ERROR: none of the requested packages are installable via Homebrew; '
            'the compiler toolchain comes from the Xcode Command Line Tools '
            '(run: xcode-select --install)." >&2; exit 4; '
        )

    # Error messages go to stderr (>&2) so the route's error field
    # gets populated. Without the redirect, `echo "ERROR…"` on stdout
    # left stderr empty and the frontend toast fell through to a
    # bare "HTTP 200" instead of surfacing the real reason.
    script = (
        'set -e; '
        'if ! sudo -n true 2>/dev/null; then '
        ' echo "ERROR: passwordless sudo unavailable on this target. Run once: sudo apt install -y ' + " ".join(pkgs) + ' (or your distro equivalent: pacman -S, dnf install, brew install). After that, Cookbook can install the rest." >&2; exit 2; fi; '
        'if command -v apt-get >/dev/null 2>&1; then '
        f' sudo -n env DEBIAN_FRONTEND=noninteractive apt-get update -qq && sudo -n env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends {apt_pkgs}; '
        'elif command -v pacman >/dev/null 2>&1; then '
        f' sudo -n pacman -Sy --needed --noconfirm {pac_pkgs}; '
        'elif command -v dnf >/dev/null 2>&1; then '
        f' sudo -n dnf install -y {dnf_pkgs}; '
        'elif command -v brew >/dev/null 2>&1; then '
        + brew_step +
        'else '
        ' echo "ERROR: no supported package manager (apt/pacman/dnf/brew) on this target." >&2; exit 3; fi'
    )

    try:
        if host:
            argv = _ssh_base_argv(host, ssh_port) + [script]
        else:
            argv = ["bash", "-lc", script]
    except ValueError as e:
        raise HTTPException(400, str(e))

    try:
        proc = await asyncio.create_subprocess_exec(
            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
    except OSError as e:
        # e.g. the ssh/bash binary is missing — report it in the same
        # shape as other failures rather than raising.
        return {"ok": False, "error": f"Failed to start installer: {e}"}

    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout=180)
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); the child keeps running
        # unless it is killed and reaped explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # already exited between the timeout and the kill
        await proc.wait()
        return {"ok": False, "error": "Install timed out after 180s"}

    ok = (proc.returncode == 0)
    # Combine stderr + (last lines of stdout) into a single error
    # blob when ok=False — some package managers print useful failure
    # context to stdout, and a script that exits via `echo ...; exit N`
    # without `>&2` would otherwise hand back an empty error string
    # and force the frontend to show a bare "HTTP 200".
    err_txt = err.decode("utf-8", errors="replace").strip()
    out_txt = out.decode("utf-8", errors="replace").strip()
    if not ok:
        tail_out = out_txt[-500:] if out_txt else ""
        combined = err_txt or tail_out or f"exit code {proc.returncode}"
    else:
        combined = None
    return {
        "ok": ok,
        "exit_code": proc.returncode,
        "output": out_txt[-1000:],
        "error": combined,
    }
@router.post("/api/cookbook/rebuild-engine")
async def rebuild_engine(request: Request):
"""Clear the cached llama.cpp build so the next serve recompiles.
@@ -1309,7 +1657,8 @@ def setup_shell_routes() -> APIRouter:
return {"ok": False, "error": f"Unsupported engine: {engine}"}
host = str(body.get("remote_host") or "").strip()
ssh_port = body.get("ssh_port")
cmd = _llama_cpp_rebuild_cmd()
update_source = bool(body.get("update_source"))
cmd = _llama_cpp_rebuild_cmd(update_source=update_source)
try:
argv = (
(_ssh_base_argv(host, ssh_port) + [cmd])
+76 -3
View File
@@ -3,11 +3,16 @@ import os
import time
import json
import asyncio
import shutil
import uuid
from pathlib import Path
from fastapi import APIRouter, Request, File, UploadFile, HTTPException
from typing import List
import logging
from core.middleware import require_admin
from core.database import SessionLocal, GalleryImage
from src.auth_helpers import effective_user
from src.constants import GENERATED_IMAGES_DIR
from src.upload_handler import count_recent_uploads
logger = logging.getLogger(__name__)
@@ -50,6 +55,69 @@ def setup_upload_routes(upload_handler):
raise HTTPException(404, "File not found")
raise HTTPException(404, "File not found")
def _promote_chat_image_to_gallery(meta: dict, owner: str | None) -> str | None:
    """Make chat-uploaded images visible in Gallery without changing chat storage.

    The stored upload at ``meta["path"]`` is copied into
    ``GENERATED_IMAGES_DIR`` under a fresh random filename and a
    ``GalleryImage`` row (``model="chat-upload"``) is inserted for it.

    Args:
        meta: Upload metadata dict. Keys read here: ``name``, ``mime``,
            ``path``, ``hash``, ``width``, ``height``, ``size``.
        owner: Owner stored on the gallery row. When truthy it also scopes
            the duplicate lookup.

    Returns:
        The gallery image id (an existing active row with the same file hash
        is reused), or ``None`` when the upload is not an image, the source
        file is missing, or copying/inserting failed.
    """
    is_image_file = getattr(upload_handler, "is_image_file", None)
    if not callable(is_image_file):
        return None
    if not is_image_file(meta.get("name", ""), meta.get("mime", "")):
        return None
    source_path = meta.get("path")
    if not source_path or not os.path.isfile(source_path):
        return None

    db = SessionLocal()
    # Tracked so a failure after the copy started can remove the file;
    # otherwise a failed insert leaves an orphan in the gallery directory.
    dest_path = None
    try:
        file_hash = meta.get("hash")
        if file_hash:
            q = db.query(GalleryImage).filter(
                GalleryImage.file_hash == file_hash,
                GalleryImage.is_active == True,  # noqa: E712
            )
            if owner:
                q = q.filter(GalleryImage.owner == owner)
            existing = q.first()
            if existing:
                return existing.id

        image_dir = Path(GENERATED_IMAGES_DIR)
        image_dir.mkdir(parents=True, exist_ok=True)

        # Prefer the original filename's extension; fall back to the MIME
        # type, then to .png, when the extension is not a known image type.
        ext = Path(meta.get("name") or source_path).suffix.lower()
        if ext not in {".png", ".jpg", ".jpeg", ".webp", ".gif"}:
            mime_ext = {
                "image/png": ".png",
                "image/jpeg": ".jpg",
                "image/jpg": ".jpg",
                "image/webp": ".webp",
                "image/gif": ".gif",
            }.get(meta.get("mime", ""))
            ext = mime_ext or ".png"

        filename = f"{uuid.uuid4().hex[:12]}{ext}"
        dest_path = image_dir / filename
        shutil.copy2(source_path, dest_path)

        image_id = str(uuid.uuid4())
        db.add(GalleryImage(
            id=image_id,
            filename=filename,
            prompt=meta.get("name") or "Chat upload",
            model="chat-upload",
            owner=owner,
            file_hash=file_hash,
            width=meta.get("width"),
            height=meta.get("height"),
            file_size=meta.get("size"),
        ))
        db.commit()
        return image_id
    except Exception as e:
        db.rollback()
        logger.warning("Failed to add chat image upload to gallery: %s", e)
        if dest_path is not None:
            # Best-effort cleanup of the (possibly partial) copy; the chat
            # upload itself is untouched.
            try:
                dest_path.unlink(missing_ok=True)
            except OSError:
                pass
        return None
    finally:
        db.close()
@router.post("")
async def api_upload(request: Request, files: List[UploadFile] = File(...)):
@@ -78,8 +146,10 @@ def setup_upload_routes(upload_handler):
for u in files:
try:
meta = upload_handler.save_upload(u, client_ip, owner=effective_user(request))
out.append({
owner = effective_user(request)
meta = upload_handler.save_upload(u, client_ip, owner=owner)
gallery_id = _promote_chat_image_to_gallery(meta, owner)
item = {
"id": meta["id"],
"name": meta["name"],
"mime": meta["mime"],
@@ -89,7 +159,10 @@ def setup_upload_routes(upload_handler):
"width": meta.get("width"),
"height": meta.get("height"),
"is_duplicate": meta.get("is_duplicate", False)
})
}
if gallery_id:
item["gallery_id"] = gallery_id
out.append(item)
except HTTPException:
raise
except Exception as e:
+11 -1
View File
@@ -282,7 +282,17 @@ def _detect_amd():
"gpus": cards,
"gpu_groups": groups,
"homogeneous": len(groups) <= 1,
"backend": "rocm",
# Pick the actual runtime label: ROCm/HIP only when its
# toolchain is installed, otherwise Vulkan if vulkaninfo is
# present (mesa RADV works fine on RDNA/CDNA when ROCm
# packages are absent — see Strix Halo where ROCm support
# is still backporting). Reporting "rocm" on a Vulkan-only
# host misleads downstream env-var pinning
# (HIP_VISIBLE_DEVICES is a no-op there).
"backend": (
"rocm" if (_run(["which", "rocminfo"]) or _run(["which", "hipconfig"]))
else ("vulkan" if _run(["which", "vulkaninfo"]) else "rocm")
),
"unified_memory": is_apu,
# AMD ISA/family so downstream can tell datacenter Instinct (CDNA,
# where vLLM/SGLang run AWQ/GPTQ reliably) from consumer Radeon
+299 -23
View File
@@ -541,17 +541,44 @@ def _section_text(name: str, default: str) -> str:
return val if isinstance(val, str) and val.strip() else default
def _compact_tool_line(name: str, section: str) -> str:
"""One-line fenced-tool usage hint for compact/local prompts."""
text = (section or "").strip()
if not text:
return f"- `{name}`"
if text.startswith("- "):
return text
lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
usage = []
in_fence = False
for ln in lines:
if ln.startswith("```"):
usage.append(ln)
in_fence = not in_fence
if len(usage) >= 3:
break
continue
if in_fence and len(usage) < 3:
usage.append(ln)
if usage:
return f"- `{name}` — " + " ".join(usage)
return f"- `{name}` — " + lines[0][:160]
def _assemble_prompt(tool_names: set, disabled_tools: set = None, compact: bool = False) -> str:
"""Build the system prompt with only the specified tools included."""
disabled = disabled_tools or set()
included = tool_names - disabled
if compact:
tool_list = ", ".join(sorted(included)) if included else "none"
tool_lines = []
for name, _default_section in TOOL_SECTIONS.items():
if name in included:
tool_lines.append(_compact_tool_line(name, _section_text(name, _default_section)))
parts = [
"You are an AI assistant with tool access.",
f"Available tools: {tool_list}.",
_API_AGENT_RULES,
_AGENT_PREAMBLE,
"## Available tools\n" + ("\n".join(tool_lines) if tool_lines else "none"),
_AGENT_RULES,
]
parts.extend(_domain_rules_for_tools(included))
return "\n\n".join(parts)
@@ -617,11 +644,6 @@ _API_HOSTS = frozenset([
"api.perplexity.ai", "api.x.ai",
"ollama.com", "api.venice.ai", "api.kimi.com",
"api.githubcopilot.com",
# Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
# Without these, `_is_api_model` falls back to keyword sniffing on the
# model name, so well-behaved local servers don't get native tool
# schemas and the agent silently degrades to fenced-block parsing.
"localhost", "127.0.0.1", "host.docker.internal",
])
_MCP_KEYWORDS = frozenset(["mcp", "browse", "browser", "website", "calendar", "event", "email",
"gmail", "screenshot", "navigate", "click", "miniflux", "rss", "feed"])
@@ -649,6 +671,28 @@ def _is_ollama_openai_compat_url(endpoint_url: str) -> bool:
return parsed.port == 11434 and (path == "/v1" or path.startswith("/v1/"))
def _is_local_openai_compat_url(endpoint_url: str) -> bool:
try:
parsed = urlparse(endpoint_url or "")
except Exception:
return False
host = (parsed.hostname or "").lower()
path = (parsed.path or "").rstrip("/")
if not (path == "/v1" or path.startswith("/v1/")):
return False
if host in {"localhost", "127.0.0.1", "0.0.0.0", "host.docker.internal"}:
return True
if host.startswith("192.168.") or host.startswith("10."):
return True
if host.startswith("172."):
try:
second = int(host.split(".")[1])
return 16 <= second <= 31
except Exception:
return False
return False
def _endpoint_lookup_keys(endpoint_url: str) -> List[str]:
"""Candidate ModelEndpoint.base_url keys for a runtime chat URL."""
raw = (endpoint_url or "").strip()
@@ -712,6 +756,17 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
_LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
# Matches text that opens (after optional leading whitespace) with a
# greeting, laugh, thanks, or filler word, case-insensitively. The opener must
# end on a word boundary; everything after it is captured as the named group
# `tail`. No DOTALL flag is set, so `tail` cannot span a newline.
# Used by `_is_casual_low_signal`.
_CASUAL_OPENING_RE = re.compile(
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
re.IGNORECASE,
)
# Whole-word, case-insensitive task keywords (serving/Cookbook, email,
# documents, files, settings, ...). `_is_casual_low_signal` searches the
# `tail` after a casual opener for these and refuses to classify the message
# as casual when one is present.
_CASUAL_BLOCKLIST_RE = re.compile(
r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
r"download|model|email|document|doc|note|calendar|task|search|web|research|"
r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
re.IGNORECASE,
)
_EXPLICIT_CONTINUATION_RE = re.compile(
r"^\s*(?:"
r"yes|y|yeah|yep|ok|okay|sure|do it|go ahead|continue|carry on|"
@@ -721,6 +776,17 @@ _EXPLICIT_CONTINUATION_RE = re.compile(
r")\s*[.!?]*\s*$",
re.IGNORECASE,
)
# Whole-word, case-insensitive retry/failure phrases ("try again", "failed",
# "crashed", ...). `_is_contextual_retry_continuation` searches the latest
# user text for these as the first of its two conditions.
_RETRY_CONTINUATION_RE = re.compile(
r"\b(?:try again|retry|again|rerun|re-run|run it again|launch it again|"
r"start it again|failed|fails?|died|crashed|broke|insta|instantly)\b",
re.IGNORECASE,
)
# Whole-word, case-insensitive model-serving vocabulary (engine names, model
# family names, "cookbook", "preset", ...). `_is_contextual_retry_continuation`
# searches the recent conversation context for these as its second condition.
# NOTE(review): "ajax" and "gpu box" look deployment-specific — confirm they
# are intended to ship as generic triggers.
_COOKBOOK_CONTEXT_RE = re.compile(
r"\b(?:cookbook|serve|serving|served|launch|start|preset|vllm|sglang|"
r"llama\.?cpp|ollama|download|cached models?|model servers?|running models?|"
r"gpu box|ajax|qwen|gemma|llama|mistral|minimax)\b",
re.IGNORECASE,
)
def _is_explicit_continuation(text: str) -> bool:
@@ -728,6 +794,37 @@ def _is_explicit_continuation(text: str) -> bool:
return bool(_EXPLICIT_CONTINUATION_RE.match(str(text or "").strip()))
def _is_casual_low_signal(text: str) -> bool:
    """Report whether *text* is a short greeting/slang opener with no real ask.

    Such messages should not inherit stale context. The text must match
    ``_CASUAL_OPENING_RE``; the remainder after the opener must contain no
    ``_CASUAL_BLOCKLIST_RE`` keyword and at most two word-like tokens.
    """
    candidate = str(text or "").strip()
    opener = _CASUAL_OPENING_RE.match(candidate)
    if opener is None:
        return False

    remainder = opener.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(remainder) is not None:
        return False

    # A brief form of address after the opener is tolerated without listing
    # specific terms: "hey man", "yo dude", "sup <name>". Anything longer
    # probably carries an actual request and should take the normal
    # context/tooling path.
    return len(re.findall(r"[A-Za-z0-9_'-]+", remainder)) <= 2
def _is_contextual_retry_continuation(messages: List[Dict], text: str) -> bool:
    """Treat "try again / it failed" as a continuation only for active tool work.

    After a Cookbook launch the newest user turn is often just "try again it
    failed", with the model/host/command details sitting a turn or two
    earlier. The check is deliberately narrow so ordinary chat does not
    inherit stale Cookbook context: the latest text must contain a
    ``_RETRY_CONTINUATION_RE`` phrase AND the recent context (up to 5 user
    turns, 1200 chars) must match ``_COOKBOOK_CONTEXT_RE``.
    """
    latest = str(text or "").strip()
    if latest and _RETRY_CONTINUATION_RE.search(latest):
        window = _recent_context_for_retrieval(messages, max_user=5, max_chars=1200)
        return _COOKBOOK_CONTEXT_RE.search(window) is not None
    return False
def _assistant_requested_followup(messages: List[Dict]) -> bool:
"""True when the previous assistant turn asked for missing task details.
@@ -769,11 +866,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
which domain rule packs get appended to the system prompt.
"""
text = str(last_user or "").strip()
continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages)
retry_continuation = _is_contextual_retry_continuation(messages, text)
continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages) or retry_continuation
retrieval_query = _recent_context_for_retrieval(messages) if continuation else text
q = retrieval_query.lower()
if not text or bool(_LOW_SIGNAL_RE.match(text)):
if not text or bool(_LOW_SIGNAL_RE.match(text)) or _is_casual_low_signal(text):
return {
"low_signal": True,
"continuation": False,
@@ -886,6 +984,7 @@ def _build_system_prompt(
compact: bool = False,
owner: Optional[str] = None,
suppress_local_context: bool = False,
suppress_skills: bool = False,
active_email: Optional[Dict[str, str]] = None,
) -> List[Dict]:
"""Build agent system prompt, inject MCP/document context, merge consecutive system msgs."""
@@ -903,7 +1002,7 @@ def _build_system_prompt(
_ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
except Exception:
_ov_sig = ""
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context)
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context, suppress_skills)
if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
agent_prompt = _cached_base_prompt
# Skill index is user-editable (name + description), so it must never
@@ -913,6 +1012,7 @@ def _build_system_prompt(
disabled_tools, mcp_mgr, needs_admin, relevant_tools,
mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
suppress_local_context=suppress_local_context,
suppress_skills=suppress_skills,
)
else:
agent_prompt, _skill_index_block = _build_base_prompt(
@@ -924,6 +1024,7 @@ def _build_system_prompt(
compact=compact,
owner=owner,
suppress_local_context=suppress_local_context,
suppress_skills=suppress_skills,
)
if not active_document:
_cached_base_prompt = agent_prompt
@@ -1207,7 +1308,7 @@ def _build_system_prompt(
# few. If the teacher wrote a procedure for "open my X chat" last
# time the student failed, this is where the student finds it
# before deciding which tool to call.
if not suppress_local_context:
if not suppress_local_context and not suppress_skills:
try:
last_user = _extract_last_user_message(messages)
# Respect the user's skills-enabled toggle (mirrors memory_enabled).
@@ -1374,6 +1475,7 @@ def _build_base_prompt(
compact: bool = False,
owner: Optional[str] = None,
suppress_local_context: bool = False,
suppress_skills: bool = False,
):
"""Build the agent prompt with only relevant tools included.
@@ -1426,7 +1528,7 @@ def _build_base_prompt(
# The caller wraps it in untrusted_context_message and ships it as a
# user-role message — same treatment as the matched-skills block.
skill_index_block = ""
if not suppress_local_context:
if not suppress_local_context and not suppress_skills:
try:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
@@ -1851,6 +1953,7 @@ async def stream_agent_loop(
approved_plan: Optional[str] = None,
tool_policy: Optional[ToolPolicy] = None,
workspace: Optional[str] = None,
forced_tools: Optional[Set[str]] = None,
_is_teacher_run: bool = False,
) -> AsyncGenerator[str, None]:
"""Streaming agent loop generator.
@@ -1890,6 +1993,20 @@ async def stream_agent_loop(
_needs_admin = _detect_admin_intent(messages)
_last_user = _extract_last_user_message(messages)
_intent = _classify_agent_request(messages, _last_user)
_low_signal_turn = bool(_intent.get("low_signal"))
_casual_low_signal_turn = _is_casual_low_signal(_last_user)
_direct_low_signal = (
_low_signal_turn
and not bool(_intent.get("continuation"))
and not plan_mode
and not approved_plan
and not guide_only
and (_casual_low_signal_turn or active_document is None)
and (_casual_low_signal_turn or not active_email)
and (_casual_low_signal_turn or not workspace)
and not forced_tools
and not relevant_tools
)
# Tool retrieval uses the latest message by default. It may inherit recent
# user turns only for explicit continuations ("yes", "do it", "1").
_retrieval_query = str(_intent.get("retrieval_query") or _last_user)
@@ -1897,11 +2014,86 @@ async def stream_agent_loop(
"[agent-intent] latest=%r continuation=%s low_signal=%s domains=%s retrieval_query=%r",
_last_user[:120],
bool(_intent.get("continuation")),
bool(_intent.get("low_signal")),
_low_signal_turn,
sorted(_intent.get("domains") or []),
_retrieval_query[:200],
)
_mcp_disabled_map = _load_mcp_disabled_map() if mcp_mgr else {}
if _direct_low_signal:
logger.info("[agent] direct low-signal reply path for latest=%r", _last_user[:80])
direct_messages = [{"role": "user", "content": _last_user}]
direct_response = ""
direct_start = time.time()
direct_actual_model = model
real_input_tokens = 0
real_output_tokens = 0
try:
async for chunk in stream_llm_with_fallback(
[(endpoint_url, model, headers)] + list(fallbacks or []),
direct_messages,
temperature=temperature,
max_tokens=min(max_tokens or 128, 128),
prompt_type=None,
tools=None,
timeout=int(get_setting("agent_stream_timeout_seconds", 300) or 300),
session_id=session_id,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
data = json.loads(chunk[6:])
except json.JSONDecodeError:
yield chunk
continue
if data.get("type") == "usage":
usage = data.get("data", {}) or {}
direct_actual_model = usage.get("model") or direct_actual_model
real_input_tokens += usage.get("input_tokens", 0) or 0
real_output_tokens += usage.get("output_tokens", 0) or 0
continue
if data.get("type") == "model_actual":
direct_actual_model = data.get("model") or direct_actual_model
data["requested_model"] = model
yield f"data: {json.dumps(data)}\n\n"
continue
if data.get("type") == "fallback":
direct_actual_model = data.get("answered_by") or direct_actual_model
yield chunk
continue
if "delta" in data:
if not data.get("thinking"):
direct_response += data.get("delta", "")
yield chunk
continue
yield chunk
elif chunk.startswith("event: "):
yield chunk
except Exception as _direct_err:
logger.warning("[agent] direct low-signal path failed: %s", _direct_err)
fallback = "Hey."
direct_response += fallback
yield f"data: {json.dumps({'delta': fallback})}\n\n"
if not direct_response.strip():
fallback = "Hey."
direct_response = fallback
yield f"data: {json.dumps({'delta': fallback})}\n\n"
duration = time.time() - direct_start
metrics = {
"model": direct_actual_model,
"requested_model": model,
"input_tokens": real_input_tokens or estimate_tokens(direct_messages),
"output_tokens": real_output_tokens or max(len(direct_response) // 4, 1),
"total_time": round(duration, 2),
"response_time": round(duration, 2),
"agent_rounds": 0,
"tool_calls": 0,
"direct_low_signal": True,
}
yield f"data: {json.dumps({'type': 'metrics', 'data': metrics})}\n\n"
yield "data: [DONE]\n\n"
return
if plan_mode and mcp_mgr:
# Allow read-only MCP tools to investigate, block write/unknown ones:
# hide them from the schemas AND reject them at runtime by qualified name.
@@ -1913,11 +2105,11 @@ async def stream_agent_loop(
# RAG-based tool selection: retrieve relevant tools for this query.
# If caller provided a pre-computed set (e.g. task_scheduler), use that.
_relevant_tools = set() if guide_only else relevant_tools
_relevant_tools = relevant_tools
_t1 = time.time()
if _relevant_tools:
logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
if not guide_only and not _relevant_tools and _low_signal_turn:
from src.tool_index import ALWAYS_AVAILABLE
if workspace:
# An active workspace IS the file-work signal: a vague "look at the
@@ -2008,6 +2200,15 @@ async def stream_agent_loop(
if _relevant_tools is not None and active_document is not None:
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
# Per-request UI toggles are stronger than retrieval. If the user turns on
# Search, the model must see the search tools even when the latest text is a
# typo or otherwise low-signal for tool RAG.
if not guide_only and forced_tools:
if _relevant_tools is None:
from src.tool_index import ALWAYS_AVAILABLE
_relevant_tools = set(ALWAYS_AVAILABLE)
_relevant_tools.update(t for t in forced_tools if t not in disabled_tools)
# The skill index injected by _build_system_prompt tells the model to
# call `manage_skills action=view`, and Jaccard-matched skills are pasted
# into the prompt as procedures to follow — but neither path goes through
@@ -2015,7 +2216,7 @@ async def stream_agent_loop(
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas
# in lockstep: manage_skills is callable whenever any skill is indexed,
# and a matched skill's declared requires_toolsets ride along with it.
if not guide_only and _relevant_tools is not None:
if not guide_only and _relevant_tools is not None and not _low_signal_turn:
try:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
@@ -2080,7 +2281,7 @@ async def stream_agent_loop(
_model_supports_tools = any(kw in _model_lc for kw in (
"gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
"qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
"llama-3.3", "llama-4",
"llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
# Local-served models that follow OpenAI-style function calling
# via vLLM's `--enable-auto-tool-choice`. Belt-and-suspenders
# with the per-endpoint flag above.
@@ -2122,13 +2323,15 @@ async def stream_agent_loop(
_is_api_model = False
else:
_is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
_compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat
messages, mcp_schemas = _build_system_prompt(
messages, model, active_document, mcp_mgr, disabled_tools,
needs_admin=_needs_admin, relevant_tools=_relevant_tools,
mcp_disabled_map=_mcp_disabled_map,
compact=_is_api_model,
compact=_compact_agent_prompt,
owner=owner,
suppress_local_context=guide_only,
suppress_skills=_low_signal_turn,
active_email=active_email,
)
if plan_mode and not guide_only:
@@ -2214,6 +2417,14 @@ async def stream_agent_loop(
# Strip internal metadata keys before sending to the LLM API
messages = [{k: v for k, v in msg.items() if k != "_protected"} for msg in messages]
agent_prompt_tokens = estimate_tokens(messages)
logger.info(
"[agent-timing] prep_done model=%s prompt_tokens=%s context_length=%s prep=%s",
model,
agent_prompt_tokens,
context_length,
{k: round(v, 3) for k, v in prep_timings.items()},
)
yield f"data: {json.dumps({'type': 'agent_prep', 'data': {k: round(v, 3) for k, v in prep_timings.items()}})}\n\n"
full_response = ""
@@ -2358,6 +2569,19 @@ async def stream_agent_loop(
# complementary cap for the rare stream that trickles bytes forever and
# so never trips the inactivity timeout. Generous — only catches runaway.
_round_deadline = time.time() + max(agent_stream_timeout * 4, 1200)
_round_start = time.time()
_round_first_event_logged = False
_round_first_token_logged = False
logger.info(
"[agent-timing] round_start round=%s model=%s endpoint=%s prompt_tokens=%s tools=%s native_tools=%s timeout=%s",
round_num,
model,
endpoint_url,
estimate_tokens(messages),
len(_tool_names_sent),
bool(all_tool_schemas),
agent_stream_timeout,
)
async for chunk in stream_llm_with_fallback(
_candidates,
messages,
@@ -2368,11 +2592,30 @@ async def stream_agent_loop(
timeout=agent_stream_timeout,
session_id=session_id,
):
if not _round_first_event_logged:
_round_first_event_logged = True
logger.info(
"[agent-timing] first_event round=%s elapsed=%.3fs kind=%s",
round_num,
time.time() - _round_start,
"error" if chunk.startswith("event: error") else "data",
)
if time.time() > _round_deadline:
logger.warning(f"[agent] round {round_num} stream exceeded wall-clock deadline; cutting off")
logger.warning(
"[agent-timing] round_deadline round=%s elapsed=%.3fs deadline_s=%s",
round_num,
time.time() - _round_start,
max(agent_stream_timeout * 4, 1200),
)
break
# Forward error events from stream_llm to the frontend
if chunk.startswith("event: error"):
logger.warning(
"[agent-timing] stream_error round=%s elapsed=%.3fs chunk=%r",
round_num,
time.time() - _round_start,
chunk[:500],
)
yield chunk
continue
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
@@ -2452,6 +2695,15 @@ async def stream_agent_loop(
if not first_token_received:
time_to_first_token = time.time() - total_start
first_token_received = True
if not _round_first_token_logged:
_round_first_token_logged = True
logger.info(
"[agent-timing] first_visible_token round=%s elapsed=%.3fs total_elapsed=%.3fs thinking=%s",
round_num,
time.time() - _round_start,
time.time() - total_start,
bool(data.get("thinking")),
)
# Keep reasoning deltas in a separate accumulator so
# we can echo them back via `reasoning_content` on the
# next request (DeepSeek requires this; harmless for
@@ -2521,7 +2773,21 @@ async def stream_agent_loop(
yield chunk
# Intercept [DONE] — don't forward until all rounds finish
tool_blocks, used_native = _resolve_tool_blocks(round_response, native_tool_calls, round_num, is_api_model=_is_api_model)
logger.info(
"[agent-timing] round_stream_done round=%s elapsed=%.3fs text_chars=%s tool_calls=%s first_event=%s first_token=%s",
round_num,
time.time() - _round_start,
len(round_response),
len(native_tool_calls),
_round_first_event_logged,
_round_first_token_logged,
)
tool_blocks, used_native = _resolve_tool_blocks(
round_response,
native_tool_calls,
round_num,
is_api_model=(_is_api_model and not guide_only),
)
# Force-answer round: we told the model to STOP calling tools and
# answer. If it ignored that and emitted a (possibly DSML) tool
@@ -2605,7 +2871,7 @@ async def stream_agent_loop(
# model with no real native_tool_calls) must not be stripped from the
# persisted text either — otherwise it streams once and then disappears
# on reload (#3222 follow-up).
cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native)).strip()
cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native and not guide_only)).strip()
round_texts.append(cleaned_round)
if not tool_blocks:
@@ -2677,6 +2943,15 @@ async def stream_agent_loop(
_intent_nudge_count += 1
_matched_phrase = _intent_match.group(0).strip()
logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
_lower_phrase = _matched_phrase.lower()
_cookbook_log_hint = ""
if any(_word in _lower_phrase for _word in ("log", "logs", "output", "tail", "status")):
_cookbook_log_hint = (
" If this is about a Cookbook/model serve, the concrete calls are: "
"`list_served_models` first, then `tail_serve_output` with the "
"session_id from the serve/list result. Never answer with "
"\"check logs\" when those tools are available."
)
messages.append({
"role": "system",
"content": (
@@ -2685,6 +2960,7 @@ async def stream_agent_loop(
"see you announced the action but didn't run it, which "
"is the most frustrating thing you can do. "
"DO IT NOW: emit the actual function call this turn. "
f"{_cookbook_log_hint}"
"If you decided not to do it after all, say so plainly in "
"one sentence instead of restating the plan."
),
+13 -1
View File
@@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
next_seq += 1
if run.status != "running":
return
heartbeat_idx = 0
while True:
seq, ev = await q.get()
try:
seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
except asyncio.TimeoutError:
# Keep slow local models/proxies alive while they prefill before
# the first token. SSE comments are ignored by the UI but reset
# browser/proxy idle timers, which prevents "empty response"
# disconnects on llama.cpp first-token latencies of 30s+.
if run.status == "running":
heartbeat_idx += 1
yield f": heartbeat {heartbeat_idx}\n\n"
continue
seq, ev = (None, None)
if seq is None: # end sentinel
while next_seq < len(run.buffer): # flush any tail the sentinel raced
yield run.buffer[next_seq]
+33 -11
View File
@@ -7,6 +7,7 @@ from src.constants import MAX_OUTPUT_CHARS
class WebSearchTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.search import comprehensive_web_search
progress_cb = ctx.get("progress_cb") if isinstance(ctx, dict) else None
raw = content.strip()
query = raw
time_filter = None
@@ -37,18 +38,39 @@ class WebSearchTool:
elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"):
time_filter = "week"
loop = asyncio.get_running_loop()
text, sources = await asyncio.wait_for(
loop.run_in_executor(
None,
lambda: comprehensive_web_search(
query,
max_pages=max_pages,
time_filter=time_filter,
return_sources=True,
if progress_cb:
await progress_cb({
"elapsed_s": 0,
"tail": f"Searching web for: {query[:160]}",
})
try:
text, sources = await asyncio.wait_for(
loop.run_in_executor(
None,
lambda: comprehensive_web_search(
query,
max_pages=max_pages,
time_filter=time_filter,
return_sources=True,
),
),
),
timeout=30,
)
timeout=30,
)
except asyncio.TimeoutError:
return {
"error": f"web_search timed out after 30s: {query[:200]}",
"exit_code": 1,
}
except Exception as e:
return {
"error": f"web_search failed: {type(e).__name__}: {str(e) or 'no details'}",
"exit_code": 1,
}
if progress_cb:
await progress_cb({
"elapsed_s": 30,
"tail": "Search completed; preparing sources.",
})
output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text
if sources:
output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
+50 -48
View File
@@ -76,8 +76,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
import json
import re
from src.constants import DATA_DIR
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
from src.llm_core import llm_call_async_with_fallback
from src.memory import MemoryManager
manager = MemoryManager(DATA_DIR)
@@ -116,10 +115,9 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
if len(group_memories) < 2:
return False
url, model, headers = resolve_endpoint("utility", owner=group_owner or None)
if not url or not model:
url, model, headers = resolve_endpoint("default", owner=group_owner or None)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=group_owner or None)
if not candidates:
return False
try:
@@ -147,13 +145,11 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
"\"drop\":[{\"id\":\"existing id\",\"reason\":\"short reason\"}]}\n\n"
f"MEMORIES:\n{json.dumps(items, ensure_ascii=False)}"
)
raw = await llm_call_async(
url=url,
model=model,
raw = await llm_call_async_with_fallback(
candidates,
messages=[{"role": "user", "content": prompt}],
temperature=0.0,
max_tokens=4096,
headers=headers,
timeout=120,
)
from src.text_helpers import strip_think
@@ -604,8 +600,7 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
try:
from datetime import timedelta
from core.database import SessionLocal, CalendarEvent
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
from src.llm_core import llm_call_async_with_fallback
import re as _re, json as _json
db = SessionLocal()
@@ -620,10 +615,9 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
if not events:
return "No upcoming events to classify", True
llm_url, llm_model, llm_headers = resolve_endpoint("utility", owner=owner)
if not llm_url:
llm_url, llm_model, llm_headers = resolve_endpoint("default", owner=owner)
llm_available = bool(llm_url and llm_model)
from src.task_endpoint import resolve_task_candidates
llm_candidates = resolve_task_candidates(owner=owner)
llm_available = bool(llm_candidates)
# Pull user memories so the LLM has personal context (relationships,
# job, hobbies). Helps it know e.g. "<name> is your spouse" so their
@@ -699,11 +693,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
f"EVENTS: {_json.dumps(items)}"
)
try:
raw = await llm_call_async(
url=llm_url, model=llm_model,
raw = await llm_call_async_with_fallback(
llm_candidates,
messages=[{"role": "user", "content": prompt}],
temperature=0.1, max_tokens=16384,
headers=llm_headers, timeout=180,
timeout=180,
)
from src.text_helpers import strip_think as _st
raw = _st(raw or "", prose=False, prompt_echo=False)
@@ -810,8 +804,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
import asyncio as _aio
from datetime import datetime as _dt, timedelta as _td
from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
from src.llm_core import llm_call_async_with_fallback
# 1. Pull recent UIDs + From headers cheaply (header-only fetch).
def _pull_headers():
@@ -891,11 +884,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
if not eligible:
return "All sender sigs already cached (or no eligible senders)", True
url, model, headers = resolve_endpoint("utility", owner=owner)
if not url or not model:
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No LLM endpoint available", False
model = candidates[0][1]
analyzed = 0
no_sig = 0
@@ -949,11 +942,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
)
try:
raw = await llm_call_async(
url=url, model=model,
raw = await llm_call_async_with_fallback(
candidates,
messages=[{"role": "user", "content": prompt}],
temperature=0.0, max_tokens=600,
headers=headers, timeout=60,
timeout=60,
)
from src.text_helpers import strip_think as _st
sig = _st(raw or "", prose=False, prompt_echo=False).strip()
@@ -1137,7 +1130,6 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
from routes.skills_routes import _run_skill_test_once, _skill_test_task
from src.endpoint_resolver import resolve_endpoint
# #3 SCOPE GUARD: refuse to run on a None/empty owner — otherwise
# `sm.load(owner=None)` returns every user's skills and we'd cross-
@@ -1152,27 +1144,40 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
if not names:
raise TaskNoop("no skills to test")
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No Default/Utility model configured — set one in Settings.", False
# #2 NO SILENT MODEL SWAP: if the configured model isn't served by the
# endpoint, try a basename match — but fail loudly instead of grabbing
# `avail[0]` which could be an embedding-only model and produce 36
# garbage transcripts → 36 'unknown' verdicts with no hint why.
url, model, headers = candidates[0]
try:
from src.llm_core import list_model_ids
avail = list_model_ids(url, headers=headers)
if avail and model not in avail:
import os as _os
base = _os.path.basename((model or "").rstrip("/"))
m = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
if m:
model = m
else:
return (f"Default model '{model}' not served by endpoint {url}. "
f"Available: {', '.join(avail[:8])}{'' if len(avail) > 8 else ''}. "
"Set a valid Default model in Settings."), False
import os as _os
selected = None
mismatch_notes = []
for cand_url, cand_model, cand_headers in candidates:
avail = list_model_ids(cand_url, headers=cand_headers)
if not avail or cand_model in avail:
selected = (cand_url, cand_model, cand_headers)
break
base = _os.path.basename((cand_model or "").rstrip("/"))
matched = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
if matched:
selected = (cand_url, matched, cand_headers)
break
mismatch_notes.append(
f"{cand_model} not served by {cand_url}; available: "
f"{', '.join(avail[:8])}{'...' if len(avail) > 8 else ''}"
)
if selected:
url, model, headers = selected
elif mismatch_notes:
return "No configured task fallback model is served. " + " | ".join(mismatch_notes[:3]), False
except Exception as _e:
logger.warning(f"test_skills model resolve check failed (continuing): {_e}")
@@ -1483,7 +1488,6 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
from pathlib import Path as _P
from core.database import SessionLocal as _SL, EmailAccount as _EA
from routes.email_helpers import _imap_connect, _decode_header
from src.endpoint_resolver import resolve_endpoint, resolve_utility_fallback_candidates
from src.llm_core import llm_call_async_with_fallback
# Per-owner state file so multi-user runs don't clobber each other's
@@ -1505,12 +1509,10 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
# ── 1. Resolve LLM candidates (utility primary + utility fallbacks; fall
# through to default chat as a last resort).
url, model, headers = resolve_endpoint("utility", owner=owner)
if not url or not model:
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No LLM endpoint available", False
candidates = [(url, model, headers)] + resolve_utility_fallback_candidates(owner=owner)
# ── 2. Enumerate enabled accounts. Match this task's owner AND fall
# back to the legacy "unowned account whose imap_user / from_address
+1 -1
View File
@@ -4,7 +4,7 @@ import os
from src.runtime_paths import get_app_root, get_default_data_dir
APP_VERSION = "1.0.0"
APP_VERSION = "1.0.1"
# Base paths
BASE_DIR = os.path.join(get_app_root(), "")
+3
View File
@@ -424,6 +424,9 @@ def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list:
settings = load_settings()
utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip()
if not utility_ep:
utility_chain = get_user_setting("utility_model_fallbacks", owner or "", settings.get("utility_model_fallbacks") or []) or []
if utility_chain:
return _resolve_fallback_candidates("utility_model_fallbacks", owner=owner)
return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
except Exception:
pass
+6 -1
View File
@@ -907,7 +907,10 @@ def _anthropic_rejects_temperature(model: str) -> bool:
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
# Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
_THINKING_MODEL_PATTERNS = (
"qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax",
"m2-reap", "gemma", "stepfun", "step-3", "step3",
)
def _supports_thinking(model: str) -> bool:
"""Check if model supports structured thinking output."""
@@ -2135,6 +2138,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
yield _stream_delta_event(reasoning, thinking=True)
content = delta.get("content") or ""
if content:
content = re.sub(r"<mm:think(\s+[^>]*)?>", r"<think\1>", content, flags=re.IGNORECASE)
content = re.sub(r"</mm:think>", "</think>", content, flags=re.IGNORECASE)
stripped = content.lstrip()
# gpt-oss harmony format (<|channel|>analysis/final): route via the harmony
# stream router. Sticky once the first marker appears — distinct from the
+64 -2
View File
@@ -1,6 +1,11 @@
"""Shared resolver for background-task AI endpoint (auto-naming, memory, sorting)."""
"""Shared resolver for background-task AI endpoints."""
from src.endpoint_resolver import resolve_endpoint
from src.endpoint_resolver import (
resolve_chat_fallback_candidates,
resolve_endpoint,
resolve_utility_fallback_candidates,
)
from src.llm_core import llm_call_async_with_fallback
def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None):
@@ -11,3 +16,60 @@ def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_heade
endpoint cannot be resolved.
"""
return resolve_endpoint("task", fallback_url, fallback_model, fallback_headers, owner=owner)
def resolve_task_candidates(
    fallback_url=None,
    fallback_model=None,
    fallback_headers=None,
    owner=None,
):
    """Build the ordered list of LLM candidates for background tasks.

    Candidates are collected in this order, keeping only the first
    occurrence of each ``(url, model)`` pair:
      1. configured Background Tasks endpoint/model, or caller fallback
      2. Utility endpoint/model
      3. Default endpoint/model
      4. Utility fallback chain
      5. Default fallback chain

    Returns:
        A list of ``(url, model, headers)`` tuples; ``headers`` is always a
        dict-like value (``{}`` when the resolver gave a falsy one). The list
        is empty when nothing resolves to both a URL and a model.
    """
    ordered = []

    def _add_unique(url, model, headers):
        # Entries missing either half are unusable; duplicates of an already
        # queued (url, model) pair would only repeat the same failing call.
        if url and model:
            already_queued = any(u == url and m == model for u, m, _ in ordered)
            if not already_queued:
                ordered.append((url, model, headers or {}))

    task_url, task_model, task_headers = resolve_task_endpoint(
        fallback_url, fallback_model, fallback_headers, owner=owner
    )
    _add_unique(task_url, task_model, task_headers)

    for role in ("utility", "default"):
        _add_unique(*resolve_endpoint(role, owner=owner))

    for resolve_chain in (
        resolve_utility_fallback_candidates,
        resolve_chat_fallback_candidates,
    ):
        for url, model, headers in resolve_chain(owner=owner):
            _add_unique(url, model, headers)

    return ordered
async def task_llm_call_async(
    messages,
    *,
    fallback_url=None,
    fallback_model=None,
    fallback_headers=None,
    owner=None,
    **kwargs,
):
    """Run ``messages`` through the shared background-task candidate chain.

    The candidates come from ``resolve_task_candidates`` (the caller-supplied
    fallback endpoint is forwarded to it); remaining keyword arguments are
    passed straight through to ``llm_call_async_with_fallback``.

    Raises:
        RuntimeError: when no candidate endpoint could be resolved.
    """
    chain = resolve_task_candidates(
        fallback_url=fallback_url,
        fallback_model=fallback_model,
        fallback_headers=fallback_headers,
        owner=owner,
    )
    if chain:
        return await llm_call_async_with_fallback(chain, messages=messages, **kwargs)
    raise RuntimeError("No LLM endpoint available for background task")
+32 -13
View File
@@ -886,6 +886,14 @@ class TaskScheduler:
owner=task.owner,
body=run.result if output == "notification" else None,
)
elif run.status == "error":
self.add_notification(
task.name,
"error",
task_id,
owner=task.owner,
body=run.error or run.result,
)
# Log result to the assistant chat so all task activity is visible.
# Skip skipped/error rows — user shouldn't see "skipped: …" noise
@@ -1468,12 +1476,18 @@ class TaskScheduler:
)
except Exception as e:
logger.warning(f"Agent loop failed for task '{task.name}', falling back to simple call: {e}")
from src.llm_core import llm_call_async
from src.task_endpoint import task_llm_call_async
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": task.prompt},
]
result = await llm_call_async(url=endpoint_url, model=model, messages=messages, timeout=120)
result = await task_llm_call_async(
messages,
fallback_url=endpoint_url,
fallback_model=model,
owner=task.owner,
timeout=120,
)
# Strip the model's chain-of-thought before saving/delivering. Task
# output is LLM-only, so prose=True (which also removes untagged
@@ -1698,13 +1712,17 @@ class TaskScheduler:
# Honor per-task max_steps (defense against runaway agent loops).
# Falls back to 20 if not set — the historical default.
_task_max_rounds = task.max_steps if task.max_steps and task.max_steps > 0 else 20
# Tasks are background workloads they share the Utility model's
# fallback chain (Settings → Utility Model → Fallbacks). A downed
# primary endpoint won't silently yield `(no output)` — same recipe
# chat uses but with the utility list (`utility_model_fallbacks`).
# Tasks are background workloads: use the shared task fallback chain
# behind the primary endpoint so a downed primary won't silently yield
# `(no output)`.
try:
from src.endpoint_resolver import resolve_utility_fallback_candidates
_task_fallbacks = resolve_utility_fallback_candidates(owner=task.owner or None)
from src.task_endpoint import resolve_task_candidates
_task_fallbacks = resolve_task_candidates(
fallback_url=endpoint_url,
fallback_model=model,
fallback_headers=headers,
owner=task.owner or None,
)[1:]
except Exception:
_task_fallbacks = []
async for event_str in stream_agent_loop(
@@ -1741,21 +1759,22 @@ class TaskScheduler:
# asking it to summarize what it did. Guarantees output.
if not full_text.strip():
try:
from src.llm_core import llm_call_async_with_fallback
from src.endpoint_resolver import resolve_utility_fallback_candidates
from src.task_endpoint import task_llm_call_async
grace_context = "You ran out of steps. "
if tool_results:
grace_context += "Here's what your tools returned:\n" + "\n".join(tool_results[-5:])
else:
grace_context += "No tool results were captured."
grace_context += "\n\nSummarize what you accomplished and what's still pending. Be concise."
_grace_candidates = [(endpoint_url, model, headers)] + resolve_utility_fallback_candidates(owner=task.owner or None)
full_text = await llm_call_async_with_fallback(
_grace_candidates,
full_text = await task_llm_call_async(
messages=[
{"role": "system", "content": system_content},
{"role": "user", "content": grace_context},
],
fallback_url=endpoint_url,
fallback_model=model,
fallback_headers=headers,
owner=task.owner or None,
timeout=30,
)
full_text = (full_text or "").strip()
+26 -5
View File
@@ -1268,8 +1268,8 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
_ALIASES = {
"shell": ["bash"],
"terminal": ["bash"],
"search": ["web_search"],
"web": ["web_search"],
"search": ["web_search", "web_fetch"],
"web": ["web_search", "web_fetch"],
"browser": ["builtin_browser"],
"documents": ["create_document", "edit_document", "update_document", "suggest_document"],
"doc": ["create_document", "edit_document", "update_document", "suggest_document"],
@@ -1281,7 +1281,7 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
"notes": ["manage_notes"],
"calendar": ["manage_calendar"],
"email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
"research": ["web_search"], # research is a per-request flag, not a tool — closest analog
"research": ["web_search", "web_fetch"], # research is a per-request flag, not a tool — closest analog
}
if action == "list_tools":
@@ -2863,13 +2863,25 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
)
note = "" if registered else " (state-write failed — task may not show in UI)"
where = host or "local"
log_path = f"/tmp/odysseus-tmux/{sid}.log"
return {
"output": f"Serving {repo_id} (session: {sid}){note}",
"output": (
f"Serving {repo_id} on {where} (session: {sid}){note}\n"
f"Next required check: call list_served_models. If this task is not ready, "
f"call tail_serve_output with session_id={sid} and tail=400 before answering. "
f"Do not tell the user to check logs; you have the log tool."
),
"session_id": sid,
"task_type": "serve",
"phase": "running",
"host": host,
"endpoint_id": endpoint_id,
"log_path": log_path,
"next_tools": [
{"name": "list_served_models", "arguments": {}},
{"name": "tail_serve_output", "arguments": {"session_id": sid, "tail": 400}},
],
"exit_code": 0,
}
# FastAPI HTTPException puts the message under `detail`, not `error`.
@@ -3216,8 +3228,17 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
MAX_CHARS = 8000
if len(output_text) > MAX_CHARS:
output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:]
if not output_text:
output_text = (
f"No log output captured yet for {session_id} on {host_label}. "
"This usually means the tmux wrapper has started but the model process "
"has not printed anything yet. Do not stop here: call list_served_models "
"again to check whether it is still loading, ready, or crashed; if it is "
"still not ready, call tail_serve_output again with a larger tail after "
"the next status check."
)
return {
"output": output_text or "(empty pane)",
"output": output_text,
"session_id": session_id,
"host": host_label,
"tail_lines": tail,
+183 -3
View File
@@ -39,6 +39,10 @@ _XML_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>",
re.IGNORECASE,
)
_XML_OPEN_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z",
re.IGNORECASE,
)
_XML_INVOKE_RE = re.compile(
r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>',
re.IGNORECASE,
@@ -47,6 +51,21 @@ _XML_PARAM_RE = re.compile(
r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>',
re.IGNORECASE,
)
_XML_DIRECT_TOOL_RE = re.compile(
r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
re.IGNORECASE,
)
# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
# <tool▁calls▁begin> ... <tool▁calls▁end>
# <tool▁call▁begin>tool_name<tool▁sep>{...}<tool▁call▁end>
# These can leak as text through llama.cpp/Ollama-style endpoints when the
# engine does not return structured OpenAI tool_calls.
_STEPFUN_CALL_BEGIN = "<tool▁call▁begin>"
_STEPFUN_CALL_SEP = "<tool▁sep>"
_STEPFUN_CALL_END = "<tool▁call▁end>"
_STEPFUN_CALLS_BEGIN = "<tool▁calls▁begin>"
_STEPFUN_CALLS_END = "<tool▁calls▁end>"
# Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
# {tool => 'tool_name', args => '<param>value</param>'}
@@ -446,6 +465,138 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
return function_call_to_tool_block(tool_name, json.dumps(params))
def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
    """Parse direct XML tool tags inside <tool_call>.

    Some local models emit:
        <tool_call><web_search>query</web_search></tool_call>
    instead of the invoke/parameter shape:
        <tool_call><invoke name="web_search"><parameter name="query">query</parameter></invoke></tool_call>

    Keep this as an adapter to the canonical function-call converter so aliases
    and per-tool argument formatting stay in one place.

    Args:
        tool_match: Regex match whose group(1) is the tag name and group(2)
            the text between the opening and closing tag.

    Returns:
        Whatever ``function_call_to_tool_block`` builds for the mapped tool,
        or None when the tag is a structural wrapper, is not a known tool, or
        has an empty body.
    """
    tool_name = tool_match.group(1).lower().replace("-", "_")
    # Structural tags are handled by the invoke/parameter parsers, not here.
    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
        return None
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = tool_match.group(2).strip()
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        decoded = None
    if isinstance(decoded, dict):
        params = decoded
    else:
        # Plain-text body. This also covers bodies that happen to be valid
        # non-object JSON (e.g. `2024`, `true`, `"quoted phrase"`): those are
        # the tool's argument text, so keep them instead of sending an empty
        # argument dict.
        plain_text_key = {
            "web_search": "query",
            "web_fetch": "url",
            "bash": "command",
            "python": "code",
            "read_file": "path",
            "write_file": "path",
        }.get(mapped, "content")
        params = {plain_text_key: body}
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
def _iter_stepfun_tool_calls(text: str):
    """Yield ``(raw_name, body)`` for each StepFun native tool call in ``text``.

    Uses plain ``str.find`` scans (no regex) over the begin / separator / end
    tokens. Scanning stops at the first call that lacks a separator or an end
    token. Calls whose stripped name is empty or longer than 128 characters
    are skipped, but scanning continues after them.
    """
    begin_len = len(_STEPFUN_CALL_BEGIN)
    sep_len = len(_STEPFUN_CALL_SEP)
    end_len = len(_STEPFUN_CALL_END)

    begin_at = text.find(_STEPFUN_CALL_BEGIN)
    while begin_at >= 0:
        name_from = begin_at + begin_len
        sep_at = text.find(_STEPFUN_CALL_SEP, name_from)
        if sep_at < 0:
            break
        body_from = sep_at + sep_len
        end_at = text.find(_STEPFUN_CALL_END, body_from)
        if end_at < 0:
            break
        name = text[name_from:sep_at].strip()
        if name and len(name) <= 128:
            yield name, text[body_from:end_at].strip()
        # Resume right after this call's end token.
        begin_at = text.find(_STEPFUN_CALL_BEGIN, end_at + end_len)
def _strip_stepfun_tool_markup(text: str) -> str:
    """Drop StepFun tool-call token blocks and their list wrappers.

    Every begin-token ... end-token span is removed via literal scans. A begin
    token with no later end token is left in place together with the rest of
    the text. The calls-begin / calls-end wrapper tokens are then removed
    wherever they remain.
    """
    begin_len = len(_STEPFUN_CALL_BEGIN)
    end_len = len(_STEPFUN_CALL_END)
    kept = []
    cursor = 0
    while True:
        begin_at = text.find(_STEPFUN_CALL_BEGIN, cursor)
        end_at = text.find(_STEPFUN_CALL_END, begin_at + begin_len) if begin_at >= 0 else -1
        if end_at < 0:
            # No further complete block: keep the remainder untouched.
            kept.append(text[cursor:])
            break
        kept.append(text[cursor:begin_at])
        cursor = end_at + end_len
    result = "".join(kept)
    for wrapper in (_STEPFUN_CALLS_BEGIN, _STEPFUN_CALLS_END):
        result = result.replace(wrapper, "")
    return result
def _strip_bare_invoke_markup(text: str) -> str:
"""Remove bare <invoke ...>...</invoke> blocks without regex backtracking."""
out = []
pos = 0
while True:
start = text.lower().find("<invoke", pos)
if start < 0:
out.append(text[pos:])
break
tag_end = text.find(">", start)
if tag_end < 0:
out.append(text[pos:])
break
close = text.lower().find("</invoke>", tag_end + 1)
if close < 0:
out.append(text[pos:])
break
out.append(text[pos:start])
pos = close + len("</invoke>")
return "".join(out)
def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
    """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.

    Args:
        tool_name: Name taken from between the call-begin and separator
            tokens; it is lowercased and ``-`` / ``.`` become ``_`` before
            lookup.
        body: Text between the separator and call-end tokens; either a JSON
            object of arguments or a plain-text argument.

    Returns:
        Whatever ``function_call_to_tool_block`` builds for the mapped tool,
        or None when the name is not a known tool or the body is empty.
    """
    tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = (body or "").strip()
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        decoded = None
    if isinstance(decoded, dict):
        params = decoded
    else:
        # Plain-text body. This also covers bodies that happen to be valid
        # non-object JSON (e.g. `2024`, `true`, `"quoted phrase"`): those are
        # the tool's argument text, so keep them instead of sending an empty
        # argument dict.
        plain_text_key = {
            "web_search": "query",
            "web_fetch": "url",
            "bash": "command",
            "python": "code",
            "read_file": "path",
            "write_file": "path",
        }.get(mapped, "content")
        params = {plain_text_key: body}
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
"""Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style)."""
# Extract tool name
@@ -511,8 +662,9 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models)
3. XML-style <tool_call>/<invoke> blocks
4. <tool_code> blocks (MiniMax-M2.5 style)
5. DeepSeek DSML markup (normalized to <invoke> first)
6. Non-native local model fallback: prose mentioning web_search followed by
5. StepFun Step-3 native <tool▁call▁begin> tokens
6. DeepSeek DSML markup (normalized to <invoke> first)
7. Non-native local model fallback: prose mentioning web_search followed by
bare JSON args, e.g. {"query":"...", "time_filter":"week"}
`skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code
@@ -567,12 +719,38 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# Pattern 3: XML-style <tool_call>/<invoke> blocks
if not blocks:
for tool_name, body in _iter_stepfun_tool_calls(text):
block = _parse_stepfun_tool_call(tool_name, body)
if block:
blocks.append(block)
if blocks:
return blocks
# Try wrapped: <tool_call><invoke ...>...</invoke></tool_call>
for m in _XML_TOOL_CALL_RE.finditer(text):
for inv in _XML_INVOKE_RE.finditer(m.group(1)):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if not blocks:
for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Some local models stream an opening <tool_call> wrapper and a
# complete inner tool tag, but forget the closing </tool_call>.
if not blocks:
for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
body = m.group(1)
for inv in _XML_INVOKE_RE.finditer(body):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if blocks:
break
for direct in _XML_DIRECT_TOOL_RE.finditer(body):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Try bare <invoke> without wrapper
if not blocks:
for inv in _XML_INVOKE_RE.finditer(text):
@@ -614,7 +792,9 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
text = _normalize_dsml(text)
cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
cleaned = _TOOL_CALL_RE.sub('', cleaned)
cleaned = _strip_stepfun_tool_markup(cleaned)
cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _TOOL_CODE_RE.sub('', cleaned)
if not skip_fenced:
raw_web_json = _parse_raw_web_json_lookup(cleaned)
@@ -622,6 +802,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
_, (start, end) = raw_web_json
cleaned = cleaned[:start] + cleaned[end:]
# Strip bare <invoke> blocks not wrapped in <tool_call>
cleaned = re.sub(r'<invoke\s+name=["\'].*?</invoke>', '', cleaned, flags=re.DOTALL | re.IGNORECASE)
cleaned = _strip_bare_invoke_markup(cleaned)
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
return cleaned.strip()
Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

+7 -2
View File
@@ -879,7 +879,7 @@
<span class="grow">Library</span>
<button type="button" class="list-item-plus-btn" id="library-new-doc-btn" title="New document">
<svg class="list-item-plus-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="width:11px;height:11px;"><line x1="12" y1="5" x2="12" y2="19"/><line x1="5" y1="12" x2="19" y2="12"/></svg>
<span class="list-item-plus-label">new</span>
<span class="list-item-plus-label">document</span>
</button>
</div>
<div class="list-item" id="tool-notes-btn">
@@ -1005,7 +1005,12 @@
<button type="button" class="model-picker-btn" id="model-picker-btn" title="Switch model"><span id="model-picker-label">Select model</span> <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 15 12 9 18 15"/></svg></button>
<div class="model-picker-menu hidden" id="model-picker-menu">
<div class="model-picker-search-row">
<input type="text" id="model-picker-search" placeholder="Search models..." autocomplete="off" aria-label="Search models">
<div class="model-picker-search-wrap">
<input type="text" id="model-picker-search" placeholder="Search models..." autocomplete="off" aria-label="Search models">
<button type="button" class="model-picker-refresh-btn" id="model-picker-refresh-btn" title="Refresh model picker" aria-label="Refresh model picker">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polyline points="23 4 23 10 17 10"/><path d="M20.49 15a9 9 0 1 1-2.12-9.36L23 10"/></svg>
</button>
</div>
<button type="button" class="model-picker-action-btn primary" id="model-picker-add-models-btn" title="Add model endpoints" aria-label="Add model endpoints">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><path d="M12 5v14"/><path d="M5 12h14"/></svg>
</button>
+77 -66
View File
@@ -571,6 +571,24 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
let timeoutId = null;
let responseTimeoutCleared = false;
let clearResponseTimeout = () => {};
// Handles of the pending "still waiting for first token" status timers.
let firstTokenWaitTimers = [];

/** Cancel every pending first-token status timer and reset the handle list. */
const clearFirstTokenWaitTimers = () => {
  for (const timerId of firstTokenWaitTimers) {
    try {
      clearTimeout(timerId);
    } catch (_) {}
  }
  firstTokenWaitTimers = [];
};
/**
 * (Re)arm the staged spinner messages shown while no output has arrived.
 * Any previously scheduled messages are cancelled first.
 */
const scheduleFirstTokenWaitMessages = () => {
  clearFirstTokenWaitTimers();

  // Only touch the spinner if nothing has been accumulated yet, the spinner
  // is still mounted, and the request has not been aborted.
  const showIfStillWaiting = (text) => {
    if (accumulated) return;
    if (!spinner || !spinner.element) return;
    if (currentAbort && currentAbort.signal.aborted) return;
    spinner.updateMessage(text);
  };

  const stages = [
    { delayMs: 20000, text: 'Still waiting for first token' },
    { delayMs: 60000, text: 'Large local model is pre-filling context' },
    { delayMs: 120000, text: 'Still working - no tokens yet from the model' },
  ];

  firstTokenWaitTimers = stages.map(({ delayMs, text }) =>
    setTimeout(() => showIfStillWaiting(text), delayMs));
};
const clearProcessingProbe = () => {
if (processingProbeTimer) {
clearTimeout(processingProbeTimer);
@@ -921,56 +939,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
setTimeout(() => spinner.updateMessage('Analyzing sources'), 1500);
} else {
spinner.updateMessage('Processing request');
const endpointUrlForProbe = sessionModule.getCurrentEndpointUrl ? sessionModule.getCurrentEndpointUrl() : null;
if (endpointUrlForProbe && modelName) {
processingProbeTimer = setTimeout(async () => {
processingProbeTimer = null;
if (accumulated || !spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted)) return;
processingProbeAbort = new AbortController();
try {
spinner.updateMessage('Checking model endpoint');
const status = await _probeCurrentEndpointStatus(endpointUrlForProbe, processingProbeAbort.signal);
if (accumulated || !spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted)) return;
if (!status) {
spinner.updateMessage('Still waiting for model');
} else if (status.alive) {
const latency = status.latency_ms ? ` (${status.latency_ms}ms)` : '';
spinner.updateMessage(`Endpoint online${latency}; waiting for first token`);
} else {
// Probe confirms the endpoint isn't responding. Don't
// sit on a hung fetch — give the user 5s to read the
// status, then auto-abort with reason='offline' so the
// catch handler shows a clean "switch model" message
// instead of leaving the spinner spinning forever.
if (status.error) console.warn('Model endpoint probe failed:', status.error);
let _countdown = 5;
spinner.updateMessage(`Endpoint offline — cancelling in ${_countdown}s`);
const _tick = setInterval(() => {
_countdown--;
if (!spinner || !spinner.element || (currentAbort && currentAbort.signal.aborted) || accumulated) {
clearInterval(_tick);
return;
}
if (_countdown > 0) {
spinner.updateMessage(`Endpoint offline — cancelling in ${_countdown}s`);
} else {
clearInterval(_tick);
if (currentAbort && !currentAbort.signal.aborted) {
currentAbort._reason = 'offline';
currentAbort.abort();
}
}
}, 1000);
}
} catch (e) {
if (e && e.name !== 'AbortError' && spinner && spinner.element && !accumulated) {
spinner.updateMessage('Still waiting for model');
}
} finally {
processingProbeAbort = null;
}
}, 10000);
}
scheduleFirstTokenWaitMessages();
}
const researchBtn = el('research-toggle-btn');
@@ -1150,6 +1119,11 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
uiModule.scrollHistory();
}
// Swap the thinking spinner: remove the current one (via _removeThinkingSpinner)
// and then show a new one carrying `label` (via _showThinkingSpinner).
function _replaceThinkingSpinner(label) {
_removeThinkingSpinner();
_showThinkingSpinner(label);
}
// Auto-show thinking spinner after text stops streaming
let _textPauseTimer = null;
function _scheduleThinkingSpinner() {
@@ -1173,10 +1147,24 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
let _liveThinkHeader = null;
let _liveThinkSpinnerSlot = null;
let _liveThinkTimerEl = null;
let _liveThinkTokenCount = 0;
let _liveThinkToggle = null;
let _liveThinkDomId = null;
/**
 * Rough token estimate for thinking text: one token per four characters of
 * the trimmed text, rounded up. Empty / whitespace-only / missing text is 0.
 */
function _estimateThinkingTokens(text) {
  const trimmed = (text || '').trim();
  return trimmed ? Math.max(1, Math.ceil(trimmed.length / 4)) : 0;
}
/**
 * Build the thinking-header stats label, e.g. "3.2s · 120 tok".
 * A falsy `seconds` or `tokenCount` is omitted; with both omitted the
 * result is the empty string.
 */
function _formatThinkStats(seconds, tokenCount) {
  const pieces = [];
  if (seconds) pieces.push(seconds + 's');
  if (tokenCount) pieces.push(tokenCount + ' tok');
  return pieces.join(' · ');
}
function _replyAfterClosedThinking(text) {
text = markdownModule.normalizeThinkingMarkup(text || '');
const closeRe = /<\/(?:think(?:ing)?|thought)>|<channel\|>/gi;
let match = null;
let last = null;
@@ -1187,7 +1175,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
// Direct render helper for streaming text
_renderStream = () => {
let dt = stripToolBlocks(roundText);
let dt = markdownModule.normalizeThinkingMarkup(stripToolBlocks(roundText));
const bodyEl = roundHolder.querySelector('.body');
const contentEl = _ensureStreamLayout(bodyEl);
@@ -1277,6 +1265,12 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
let _nextIsError = false;
let _streamSawDone = false;
// Latch: flips to true the first time markFirstVisibleOutput() runs.
let _firstVisibleOutputSeen = false;

/** On the first call only, cancel the pending first-token wait messages. */
const markFirstVisibleOutput = () => {
  if (!_firstVisibleOutputSeen) {
    _firstVisibleOutputSeen = true;
    clearFirstTokenWaitTimers();
  }
};
while (true) {
const { done, value } = await reader.read();
@@ -1296,6 +1290,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
}
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data && data !== '[DONE]') markFirstVisibleOutput();
// (thinking spinner removal is handled in agent_step / tool_start / content handlers)
@@ -1357,7 +1352,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
if (_liveThinkHeader) _liveThinkHeader.textContent = 'View thinking process';
if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove();
if (_liveThinkTimerEl && _elapsedDone) {
_liveThinkTimerEl.textContent = _elapsedDone + 's';
_liveThinkTimerEl.textContent = _formatThinkStats(_elapsedDone, _liveThinkTokenCount);
_liveThinkTimerEl.style.marginLeft = 'auto';
_liveThinkTimerEl.style.marginRight = '5px';
var _hdrDone = _liveThinkTimerEl.closest('.thinking-header');
@@ -1399,9 +1394,17 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
typewriterInto(roundHolder.querySelector('.body'), errMsg);
break;
}
if (json.delta || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') {
if (json.delta || json.type === 'agent_prep' || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') {
clearResponseTimeout();
clearProcessingProbe();
clearFirstTokenWaitTimers();
}
if (json.type === 'agent_prep') {
if (!_isBg) {
_cancelThinkingTimer();
_replaceThinkingSpinner('Preparing agent');
}
continue;
}
if (json.delta) {
_cancelThinkingTimer();
@@ -1464,12 +1467,13 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
// 1. Normal: <think>...no closing tag yet
// 2. Malformed: <think></think>\n...text but no second </think> yet
// 3. Qwen3.5: "Thinking Process:" without <think> tags
let hasUnclosedThink = markdownModule.hasUnclosedThinkTag(roundText);
const normalizedRoundText = markdownModule.normalizeThinkingMarkup(roundText);
let hasUnclosedThink = markdownModule.hasUnclosedThinkTag(normalizedRoundText);
// Detect non-tag thinking patterns: "Thinking:", "Thinking Process:", Gemma-style reasoning
// These patterns don't use <think> tags, so we simulate unclosed thinking during streaming
const _replyPrefixes = ['Hey', 'Hi ', 'Hi!', 'Hello', 'Sure', 'Yes', 'No ', 'No,', 'Yo', 'OK', 'Here', 'Absolutely', 'Of course', 'Great', 'Alright', 'Thanks', 'Welcome', 'Good ', "I'm happy", "I'd be"];
if (!hasUnclosedThink && !/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i.test(roundText)) {
const _trimmedRT = roundText.trimStart();
if (!hasUnclosedThink && !/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i.test(normalizedRoundText)) {
const _trimmedRT = normalizedRoundText.trimStart();
const _isReasoning = markdownModule.startsWithReasoningPrefix(_trimmedRT);
if (_isReasoning) {
// Check if we can see a reply boundary yet (newline then reply pattern)
@@ -1494,9 +1498,9 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
}
}
}
if (!hasUnclosedThink && /^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i.test(roundText)) {
if (!hasUnclosedThink && /^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i.test(normalizedRoundText)) {
// Empty <think></think> — the model likely put thinking outside the tags
const afterEmpty = roundText.replace(/^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i, '').trim();
const afterEmpty = normalizedRoundText.replace(/^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i, '').trim();
const closeTags = (afterEmpty.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length;
if (closeTags === 0 && afterEmpty.length > 0) {
hasUnclosedThink = true; // still waiting for real closing tag
@@ -1506,10 +1510,10 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
// Only applies when there's a second </think> later (model leaked thinking outside tags)
// Do NOT trigger if the text after </think> contains tool calls (that's real content)
if (!hasUnclosedThink && isThinking) {
const _thinkMatch = roundText.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i);
const _thinkMatch = normalizedRoundText.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i);
const _thinkLen = _thinkMatch ? _thinkMatch[1].trim().length : 0;
if (_thinkLen < 20) {
const _afterClose = roundText.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i, '').trim();
const _afterClose = normalizedRoundText.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i, '').trim();
// Only keep waiting if there's trailing text that looks like thinking (not tool calls)
const _hasToolCall = /```(?:bash|python|web_search|read_file|write_file|create_document|edit_document|manage_|generate_image)/i.test(_afterClose);
const _hasOrphanClose = /<\/(?:think(?:ing)?|thought)>/i.test(_afterClose);
@@ -1554,7 +1558,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
function _tickThinkTimer() {
if (!_liveThinkTimerEl || !_liveThinkTimerEl.isConnected) return;
var s = ((Date.now() - _thinkTimerStart) / 1000).toFixed(1);
_liveThinkTimerEl.textContent = s + 's';
_liveThinkTimerEl.textContent = _formatThinkStats(s, _liveThinkTokenCount);
_thinkTimerRAF = requestAnimationFrame(_tickThinkTimer);
}
_thinkTimerRAF = requestAnimationFrame(_tickThinkTimer);
@@ -1570,13 +1574,18 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
} else if (hasUnclosedThink && isThinking) {
if (_liveThinkInner) {
// Extract raw thinking text (strip known thinking wrappers and prefixes)
var thinkText = roundText
var thinkText = markdownModule.normalizeThinkingMarkup(roundText)
.replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '')
.replace(/<\|channel>thought\s*\n?/gi, '')
.replace(/<\|channel>response\s*\n?/gi, '')
.replace(/<channel\|>/gi, '');
thinkText = thinkText.replace(/^\s*Thinking(?:\s+Process)?:\s*/i, '');
_liveThinkTokenCount = _estimateThinkingTokens(thinkText);
_liveThinkInner.innerHTML = markdownModule.mdToHtml(thinkText);
if (_liveThinkTimerEl) {
var _elapsedLive = thinkingStartTime ? ((Date.now() - thinkingStartTime) / 1000).toFixed(1) : '';
_liveThinkTimerEl.textContent = _formatThinkStats(_elapsedLive, _liveThinkTokenCount);
}
// Keep thinking box scrolled to bottom, but let user scroll up
var thinkBox = _liveThinkInner.closest('.thinking-content');
if (thinkBox) {
@@ -1600,6 +1609,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
_liveThinkHeader = null;
_liveThinkSpinnerSlot = null;
_liveThinkTimerEl = null;
_liveThinkTokenCount = 0;
_liveThinkToggle = null;
_liveThinkDomId = null;
// Fall through to normal streaming
@@ -1622,7 +1632,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove();
// Move timer to right side of header
if (_liveThinkTimerEl && elapsed) {
_liveThinkTimerEl.textContent = elapsed + 's';
_liveThinkTimerEl.textContent = _formatThinkStats(elapsed, _liveThinkTokenCount);
_liveThinkTimerEl.style.marginLeft = 'auto';
_liveThinkTimerEl.style.marginRight = '5px';
var _hdrRow = _liveThinkTimerEl.closest('.thinking-header');
@@ -2023,7 +2033,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
cancelAnimationFrame(_thinkTimerRAF);
var _elapsed2 = thinkingStartTime ? ((Date.now() - thinkingStartTime) / 1000).toFixed(1) : null;
if (_liveThinkHeader) _liveThinkHeader.textContent = 'View thinking process';
if (_liveThinkTimerEl) _liveThinkTimerEl.textContent = _elapsed2 ? _elapsed2 + 's' : '';
if (_liveThinkTimerEl) _liveThinkTimerEl.textContent = _elapsed2 ? _formatThinkStats(_elapsed2, _liveThinkTokenCount) : '';
if (_liveThinkSpinnerSlot) _liveThinkSpinnerSlot.remove();
// Assign stable IDs
var _thinkId2 = 'think-' + Date.now();
@@ -2037,7 +2047,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
if (!roundFinalized) {
roundFinalized = true;
if (spinner && spinner.element) spinner.destroy();
const dt = stripToolBlocks(roundText);
const dt = markdownModule.normalizeThinkingMarkup(stripToolBlocks(roundText));
if (dt.trim()) {
var _body3 = roundHolder.querySelector('.body');
var _contentEl3 = _ensureStreamLayout(_body3);
@@ -3018,6 +3028,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
} finally {
clearResponseTimeout();
clearProcessingProbe();
clearFirstTokenWaitTimers();
// Streaming done — let screen readers announce the settled response.
const _chatLogDone = document.getElementById('chat-history');
if (_chatLogDone) _chatLogDone.setAttribute('aria-busy', 'false');
@@ -3396,7 +3407,7 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
};
const renderDelta = () => {
const dt = stripToolBlocks(roundText);
const dt = markdownModule.normalizeThinkingMarkup(stripToolBlocks(roundText));
contentDiv.innerHTML = markdownModule.mdToHtml(markdownModule.squashOutsideCode(dt));
uiModule.scrollHistory();
};
+73 -17
View File
@@ -73,6 +73,45 @@ function isCompareActive() {
return state.isActive;
}
/**
 * Noun phrase (with leading space) for the compare header, chosen from
 * state._compareMode; any mode without its own entry yields ' models'.
 */
function _compareModeLabel() {
  const labelsByMode = {
    search: ' search providers',
    agent: ' agents',
    research: ' research models',
  };
  return labelsByMode[state._compareMode] || ' models';
}
/**
 * Persist and display the agent/chat toolbar mode.
 *
 * @param {string} mode - 'agent' selects agent mode; anything else means 'chat'.
 * @param {boolean} [syncModeTools=!state.isActive] - when true, also show or
 *   hide every `[data-mode-tool]` element to match the mode.
 */
function _setToolbarMode(mode, syncModeTools = !state.isActive) {
  const isAgent = mode === 'agent';
  const target = isAgent ? 'agent' : 'chat';

  // Persist the choice in the stored toggle state.
  const persisted = Storage.loadToggleState();
  persisted.mode = target;
  Storage.saveToggleState(persisted);

  const agentBtn = document.getElementById('mode-agent-btn');
  const chatBtn = document.getElementById('mode-chat-btn');
  const modeToggle = agentBtn?.closest('.mode-toggle')
    || chatBtn?.closest('.mode-toggle')
    || document.querySelector('.mode-toggle');

  // Button highlight + pressed state (only when both buttons exist).
  if (agentBtn && chatBtn) {
    agentBtn.classList.toggle('active', isAgent);
    chatBtn.classList.toggle('active', !isAgent);
    agentBtn.setAttribute('aria-pressed', isAgent ? 'true' : 'false');
    chatBtn.setAttribute('aria-pressed', isAgent ? 'false' : 'true');
  }

  // Both classes on the toggle wrapper are set exactly when in chat mode.
  if (modeToggle) {
    for (const cls of ['mode-chat', 'mode-right']) {
      modeToggle.classList.toggle(cls, !isAgent);
    }
  }

  if (syncModeTools) {
    const display = isAgent ? '' : 'none';
    document.querySelectorAll('[data-mode-tool]').forEach((toolEl) => {
      toolEl.style.display = display;
    });
  }
}
/**
 * While compare is active, adopt the toolbar's agent/chat choice as the
 * compare mode, then refresh the toolbar, the header label and the eval
 * prompt picker. Does nothing when compare is not active.
 *
 * @param {string} mode - 'agent' selects agent; anything else means 'chat'.
 */
function _syncCompareModeFromToolbar(mode) {
  if (!state.isActive) return;

  state._compareMode = mode === 'agent' ? 'agent' : 'chat';
  // Pass false so the [data-mode-tool] elements are left untouched.
  _setToolbarMode(state._compareMode, false);

  const headerLabel = document.querySelector('.compare-header-label');
  if (headerLabel) {
    const blindSuffix = state._blindMode ? ' (blind)' : '';
    headerLabel.textContent = `Comparing${_compareModeLabel()}${blindSuffix} · ${state._timeout}s timeout`;
  }

  // Re-render the eval picker if it is mounted and exposes its renderer.
  const evalWrap = document.getElementById('cmp-eval-wrap');
  if (!evalWrap) return;
  if (typeof evalWrap._renderItems === 'function') evalWrap._renderItems();
}
// ────────────────────────────────────────────────────────────────────────────
// ── closeCompare ──
// ────────────────────────────────────────────────────────────────────────────
@@ -170,12 +209,7 @@ async function deactivate(teardown) {
});
// Restore agent/chat mode to what it was before compare
const _ts = Storage.loadToggleState();
_ts.mode = state._savedMode;
Storage.saveToggleState(_ts);
const _ab2 = document.getElementById('mode-agent-btn'), _cb2 = document.getElementById('mode-chat-btn');
if (_ab2 && _cb2) { _ab2.classList.toggle('active', state._savedMode === 'agent'); _cb2.classList.toggle('active', state._savedMode === 'chat'); }
document.querySelectorAll('[data-mode-tool]').forEach(b => { b.style.display = state._savedMode === 'agent' ? '' : 'none'; });
_setToolbarMode(state._savedMode, true);
// Delete unsaved sessions, then reload
if (teardown) {
@@ -258,19 +292,30 @@ async function _buildCompareUI() {
if (el) state._savedIndicatorDisplay[id] = el.style.display;
});
// 5. Save current mode and lock to the right one for this compare type
// 5. Save current mode and seed the toolbar for this compare type.
const _toggleState = Storage.loadToggleState();
state._savedMode = _toggleState.mode || 'chat';
const _targetMode = (state._compareMode === 'agent') ? 'agent' : 'chat';
_toggleState.mode = _targetMode;
Storage.saveToggleState(_toggleState);
_setToolbarMode(_targetMode, false);
const _ab = document.getElementById('mode-agent-btn'), _cb = document.getElementById('mode-chat-btn');
let _modeCleanup = null;
const _onCompareModeClick = (ev) => {
ev.stopPropagation();
ev.stopImmediatePropagation();
_syncCompareModeFromToolbar(ev.currentTarget === _ab ? 'agent' : 'chat');
};
if (_ab && _cb) {
_ab.classList.toggle('active', _targetMode === 'agent');
_cb.classList.toggle('active', _targetMode === 'chat');
_ab.addEventListener('click', _onCompareModeClick, true);
_cb.addEventListener('click', _onCompareModeClick, true);
_modeCleanup = document.createElement('span');
_modeCleanup.style.display = 'none';
_modeCleanup._cleanup = () => {
_ab.removeEventListener('click', _onCompareModeClick, true);
_cb.removeEventListener('click', _onCompareModeClick, true);
};
}
const _modeToggle = document.querySelector('.mode-toggle');
if (_modeToggle) { _modeToggle.style.pointerEvents = 'none'; _modeToggle.style.opacity = '0.4'; }
if (_modeToggle) { _modeToggle.style.pointerEvents = ''; _modeToggle.style.opacity = ''; }
// 6. Force tool toggles per compare mode
disableToolToggles();
@@ -289,6 +334,7 @@ async function _buildCompareUI() {
// 7. Hide existing chat container children (preserves event listeners)
const container = document.getElementById('chat-container');
state._compareElements = [];
if (_modeCleanup) state._compareElements.push(_modeCleanup);
Array.from(container.children).forEach(child => {
if (child.style.display === 'none') return;
child.dataset.cmpHidden = '1';
@@ -302,9 +348,9 @@ async function _buildCompareUI() {
headerBar.className = 'compare-header-bar';
headerBar.style.cssText = 'display:flex;align-items:center;justify-content:space-between;padding:6px 10px;flex-shrink:0;';
const headerLabel = document.createElement('span');
headerLabel.className = 'compare-header-label';
headerLabel.style.cssText = 'font-size:10px;font-weight:400;color:var(--fg);white-space:nowrap;overflow:hidden;text-overflow:ellipsis;min-width:0;';
const _modeLabel = ({ search: ' search providers', agent: ' agents', research: ' research models' }[state._compareMode] || ' models');
headerLabel.textContent = 'Comparing' + _modeLabel + (state._blindMode ? ' (blind)' : '') + ' · ' + state._timeout + 's timeout';
headerLabel.textContent = 'Comparing' + _compareModeLabel() + (state._blindMode ? ' (blind)' : '') + ' · ' + state._timeout + 's timeout';
// Left side: the Compare tool icon (two side-by-side panes, matching the
// rail/sidebar icon) + the label. Other tool headers carry their icon; this
// one was missing it.
@@ -475,7 +521,7 @@ async function _buildCompareUI() {
}
const msgTA = document.getElementById('message');
if (msgTA) {
msgTA.placeholder = 'Enter prompt for all models...';
msgTA.placeholder = window.matchMedia('(max-width: 767px)').matches ? '' : 'Enter prompt for all models...';
requestAnimationFrame(() => msgTA.focus());
}
@@ -891,8 +937,7 @@ async function _executeCompare(message) {
let sharedSearchContext = null;
let sharedSearchSources = null;
const webChk = document.getElementById('web-toggle');
const toggleState = Storage.loadToggleState();
const isAgentMode = (toggleState.mode || 'chat') === 'agent';
const isAgentMode = state._compareMode === 'agent';
const webOn = webChk && webChk.checked;
// In agent mode, web_search is a tool (handled per-pane); in chat mode, pre-search and share
if (webOn && !isAgentMode) {
@@ -1198,6 +1243,15 @@ function _setupEvalPicker() {
function _renderItems() {
const mode = state._compareMode || 'chat';
const label = btn.querySelector('.cmp-eval-label');
if (label) {
label.textContent = ({
agent: 'Agent prompts',
chat: 'Chat prompts',
search: 'Search prompts',
research: 'Research prompts'
}[mode] || 'Eval prompts');
}
// research/html aren't first-class compare types — fall back gracefully
const key = EVAL_PROMPTS[mode] ? mode
: (mode === 'research' ? 'search' : 'chat');
@@ -1258,8 +1312,10 @@ function _setupEvalPicker() {
};
document.addEventListener('click', _onDocClick);
_renderItems();
wrap.appendChild(btn);
wrap.appendChild(menu);
wrap._renderItems = _renderItems;
inputTop.appendChild(wrap);
// Expected-answer chip — placed above the chat-input-bar (outside it), so
+27 -4
View File
@@ -551,23 +551,46 @@ async function streamToPane(paneIdx, sessionId, message, aiMsgEl, opts) {
footer.className = 'msg-footer';
const span = document.createElement('span');
span.className = 'response-metrics';
let text = metrics.output_tokens + ' tokens | ' + metrics.tokens_per_second + ' tok/s';
const outputTokens = metrics.output_tokens;
const responseTime = metrics.response_time ?? metrics.total_time;
const explicitTps = metrics.tokens_per_second ?? metrics.gen_tps ?? metrics.tps;
const numericOutput = Number(outputTokens);
const numericTime = Number(responseTime);
const numericTps = Number(explicitTps);
const derivedTps = Number.isFinite(numericTps)
? numericTps
: (Number.isFinite(numericOutput) && Number.isFinite(numericTime) && numericTime > 0)
? numericOutput / numericTime
: null;
const tpsLabel = derivedTps != null
? (derivedTps >= 100 ? String(Math.round(derivedTps)) : derivedTps.toFixed(2).replace(/\.?0+$/, ''))
: null;
const parts = [];
if (outputTokens != null && outputTokens !== 'undefined') {
parts.push(outputTokens + ' tokens');
}
if (tpsLabel != null) {
parts.push(tpsLabel + ' tok/s');
}
if (responseTime != null && responseTime !== 'undefined' && parts.length === 0) {
parts.push(responseTime + 's');
}
// Add per-request cost and cost per 1000
const _model = metrics.model || (state._selectedModels[paneIdx] && state._selectedModels[paneIdx].model) || '';
const _cost = getModelCost(_model, metrics.input_tokens || 0, metrics.output_tokens || 0);
// Build the metrics span with optional cost and context
span.textContent = text;
span.textContent = parts.join(' | ');
if (_cost !== null) {
const _cost1k = _cost * 1000;
const costSpan = document.createElement('span');
costSpan.style.color = 'var(--color-success, #4caf50)';
costSpan.title = 'Estimated cost per 1,000 responses like this one';
costSpan.textContent = ' | $' + (_cost1k < 1 ? _cost1k.toFixed(2) : _cost1k.toFixed(0)) + '/1k';
costSpan.textContent = (span.textContent ? ' | ' : '') + '$' + (_cost1k < 1 ? _cost1k.toFixed(2) : _cost1k.toFixed(0)) + '/1k';
span.appendChild(costSpan);
}
if (metrics.context_percent > 0) {
const ctx = document.createElement('span');
ctx.textContent = ' | ' + metrics.context_percent + '% ctx';
ctx.textContent = (span.textContent ? ' | ' : '') + metrics.context_percent + '% ctx';
if (metrics.context_percent >= 85) ctx.style.color = 'var(--color-error)';
else if (metrics.context_percent >= 70) ctx.style.color = '#ff9900';
span.appendChild(ctx);
+1 -1
View File
@@ -181,7 +181,7 @@ function handleVote(winnerIdx) {
let html = '';
const caret = ' <span class="pane-title-caret">&#x25BE;</span>';
if (isWinner) html = '<span style="color:var(--red);margin-right:4px;">&#x2605;</span><strong>' + escapeHtml(name) + '</strong> <span style="color:var(--red);font-size:0.82em;font-weight:800;text-transform:uppercase;letter-spacing:1px;position:relative;top:-2px;">Winner!</span>' + caret;
if (isWinner) html = '<span style="color:var(--green, #50fa7b);margin-right:4px;">&#x2605;</span><strong>' + escapeHtml(name) + '</strong> <span style="color:var(--green, #50fa7b);font-size:0.82em;font-weight:800;text-transform:uppercase;letter-spacing:1px;position:relative;top:0;">Winner!</span>' + caret;
else if (isTie) html = '<span style="opacity:0.5;margin-right:4px;">=</span><strong>' + escapeHtml(name) + '</strong>' + caret;
else html = '<strong>' + escapeHtml(name) + '</strong>' + caret;
el.innerHTML = html;
+41 -3
View File
@@ -461,6 +461,40 @@ export const ERROR_PATTERNS = [
{ label: 'Copy install command', action: () => _copyText('curl -fsSL https://ollama.com/install.sh | sh') },
],
},
// System build deps must be checked BEFORE the llama-server catch-all:
// a `cmake: command not found` failure ALSO produces `llama-server:
// command not found` later in the script (the build aborts then the
// run line fails) — pattern order is first-match-wins, so without
// these specific entries the user gets the misleading "install
// llama-cpp-python[server]" suggestion when the actual blocker is a
// missing OS-package toolchain that pip can't ship.
{
pattern: /cmake: command not found|cmake.*not found.*Could not/i,
message: 'cmake is required to compile llama.cpp from source, but it is not installed on this server.',
suggestion: 'Suggested action: install cmake via the OS package manager — apt: cmake build-essential / pacman: cmake base-devel / dnf: cmake gcc-c++ make / brew: cmake. Cookbook can do this automatically on the next launch if your user has passwordless sudo for apt/pacman/dnf.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
{ label: 'Copy apt install', action: () => _copyText('sudo apt install -y cmake build-essential git') },
{ label: 'Copy pacman install', action: () => _copyText('sudo pacman -Sy --needed cmake base-devel git') },
{ label: 'Copy dnf install', action: () => _copyText('sudo dnf install -y cmake gcc gcc-c++ make git') },
],
},
{
pattern: /^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler/i,
message: 'A C/C++ compiler (build-essential / base-devel) is required to compile llama.cpp.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
{ label: 'Copy apt install', action: () => _copyText('sudo apt install -y build-essential') },
],
},
{
pattern: /^git: command not found/i,
message: 'git is required to clone the llama.cpp source tree.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
{ label: 'Copy apt install', action: () => _copyText('sudo apt install -y git') },
],
},
{
pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
@@ -714,11 +748,15 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
copyBtn.addEventListener('click', async (e) => {
e.stopPropagation();
const bundle = _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText);
try {
await navigator.clipboard.writeText(bundle);
// Use the shared helper which falls back to execCommand('copy') on
// non-HTTPS origins (Tailscale IPs, LAN IPs, etc.) — navigator.clipboard
// is silently a no-op on those, which is why the button appeared dead
// for users on http://100.113.161.2:7011 over Tailscale/mobile.
const ok = await _copyText(bundle);
if (ok) {
copyBtn.classList.add('copied');
setTimeout(() => { if (copyBtn.isConnected) copyBtn.classList.remove('copied'); }, 1200);
} catch (_) {}
}
});
const dismissBtn = document.createElement('button');
+9 -15
View File
@@ -578,7 +578,9 @@ export async function _hwfitFetch(fresh = false) {
const _cached = fresh ? null : _readScanCache(_sig);
const wp = spinnerModule.createWhirlpool(18);
if (_cached) {
_hwfitCache = _cached;
// Tag the restored cache with its host too (scan-sig keys cache per
// host, so a hit here is always for the current remoteHost).
_hwfitCache = { ..._cached, _scannedHost: remoteHost || '' };
_hwfitRenderHw(hw, _cached.system);
if (!remoteHost && _cached.system && _cached.system.platform) {
_envState.platform = _cached.system.platform;
@@ -750,7 +752,11 @@ export async function _hwfitFetch(fresh = false) {
: _olRows;
data.models = (data.models || []).concat(_olFiltered);
}
_hwfitCache = data;
// Tag the cache with the host this scan was for, so downstream
// code (_gpuEnvVarName, backend-aware command builders) can avoid
// trusting a stale scan when the user switches the server picker
// to a different target without re-running hwfit.
_hwfitCache = { ...data, _scannedHost: remoteHost || '' };
_hwfitRenderHw(hw, data.system);
// Propagate local platform from hardware probe so _isWindows(task) works
// for local tasks (menu items, shell commands, etc.).
@@ -1415,23 +1421,11 @@ export function _expandModelRow(row, modelData) {
const dlSource = _downloadSourceRepo(modelData, backend);
const hfUrl = `https://huggingface.co/${dlSource.repo}`;
// Official vendor recipe deep-links. These point to vLLM / SGLang's curated
// hardware-specific launch-command pages. They 404 for uncatalogued models \u2014
// a known tradeoff; user just gets the vendor's "model not found" page.
const _recipeRepo = modelData.name || '';
const _vllmUrl = _recipeRepo ? `https://recipes.vllm.ai/${_recipeRepo}` : '';
const _sglangUrl = _recipeRepo ? `https://docs.sglang.io/cookbook/autoregressive/${_recipeRepo}${_sglangHashFor(modelData)}` : '';
let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
html += `<div class="hwfit-panel-header">`;
html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`;
html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`;
if (backend === 'vllm' && _vllmUrl) {
html += `<a href="${esc(_vllmUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="vLLM official recipe (curated launch command). 404s if this model isn't in vLLM's recipes catalog.">vLLM \u2197</a>`;
}
if (backend === 'sglang' && _sglangUrl) {
html += `<a href="${esc(_sglangUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="SGLang cookbook (hash pre-filled with your detected hardware). 404s if this model isn't in SGLang's cookbook catalog.">SGLang \u2197</a>`;
}
html += `</div>`;
html += `<div class="hwfit-panel-actions">`;
html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;
@@ -1679,7 +1673,7 @@ export function _expandModelRow(row, modelData) {
} else if (runBackend === 'llamacpp') {
const dir = `"$HOME/.cache/huggingface/hub/models--${modelData.name.replace(/\//g, '--')}/snapshots"`;
const ggufPath = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
cmd = `MODEL_FILE=${ggufPath} && { [ -n "$MODEL_FILE" ] && [ -f "$MODEL_FILE" ]; } || { echo "ERROR: No GGUF found on this host. Download a GGUF quant or switch backend."; exit 1; } && llama-server --model "$MODEL_FILE" --host 0.0.0.0 --port 8080 -ngl 99 -c ${maxCtx} || python3 -m llama_cpp.server --model "$MODEL_FILE" --host 0.0.0.0 --port 8080 --n_gpu_layers 99 --n_ctx ${maxCtx}`;
cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port 8080 -ngl 99 -c ${maxCtx} --flash-attn auto`;
} else {
cmd = `vllm serve ${modelData.name} --host 0.0.0.0 --port ${port}`;
cmd += ` --tensor-parallel-size ${tp}`;
+664 -93
View File
File diff suppressed because it is too large Load Diff
+20 -3
View File
@@ -85,6 +85,22 @@ function _ggufIncludePattern(model, source) {
return '*.gguf';
}
// Derive a short display label (normally the quantization tag) from a GGUF
// download include pattern such as "UD-Q4_K_XL/*" or "*Q4_K_M*.gguf".
//
// Returns the upper-cased quant tag with any "UD-" prefix removed when one is
// found in the last two path segments. Otherwise returns the last segment with
// a trailing ".gguf" and "-NNNNN-of-NNNNN" shard suffix stripped, which may be
// an empty string (e.g. for the catch-all "*.gguf").
function _ggufDisplayPartFromInclude(include) {
  // Wildcards carry no naming information, so drop them before splitting.
  const clean = String(include || '').replace(/\*/g, '');
  const parts = clean.split('/').filter(Boolean);
  const file = parts[parts.length - 1] || clean;
  const dir = parts.length > 1 ? parts[parts.length - 2] : '';
  // `_K_XL` is listed explicitly: "_" is a word character, so without it the
  // trailing \b rejects tags like UD-Q4_K_XL and they fall through to the
  // raw-filename branch below with their "UD-" prefix intact.
  const quant = `${dir} ${file}`.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_(?:XL|[MLS])|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i);
  if (quant) return quant[0].toUpperCase().replace(/^UD-/, '');
  return file.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
// Build the Running-tab name for a download: the short model name, followed by
// " · <part>" when the payload's include pattern yields a display part.
function _downloadTaskName(shortName, payload) {
  const pattern = payload?.include || '';
  if (!pattern) return shortName;
  const label = _ggufDisplayPartFromInclude(pattern);
  if (!label) return shortName;
  return `${shortName} · ${label}`;
}
function _missingGgufMessage(model) {
const name = model?.name || 'this model';
if (/\bnvfp4\b/i.test(name)) {
@@ -519,6 +535,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
}
const shortName = (model.name || repo).split('/').pop();
const taskName = _downloadTaskName(shortName, payload);
const targetHost = host || 'local';
const tasks = _loadTasks();
@@ -576,7 +593,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
if (activeOnHost) {
const queueId = `queue-${Date.now().toString(36)}`;
const allTasks = _loadTasks();
allTasks.push({ id: queueId, sessionId: queueId, name: shortName, type: 'download', status: 'queued', output: '', ts: Date.now(), payload, remoteHost: host });
allTasks.push({ id: queueId, sessionId: queueId, name: taskName, type: 'download', status: 'queued', output: '', ts: Date.now(), payload, remoteHost: host });
_saveTasks(allTasks);
_renderRunningTab();
uiModule.showToast(`Queued ${shortName} — waiting for current download`);
@@ -601,8 +618,8 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
uiModule.showToast('Download failed: ' + (data.error || ''), 9000);
return;
}
_addTask(data.session_id, shortName, 'download', payload);
uiModule.showToast(`Downloading ${shortName}...`);
_addTask(data.session_id, taskName, 'download', payload);
uiModule.showToast(`Downloading ${taskName}...`);
} catch (e) {
uiModule.showToast('Download failed: ' + e.message, 9000);
}
+235 -69
View File
@@ -27,6 +27,9 @@ function _statusLabel(status, type) {
// "cookbook-task-status" ('' = the neutral loading style).
function _taskBadge(task) {
if (task._unreachable && task.status === 'running') return { text: 'unreachable', cls: 'cookbook-task-error' };
if (task.type === 'download' && task.status === 'running') {
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-downloading' };
}
if (task.type === 'serve' && task.status === 'running' && task.progress) {
// Same green "running" pill — just with dynamic phase text, so it doesn't
// read as a different status while the server is coming up.
@@ -35,6 +38,47 @@ function _taskBadge(task) {
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status };
}
// Derive a short display label (normally the quantization tag) from a GGUF
// file path, looking at the file name and its parent directory only.
//
// Returns the upper-cased quant tag with any "UD-" prefix removed when one is
// found. Otherwise returns the file name with a trailing ".gguf" and
// "-NNNNN-of-NNNNN" shard suffix stripped ('' for an empty path).
function _ggufDisplayPartFromPath(path) {
  const parts = String(path || '').split('/').filter(Boolean);
  const file = parts[parts.length - 1] || '';
  const dir = parts.length > 1 ? parts[parts.length - 2] : '';
  const text = `${dir} ${file}`;
  // `_K_XL` is listed explicitly: "_" is a word character, so without it the
  // trailing \b rejects tags like UD-Q4_K_XL and they fall through to the
  // raw-filename branch below with their "UD-" prefix intact.
  const quant = text.match(/\b(?:UD-)?(?:IQ[1-8]_[A-Z0-9]+|Q[2-8]_K_(?:XL|[MLS])|Q[2-8]_[0-9A-Z]+|Q[2-8])\b/i);
  if (quant) return quant[0].toUpperCase().replace(/^UD-/, '');
  return file.replace(/\.gguf$/i, '').replace(/-\d{5}-of-\d{5}$/i, '');
}
// Display name for a download card: appends " · <part>" derived from the
// task's include pattern, unless there is no pattern or the name already
// carries a " · " suffix.
function _downloadDisplayName(name, task) {
  const pattern = task?.payload?.include || '';
  if (!pattern) return name;
  const alreadyLabelled = String(name || '').includes(' · ');
  if (alreadyLabelled) return name;
  const label = _ggufDisplayPartFromPath(pattern.replace(/\*/g, ''));
  if (!label) return name;
  return `${name} · ${label}`;
}
// Name shown on a task card. Downloads defer to _downloadDisplayName; serve
// tasks get " · <part>" appended from their recorded gguf_file (structured
// _fields first, then the flat payload key) unless the name already has one.
// Every other task type shows its trimmed name unchanged.
function _taskDisplayName(task) {
  const label = String(task?.name || '').trim();
  const kind = task?.type;
  if (kind === 'download') return _downloadDisplayName(label, task);
  if (kind !== 'serve') return label;

  const payload = task?.payload;
  const ggufFile = payload?._fields?.gguf_file || payload?.gguf_file || '';
  if (!ggufFile) return label;
  if (label.includes(' · ')) return label;

  const quant = _ggufDisplayPartFromPath(ggufFile);
  if (!quant) return label;
  return `${label} · ${quant}`;
}
// True when the task is a finished download ('done' or 'completed') that
// still identifies a model, through payload.repo_id or its name.
function _canLaunchDownloadedTask(task) {
  if (task?.type !== 'download') return false;
  const finished = ['done', 'completed'].includes(task.status || '');
  if (!finished) return false;
  return Boolean(task.payload?.repo_id || task.name);
}
// Serve-panel field overrides for launching a finished download. When the
// download recorded an include pattern, returns fields that force the
// llamacpp backend and pass the pattern through as the preferred GGUF
// include; returns null when no pattern was recorded.
function _downloadServeFields(task) {
  const pattern = String(task?.payload?.include || '').trim();
  if (pattern === '') return null;
  const fields = { backend: 'llamacpp', _forceBackend: true };
  fields._preferredGgufInclude = pattern;
  return fields;
}
// A download task whose tmux output still shows an active per-shard line
// (e.g. "model-00012-of-00082.safetensors: 56%|") is NOT actually finished —
// the cookbook just lost track. The clear pill becomes a "reconnect" affordance
@@ -52,13 +96,13 @@ function _downloadOutputLooksActive(task) {
function _canClearTask(task) {
if (!task || task.status === 'running') return false;
if (task.type === 'serve' && (task.status === 'ready' || task._serveReady)) return false;
if (task.type === 'serve' && (task.status === 'ready' || (task._serveReady && !['stopped', 'error', 'crashed', 'failed', 'completed'].includes(task.status)))) return false;
// If the tmux output still shows an in-flight download, the task isn't
// actually finished — hide the clear/check pill so it doesn't show on a
// task that's still doing work. (The next render will reflect this and
// ideally the self-heal flips status back to running.)
if (_downloadOutputLooksActive(task)) return false;
return ['done', 'stopped', 'error', 'crashed', 'failed'].includes(task.status);
return ['done', 'completed', 'stopped', 'error', 'crashed', 'failed'].includes(task.status);
}
function _clearPillLabel(task) {
@@ -66,6 +110,13 @@ function _clearPillLabel(task) {
return 'clear';
}
// Normalise a configured venv path to the venv root: trims whitespace and
// trailing slashes, then drops a trailing /bin/<tool> segment when the tool is
// activate, python / python3 / python3.N, vllm, pip or pip3. Empty or missing
// input yields ''.
function _venvRootFromPath(path) {
  const trimmed = (path || '').toString().trim().replace(/\/+$/, '');
  return trimmed
    ? trimmed.replace(/\/bin\/(?:activate|python(?:3(?:\.\d+)?)?|vllm|pip(?:3)?)$/i, '')
    : '';
}
// A pip dependency/driver install (payload._dep) reports success with the
// runner's "=== Process exited with code 0 ===" sentinel and pip's
// "Successfully installed" line — never the HuggingFace download markers
@@ -141,6 +192,13 @@ async function _openDownloadForGgufTask(task) {
function _terminalServeDiagnosis(task, outputText) {
const out = String(outputText || task?.output || '');
if (!task || task.type !== 'serve' || !['stopped', 'error', 'crashed', 'failed'].includes(task.status) || !out.trim()) return null;
// Suppress the crash diagnosis when the output proves the server
// actually became reachable — e.g. an early `exit 127` from a failed
// build attempt was followed by the shim/Python fallback successfully
// starting Uvicorn. Without this, the user sees a confusing "build
// stopped before the server became reachable" toast while the server
// is right there serving requests.
if (_serveOutputLooksReady(task)) return null;
// Pip tasks (Reinstall vLLM, Upgrade torch, etc.) ride on the serve task
// type so they get a tmux session + show up in Running tab — but they are
// NOT serve invocations. Their output is pip's own; the generic
@@ -256,6 +314,7 @@ let _copyText;
let _persistEnvState;
let _refreshDependencies;
let _serverByVal;
let _serverKey;
let _selectedServer;
let modelLogo;
let esc;
@@ -264,6 +323,40 @@ let _detectToolParser;
let _detectModelOptimizations;
let _buildServeCmd;
// Resolve which server profile a task belongs to.
//
// Host and saved key are read from the task itself first, then from its
// payload. The profile is looked up by saved key, then by host through
// _serverByVal, then by a host match in _envState.servers. The returned key is
// the resolved profile's key (via _serverKey when that helper is set, else the
// saved key); with no profile it falls back to the saved key, the host, or
// 'local'.
function _taskServerSelection(task) {
  const host = task?.remoteHost || task?.payload?.remote_host || '';
  const savedKey = task?.remoteServerKey || task?.payload?.remote_server_key || '';

  let server = null;
  if (savedKey) server = _serverByVal(savedKey) || null;
  if (!server && host) {
    server = _serverByVal(host) || _envState.servers.find(s => s.host === host) || null;
  }

  let key;
  if (server) {
    key = _serverKey ? _serverKey(server) : savedKey;
  } else {
    key = savedKey || host || 'local';
  }
  return { host, server, key };
}
// Make the task's server the active one: updates _envState (remoteHost,
// remoteServerKey and, when known, env / envPath / platform) and points the
// server <select> dropdowns at the matching option. Returns the resolved
// { host, server, key }.
function _selectTaskServer(task) {
  const selection = _taskServerSelection(task);
  const { host, server, key } = selection;

  _envState.remoteHost = host;
  _envState.remoteServerKey = key === 'local' ? '' : key;

  if (server) {
    _envState.env = server.env || 'none';
    _envState.envPath = server.envPath || '';
    _envState.platform = server.platform || '';
  } else if (!host) {
    // No profile and no host means the local machine: reset to defaults.
    _envState.env = 'none';
    _envState.envPath = '';
    _envState.platform = '';
  }
  // An unknown remote host keeps whatever env settings were already active.

  const wanted = key || (host || 'local');
  const hasOption = (sel, value) => [...sel.options].some(o => o.value === value);
  const selects = document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server');
  selects.forEach(sel => {
    if (!sel || sel.tagName !== 'SELECT') return;
    if (hasOption(sel, wanted)) {
      sel.value = wanted;
    } else if (host && hasOption(sel, host)) {
      // Fall back to a dropdown that still lists servers by raw host string.
      sel.value = host;
    } else {
      sel.value = host ? wanted : 'local';
    }
  });

  return { host, server, key };
}
// When a new action is started (download / dependency / serve), this holds the
// new task's id so the next render collapses every other card and leaves only
// the new one open. Consumed (cleared) by _renderRunningTab.
@@ -526,7 +619,7 @@ async function _startQueuedDownload(task) {
if (t.sessionId === data.session_id) return false;
return !(key && t.type === 'download' && t.status === 'queued' && _downloadDedupeKey(t) === key);
});
if (!found) tasks.push(_stripTaskSecrets(launchedTask));
if (!found) tasks.push(_redactTaskForStorage(launchedTask));
_saveTasks(tasks);
_renderRunningTab();
_startBackgroundMonitor();
@@ -636,28 +729,53 @@ function _loadPrunedTasks() {
const _REMOVED_KEY = 'cookbook-removed-tasks';
const _TOMBSTONE_TTL_MS = 24 * 3600 * 1000;
function _loadTombstones() {
try { return JSON.parse(localStorage.getItem(_REMOVED_KEY)) || {}; }
try {
const tomb = JSON.parse(localStorage.getItem(_REMOVED_KEY)) || {};
const now = Date.now();
let changed = false;
for (const k in tomb) {
if (now - tomb[k] > _TOMBSTONE_TTL_MS) {
delete tomb[k];
changed = true;
}
}
if (changed) localStorage.setItem(_REMOVED_KEY, JSON.stringify(tomb));
return tomb;
}
catch { return {}; }
}
// Persist the removed-task tombstone map to localStorage under _REMOVED_KEY;
// a missing map is stored as an empty object.
function _saveTombstones(tomb) {
  const serialized = JSON.stringify(tomb || {});
  localStorage.setItem(_REMOVED_KEY, serialized);
}
function _tombstoneTask(id) {
if (!id) return;
const tomb = _loadTombstones();
const now = Date.now();
tomb[id] = now;
for (const k in tomb) { if (now - tomb[k] > _TOMBSTONE_TTL_MS) delete tomb[k]; }
localStorage.setItem(_REMOVED_KEY, JSON.stringify(tomb));
_saveTombstones(tomb);
}
// True when the id has a tombstone whose timestamp is no older than
// _TOMBSTONE_TTL_MS.
function _isTombstoned(id) {
  const removedAt = _loadTombstones()[id];
  if (removedAt == null) return false;
  return (Date.now() - removedAt) <= _TOMBSTONE_TTL_MS;
}
function _stripTaskSecrets(task) {
// Scrub credentials from free text (task output, launch commands) before it
// is persisted. Accepts any value; null/undefined/'' become ''.
//
// Three passes, in order:
//   1. Bare HuggingFace tokens (hf_ + 20 or more alphanumerics).
//   2. Space-separated CLI secrets such as `--api-key VALUE`. The value must
//      not start with "-", so a following flag is never swallowed.
//   3. key=value / key: value pairs for api key, token, authorization,
//      password, passwd and secret. An auth scheme word (Bearer / Basic) after
//      the separator is consumed together with the credential; otherwise
//      "Authorization: Bearer <token>" would redact only the word "Bearer" and
//      leave the token in the stored text.
// An opening quote before the value is preserved. Only the first
// whitespace-delimited word of a value is replaced.
function _redactStoredText(value) {
  return String(value || '')
    .replace(/hf_[A-Za-z0-9]{20,}/g, '[redacted-token]')
    .replace(/(--(?:[a-z0-9]+[_-])*?(?:api[_-]?key|token|password|passwd|secret)\s+)(["']?)(?!-)[^\s"']+/gi, '$1$2[redacted]')
    .replace(/((?:api[_-]?key|token|authorization|password|passwd|secret)\s*[=:]\s*)(["']?)(?:(?:bearer|basic)\s+)?[^\s"']+/gi, '$1$2[redacted]');
}
// Return a storage-safe shallow copy of a task: string output is passed
// through _redactStoredText, and the payload (when it is an object) is copied
// with hf_token / hfToken removed and its _cmd / cmd strings redacted.
// Non-object input is returned unchanged; the original task is not mutated.
function _redactTaskForStorage(task) {
  if (!task || typeof task !== 'object') return task;

  const redacted = { ...task };
  if (typeof redacted.output === 'string') {
    redacted.output = _redactStoredText(redacted.output);
  }

  if (redacted.payload && typeof redacted.payload === 'object') {
    const cleaned = { ...redacted.payload };
    for (const secretKey of ['hf_token', 'hfToken']) delete cleaned[secretKey];
    for (const cmdKey of ['_cmd', 'cmd']) {
      if (typeof cleaned[cmdKey] === 'string') cleaned[cmdKey] = _redactStoredText(cleaned[cmdKey]);
    }
    redacted.payload = cleaned;
  }
  return redacted;
}
@@ -666,23 +784,24 @@ function _stripStateSecrets(state) {
const safe = { ...state };
if (safe.env && typeof safe.env === 'object') {
const { hfToken, ...env } = safe.env;
if (hfToken) env.hfToken = hfToken;
safe.env = env;
}
if (Array.isArray(safe.tasks)) safe.tasks = safe.tasks.map(_stripTaskSecrets);
if (Array.isArray(safe.tasks)) safe.tasks = safe.tasks.map(_redactTaskForStorage);
return safe;
}
export function _saveTasks(tasks) {
localStorage.setItem(TASKS_KEY, JSON.stringify((tasks || []).map(_stripTaskSecrets)));
localStorage.setItem(TASKS_KEY, JSON.stringify((tasks || []).map(_redactTaskForStorage)));
_syncToServer();
}
export function _addTask(sessionId, name, type, payload) {
let tasks = _loadTasks();
const remoteHost = (payload && payload.remote_host) || _envState.remoteHost || '';
const sshPort = (payload && payload.ssh_port) || _getPort(remoteHost) || '';
const platform = (payload && payload.platform) || _getPlatform(remoteHost) || '';
const remoteServerKey = (payload && payload.remote_server_key) || '';
const remoteServerName = (payload && payload.remote_server_name) || '';
const sshPort = (payload && payload.ssh_port) || _getPort(remoteServerKey || remoteHost) || '';
const platform = (payload && payload.platform) || _getPlatform(remoteServerKey || remoteHost) || '';
// Serving a model supersedes its finished download — clear the matching
// finished download card (covers serving directly from the Serve tab, not just
// via the download card's "Serve →" button).
@@ -697,7 +816,7 @@ export function _addTask(sessionId, name, type, payload) {
return !(key && t.type === 'download' && t.status === 'queued' && _downloadDedupeKey(t) === key);
});
}
const task = _stripTaskSecrets({ id: sessionId, sessionId, name, type, status: 'running', output: '', ts: Date.now(), payload: payload || null, remoteHost, sshPort, platform });
const task = _redactTaskForStorage({ id: sessionId, sessionId, name, type, status: 'running', output: '', ts: Date.now(), payload: payload || null, remoteHost, remoteServerKey, remoteServerName, sshPort, platform });
tasks.push(task);
_saveTasks(tasks);
// New action → collapse all other cards, leave only this one open.
@@ -992,14 +1111,24 @@ function _presetEnvFields(task) {
};
}
// Return a storage-safe shallow copy of a preset: cmd / command strings are
// passed through _redactStoredText and hf_token / hfToken are removed.
// Non-object input is returned unchanged; the original preset is not mutated.
function _redactPresetForStorage(preset) {
  if (!preset || typeof preset !== 'object') return preset;

  const redacted = { ...preset };
  for (const cmdKey of ['cmd', 'command']) {
    if (typeof redacted[cmdKey] === 'string') redacted[cmdKey] = _redactStoredText(redacted[cmdKey]);
  }
  for (const secretKey of ['hf_token', 'hfToken']) delete redacted[secretKey];
  return redacted;
}
function _saveTaskAsPreset(task, label) {
const host = task.remoteHost || 'localhost';
const portMatch = task.payload?._cmd?.match(/--port\s+(\d+)/);
const port = portMatch ? portMatch[1] : '8000';
const presets = _loadPresets();
if (presets.some(p => p.cmd === task.payload._cmd)) return false;
presets.push({ name: task.name, model: task.payload.repo_id, backend: 'vllm', host, port, cmd: task.payload._cmd, remoteHost: task.remoteHost || '', label: label || task.name, ..._presetEnvFields(task) });
_savePresets(presets);
presets.push(_redactPresetForStorage({ name: task.name, model: task.payload.repo_id, backend: 'vllm', host, port, cmd: task.payload._cmd, remoteHost: task.remoteHost || '', label: label || task.name, ..._presetEnvFields(task) }));
_savePresets(presets.map(_redactPresetForStorage));
return true;
}
@@ -1042,7 +1171,7 @@ function _autoSaveWorkingConfig(task) {
const existing = presets.find(p => p.cmd === cmd);
if (existing) {
task._autoSaved = true;
if (!existing.confirmedWorking) { existing.confirmedWorking = true; _savePresets(presets); }
if (!existing.confirmedWorking) { existing.confirmedWorking = true; _savePresets(presets.map(_redactPresetForStorage)); }
return; // already saved → just confirm it, no duplicate, no toast
}
// Respect the per-model cap the manual save flow uses (max 5).
@@ -1050,13 +1179,13 @@ function _autoSaveWorkingConfig(task) {
const host = task.remoteHost || 'localhost';
const portMatch = cmd.match(/--port[=\s]+(\d+)/);
const port = portMatch ? portMatch[1] : '8000';
presets.push({
presets.push(_redactPresetForStorage({
name: task.name, model, backend: 'vllm', host, port,
cmd, remoteHost: task.remoteHost || '',
label: _autoConfigLabel(task), confirmedWorking: true, autoSaved: true,
..._presetEnvFields(task),
});
_savePresets(presets);
}));
_savePresets(presets.map(_redactPresetForStorage));
task._autoSaved = true;
uiModule.showToast('Saved working config');
}
@@ -1078,6 +1207,7 @@ function _syncToServer() {
if (!_envState || !Array.isArray(_envState.servers) || _envState.servers.length === 0) return;
const state = {
tasks: _loadTasks(),
removedTasks: _loadTombstones(),
presets: _loadPresets(),
env: _envState,
serveState: null,
@@ -1126,15 +1256,22 @@ export async function _syncFromServer() {
const localTasks = _loadTasks();
const serverTasks = state.tasks || [];
const serverTombstones = (state.removedTasks && typeof state.removedTasks === 'object') ? state.removedTasks : {};
const localTombstones = _loadTombstones();
const mergedTombstones = { ...serverTombstones, ...localTombstones };
for (const [id, ts] of Object.entries(serverTombstones)) {
if (localTombstones[id] == null || Number(ts) > Number(localTombstones[id])) mergedTombstones[id] = ts;
}
_saveTombstones(mergedTombstones);
const localIds = new Set(localTasks.map(t => t.sessionId));
const merged = [...localTasks];
const merged = localTasks.filter(t => !_isTombstoned(t.sessionId));
for (const t of serverTasks) {
if (!localIds.has(t.sessionId) && !_isTombstoned(t.sessionId)) {
merged.push(t);
}
}
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_stripTaskSecrets)));
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_redactTaskForStorage)));
if (state.env) {
// The active server selection (remoteHost + its env/path/platform) is a
@@ -1145,6 +1282,18 @@ export async function _syncFromServer() {
const { remoteHost: _rh, env: _e, envPath: _ep, platform: _pf, ...settings } = state.env;
delete settings.hfToken;
Object.assign(_envState, settings);
const selected = (_envState.remoteServerKey && _serverByVal?.(_envState.remoteServerKey))
|| (_envState.remoteHost ? (_envState.servers || []).find(s => s.host === _envState.remoteHost) : null);
if (selected) {
_envState.env = selected.env || 'none';
_envState.envPath = selected.envPath || '';
_envState.platform = selected.platform || '';
} else if (!_envState.remoteHost) {
const local = (_envState.servers || []).find(s => !s.host || s.host === 'local');
_envState.env = local?.env || 'none';
_envState.envPath = local?.envPath || '';
_envState.platform = local?.platform || '';
}
const { hfToken, ...safeState } = _envState;
localStorage.setItem('cookbook-last-state', JSON.stringify(safeState));
}
@@ -1154,6 +1303,7 @@ export async function _syncFromServer() {
if (state.serveState) {
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify(state.serveState));
}
document.dispatchEvent(new CustomEvent('cookbook:state-synced', { detail: state }));
return true;
} catch { return false; }
}
@@ -1312,17 +1462,11 @@ async function _openServeEditForTask(task, cmdOverride, fieldOverrides = null) {
if (fieldOverrides && typeof fieldOverrides === 'object') {
fields = { ...(fields || {}), ...fieldOverrides };
}
// Switch the active server to the one this serve ran on (mirrors _openEdit).
const _tHost = task.remoteHost || '';
_envState.remoteHost = _tHost;
const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost)
|| _envState.servers.find(s => s.host === _tHost);
if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; }
else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; }
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
if (!sel || sel.tagName !== 'SELECT') return;
sel.value = _tHost || 'local';
});
fields = { ...(fields || {}), _replaceTaskId: task.sessionId };
// Switch the active server to the exact profile this serve ran on. The
// dropdown stores stable srv: keys, not raw host strings, so preserving only
// task.remoteHost can relaunch against the local container by accident.
_selectTaskServer(task);
try {
const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo, fields);
@@ -1520,15 +1664,33 @@ function _parseServeCmdToFields(cmd) {
return fields;
}
export async function _launchServeTask(shortName, repo, cmd, fields, hostOverride) {
export async function _launchServeTask(shortName, repo, cmd, fields, hostOverride, targetMeta = null) {
// Host resolution mirrors the download path: when the caller passes an explicit
// host (resolved from the dropdown the user actually picked), use it and look
// up that server's port/platform from the shared servers list. Only fall back
// to _envState.remoteHost for legacy callers (diagnosis/pip-update).
const _host = (hostOverride !== undefined) ? (hostOverride || '') : (_envState.remoteHost || '');
const _hsrv = _serverByVal(_envState.remoteServerKey || _host)
const _targetKey = targetMeta?.serverKey || '';
const _hsrv = (_targetKey && _targetKey !== 'local' ? _serverByVal(_targetKey) : null)
|| (hostOverride === undefined ? _serverByVal(_envState.remoteServerKey || _host) : null)
|| _envState.servers.find(s => s.host === _host) || {};
const _serverMetaKey = _targetKey || (_hsrv && _serverKey ? _serverKey(_hsrv) : '') || (_host || 'local');
const _serverMetaName = targetMeta?.serverName || _hsrv.name || (_host ? _host : 'Local');
const _hplatform = _host ? (_hsrv.platform || '') : (_envState.platform || '');
const _replaceTaskId = fields?._replaceTaskId || '';
if (_replaceTaskId) {
try {
const _old = _loadTasks().find(t => t.sessionId === _replaceTaskId);
if (_old && _old.type === 'serve') {
await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _tmuxGracefulKill(_old) }),
});
_removeTask(_old.sessionId);
}
} catch {}
}
// Replace any serve already targeting this same host:port — you can't run two
// servers on one port, so re-serving (or retrying) should stop & remove the
@@ -1572,7 +1734,7 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
}
} else {
if (_envState.env === 'venv' && _envState.envPath) {
const p = _envState.envPath;
const p = _venvRootFromPath(_envState.envPath);
envPrefix = 'source ' + (p.endsWith('/bin/activate') ? p : p + '/bin/activate');
} else if (_envState.env === 'conda' && _envState.envPath) {
envPrefix = 'eval "$(conda shell.bash hook)" && conda activate ' + _envState.envPath;
@@ -1583,7 +1745,7 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
repo_id: repo,
cmd: cmd,
remote_host: _host || undefined,
ssh_port: _getPort(_host) || undefined,
ssh_port: _getPort(_serverMetaKey || _host) || undefined,
env_prefix: envPrefix || undefined,
hf_token: _envState.hfToken || undefined,
gpus: _envState.gpus || undefined,
@@ -1607,11 +1769,11 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
return;
}
const _sp = _getPort(_host);
const _sp = _getPort(_serverMetaKey || _host);
// _fields = the exact structured serve-form values used for this launch,
// so the "Edit / relaunch" button can re-open the Serve panel pre-filled
// with these precise settings (not just the last-used-for-repo state).
const payload = { repo_id: repo, remote_host: _host || undefined, ssh_port: _sp || undefined, _cmd: cmd, _fields: fields || undefined, _env: _usedEnv, _envPath: _usedEnvPath, _gpus: _usedGpus };
const payload = { repo_id: repo, remote_host: _host || undefined, remote_server_key: _serverMetaKey || undefined, remote_server_name: _serverMetaName || undefined, ssh_port: _sp || undefined, _cmd: cmd, _fields: fields || undefined, _env: _usedEnv, _envPath: _usedEnvPath, _gpus: _usedGpus };
_addTask(data.session_id, shortName, 'serve', payload);
uiModule.showToast(`Serving ${shortName}...`);
// Auto-register may have enabled an existing (offline) endpoint for this
@@ -1726,7 +1888,7 @@ export function _renderRunningTab() {
'<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">' +
'<h2 style="margin:0;padding:0;line-height:1;">Active <span id="running-count" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal">' + activeCount + '</span></h2>' +
'</div>' +
'<p class="memory-desc doclib-desc" style="margin-top:6px;">Active downloads and serving processes.</p>' +
'<p class="memory-desc doclib-desc" style="margin-top:6px;">Active downloads, installs and model launches.</p>' +
'</div>';
const firstGroup = body.querySelector('.cookbook-group');
if (firstGroup) body.insertBefore(group, firstGroup);
@@ -1760,16 +1922,25 @@ export function _renderRunningTab() {
}
// Group tasks by server
const _serverName = (host) => {
if (!host) return 'Local';
const srv = _serverByVal(_envState.remoteServerKey || host)
|| _envState.servers.find(s => s.host === host);
return srv?.name || host;
const _taskServerKey = (task) => task?.remoteServerKey || task?.remoteHost || '';
const _serverName = (keyOrTask) => {
if (keyOrTask && typeof keyOrTask === 'object') {
const task = keyOrTask;
if (task.remoteServerName) return task.remoteServerName;
const srv = task.remoteServerKey ? _serverByVal(task.remoteServerKey) : null;
if (srv?.name) return srv.name;
if (!task.remoteHost) return 'Local';
return (_envState.servers.find(s => s.host === task.remoteHost)?.name) || task.remoteHost;
}
const key = keyOrTask || '';
if (!key || key === 'local') return 'Local';
const srv = _serverByVal(key);
return srv?.name || key;
};
const serverGroups = {};
for (const t of tasks) {
const key = t.remoteHost || '';
if (!serverGroups[key]) serverGroups[key] = { name: _serverName(key), serve: [], download: [] };
const key = _taskServerKey(t);
if (!serverGroups[key]) serverGroups[key] = { name: _serverName(t), serve: [], download: [] };
serverGroups[key][t.type === 'serve' ? 'serve' : 'download'].push(t);
}
@@ -1816,12 +1987,12 @@ export function _renderRunningTab() {
e.stopPropagation(); // don't toggle the section collapse (was an inline onclick, blocked by CSP)
const host = btn.dataset.clearServer;
const allTasks = _loadTasks();
const toRemove = allTasks.filter(t => (t.remoteHost || '') === host && _canClearTask(t));
const toRemove = allTasks.filter(t => _taskServerKey(t) === host && _canClearTask(t));
// Bail with a clear message instead of silently doing nothing when
// every task on this server is still running (nothing finished to
// clear yet) — the previous behavior looked like the button was dead.
if (!toRemove.length) {
const stillRunning = allTasks.filter(t => (t.remoteHost || '') === host && t.status === 'running').length;
const stillRunning = allTasks.filter(t => _taskServerKey(t) === host && t.status === 'running').length;
const _msg = stillRunning
? `No finished tasks on ${_serverName(host)}${stillRunning} still running. Stop them first to clear.`
: `No finished tasks on ${_serverName(host)}.`;
@@ -1830,7 +2001,8 @@ export function _renderRunningTab() {
return;
}
if (!await window.styledConfirm(`Clear ${toRemove.length} finished task${toRemove.length === 1 ? '' : 's'} on ${_serverName(host)}?`, { confirmText: 'Clear' })) return;
const remaining = allTasks.filter(t => (t.remoteHost || '') !== host || !_canClearTask(t));
toRemove.forEach(t => _tombstoneTask(t.sessionId));
const remaining = allTasks.filter(t => _taskServerKey(t) !== host || !_canClearTask(t));
_saveTasks(remaining);
// Fade/slide each finished card out (same exit as the per-card clear)
// instead of yanking them instantly.
@@ -1864,7 +2036,7 @@ export function _renderRunningTab() {
btn.addEventListener('click', async (e) => {
e.stopPropagation(); // don't toggle the section collapse
const host = btn.dataset.stopServer;
const running = _loadTasks().filter(t => (t.remoteHost || '') === host && t.status === 'running');
const running = _loadTasks().filter(t => _taskServerKey(t) === host && t.status === 'running');
if (!running.length) { uiModule.showToast(`Nothing running on ${_serverName(host)}`); return; }
if (!await window.styledConfirm(`Stop ${running.length} running task${running.length > 1 ? 's' : ''} on ${_serverName(host)}?`, { confirmText: 'Stop all' })) return;
// Mark every task as user-stopped BEFORE firing the kills so that the
@@ -1967,11 +2139,12 @@ export function _renderRunningTab() {
const _bdg = _taskBadge(task);
const _bdgTitle = (task._unreachable && task.status === 'running') ? ' title="Server not responding — it may have crashed"' : '';
const displayName = _taskDisplayName(task);
el.innerHTML = `
<div class="cookbook-task-header">
<span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span>
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(task.name)}</span>
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(displayName)}</span>
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span>${_canLaunchDownloadedTask(task) ? '<button type="button" class="cookbook-task-serve-btn" title="Open in Launch"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg><span>Launch</span></button>' : ''}<span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
<button type="button" class="cookbook-task-start-now" title="Start this queued download now" style="display:${(task.type === 'download' && task.status === 'queued') ? '' : 'none'}"><svg width="11" height="11" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="8 5 19 12 8 19 8 5"/></svg><span>start now</span></button>
<span class="cookbook-task-status ${_bdg.cls}"${_bdgTitle}>${esc(_bdg.text)}</span>
<button class="cookbook-task-menu-btn" title="Actions">&#8942;</button>
@@ -2043,19 +2216,11 @@ export function _renderRunningTab() {
e.stopPropagation();
const repo = task.payload?.repo_id || task.name;
if (!repo) { uiModule.showToast('No model info on this task'); return; }
// Point the active server at the one it downloaded to.
const _tHost = task.remoteHost || '';
_envState.remoteHost = _tHost;
const _tSrv = _serverByVal(_envState.remoteServerKey || _tHost)
|| _envState.servers.find(s => s.host === _tHost);
if (_tSrv) { _envState.env = _tSrv.env || 'none'; _envState.envPath = _tSrv.envPath || ''; _envState.platform = _tSrv.platform || ''; }
else if (!_tHost) { _envState.env = 'none'; _envState.envPath = ''; _envState.platform = ''; }
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
if (sel && sel.tagName === 'SELECT') sel.value = _tHost || 'local';
});
// Point the active server at the exact profile it downloaded to.
_selectTaskServer(task);
try {
const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo);
await openServePanelForRepo(repo, _downloadServeFields(task));
// Serving it supersedes the finished download — clear the card from
// the Running tab (smooth exit) now that we've jumped to Serve.
_animateOutThenRemove(el, task.sessionId);
@@ -2177,9 +2342,6 @@ export function _renderRunningTab() {
if (task.status !== 'running' && task.status !== 'queued') {
items.push({ group: 'run', label: 'Reconnect tmux', action: 'reconnect' });
}
if (task.status === 'running') {
items.push({ group: 'run', label: 'Stop', action: 'stop', danger: true });
}
items.push({ group: 'run', label: 'Restart', action: 'retry' });
// ── Edit section ────────────────────────────────────────────
// Merged "Edit & relaunch" — opens the structured serve panel
@@ -2539,7 +2701,7 @@ export function _renderRunningTab() {
});
// Route to the right server section body
const serverBodyId = `server-body-${(task.remoteHost || 'local').replace(/[^a-zA-Z0-9-]/g, '_')}`;
const serverBodyId = `server-body-${(_taskServerKey(task) || 'local').replace(/[^a-zA-Z0-9-]/g, '_')}`;
const targetBody = document.getElementById(serverBodyId);
if (targetBody) targetBody.appendChild(el);
else group.appendChild(el);
@@ -3393,7 +3555,8 @@ function _refreshServerDots() {
let tasks;
try { tasks = _loadTasks(); } catch { return; }
const byKey = {};
for (const t of tasks) { (byKey[t.remoteHost || ''] = byKey[t.remoteHost || ''] || []).push(t); }
const _taskServerKeyForDot = (task) => task?.remoteServerKey || task?.remoteHost || '';
for (const t of tasks) { (byKey[_taskServerKeyForDot(t)] = byKey[_taskServerKeyForDot(t)] || []).push(t); }
document.querySelectorAll('.cookbook-section-header').forEach(header => {
const dot = header.querySelector('.cookbook-srv-status');
if (!dot) return;
@@ -3527,7 +3690,9 @@ async function _probeEndpointUntilOnline(epId, host, port) {
try {
// Hit the probe endpoint — it re-probes server-side and updates
// cached_models. We consume (and discard) the SSE stream.
await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).then(r => r.text()).catch(() => {});
const probeRes = await fetch(`/api/model-endpoints/${epId}/probe`, { credentials: 'same-origin' }).catch(() => null);
if (probeRes && probeRes.status === 404) return;
if (probeRes) await probeRes.text().catch(() => {});
const eps = await fetch('/api/model-endpoints', { credentials: 'same-origin' }).then(r => r.json()).catch(() => []);
const ep = (eps || []).find(e => e.id === epId);
if (ep && (ep.models || []).length) {
@@ -3565,7 +3730,7 @@ async function _pollBackgroundStatus() {
}
}
if (added > 0) {
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_stripTaskSecrets)));
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_redactTaskForStorage)));
_renderRunningTab();
}
}
@@ -3798,6 +3963,7 @@ export function initRunning(shared) {
_persistEnvState = shared._persistEnvState;
_refreshDependencies = shared._refreshDependencies;
_serverByVal = shared._serverByVal;
_serverKey = shared._serverKey;
_selectedServer = shared._selectedServer;
modelLogo = shared.modelLogo;
esc = shared.esc;
+1289 -127
View File
File diff suppressed because it is too large Load Diff
+155 -4
View File
@@ -24,6 +24,7 @@ import * as Modals from './modalManager.js';
let _autoDetectDebounce = null;
let _autoTitleDebounce = null;
let _autoSaveDebounce = null;
let _lastAutoSaveErrorAt = 0;
let _animationInProgress = false;
let _animationCancel = null; // function to cancel current animation
let _htmlPreviewActive = false; // true when inline HTML preview iframe is showing
@@ -154,6 +155,20 @@ import * as Modals from './modalManager.js';
addDocToTabs,
syncDocIndicator: _syncDocIndicator,
});
const sidebarNewDocBtn = document.getElementById('library-new-doc-btn');
if (sidebarNewDocBtn && !sidebarNewDocBtn.dataset.docNewWired) {
sidebarNewDocBtn.dataset.docNewWired = '1';
sidebarNewDocBtn.addEventListener('click', async (e) => {
e.preventDefault();
e.stopPropagation();
try {
await newDocument();
} catch (err) {
console.error('Failed to create document from sidebar button:', err);
if (uiModule) uiModule.showError('Failed to create document');
}
});
}
_maybeOpenDocFromHash();
window.addEventListener('hashchange', _maybeOpenDocFromHash);
}
@@ -2686,6 +2701,104 @@ import * as Modals from './modalManager.js';
await _uploadComposeFiles(files);
}
// Decide whether a File-like object should be treated as an image for
// markdown insertion. A MIME type starting with "image/" wins; otherwise the
// file name's extension is checked against a fixed list of image extensions
// (case-insensitive). Returns false for a missing file.
function _isMarkdownImageFile(file) {
  if (!file) return false;
  const mime = (file.type || '').toLowerCase();
  if (mime.startsWith('image/')) return true;
  // Some sources provide no MIME type, so fall back to the extension.
  const filename = file.name || '';
  return /\.(avif|bmp|gif|jpe?g|png|svg|webp)$/i.test(filename);
}
// Derive markdown alt text from a file name: drop the final extension,
// replace square brackets and line breaks with spaces (they would break the
// ![alt](url) syntax), collapse whitespace runs, and fall back to "image"
// whenever the result would be empty.
function _markdownImageAlt(name) {
  const FALLBACK = 'image';
  const stem = String(name || FALLBACK).replace(/\.[^.]+$/, '').trim();
  const cleaned = (stem || FALLBACK)
    .replace(/[\[\]\n\r]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
  return cleaned || FALLBACK;
}
// Return the lower-cased language of the active document. The language stored
// on the doc record (looked up in `docs` by `activeDocId`) is preferred; when
// that is missing, the current value of #doc-language-select is used. Returns
// '' when neither source yields a value.
function _activeDocLanguage() {
  const current = activeDocId && docs.get(activeDocId);
  let lang = current && current.language;
  if (!lang) lang = document.getElementById('doc-language-select')?.value;
  return (lang || '').toLowerCase();
}
// Re-sync the editor chrome after the textarea content was changed
// programmatically, then queue the debounced follow-up work:
//   - line numbers are updated immediately,
//   - the #doc-editor-code mirror is refreshed unless it is flagged with
//     data-has-diff,
//   - syntax highlighting re-runs after 80 ms,
//   - auto-title runs after 600 ms (reads the textarea at fire time),
//   - a silent save runs after 800 ms.
// Each timer replaces any pending timer stored in the same debounce handle.
function _scheduleMarkdownImageAutosave(ta) {
  updateLineNumbers(ta.value);

  const mirror = document.getElementById('doc-editor-code');
  const canMirror = Boolean(mirror) && !mirror.dataset.hasDiff;
  if (canMirror) {
    // Trailing newline keeps the mirror's height in step with the textarea.
    mirror.textContent = `${ta.value}\n`;
    mirror.style.minHeight = `${ta.scrollHeight}px`;
  }

  clearTimeout(_hlDebounce);
  _hlDebounce = setTimeout(syncHighlighting, 80);

  clearTimeout(_autoTitleDebounce);
  _autoTitleDebounce = setTimeout(() => {
    autoTitleFromContent(ta.value);
  }, 600);

  clearTimeout(_autoSaveDebounce);
  _autoSaveDebounce = setTimeout(() => {
    saveDocument({ silent: true });
  }, 800);
}
// Insert one markdown image reference per uploaded file at the textarea's
// current selection (replacing any selected text).
//
// `uploadedFiles` is expected to be an array of upload records; each record's
// id is read from `id` or `file_id` and its display name from `name` or
// `filename`. Records without an id are skipped. References are separated by
// blank lines, and a newline is added before/after the inserted text when the
// surrounding content does not already provide one. Afterwards the caret is
// placed at the end of the insertion, autosave is scheduled and the markdown
// preview is refreshed if visible.
function _insertMarkdownImages(uploadedFiles) {
  const editor = document.getElementById('doc-editor-textarea');
  if (!editor) return;
  if (!Array.isArray(uploadedFiles) || uploadedFiles.length === 0) return;

  const selStart = editor.selectionStart || 0;
  const selEnd = editor.selectionEnd || selStart;
  const head = editor.value.slice(0, selStart);
  const tail = editor.value.slice(selEnd);

  const refs = [];
  for (const file of uploadedFiles) {
    const id = encodeURIComponent(file.id || file.file_id || '');
    const alt = _markdownImageAlt(file.name || file.filename);
    if (id) refs.push(`![${alt}](/api/upload/${id})`);
  }
  if (refs.length === 0) return;

  // Keep the image block on its own line(s) relative to surrounding text.
  const lead = (head && !head.endsWith('\n')) ? '\n' : '';
  const trail = (tail && !tail.startsWith('\n')) ? '\n' : '';
  const insertion = lead + refs.join('\n\n') + trail;

  _replaceRange(editor, selStart, selEnd, insertion);

  const caretPos = selStart + insertion.length;
  editor.selectionStart = caretPos;
  editor.selectionEnd = caretPos;
  editor.focus();

  _scheduleMarkdownImageAutosave(editor);
  _refreshMarkdownPreviewIfVisible(activeDocId, editor.value);
}
// Upload image files and insert markdown references to them into the active
// document.
//
// `files` may be any iterable / array-like of File objects (FileList, array,
// clipboard or drag-and-drop files). Entries rejected by _isMarkdownImageFile
// are dropped. The images are POSTed as multipart form data under the field
// name "files" to `${API_BASE}/api/upload`; the JSON response is expected to
// carry a `files` array, which is handed to _insertMarkdownImages.
//
// Failures are reported through uiModule.showError (when uiModule is set) and
// logged; the function never rejects.
async function _uploadMarkdownImages(files) {
  const images = Array.from(files || []).filter(_isMarkdownImageFile);
  if (!images.length) {
    if (uiModule) uiModule.showError('Choose an image file');
    return;
  }
  if (_activeDocLanguage() !== 'markdown') {
    if (uiModule) uiModule.showError('Switch the document to markdown before inserting images');
    return;
  }
  // Remember which document the upload was started for. The upload is async,
  // so the user can switch documents before it finishes.
  const targetDocId = activeDocId;
  const fd = new FormData();
  images.forEach(file => fd.append('files', file));
  try {
    const res = await fetch(`${API_BASE}/api/upload`, {
      method: 'POST',
      credentials: 'same-origin',
      body: fd,
    });
    let data = null;
    // A non-JSON body is tolerated here so the HTTP status can still be
    // reported below.
    try { data = await res.json(); } catch (_) {}
    if (!res.ok) throw new Error((data && (data.error || data.detail)) || `HTTP ${res.status}`);
    const uploaded = Array.isArray(data?.files) ? data.files : [];
    if (!uploaded.length) throw new Error('No uploaded files returned');
    // Re-validate the precondition after the await: inserting now would write
    // the image reference into a different (possibly non-markdown) document.
    if (activeDocId !== targetDocId || _activeDocLanguage() !== 'markdown') {
      if (uiModule) uiModule.showError('Document changed during upload; image was not inserted');
      return;
    }
    _insertMarkdownImages(uploaded);
    if (uiModule) uiModule.showToast(images.length === 1 ? 'Image inserted' : 'Images inserted');
  } catch (err) {
    console.error('Failed to insert markdown image:', err);
    if (uiModule) uiModule.showError('Failed to insert image');
  }
}
// `change` handler for the hidden #doc-md-image-input file picker: uploads the
// chosen files as markdown images and resets the input so picking the same
// file again fires another `change` event.
async function _handleMarkdownImageUpload(e) {
  // Snapshot the selection into a plain array BEFORE resetting the input.
  // Setting `value = ''` empties the input's list of selected files, and
  // whether a previously obtained FileList reference survives that is
  // implementation-dependent.
  const files = Array.from(e.target.files || []);
  e.target.value = '';
  await _uploadMarkdownImages(files);
}
function _renderComposeAttachments() {
const container = document.getElementById('doc-email-compose-atts');
if (!container) return;
@@ -3752,9 +3865,12 @@ import * as Modals from './modalManager.js';
const res = await fetch(`${API_BASE}/api/document`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
credentials: 'same-origin',
body: JSON.stringify({ session_id: sessionId, title: '', content }),
});
if (!res.ok) throw new Error(`Document create failed: HTTP ${res.status}`);
const doc = await res.json();
if (!doc || !doc.id) throw new Error('Document create failed: missing id');
addDocToTabs(doc, sessionId);
// Set the content into the map so switchToDoc preserves it
const d = docs.get(doc.id);
@@ -3981,6 +4097,7 @@ import * as Modals from './modalManager.js';
<input type="hidden" id="doc-email-source-folder" />
<input type="file" id="doc-email-file-input" multiple style="display:none" />
</div>
<input type="file" id="doc-md-image-input" accept="image/*" multiple style="display:none" />
<div class="doc-md-toolbar" id="doc-md-toolbar" style="display:none">
<div class="md-toolbar-items" id="md-toolbar-items">
<span class="md-view-toggle" id="doc-md-view-toggle" style="display:none" role="group" aria-label="Edit or preview">
@@ -4003,7 +4120,7 @@ import * as Modals from './modalManager.js';
<button type="button" class="md-dd-toggle" data-dd="list" title="List"><span style="font-variant-numeric:tabular-nums;">1.</span><svg width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button>
<span class="md-toolbar-sep"></span>
<button type="button" data-md="link" title="Link"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M10 13a5 5 0 0 0 7.54.54l3-3a5 5 0 0 0-7.07-7.07l-1.72 1.71"/><path d="M14 11a5 5 0 0 0-7.54-.54l-3 3a5 5 0 0 0 7.07 7.07l1.71-1.71"/></svg></button>
<button type="button" id="md-toolbar-attach-btn" class="md-toolbar-attach-btn" title="Attach files"><svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg></button>
<button type="button" id="md-toolbar-attach-btn" class="md-toolbar-attach-btn" title="Insert image"><svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg></button>
<button type="button" class="md-dd-toggle md-toolbar-email-hide" data-dd="code" title="Code">\`<svg width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button>
<button type="button" data-md="hr" title="Horizontal rule"></button>
<span class="md-toolbar-sep"></span>
@@ -4602,9 +4719,14 @@ import * as Modals from './modalManager.js';
document.getElementById('doc-email-file-input')?.click();
});
document.getElementById('md-toolbar-attach-btn')?.addEventListener('click', () => {
document.getElementById('doc-email-file-input')?.click();
if (_activeDocLanguage() === 'email') {
document.getElementById('doc-email-file-input')?.click();
} else {
document.getElementById('doc-md-image-input')?.click();
}
});
document.getElementById('doc-email-file-input')?.addEventListener('change', _handleAttachUpload);
document.getElementById('doc-md-image-input')?.addEventListener('change', _handleMarkdownImageUpload);
// Cc/Bcc toggle
document.getElementById('doc-email-show-cc')?.addEventListener('click', () => {
@@ -4840,6 +4962,26 @@ import * as Modals from './modalManager.js';
clearTimeout(_autoSaveDebounce);
_autoSaveDebounce = setTimeout(() => { saveDocument({ silent: true }); }, 2000);
});
ta.addEventListener('paste', (e) => {
if (_activeDocLanguage() !== 'markdown') return;
const files = Array.from(e.clipboardData?.files || []).filter(_isMarkdownImageFile);
if (!files.length) return;
e.preventDefault();
_uploadMarkdownImages(files);
});
ta.addEventListener('dragover', (e) => {
if (_activeDocLanguage() !== 'markdown') return;
const items = Array.from(e.dataTransfer?.items || []);
if (!items.some(item => item.kind === 'file' && /^image\//i.test(item.type || ''))) return;
e.preventDefault();
});
ta.addEventListener('drop', (e) => {
if (_activeDocLanguage() !== 'markdown') return;
const files = Array.from(e.dataTransfer?.files || []).filter(_isMarkdownImageFile);
if (!files.length) return;
e.preventDefault();
_uploadMarkdownImages(files);
});
ta.addEventListener('scroll', () => {
const code = document.getElementById('doc-editor-code');
if (code) code.style.minHeight = ta.scrollHeight + 'px';
@@ -5548,7 +5690,7 @@ import * as Modals from './modalManager.js';
// any dropdown that just opened. Preventing the default mousedown keeps the
// textarea focused, so formatting hits the live selection and menus stay up.
toolbar.addEventListener('mousedown', (e) => {
if (e.target.closest('[data-md], .md-dd-toggle, .emoji-picker-btn')) e.preventDefault();
if (e.target.closest('[data-md], .md-dd-toggle, .emoji-picker-btn, .md-toolbar-attach-btn')) e.preventDefault();
});
toolbar.addEventListener('click', (e) => {
@@ -5976,6 +6118,7 @@ import * as Modals from './modalManager.js';
const res = await fetch(`${API_BASE}/api/document`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
credentials: 'same-origin',
body: JSON.stringify({
session_id: sessionId,
title: '',
@@ -5983,7 +6126,9 @@ import * as Modals from './modalManager.js';
language: 'markdown',
}),
});
if (!res.ok) throw new Error(`Document create failed: HTTP ${res.status}`);
const doc = await res.json();
if (!doc || !doc.id) throw new Error('Document create failed: missing id');
addDocToTabs(doc, sessionId);
if (!isOpen) openPanel();
// Re-enable editor if it was in empty state
@@ -8266,8 +8411,10 @@ import * as Modals from './modalManager.js';
const res = await fetch(`${API_BASE}/api/document/${activeDocId}`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
credentials: 'same-origin',
body: JSON.stringify({ content: textarea.value }),
});
if (!res.ok) throw new Error(`Document save failed: HTTP ${res.status}`);
const doc = await res.json();
const badge = document.getElementById('doc-version-badge');
if (badge) { const _v = doc.version_count || 1; badge.textContent = `v${_v}`; badge.style.display = _v > 1 ? '' : 'none'; }
@@ -8280,7 +8427,11 @@ import * as Modals from './modalManager.js';
if (!silent && uiModule) uiModule.showToast('Document saved');
} catch (e) {
console.error('Failed to save document:', e);
if (!silent && uiModule) uiModule.showError('Failed to save document');
const now = Date.now();
if (uiModule && (!silent || now - _lastAutoSaveErrorAt > 10000)) {
uiModule.showError(silent ? 'Autosave failed' : 'Failed to save document');
_lastAutoSaveErrorAt = now;
}
}
}
+53 -12
View File
@@ -2936,6 +2936,20 @@ function _createCard(em) {
titleRow.appendChild(att);
}
const tags = Array.isArray(em.tags) ? em.tags : [];
if (tags.length || em.is_spam_verdict) {
const tagWrap = document.createElement('span');
tagWrap.className = 'email-tags email-card-tags';
tagWrap.innerHTML = tags.map(t => {
const tag = String(t || '').trim().toLowerCase().replace(/_/g, '-');
return tag ? `<span class="email-tag email-tag-${_esc(tag)}">${_esc(tag)}</span>` : '';
}).join('');
if (em.is_spam_verdict) {
tagWrap.insertAdjacentHTML('beforeend', '<span class="email-tag email-tag-spam">spam</span>');
}
titleRow.appendChild(tagWrap);
}
// Done check + unread dot stay next to the subject on the left.
const isSentFolder = /sent/i.test(state._libFolder);
if (!isSentFolder) {
@@ -4560,11 +4574,12 @@ function _wireAttachmentHandlers(reader, folder) {
const uid = openBtn.dataset.openUid;
const index = openBtn.dataset.openIndex;
const name = openBtn.dataset.openName || `attachment-${index}`;
const sourceFolder = openBtn.dataset.openFolder || useFolder;
if (!uid || index == null) return;
const orig = openBtn.style.opacity;
openBtn.style.opacity = '0.4';
try {
const folderQs = encodeURIComponent(useFolder);
const folderQs = encodeURIComponent(sourceFolder);
const res = await fetch(
`${API_BASE}/api/email/attachment-as-doc/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${folderQs}${_acct()}`,
{ method: 'POST', credentials: 'same-origin' }
@@ -4618,8 +4633,9 @@ function _wireAttachmentHandlers(reader, folder) {
const uid = chip.dataset.attUid;
const index = chip.dataset.attIndex;
const name = chip.dataset.attName || `attachment-${index}`;
const sourceFolder = chip.dataset.attFolder || useFolder;
if (!uid || index == null) return;
const url = `${API_BASE}/api/email/attachment/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${encodeURIComponent(useFolder)}${_acct()}`;
const url = `${API_BASE}/api/email/attachment/${encodeURIComponent(uid)}/${encodeURIComponent(index)}?folder=${encodeURIComponent(sourceFolder)}${_acct()}`;
if (_isMobileUA) {
window.open(url, '_blank');
return;
@@ -4698,25 +4714,50 @@ function _isLikelySignatureImage(a) {
// Build the attachments header+chips HTML for an email read response. Pulled
// out so both the initial-open and the swap-reader paths can render it.
function _buildAttsHtmlFor(uid, data) {
if (!data || !data.attachments || !data.attachments.length) return '';
const _OPENABLE_RE = /\.(pdf|docx|txt|md|markdown)$/i;
const visible = data.attachments.filter(a => !_isLikelySignatureImage(a));
if (!visible.length) return '';
const chips = visible.map(a => {
if (!data) return '';
const _OPENABLE_RE = /\.(pdf|docx|txt|md|markdown|eml)$/i;
const currentAttachments = Array.isArray(data.attachments) ? data.attachments : [];
const relatedAttachments = Array.isArray(data.related_attachments) ? data.related_attachments : [];
if (!currentAttachments.length && !relatedAttachments.length) return '';
const visible = currentAttachments.filter(a => !_isLikelySignatureImage(a));
const hidden = currentAttachments.filter(a => _isLikelySignatureImage(a));
const related = relatedAttachments.filter(a => !_isLikelySignatureImage(a));
const renderChip = (a, extraClass = '') => {
const openable = _OPENABLE_RE.test(a.filename || '');
const chipUid = a.source_uid || a.uid || uid;
const chipFolder = a.source_folder || data.folder || state._libFolder || 'INBOX';
const openBtn = openable
? `<span class="email-attachment-open" title="Open in document editor" data-open-uid="${_esc(uid)}" data-open-index="${a.index}" data-open-name="${_esc(a.filename)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="8" y1="13" x2="16" y2="13"/><line x1="8" y1="17" x2="16" y2="17"/><line x1="8" y1="9" x2="10" y2="9"/></svg><span class="email-attachment-open-label">Open</span></span>`
? `<span class="email-attachment-open" title="Open in document editor" data-open-uid="${_esc(chipUid)}" data-open-index="${a.index}" data-open-name="${_esc(a.filename)}" data-open-folder="${_esc(chipFolder)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="8" y1="13" x2="16" y2="13"/><line x1="8" y1="17" x2="16" y2="17"/><line x1="8" y1="9" x2="10" y2="9"/></svg><span class="email-attachment-open-label">Open</span></span>`
: '';
return `<button type="button" class="email-attachment-chip" data-att-uid="${_esc(uid)}" data-att-index="${a.index}" data-att-name="${_esc(a.filename)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg><span>${_esc(a.filename)}</span><span class="att-size">${Math.round((a.size||0)/1024)} KB</span>${openBtn}</button>`;
}).join('');
return `<button type="button" class="email-attachment-chip${extraClass}" data-att-uid="${_esc(chipUid)}" data-att-index="${a.index}" data-att-name="${_esc(a.filename)}" data-att-folder="${_esc(chipFolder)}"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg><span>${_esc(a.filename)}</span><span class="att-size">${Math.round((a.size||0)/1024)} KB</span>${openBtn}</button>`;
};
const chips = visible.map(a => renderChip(a)).join('');
const hiddenChips = hidden.map(a => renderChip(a, ' email-attachment-chip-muted')).join('');
const relatedChips = related.map(a => renderChip(a, ' email-attachment-chip-related')).join('');
const visibleSection = visible.length
? '<div class="email-reader-atts">' + chips + '</div>'
: '';
const relatedSection = related.length
? '<div class="email-reader-atts-hidden-note">From earlier in this thread</div><div class="email-reader-atts email-reader-atts-related">' + relatedChips + '</div>'
: '';
const hiddenSection = hidden.length
? '<div class="email-reader-atts-hidden-note">Filtered inline images / signature files</div><div class="email-reader-atts email-reader-atts-hidden">' + hiddenChips + '</div>'
: '';
const label = visible.length
? `Attachments (${visible.length + related.length})`
: related.length
? `Thread attachments (${related.length})`
: `Hidden inline attachments (${hidden.length})`;
return (
'<div class="email-reader-atts-wrap collapsed">'
+ '<div class="email-reader-atts-header email-summary-toggle" role="button" tabindex="0">'
+ '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m21.44 11.05-9.19 9.19a6 6 0 0 1-8.49-8.49l8.57-8.57A4 4 0 1 1 17.93 8.8l-8.59 8.57a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>'
+ `<span>Attachments (${data.attachments.length})</span>`
+ `<span>${label}</span>`
+ '<svg class="email-summary-chevron" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="margin-left:auto;transition:transform .15s ease;"><polyline points="6 9 12 15 18 9"/></svg>'
+ '</div>'
+ '<div class="email-reader-atts">' + chips + '</div>'
+ visibleSection
+ relatedSection
+ hiddenSection
+ '</div>'
);
}
+22 -3
View File
@@ -36,6 +36,14 @@ function linkHtml(text, url) {
return `<a href="${escapeHtml(safeUrl)}" target="_blank" rel="noopener noreferrer">${safeText}</a>`;
}
/**
 * Build an <img> tag for a markdown image.
 *
 * The URL is passed through safeLinkUrl; when that yields nothing, or an
 * in-page "#…" target, only the escaped alt text is returned instead of an
 * image. All attribute values are HTML-escaped. The optional title becomes a
 * `title` attribute.
 */
function imageHtml(alt, url, title) {
  const src = safeLinkUrl(url);
  const altText = escapeHtml(alt || '');
  const isUsable = Boolean(src) && !src.startsWith('#');
  if (!isUsable) return altText;

  let titleAttr = '';
  if (title) titleAttr = ` title="${escapeHtml(title)}"`;
  return `<img src="${escapeHtml(src)}" alt="${altText}"${titleAttr} loading="lazy" decoding="async">`;
}
function _isModelEndpointUrl(rawUrl) {
try {
const parsed = new URL(String(rawUrl || ''), window.location.origin);
@@ -146,7 +154,7 @@ function sanitizeAllowedHtml(html) {
* Check if text has unclosed think tag
*/
export function hasUnclosedThinkTag(text) {
text = text || '';
text = normalizeThinkingMarkup(text || '');
const openCount =
(text.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi) || []).length
+ (text.match(/<\|channel>thought/gi) || []).length;
@@ -163,6 +171,10 @@ export function startsWithReasoningPrefix(text) {
export function normalizeThinkingMarkup(text) {
if (!text) return text;
let normalized = text;
// MiniMax M-series can emit namespaced reasoning tags like
// <mm:think>...</mm:think>. Normalize them into the shared thinking parser.
normalized = normalized.replace(/<mm:think(\s+[^>]*)?>/gi, (_m, attrs = '') => `<think${attrs || ''}>`);
normalized = normalized.replace(/<\/mm:think>/gi, '</think>');
normalized = normalized.replace(/<thought(\s+[^>]*)?>/gi, (_m, attrs = '') => `<think${attrs || ''}>`);
normalized = normalized.replace(/<\/thought>/gi, '</think>');
normalized = normalized.replace(/<\|channel>thought\s*\n?([\s\S]*?)<channel\|>\s*/gi, (_m, content = '') => {
@@ -535,6 +547,12 @@ export function mdToHtml(src, opts) {
'$1[#$2](#$2)',
);
// Convert markdown images before links so ![alt](url) does not become
// literal "!" plus a normal link.
s = s.replace(/!\[([^\]\n]*)\]\(([^)\s]+)(?:\s+"([^"]*)")?\)/g, (match, alt, url, title) => {
return imageHtml(alt, url, title);
});
// Convert markdown links [text](url) to clickable links
// Internal #hash links navigate in-page; external links open in new tab
s = s.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => {
@@ -573,8 +591,9 @@ export function mdToHtml(src, opts) {
return placeholder;
});
// ALSO preserve <a> tags the same way (they're now in the HTML from markdown conversion)
s = s.replace(/<a\s+[^>]*>.*?<\/a>/gi, (match) => {
// ALSO preserve <a>/<img> tags the same way (they're now in the HTML from
// markdown conversion)
s = s.replace(/<(?:a\s+[^>]*>.*?<\/a|img\s+[^>]*?)>/gi, (match) => {
const placeholder = `___ALLOWED_HTML_${allowedHtmlBlocks.length}___`;
allowedHtmlBlocks.push(sanitizeAllowedHtml(match));
return placeholder;
+60 -19
View File
@@ -77,6 +77,7 @@ function _handlePickerKeydown(e, listEl, itemSelector, closeFn) {
// Dependencies injected via initModelPicker()
let _deps = null;
let _autoSelectingDefault = false;
let _defaultChatPickInFlight = false;
function _modelExists(modelId, url) {
if (!modelId || !window.modelsModule || !window.modelsModule.getCachedItems) return false;
@@ -91,6 +92,43 @@ function _modelExists(modelId, url) {
});
}
/**
 * Pick a model for a not-yet-created chat when none has been chosen.
 *
 * Does nothing when dependencies are not injected, when a pick is already in
 * flight, when a session is current, or when the pending chat already has a
 * model. Otherwise it asks `/api/default-chat` for the configured default and
 * stores it as the pending chat; if no usable default comes back it falls
 * back to the first model of the first online cached endpoint. The picker is
 * refreshed after either choice.
 */
async function _ensureDefaultPendingChat() {
  if (!_deps) return;
  if (_defaultChatPickInFlight) return;
  if (_deps.getCurrentSessionId && _deps.getCurrentSessionId()) return;
  const existing = _deps.getPendingChat && _deps.getPendingChat();
  if (existing && existing.modelId) return;

  // The flag also stops re-entry via updateModelPicker() below.
  _defaultChatPickInFlight = true;
  try {
    let configured = null;
    try {
      const resp = await fetch(`${API_BASE}/api/default-chat`, { credentials: 'same-origin' });
      if (resp.ok) configured = await resp.json();
    } catch (_) {}

    const hasConfiguredDefault = Boolean(configured && configured.endpoint_url && configured.model);
    if (hasConfiguredDefault) {
      _deps.setPendingChat({
        url: configured.endpoint_url,
        modelId: configured.model,
        endpointId: configured.endpoint_id || '',
      });
      try { window.__odysseusDefaultChat = configured; } catch (_) {}
      updateModelPicker();
      return;
    }

    // No configured default: preserve the old convenience fallback.
    const registry = window.modelsModule;
    if (!registry || !registry.getCachedItems) return;
    const candidate = registry.getCachedItems().find((item) => {
      if (item.offline) return false;
      return Boolean((item.models || []).length || (item.models_extra || []).length);
    });
    if (!candidate) return;
    const names = (candidate.models || []).concat(candidate.models_extra || []);
    _deps.setPendingChat({ url: candidate.url, modelId: names[0], endpointId: candidate.endpoint_id });
    updateModelPicker();
  } finally {
    _defaultChatPickInFlight = false;
  }
}
/**
* Initialize the model picker dropdown.
* @param {Object} deps
@@ -112,6 +150,7 @@ function _initModelPickerDropdown() {
const search = document.getElementById('model-picker-search');
const listEl = document.getElementById('model-picker-list');
const searchRow = menu ? menu.querySelector('.model-picker-search-row') : null;
const refreshBtn = document.getElementById('model-picker-refresh-btn');
if (!wrap || !btn || !menu || !search || !listEl) return;
function _close() {
@@ -608,6 +647,26 @@ function _initModelPickerDropdown() {
search.addEventListener('input', () => _populate(search.value));
search.addEventListener('click', (e) => e.stopPropagation());
if (refreshBtn) {
refreshBtn.addEventListener('click', async (e) => {
e.stopPropagation();
refreshBtn.disabled = true;
refreshBtn.classList.add('spinning');
try {
if (window.modelsModule && window.modelsModule.refreshModels) {
await window.modelsModule.refreshModels(true);
}
await _refreshLocalProbe();
if (!menu.classList.contains('hidden')) _populate(search.value || '');
updateModelPicker();
} catch (_) {
uiModule.showToast('Model refresh failed');
} finally {
refreshBtn.disabled = false;
refreshBtn.classList.remove('spinning');
}
});
}
search.addEventListener('keydown', (e) => {
_handlePickerKeydown(e, listEl, '.model-switch-item', _close);
});
@@ -689,25 +748,7 @@ export function updateModelPicker() {
}
}
if (!modelId && !_autoSelectingDefault && window.modelsModule && window.modelsModule.getCachedItems) {
const items = window.modelsModule.getCachedItems();
const first = items.find(item => !item.offline && ((item.models || []).length || (item.models_extra || []).length));
if (first) {
const models = (first.models || []).concat(first.models_extra || []);
modelId = models[0];
if (!currentSessionId) {
_deps.setPendingChat({ url: first.url, modelId, endpointId: first.endpoint_id });
} else {
if (s) { s.model = modelId; s.endpoint_url = first.url; }
_autoSelectingDefault = true;
const fd = new FormData();
fd.append('model', modelId);
fd.append('endpoint_url', first.url || '');
if (first.endpoint_id) fd.append('endpoint_id', first.endpoint_id);
fetch(`${API_BASE}/api/session/${currentSessionId}`, { method: 'PATCH', body: fd })
.catch(() => {})
.finally(() => { _autoSelectingDefault = false; });
}
}
_ensureDefaultPendingChat();
}
const displayName = modelId ? modelId.split('/').pop() : 'Select model';
+155 -20
View File
@@ -608,7 +608,7 @@ function _isNoteFullyDone(note) {
// A "checklist note" — todo or goal — has structured items[] that the cards
// render as checkboxes and that "fully done" / progress logic reads from.
function _hasItems(note) {
return note && (note.note_type === 'todo' || note.note_type === 'goal');
return note && (note.note_type === 'todo' || note.note_type === 'goal' || note.note_type === 'checklist');
}
// Compact " N/M" progress string for a goal's checklist. Empty when the goal
@@ -1120,8 +1120,6 @@ export function openPanel() {
}
_open = true;
_editingId = null;
// Reset the search filter — the rebuilt pane's search input renders empty, so a
// stale _searchQuery would silently hide non-matching notes after a reopen.
_searchQuery = '';
_clearViewedReminderGlows();
_firedDotDismissedAt = Date.now();
@@ -1822,10 +1820,20 @@ function _renderNotes() {
for (let i = 0; i < note.items.length; i++) {
const item = note.items[i];
const doneClass = item.done ? ' done' : '';
const agentStatus = (item.agent_status || '').toLowerCase();
const agentDoneClass = agentStatus === 'stream_complete' ? ' is-agent-stream-complete' : '';
const agentTitle = agentStatus === 'stream_complete'
? 'Agent stream finished for this todo'
: (agentStatus === 'running' ? 'Agent is working on this todo' : 'Solve this todo with the agent');
const agentSessionAttr = item.agent_session_id ? ` data-session-id="${_attrEsc(item.agent_session_id)}"` : '';
const agentMenuTitle = item.agent_session_title || `Agent: ${(item.text || '').slice(0, 40)}`;
const indent = Math.min(item.indent || 0, 3);
contentHtml += `<div class="note-checkbox${doneClass}" data-note-id="${note.id}" data-idx="${i}" style="padding-left:${indent * 16}px">
<span class="note-check-dot" title="Mark done"></span>
<span class="note-check-text">${_linkify(item.text)}</span>
<button class="note-checkbox-agent${agentDoneClass}" data-note-id="${_attrEsc(note.id)}" data-idx="${i}"${agentSessionAttr} data-agent-title="${_attrEsc(agentMenuTitle)}" title="${_attrEsc(agentTitle)}">
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
</button>
<button class="note-checkbox-rm" data-note-id="${note.id}" data-idx="${i}" title="Delete item">
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
</button>
@@ -1889,10 +1897,6 @@ function _renderNotes() {
${_hasItems(note) ? `<div class="note-cl-quickadd"><input type="text" class="note-cl-quickadd-input" placeholder="+ Add item" data-note-id="${note.id}" /></div>` : ''}
${reminderTagHtml}
${noteTags.length ? `<div class="note-card-label">${noteTags.map(t => `<button type="button" class="note-card-label-chip" data-note-label-filter="${_esc(t)}" title="Filter #${_esc(t)}">#${_esc(t)}</button>`).join(' ')}</div>` : ''}
${note.agent_session_id ? `<button class="note-agent-tag" data-note-id="${note.id}" data-session-id="${_esc(note.agent_session_id)}" title="Open the agent's chat for this note">
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
<span>Agent</span>
</button>` : ''}
<div class="note-card-actions">
<div class="note-card-colors">${colorDots}</div>
<span style="flex:1"></span>
@@ -2177,7 +2181,7 @@ function _bindCardEvents(body) {
// Click empty area of checklist preview (not on checkbox/X) — edit
body.querySelectorAll('.note-checklist-preview').forEach(el => {
el.addEventListener('click', (e) => {
if (e.target.closest('.note-checkbox, .note-checkbox-rm, .note-cl-quickadd, input')) return;
if (e.target.closest('.note-checkbox, .note-checkbox-rm, .note-checkbox-agent, .note-cl-quickadd, input')) return;
e.stopPropagation();
tapToEditOrSelect(el.closest('.note-card'));
});
@@ -2203,7 +2207,7 @@ function _bindCardEvents(body) {
// title / content preview triggered edit, so padding + empty gutters were
// dead zones that felt broken on mobile.
if (_isNotesMobileMode() && !_selectMode) {
const _INTERACTIVE = 'button, a, input, label, .note-card-color-dot, .note-checkbox, .note-checkbox-rm, .note-cl-quickadd, .note-agent-tag, .note-card-pin, .note-card-corner-trash, .note-card-corner-menu, .note-card-corner-unarchive, .note-card-edit-corner, .note-card-reminder, .note-card-cb';
const _INTERACTIVE = 'button, a, input, label, .note-card-color-dot, .note-checkbox, .note-checkbox-rm, .note-checkbox-agent, .note-cl-quickadd, .note-agent-tag, .note-card-pin, .note-card-corner-trash, .note-card-corner-menu, .note-card-corner-unarchive, .note-card-edit-corner, .note-card-reminder, .note-card-cb';
body.querySelectorAll('.note-card').forEach(card => {
card.addEventListener('click', (e) => {
if (e.target.closest(_INTERACTIVE)) return;
@@ -2297,16 +2301,6 @@ function _bindCardEvents(body) {
_openNoteCornerMenu(btn);
});
});
// Agent tag — opens the chat session the agent ran for this note.
body.querySelectorAll('.note-agent-tag').forEach(tag => {
tag.addEventListener('click', (e) => {
e.preventDefault();
e.stopPropagation();
const sid = tag.dataset.sessionId;
const _sm = window.sessionModule;
if (sid && _sm && _sm.selectSession) { closePanel(); _sm.selectSession(sid); }
});
});
body.querySelectorAll('.note-card-label-chip').forEach(chip => {
chip.addEventListener('click', (e) => {
e.preventDefault();
@@ -2523,6 +2517,18 @@ function _bindCardEvents(body) {
});
});
// Per-item agent solve (hover button next to the X). Scoped to one todo
// item — uses the note title as context if present, but only the single
// item's text as the work. Mirrors the per-note _agentSolveNote pattern.
body.querySelectorAll('.note-checkbox-agent').forEach(btn => {
btn.addEventListener('click', (e) => {
e.preventDefault();
e.stopPropagation();
if (_selectMode) return;
_openTodoAgentMenu(btn);
});
});
// Quick-add new checklist item (hover input at bottom of todo cards)
body.querySelectorAll('.note-cl-quickadd-input').forEach(input => {
input.addEventListener('click', (e) => e.stopPropagation());
@@ -4342,6 +4348,54 @@ function _openNoteCornerMenu(btn) {
menu.querySelector('[data-act="agent"]').addEventListener('click', () => { menu.remove(); _agentSolveNote(id); });
}
// Append `menu` to <body> and pin it (position:fixed) next to `btn`.
// Horizontally the menu is right-aligned with the button and clamped so it
// keeps an 8px gap from both viewport edges. Vertically it opens 4px below the
// button, unless there is not enough room below AND enough room above, in
// which case it opens 4px above. `width` is applied as the menu's min-width.
// A capture-phase document click listener removes the menu on the first click
// that lands outside it.
function _positionNoteMenu(menu, btn, width = 196) {
  const EDGE_GAP = 8;
  const ANCHOR_GAP = 4;

  // The menu must be in the DOM before its height can be measured.
  document.body.appendChild(menu);
  const anchor = btn.getBoundingClientRect();
  const menuHeight = menu.offsetHeight || 112;

  const maxLeft = window.innerWidth - width - EDGE_GAP;
  const left = Math.max(EDGE_GAP, Math.min(anchor.right - width, maxLeft));

  const roomBelow = window.innerHeight - anchor.bottom;
  const roomNeeded = menuHeight + EDGE_GAP;
  let top = anchor.bottom + ANCHOR_GAP;
  if (roomBelow < roomNeeded && anchor.top > roomNeeded) {
    top = anchor.top - menuHeight - ANCHOR_GAP;
  }

  menu.style.cssText += `position:fixed;z-index:11000;top:${Math.round(top)}px;left:${Math.round(left)}px;min-width:${width}px;`;

  function dismissOnOutsideClick(ev) {
    if (ev && menu.contains(ev.target)) return;
    menu.remove();
    document.removeEventListener('click', dismissOnOutsideClick, true);
  }
  // Registered on the next tick so the click that opened the menu does not
  // immediately close it.
  setTimeout(() => {
    document.addEventListener('click', dismissOnOutsideClick, true);
  }, 0);
}
// Open the per-item agent dropdown anchored to `btn` (a .note-checkbox-agent
// button carrying data-note-id / data-idx and, after a previous run, a
// data-session-id). Any dropdown that is already open is removed first.
// The menu always offers a run entry ("Run Agent", or "Run again" when a
// session id is present); "Open" is only rendered when a session id is
// present and switches to that chat session via window.sessionModule.
function _openTodoAgentMenu(btn) {
  document.querySelectorAll('.note-corner-menu-dropdown').forEach(d => d.remove());
  const noteId = btn.dataset.noteId;
  // Explicit radix so the index is always parsed as decimal.
  const idx = parseInt(btn.dataset.idx, 10);
  const sid = btn.dataset.sessionId || '';
  const menu = document.createElement('div');
  menu.className = 'note-corner-menu-dropdown note-agent-item-menu';
  menu.innerHTML = `
${sid ? `<button type="button" class="ncm-item" data-act="open">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M15 3h6v6"/><path d="M10 14L21 3"/><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/></svg>
<span>Open</span>
</button>` : ''}
<button type="button" class="ncm-item" data-act="run">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
<span>${sid ? 'Run again' : 'Run Agent'}</span>
</button>`;
  _positionNoteMenu(menu, btn);
  const openBtn = menu.querySelector('[data-act="open"]');
  if (openBtn) {
    openBtn.addEventListener('click', () => {
      menu.remove();
      const _sm = window.sessionModule;
      // Close the notes panel before switching to the agent's chat session.
      if (sid && _sm && _sm.selectSession) { closePanel(); _sm.selectSession(sid); }
    });
  }
  menu.querySelector('[data-act="run"]').addEventListener('click', () => {
    menu.remove();
    _agentSolveTodoItem(noteId, idx);
  });
}
// Build the prompt the agent gets from a note: title + body, plus any
// not-yet-done checklist items.
function _noteToAgentPrompt(note) {
@@ -4353,7 +4407,7 @@ function _noteToAgentPrompt(note) {
.forEach(it => parts.push('- ' + it.text.trim()));
}
const body = parts.join('\n');
return body ? `Help me get this done:\n\n${body}` : '';
return body ? `Help me get this done:\n\n${body}\n\nThe source note is read-only. Do not edit, replace, or update it.` : '';
}
// Agent-solve: create a chat session server-side, kick off an agent run
@@ -4395,6 +4449,7 @@ async function _agentSolveNote(id) {
fd.append('message', prompt);
fd.append('session', sid);
fd.append('mode', 'agent');
fd.append('disabled_tools', JSON.stringify(['manage_notes']));
fetch(`${API_BASE}/api/chat_stream`, { method: 'POST', credentials: 'same-origin', body: fd })
.then(async (res) => {
if (!res.ok || !res.body) return;
@@ -4413,6 +4468,86 @@ async function _agentSolveNote(id) {
}
}
// Per-item version of _agentSolveNote. Scoped to a single checklist item;
// the note title (if any) is included as context, but only this one item's
// text is the work the agent is asked to do. The new session id is stored on
// the item (together with a title and an agent_status) and also on the PARENT
// note (latest-wins), then persisted through _patchNote.
//
// The chat stream is drained in the background. When it ends the item is
// marked 'stream_complete'. If the request is rejected, has no body, or the
// stream errors, the 'running' marker is cleared so the item does not look
// busy forever. Because the checklist can be edited while the agent runs, the
// item is re-located by its agent_session_id instead of trusting the index
// captured at launch.
async function _agentSolveTodoItem(noteId, idx) {
  const note = _notes.find(n => n.id === noteId);
  if (!note || !Array.isArray(note.items)) return;
  const item = note.items[idx];
  const itemText = (item && (item.text || '').trim()) || '';
  if (!itemText) {
    uiModule.showToast('Nothing to solve — item is empty');
    return;
  }
  const titleCtx = (note.title || '').trim();
  const prompt = titleCtx
    ? `Context (from note "${titleCtx}").\n\nHelp me with this todo: ${itemText}\n\nThe source note is read-only. Do not edit, replace, or update it.`
    : `Help me with this todo: ${itemText}\n\nThe source note is read-only. Do not edit, replace, or update it.`;
  try {
    const dc = await (await fetch(`${API_BASE}/api/default-chat`, { credentials: 'same-origin' })).json();
    if (!dc.endpoint_url || !dc.model) { uiModule.showError('No default chat model configured'); return; }
    const label = itemText.slice(0, 40);
    const csFd = new FormData();
    csFd.append('name', 'Agent: ' + label);
    csFd.append('endpoint_url', dc.endpoint_url);
    csFd.append('model', dc.model);
    if (dc.endpoint_id) csFd.append('endpoint_id', dc.endpoint_id);
    csFd.append('skip_validation', 'true');
    const csRes = await fetch(`${API_BASE}/api/session`, { method: 'POST', credentials: 'same-origin', body: csFd });
    if (!csRes.ok) { uiModule.showError('Could not create agent session'); return; }
    const sess = await csRes.json();
    const sid = sess.id;
    const sessionTitle = 'Agent: ' + label;

    // Re-read the note: _notes may have been replaced while we were awaiting.
    const n = _notes.find(x => x.id === noteId);
    if (n) {
      n.agent_session_id = sid;
      if (Array.isArray(n.items) && n.items[idx]) {
        n.items[idx].agent_session_id = sid;
        n.items[idx].agent_session_title = sessionTitle;
        n.items[idx].agent_status = 'running';
        n.items[idx].agent_stream_completed_at = '';
      }
    }
    _renderNotes();
    _patchNote(noteId, { items: n && Array.isArray(n.items) ? n.items : note.items, agent_session_id: sid }).catch(() => {});

    // Find the item this run belongs to at the time the stream settles.
    // Prefer the item tagged with this session id; fall back to the original
    // index only when that slot is not owned by a different (newer) run.
    const locateRunItem = () => {
      const cur = _notes.find(x => x.id === noteId);
      if (!cur || !Array.isArray(cur.items)) return null;
      let target = cur.items.find(it => it && it.agent_session_id === sid);
      if (!target) {
        const byIdx = cur.items[idx];
        if (byIdx && !byIdx.agent_session_id) target = byIdx;
      }
      return target ? { cur, target } : null;
    };

    // Stream finished normally: record completion on the item and the note.
    const markComplete = () => {
      const hit = locateRunItem();
      if (!hit) return;
      hit.cur.agent_session_id = sid;
      hit.target.agent_session_id = sid;
      hit.target.agent_session_title = sessionTitle;
      hit.target.agent_status = 'stream_complete';
      hit.target.agent_stream_completed_at = new Date().toISOString();
      _renderNotes();
      _patchNote(noteId, { items: hit.cur.items, agent_session_id: sid }).catch(() => {});
    };

    // Request/stream failed: drop the 'running' marker but keep the session
    // link so the partial chat can still be opened. No-op if the item already
    // left the 'running' state.
    const clearRunning = () => {
      const hit = locateRunItem();
      if (!hit || hit.target.agent_status !== 'running') return;
      hit.target.agent_status = '';
      _renderNotes();
      _patchNote(noteId, { items: hit.cur.items }).catch(() => {});
    };

    const fd = new FormData();
    fd.append('message', prompt);
    fd.append('session', sid);
    fd.append('mode', 'agent');
    fd.append('disabled_tools', JSON.stringify(['manage_notes']));
    fetch(`${API_BASE}/api/chat_stream`, { method: 'POST', credentials: 'same-origin', body: fd })
      .then(async (res) => {
        if (!res.ok || !res.body) { clearRunning(); return; }
        // Drain the stream; the response content itself is not used here.
        const reader = res.body.getReader();
        while (true) { const { done } = await reader.read(); if (done) break; }
        if (window.sessionModule && window.sessionModule.markStreamComplete) {
          try { window.sessionModule.markStreamComplete(sid); } catch {}
        }
        markComplete();
      })
      .catch(() => {
        // Best-effort cleanup; never let it surface as an unhandled rejection.
        try { clearRunning(); } catch {}
      });
    uiModule.showToast('Agent working on this item — tap the Agent tag when ready');
  } catch (e) {
    uiModule.showError('Agent failed: ' + (e.message || e));
  }
}
async function _copyNote(noteId, btnEl) {
const note = _notes.find(n => n.id === noteId);
if (!note) return false;
+19 -32
View File
@@ -366,20 +366,13 @@ function _buildPanelHTML() {
<div class="modal-body research-pane-body" data-no-swipe-dismiss>
<div class="research-new-job">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:2px;">
<h2 style="margin:0;padding:0;line-height:1;display:inline-flex;align-items:center;gap:6px;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="var(--accent, var(--red))" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="flex-shrink:0;"><path d="M6 18h8"/><path d="M3 22h18"/><path d="M14 22a7 7 0 1 0 0-14h-1"/><path d="M9 14h2"/><path d="M9 12a2 2 0 0 1-2-2V6h4v4a2 2 0 0 1-2 2Z"/><path d="M12 6V3a1 1 0 0 0-1-1H9a1 1 0 0 0-1 1v3"/></svg>Research <span id="research-stats" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal;position:relative;top:4px;"></span></h2>
<h2 style="margin:0;padding:0;line-height:1;display:inline-flex;align-items:center;gap:6px;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="var(--accent, var(--red))" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="flex-shrink:0;"><path d="M6 18h8"/><path d="M3 22h18"/><path d="M14 22a7 7 0 1 0 0-14h-1"/><path d="M9 14h2"/><path d="M9 12a2 2 0 0 1-2-2V6h4v4a2 2 0 0 1-2 2Z"/><path d="M12 6V3a1 1 0 0 0-1-1H9a1 1 0 0 0-1 1v3"/></svg>Research <span id="research-stats" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal"></span></h2>
</div>
<p class="memory-desc doclib-desc" style="margin-top:2px;display:flex;align-items:center;gap:6px;flex-wrap:wrap;">
<span>Multi-step web research with an LLM-in-the-loop agent</span>
<span id="research-no-past-hint" style="display:none;font-size:11px;opacity:0.7;position:relative;top:-4px;"> past runs in <button type="button" class="research-library-link" style="background:none;border:none;padding:0;font:inherit;color:var(--accent, var(--red));cursor:pointer;text-decoration:underline;">Library, Research</button></span>
<span id="research-no-past-hint" style="display:none;font:inherit;opacity:1;position:static;"> past runs in <button type="button" class="research-library-link" style="background:none;border:none;padding:0;font:inherit;color:var(--accent, var(--red));cursor:pointer;text-decoration:underline;">Library, Research</button></span>
</p>
<textarea id="research-query" class="research-query" placeholder="${_pickResearchHint()}" rows="4"></textarea>
<div class="research-category-row" id="research-category-row">
<button class="research-cat active" data-cat="" title="LLM auto-detects the best format">Auto</button>
<button class="research-cat" data-cat="product">Product</button>
<button class="research-cat" data-cat="comparison">Compare</button>
<button class="research-cat" data-cat="howto">How-to</button>
<button class="research-cat" data-cat="factcheck">Fact-check</button>
</div>
<button id="research-settings-toggle" class="research-settings-toggle${chevronCls}">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:4px;opacity:0.85;flex-shrink:0;"><circle cx="12" cy="12" r="3"/><path d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"/></svg>Settings<span class="research-settings-chevron">${_chevronIcon}</span>
</button>
@@ -787,6 +780,21 @@ function _renderJobs() {
+ '<span class="research-section-dot' + (dotPulse ? ' pulsing' : '') + '" style="background:' + dotColor + ';"></span>'
+ '<svg class="research-section-chevron" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round"><polyline points="6 9 12 15 18 9"/></svg>'
+ '</span>';
if (key === 'past') {
const hint = document.createElement('span');
hint.className = 'research-library-hint';
hint.innerHTML = '<span>Multi-step web research with an LLM-in-the-loop agent</span> <button type="button" class="research-library-link">Library, Research</button>';
hint.querySelector('.research-library-link').addEventListener('click', (e) => {
e.stopPropagation();
// Close the research panel first so the Library opens ABOVE it on mobile
// (otherwise it stacks under the full-screen panel).
closePanel();
if (window.documentModule && window.documentModule.openLibrary) {
window.documentModule.openLibrary({ tab: 'research' });
}
});
header.appendChild(hint);
}
header.addEventListener('click', () => {
const nowCollapsed = sec.classList.toggle('collapsed');
if (nowCollapsed) _collapsedSections.add(key); else _collapsedSections.delete(key);
@@ -803,27 +811,6 @@ function _renderJobs() {
});
const body = document.createElement('div');
body.className = 'research-section-body';
// Past Research header: link goes INLINE next to the title instead
// of on a second row. Append it to the title span as a small chip.
if (key === 'past') {
const titleEl = header.querySelector('.research-section-title');
if (titleEl) {
const hint = document.createElement('span');
hint.className = 'research-library-hint research-library-hint-inline';
hint.style.cssText = 'margin-left:8px;font-size:10.5px;opacity:0.65;font-weight:normal;';
hint.innerHTML = '— all in <button type="button" class="research-library-link" style="background:none;border:none;padding:0;font:inherit;color:var(--accent, var(--red));cursor:pointer;text-decoration:underline;">Library, Research</button>';
hint.querySelector('.research-library-link').addEventListener('click', (e) => {
e.stopPropagation();
// Close the research panel first so the Library opens ABOVE it on mobile
// (otherwise it stacks under the full-screen panel).
closePanel();
if (window.documentModule && window.documentModule.openLibrary) {
window.documentModule.openLibrary({ tab: 'research' });
}
});
titleEl.appendChild(hint);
}
}
arr.forEach(j => body.appendChild(_buildJobCard(j)));
sec.appendChild(header);
sec.appendChild(body);
@@ -1014,9 +1001,9 @@ function _buildJobCard(job) {
</div>
${failNote}
<div class="research-job-actions">
<button class="research-job-action" data-action="copy" title="Copy report to clipboard">${_copyIcon}</button>
<button class="research-job-action" data-action="chat" title="Open follow-up chat with this research as context">${_chatIcon} Discuss</button>
<button class="research-job-action research-job-action-report" data-action="report" title="Visual report">${_externalIcon} Visual Report</button>
<button class="research-job-action" data-action="chat" title="Open follow-up chat with this research as context">${_chatIcon} Discuss</button>
<button class="research-job-action research-job-action-dim" data-action="copy" title="Copy report to clipboard">${_copyIcon}</button>
<button class="research-job-action research-job-action-dim" data-action="dismiss" title="Clear from list">${_cancelIcon}</button>
<button class="research-job-action research-job-action-dim" data-action="delete" title="Delete from disk">${_trashIcon} Delete</button>
</div>
+1 -10
View File
@@ -103,7 +103,6 @@ export function initSidebarLayout(Storage, opts) {
});
// Hamburger cycles: full sidebar → mini → off → full
// Shift-click swaps sidebar side
let _userToggledSidebar = false;
let _wasAutoCollapsed = false;
@@ -122,8 +121,7 @@ export function initSidebarLayout(Storage, opts) {
if (window.innerWidth < 768 && cc && cc.classList.contains('compare-active')) return;
_userToggledSidebar = true;
// Optionally place the sidebar on a specific edge (the swipe gesture passes
// the direction). Persist it + re-anchor the doc panel, same as a
// shift-click on the hamburger.
// the direction). Persist it + re-anchor the doc panel.
if (side === 'left' || side === 'right') {
const wantRight = side === 'right';
if (sidebar.classList.contains('right-side') !== wantRight) {
@@ -143,13 +141,6 @@ export function initSidebarLayout(Storage, opts) {
hamburgerBtn.addEventListener('click', (e) => {
e.stopPropagation();
const sidebar = document.getElementById('sidebar');
if (e.shiftKey) {
sidebar.classList.toggle('right-side');
Storage.set(Storage.KEYS.SIDEBAR_SIDE, sidebar.classList.contains('right-side') ? 'right' : 'left');
syncRailSide();
if (documentModule && documentModule.swapSide) documentModule.swapSide();
return;
}
_userToggledSidebar = true;
const isSidebarVisible = !sidebar.classList.contains('hidden');
+22 -3
View File
@@ -17,9 +17,16 @@ let _tasksFetched = false; // first-fetch sentinel — `false` → show loadin
let _escHandler = null;
let _viewingRuns = null; // task id when viewing run history
let _clockInterval = null;
let _taskFailurePending = false;
const DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'];
// Store whether a task failure is pending (coerced to a boolean) and mirror it
// as the `task-failure-pending` class on the #tool-tasks-btn and #rail-tasks
// elements. Either element may be absent, in which case it is skipped.
function _setTaskFailurePending(active) {
  _taskFailurePending = Boolean(active);
  for (const id of ['tool-tasks-btn', 'rail-tasks']) {
    const el = document.getElementById(id);
    if (el) el.classList.toggle('task-failure-pending', _taskFailurePending);
  }
}
// ---- API ----
async function _fetchTasks() {
@@ -2238,6 +2245,9 @@ function _renderActivityEntry(entry) {
status = _classifyResult(entry.result);
}
const statusDot = `<span class="task-log-status task-log-status-${status}" title="${status}"></span>`;
const failedTag = status === 'error'
? '<span class="task-log-failed-tag">(failed)</span>'
: '';
// Render the result through markdown so code blocks, lists, links look right.
let resultHtml;
const _isRunning = entry.status === 'running' || entry.status === 'queued';
@@ -2361,7 +2371,7 @@ function _renderActivityEntry(entry) {
<div class="task-log-row-head">
${statusDot}
<span class="task-log-task-icon">${_taskIcon({ action: entry.action, task_type: entry.kind })}</span>
<span class="task-log-name">${_escHtml(entry.taskName)}</span>${_taskAiMark(entry)}
<span class="task-log-name">${_escHtml(entry.taskName)}</span>${failedTag}${_taskAiMark(entry)}
${repeatBadge}
<span style="flex:1"></span>
${rightHtml}
@@ -2502,8 +2512,11 @@ function _renderMainView() {
export function openTasks(focusId, opts) {
const o = opts || {};
const openActivityForFailure = _taskFailurePending && !focusId && o.filter === undefined;
_setTaskFailurePending(false);
if (_open) {
// Already open — just focus the requested task / apply filter.
if (openActivityForFailure) _switchTab('activity');
if (o.filter !== undefined) { _taskFilter = o.filter; _renderList(); }
if (focusId) _focusTask(focusId);
return;
@@ -2610,7 +2623,7 @@ export function openTasks(focusId, opts) {
// of an empty modal-body that fills in after the fetch resolves — that delay
// was visible as a "flicker" right after opening.
_activeTab = 'tasks';
_switchTab('tasks');
_switchTab(openActivityForFailure ? 'activity' : 'tasks');
_fetchTasks().then(() => {
// Re-render so the list swaps the Loading row for real cards.
_renderList();
@@ -2704,7 +2717,13 @@ async function _pollTaskNotifications() {
const msg = `Task ${ok ? 'finished' : 'failed'}: ${n.task_name}`;
if (!uiModule) continue;
if (ok) uiModule.showToast(msg, { duration: 5000 });
else uiModule.showError(msg);
else {
_setTaskFailurePending(true);
uiModule.showError(msg);
if (_open && document.querySelector('.tasks-tab.active[data-tab="activity"]')) {
_renderActivityView();
}
}
}
} catch (e) {
// Silently ignore — server may be unreachable
+3 -1
View File
@@ -26,7 +26,7 @@ export const THEMES = {
gpt: { bg:'#212121', fg:'#ececec', panel:'#171717', border:'#424242', red:'#949494',
advanced: { sendBtnBg: '#949494', sendBtnHover: '#7f7f7f',
userBubbleBg: '#2f2f2f', aiBubbleBg: '#171717',
inputBg: '#2f2f2f' } },
inputBg: '#2f2f2f', brandColor: '#ffffff', brandMixTo: '#ffffff' } },
claude: { bg:'#262624', fg:'#f5f4f0', panel:'#30302e', border:'#4a4a47', red:'#c6613f' },
cute: { bg:'#fff0f5', fg:'#d4608a', panel:'#fff8fa', border:'#f0c0d0', red:'#ff6b9d' },
};
@@ -184,6 +184,7 @@ const ADV_KEYS = [
{ key: 'bubbleBorder', css: '--bubble-border', label: 'Border Chat Bubble', group: 'Chat Bubbles' },
{ key: 'sidebarBg', css: '--sidebar-bg', label: 'Sidebar Bg', group: 'Sidebar' },
{ key: 'brandColor', css: '--brand-color', label: 'Odysseus Logo', group: 'Sidebar' },
{ key: 'brandMixTo', css: '--brand-mix-to', label: 'Logo Gradient End', group: 'Sidebar' },
{ key: 'hamburgerColor', css: '--hamburger-color', label: 'Hamburger Menu', group: 'Sidebar' },
{ key: 'inputBg', css: '--input-bg', label: 'Input Bg', group: 'Chat Input / Prompt Area' },
{ key: 'inputBorder', css: '--input-border', label: 'Input Border', group: 'Chat Input / Prompt Area' },
@@ -203,6 +204,7 @@ function computeAdvancedDefaults(colors) {
bubbleBorder: colors.border,
sidebarBg: colors.panel,
brandColor: red,
brandMixTo: colors.fg,
hamburgerColor: colors.fg,
inputBg: colors.panel,
inputBorder: colors.border,
+664 -62
View File
File diff suppressed because it is too large Load Diff
+2 -1
View File
@@ -43,7 +43,8 @@ def test_background_session_sort_uses_owner_task_endpoint():
def test_scheduler_fallbacks_and_research_headers_are_owner_scoped():
src = _src("src/task_scheduler.py")
assert "resolve_utility_fallback_candidates(owner=task.owner or None)" in src
assert "resolve_task_candidates(" in src
assert "owner=task.owner or None" in src
assert 'resolve_endpoint(\n "research",' in src
assert "owner=task.owner or None" in src
assert "headers_from_resolver = False" in src
+21 -26
View File
@@ -51,23 +51,19 @@ class _Db:
self.closed = True
def _resolver_spy(monkeypatch, utility_result=("", "", {}), default_result=("http://llm", "model", {})):
from src import endpoint_resolver
def _resolver_spy(monkeypatch, candidates=None):
from src import task_endpoint
calls = []
fallback_calls = []
def fake_resolve(kind, *args, **kwargs):
calls.append((kind, kwargs.get("owner")))
return utility_result if kind == "utility" else default_result
def fake_candidates(*args, **kwargs):
calls.append(kwargs.get("owner"))
if candidates is None:
return [("http://llm", "model", {})]
return list(candidates)
def fake_fallbacks(*args, **kwargs):
fallback_calls.append(kwargs.get("owner"))
return []
monkeypatch.setattr(endpoint_resolver, "resolve_endpoint", fake_resolve)
monkeypatch.setattr(endpoint_resolver, "resolve_utility_fallback_candidates", fake_fallbacks)
return calls, fallback_calls
monkeypatch.setattr(task_endpoint, "resolve_task_candidates", fake_candidates)
return calls
@pytest.mark.asyncio
@@ -88,7 +84,7 @@ async def test_classify_events_resolves_llm_for_task_owner(monkeypatch):
location="",
)
db = _Db({FakeCalendarEvent: [event]})
calls, _fallback_calls = _resolver_spy(monkeypatch, utility_result=("http://llm", "model", {}))
calls = _resolver_spy(monkeypatch)
monkeypatch.setattr(database, "CalendarEvent", FakeCalendarEvent)
monkeypatch.setattr(database, "SessionLocal", lambda: db)
@@ -97,7 +93,7 @@ async def test_classify_events_resolves_llm_for_task_owner(monkeypatch):
assert ok is True
assert "Scanned 1 upcoming event" in message
assert calls == [("utility", "alice")]
assert calls == ["alice"]
assert db.closed is True
@@ -122,7 +118,7 @@ async def test_learn_sender_signatures_resolves_llm_for_task_owner(monkeypatch):
def logout(self):
return None
calls, _fallback_calls = _resolver_spy(monkeypatch, utility_result=("", "", {}), default_result=("", "", {}))
calls = _resolver_spy(monkeypatch, candidates=[])
imap_owners = []
def fake_imap_connect(_account_id=None, owner=""):
@@ -135,14 +131,14 @@ async def test_learn_sender_signatures_resolves_llm_for_task_owner(monkeypatch):
assert ok is False
assert message == "No LLM endpoint available"
assert calls == [("utility", "alice"), ("default", "alice")]
assert calls == ["alice"]
assert imap_owners == ["alice"]
@pytest.mark.asyncio
async def test_learn_sender_signatures_writes_owner_scoped_cache(monkeypatch, tmp_path):
from routes import email_helpers
from src import endpoint_resolver, llm_core
from src import llm_core, task_endpoint
from src.builtin_actions import action_learn_sender_signatures
db_path = tmp_path / "scheduled_emails.db"
@@ -205,15 +201,15 @@ async def test_learn_sender_signatures_writes_owner_scoped_cache(monkeypatch, tm
monkeypatch.setattr(email_helpers, "_imap_connect", fake_imap_connect)
monkeypatch.setattr(
endpoint_resolver,
"resolve_endpoint",
lambda kind, *args, **kwargs: ("http://llm", "alice-model", {}),
task_endpoint,
"resolve_task_candidates",
lambda *args, **kwargs: [("http://llm", "alice-model", {})],
)
async def fake_llm_call_async(**_kwargs):
async def fake_llm_call_async(_candidates, **_kwargs):
return "Writer Example\nExample Co.\nwriter@example.com"
monkeypatch.setattr(llm_core, "llm_call_async", fake_llm_call_async)
monkeypatch.setattr(llm_core, "llm_call_async_with_fallback", fake_llm_call_async)
message, ok = await action_learn_sender_signatures("alice")
@@ -253,7 +249,7 @@ async def test_check_email_urgency_resolves_llm_candidates_for_task_owner(monkey
from_address = _Column()
db = _Db({FakeEmailAccount: []})
calls, fallback_calls = _resolver_spy(monkeypatch, utility_result=("http://llm", "model", {}))
calls = _resolver_spy(monkeypatch)
monkeypatch.chdir(tmp_path)
monkeypatch.setattr(database, "EmailAccount", FakeEmailAccount)
@@ -262,6 +258,5 @@ async def test_check_email_urgency_resolves_llm_candidates_for_task_owner(monkey
with pytest.raises(TaskNoop, match="no email accounts configured"):
await action_check_email_urgency("alice")
assert calls == [("utility", "alice")]
assert fallback_calls == ["alice"]
assert calls == ["alice"]
assert db.closed is True
+8 -4
View File
@@ -29,8 +29,8 @@ def _read_memories(data_dir):
@pytest.mark.asyncio
async def test_consolidate_memory_empty_owner_treats_each_owner_separately(monkeypatch, tmp_path):
from src import constants
from src import endpoint_resolver
from src import llm_core
from src import task_endpoint
action_consolidate_memory = _import_consolidate_action()
long_alice_text = "Alice private project context. " + ("A" * 2200)
@@ -44,11 +44,15 @@ async def test_consolidate_memory_empty_owner_treats_each_owner_separately(monke
],
)
monkeypatch.setattr(constants, "DATA_DIR", str(data_dir))
monkeypatch.setattr(endpoint_resolver, "resolve_endpoint", lambda *args, **kwargs: ("http://llm", "model", {}))
monkeypatch.setattr(
task_endpoint,
"resolve_task_candidates",
lambda *args, **kwargs: [("http://llm", "model", {})],
)
prompts = []
async def fake_llm_call_async(**kwargs):
async def fake_llm_call_async(_candidates, **kwargs):
prompt = kwargs["messages"][0]["content"]
prompts.append(prompt)
if "alice-long" in prompt:
@@ -71,7 +75,7 @@ async def test_consolidate_memory_empty_owner_treats_each_owner_separately(monke
}
)
monkeypatch.setattr(llm_core, "llm_call_async", fake_llm_call_async)
monkeypatch.setattr(llm_core, "llm_call_async_with_fallback", fake_llm_call_async)
message, ok = await action_consolidate_memory("")
@@ -29,24 +29,24 @@ class _FakeMM:
def test_omitted_memory_survives_only_explicit_drop(monkeypatch):
import src.memory
import src.endpoint_resolver
import src.llm_core
import src.task_endpoint
_FakeMM.saved = None
monkeypatch.setattr(src.memory, "MemoryManager", _FakeMM)
monkeypatch.setattr(
src.endpoint_resolver, "resolve_endpoint",
lambda kind, owner=None: ("http://x/v1", "model", {}),
src.task_endpoint, "resolve_task_candidates",
lambda owner=None: [("http://x/v1", "model", {})],
)
async def fake_llm(**kwargs):
async def fake_llm(_candidates, **kwargs):
# Model keeps 'a', drops 'b', and OMITS 'c' entirely.
return json.dumps({
"keep": [{"id": "a", "text": "Likes dark roast coffee", "category": "preference"}],
"drop": [{"id": "b", "reason": "duplicate of a"}],
})
monkeypatch.setattr(src.llm_core, "llm_call_async", fake_llm)
monkeypatch.setattr(src.llm_core, "llm_call_async_with_fallback", fake_llm)
msg, ok = asyncio.run(ba.action_consolidate_memory("alice"))
+30
View File
@@ -16,6 +16,7 @@ from pathlib import Path
SRC = Path(__file__).resolve().parent.parent / "static/js/cookbook.js"
SERVE_SRC = Path(__file__).resolve().parent.parent / "static/js/cookbookServe.js"
ROUTES_SRC = Path(__file__).resolve().parent.parent / "routes/cookbook_routes.py"
def test_cpu_only_drops_gpu_only_flags():
@@ -54,3 +55,32 @@ def test_windows_diffusers_uses_python_not_python3():
assert "const diffusersPy = _isWindows() ? 'python' : _py3Bin;" in text
assert "cmd += `${diffusersPy} scripts/diffusion_server.py" in text
assert "cmd += `python3 scripts/diffusion_server.py" not in text
def test_vllm_blank_swap_omits_swap_space_flag():
    """Pin the cookbook.js fragments that decide whether ``--swap-space`` is emitted.

    Purely textual: the file behind ``SRC`` is read once and every expected
    JavaScript fragment must appear in it verbatim.
    """
    cookbook_js = SRC.read_text(encoding="utf-8")
    expected_fragments = (
        "const _swapRaw = (f.swap ?? '').toString().trim().toLowerCase();",
        "['0', 'off', 'none', 'false'].includes(_swapRaw)",
        "if (_swapRaw && !['0', 'off', 'none', 'false'].includes(_swapRaw)) cmd += ` --swap-space ${_swapRaw}`;",
    )
    for fragment in expected_fragments:
        assert fragment in cookbook_js
def test_serve_preflight_uses_selected_server_not_stale_env_host():
    """Check that cookbookServe.js derives its preflight hosts from ``launchTarget``.

    Purely textual: each fragment below must be present verbatim in the file
    behind ``SERVE_SRC``.
    """
    serve_js = SERVE_SRC.read_text(encoding="utf-8")
    expected_fragments = (
        "function _selectedServeTarget(panel) {",
        "const _hostStr = launchTarget.host || '';",
        "(t.remoteHost || '') === _hostStr",
        "const _probeHost = (launchTarget.host || '').trim();",
        "const _portHost = (launchTarget.host || '').trim();",
    )
    for fragment in expected_fragments:
        assert fragment in serve_js
def test_vllm_route_strips_swap_space_when_runtime_rejects_it():
    """Pin the ``--swap-space`` handling text in routes/cookbook_routes.py.

    Purely textual: the log messages, the help-command plumbing and the
    ``eval`` invocations listed below must all appear verbatim in the file
    behind ``ROUTES_SRC``.
    """
    routes_py = ROUTES_SRC.read_text(encoding="utf-8")
    expected_fragments = (
        "Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU.",
        "vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0.",
        "ODYSSEUS_VLLM_HELP_CMD",
        "print(shlex.join(parts[:serve_i + 1] + [\"--help\"]))",
        "eval \"$ODYSSEUS_VLLM_HELP_CMD\" 2>&1 | grep -q -- \"--swap-space\"",
        "eval \"$ODYSSEUS_SERVE_CMD\"",
    )
    for fragment in expected_fragments:
        assert fragment in routes_py
+5 -5
View File
@@ -348,7 +348,7 @@ def test_serve_pip_install_normalizes_llama_cpp_alias_and_adds_wheel_index():
src = (pathlib.Path(__file__).resolve().parent.parent
/ "routes" / "cookbook_routes.py").read_text(encoding="utf-8")
assert "re.sub(r\"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])\", \"llama-cpp-python[server]\", req.cmd)" in src
assert "re.sub(r\"(?<![A-Za-z0-9_.\\-/])llama_cpp(?![A-Za-z0-9_.\\-/])\", \"llama-cpp-python[server]\", req.cmd)" in src
assert "if \"llama-cpp-python\" in req.cmd and \"--extra-index-url\" not in req.cmd:" in src
assert "https://abetlen.github.io/llama-cpp-python/whl/cpu" in src
@@ -626,7 +626,7 @@ def test_llama_cpp_linux_bootstrap_prefers_rocm_before_cuda():
script = "\n".join(runner_lines)
assert "mkdir -p ~/bin" in script
assert script.index("mkdir -p ~/bin") < script.index("cd ~/llama.cpp && rm -rf build")
assert script.index("mkdir -p ~/bin") < script.index("cd ~/llama.cpp")
assert 'command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]' in script
assert 'cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON' in script
assert 'cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON' in script
@@ -676,7 +676,7 @@ def test_llama_cpp_linux_bootstrap_nvcc_without_cudart_warns_and_falls_back():
# outer else that handles no-GPU-toolchain). Verify it appears at least once
# before the outer "no HIP/CUDA toolchain" warning.
cpu_cmake = 'cmake -B build -DCMAKE_BUILD_TYPE=Release &&'
no_toolchain_warn = 'WARNING: no HIP/CUDA toolchain found'
no_toolchain_warn = 'WARNING: no HIP/CUDA/Vulkan toolchain found'
assert cpu_cmake in script
assert script.index(cpu_cmake) < script.index(no_toolchain_warn)
@@ -693,8 +693,8 @@ def test_llama_cpp_linux_bootstrap_keeps_cpu_fallback_when_no_gpu_toolchain():
_append_llama_cpp_linux_accel_build_lines(runner_lines)
script = "\n".join(runner_lines)
assert 'WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only.' in script
assert 'Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA' in script
assert 'WARNING: no HIP/CUDA/Vulkan toolchain found — building llama-server for CPU only.' in script
assert 'Install Vulkan (libvulkan-dev) / ROCm for AMD GPUs or CUDA tooling for NVIDIA' in script
def test_llama_cpp_rebuild_cmd_clears_cached_build_paths():
@@ -50,14 +50,14 @@ def test_serve_launch_preflights_use_selected_target_and_port():
assert "if (launchTarget.port) _probeParams.set('ssh_port', launchTarget.port);" in SERVE
assert "const _portHost = (launchTarget.host || '').trim();" in SERVE
assert "StrictHostKeyChecking=no ${_sshPrefix(launchTarget.port)}${_portHost}" in SERVE
assert "let serveHost = launchTarget.host || '';" in SERVE
assert "const serveHost = launchTarget.host || '';" in SERVE
assert SERVE.index(launch_target) < SERVE.index("const _runningMod = await import('./cookbookRunning.js');")
def test_running_tab_resolves_profile_key_not_first_host():
assert "_serverByVal(_envState.remoteServerKey || _tHost)" in RUNNING
assert "_serverByVal(_targetKey)" in RUNNING
assert "_serverByVal(_envState.remoteServerKey || _host)" in RUNNING
assert "_serverByVal(_envState.remoteServerKey || host)" in RUNNING
assert "_serverByVal(savedKey)" in RUNNING
assert "_serverByVal = shared._serverByVal;" in RUNNING
assert "_selectedServer = shared._selectedServer;" in RUNNING
@@ -126,6 +126,27 @@ def test_plain_reply_copy_text_is_unchanged(node_available):
assert out["content"] == raw
def test_minimax_namespaced_thinking_is_extracted(node_available):
    """A ``<mm:think>…</mm:think>`` span is lifted out of the visible reply.

    The reasoning text must land in ``thinkingBlocks`` and the remaining
    content must be exactly the answer, with no ``mm:think`` residue.
    """
    reasoning = 'The user said "idk" - just casual.'
    answer = "Haha fair. Well, I'm here whenever you figure it out."
    reply = "<mm:think>" + reasoning + "</mm:think>" + answer

    parsed = _extract_thinking_blocks(reply)

    assert parsed["thinkingBlocks"] == [reasoning]
    assert parsed["content"] == answer
    assert "mm:think" not in parsed["content"]
def test_minimax_orphan_closing_tag_drops_leaked_reasoning(node_available):
    """A stray ``</mm:think>`` with no opener yields no thinking block and clean content."""
    answer = "Hi! What can I do for you?"

    parsed = _extract_thinking_blocks("</mm:think>" + answer)

    assert parsed["thinkingBlocks"] == []
    assert parsed["content"] == answer
    assert "mm:think" not in parsed["content"]
def test_thinking_only_message_yields_empty_content(node_available):
# The copy handler falls back to the raw text in this case so the button
# still copies something for turns interrupted mid-thinking.
+2 -2
View File
@@ -59,8 +59,8 @@ def test_docker_entrypoint_does_not_resolve_root_commands_from_app_local_path():
path_export = script.index('export PATH="/app/.local/bin:$PATH"')
gosu_capture = script.index('GOSU_BIN="$(command -v gosu)"')
python_capture = script.index('PYTHON_BIN="$(command -v python)"')
setup_call = script.index('"$GOSU_BIN" "$PUID:$PGID" "$PYTHON_BIN" /app/setup.py')
final_exec = script.index('exec "$GOSU_BIN" "$PUID:$PGID" "$@"')
setup_call = script.index('"$GOSU_BIN" "$ODY_USER" "$PYTHON_BIN" /app/setup.py')
final_exec = script.index('exec "$GOSU_BIN" "$ODY_USER" "$@"')
assert gosu_capture < path_export < setup_call
assert python_capture < path_export < setup_call
@@ -221,6 +221,60 @@ def test_skip_fenced_still_recovers_xml_invoke_markup():
assert "latest python release" in blocks[0].content
def test_stepfun_native_tool_tokens_are_executed_even_when_fenced_fallback_is_skipped():
    """Native ``<tool▁call▁begin>`` tokens are parsed with ``skip_fenced=True``.

    The wrapped call must produce exactly one ``web_search`` block carrying
    the query, and stripping the same text must leave an empty string.
    """
    token_stream = "".join(
        [
            "<tool▁calls▁begin>",
            "<tool▁call▁begin>web_search<tool▁sep>",
            '{"query":"Sweden news today"}',
            "<tool▁call▁end>",
            "<tool▁calls▁end>",
        ]
    )

    parsed = parse_tool_blocks(token_stream, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today" in only_block.content
    assert strip_tool_blocks(token_stream, skip_fenced=True) == ""
def test_stepfun_native_tool_tokens_accept_plain_web_query():
    """A native tool-call token pair with a plain-text (non-JSON) query still parses."""
    token_stream = "".join(
        [
            "<tool▁call▁begin>web_search<tool▁sep>",
            "Sweden news today",
            "<tool▁call▁end>",
        ]
    )

    parsed = parse_tool_blocks(token_stream, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today" in only_block.content
def test_skip_fenced_still_recovers_direct_xml_tool_markup():
    """``<tool_call><web_search>…`` markup is recovered when fenced parsing is skipped.

    Parsing must yield one ``web_search`` block with the query, and stripping
    must leave only the leading prose sentence.
    """
    prose = "I'll search now."
    markup = "<tool_call><web_search>News in Sweden today 2026-06-22</web_search></tool_call>"
    reply = prose + "\n" + markup

    parsed = parse_tool_blocks(reply, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "News in Sweden today 2026-06-22" in only_block.content
    assert strip_tool_blocks(reply, skip_fenced=True) == "I'll search now."
def test_skip_fenced_recovers_direct_xml_tool_markup_with_unclosed_wrapper():
    """Direct XML tool markup is recovered even if ``<tool_call>`` is never closed.

    The input opens ``<tool_call>`` but ends right after ``</web_search>``;
    one ``web_search`` block is still expected and only the prose survives
    stripping.
    """
    reply = "".join(
        [
            "I'll search now.\n",
            "<tool_call>\n",
            "<web_search>\n",
            "Sweden news today 2026-06-22\n",
            "</web_search>",
        ]
    )

    parsed = parse_tool_blocks(reply, skip_fenced=True)

    assert len(parsed) == 1
    only_block = parsed[0]
    assert only_block.tool_type == "web_search"
    assert "Sweden news today 2026-06-22" in only_block.content
    assert strip_tool_blocks(reply, skip_fenced=True) == "I'll search now."
def test_skip_fenced_still_recovers_dsml_markup():
dsml = (
"Let me search for that.\n"
+4 -5
View File
@@ -124,9 +124,9 @@ def test_nvidia_odysseus_adds_only_overlay(base):
{"driver": "nvidia", "count": "all", "capabilities": ["gpu"]}
]
# No AMD-only keys leaked in.
# Base Docker socket group is preserved; no AMD-only keys leaked in.
assert "devices" not in svc
assert "group_add" not in svc
assert svc["group_add"] == base_svc["group_add"]
def test_amd_odysseus_adds_only_overlay(base):
@@ -137,11 +137,10 @@ def test_amd_odysseus_adds_only_overlay(base):
# Environment is unchanged from base for AMD.
assert svc["environment"] == base_svc["environment"]
# devices and group_add are new and match the overlay exactly.
# devices are new; group_add preserves the base Docker group and appends AMD groups.
assert "devices" not in base_svc
assert "group_add" not in base_svc
assert svc["devices"] == ["/dev/kfd", "/dev/dri"]
assert svc["group_add"] == ["video", "${RENDER_GID:-render}"]
assert svc["group_add"] == base_svc["group_add"] + ["video", "${RENDER_GID:-render}"]
# No NVIDIA-only keys leaked in.
assert "deploy" not in svc
+1 -1
View File
@@ -18,7 +18,7 @@ def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None)
model_supports_tools = any(kw in model_lc for kw in (
"gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
"qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
"llama-3.3", "llama-4",
"llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
"minimax", "kimi", "yi-", "phi-3", "phi-4", "command-r",
"glm-4", "internlm", "hermes",
"deepseek-v", "deepseek-chat",
+70
View File
@@ -19,7 +19,12 @@ from pathlib import Path
import pytest
from fastapi import APIRouter
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
import core.database as cdb
from core.database import GalleryImage
from src.upload_handler import count_recent_uploads, UploadHandler
import routes.upload_routes as up
@@ -82,6 +87,10 @@ def _files(n):
return [types.SimpleNamespace(filename=f"f{i}.txt") for i in range(n)]
def _image_upload(name="photo.png", content=b"not really png but enough for route metadata"):
return types.SimpleNamespace(filename=name, file=io.BytesIO(content))
@pytest.fixture(autouse=True)
def _reset_router(monkeypatch):
# Module-level router accumulates routes across setup calls; reset it.
@@ -163,3 +172,64 @@ def test_six_file_batch_is_not_rate_limited(tmp_path):
assert meta and meta.get("id")
saved += 1
assert saved == 6
@pytest.mark.asyncio
async def test_chat_image_upload_is_added_to_gallery(tmp_path, monkeypatch):
    """An image uploaded through the upload route gets a matching gallery row.

    The asyncio marker is set explicitly so the coroutine is awaited even if
    pytest-asyncio is not running in auto mode.
    """
    # Throwaway SQLite database under tmp_path, with the project's tables created.
    engine = create_engine(
        f"sqlite:///{tmp_path / 'gallery.db'}",
        connect_args={"check_same_thread": False},
        poolclass=NullPool,
    )
    cdb.Base.metadata.create_all(engine)
    TestingSession = sessionmaker(bind=engine, autoflush=False, autocommit=False)
    gallery_dir = tmp_path / "generated_images"

    # Redirect the route module to the test session factory and gallery directory.
    monkeypatch.setattr(up, "SessionLocal", TestingSession)
    monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(gallery_dir))

    h = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads"))
    up.setup_upload_routes(h)
    endpoint = _endpoint(up.router)

    result = await endpoint(_request(user="alice"), [_image_upload()])

    uploaded = result["files"][0]
    assert uploaded["gallery_id"]

    # Read the row back through an independent session to verify what was persisted.
    db = TestingSession()
    try:
        image = db.query(GalleryImage).filter(GalleryImage.id == uploaded["gallery_id"]).one()
        assert image.owner == "alice"
        assert image.model == "chat-upload"
        assert image.prompt == "photo.png"
        assert image.file_hash == uploaded["hash"]
        assert (gallery_dir / image.filename).exists()
    finally:
        db.close()
@pytest.mark.asyncio
async def test_non_image_chat_upload_is_not_added_to_gallery(tmp_path, monkeypatch):
    """A plain-text upload gets no ``gallery_id`` and creates no gallery row.

    The asyncio marker is set explicitly so the coroutine is awaited even if
    pytest-asyncio is not running in auto mode.
    """
    # Throwaway SQLite database under tmp_path, with the project's tables created.
    engine = create_engine(
        f"sqlite:///{tmp_path / 'gallery.db'}",
        connect_args={"check_same_thread": False},
        poolclass=NullPool,
    )
    cdb.Base.metadata.create_all(engine)
    TestingSession = sessionmaker(bind=engine, autoflush=False, autocommit=False)

    # Redirect the route module to the test session factory and gallery directory.
    monkeypatch.setattr(up, "SessionLocal", TestingSession)
    monkeypatch.setattr(up, "GENERATED_IMAGES_DIR", str(tmp_path / "generated_images"))

    h = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads"))
    up.setup_upload_routes(h)
    endpoint = _endpoint(up.router)

    result = await endpoint(_request(user="alice"), [types.SimpleNamespace(
        filename="notes.txt",
        file=io.BytesIO(b"plain text upload"),
    )])

    assert "gallery_id" not in result["files"][0]

    # The gallery table must still be empty after the upload.
    db = TestingSession()
    try:
        assert db.query(GalleryImage).count() == 0
    finally:
        db.close()