Merge dev into fix/native-agent-loop-guard-signals

This commit is contained in:
Alexandre Teixeira
2026-06-26 13:00:59 +01:00
285 changed files with 20014 additions and 3616 deletions
+2
View File
@@ -160,6 +160,8 @@ def setup_api_token_routes() -> APIRouter:
payload = await request.json()
except Exception:
payload = {}
if not isinstance(payload, dict):
payload = {}
with get_db_session() as db:
token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
if not token:
+3 -2
View File
@@ -16,6 +16,7 @@ from pydantic import BaseModel
from core.database import SessionLocal, CrewMember, ScheduledTask
from src.auth_helpers import get_current_user
from core.auth import RESERVED_USERNAMES
from src.task_scheduler import compute_next_run
@@ -89,11 +90,11 @@ def setup_assistant_routes(task_scheduler) -> APIRouter:
# check-in tasks seeded. Hitting any /assistant route under one of these
# used to seed a full CrewMember + Morning/Midday/Evening tasks under that
# owner, which then double-fired alongside the real user's check-ins.
_SYNTHETIC_OWNERS = frozenset({"internal-tool", "api", "demo", "system", ""})
# RESERVED_USERNAMES covers the same set; the `not owner` guard handles "".
async def _get_or_create(owner: str) -> CrewMember:
"""Return the per-owner assistant CrewMember, creating it on demand."""
if not owner or owner in _SYNTHETIC_OWNERS:
if not owner or owner in RESERVED_USERNAMES:
raise HTTPException(status_code=400, detail=f"Cannot seed assistant for {owner!r}")
db = SessionLocal()
try:
+45 -11
View File
@@ -12,8 +12,8 @@ import re
from pathlib import Path
from core.atomic_io import atomic_write_json, atomic_write_text
from core.auth import AuthManager, SetAdminResult
from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
from core.auth import AuthManager, RESERVED_USERNAMES, SetAdminResult, TOKEN_TTL
from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, PASSWORD_MIN_LENGTH, SKILLS_DIR
from src.rate_limiter import RateLimiter
from src.settings_scrub import scrub_settings
from src.settings import (
@@ -102,8 +102,12 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
raise HTTPException(429, "Too many requests — try again later")
if auth_manager.is_configured:
raise HTTPException(400, "Already configured")
if len(body.password) < 8:
raise HTTPException(400, "Password must be at least 8 characters")
if len(body.password) < PASSWORD_MIN_LENGTH:
raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
if len(body.username.strip()) < 1:
raise HTTPException(400, "Username is required")
if body.username.lower() in RESERVED_USERNAMES:
raise HTTPException(403, "Username is reserved")
ok = await asyncio.to_thread(auth_manager.setup, body.username, body.password)
if not ok:
raise HTTPException(500, "Setup failed")
@@ -118,10 +122,12 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
raise HTTPException(400, "Run setup first")
if not auth_manager.signup_enabled:
raise HTTPException(403, "Registration is disabled. Ask an admin for an account.")
if len(body.password) < 8:
raise HTTPException(400, "Password must be at least 8 characters")
if len(body.password) < PASSWORD_MIN_LENGTH:
raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
if len(body.username.strip()) < 1:
raise HTTPException(400, "Username is required")
if body.username.lower() in RESERVED_USERNAMES:
raise HTTPException(403, "Username is reserved")
ok = await asyncio.to_thread(auth_manager.create_user, body.username, body.password, is_admin=False)
if not ok:
raise HTTPException(409, "Username already taken")
@@ -144,6 +150,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
raise HTTPException(401, "Invalid 2FA code")
# All checks passed — create session (password already verified above)
token = await asyncio.to_thread(auth_manager.create_session_trusted, username)
if not token:
raise HTTPException(401, "Invalid credentials")
cookie_kwargs = dict(
key=SESSION_COOKIE,
value=token,
@@ -153,7 +161,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
path="/",
)
if body.remember:
cookie_kwargs["max_age"] = 60 * 60 * 24 * 7 # 7 days
cookie_kwargs["max_age"] = TOKEN_TTL
response.set_cookie(**cookie_kwargs)
return {"ok": True, "username": username}
@@ -182,13 +190,18 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
pass
return result
@router.get("/policy")
async def auth_policy():
    """Serve the public auth policy constants that the frontend needs.

    The payload is whatever ``auth_manager.policy()`` returns; this handler
    adds nothing to it.
    """
    policy = auth_manager.policy()
    return policy
@router.post("/change-password")
async def change_password(body: ChangePasswordRequest, request: Request):
user = _get_current_user(request)
if not user:
raise HTTPException(401, "Not authenticated")
if len(body.new_password) < 8:
raise HTTPException(400, "Password must be at least 8 characters")
if len(body.new_password) < PASSWORD_MIN_LENGTH:
raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
current_token = request.cookies.get(SESSION_COOKIE)
ok = await asyncio.to_thread(auth_manager.change_password, user, body.current_password, body.new_password)
if not ok:
@@ -268,8 +281,12 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
user = _get_current_user(request)
if not user or not auth_manager.is_admin(user):
raise HTTPException(403, "Admin only")
if len(body.password) < 8:
raise HTTPException(400, "Password must be at least 8 characters")
if len(body.password) < PASSWORD_MIN_LENGTH:
raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
if len(body.username.strip()) < 1:
raise HTTPException(400, "Username is required")
if body.username.lower() in RESERVED_USERNAMES:
raise HTTPException(403, "Username is reserved")
ok = auth_manager.create_user(body.username, body.password, body.is_admin)
if not ok:
raise HTTPException(409, "Username already taken")
@@ -432,6 +449,23 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
except Exception as e:
logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
# direct personal RAG uploads live in per-owner directories and the
# vector metadata also carries the username used for owner-filtered
# search. Keep both in sync with the auth rename.
try:
from routes.personal_routes import rename_personal_upload_owner
personal_docs_manager = getattr(request.app.state, "personal_docs_manager", None)
if personal_docs_manager is not None:
rag_manager = getattr(personal_docs_manager, "rag_manager", None)
rename_personal_upload_owner(
old_username,
new_username,
personal_docs_manager=personal_docs_manager,
rag_manager=rag_manager,
)
except Exception as e:
logger.warning("Failed to rename personal RAG upload owner references %s -> %s: %s", old_username, new_username, e)
# skills: SKILL.md frontmatter carries owner: <username>; the usage
# sidecar (_usage.json) keys entries as owner::skill-name. Both must
# be updated or the renamed user's Skills panel goes empty.
+64 -26
View File
@@ -14,7 +14,7 @@ from core.database import Session as DBSession, ModelEndpoint
from src.llm_core import normalize_model_id
from src.endpoint_resolver import normalize_base
from src.context_compactor import maybe_compact, trim_for_context
from src.auth_helpers import get_current_user
from src.auth_helpers import effective_user
from src.prompt_security import untrusted_context_message
from routes.prefs_routes import _load_for_user as load_prefs_for_user
@@ -22,6 +22,47 @@ from fastapi import HTTPException
logger = logging.getLogger(__name__)
# Greeting / slang / acknowledgement openers.
#
# Regex alternation is tried left to right, so a multi-word alternative must
# come before any shorter alternative that is a prefix of it: "thank you" is
# listed ahead of "thanks?" so that "you" is consumed by the opener instead of
# being counted as a tail word. Laughs repeat the whole syllable so that
# "hahaha" / "hehehe" match as well as "haha" / "hahaaa".
_CASUAL_OPENING_RE = re.compile(
    r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
    r"lol|lmao|(?:ha){2,}a*|(?:he){2,}e*|thank you|thanks?|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
    re.IGNORECASE,
)

# Keywords that signal a real request; if any appears after the opener the
# message is never treated as casual.
_CASUAL_BLOCKLIST_RE = re.compile(
    r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
    r"download|model|email|document|doc|note|calendar|task|search|web|research|"
    r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
    re.IGNORECASE,
)


def _is_casual_low_signal(text: str) -> bool:
    """Return True when *text* is just a short greeting or slang opener.

    Short greetings/slang should not pull memory, skills, RAG, or docs.

    A message qualifies when it starts with a casual opener, the remainder
    contains no blocklisted task keyword, and the remainder has at most two
    words. Falsy input (``None``, ``""``) is treated as an empty string and
    is therefore not casual.
    """
    stripped = str(text or "").strip()
    opener = _CASUAL_OPENING_RE.match(stripped)
    if opener is None:
        return False
    tail = opener.group("tail") or ""
    # A task keyword after the greeting means the user wants real work done.
    if _CASUAL_BLOCKLIST_RE.search(tail):
        return False
    # Up to two trailing words ("hello there friend") still counts as small talk.
    tail_words = re.findall(r"[A-Za-z0-9_'-]+", tail)
    return len(tail_words) <= 2
# Registry of fire-and-forget tasks started from this module. asyncio itself
# only keeps weak references to tasks made with create_task, so a task with no
# other owner can be garbage-collected mid-execution and its background work
# (extraction, auto-naming) silently never runs. Holding each task here until
# it completes prevents that.
# Mirrors WebhookManager._spawn_tracked from src/webhook_manager.py.
_BG_TASKS: set[asyncio.Task] = set()


def _spawn_bg(coro) -> asyncio.Task:
    """Start *coro* as a background task and keep it alive until it completes.

    The task is stored in ``_BG_TASKS`` and removes itself from that set via a
    done-callback. The task is returned to the caller.
    """
    bg_task = asyncio.create_task(coro)
    # Register the cleanup first; callbacks only fire once the task finishes,
    # so the order relative to the add() below does not matter.
    bg_task.add_done_callback(_BG_TASKS.discard)
    _BG_TASKS.add(bg_task)
    return bg_task
# ── Data containers ────────────────────────────────────────────────────── #
@@ -78,7 +119,7 @@ def _enforce_chat_privileges(request, sess) -> None:
which means unrestricted allowed_models / zero cap -> no-op for them.
"""
try:
user = get_current_user(request)
user = effective_user(request)
except Exception:
user = None
if not user:
@@ -159,17 +200,9 @@ async def auto_name_session(session_manager, sess):
return
owner = getattr(sess, "owner", None)
t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
if not t_model:
# If no task/utility model is configured at all, fall back to
# the session's own model so auto-naming still works even on
# minimal setups.
from src.endpoint_resolver import resolve_endpoint
_fallback = resolve_endpoint("default", owner=owner)
if _fallback and _fallback[1]:
t_url, t_model, t_headers = _fallback
else:
t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=owner
)
if not t_model:
logger.debug("[auto-name] No model provided, skipping")
return
@@ -346,11 +379,11 @@ def add_user_message(sess, chat_handler, preprocessed: PreprocessedMessage, inco
def fire_message_event(request, webhook_manager, session_id: str, sess, message: str, compare_mode: bool = False):
"""Fire webhook and event_bus events for a new user message."""
if webhook_manager and not compare_mode:
asyncio.create_task(webhook_manager.fire("chat.message", {
webhook_manager.fire_and_forget("chat.message", {
"session_id": session_id, "model": sess.model, "message": message[:2000],
}))
})
from src.event_bus import fire_event
user = get_current_user(request)
user = effective_user(request)
fire_event("message_sent", user)
@@ -576,9 +609,11 @@ async def build_chat_context(
if not incognito:
fire_message_event(request, webhook_manager, session_id, sess, message, compare_mode)
# Resolve user prefs
user = get_current_user(request)
# Resolve owner-scoped prefs/context. Browser requests keep the cookie user;
# bearer-token chat requests use the token owner instead of the "api" sentinel.
user = effective_user(request)
uprefs = load_prefs_for_user(user)
casual_low_signal = _is_casual_low_signal(message)
# Memory enabled?
mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True)
@@ -588,6 +623,9 @@ async def build_chat_context(
if not allow_tool_preprocessing:
mem_enabled = False
skills_enabled = False
if casual_low_signal:
mem_enabled = False
skills_enabled = False
logger.debug(
"Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
@@ -603,11 +641,11 @@ async def build_chat_context(
# Use RAG?
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
if incognito or not allow_tool_preprocessing or is_research_spinoff:
if incognito or not allow_tool_preprocessing or is_research_spinoff or casual_low_signal:
use_rag_val = False
# If pre-fetched search context was provided (compare mode), skip live web search
skip_web = bool(search_context) or not allow_tool_preprocessing
skip_web = bool(search_context) or not allow_tool_preprocessing or casual_low_signal
# Build context preface
# The stream path uses enhanced_message (with CoT/preprocessing applied),
@@ -626,7 +664,7 @@ async def build_chat_context(
incognito=incognito,
use_skills=skills_enabled,
)
if use_rag is not None or is_research_spinoff:
if use_rag is not None or is_research_spinoff or casual_low_signal:
_preface_kwargs["use_rag"] = use_rag_val
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
@@ -634,7 +672,7 @@ async def build_chat_context(
used_memories = getattr(chat_processor, '_last_used_memories', [])
# Inject pre-fetched search context (compare mode)
if search_context and allow_tool_preprocessing:
if search_context and allow_tool_preprocessing and not casual_low_signal:
preface.append(untrusted_context_message("prefetched search context", search_context))
# YouTube transcripts
@@ -1112,7 +1150,7 @@ def run_post_response_tasks(
)))
if _extraction_jobs:
asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
_spawn_bg(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
# Token accumulation
if last_metrics:
@@ -1120,11 +1158,11 @@ def run_post_response_tasks(
# Webhook
if webhook_manager and not compare_mode:
asyncio.create_task(webhook_manager.fire("chat.completed", {
webhook_manager.fire_and_forget("chat.completed", {
"session_id": session_id, "model": sess.model,
"user_message": message, "response": full_response[:2000],
}))
})
# Auto-name
if needs_auto_name(sess.name):
asyncio.create_task(auto_name_session(session_manager, sess))
_spawn_bg(auto_name_session(session_manager, sess))
+25 -12
View File
@@ -23,12 +23,13 @@ from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_
from src.session_search import search_session_messages
from src.prompt_security import untrusted_context_message
from core.exceptions import SessionNotFoundError
from src.auth_helpers import get_current_user
from src.auth_helpers import effective_user, get_current_user
from routes.session_routes import _verify_session_owner
from routes.document_helpers import _owner_session_filter
from core.database import SessionLocal, get_session_mode, set_session_mode
from core.database import Session as DBSession, ChatMessage as DBChatMessage
from core.database import Document as DBDocument, ModelEndpoint
from core.log_safety import redact_url
from routes.research_routes import _resolve_research_endpoint
from routes.model_routes import _visible_models
from routes.chat_helpers import (
@@ -126,7 +127,8 @@ def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool:
sess.model = ""
sess.headers = {}
return True
except Exception:
except Exception as e:
logger.warning("Failed to clear orphaned session endpoint", exc_info=e)
db.rollback()
return False
finally:
@@ -144,7 +146,8 @@ def _endpoint_cache_contains_model(endpoint, model: str) -> bool:
return True
try:
models = json.loads(raw) if isinstance(raw, str) else raw
except Exception:
except Exception as e:
logger.warning("Failed to parse cached models list, treating as containing model", exc_info=e)
return True
if not isinstance(models, list) or not models:
return True
@@ -236,7 +239,8 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None
is_chatgpt_subscription = False
try:
cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or [])
except Exception:
except Exception as e:
logger.warning("Failed to parse cached_models for endpoint %r", getattr(ep, "id", "?"), exc_info=e)
cached = []
if not cached:
visible = []
@@ -360,7 +364,7 @@ def setup_chat_routes(
sess = session_manager.get_session(session)
except KeyError:
raise HTTPException(404, f"Session '{session}' not found")
owner = get_current_user(request)
owner = effective_user(request)
if _clear_orphaned_session_endpoint(sess, owner=owner):
raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
@@ -600,7 +604,7 @@ def setup_chat_routes(
# but BEFORE loading. Prevents cross-user session hijack.
_verify_session_owner(request, session)
sess = session_manager.get_session(session)
owner = get_current_user(request)
owner = effective_user(request)
if _clear_orphaned_session_endpoint(sess, owner=owner):
raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
# Issue #587: picker shows a model from the endpoint cache but
@@ -631,7 +635,7 @@ def setup_chat_routes(
_enforce_chat_privileges(request, sess)
# Ensure session has auth headers
resolve_session_auth(sess, session, owner=get_current_user(request))
resolve_session_auth(sess, session, owner=effective_user(request))
# Check for research_pending BEFORE mode persist overwrites it
do_research = str(use_research).lower() == "true"
@@ -646,8 +650,8 @@ def setup_chat_routes(
elif attachments:
try:
att_ids = [str(x) for x in json.loads(attachments)]
except Exception:
pass
except Exception as e:
logger.warning("Failed to parse attachments JSON, ignoring attachments", exc_info=e)
no_memory = str(form_data.get("no_memory", "")).lower() == "true"
pre_context_tool_policy = build_effective_tool_policy(
@@ -826,7 +830,11 @@ def setup_chat_routes(
from src.settings import get_setting
_global_disabled = get_setting("disabled_tools", [])
if _global_disabled and isinstance(_global_disabled, list):
disabled_tools.update(_global_disabled)
explicit_web_allowed = allow_web_search is not None and str(allow_web_search).lower() == "true"
if explicit_web_allowed:
disabled_tools.update(t for t in _global_disabled if t not in {"web_search", "web_fetch"})
else:
disabled_tools.update(_global_disabled)
# Light auto-escalation: the user is in chat mode and just expressed a
# notes/calendar/email intent. Grant the relevant managers but withhold
@@ -923,7 +931,7 @@ def setup_chat_routes(
if effective_do_research:
_r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess)
_auth_keys = list(_r_headers.keys()) if _r_headers else []
logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={_r_ep}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}")
logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={redact_url(_r_ep)}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}")
# Clarification round: only for very short/vague queries on first research message.
# Skip in compare mode — each pane is a fresh session, so every one would
@@ -1256,6 +1264,10 @@ def setup_chat_routes(
_max_rounds = _DEFAULT_ROUNDS
_max_rounds = max(1, min(_max_rounds, 200))
_forced_tools = None
if allow_web_search is not None and str(allow_web_search).lower() == "true":
_forced_tools = {"web_search", "web_fetch"}
async for chunk in stream_agent_loop(
sess.endpoint_url,
sess.model,
@@ -1277,6 +1289,7 @@ def setup_chat_routes(
plan_mode=plan_mode,
approved_plan=approved_plan or None,
workspace=workspace or None,
forced_tools=_forced_tools,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
@@ -1484,7 +1497,7 @@ def setup_chat_routes(
if not q or not q.strip():
return []
_user = get_current_user(request)
_user = effective_user(request)
return [
result.to_dict()
for result in search_session_messages(
+10 -3
View File
@@ -46,8 +46,12 @@ def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
shell metacharacters in ``remoteHost`` is rejected with 400 rather than
injected.
"""
host = validate_remote_host((task.get("remoteHost") or "").strip() or None) or ""
ssh_port = validate_ssh_port((task.get("sshPort") or "").strip() or None) or ""
raw_host = task.get("remoteHost")
raw_port = task.get("sshPort")
host_value = str(raw_host).strip() if raw_host is not None else None
port_value = str(raw_port).strip() if raw_port is not None else None
host = validate_remote_host(host_value or None) or ""
ssh_port = validate_ssh_port(port_value or None) or ""
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
return host, port_flag
@@ -306,7 +310,10 @@ def setup_codex_routes(
@router.post("/emails/draft-document")
async def codex_email_draft_document(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
owner = _scope_owner_all(request, {"email:draft", "documents:write"})
owner = _scope_owner(request, EMAIL_DRAFT_SCOPES)
docs_owner = _scope_owner_all(request, DOCS_WRITE_SCOPES)
if docs_owner != owner:
raise HTTPException(403, "API token owner mismatch")
if documents_create_endpoint is None:
raise HTTPException(503, "Documents integration is not available")
from routes.document_routes import DocumentCreate
+18 -8
View File
@@ -18,6 +18,7 @@ from pathlib import Path
from datetime import datetime
from urllib.parse import urljoin, urlparse, urlunparse
from core.log_safety import redact_url
from fastapi import APIRouter, Query, Depends, Response, HTTPException
from typing import List, Dict, Optional
@@ -689,15 +690,24 @@ def _delete_contact(uid: str) -> bool:
url = _resolve_resource_url(uid)
auth = (cfg["username"], cfg["password"]) if cfg["username"] else None
r = httpx.delete(url, auth=auth, timeout=10)
if r.status_code in (200, 204):
_contact_cache["fetched_at"] = None
return True
if r.status_code == 404:
# Resource not found at the resolved URL. With href resolution
# this should be rare (genuinely already deleted). Invalidate
# the cache and report success so the UI doesn't keep a ghost.
logger.info(f"CardDAV DELETE 404 for {uid} — treating as already gone")
if r.status_code in (200, 204, 404):
# Invalidate cache so the next fetch sees the server truth.
_contact_cache["fetched_at"] = None
# Verify: force a fresh fetch and check the UID is actually gone.
# A 404 on the guessed URL ({uid}.vcf) can mean the contact
# lives at a different resource URL — the DELETE missed it but
# we'd silently report success. This check catches that.
fresh = _fetch_contacts(force=True)
still_there = any(c.get("uid") == uid for c in fresh)
if still_there:
logger.warning(
f"CardDAV DELETE reported success for {uid} "
f"but UID still present after re-fetch — "
f"resource URL may differ from {redact_url(url)}"
)
return False
if r.status_code == 404:
logger.info(f"CardDAV DELETE 404 for {uid} — already gone")
return True
logger.warning(f"CardDAV DELETE returned {r.status_code}: {r.text[:200]}")
return False
+182 -18
View File
@@ -505,6 +505,8 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
" if u.startswith('KB'): return int(n * 1024)",
" return int(n)",
"def scan_ollama():",
" if any(m.get('is_ollama') for m in models): return",
" if os.name == 'nt' and not os.environ.get('ODYSSEUS_ALLOW_OLLAMA_CLI_SCAN'): return",
" if not shutil.which('ollama'): return",
" try:",
" p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)",
@@ -535,8 +537,8 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
" models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
" return",
"for _hf_cache in hf_cache_paths(): scan_hf(_hf_cache)",
"scan_ollama()",
"scan_ollama_api()",
"scan_ollama()",
]
for model_dir in model_dirs or []:
lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))")
@@ -784,25 +786,149 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA configure and
fail with "CUDA Toolkit not found" instead of building with HIP.
"""
# Try a prebuilt binary from llama.cpp's GitHub releases FIRST — no
# cmake/build-essential/git/CUDA-headers needed at all. The from-source
# build below stays as a fallback (custom flags, esoteric arch, no
# internet, etc). 30 seconds vs 5+ minutes of compile, and removes
# every OS-package dep from the launch path. Sets _odysseus_have_prebuilt=1
# on success; the existing build-tier if/elif chain below is gated on
# that variable so we never compile twice or shadow the prebuilt symlink.
runner_lines.append(' _odysseus_have_prebuilt=""')
runner_lines.append(' _odysseus_arch="$(uname -m)"')
runner_lines.append(' _odysseus_prebuilt_url=""')
runner_lines.append(' if command -v curl >/dev/null 2>&1 && [ "$_odysseus_arch" = "x86_64" ]; then')
runner_lines.append(' _odysseus_pat=""')
runner_lines.append(' _odysseus_has_nv_inline() { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU "; }')
runner_lines.append(' _odysseus_has_vk_inline() { ldconfig -p 2>/dev/null | grep -q "libvulkan\\.so" || command -v vulkaninfo >/dev/null 2>&1 || [ -e /usr/lib/x86_64-linux-gnu/libvulkan.so.1 ]; }')
runner_lines.append(' _odysseus_has_vkdev_inline() { ls /dev/dri/renderD* >/dev/null 2>&1 || (lspci 2>/dev/null | grep -Ei \'VGA|3D|Display\' | grep -Eiq \'AMD|ATI|Radeon\'); }')
runner_lines.append(' if _odysseus_has_nv_inline; then')
runner_lines.append(' _odysseus_pat="ubuntu.*cuda"')
runner_lines.append(' elif _odysseus_has_vkdev_inline && _odysseus_has_vk_inline; then')
runner_lines.append(' _odysseus_pat="ubuntu.*vulkan"')
runner_lines.append(' else')
runner_lines.append(' _odysseus_pat="ubuntu-x64\\\\.zip"')
runner_lines.append(' fi')
runner_lines.append(' _odysseus_prebuilt_url="$(curl -fsSL --max-time 15 https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | grep \'"browser_download_url"\' | cut -d\'"\' -f4 | grep -iE "$_odysseus_pat" | grep -iv "arm\\|aarch64" | head -1)"')
runner_lines.append(' fi')
# Accept any of unzip / bsdtar / python3 -m zipfile as the extractor.
# python3 is essentially always present on modern Linux, so this lets
# the prebuilt path work on minimal Ubuntu installs that lack `unzip`.
runner_lines.append(' if [ -n "$_odysseus_prebuilt_url" ] && (command -v unzip >/dev/null 2>&1 || command -v bsdtar >/dev/null 2>&1 || command -v python3 >/dev/null 2>&1); then')
runner_lines.append(' echo "[odysseus] Found prebuilt llama-server: $_odysseus_prebuilt_url"')
runner_lines.append(' mkdir -p ~/bin "$HOME/.cache/odysseus/llama-cpp-prebuilt" && cd "$HOME/.cache/odysseus/llama-cpp-prebuilt"')
runner_lines.append(' rm -f llama-cpp.zip')
runner_lines.append(' if curl -fsSL --max-time 120 "$_odysseus_prebuilt_url" -o llama-cpp.zip && [ -s llama-cpp.zip ]; then')
runner_lines.append(' rm -rf build && mkdir -p build')
runner_lines.append(' if command -v unzip >/dev/null 2>&1; then unzip -qq -o llama-cpp.zip -d build; elif command -v bsdtar >/dev/null 2>&1; then bsdtar -xf llama-cpp.zip -C build; else python3 -c "import zipfile; zipfile.ZipFile(\\"llama-cpp.zip\\").extractall(\\"build\\")"; fi')
runner_lines.append(' _odysseus_extracted="$(find build -type f -name llama-server 2>/dev/null | head -1)"')
runner_lines.append(' if [ -n "$_odysseus_extracted" ]; then')
runner_lines.append(' chmod +x "$_odysseus_extracted"')
runner_lines.append(' ln -sf "$_odysseus_extracted" ~/bin/llama-server')
runner_lines.append(' _odysseus_libdir="$(dirname "$_odysseus_extracted")"')
runner_lines.append(' mkdir -p ~/.config && echo "export LD_LIBRARY_PATH=\\"$_odysseus_libdir:\\${LD_LIBRARY_PATH:-}\\"" > ~/.config/odysseus-llama-cpp-env')
runner_lines.append(' _odysseus_have_prebuilt=1')
runner_lines.append(' echo "[odysseus] Prebuilt llama-server installed at $_odysseus_extracted"')
runner_lines.append(' fi')
runner_lines.append(' fi')
runner_lines.append(' [ -z "$_odysseus_have_prebuilt" ] && echo "[odysseus] Prebuilt download/extract failed — falling back to from-source build."')
runner_lines.append(' elif [ -z "$_odysseus_prebuilt_url" ]; then')
runner_lines.append(' echo "[odysseus] No matching prebuilt llama-server for this host (arch=$_odysseus_arch) — will build from source."')
runner_lines.append(' fi')
runner_lines.append(' if [ -z "$_odysseus_have_prebuilt" ]; then')
# Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put it on PATH
# so cmake's CUDA configure can find it. We keep this after the ROCm/HIP
# check — a machine with both stacks should honor the native HIP toolchain on
# AMD hosts instead of accidentally preferring a stray nvcc wheel.
runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
runner_lines.append(' done')
# so cmake's CUDA configure can find it — BUT only when actual NVIDIA
# hardware is present. On AMD/Intel hosts the pip nvcc is a misleading
# leftover (no libcudart, no GPU it could target) and would otherwise
# send the build down the CUDA branch and fail with "CUDA Toolkit not
# found" instead of trying Vulkan.
runner_lines.append(' _odysseus_has_nvidia_hw() {')
runner_lines.append(' command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU " && return 0')
runner_lines.append(' ls /dev/nvidia* >/dev/null 2>&1 && return 0')
runner_lines.append(' lspci 2>/dev/null | grep -iE \'VGA|3D|Display\' | grep -iq nvidia && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
runner_lines.append(' if _odysseus_has_nvidia_hw; then')
runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
runner_lines.append(' done')
runner_lines.append(' fi')
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
# or HIP attempt) doesn't cause the next configure to reuse stale settings.
runner_lines.append(' mkdir -p ~/bin')
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
# Try to install cmake / build-essential / git automatically before the
# build, but ONLY via passwordless sudo (`sudo -n`) — interactive sudo
# would hang a tmux-backgrounded serve task waiting for a password. If
# sudo asks for a password the install is skipped silently and the
# diagnosis pattern (cookbook_routes.py / cookbook_helpers.py) surfaces
# an explicit "install cmake" suggestion in the Cookbook diagnosis
# toolbar after the inevitable build failure.
runner_lines.append(' _odysseus_apt_bootstrap() {')
runner_lines.append(' local _missing=""')
runner_lines.append(' command -v cmake >/dev/null 2>&1 || _missing="$_missing cmake"')
runner_lines.append(' command -v g++ >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || _missing="$_missing build-essential"')
runner_lines.append(' command -v git >/dev/null 2>&1 || _missing="$_missing git"')
runner_lines.append(' [ -z "$_missing" ] && return 0')
runner_lines.append(' if command -v apt-get >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] Auto-installing missing build deps via apt:$_missing"')
runner_lines.append(' sudo -n env DEBIAN_FRONTEND=noninteractive apt-get update -qq 2>&1 | tail -3')
runner_lines.append(' sudo -n env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends $_missing 2>&1 | tail -5 || true')
runner_lines.append(' elif command -v pacman >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] Auto-installing missing build deps via pacman:$_missing"')
runner_lines.append(' local _pacpkgs="$(echo "$_missing" | sed -e \'s/build-essential/base-devel/g\')"')
runner_lines.append(' sudo -n pacman -Sy --needed --noconfirm $_pacpkgs 2>&1 | tail -5 || true')
runner_lines.append(' elif command -v dnf >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] Auto-installing missing build deps via dnf:$_missing"')
runner_lines.append(' local _dnfpkgs="$(echo "$_missing" | sed -e \'s/build-essential/gcc gcc-c++ make/g\')"')
runner_lines.append(' sudo -n dnf install -y $_dnfpkgs 2>&1 | tail -5 || true')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: missing build deps ($_missing) — passwordless sudo is unavailable, cannot auto-install. Cookbook Diagnosis will explain the fix after the build fails."')
runner_lines.append(' fi')
runner_lines.append(' }')
runner_lines.append(' _odysseus_apt_bootstrap')
runner_lines.append(' _odysseus_missing_build_deps=""')
runner_lines.append(' command -v cmake >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps cmake"')
runner_lines.append(' command -v git >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps git"')
runner_lines.append(' command -v g++ >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps build-essential"')
runner_lines.append(' if [ -n "$_odysseus_missing_build_deps" ]; then')
runner_lines.append(' echo "ERROR: llama.cpp source build needs missing packages:$_odysseus_missing_build_deps"')
runner_lines.append(' if command -v apt-get >/dev/null 2>&1; then')
runner_lines.append(' echo "Install on this host: sudo apt-get update && sudo apt-get install -y cmake build-essential git"')
runner_lines.append(' elif command -v pacman >/dev/null 2>&1; then')
runner_lines.append(' echo "Install on this host: sudo pacman -Sy --needed cmake base-devel git"')
runner_lines.append(' elif command -v dnf >/dev/null 2>&1; then')
runner_lines.append(' echo "Install on this host: sudo dnf install -y cmake gcc gcc-c++ make git"')
runner_lines.append(' fi')
runner_lines.append(' echo "Alternative: install a native llama-server on PATH, then relaunch."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append(' fi')
runner_lines.append(' cd ~/llama.cpp')
runner_lines.append(' _odysseus_has_vulkan() {')
runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libvulkan\\.so\' && return 0')
runner_lines.append(' [ -e /usr/lib/libvulkan.so.1 ] && return 0')
runner_lines.append(' [ -e /usr/lib/x86_64-linux-gnu/libvulkan.so.1 ] && return 0')
runner_lines.append(' command -v vulkaninfo >/dev/null 2>&1 && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
runner_lines.append(' _odysseus_has_vulkan_device() {')
runner_lines.append(' ls /dev/dri/renderD* >/dev/null 2>&1 && return 0')
runner_lines.append(' lspci 2>/dev/null | grep -Ei \'VGA|3D|Display\' | grep -Eiq \'AMD|ATI|Radeon\' && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
# Backend preference: native ROCm/HIP > native CUDA > Vulkan > CPU.
# Vulkan is a portable fallback that works on AMD when ROCm isn't
# installed (e.g. Strix Halo) and on any vendor's discrete GPU, but
# it's ~30-40% slower than native HIP/CUDA for LLM inference — only
# pick it when no native toolchain is present.
runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
runner_lines.append(' rm -rf build')
runner_lines.append(' if command -v hipconfig &>/dev/null; then')
runner_lines.append(' export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"')
runner_lines.append(' export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"')
runner_lines.append(' fi')
runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' elif command -v nvcc &>/dev/null; then')
runner_lines.append(' elif command -v nvcc &>/dev/null && _odysseus_has_nvidia_hw; then')
runner_lines.append(' rm -rf build')
# nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete
# tooling can expose nvcc without shipping libcudart, causing cmake to fail
# mid-build with "CUDA runtime library not found". Check cudart explicitly
@@ -826,31 +952,50 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' fi')
runner_lines.append(' elif _odysseus_has_vulkan_device && _odysseus_has_vulkan; then')
runner_lines.append(' echo "[odysseus] Vulkan-capable GPU detected (no ROCm/CUDA toolchain installed) — building llama-server with Vulkan support..."')
runner_lines.append(' rm -rf build-vulkan')
runner_lines.append(' cmake -B build-vulkan -DCMAKE_BUILD_TYPE=Release -DGGML_VULKAN=ON && cmake --build build-vulkan -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build-vulkan/bin/llama-server ~/bin/llama-server')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA/Vulkan toolchain found — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
runner_lines.append(' echo "[odysseus] Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."')
runner_lines.append(' echo "[odysseus] Install Vulkan (libvulkan-dev) / ROCm for AMD GPUs or CUDA tooling for NVIDIA, then re-launch this serve task."')
runner_lines.append(' rm -rf build')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' fi')
runner_lines.append(' fi # end _odysseus_have_prebuilt guard')
def _llama_cpp_rebuild_cmd(update_source: bool = False) -> str:
    """Shell command that clears the Cookbook-managed llama.cpp build.

    Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build``
    and ``~/llama.cpp/build-vulkan`` directories so the next llama.cpp serve
    recompiles from source, picking up a Vulkan, HIP, or CUDA toolchain if one
    is now available. The serve bootstrap only builds when ``llama-server`` is
    missing from PATH, so without this an existing CPU-only build is reused
    forever. The rebuild itself happens on the next serve.

    Args:
        update_source: When true, the command first fast-forwards the
            Cookbook-managed ``~/llama.cpp`` checkout if it exists, or clones
            it when it does not and ``git`` is available. A failed update only
            prints a warning; the cached build is cleared regardless.

    Returns:
        A single ``&&``-chained shell command line.
    """
    update_cmd = ''
    if update_source:
        # Every branch ends in `|| echo ...`, so the `if ... fi` compound
        # always exits 0 and the `&&` chain continues to the cleanup steps.
        update_cmd = (
            'if [ -d "$HOME/llama.cpp/.git" ]; then '
            'git -C "$HOME/llama.cpp" pull --ff-only --depth 1 || '
            'echo "[odysseus] WARNING: llama.cpp source update failed; clearing cached build anyway."; '
            'elif command -v git >/dev/null 2>&1; then '
            'git clone --depth 1 https://github.com/ggml-org/llama.cpp "$HOME/llama.cpp" || '
            'echo "[odysseus] WARNING: llama.cpp clone failed; clearing cached build anyway."; '
            'fi && '
        )
    return (
        'mkdir -p "$HOME/bin" && '
        f'{update_cmd}'
        'rm -f "$HOME/bin/llama-server" && '
        'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && '
        'echo "[odysseus] Cleared the cached llama.cpp build. '
        'Re-launch the serve task to rebuild llama-server from source '
        '(Vulkan, HIP, or CUDA will be used if a matching toolchain is now available)."'
    )
@@ -1113,8 +1258,27 @@ def _diagnose_serve_output(text: str) -> dict | None:
"SGLang is not installed or not in PATH on this server.",
[{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
),
# System build deps come BEFORE the generic llama.cpp catch-all so
# cmake / build-essential / git missing → a specific OS-package
# remediation instead of "install llama-cpp-python[server]" (which
# itself fails to compile when cmake is absent).
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
r"cmake: command not found|cmake.*not found.*[Cc]ould not",
"cmake is required to build llama.cpp from source but isn't installed on this server.",
[{"label": "install build deps for llama.cpp (apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git)", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler",
"A C/C++ compiler (build-essential) is required to build llama.cpp from source.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^git: command not found",
"git is required to clone the llama.cpp source tree.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'",
"llama.cpp / llama-cpp-python dependencies are missing.",
[{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
+340 -16
View File
@@ -189,8 +189,27 @@ def setup_cookbook_routes() -> APIRouter:
"SGLang is not installed or not in PATH on this server.",
[{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
),
# System build deps come BEFORE the generic llama.cpp catch-all
# so cmake / build-essential / git missing → a specific OS-package
# remediation instead of "install llama-cpp-python[server]" (which
# itself fails to compile when cmake is absent).
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
r"cmake: command not found|cmake.*not found.*[Cc]ould not",
"cmake is required to build llama.cpp from source but isn't installed on this server.",
[{"label": "install build deps for llama.cpp (apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git)", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler",
"A C/C++ compiler (build-essential) is required to build llama.cpp from source.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"^git: command not found",
"git is required to clone the llama.cpp source tree.",
[{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'",
"llama.cpp / llama-cpp-python dependencies are missing.",
[{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
@@ -254,6 +273,79 @@ def setup_cookbook_routes() -> APIRouter:
def _load_stored_hf_token() -> str:
    """Return the stored HuggingFace token, looked up via the Cookbook state path."""
    stored_token = load_stored_hf_token(state_path=_cookbook_state_path)
    return stored_token
def _normalize_minimax_m3_vllm_cmd(cmd: str) -> str:
    """Patch MiniMax M3 vLLM launches into the known-good local form.

    The browser form can be stale or omit advanced-only fields. MiniMax M3
    is sensitive to several flags: using the HF repo id with block-size 128
    fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
    system nvcc. Normalize server-side before writing the tmux runner.

    Args:
        cmd: The serve command line as submitted by the client.

    Returns:
        ``cmd`` unchanged when it is empty, is not a ``vllm serve`` launch
        mentioning both "minimax" and "m3", cannot be tokenized, or has no
        model argument after ``serve``. Otherwise the command re-joined with
        ``shlex.join`` after the environment defaults and flags below have
        been applied.
    """
    cmd_lower = (cmd or "").lower()
    if not cmd or "vllm serve" not in cmd_lower or "minimax" not in cmd_lower or "m3" not in cmd_lower:
        return cmd
    try:
        parts = shlex.split(cmd)
    except ValueError:
        return cmd
    try:
        serve_at = parts.index("serve")
    except ValueError:
        return cmd

    # Only KEY=VALUE tokens that appear BEFORE `serve` are environment
    # assignments. Tokens after it are arguments (e.g. the value of
    # `--limit-mm-per-prompt image=4`) and must stay next to their flag;
    # hoisting them to the front would silently corrupt the launch.
    env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    prefix, tail = parts[:serve_at], parts[serve_at:]
    env_parts = [p for p in prefix if env_re.match(p)]
    body = [p for p in prefix if not env_re.match(p)] + tail
    serve_i = len(body) - len(tail)
    if serve_i + 1 >= len(body):
        return cmd

    repo_id = "cyankiwi/MiniMax-M3-AWQ-INT4"
    # NOTE(review): this snapshot path is specific to one user's home
    # directory and one snapshot revision. On any other host the substituted
    # path presumably does not exist -- confirm whether it should be derived
    # from the HF cache location instead.
    snapshot = (
        "/home/pewds/.cache/huggingface/hub/"
        "models--cyankiwi--MiniMax-M3-AWQ-INT4/"
        "snapshots/4082acbbec1236d21828d55b6bb0fe02ade4ab5b"
    )
    if body[serve_i + 1] == repo_id:
        body[serve_i + 1] = snapshot

    def add_env(key: str, value: str) -> None:
        """Add ``key=value`` unless the caller already set ``key``."""
        if not any(p.startswith(f"{key}=") for p in env_parts):
            env_parts.append(f"{key}={value}")

    def has_flag(flag: str) -> bool:
        """Report whether ``flag`` is present as ``--flag`` or ``--flag=value``."""
        return any(p == flag or p.startswith(flag + "=") for p in body)

    def set_flag(flag: str, value: str) -> None:
        """Force ``flag`` to ``value``, overwriting the first existing occurrence."""
        for i, part in enumerate(body):
            if part == flag:
                if i + 1 < len(body):
                    body[i + 1] = value
                else:
                    # Flag was the last token with no value after it.
                    body.append(value)
                return
            if part.startswith(flag + "="):
                body[i] = f"{flag}={value}"
                return
        body.extend([flag, value])

    def add_bool(flag: str) -> None:
        """Append a valueless switch when it is not already present."""
        if not has_flag(flag):
            body.append(flag)

    add_env("VLLM_TARGET_DEVICE", "cuda")
    add_env("VLLM_USE_FLASHINFER_SAMPLER", "0")
    set_flag("--served-model-name", repo_id)
    set_flag("--tool-call-parser", "minimax_m3")
    set_flag("--reasoning-parser", "minimax_m3")
    set_flag("--attention-backend", "TRITON_ATTN")
    set_flag("--block-size", "128")
    add_bool("--language-model-only")
    add_bool("--disable-custom-all-reduce")
    add_bool("--enable-expert-parallel")
    return shlex.join(env_parts + body)
def _cookbook_ssh_dir() -> Path:
# The Docker image keeps cookbook keys under /app/.ssh; that path only
# exists inside the container. On Windows (and any non-container host)
@@ -1230,6 +1322,7 @@ def setup_cookbook_routes() -> APIRouter:
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
req.cmd = _validate_serve_cmd(req.cmd) or ""
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
req.cmd = _normalize_minimax_m3_vllm_cmd(req.cmd)
req.cmd = _venv_safe_local_pip_install_cmd(
req.cmd,
local=not bool(req.remote_host),
@@ -1243,8 +1336,16 @@ def setup_cookbook_routes() -> APIRouter:
req.cmd = _pip_install_no_cache(req.cmd)
# Accept common aliases and enforce server extras for llama-cpp so
# `python -m llama_cpp.server` has all runtime dependencies.
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
# CRITICAL: the lookbehind / lookahead must also exclude `/` so
# the regex DOESN'T mangle a URL path like
# https://abetlen.github.io/llama-cpp-python/whl/cu124
# The previous regex turned that URL into
# https://abetlen.github.io/llama-cpp-python[server]/whl/cu124
# which pip then couldn't resolve → silent fallback to source
# build of the .tar.gz → CPU-only binary (because CMAKE_ARGS
# isn't set), defeating the entire purpose of the CUDA index.
req.cmd = re.sub(r"(?<![A-Za-z0-9_.\-/])llama_cpp(?![A-Za-z0-9_.\-/])", "llama-cpp-python[server]", req.cmd)
req.cmd = re.sub(r"(?<![A-Za-z0-9_.\-/])llama-cpp-python(?![\[/])", "llama-cpp-python[server]", req.cmd)
if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
# PEP-508-style package spec — letters, digits, `.-_` for the
@@ -1284,6 +1385,11 @@ def setup_cookbook_routes() -> APIRouter:
# LOCAL execution on a native-Windows host never uses tmux (detached
# process path below), regardless of the UI-supplied platform.
local_windows = IS_WINDOWS and not remote
if is_windows and remote and "diffusion_server.py" in req.cmd:
raise HTTPException(
400,
"Remote Windows Diffusers serving is not supported yet; use local Windows or a Linux remote server.",
)
if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port):
return {
@@ -1426,6 +1532,69 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' else')
_append_llama_cpp_linux_accel_build_lines(runner_lines)
runner_lines.append(' fi')
# Source the env file the prebuilt-download path writes so
# LD_LIBRARY_PATH includes the directory holding libllama.so
# and friends. No-op when prebuilt wasn't used.
runner_lines.append(' [ -r ~/.config/odysseus-llama-cpp-env ] && . ~/.config/odysseus-llama-cpp-env')
# Auto-upgrade pip llama-cpp-python to the CUDA-enabled
# wheel when (a) NVIDIA hardware is present and (b) the
# currently-installed wheel is CPU-only. Without this the
# user gets the Python server happily running at 3 tok/s
# because pip's default index ships CPU-only wheels.
# Forward-compat: cu124 wheels work on driver/runtime
# 12.4+ including the cu13.x line.
runner_lines.append(' if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU " && python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' if ! python3 -c "import llama_cpp; import sys; sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] NVIDIA detected but installed llama-cpp-python is CPU-only — reinstalling with CUDA wheel index for GPU offload..."')
runner_lines.append(' python3 -m pip install --user --break-system-packages --force-reinstall --no-cache-dir "llama-cpp-python[server]" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 2>&1 | tail -8 || echo "[odysseus] WARNING: CUDA wheel reinstall failed — Python server will stay CPU-only (slow). Manual fix: pip install --user --force-reinstall \'llama-cpp-python[server]\' --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124"')
runner_lines.append(' if python3 -c "import llama_cpp; import sys; sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" 2>/dev/null; then')
runner_lines.append(' echo "[odysseus] llama-cpp-python now supports GPU offload."')
runner_lines.append(' fi')
runner_lines.append(' fi')
runner_lines.append(' fi')
# SHORT-CIRCUIT before the build/pip fallback: if the
# native binary is missing but llama_cpp Python is already
# installed, drop a wrapper at ~/bin/llama-server that
# translates llama-server CLI args to llama_cpp.server's
# underscore-style flags. The user's serve command stays
# `llama-server ...` and "just works" — no build, no cmake,
# no second install. This is the path that unblocks every
# remote where pip-installed llama-cpp-python is already
# working but Cookbook used to insist on a native binary.
runner_lines.append(' if ! command -v llama-server >/dev/null 2>&1 && python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' mkdir -p ~/bin')
runner_lines.append(' cat > ~/bin/llama-server <<\'_ODY_LLAMA_SHIM_EOF\'')
runner_lines.append('#!/usr/bin/env bash')
runner_lines.append('# Auto-generated by Odysseus Cookbook: a `llama-server` lookalike')
runner_lines.append('# that translates the native CLI to `python -m llama_cpp.server`.')
runner_lines.append('# Lets cookbook-generated launch commands run unchanged on hosts')
runner_lines.append('# where only the pip llama-cpp-python package is installed.')
runner_lines.append('ARGS=()')
runner_lines.append('while [ $# -gt 0 ]; do')
runner_lines.append(' case "$1" in')
runner_lines.append(' -ngl|--gpu-layers|--n-gpu-layers) ARGS+=(--n_gpu_layers "$2"); shift 2 ;;')
runner_lines.append(' -c|--ctx-size) ARGS+=(--n_ctx "$2"); shift 2 ;;')
runner_lines.append(' -b|--batch-size) ARGS+=(--n_batch "$2"); shift 2 ;;')
runner_lines.append(' -ub|--ubatch-size) shift 2 ;; # llama-cpp-python has no separate ubatch')
runner_lines.append(' --flash-attn) ARGS+=(--flash_attn true); shift 2 ;;')
runner_lines.append(' --cache-type-k) ARGS+=(--type_k "$2"); shift 2 ;;')
runner_lines.append(' --cache-type-v) ARGS+=(--type_v "$2"); shift 2 ;;')
runner_lines.append(' --n-cpu-moe) ARGS+=(--n_cpu_moe "$2"); shift 2 ;;')
runner_lines.append(' --mmproj) ARGS+=(--clip_model_path "$2"); shift 2 ;;')
runner_lines.append(' --image-max-tokens) shift 2 ;; # native-only')
runner_lines.append(' --no-mmap) ARGS+=(--no_mmap true); shift ;;')
runner_lines.append(' --no-warmup) shift ;; # native-only')
runner_lines.append(' --chat-template) ARGS+=(--chat_format "$2"); shift 2 ;;')
runner_lines.append(' --fit|--split-mode|--tensor-split|--main-gpu|--parallel) shift 2 ;; # native-only')
runner_lines.append(' --mlock) ARGS+=(--use_mlock true); shift ;;')
runner_lines.append(' *) ARGS+=("$1"); shift ;;')
runner_lines.append(' esac')
runner_lines.append('done')
runner_lines.append('exec python3 -m llama_cpp.server "${ARGS[@]}"')
runner_lines.append('_ODY_LLAMA_SHIM_EOF')
runner_lines.append(' chmod +x ~/bin/llama-server')
runner_lines.append(' echo "[odysseus] Created llama-server shim → python -m llama_cpp.server (no native binary needed)"')
runner_lines.append(' fi')
runner_lines.append(' # If the native build failed, fall back to the Python bindings.')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."')
@@ -1489,6 +1658,96 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' echo "ERROR: vLLM is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi')
runner_lines.append(f"ODYSSEUS_SERVE_CMD='{_bash_squote(req.cmd)}'")
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
runner_lines.append(' ODYSSEUS_VLLM_HELP_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print("vllm serve --help")')
runner_lines.append('else:')
runner_lines.append(' print(shlex.join(parts[:serve_i + 1] + ["--help"]))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' ODYSSEUS_VLLM_SUPPORTS_SWAP=0')
runner_lines.append(' if eval "$ODYSSEUS_VLLM_HELP_CMD" 2>&1 | grep -q -- "--swap-space"; then ODYSSEUS_VLLM_SUPPORTS_SWAP=1; fi')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" = "1" ] && ! printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="${ODYSSEUS_SERVE_CMD} --swap-space 0"')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" != "1" ]; then')
runner_lines.append(' if printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('out = []')
runner_lines.append('skip = False')
runner_lines.append('for part in parts:')
runner_lines.append(' if skip:')
runner_lines.append(' skip = False')
runner_lines.append(' continue')
runner_lines.append(' if part == "--swap-space":')
runner_lines.append(' skip = True')
runner_lines.append(' continue')
runner_lines.append(' if part.startswith("--swap-space="):')
runner_lines.append(' continue')
runner_lines.append(' out.append(part)')
runner_lines.append('print(shlex.join(out))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' fi')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('patch = r"""import inspect, sys')
runner_lines.append('from vllm.engine.arg_utils import EngineArgs, AsyncEngineArgs')
runner_lines.append('def _odysseus_swap0(cls):')
runner_lines.append(' params = list(inspect.signature(cls).parameters)')
runner_lines.append(' if "swap_space" not in params:')
runner_lines.append(' return')
runner_lines.append(' idx = params.index("swap_space")')
runner_lines.append(' defaults = list(cls.__init__.__defaults__ or ())')
runner_lines.append(' if idx < len(defaults):')
runner_lines.append(' defaults[idx] = 0')
runner_lines.append(' cls.__init__.__defaults__ = tuple(defaults)')
runner_lines.append(' fields = getattr(cls, "__dataclass_fields__", {})')
runner_lines.append(' if "swap_space" in fields:')
runner_lines.append(' fields["swap_space"].default = 0')
runner_lines.append('_odysseus_swap0(EngineArgs)')
runner_lines.append('_odysseus_swap0(AsyncEngineArgs)')
runner_lines.append('try:')
runner_lines.append(' from vllm.config import CacheConfig')
runner_lines.append(' CacheConfig.swap_space = 0')
runner_lines.append('except Exception:')
runner_lines.append(' pass')
runner_lines.append('_orig_create_engine_config = EngineArgs.create_engine_config')
runner_lines.append('def _odysseus_create_engine_config(self, *args, **kwargs):')
runner_lines.append(' self.swap_space = 0')
runner_lines.append(' return _orig_create_engine_config(self, *args, **kwargs)')
runner_lines.append('EngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('AsyncEngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('from vllm.entrypoints.cli.main import main')
runner_lines.append('sys.exit(main())"""')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('else:')
runner_lines.append(' exe_i = serve_i - 1')
runner_lines.append(' exe = parts[exe_i] if exe_i >= 0 else "vllm"')
runner_lines.append(' py = "python3"')
runner_lines.append(' if exe.endswith("/bin/vllm"):')
runner_lines.append(' py = exe[:-len("/bin/vllm")] + "/bin/python"')
runner_lines.append(' parts[exe_i:serve_i] = [py, "-c", patch]')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' echo "[odysseus] Patched vLLM internal swap_space default to 0 for this runtime."')
runner_lines.append('fi')
elif "sglang.launch_server" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! command -v sglang &>/dev/null; then')
@@ -1530,7 +1789,10 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines,
keep_shell_open=not local_windows,
)
runner_lines.append(req.cmd)
if "vllm serve" in req.cmd:
runner_lines.append('eval "$ODYSSEUS_SERVE_CMD"')
else:
runner_lines.append(req.cmd)
if local_windows:
# Detached background process — no interactive shell to keep open.
# Print the exit marker the status poller looks for, then stop.
@@ -1834,6 +2096,25 @@ def setup_cookbook_routes() -> APIRouter:
out, err = await _run_gpu_shell("ls -1 /sys/class/drm 2>/dev/null", host, ssh_port, timeout=4)
if err is not None or not out:
return []
# Pick the runtime label up-front so each GPU dict gets the
# right `backend`. AMD silicon can be driven by ROCm/HIP (native)
# OR Vulkan (mesa RADV). Reporting "rocm" on a host where no
# ROCm toolchain is installed misleads the frontend env-var
# prefix logic — it would emit `HIP_VISIBLE_DEVICES=` for a
# Vulkan-only stack, which is a silent no-op at best.
rt_out, _ = await _run_gpu_shell(
'command -v rocminfo >/dev/null 2>&1 && echo rocm '
'|| (command -v hipconfig >/dev/null 2>&1 && echo rocm) '
'|| (command -v vulkaninfo >/dev/null 2>&1 && echo vulkan) '
'|| echo unknown',
host, ssh_port, timeout=4,
)
_amd_runtime = (rt_out or "").strip().splitlines()[-1:][0].strip() if rt_out else "rocm"
if _amd_runtime not in ("rocm", "vulkan"):
# Default to rocm so existing ROCm-installed hosts keep
# working; "unknown" only happens when neither toolchain is
# detected (e.g. minimal sysfs read on a fresh box).
_amd_runtime = "rocm"
gpus = []
for entry in out.split():
if not entry.startswith("card") or "-" in entry:
@@ -1877,7 +2158,7 @@ def setup_cookbook_routes() -> APIRouter:
"free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb,
"gtt_used_mb": gtt_used_mb,
"util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85),
"processes": [], "backend": "rocm", "source": "amd-sysfs",
"processes": [], "backend": _amd_runtime, "source": "amd-sysfs",
"unified_memory": unified,
})
if gpus:
@@ -2018,10 +2299,15 @@ def setup_cookbook_routes() -> APIRouter:
amd_gpus = await _probe_amd_sysfs(host, ssh_port)
if amd_gpus:
# The per-GPU dict already carries the runtime label picked by
# _probe_amd_sysfs (rocm vs vulkan); mirror that into the
# wrapper so the frontend can read `data.backend` directly
# without scanning the list.
_amd_wrap_backend = str(amd_gpus[0].get("backend") or "rocm")
return {
"ok": True,
"gpus": amd_gpus,
"backend": "rocm",
"backend": _amd_wrap_backend,
"source": "amd-sysfs",
"fallback_from": "nvidia-smi",
"nvidia_error": nvidia_error,
@@ -2161,6 +2447,17 @@ def setup_cookbook_routes() -> APIRouter:
disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
incoming_removed = data.get("removedTasks") if isinstance(data.get("removedTasks"), dict) else {}
disk_removed = on_disk.get("removedTasks") if isinstance(on_disk, dict) and isinstance(on_disk.get("removedTasks"), dict) else {}
removed_tasks = {**disk_removed, **incoming_removed}
data["removedTasks"] = removed_tasks
removed_ids = set(removed_tasks.keys())
if removed_ids:
incoming_tasks = [
t for t in incoming_tasks
if not (isinstance(t, dict) and t.get("sessionId") in removed_ids)
]
data["tasks"] = incoming_tasks
# Anti-poisoning guard: a stale browser tab can keep POSTing a
# download task as status='done' from before the strict-finish
# fix landed, undoing any server-side correction. For each
@@ -2198,6 +2495,8 @@ def setup_cookbook_routes() -> APIRouter:
sid = t.get("sessionId")
if not sid or sid in incoming_ids:
continue # client's version wins
if sid in removed_ids:
continue # intentional cross-device clear/remove
ts = t.get("ts") or 0
if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS:
preserved.append(t)
@@ -2304,16 +2603,14 @@ def setup_cookbook_routes() -> APIRouter:
# Add 30% headroom for KV cache, activations, etc.
needed_vram = (est_vram * 1.3) if est_vram else None
if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb:
continue
# Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no
# "NB" in the repo id, so the regex above can't extract their
# param count. Previously we dropped them entirely, which made
# brand-new flagship releases silently vanish from this list even
# on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already
# filtered by _is_excluded(), so what falls through here is
# overwhelmingly full models — keep them, just without a size
# badge (the frontend handles needed_vram_gb=null gracefully).
if vram_gb > 0:
if needed_vram is None:
# The "trending models that fit" list must be conservative:
# if we cannot estimate size from the repo id/tags, do not
# present it as runnable on this hardware.
continue
if needed_vram > vram_gb:
continue
out.append({
"repo_id": repo_id,
@@ -2510,6 +2807,33 @@ def setup_cookbook_routes() -> APIRouter:
except Exception as e:
logger.warning(f"orphan sweep: state write failed: {e}")
@router.get("/api/cookbook/hf-gguf-files")
async def hf_gguf_files(repo_id: str, owner: str = Depends(require_user)):
    """List GGUF files in a HuggingFace repo for the direct-download picker.

    Args:
        repo_id: HuggingFace repository id; passed through
            ``_validate_repo_id`` before it is interpolated into the URL.
        owner: Authenticated caller (resolved by ``require_user``).

    Returns:
        ``{"ok": True, "repo_id": ..., "files": [...]}`` on success, or
        ``{"ok": False, "files": [], "error": ...}`` when the HF API call
        fails or answers with a non-200 status.
    """
    import httpx

    repo_id = _validate_repo_id(repo_id)
    url = f"https://huggingface.co/api/models/{repo_id}"
    try:
        headers = {}
        token = _load_stored_hf_token()
        if token:
            headers["Authorization"] = f"Bearer {token}"
        async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
            resp = await client.get(url, headers=headers)
        if resp.status_code != 200:
            return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
        data = resp.json()
    except Exception:
        # Log the validated repo id. The previous code referenced an undefined
        # name here, so the handler itself raised NameError and the request
        # surfaced as a 500 instead of the structured error below.
        logger.exception("HF GGUF file scan failed for %s", repo_id)
        return {"ok": False, "files": [], "error": "HF API request failed"}

    # Be tolerant of unexpected payload shapes: a non-object body, a null
    # "siblings" value, or non-object entries must not crash the route.
    siblings = data.get("siblings") if isinstance(data, dict) else None
    files = []
    for entry in siblings or []:
        if not isinstance(entry, dict):
            continue
        rfilename = str(entry.get("rfilename") or "")
        if rfilename.lower().endswith(".gguf"):
            files.append(rfilename)
    return {"ok": True, "repo_id": repo_id, "files": files}
# In-memory cache for the Ollama library scrape. ollama.com is a public
# site, but it doesn't expose a stable JSON listing — we fetch the HTML
# search page and regex out the model cards. Cached for 1 h so a busy
+7 -2
View File
@@ -12,6 +12,7 @@ from pydantic import BaseModel
from core.database import Document, DocumentVersion
from core.database import Session as DbSession
from src.auth_helpers import _auth_disabled
from src.upload_handler import UploadHandler
logger = logging.getLogger(__name__)
@@ -78,6 +79,8 @@ def _verify_doc_owner(db, doc: Document, user: str):
the session join for any not-yet-backfilled legacy row.
"""
if user is None:
if _auth_disabled():
return # Single-user / no-auth mode: allow access
raise HTTPException(403, "Authentication required")
if doc.owner is not None:
if doc.owner != user:
@@ -102,8 +105,10 @@ def _owner_session_filter(q, user):
The owner backfill runs in init_db before the app serves requests, so
by the time this filter is live there are no NULL-owner rows to leak;
we therefore match the owner strictly."""
if user is None:
we therefore match the owner strictly for authenticated callers."""
if not user:
if user == "" or _auth_disabled():
return q
return q.filter(False)
return q.filter(Document.owner == user)
+13 -5
View File
@@ -10,7 +10,7 @@ from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File,
from sqlalchemy import case, func, or_
from core.database import SessionLocal, Document, DocumentVersion
from core.database import Session as DbSession
from src.auth_helpers import get_current_user
from src.auth_helpers import get_current_user, _auth_disabled
from src.constants import MAIL_ATTACHMENTS_DIR
logger = logging.getLogger(__name__)
@@ -388,7 +388,8 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
db = SessionLocal()
try:
if not user:
raise HTTPException(403, "Authentication required")
if not _auth_disabled():
raise HTTPException(403, "Authentication required")
# v2 review HIGH-9: raise 403 explicitly when the caller
# can't see this session, instead of returning [] which the
# UI treats identically to "no docs" and silently masks
@@ -503,7 +504,8 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
user = get_current_user(request)
try:
data = await request.json()
except Exception:
except Exception as e:
logger.warning("Failed to parse export request body, defaulting to empty", exc_info=e)
data = {}
ids = data.get("ids") or []
if not ids:
@@ -645,8 +647,8 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
try:
from src.agent_tools.document_tools import clear_active_document
clear_active_document(doc_id)
except Exception:
pass
except Exception as e:
logger.warning("Failed to clear active document %r on detach", doc_id, exc_info=e)
db.commit()
db.refresh(doc)
return _doc_to_dict(doc)
@@ -1331,6 +1333,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found")
# Fail fast with a clear 503 if the optional PyMuPDF dependency
# is missing — fill_fields/stamp_annotations will otherwise
# raise RuntimeError deep inside and bubble out as a 500.
# Mirrors the convention in _load_pdf_viewer_fitz above.
_load_pdf_viewer_fitz()
values = parse_markdown_to_values(doc.current_content or "")
out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
_to_unlink.append(out_path)
+45 -8
View File
@@ -1233,22 +1233,30 @@ def _list_attachments_from_msg(msg):
return attachments
idx = 0
for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
# Skip text/html body parts (only consider real attachments)
if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue
filename = part.get_filename()
if filename:
filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else:
# Inline images, etc. - generate a name
ext = ct.split("/")[-1] if "/" in ct else "bin"
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}"
payload = part.get_payload(decode=True)
size = len(payload) if payload else 0
if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
size = len(payload) if payload is not None else 0
attachments.append({
"index": idx,
"filename": filename,
@@ -1260,29 +1268,58 @@ def _list_attachments_from_msg(msg):
return attachments
def _is_likely_signature_image_attachment(att: dict) -> bool:
"""Match the reader's inline signature/logo image filter."""
filename = str((att or {}).get("filename") or "").lower()
if not re.search(r"\.(png|jpe?g|gif|bmp|svg|webp)$", filename):
return False
size = int((att or {}).get("size") or 0)
if re.search(r"^image\d{3,}\.(png|jpe?g|gif)$", filename):
return True
if re.search(r"^(signature|logo|sig|footer|banner)[-_\d]*\.(png|jpe?g|gif|svg)$", filename):
return True
return 0 < size < 30 * 1024
def _has_visible_attachments(msg) -> bool:
    """Report whether ``msg`` has at least one attachment the reader shows as a chip.

    Attachments classified as signature/logo images are ignored.
    """
    for attachment in _list_attachments_from_msg(msg):
        if not _is_likely_signature_image_attachment(attachment):
            return True
    return False
def _extract_attachment_to_disk(msg, index, target_dir):
"""Extract a specific attachment to disk and return the file path."""
if not msg.is_multipart():
return None
idx = 0
for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue
if idx == index:
filename = part.get_filename()
if filename:
filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else:
ext = ct.split("/")[-1] if "/" in ct else "bin"
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}"
# Sanitize
safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip()
payload = part.get_payload(decode=True)
if not payload:
if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
if payload is None:
return None
target_dir.mkdir(parents=True, exist_ok=True)
filepath = target_dir / safe_name
+29 -15
View File
@@ -44,6 +44,17 @@ from routes.email_helpers import (
logger = logging.getLogger(__name__)
# Recovers a `[{"action": ...}, ...]` JSON array from raw LLM output when the
# fenced-block strip leaves nothing usable. Runs on model output influenced by
# untrusted email bodies, so it must not backtrack catastrophically: the object
# content class is `[^{}]` (brace-delimited, greedy) rather than the old
# `[^[\]]*?` lazy runs, which exploded exponentially on inputs like
# `[{"action"},{` + `}},{{` * N (CodeQL py/redos #198).
#
# Consequences of the `[^{}]` class: only flat objects (no nested braces) are
# matched, and the first object in the array must contain the literal
# `"action"`. The pattern contains no `.`, so re.DOTALL does not change what
# it matches.
_CAL_ACTION_ARRAY_RE = re.compile(
r'\[\s*\{[^{}]*"action"[^{}]*\}\s*(?:,\s*\{[^{}]*\}\s*)*\]',
re.DOTALL,
)
def _owner_for_email_account(account_id: str | None) -> str:
if not account_id:
@@ -558,7 +569,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
cal_extract = _strip_think(_raw_original)
cal_extract = re.sub(r"^```(?:json)?\s*|\s*```$", "", cal_extract, flags=re.MULTILINE).strip()
if not cal_extract and _raw_original:
matches = list(re.finditer(r'\[\s*\{[^[\]]*?"action"[^[\]]*?\}\s*(?:,\s*\{[^[\]]*?\}\s*)*\]', _raw_original, re.DOTALL))
matches = list(_CAL_ACTION_ARRAY_RE.finditer(_raw_original))
if matches:
cal_extract = matches[-1].group()
logger.info(f"[cal-extract] uid={uid.decode() if isinstance(uid, bytes) else uid} folder={_folder} subj={subject[:50]!r} raw_len={len(cal_extract)} orig_len={len(_raw_original)} raw={cal_extract[:800]!r}")
@@ -683,20 +694,23 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
logger.warning(f"[cal-extract] JSON parse failed: {je} on raw={cal_extract[:200]!r}")
except Exception as e:
logger.warning(f"[cal-extract] Meeting extraction LLM call failed for uid={uid}: {e}")
# Record we processed this email so we don't re-LLM next run
try:
_cc = _sql3.connect(SCHEDULED_DB)
_cc.execute(
"INSERT OR REPLACE INTO email_calendar_extractions "
"(message_id, owner, uid, events_created, created_at) VALUES (?, ?, ?, ?, ?)",
(message_id, account_owner or "", uid.decode() if isinstance(uid, bytes) else str(uid),
_cal_run_count, datetime.utcnow().isoformat())
)
_cc.commit()
_cc.close()
_cal_existing.add(message_id)
except Exception as ce:
logger.debug(f"Could not cache calendar extraction: {ce}")
else:
# Record we processed this email so we don't re-LLM next run.
# Only mark as processed on success — transient LLM failures
# are retried on the next poll run (matches summary/reply pattern).
try:
_cc = _sql3.connect(SCHEDULED_DB)
_cc.execute(
"INSERT OR REPLACE INTO email_calendar_extractions "
"(message_id, owner, uid, events_created, created_at) VALUES (?, ?, ?, ?, ?)",
(message_id, account_owner or "", uid.decode() if isinstance(uid, bytes) else str(uid),
_cal_run_count, datetime.utcnow().isoformat())
)
_cc.commit()
_cc.close()
_cal_existing.add(message_id)
except Exception as ce:
logger.debug(f"Could not cache calendar extraction: {ce}")
if need_urgent:
try:
+223 -47
View File
@@ -47,7 +47,7 @@ from routes.email_helpers import (
_IMAP_TIMEOUT_SECONDS, _open_imap_connection,
make_oauth_state, verify_oauth_state,
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
_extract_attachment_text, _list_attachments_from_msg,
_extract_attachment_text, _list_attachments_from_msg, _has_visible_attachments, _is_likely_signature_image_attachment,
_extract_attachment_to_disk, _extract_html, _extract_text,
_fetch_sender_thread_context, _pre_retrieve_context,
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
@@ -61,6 +61,7 @@ from routes.email_pollers import _start_poller
logger = logging.getLogger(__name__)
ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
EMAIL_READ_ATTACHMENT_VERSION = 2
def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
@@ -79,15 +80,16 @@ def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[st
cfg.get("smtp_user") or "",
cfg.get("from_address") or "",
])
except Exception:
except Exception as _e:
logger.warning("Failed to resolve email account alias", exc_info=_e)
resolved_account_id = None
row = db.get(_EA, resolved_account_id) if resolved_account_id else None
if row:
aliases.extend([row.owner or "", row.imap_user or "", row.from_address or ""])
finally:
db.close()
except Exception:
pass
except Exception as _e:
logger.warning("Failed to load email aliases", exc_info=_e)
out = []
for a in aliases:
a = (a or "").strip()
@@ -247,6 +249,21 @@ def _imap_uid_fetch(conn, uid_set: str | bytes, query: str):
return conn.uid("FETCH", _uid_bytes(uid_set), query)
def _imap_search_quote(value: str) -> str:
return '"' + str(value or "").replace("\\", "\\\\").replace('"', '\\"') + '"'
def _message_id_chain(*values: str) -> list[str]:
seen = set()
out = []
for value in values:
for mid in re.findall(r"<[^>]+>", value or ""):
if mid not in seen:
seen.add(mid)
out.append(mid)
return out
def _uid_from_fetch_meta(meta_b: bytes) -> str:
m = re.search(rb"\bUID\s+(\d+)\b", meta_b)
return m.group(1).decode() if m else ""
@@ -365,6 +382,21 @@ def _apply_odysseus_headers(msg, kind: str | None = None, ref_id: str | None = N
msg["X-Odysseus-Ref"] = re.sub(r"[^A-Za-z0-9_.:-]", "-", ref_id)[:128]
def _normalize_addr_field(field: str) -> str:
"""Strip the malformed-but-common trailing/leading commas and stray
whitespace from a To/Cc/Bcc string before it lands in the MIME header
or the SMTP envelope. Users often paste a single address with a
trailing comma (e.g. `felix@pewdiepie.com,`) and most MTAs reject the
resulting `To: felix@pewdiepie.com,` line as a syntax error. Collapse
any run of separator junk between addresses too."""
if not field:
return field
# Split on commas, drop empty tokens, rejoin with a single ', '.
parts = [p.strip() for p in field.split(",")]
parts = [p for p in parts if p]
return ", ".join(parts)
def _envelope_recipients(*fields: str) -> list:
"""Extract bare SMTP envelope addresses from one or more To/Cc/Bcc header
strings. A naive `field.split(",")` corrupts display names that contain a
@@ -993,6 +1025,65 @@ def setup_email_routes():
except Exception:
pass
def _related_thread_attachments_sync(
    folder: str,
    account_id: str | None,
    owner: str,
    current_uid: str,
    current_message_id: str,
    in_reply_to: str,
    references: str,
    limit: int = 12,
) -> list[dict]:
    """Return visible attachments from referenced messages in this folder.

    The message ids found in ``references`` and ``in_reply_to`` (minus the
    current message's own id) are looked up in ``folder`` via a
    ``HEADER Message-ID`` search. Each attachment entry that is not a
    signature/logo image is returned, enriched with ``source_*`` keys
    describing the message it came from.

    Work is bounded: at most the last 10 referenced ids are searched, at most
    the last 3 UIDs per id are fetched, and no more than ``limit`` entries are
    returned. Any IMAP failure is logged at debug level and whatever was
    collected so far is returned (best-effort lookup).
    """
    wanted_ids = _message_id_chain(references, in_reply_to)
    current_mid = (current_message_id or "").strip()
    wanted_ids = [mid for mid in wanted_ids if mid and mid != current_mid]
    if not wanted_ids:
        return []

    related: list[dict] = []
    try:
        with _imap(account_id, owner=owner) as conn:
            conn.select(_q(folder), readonly=True)
            # Search newest referenced messages first; cap work so opening
            # a long thread stays bounded.
            for mid in reversed(wanted_ids[-10:]):
                if len(related) >= limit:
                    break
                status, data = _imap_uid_search(conn, f'(HEADER Message-ID {_imap_search_quote(mid)})')
                if status != "OK" or not data or not data[0]:
                    continue
                for uid_b in reversed(data[0].split()[-3:]):
                    # Re-check the cap before every fetch: breaking out of the
                    # attachment loop below only leaves that inner loop, so
                    # without this check another full message would be
                    # downloaded and the result could grow past ``limit``.
                    if len(related) >= limit:
                        break
                    source_uid = uid_b.decode(errors="ignore")
                    if not source_uid or source_uid == str(current_uid):
                        continue
                    st2, msg_data = _imap_uid_fetch(conn, source_uid, "(BODY.PEEK[])")
                    if st2 != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                        continue
                    msg = email_mod.message_from_bytes(msg_data[0][1])
                    source_from = _decode_header(msg.get("From", ""))
                    source_subject = _decode_header(msg.get("Subject", ""))
                    source_date = msg.get("Date", "")
                    for att in _list_attachments_from_msg(msg):
                        if _is_likely_signature_image_attachment(att):
                            continue
                        enriched = dict(att)
                        enriched.update({
                            "source_uid": source_uid,
                            "source_folder": folder,
                            "source_message_id": (msg.get("Message-ID") or "").strip(),
                            "source_from": source_from,
                            "source_subject": source_subject,
                            "source_date": source_date,
                        })
                        related.append(enriched)
                        if len(related) >= limit:
                            break
    except Exception as e:
        logger.debug(f"related thread attachment lookup failed uid={current_uid}: {e}")
    return related
@router.get("/list")
async def list_emails(
folder: str = Query("INBOX"),
@@ -1263,6 +1354,17 @@ def setup_email_routes():
sender_name, sender_addr = email.utils.parseaddr(sender)
parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None
attachments = _list_attachments_from_msg(msg)
related_attachments = []
if not _has_visible_attachments(msg):
related_attachments = _related_thread_attachments_sync(
folder,
account_id,
owner,
uid,
message_id,
in_reply_to,
references,
)
if mark_seen:
# Set \Seen in a separate readwrite session so concurrent reads
@@ -1371,6 +1473,8 @@ def setup_email_routes():
"body": body,
"body_html": body_html,
"attachments": attachments,
"related_attachments": related_attachments,
"attachment_version": EMAIL_READ_ATTACHMENT_VERSION,
"cached_summary": cached_summary,
"cached_ai_reply": cached_ai_reply,
"boundaries": cached_boundaries,
@@ -1401,6 +1505,12 @@ def setup_email_routes():
"""Read email body. Cached for 30m, sync IMAP work runs in a thread."""
ck = _read_cache_key(account_id, folder, uid, owner=owner)
cached = _read_cache_get(ck)
if cached is not None:
# Older cached read responses lack the thread-attachment fallback.
# Fetch once so replies that reference prior attachments can show
# those files without waiting for cache expiry.
if cached.get("attachment_version") != EMAIL_READ_ATTACHMENT_VERSION:
cached = None
if cached is not None:
if mark_seen:
try:
@@ -1535,6 +1645,12 @@ def setup_email_routes():
return {"error": f"Attachment index {index} not found"}
from pathlib import Path as _Path
target_root = os.path.abspath(str(target_dir))
filepath_str = os.path.abspath(str(filepath))
if os.path.commonpath([target_root, filepath_str]) != target_root:
logger.warning("Rejected attachment path outside extraction dir: %s", filepath)
return {"error": "Invalid attachment path"}
filepath = _Path(filepath_str)
base = _Path(filepath).name
if base.startswith("."):
return {"error": "Invalid filename", "filename": base}
@@ -1589,6 +1705,65 @@ def setup_email_routes():
return None
doc_session_id = _resolve_doc_session()
def _create_markdown_doc(content: str, summary: str):
    """Persist ``content`` as a new active markdown document and return its id.

    Every currently active document is deactivated first, then the document
    row and its version-1 row (carrying ``summary``) are inserted in a single
    commit. The new document is tagged with its email source afterwards.
    """
    from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV

    new_doc_id = str(uuid.uuid4())
    new_version_id = str(uuid.uuid4())
    db_session = _SL()
    try:
        # Only one document may be active at a time.
        db_session.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
        document_row = _Doc(
            id=new_doc_id,
            session_id=doc_session_id,
            title=title,
            language="markdown",
            current_content=content,
            version_count=1,
            is_active=True,
        )
        db_session.add(document_row)
        version_row = _DV(
            id=new_version_id,
            document_id=new_doc_id,
            version_number=1,
            content=content,
            summary=summary,
            source="upload",
        )
        db_session.add(version_row)
        db_session.commit()
    finally:
        db_session.close()
    _tag_doc_with_source(new_doc_id)
    return new_doc_id
def _attached_email_markdown(raw_bytes: bytes):
    """Render an attached email (raw message bytes) as a markdown document.

    The output has a title line, the From/To/Cc/Date headers that are
    present, a "Body" section, and an "Attachments" section listing the
    attached email's own attachments (name, content type, size).

    Empty input or an unparseable message produces a short placeholder
    document instead of raising.
    """
    if not raw_bytes:
        return f"# Attached email: {base}\n\n_(empty email attachment)_"
    try:
        attached_msg = email_mod.message_from_bytes(raw_bytes)
    except Exception:
        logger.exception("Failed to parse attached email %s", base)
        return f"# Attached email: {base}\n\nCould not parse this email attachment."

    # Bytes produced by serialising a message/rfc822 MIME part still begin with
    # that part's own headers (Content-Type, Content-Disposition, ...), so the
    # parsed object is the wrapper and the real message is its single payload
    # item. Reading Subject/From/To from the wrapper yields nothing, and the
    # wrapper would list itself as an attachment. Unwrap it, but only when the
    # outer object carries no message headers of its own.
    if attached_msg.get_content_type() == "message/rfc822" and not any(
        attached_msg.get(header) for header in ("Subject", "From", "Date")
    ):
        inner = attached_msg.get_payload()
        if isinstance(inner, list) and inner and hasattr(inner[0], "get_content_type"):
            attached_msg = inner[0]

    attached_subject = _decode_header(attached_msg.get("Subject", "")) or base
    attached_from = _decode_header(attached_msg.get("From", ""))
    attached_to = _decode_header(attached_msg.get("To", ""))
    attached_cc = _decode_header(attached_msg.get("Cc", ""))
    attached_date = attached_msg.get("Date", "")
    attached_body = _extract_text(attached_msg).strip()
    attached_atts = _list_attachments_from_msg(attached_msg)

    lines = [f"# Attached email: {attached_subject}", ""]
    if attached_from:
        lines.append(f"**From:** {attached_from}")
    if attached_to:
        lines.append(f"**To:** {attached_to}")
    if attached_cc:
        lines.append(f"**Cc:** {attached_cc}")
    if attached_date:
        lines.append(f"**Date:** {attached_date}")
    lines.extend(["", "## Body", "", attached_body or "_(no readable body)_"])
    if attached_atts:
        lines.extend(["", "## Attachments", ""])
        for att in attached_atts:
            size = int(att.get("size") or 0)
            # Bytes below 1 KiB, rounded KiB above.
            size_label = f"{size} B" if size < 1024 else f"{round(size / 1024)} KB"
            name = att.get("filename") or f"attachment_{att.get('index', '')}"
            ctype = att.get("content_type") or "application/octet-stream"
            lines.append(f"- {name} ({ctype}, {size_label})")
    return "\n".join(lines).strip()
# ── PDF path (existing) ────────────────────────────────────
if ext == ".pdf":
import shutil as _shutil
@@ -1635,6 +1810,39 @@ def setup_email_routes():
_tag_doc_with_source(doc_id)
return {"doc_id": doc_id, "filename": filepath.name}
# ── Attached email (.eml / message/rfc822) ────────────────
if ext == ".eml":
def _attachment_bytes_from_msg():
    """Return the raw bytes of the attachment at position ``index`` in ``msg``.

    Parts are counted with the same skip rules used when the attachment list
    is built: multipart containers are skipped unless they are attached
    emails, and text/plain or text/html parts are skipped unless marked as
    attachments. Returns ``b""`` when nothing matches.
    """
    if not msg.is_multipart():
        return b""
    position = 0
    for part in msg.walk():
        disposition = str(part.get("Content-Disposition", ""))
        ctype = part.get_content_type()
        attached_email = ctype == "message/rfc822" and ("attachment" in disposition.lower() or part.get_filename())
        if part.is_multipart() and not attached_email:
            continue
        if ctype in ("text/plain", "text/html") and "attachment" not in disposition:
            continue
        if position != index:
            position += 1
            continue
        data = part.get_payload(decode=True)
        if data is None and ctype == "message/rfc822":
            # Attached emails have no decodable payload; serialise the part.
            try:
                data = part.as_bytes()
            except Exception:
                data = b""
        return data or b""
    return b""
try:
content = _attached_email_markdown(_attachment_bytes_from_msg())
except Exception:
logger.exception("Failed to read email attachment %s", base)
return {"error": "Failed to read email attachment", "filename": base}
doc_id = _create_markdown_doc(content, "Imported attached email")
return {"doc_id": doc_id, "filename": filepath.name}
# ── DOCX path: extract text → markdown document ───────────
if ext == ".docx":
try:
@@ -1672,25 +1880,7 @@ def setup_email_routes():
lines.append("")
content = "\n".join(lines).strip() or f"_(empty {base})_"
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from DOCX", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
doc_id = _create_markdown_doc(content, "Imported from DOCX")
return {"doc_id": doc_id, "filename": filepath.name}
# ── Plain text / markdown ────────────────────────────────
@@ -1699,25 +1889,7 @@ def setup_email_routes():
content = filepath.read_text(encoding="utf-8", errors="replace")
except Exception as e:
return {"error": f"Failed to read text file: {e}", "filename": base}
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from email attachment", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
doc_id = _create_markdown_doc(content, "Imported from email attachment")
return {"doc_id": doc_id, "filename": filepath.name}
return {"error": f"Unsupported attachment type: {ext}", "filename": base}
@@ -2026,6 +2198,9 @@ def setup_email_routes():
outer = MIMEMultipart("alternative")
body_container = outer
to = _normalize_addr_field(to or "")
cc = _normalize_addr_field(cc or "")
bcc = _normalize_addr_field(bcc or "")
outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
outer["To"] = to
if cc:
@@ -2170,12 +2345,10 @@ def setup_email_routes():
try:
conn = sqlite3.connect(SCHEDULED_DB)
conn.row_factory = sqlite3.Row
# The MCP server can't easily set owner, so it stores '' — fall
# back to those rows in addition to the caller's owner.
rows = conn.execute(
"""SELECT id, to_addr, subject, body, created_at, account_id
FROM scheduled_emails
WHERE status = 'agent_draft' AND (owner = ? OR owner = '')
WHERE status = 'agent_draft' AND owner = ?
ORDER BY created_at DESC""",
(owner or "",),
).fetchall()
@@ -2196,7 +2369,7 @@ def setup_email_routes():
cur = conn.execute(
"""UPDATE scheduled_emails
SET status = 'pending', send_at = ?
WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
WHERE id = ? AND status = 'agent_draft' AND owner = ?""",
(datetime.utcnow().isoformat(), sid, owner or ""),
)
conn.commit()
@@ -2217,7 +2390,7 @@ def setup_email_routes():
conn = sqlite3.connect(SCHEDULED_DB)
cur = conn.execute(
"""UPDATE scheduled_emails SET status = 'cancelled'
WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
WHERE id = ? AND status = 'agent_draft' AND owner = ?""",
(sid, owner or ""),
)
conn.commit()
@@ -2303,6 +2476,9 @@ def setup_email_routes():
outer = MIMEMultipart("alternative")
body_container = outer
req.to = _normalize_addr_field(req.to or "")
req.cc = _normalize_addr_field(req.cc or "")
req.bcc = _normalize_addr_field(req.bcc or "")
outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
outer["To"] = req.to
if req.cc:
+1
View File
@@ -9,6 +9,7 @@ from pathlib import Path
from fastapi import APIRouter, HTTPException, Form, Depends
from core.constants import EMBEDDING_ENDPOINT_FILE, FASTEMBED_CACHE_DIR
from core.middleware import require_admin
from src.runtime_paths import get_app_root
logger = logging.getLogger(__name__)
-8
View File
@@ -67,14 +67,6 @@ def _gallery_image_path(filename: str) -> Path:
raise HTTPException(400, "Unsafe gallery filename")
if safe_name != original:
raise HTTPException(400, "Unsafe gallery filename")
if not path.exists():
cwd_root = (Path.cwd() / "data" / "generated_images").resolve()
cwd_path = (cwd_root / safe_name).resolve()
try:
if os.path.commonpath([str(cwd_root), str(cwd_path)]) == str(cwd_root) and cwd_path.exists():
return cwd_path
except Exception:
pass
return path
+111 -4
View File
@@ -1,8 +1,13 @@
import json
import os
import re
import shlex
import subprocess
from copy import deepcopy
from fastapi import APIRouter, HTTPException
from core.platform_compat import run_ssh_command
from routes._validators import validate_remote_host, validate_ssh_port
@@ -107,6 +112,73 @@ def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_v
return system
def _run_model_probe(host: str, ssh_port: str, cmd: str) -> str:
    """Run a short shell probe locally or over SSH and return its stdout.

    With an empty ``host`` the command runs through a local ``bash -lc``;
    otherwise it is dispatched with ``run_ssh_command``. The stripped stdout is
    returned only when the command exits with status 0. A non-zero exit or any
    exception yields ``""``.
    """
    try:
        if not host:
            proc = subprocess.run(["bash", "-lc", cmd], capture_output=True, text=True, timeout=15)
        else:
            proc = run_ssh_command(
                host,
                ssh_port or None,
                cmd,
                timeout=15,
                connect_timeout=5,
                strict_host_key_checking=False,
                text=True,
            )
        if proc.returncode != 0:
            return ""
        return (proc.stdout or "").strip()
    except Exception:
        # Probes are best-effort: callers treat an empty string as "no data".
        return ""
def _inspect_model_path(model_path: str, host: str = "", ssh_port: str = "") -> dict:
"""Read lightweight metadata from a local or SSH-visible HF model folder."""
path = (model_path or "").strip()
if not path or path.startswith(("http://", "https://")):
return {}
if not (path.startswith("/") or path.startswith("~")):
return {}
qpath = shlex.quote(path)
qconfig = shlex.quote(os.path.join(path, "config.json"))
out = {}
exists = _run_model_probe(host, ssh_port, f"test -d {qpath} && printf found || printf missing")
if exists != "found":
target = host or "local container"
out["model_probe_error"] = f"Model path is not visible on {target}: {path}"
return out
raw_config = _run_model_probe(host, ssh_port, f"test -f {qconfig} && sed -n '1,240p' {qconfig}")
if raw_config:
try:
cfg = json.loads(raw_config)
except Exception:
cfg = {}
for key in ("context_length", "max_position_embeddings", "n_ctx_train", "model_max_length", "max_seq_len"):
value = cfg.get(key)
if isinstance(value, (int, float)) and value > 0:
out["model_ctx_max"] = int(value)
break
else:
out["model_probe_error"] = f"config.json not found in model path: {path}"
size_cmd = (
f"find {qpath} -type f \\( -name '*.safetensors' -o -name '*.bin' -o -name '*.gguf' \\) "
"-printf '%s\\n' 2>/dev/null | awk '{s+=$1} END {if (s>0) printf \"%.6f\", s/1073741824}'"
)
weights = _run_model_probe(host, ssh_port, size_cmd)
try:
weights_gb = float(weights)
except Exception:
weights_gb = 0.0
if weights_gb > 0:
out["model_weights_gb"] = round(weights_gb, 3)
elif "model_probe_error" not in out:
out["model_probe_error"] = f"No model weight files found in: {path}"
return out
def setup_hwfit_routes():
router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
@@ -235,7 +307,7 @@ def setup_hwfit_routes():
return {"system": system, "models": results}
@router.get("/profiles")
def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
def get_serve_profiles(model: str = "", model_path: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
"""Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
against the detected hardware on `host` (or local). Returns concrete
flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
@@ -260,8 +332,23 @@ def setup_hwfit_routes():
# "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct".
s = (s or "").lower().strip()
s = s.split("/")[-1] # drop org prefix
s = re.sub(r"[-_.]?gguf$", "", s) # drop trailing gguf marker
s = re.sub(r"[-_.](q\d[^/]*|iq\d[^/]*|fp8|bf16|f16|awq[^/]*|gptq[^/]*)$", "", s)
for suffix in ("-gguf", "_gguf", ".gguf", "gguf"):
if s.endswith(suffix):
s = s[: -len(suffix)]
break
cut_at = None
for idx, ch in enumerate(s):
if ch not in "-_." or idx + 1 >= len(s):
continue
suffix = s[idx + 1:]
if (
suffix in {"fp8", "bf16", "f16"}
or suffix.startswith(("awq", "gptq", "iq"))
or (suffix.startswith("q") and len(suffix) > 1 and suffix[1].isdigit())
):
cut_at = idx
if cut_at is not None:
s = s[:cut_at]
return s
m = catalog.get(model)
@@ -272,8 +359,16 @@ def setup_hwfit_routes():
if nn and (nn == want or want.endswith(nn) or nn.endswith(want)):
m = entry
break
path_meta = _inspect_model_path(model_path or model, host=host, ssh_port=ssh_port)
if m is None:
return {"system": system, "profiles": [], "error": "model not in catalog"}
return {
"system": system,
"profiles": [],
"error": "model not in catalog",
"model_ctx_max": int(path_meta.get("model_ctx_max") or 0),
"model_weights_gb": float(path_meta.get("model_weights_gb") or 0),
"model_probe_error": path_meta.get("model_probe_error") or "",
}
# Surface the model's trained context limit so the serve UI can clamp a
# user-typed context down to it (asking for ctx > n_ctx_train overflows
# and, with a quantized KV cache, can crash the GPU).
@@ -283,6 +378,16 @@ def setup_hwfit_routes():
if isinstance(v, (int, float)) and v > 0:
model_ctx_max = int(v)
break
path_ctx_max = int(path_meta.get("model_ctx_max") or 0)
if path_ctx_max > 0:
model_ctx_max = max(model_ctx_max, path_ctx_max)
model_weights_gb = float(path_meta.get("model_weights_gb") or 0)
if model_weights_gb <= 0:
for k in ("min_vram_gb", "required_gb", "size_gb", "recommended_ram_gb", "min_ram_gb"):
v = m.get(k)
if isinstance(v, (int, float)) and v > 0:
model_weights_gb = float(v)
break
return {
"system": system,
"profiles": compute_serve_profiles(
@@ -291,6 +396,8 @@ def setup_hwfit_routes():
serve_quant=(serve_quant or None),
),
"model_ctx_max": model_ctx_max,
"model_weights_gb": model_weights_gb,
"model_probe_error": path_meta.get("model_probe_error") or "",
}
@router.get("/image-models")
+33 -58
View File
@@ -273,65 +273,30 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
async def api_audit_memories(request: Request, session: str = Form(None)):
"""Deduplicate and consolidate memories via LLM.
Uses the default model from settings, or falls back to a session's model.
Uses task/utility/default settings through the shared resolver, with
the active session as fallback when no task or utility model is set.
Returns before and after memory counts.
"""
from routes.model_routes import _load_settings, _normalize_base, build_chat_url
from core.database import ModelEndpoint
import json as _json
endpoint_url = model = None
headers = {}
# Try utility model from settings first — memory audit is a background
# task and should prefer the lighter utility model over the main chat model.
from src.task_endpoint import resolve_task_endpoint
user = _owner(request)
t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
if t_url and t_model:
endpoint_url, model, headers = t_url, t_model, t_headers
else:
# Fall back to default model if no task/utility model configured
settings = _load_settings()
ep_id = settings.get("default_endpoint_id", "")
default_model = settings.get("default_model", "")
if ep_id:
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
).first()
if ep:
base = _normalize_base(ep.base_url)
endpoint_url = build_chat_url(base)
model = default_model
if not model and ep.models:
try:
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
if models:
model = models[0]
except Exception:
pass
if ep.api_key:
headers = {"Authorization": f"Bearer {ep.api_key}"}
finally:
db.close()
fallback_url = fallback_model = None
fallback_headers = None
if session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, user)
fallback_url = sess.endpoint_url
fallback_model = sess.model
fallback_headers = sess.headers
except KeyError:
pass
# Fall back to session model if no default configured
if not endpoint_url and session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
except KeyError:
pass
endpoint_url, model, headers = resolve_task_endpoint(
fallback_url, fallback_model, fallback_headers, owner=user
)
if not endpoint_url or not model:
raise HTTPException(400, "No default model configured — set one in Settings")
user = _owner(request)
result = await audit_memories(
memory_manager,
memory_vector,
@@ -369,18 +334,28 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
model = None
headers = {}
user = _owner(request)
if session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url, model, headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
)
_assert_session_owner(sess, user)
except KeyError:
logger.warning("Session %s not found, falling back to utility endpoint", session)
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
sess = None
except HTTPException as exc:
if exc.status_code != 404:
raise
sess = None
if sess is None:
logger.warning("Session %s not found or inaccessible, falling back to utility endpoint", session)
endpoint_url, model, headers = resolve_endpoint("utility", owner=user)
else:
endpoint_url, model, headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=user
)
else:
endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
endpoint_url, model, headers = resolve_task_endpoint(owner=user)
if not endpoint_url or not model:
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
+149 -31
View File
@@ -5,6 +5,7 @@ import re
import uuid
import json
import hashlib
import ipaddress
import socket
import time as _time
import logging
@@ -16,6 +17,7 @@ from fastapi import APIRouter, HTTPException, Form, Query, Body, Request, Respon
from pydantic import BaseModel
from fastapi.responses import StreamingResponse
from core.database import SessionLocal, ModelEndpoint, Session as DbSession
from core.log_safety import redact_url as _redact_url_for_log
from core.middleware import require_admin
from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
from src.tls_overrides import llm_verify
@@ -405,8 +407,11 @@ def _endpoint_refresh_timeout(ep: Any, category: str) -> float:
except Exception:
val = 0
if val > 0:
return float(max(1, min(30, val)))
return 2.5 if category == "local" else 2.0
return float(max(1, min(60, val)))
# llama.cpp and other local OpenAI-compatible servers can block briefly
# while warming/loading. A 2s local timeout makes working endpoints flicker
# offline before /v1/models is ready.
return 10.0 if category == "local" else 2.0
def _manual_refresh_timeout(ep: Any, category: str, requested: Any = None) -> float:
@@ -473,7 +478,7 @@ def _explicit_model_list_timeout(base_url: str, endpoint_kind: str = "auto", req
category = _classify_endpoint(base_url, kind)
if kind in ("api", "proxy") or category == "api":
return 30.0
return 3.0 if _is_ollama_base(base_url) else 2.0
return 15.0 if category == "local" else (3.0 if _is_ollama_base(base_url) else 2.0)
def _cached_model_ids(ep: Any) -> List[str]:
@@ -518,6 +523,10 @@ _NON_CHAT_EXACT_PREFIXES = (
def _is_chat_model(model_id: str) -> bool:
"""Return True if the model ID looks like a chat/completions-capable model."""
if not isinstance(model_id, str):
# Non-compliant upstreams can return non-string IDs (e.g. int/None);
# treat them as chat-capable rather than crashing on .lower().
return True
mid = model_id.lower()
for prefix in _NON_CHAT_PREFIXES:
if mid.startswith(prefix):
@@ -562,6 +571,8 @@ def _safe_build_models_url(base_url: str) -> str:
"""Build a /models URL without letting optional provider imports break probes."""
try:
return build_models_url(base_url)
except ValueError:
raise
except Exception as exc:
logger.debug("Model URL detection failed for %s: %s", base_url, exc)
return f"{(base_url or '').rstrip('/')}/models"
@@ -633,7 +644,7 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
try:
t0 = _time.time()
r = httpx.post(target_url, headers=h, json=payload, timeout=timeout)
r = httpx.post(target_url, headers=h, json=payload, timeout=timeout, verify=llm_verify())
latency = round((_time.time() - t0) * 1000)
if r.is_success:
return {"status": "ok", "latency_ms": latency}
@@ -659,13 +670,20 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
# Hostnames / IP prefixes that indicate a local endpoint
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
"172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
"172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
"172.30.", "172.31.", "192.168.")
_PRIVATE_NETWORKS = (
ipaddress.ip_network("10.0.0.0/8"),
ipaddress.ip_network("172.16.0.0/12"),
ipaddress.ip_network("192.168.0.0/16"),
)
_TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
_TAILSCALE_RE = re.compile(r"^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\.")
def _local_ip_literal(host: str) -> bool:
try:
ip = ipaddress.ip_address(host)
except ValueError:
return False
return any(ip in network for network in _PRIVATE_NETWORKS) or ip in _TAILSCALE_CGNAT
def _classify_endpoint(base_url: str, endpoint_kind: str = "auto") -> str:
@@ -679,9 +697,7 @@ def _classify_endpoint(base_url: str, endpoint_kind: str = "auto") -> str:
return "api"
try:
host = urlparse(base_url).hostname or ""
if host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES):
return "local"
if _TAILSCALE_RE.match(host):
if host in _LOCAL_HOSTS or _local_ip_literal(host):
return "local"
except Exception:
pass
@@ -703,6 +719,44 @@ def _effective_endpoint_kind(ep: Any, base_url: str) -> str:
return "auto"
def _is_loading_model_response(resp: Any) -> bool:
if getattr(resp, "status_code", None) != 503:
return False
try:
body = resp.text or ""
except Exception:
body = ""
return "loading model" in body.lower()
def _openai_model_ids(data: Any) -> List[str]:
"""Extract OpenAI-style model IDs (``{"data": [{"id": ...}]}``).
Tolerates a non-dict body and non-string IDs from non-compliant upstreams,
returning only non-empty string IDs.
"""
items = data.get("data") if isinstance(data, dict) else None
return [m["id"] for m in (items or [])
if isinstance(m, dict) and isinstance(m.get("id"), str) and m["id"]]
def _ollama_model_names(data: Any) -> List[str]:
"""Extract native-Ollama model names (``{"models": [{"name"|"model": ...}]}``).
Same tolerance as :func:`_openai_model_ids`: a non-dict body or non-string
value is skipped rather than crashing, preserving name-then-model precedence.
"""
items = data.get("models") if isinstance(data, dict) else None
out: List[str] = []
for m in (items or []):
if not isinstance(m, dict):
continue
v = m.get("name") or m.get("model")
if isinstance(v, str) and v:
out.append(v)
return out
def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]:
"""Probe a base URL's /models endpoint and return list of model IDs.
@@ -726,7 +780,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
r.raise_for_status()
data = r.json()
models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
models = _openai_model_ids(data)
if models:
return models
except httpx.HTTPStatusError as e:
@@ -748,10 +802,10 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
r.raise_for_status()
data = r.json()
# OpenAI format: {"data": [{"id": "model-name"}]}
models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
models = _openai_model_ids(data)
# Ollama format: {"models": [{"name": "model-name"}]}
if not models:
models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
models = _ollama_model_names(data)
if models:
# Z.AI coding plan omits some working models from /models;
# append curated-only entries for that endpoint only.
@@ -767,16 +821,19 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
models.append(_e)
return [m for m in models if _is_chat_model(m)]
except httpx.HTTPStatusError as e:
if e.response is not None and _is_loading_model_response(e.response):
logger.info("Endpoint still loading model at %s", _redact_url_for_log(url))
return []
if api_key:
status = e.response.status_code if e.response is not None else "unknown"
logger.warning(f"Failed to probe {url} with API key: HTTP {status}")
logger.warning("Failed to probe %s with API key: HTTP %s", _redact_url_for_log(url), status)
return []
logger.warning(f"Failed to probe {url}: {e}")
logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
except Exception as e:
if api_key:
logger.warning(f"Failed to probe {url} with API key: {e}")
logger.warning("Failed to probe %s with API key: %s", _redact_url_for_log(url), e)
return []
logger.warning(f"Failed to probe {url}: {e}")
logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
# Older Ollama builds and some proxies expose native /api/tags even when
# the OpenAI-compatible /v1/models path is unavailable.
@@ -787,7 +844,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify())
r.raise_for_status()
data = r.json()
models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
models = _ollama_model_names(data)
if models:
return [m for m in models if _is_chat_model(m)]
except Exception as e:
@@ -816,6 +873,15 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
or "ollama" in (parsed_base.hostname or "").lower()
)
def _is_loading_model_response(r) -> bool:
if getattr(r, "status_code", None) != 503:
return False
try:
body = r.text or ""
except Exception:
body = ""
return "loading model" in body.lower()
def _result_from_response(r) -> Dict[str, Any]:
if 300 <= r.status_code < 400:
loc = r.headers.get("location", "")
@@ -832,6 +898,13 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
"status_code": r.status_code,
"error": None,
}
if _is_loading_model_response(r):
return {
"reachable": True,
"loading": True,
"status_code": r.status_code,
"error": "Loading model",
}
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
last_error: Optional[str] = None
@@ -864,7 +937,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
if 400 <= sc < 500 and sc not in (401, 403):
models_url = _safe_build_models_url(base)
try:
r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
r2 = httpx.get(models_url, headers=headers,timeout=timeout, verify=llm_verify())
result2 = _result_from_response(r2)
if result2["reachable"]:
return result2
@@ -1048,9 +1121,11 @@ def setup_model_routes(model_discovery):
except Exception:
return 0.0
def _failure_delay(fails: int) -> float:
def _failure_delay(fails: int, *, empty_local: bool = False) -> float:
if fails <= 0:
return 0.0
if empty_local:
return min(5.0 * (2 ** max(0, fails - 1)), 30.0)
return min(_REFRESH_FAILURE_BASE * (2 ** max(0, fails - 1)), _REFRESH_FAILURE_MAX)
def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]:
@@ -1081,7 +1156,12 @@ def setup_model_routes(model_discovery):
fails = int(state.get("fail_count") or 0)
if fails and not force:
last_failure = float(state.get("last_failure") or 0.0)
if now - last_failure < _failure_delay(fails):
empty_local = (
not cached
and category == "local"
and str(getattr(ep, "id", "") or "").startswith("local-")
)
if now - last_failure < _failure_delay(fails, empty_local=empty_local):
return False, info
if cached and not force:
interval = _endpoint_refresh_interval(ep, category)
@@ -1396,7 +1476,7 @@ def setup_model_routes(model_discovery):
t0 = _time.time()
ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
entry["latency_ms"] = round((_time.time() - t0) * 1000)
entry["status"] = "online" if ping.get("reachable") or cached_count else "offline"
entry["status"] = "loading" if ping.get("loading") else ("online" if ping.get("reachable") or cached_count else "offline")
entry["error"] = ping.get("error")
entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
except Exception as e:
@@ -1570,9 +1650,37 @@ def setup_model_routes(model_discovery):
# "everything's already cached" path because this branch only
# runs for endpoints with an empty cached_models.
if not all_models and not pinned and r.is_enabled:
ping = _ping_endpoint(r.base_url, r.api_key, timeout=3.5)
base_for_ping = _normalize_base(r.base_url)
kind_for_ping = _effective_endpoint_kind(r, base_for_ping)
ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5
ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout)
if ping.get("reachable"):
status = "empty"
status = "loading" if ping.get("loading") else "empty"
if ping.get("loading"):
base = _normalize_base(r.base_url)
kind = _effective_endpoint_kind(r, base)
results.append({
"id": r.id,
"name": r.name,
"base_url": r.base_url,
"has_key": bool(r.api_key),
"api_key_fingerprint": _api_key_fingerprint(r.api_key),
"is_enabled": r.is_enabled,
"models": visible,
"pinned_models": pinned,
"hidden_count": len(hidden),
"online": True,
"status": status,
"ping_error": (ping or {}).get("error") if ping else None,
"model_type": getattr(r, "model_type", None) or "llm",
"supports_tools": getattr(r, "supports_tools", None),
"endpoint_kind": kind,
"category": _classify_endpoint(base, kind),
"model_refresh_mode": _endpoint_refresh_mode(r, kind),
"model_refresh_interval": getattr(r, "model_refresh_interval", None),
"model_refresh_timeout": getattr(r, "model_refresh_timeout", None),
})
continue
# Best-effort: if the probe came back reachable, try
# to populate cached_models in the background so the
# NEXT picker load shows "online" instead of "empty".
@@ -1580,7 +1688,7 @@ def setup_model_routes(model_discovery):
# "empty" status, and the existing background refresh
# path will eventually fill it in too.
try:
probed = _probe_endpoint(r.base_url, r.api_key, timeout=5)
probed = _probe_endpoint(r.base_url, r.api_key, timeout=max(5, int(ping_timeout)))
if probed:
r.cached_models = json.dumps(probed)
db.commit()
@@ -1758,7 +1866,7 @@ def setup_model_routes(model_discovery):
model_ids = _probe_endpoint(base_url, api_key.strip() or None, timeout=explicit_timeout) if should_probe else []
ping = {"reachable": False, "error": None}
if (should_probe or requested_kind in ("api", "proxy")) and not model_ids:
ping = _ping_endpoint(base_url, api_key.strip() or None, timeout=min(explicit_timeout, 2.0))
ping = _ping_endpoint(base_url, api_key.strip() or None, timeout=min(explicit_timeout, 10.0))
if require_model_list and not model_ids:
raise HTTPException(400, _model_endpoint_error_message(base_url, ping))
@@ -1825,7 +1933,7 @@ def setup_model_routes(model_discovery):
"models": _merge_model_ids(model_ids, _pinned),
"pinned_models": _pinned,
"online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
"status": "online" if (model_ids or _pinned) else ("empty" if ping.get("reachable") else "offline"),
"status": "online" if (model_ids or _pinned) else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None,
"endpoint_kind": requested_kind,
"category": _classify_endpoint(base_url, requested_kind),
@@ -1850,11 +1958,11 @@ def setup_model_routes(model_discovery):
configured_timeout = _parse_positive_int(model_refresh_timeout, minimum=1, maximum=60)
probe_timeout = _explicit_model_list_timeout(base_url, requested_kind, configured_timeout)
models = _probe_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=min(probe_timeout, 2.0))
ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=min(probe_timeout, 10.0))
return {
"base_url": base_url,
"online": bool(models) or bool(ping.get("reachable")),
"status": "online" if models else ("empty" if ping.get("reachable") else "offline"),
"status": "online" if models else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
"ping_error": ping.get("error") if ping else None,
"models": models,
"count": len(models),
@@ -2032,6 +2140,16 @@ def setup_model_routes(model_discovery):
ep_id = (_user_prefs.get("default_endpoint_id") or "").strip()
model = (_user_prefs.get("default_model") or "").strip()
_fallbacks = _user_prefs.get("default_model_fallbacks") or []
# If the user has no personal default, fall back to the global default,
# but only when the "share_defaults_with_users" setting is enabled.
if settings.get("share_defaults_with_users", False):
if not ep_id:
ep_id = settings.get("default_endpoint_id", "")
if not model:
model = settings.get("default_model", "")
if not _fallbacks:
_fallbacks = settings.get("default_model_fallbacks") or []
else:
ep_id = settings.get("default_endpoint_id", "")
model = settings.get("default_model", "")
+7 -5
View File
@@ -10,6 +10,7 @@ from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel
from core.database import SessionLocal, Note
from core.middleware import INTERNAL_TOOL_USER
from src.auth_helpers import require_user
from src.constants import DATA_DIR
from sqlalchemy.orm.attributes import flag_modified
@@ -334,10 +335,11 @@ async def dispatch_reminder(
# Loud diagnostic so we can see WHY a reminder didn't send (the
# previous "silently no-op when cfg has no smtp_host" was invisible).
logger.info(
f"dispatch_reminder[email] note_id={note_id} owner={owner!r} "
f"smtp_host={cfg.get('smtp_host')!r} smtp_user={cfg.get('smtp_user')!r} "
f"from={from_addr!r} recipient={recipient!r} "
f"account_name={cfg.get('account_name')!r}"
"dispatch_reminder[email] note_id=%s owner=%r "
"has_smtp_host=%s has_smtp_user=%s has_from=%s has_recipient=%s",
note_id, owner,
bool(cfg.get("smtp_host")), bool(cfg.get("smtp_user")),
bool(from_addr), bool(recipient),
)
missing = []
if not cfg.get("smtp_host"):
@@ -582,7 +584,7 @@ def setup_note_routes(task_scheduler=None):
return require_user(request) or None
def _is_admin_or_single_user(request: Request, user: str | None) -> bool:
if user == "internal-tool":
if user == INTERNAL_TOOL_USER:
return True
if not user:
# require_user() already admitted this request, which only happens
+90 -5
View File
@@ -2,8 +2,9 @@
"""Routes for personal documents management."""
import os
import logging
import shutil
import uuid
from typing import List, Tuple
from typing import Any, Dict, List, Tuple
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
from src.request_models import DirectoryRequest
from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR
@@ -18,14 +19,15 @@ UPLOADS_DIR = PERSONAL_UPLOADS_DIR
logger = logging.getLogger(__name__)
def _personal_upload_dir_for_owner(owner: str | None) -> str:
def _personal_upload_dir_for_owner(owner: str | None, *, create: bool = True) -> str:
"""Return the per-owner upload directory used for direct RAG uploads."""
owner_segment = secure_filename((owner or "local").strip())[:80] or "local"
upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment))
base_abs = os.path.abspath(UPLOADS_DIR)
if os.path.commonpath([upload_dir, base_abs]) != base_abs:
raise ValueError("Unsafe upload owner path")
os.makedirs(upload_dir, exist_ok=True)
if create:
os.makedirs(upload_dir, exist_ok=True)
return upload_dir
@@ -44,6 +46,87 @@ def _unique_personal_upload_path(upload_dir: str, original_name: str | None) ->
raise ValueError("Unsafe upload filename")
return file_path, filename, safe_name
def _unique_existing_target(path: str) -> str:
"""Return a non-existing sibling path for rename collision handling."""
if not os.path.exists(path):
return path
stem, ext = os.path.splitext(path)
while True:
candidate = f"{stem}-{uuid.uuid4().hex[:10]}{ext}"
if not os.path.exists(candidate):
return candidate
def _remove_empty_tree(path: str) -> None:
"""Best-effort removal of empty directories under ``path``."""
if not os.path.isdir(path):
return
for root, dirs, _files in os.walk(path, topdown=False):
for dirname in dirs:
candidate = os.path.join(root, dirname)
try:
os.rmdir(candidate)
except OSError:
pass
try:
os.rmdir(path)
except OSError:
pass
def rename_personal_upload_owner(
    old_owner: str,
    new_owner: str,
    *,
    personal_docs_manager: Any = None,
    rag_manager: Any = None,
) -> Dict[str, Any]:
    """Relocate a renamed user's direct uploads and notify the managers.

    Files under the old owner's upload directory are moved into the new
    owner's directory, keeping relative sub-paths. A target name that is
    already taken gets a unique suffix via ``_unique_existing_target``.
    Afterwards, ``personal_docs_manager.rename_directory`` and
    ``rag_manager.rename_owner`` are called when those managers are given
    and expose the method.

    Returns a dict with ``old_dir``, ``new_dir``, ``moved_files``,
    ``path_map`` (absolute source -> absolute target) and ``rag_result``
    (the return value of ``rename_owner``, or None when it was not called).
    """
    old_dir = _personal_upload_dir_for_owner(old_owner, create=False)
    new_dir = _personal_upload_dir_for_owner(new_owner, create=False)
    path_map: Dict[str, str] = {}

    # Nothing to move when both owners map to the same directory or the
    # old owner never uploaded anything.
    if old_dir != new_dir and os.path.isdir(old_dir):
        os.makedirs(new_dir, exist_ok=True)
        for src_root, _subdirs, names in os.walk(old_dir):
            relative = os.path.relpath(src_root, old_dir)
            if relative == ".":
                dest_root = new_dir
            else:
                dest_root = os.path.join(new_dir, relative)
            os.makedirs(dest_root, exist_ok=True)
            for name in names:
                src = os.path.abspath(os.path.join(src_root, name))
                wanted = os.path.abspath(os.path.join(dest_root, name))
                dest = _unique_existing_target(wanted)
                shutil.move(src, dest)
                path_map[src] = dest
        _remove_empty_tree(old_dir)

    # Managers are optional and duck-typed: only call hooks that exist.
    docs_hook = getattr(personal_docs_manager, "rename_directory", None)
    if callable(docs_hook):
        docs_hook(old_dir, new_dir, path_map=path_map)

    rag_result = None
    rag_hook = getattr(rag_manager, "rename_owner", None)
    if callable(rag_hook):
        rag_result = rag_hook(
            old_owner,
            new_owner,
            path_map=path_map,
            path_prefixes=[(old_dir, new_dir)],
        )

    return {
        "old_dir": old_dir,
        "new_dir": new_dir,
        # Every moved file has a distinct source path, so the map size is
        # the number of files moved.
        "moved_files": len(path_map),
        "path_map": path_map,
        "rag_result": rag_result,
    }
def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
"""
Setup personal documents related routes.
@@ -275,11 +358,13 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
except Exception as e:
logger.warning(f"RAG removal failed for {filepath}: {e}")
# Delete file from disk if it's in uploads dir
# Delete file from disk if it's in the caller's own uploads dir.
# Scope to the per-owner subdir, not the shared uploads root, so one
# admin can't delete another user's personal files by path.
deleted_from_disk = False
try:
abs_target = os.path.realpath(filepath)
base_abs = os.path.realpath(UPLOADS_DIR)
base_abs = os.path.realpath(_personal_upload_dir_for_owner(owner, create=False))
in_uploads = (
abs_target == base_abs
or os.path.commonpath([abs_target, base_abs]) == base_abs
+4 -2
View File
@@ -12,8 +12,10 @@ from typing import Optional
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel, Field
from core.middleware import INTERNAL_TOOL_USER
from src.endpoint_resolver import resolve_endpoint
from src.auth_helpers import _auth_disabled, get_current_user
from core.auth import RESERVED_USERNAMES
from src.constants import DEEP_RESEARCH_DIR
_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
@@ -385,9 +387,9 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Launch a research job from the dedicated panel."""
from src.auth_helpers import require_privilege
user = require_privilege(request, "can_use_research")
if user == "internal-tool":
if user == INTERNAL_TOOL_USER:
tool_owner = (request.headers.get("X-Odysseus-Owner") or "").strip()
if tool_owner and tool_owner not in {"internal-tool", "api", "demo", "system"}:
if tool_owner and tool_owner not in RESERVED_USERNAMES:
auth_mgr = getattr(request.app.state, "auth_manager", None)
if auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
try:
+16 -5
View File
@@ -11,7 +11,7 @@ from core.session_manager import SessionManager
from core.models import ChatMessage
from src.request_models import SessionResponse
from core.database import Session as DbSession, SessionLocal, Document, GalleryImage, utcnow_naive
from src.auth_helpers import get_current_user, effective_user, _auth_disabled, owner_filter
from src.auth_helpers import effective_user, _auth_disabled, owner_filter
from src.session_actions import is_session_recently_active
@@ -328,7 +328,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
endpoint_id: str = Form(""),
):
skip_val = str(skip_validation).lower() == "true"
user = get_current_user(request)
user = effective_user(request)
endpoint_api_key = ""
endpoint_base_url = ""
_reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
@@ -477,7 +477,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
db.close()
# Switch model/endpoint mid-session
if model is not None and endpoint_url is not None:
user = get_current_user(request)
user = effective_user(request)
_reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
endpoint_api_key = ""
endpoint_base_url = ""
@@ -1004,6 +1004,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
"""
from src.llm_core import llm_call
user = effective_user(request)
single_user_mode = not user and _auth_disabled()
user_sessions = session_manager.get_sessions_for_user(user)
# Delete empty and throwaway sessions before sorting
@@ -1022,7 +1023,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
}
_THROWAWAY_MAX_MESSAGES = 4 # only delete if <= this many messages
try:
rows = db.query(DbSession).filter(DbSession.archived == False, DbSession.owner == user).limit(2000).all()
rows_q = db.query(DbSession).filter(DbSession.archived == False)
if user:
rows_q = rows_q.filter(DbSession.owner == user)
elif not single_user_mode:
rows_q = rows_q.filter(DbSession.owner == user)
rows = rows_q.limit(2000).all()
folder_map = {r.id: r.folder for r in rows}
# Precompute per-session message counts in TWO aggregate queries
# instead of 13 queries PER session — with many chats the per-row
@@ -1242,7 +1248,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
db = SessionLocal()
try:
for sid, folder_name in assignments.items():
db_session = db.query(DbSession).filter(DbSession.id == sid, DbSession.owner == user).first()
db_session_q = db.query(DbSession).filter(DbSession.id == sid)
if user:
db_session_q = db_session_q.filter(DbSession.owner == user)
elif not single_user_mode:
db_session_q = db_session_q.filter(DbSession.owner == user)
db_session = db_session_q.first()
if db_session:
db_session.folder = folder_name
db_session.updated_at = datetime.utcnow()
+368 -13
View File
@@ -15,6 +15,7 @@ from collections import namedtuple
from pathlib import Path
from typing import Dict, Any
from core.platform_compat import IS_APPLE_SILICON, which_tool
from core.middleware import INTERNAL_TOOL_USER
from src.optional_deps import prepare_optional_dependency_import
# POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
@@ -55,7 +56,7 @@ def _require_admin(request: Request):
# In-process tool loopback. The AuthMiddleware already validated the
# internal token + loopback client before setting this marker, so
# honour it here as admin-equivalent.
if user == "internal-tool":
if user == INTERNAL_TOOL_USER:
return
if not user or user == "api":
raise HTTPException(403, "Admin only")
@@ -330,6 +331,9 @@ def add_user_install_bins_to_path():
candidates.append(os.path.join(site.USER_BASE, 'bin'))
except Exception:
pass
candidates.append(os.path.expanduser('~/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build-vulkan/bin'))
candidates.append(os.path.expanduser('~/.local/bin'))
parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
changed = False
@@ -961,12 +965,84 @@ def setup_shell_routes() -> APIRouter:
return StreamingResponse(generate(), media_type="text/event-stream")
def _os_id_from_release(text: str) -> str:
"""Map /etc/os-release contents to a canonical family for our matrix."""
if not text:
return ""
ids = []
for line in text.splitlines():
line = line.strip()
if line.startswith("ID=") or line.startswith("ID_LIKE="):
ids += line.split("=", 1)[1].strip().strip('"').split()
ids = [i.lower() for i in ids]
if any(x in ids for x in ("debian", "ubuntu", "linuxmint", "pop", "elementary")):
return "debian"
if any(x in ids for x in ("arch", "manjaro", "endeavouros", "cachyos", "garuda")):
return "arch"
if any(x in ids for x in ("fedora", "rhel", "centos", "rocky", "almalinux", "ol")):
return "fedora"
if "alpine" in ids:
return "alpine"
if any(x in ids for x in ("suse", "opensuse", "opensuse-leap", "opensuse-tumbleweed", "sles")):
return "suse"
return ""
# Matrix lookup keyed on (os_family, backend) → (pkg_mgr_cmd_template, pkg_list_per_dep).
# Each `system_prereqs` name resolves to a list of OS-specific package
# names that get joined into the final `sudo apt install -y …` etc.
# command. Backend-specific extras (CUDA toolkit, ROCm, Vulkan headers)
# are added only when the detected backend needs them.
_PKG_NAMES = {
# canonical-name → {os_id: [actual_pkg_names_on_this_os]}
"cmake": {"debian": ["cmake"], "arch": ["cmake"], "fedora": ["cmake"], "alpine": ["cmake"], "suse": ["cmake"], "macos": ["cmake"]},
"build-essential": {"debian": ["build-essential"], "arch": ["base-devel"], "fedora": ["gcc", "gcc-c++", "make"], "alpine": ["build-base"], "suse": ["gcc-c++", "make"], "macos": []},
"g++": {"debian": ["g++"], "arch": ["gcc"], "fedora": ["gcc-c++"], "alpine": ["g++"], "suse": ["gcc-c++"], "macos": []},
"gcc": {"debian": ["gcc"], "arch": ["gcc"], "fedora": ["gcc"], "alpine": ["gcc"], "suse": ["gcc"], "macos": []},
"make": {"debian": ["make"], "arch": ["make"], "fedora": ["make"], "alpine": ["make"], "suse": ["make"], "macos": []},
"git": {"debian": ["git"], "arch": ["git"], "fedora": ["git"], "alpine": ["git"], "suse": ["git"], "macos": ["git"]},
"tmux": {"debian": ["tmux"], "arch": ["tmux"], "fedora": ["tmux"], "alpine": ["tmux"], "suse": ["tmux"], "macos": ["tmux"]},
}
_BACKEND_EXTRAS = {
"cuda": {"debian": ["nvidia-cuda-toolkit"], "arch": ["cuda"], "fedora": ["cuda-toolkit"], "alpine": [], "suse": ["cuda"], "macos": []},
"rocm": {"debian": ["rocm-dev"], "arch": ["rocm-hip-sdk"], "fedora": ["rocm-devel"], "alpine": [], "suse": ["rocm-dev"], "macos": []},
"vulkan": {"debian": ["libvulkan-dev", "vulkan-tools"], "arch": ["vulkan-headers", "vulkan-tools"], "fedora": ["vulkan-headers", "vulkan-tools"], "alpine": ["vulkan-loader-dev", "vulkan-tools"], "suse": ["vulkan-devel", "vulkan-tools"], "macos": []},
}
_PKG_MGR = {
"debian": "sudo apt install -y {pkgs}",
"arch": "sudo pacman -S --needed {pkgs}",
"fedora": "sudo dnf install -y {pkgs}",
"alpine": "sudo apk add {pkgs}",
"suse": "sudo zypper install -n {pkgs}",
"macos": "brew install {pkgs}",
}
def _install_cmd_for_target(os_id: str, backend: str, missing: list[str]) -> str:
    """Render one install command covering `missing` on the given OS family.

    `missing` holds canonical prereq names; each is translated through
    `_PKG_NAMES` for `os_id`, then the `_BACKEND_EXTRAS` entry for `backend`
    (case-insensitive, may be empty/None) is appended. Duplicates are dropped
    while keeping first-seen order. Returns "" when the OS family is unknown
    or when nothing resolves to an actual package.
    """
    if not os_id:
        return ""
    template = _PKG_MGR.get(os_id)
    if template is None:
        return ""
    resolved = [
        pkg
        for prereq in missing
        for pkg in _PKG_NAMES.get(prereq, {}).get(os_id, [])
    ]
    # Backend extras go last and only for the backend the build would
    # actually consume (a CUDA toolkit isn't useful on a Vulkan box).
    backend_key = (backend or "").lower()
    resolved.extend(_BACKEND_EXTRAS.get(backend_key, {}).get(os_id, []))
    # dict.fromkeys de-duplicates while preserving insertion order.
    unique_pkgs = list(dict.fromkeys(resolved))
    if not unique_pkgs:
        return ""
    return template.format(pkgs=" ".join(unique_pkgs))
@router.get("/api/cookbook/packages")
async def list_packages(
request: Request,
host: str | None = None,
ssh_port: str | None = None,
venv: str | None = None,
backend: str | None = None,
):
"""Check which optional packages are installed.
@@ -1015,6 +1091,12 @@ def setup_shell_routes() -> APIRouter:
"kind": "system",
"install_hint": "Install Docker on the selected server and allow this user to run docker.",
},
# Note: cmake / gcc / git are not separate dependency rows —
# they're declared as `system_prereqs` on llama_cpp (and any
# other engine that compiles from source) so they appear as
# an inline status note on that engine's row instead of
# cluttering the panel with raw OS package names that aren't
# meaningful product-level dependencies on their own.
# ── LLM ── installs on GPU servers for model serving/downloading
{
"name": "hf_transfer",
@@ -1026,9 +1108,16 @@ def setup_shell_routes() -> APIRouter:
{
"name": "llama_cpp",
"pip": "llama-cpp-python[server]",
"desc": "Serve GGUF models via llama.cpp",
"desc": "Great for single-GPU or CPU inference with GGUF models",
"category": "LLM",
"target": "remote",
# Build-toolchain prereqs. Cookbook's launch bootstrap
# compiles llama-server from source when no prebuilt
# binary is present; without these the build aborts
# with `cmake: command not found`. Surfaced inline on
# this row so the user doesn't have to chase three
# separate OS-package rows.
"system_prereqs": ["cmake", "g++", "git"],
},
{
"name": "sglang",
@@ -1040,7 +1129,7 @@ def setup_shell_routes() -> APIRouter:
{
"name": "vllm",
"pip": "vllm",
"desc": "High-throughput LLM serving engine",
"desc": "Great for high-throughput multi-GPU inference",
"category": "LLM",
"target": "remote",
},
@@ -1103,6 +1192,7 @@ def setup_shell_routes() -> APIRouter:
# venv over SSH so a remote `pip install` actually reflects here.
remote_status: dict = {}
remote_details: dict = {}
remote_probe_error = ""
remote_names = [
p["name"]
for p in packages
@@ -1141,16 +1231,56 @@ def setup_shell_routes() -> APIRouter:
break
except ValueError as e:
raise HTTPException(400, str(e))
except Exception:
except Exception as e:
remote_status = {}
if host and remote_system_names:
remote_probe_error = f"SSH package probe failed: {str(e)[:160]}"
if "llama_cpp" in remote_names:
try:
inner = (
'export PATH="$HOME/.local/bin:$HOME/bin:'
'$HOME/llama.cpp/build/bin:$HOME/llama.cpp/build-vulkan/bin:$PATH"; '
"command -v llama-server 2>/dev/null || true"
)
argv = _ssh_base_argv(host, ssh_port) + [inner]
proc = await asyncio.create_subprocess_exec(
*argv,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
out, _err = await asyncio.wait_for(proc.communicate(), timeout=8)
llama_server_path = out.decode("utf-8", errors="replace").strip().splitlines()
llama_server_path = llama_server_path[-1].strip() if llama_server_path else ""
if llama_server_path:
remote_status["llama_cpp"] = True
probe = remote_details.setdefault("llama_cpp", {})
if isinstance(probe, dict):
probe.setdefault("binaries", {})["llama-server"] = llama_server_path
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH llama-server probe failed: {str(e)[:160]}"
pass
# Union of system_names + every package's system_prereqs. Probing
# the prereqs alongside the main system deps in a single SSH call
# avoids a second round-trip per Cookbook → Dependencies refresh.
prereq_names: set[str] = set()
for p in packages:
for pr in p.get("system_prereqs") or []:
prereq_names.add(str(pr))
all_system_names = list(set(remote_system_names) | prereq_names)
# Detect the target's OS family + read /etc/os-release in the same
# SSH round-trip as the prereq probe — used downstream to render a
# single OS-specific install command per row instead of dumping
# every distro's syntax onto the user.
target_os_id: str = ""
if host and all_system_names:
try:
checks = []
for name in remote_system_names:
for name in all_system_names:
qn = shlex.quote(name)
checks.append(
f"if command -v {qn} >/dev/null 2>&1; then echo {qn}=1; else echo {qn}=0; fi"
)
checks.append("echo '---OSREL---'; cat /etc/os-release 2>/dev/null || true")
inner = " ; ".join(checks)
argv = _ssh_base_argv(host, ssh_port) + [inner]
proc = await asyncio.create_subprocess_exec(
@@ -1160,20 +1290,45 @@ def setup_shell_routes() -> APIRouter:
)
out, _err = await asyncio.wait_for(proc.communicate(), timeout=12)
txt = out.decode("utf-8", errors="replace").strip()
_section, _osrel_lines = "probe", []
for line in txt.splitlines():
if line.strip() == "---OSREL---":
_section = "osrel"; continue
if _section == "osrel":
_osrel_lines.append(line)
continue
name, sep, value = line.strip().partition("=")
if sep and name in remote_system_names:
if sep and name in all_system_names:
remote_status[name] = value == "1"
target_os_id = _os_id_from_release("\n".join(_osrel_lines))
except ValueError as e:
raise HTTPException(400, str(e))
except Exception:
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH system probe failed: {str(e)[:160]}"
pass
elif not host:
# Local target — probe in-process so the inline install command
# still appears in the dep panel when the cookbook container
# itself is the selected server.
try:
with open("/etc/os-release", encoding="utf-8") as f:
target_os_id = _os_id_from_release(f.read())
except Exception:
target_os_id = ""
if sys.platform == "darwin":
target_os_id = "macos"
for pkg in packages:
on_remote = bool(host and pkg.get("target") == "remote")
probe = None
if on_remote:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
if remote_probe_error and pkg["name"] not in remote_status:
pkg["installed"] = None
pkg["probe_error"] = remote_probe_error
pkg["status_note"] = remote_probe_error
else:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
probe = remote_details.get(pkg["name"])
if isinstance(probe, dict):
pkg["details"] = probe
@@ -1222,13 +1377,116 @@ def setup_shell_routes() -> APIRouter:
pkg["installed"] = False
except importlib_metadata.PackageNotFoundError:
pkg["installed"] = False
except Exception:
except (Exception, SystemExit):
# Installed but crashes on import — e.g. a CUDA build of
# llama-cpp-python raising FileNotFoundError when the CUDA
# toolkit dir is absent. One broken optional package must not
# 500 the entire packages panel; report it as not usable.
# toolkit dir is absent, or rembg calling sys.exit(1) when no
# onnxruntime backend can be loaded. SystemExit is a
# BaseException, not Exception, so without catching it here a
# single sys.exit-on-import package escapes and takes down the
# whole packages panel / worker (the panel hangs forever). One
# broken optional package must not 500 — or hang — the entire
# panel; report it as not usable.
pkg["installed"] = False
# llama_cpp partial-state probe: when the package is installed
# but the wheel was built CPU-only AND the target has NVIDIA
# hardware, mark the row as partial (yellow/orange) with a
# one-click upgrade to the CUDA wheel. Without this the row
# reads "ready" green while inference runs at 3 tok/s on GPU
# silicon — actively misleading.
if pkg["name"] == "llama_cpp" and pkg.get("installed"):
_native_llama_server = bool(
isinstance(probe, dict)
and isinstance(probe.get("binaries"), dict)
and probe["binaries"].get("llama-server")
)
_gpu_capable = False
_has_nvidia_target = False
if _native_llama_server:
# Native llama-server is the launcher path Cookbook now
# prefers. Do not mark this as a CPU-only Python wheel just
# because llama-cpp-python is absent from the selected venv.
_gpu_capable = True
elif on_remote and host:
try:
# Activate the configured venv FIRST so the probe
# runs against the same python the launch script
# would activate. Without this prefix, bare
# `python3` was checked — which can disagree with
# the venv's wheel (e.g. user-site has CUDA wheel
# but venv has CPU-only), and the dep panel then
# showed "ready" green while every launch fell to
# CPU.
_vp = _venv_activate_prefix(venv)
probe = (
f'{_vp}python3 -c "import llama_cpp; import sys; '
'sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" '
'&& echo llama_cpp_gpu=1 || echo llama_cpp_gpu=0; '
'command -v nvidia-smi >/dev/null 2>&1 '
'&& nvidia-smi -L 2>/dev/null | grep -q "GPU " '
'&& echo nvidia=1 || echo nvidia=0'
)
argv = _ssh_base_argv(host, ssh_port) + [probe]
proc = await asyncio.create_subprocess_exec(
*argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
)
out, _ = await asyncio.wait_for(proc.communicate(), timeout=8)
txt = out.decode("utf-8", errors="replace")
if "llama_cpp_gpu=1" in txt:
_gpu_capable = True
if "nvidia=1" in txt:
_has_nvidia_target = True
except Exception:
pass
else:
try:
import llama_cpp as _lcp # type: ignore
_gpu_capable = bool(_lcp.llama_supports_gpu_offload())
except Exception:
_gpu_capable = False
_has_nvidia_target = shutil.which("nvidia-smi") is not None
if (not _gpu_capable) and _has_nvidia_target:
pkg["partial"] = True
pkg["partial_reason"] = "Installed but CPU-only wheel — GPU detected on this target. Upgrade to a CUDA wheel for ~10× faster inference."
pkg["partial_action"] = "reinstall_llama_cpp_cuda"
# Attach per-package system_prereqs status. We probed each
# prereq name above; surface "Missing build deps: …" ONLY
# when the package itself is not installed — if the package
# works (e.g. llama-cpp-python already imports cleanly), the
# build toolchain is irrelevant and surfacing it as a red
# flag confuses users ("ready" + "missing" on the same row).
_prereqs = list(pkg.get("system_prereqs") or [])
if _prereqs:
if on_remote:
_pr_present = {n: bool(remote_status.get(n)) for n in _prereqs}
else:
_pr_present = {n: shutil.which(n) is not None for n in _prereqs}
pkg["system_prereqs_status"] = _pr_present
_missing = [n for n, ok in _pr_present.items() if not ok]
# Suppress the "missing build deps" hint when the package
# itself is installed — build deps are only relevant if
# the user would need to recompile from source.
if pkg.get("installed"):
_missing = []
if _missing:
# Build a target-specific install command from the
# (os_family, backend) matrix when we know both. Fall
# back to the multi-distro hint only when the target's
# OS can't be classified (e.g. ssh probe failed).
_resolved_os = target_os_id or "debian" # safest default
_cmd = _install_cmd_for_target(_resolved_os, backend or "", _missing)
if _cmd and target_os_id:
_hint = "Missing build deps for this target: " + ", ".join(_missing)
pkg["install_cmd_for_target"] = _cmd
pkg["install_cmd_os"] = target_os_id
pkg["install_cmd_backend"] = (backend or "").lower()
else:
_hint = "Missing build deps: " + ", ".join(_missing) + ". Install via apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git."
_existing_note = pkg.get("status_note") or ""
pkg["status_note"] = (_existing_note + "" + _hint) if _existing_note else _hint
pkg["build_deps_missing"] = _missing
if pkg.get("installed"):
update_status = _package_pip_update_status(pkg, probe)
pkg["pip_update_available"] = update_status.available
@@ -1288,6 +1546,102 @@ def setup_shell_routes() -> APIRouter:
return {"ok": True, "output": stdout.decode()[-200:]}
return {"ok": False, "error": stderr.decode()[-300:]}
@router.post("/api/cookbook/install-system-deps")
async def install_system_deps(request: Request):
    """Install OS-level system packages (cmake/build-essential/git/tmux)
    on a remote target or in the local container. Admin only.

    Bounded by a per-package allowlist — anything outside the catalog
    is rejected so the route can't be coerced into installing arbitrary
    OS packages. Uses `sudo -n` (passwordless) so the call returns a
    clear "needs sudo password" error instead of hanging when interactive
    sudo is required.

    Request JSON fields:
        packages: list of canonical package names (filtered by the allowlist).
        remote_host: optional SSH target; empty means run locally via bash.
        ssh_port: optional port handed to `_ssh_base_argv`.

    Returns:
        ``{"ok", "exit_code", "output", "error"}`` once the installer ran, or
        ``{"ok": False, "error": ...}`` when nothing was requested, the
        installer could not be started, or it timed out.

    Raises:
        HTTPException(400): when `_ssh_base_argv` rejects the host/port with
            a ValueError.
    """
    # NOTE(review): presumably raises for non-admin callers — confirm in
    # `_require_admin`.
    _require_admin(request)
    # A missing, malformed or non-object JSON body is treated as an empty
    # request; it then falls through to the "no installable packages" reply
    # instead of surfacing as a 500.
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}
    raw = body.get("packages") or []
    if not isinstance(raw, (list, tuple)):
        raw = []
    host = str(body.get("remote_host") or "").strip()
    ssh_port = body.get("ssh_port")
    # Names users can request — must match canonical names used in the
    # deps catalog's `system_prereqs` field and on the System rows.
    ALLOWED = {"cmake", "build-essential", "g++", "gcc", "git", "tmux", "make"}
    pkgs = list(dict.fromkeys(
        name for name in (str(p).strip() for p in raw) if name in ALLOWED
    ))
    if not pkgs:
        return {"ok": False, "error": "no installable packages requested (allowlist: " + ", ".join(sorted(ALLOWED)) + ")"}

    def _expand(names, mapping):
        """Translate canonical names to one package manager's names, de-duplicated in order."""
        translated = []
        for n in names:
            translated.extend(mapping.get(n, [n]))
        return list(dict.fromkeys(translated))

    # Re-map to the right package name per OS. apt/dpkg use the names as-is.
    apt_names = list(pkgs)
    # Arch has no `g++` package: g++ ships inside `gcc`, and `base-devel`
    # stands in for build-essential.
    pacman_names = _expand(pkgs, {"build-essential": ["base-devel"], "g++": ["gcc"]})
    dnf_names = _expand(pkgs, {"build-essential": ["gcc", "gcc-c++", "make"], "g++": ["gcc-c++"]})
    # Homebrew: the compiler toolchain names are dropped from the install list.
    brew_names = _expand(pkgs, {n: [] for n in ("build-essential", "g++", "gcc", "make")})

    apt_pkgs = " ".join(shlex.quote(p) for p in apt_names)
    pac_pkgs = " ".join(shlex.quote(p) for p in pacman_names)
    dnf_pkgs = " ".join(shlex.quote(p) for p in dnf_names)
    brew_pkgs = " ".join(shlex.quote(p) for p in brew_names)

    sudo_hint = (
        "ERROR: passwordless sudo unavailable on this target. Run once: sudo apt install -y "
        + " ".join(pkgs)
        + " (or your distro equivalent: pacman -S, dnf install, brew install). After that, Cookbook can install the rest."
    )
    if brew_pkgs:
        brew_step = f"brew install {brew_pkgs}; "
    else:
        # `brew install` with no formula is a usage error; report why the
        # list is empty instead.
        brew_step = (
            'echo "ERROR: none of the requested packages are installed via brew '
            '(on macOS the compiler toolchain comes from: xcode-select --install)." >&2; exit 4; '
        )
    # Build a single shell snippet that detects the package manager and
    # runs the right install. Non-interactive sudo (-n) only — if sudo
    # asks for a password the script reports it instead of hanging. The
    # sudo check runs only inside the branches that use sudo, so a brew
    # target is not rejected for lacking passwordless sudo.
    # Error messages go to stderr (>&2) so the route's error field
    # gets populated. Without the redirect, `echo "ERROR…"` on stdout
    # left stderr empty and the frontend toast fell through to a
    # bare "HTTP 200" instead of surfacing the real reason.
    script = "".join([
        "set -e; ",
        "need_sudo() { if ! sudo -n true 2>/dev/null; then ",
        f"echo {shlex.quote(sudo_hint)} >&2; exit 2; fi; }}; ",
        "if command -v apt-get >/dev/null 2>&1; then need_sudo; ",
        f"sudo -n env DEBIAN_FRONTEND=noninteractive apt-get update -qq && sudo -n env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends {apt_pkgs}; ",
        "elif command -v pacman >/dev/null 2>&1; then need_sudo; ",
        f"sudo -n pacman -Sy --needed --noconfirm {pac_pkgs}; ",
        "elif command -v dnf >/dev/null 2>&1; then need_sudo; ",
        f"sudo -n dnf install -y {dnf_pkgs}; ",
        "elif command -v brew >/dev/null 2>&1; then ",
        brew_step,
        "else ",
        'echo "ERROR: no supported package manager (apt/pacman/dnf/brew) on this target." >&2; exit 3; fi',
    ])
    try:
        if host:
            argv = _ssh_base_argv(host, ssh_port) + [script]
        else:
            argv = ["bash", "-lc", script]
    except ValueError as e:
        raise HTTPException(400, str(e))
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
    except OSError as e:
        # ssh/bash missing or not executable: report it in the same shape
        # as every other failure rather than as a 500.
        return {"ok": False, "error": f"Could not start installer: {e}"}
    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout=180)
    except asyncio.TimeoutError:
        # wait_for() only cancels communicate(); the child keeps running
        # unless it is killed and reaped here.
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        await proc.wait()
        return {"ok": False, "error": "Install timed out after 180s"}
    ok = (proc.returncode == 0)
    # Combine stderr + (last lines of stdout) into a single error
    # blob when ok=False — some package managers print useful failure
    # context to stdout, and a script that exits via `echo ...; exit N`
    # without `>&2` would otherwise hand back an empty error string
    # and force the frontend to show a bare "HTTP 200".
    err_txt = err.decode("utf-8", errors="replace").strip()
    out_txt = out.decode("utf-8", errors="replace").strip()
    if not ok:
        tail_out = out_txt[-500:] if out_txt else ""
        combined = err_txt or tail_out or f"exit code {proc.returncode}"
    else:
        combined = None
    return {
        "ok": ok,
        "exit_code": proc.returncode,
        "output": out_txt[-1000:],
        "error": combined,
    }
@router.post("/api/cookbook/rebuild-engine")
async def rebuild_engine(request: Request):
"""Clear the cached llama.cpp build so the next serve recompiles.
@@ -1308,7 +1662,8 @@ def setup_shell_routes() -> APIRouter:
return {"ok": False, "error": f"Unsupported engine: {engine}"}
host = str(body.get("remote_host") or "").strip()
ssh_port = body.get("ssh_port")
cmd = _llama_cpp_rebuild_cmd()
update_source = bool(body.get("update_source"))
cmd = _llama_cpp_rebuild_cmd(update_source=update_source)
try:
argv = (
(_ssh_base_argv(host, ssh_port) + [cmd])
+2 -1
View File
@@ -11,6 +11,7 @@ from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel
from core.database import SessionLocal, ScheduledTask, TaskRun
from core.middleware import INTERNAL_TOOL_USER
from core.constants import internal_api_base
from src.auth_helpers import get_current_user
from src.constants import DATA_DIR, EMAIL_URGENCY_CACHE_DIR
@@ -427,7 +428,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
# In-process tool-loopback marker — AuthMiddleware validated
# the internal token + loopback client before stamping this,
# so treat as admin-equivalent.
if user == "internal-tool":
if user == INTERNAL_TOOL_USER:
return True
try:
from core.auth import AuthManager
+80 -7
View File
@@ -3,11 +3,16 @@ import os
import time
import json
import asyncio
import shutil
import uuid
from pathlib import Path
from fastapi import APIRouter, Request, File, UploadFile, HTTPException
from typing import List
import logging
from core.middleware import require_admin
from src.auth_helpers import get_current_user
from core.database import SessionLocal, GalleryImage
from src.auth_helpers import effective_user
from src.constants import GENERATED_IMAGES_DIR
from src.upload_handler import count_recent_uploads
logger = logging.getLogger(__name__)
@@ -50,6 +55,69 @@ def setup_upload_routes(upload_handler):
raise HTTPException(404, "File not found")
raise HTTPException(404, "File not found")
def _promote_chat_image_to_gallery(meta: dict, owner: str | None) -> str | None:
    """Make chat-uploaded images visible in Gallery without changing chat storage.

    Copies the uploaded file into GENERATED_IMAGES_DIR under a fresh random
    name and inserts a GalleryImage row for it; the chat upload itself is
    left untouched.

    Args:
        meta: upload metadata; reads "name", "mime", "path", "hash",
            "width", "height" and "size".
        owner: owner recorded on the row; when set, the duplicate lookup is
            restricted to that owner's images.

    Returns:
        The id of the existing active image with the same hash, or the id of
        the newly created row. None when the upload is not an image, the
        source file is missing, or anything fails (failures are logged, never
        raised).
    """
    is_image_file = getattr(upload_handler, "is_image_file", None)
    if not callable(is_image_file):
        return None
    if not is_image_file(meta.get("name", ""), meta.get("mime", "")):
        return None

    source_path = meta.get("path")
    if not source_path or not os.path.isfile(source_path):
        return None

    db = SessionLocal()
    # Set just before the copy so the failure path knows which file it made.
    dest_path = None
    try:
        file_hash = meta.get("hash")
        if file_hash:
            q = db.query(GalleryImage).filter(
                GalleryImage.file_hash == file_hash,
                GalleryImage.is_active == True,  # noqa: E712
            )
            if owner:
                q = q.filter(GalleryImage.owner == owner)
            existing = q.first()
            if existing:
                return existing.id

        image_dir = Path(GENERATED_IMAGES_DIR)
        image_dir.mkdir(parents=True, exist_ok=True)
        # Prefer the original extension; otherwise derive it from the MIME
        # type, defaulting to .png.
        ext = Path(meta.get("name") or source_path).suffix.lower()
        if ext not in {".png", ".jpg", ".jpeg", ".webp", ".gif"}:
            mime_ext = {
                "image/png": ".png",
                "image/jpeg": ".jpg",
                "image/jpg": ".jpg",
                "image/webp": ".webp",
                "image/gif": ".gif",
            }.get(meta.get("mime", ""))
            ext = mime_ext or ".png"
        filename = f"{uuid.uuid4().hex[:12]}{ext}"
        dest_path = image_dir / filename
        shutil.copy2(source_path, dest_path)

        image_id = str(uuid.uuid4())
        db.add(GalleryImage(
            id=image_id,
            filename=filename,
            prompt=meta.get("name") or "Chat upload",
            model="chat-upload",
            owner=owner,
            file_hash=file_hash,
            width=meta.get("width"),
            height=meta.get("height"),
            file_size=meta.get("size"),
        ))
        db.commit()
        return image_id
    except Exception as e:
        # The copy happens before the insert; if the insert/commit (or the
        # copy itself) failed, remove the file so the gallery directory
        # doesn't collect images no row points at.
        if dest_path is not None:
            try:
                dest_path.unlink(missing_ok=True)
            except OSError:
                pass
        db.rollback()
        logger.warning("Failed to add chat image upload to gallery: %s", e)
        return None
    finally:
        db.close()
@router.post("")
async def api_upload(request: Request, files: List[UploadFile] = File(...)):
@@ -78,8 +146,10 @@ def setup_upload_routes(upload_handler):
for u in files:
try:
meta = upload_handler.save_upload(u, client_ip, owner=get_current_user(request))
out.append({
owner = effective_user(request)
meta = upload_handler.save_upload(u, client_ip, owner=owner)
gallery_id = _promote_chat_image_to_gallery(meta, owner)
item = {
"id": meta["id"],
"name": meta["name"],
"mime": meta["mime"],
@@ -89,7 +159,10 @@ def setup_upload_routes(upload_handler):
"width": meta.get("width"),
"height": meta.get("height"),
"is_duplicate": meta.get("is_duplicate", False)
})
}
if gallery_id:
item["gallery_id"] = gallery_id
out.append(item)
except HTTPException:
raise
except Exception as e:
@@ -138,7 +211,7 @@ def setup_upload_routes(upload_handler):
original_name = info.get("name", file_id)
auth_mgr = getattr(request.app.state, "auth_manager", None)
auth_configured = bool(auth_mgr and auth_mgr.is_configured)
current_user = get_current_user(request)
current_user = effective_user(request)
file_owner = info.get("owner") if info else None
if auth_configured:
if not current_user:
@@ -204,7 +277,7 @@ def setup_upload_routes(upload_handler):
info = _load_upload_info(file_id)
auth_mgr = getattr(request.app.state, "auth_manager", None)
auth_configured = bool(auth_mgr and auth_mgr.is_configured)
current_user = get_current_user(request)
current_user = effective_user(request)
file_owner = info.get("owner") if info else None
if auth_configured:
if not current_user:
@@ -247,7 +320,7 @@ def setup_upload_routes(upload_handler):
raise HTTPException(404, "File not found")
auth_mgr = getattr(request.app.state, "auth_manager", None)
auth_configured = bool(auth_mgr and auth_mgr.is_configured)
current_user = get_current_user(request)
current_user = effective_user(request)
file_owner = info.get("owner")
if auth_configured:
if not current_user:
+2 -3
View File
@@ -1,6 +1,5 @@
"""Webhook, API Token, and sync chat routes."""
import asyncio
import uuid
import logging
from typing import Optional
@@ -385,10 +384,10 @@ def setup_webhook_routes(
sess.add_message(ChatMessage("assistant", reply))
session_manager.save_sessions()
asyncio.create_task(webhook_manager.fire("chat.completed", {
webhook_manager.fire_and_forget("chat.completed", {
"session_id": session_id, "model": sess.model,
"user_message": message[:2000], "response": reply[:2000],
}))
})
return {"response": reply, "session_id": session_id, "model": sess.model}