Cookbook model workflow fixes

This commit is contained in:
pewdiepie-archdaemon
2026-06-21 11:02:35 +00:00
parent 8c46172e87
commit c504214925
38 changed files with 3042 additions and 459 deletions
+33 -4
View File
@@ -22,6 +22,31 @@ from fastapi import HTTPException
logger = logging.getLogger(__name__)
_CASUAL_OPENING_RE = re.compile(
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
re.IGNORECASE,
)
_CASUAL_BLOCKLIST_RE = re.compile(
r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
r"download|model|email|document|doc|note|calendar|task|search|web|research|"
r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
re.IGNORECASE,
)
def _is_casual_low_signal(text: str) -> bool:
"""Short greetings/slang should not pull memory, skills, RAG, or docs."""
s = str(text or "").strip()
m = _CASUAL_OPENING_RE.match(s)
if not m:
return False
tail = m.group("tail") or ""
if _CASUAL_BLOCKLIST_RE.search(tail):
return False
tail_words = re.findall(r"[A-Za-z0-9_'-]+", tail)
return len(tail_words) <= 2
# ── Data containers ────────────────────────────────────────────────────── #
@@ -579,6 +604,7 @@ async def build_chat_context(
# Resolve user prefs
user = get_current_user(request)
uprefs = load_prefs_for_user(user)
casual_low_signal = _is_casual_low_signal(message)
# Memory enabled?
mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True)
@@ -588,6 +614,9 @@ async def build_chat_context(
if not allow_tool_preprocessing:
mem_enabled = False
skills_enabled = False
if casual_low_signal:
mem_enabled = False
skills_enabled = False
logger.debug(
"Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
@@ -603,11 +632,11 @@ async def build_chat_context(
# Use RAG?
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
if incognito or not allow_tool_preprocessing or is_research_spinoff:
if incognito or not allow_tool_preprocessing or is_research_spinoff or casual_low_signal:
use_rag_val = False
# If pre-fetched search context was provided (compare mode), skip live web search
skip_web = bool(search_context) or not allow_tool_preprocessing
skip_web = bool(search_context) or not allow_tool_preprocessing or casual_low_signal
# Build context preface
# The stream path uses enhanced_message (with CoT/preprocessing applied),
@@ -626,7 +655,7 @@ async def build_chat_context(
incognito=incognito,
use_skills=skills_enabled,
)
if use_rag is not None or is_research_spinoff:
if use_rag is not None or is_research_spinoff or casual_low_signal:
_preface_kwargs["use_rag"] = use_rag_val
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
@@ -634,7 +663,7 @@ async def build_chat_context(
used_memories = getattr(chat_processor, '_last_used_memories', [])
# Inject pre-fetched search context (compare mode)
if search_context and allow_tool_preprocessing:
if search_context and allow_tool_preprocessing and not casual_low_signal:
preface.append(untrusted_context_message("prefetched search context", search_context))
# YouTube transcripts
+10 -1
View File
@@ -826,7 +826,11 @@ def setup_chat_routes(
from src.settings import get_setting
_global_disabled = get_setting("disabled_tools", [])
if _global_disabled and isinstance(_global_disabled, list):
disabled_tools.update(_global_disabled)
explicit_web_allowed = allow_web_search is not None and str(allow_web_search).lower() == "true"
if explicit_web_allowed:
disabled_tools.update(t for t in _global_disabled if t not in {"web_search", "web_fetch"})
else:
disabled_tools.update(_global_disabled)
# Light auto-escalation: the user is in chat mode and just expressed a
# notes/calendar/email intent. Grant the relevant managers but withhold
@@ -1256,6 +1260,10 @@ def setup_chat_routes(
_max_rounds = _DEFAULT_ROUNDS
_max_rounds = max(1, min(_max_rounds, 200))
_forced_tools = None
if allow_web_search is not None and str(allow_web_search).lower() == "true":
_forced_tools = {"web_search", "web_fetch"}
async for chunk in stream_agent_loop(
sess.endpoint_url,
sess.model,
@@ -1277,6 +1285,7 @@ def setup_chat_routes(
plan_mode=plan_mode,
approved_plan=approved_plan or None,
workspace=workspace or None,
forced_tools=_forced_tools,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
+16 -3
View File
@@ -964,18 +964,31 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
runner_lines.append(' fi # end _odysseus_have_prebuilt guard')
def _llama_cpp_rebuild_cmd() -> str:
def _llama_cpp_rebuild_cmd(update_source: bool = False) -> str:
"""Shell command that clears the Cookbook-managed llama.cpp build.
Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build*``
directory so the next llama.cpp serve recompiles from source, picking up a
CUDA or HIP toolchain if one is now available. The serve bootstrap only
builds when ``llama-server`` is missing from PATH, so without this an
existing CPU-only build is reused forever. It deliberately installs and
downloads nothing; the rebuild itself happens on the next serve.
existing CPU-only build is reused forever. When ``update_source`` is true,
the command also fast-forwards the Cookbook-managed ``~/llama.cpp`` checkout
if it exists. The rebuild itself happens on the next serve.
"""
update_cmd = ''
if update_source:
update_cmd = (
'if [ -d "$HOME/llama.cpp/.git" ]; then '
'git -C "$HOME/llama.cpp" pull --ff-only --depth 1 || '
'echo "[odysseus] WARNING: llama.cpp source update failed; clearing cached build anyway."; '
'elif command -v git >/dev/null 2>&1; then '
'git clone --depth 1 https://github.com/ggml-org/llama.cpp "$HOME/llama.cpp" || '
'echo "[odysseus] WARNING: llama.cpp clone failed; clearing cached build anyway."; '
'fi && '
)
return (
'mkdir -p "$HOME/bin" && '
f'{update_cmd}'
'rm -f "$HOME/bin/llama-server" && '
'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && '
'echo "[odysseus] Cleared the cached llama.cpp build. '
+201 -11
View File
@@ -273,6 +273,78 @@ def setup_cookbook_routes() -> APIRouter:
def _load_stored_hf_token() -> str:
return load_stored_hf_token(state_path=_cookbook_state_path)
def _normalize_minimax_m3_vllm_cmd(cmd: str) -> str:
"""Patch MiniMax M3 vLLM launches into the known-good local form.
The browser form can be stale or omit advanced-only fields. MiniMax M3
is sensitive to several flags: using the HF repo id with block-size 128
fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
system nvcc. Normalize server-side before writing the tmux runner.
"""
if not cmd or "vllm serve" not in cmd or not re.search(r"minimax.*m3", cmd, re.I):
return cmd
try:
parts = shlex.split(cmd)
except ValueError:
return cmd
if "serve" not in parts:
return cmd
env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
env_parts = [p for p in parts if env_re.match(p)]
body = [p for p in parts if not env_re.match(p)]
try:
serve_i = body.index("serve")
except ValueError:
return cmd
if serve_i + 1 >= len(body):
return cmd
repo_id = "cyankiwi/MiniMax-M3-AWQ-INT4"
snapshot = (
"/home/pewds/.cache/huggingface/hub/"
"models--cyankiwi--MiniMax-M3-AWQ-INT4/"
"snapshots/4082acbbec1236d21828d55b6bb0fe02ade4ab5b"
)
if body[serve_i + 1] == repo_id:
body[serve_i + 1] = snapshot
def add_env(key: str, value: str) -> None:
if not any(p.startswith(f"{key}=") for p in env_parts):
env_parts.append(f"{key}={value}")
def has_flag(flag: str) -> bool:
return any(p == flag or p.startswith(flag + "=") for p in body)
def set_flag(flag: str, value: str) -> None:
for i, part in enumerate(body):
if part == flag:
if i + 1 < len(body):
body[i + 1] = value
else:
body.append(value)
return
if part.startswith(flag + "="):
body[i] = f"{flag}={value}"
return
body.extend([flag, value])
def add_bool(flag: str) -> None:
if not has_flag(flag):
body.append(flag)
add_env("VLLM_TARGET_DEVICE", "cuda")
add_env("VLLM_USE_FLASHINFER_SAMPLER", "0")
set_flag("--served-model-name", repo_id)
set_flag("--tool-call-parser", "minimax_m3")
set_flag("--reasoning-parser", "minimax_m3")
set_flag("--attention-backend", "TRITON_ATTN")
set_flag("--block-size", "128")
add_bool("--language-model-only")
add_bool("--disable-custom-all-reduce")
add_bool("--enable-expert-parallel")
return shlex.join(env_parts + body)
def _cookbook_ssh_dir() -> Path:
# The Docker image keeps cookbook keys under /app/.ssh; that path only
# exists inside the container. On Windows (and any non-container host)
@@ -1249,6 +1321,7 @@ def setup_cookbook_routes() -> APIRouter:
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
req.cmd = _validate_serve_cmd(req.cmd) or ""
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
req.cmd = _normalize_minimax_m3_vllm_cmd(req.cmd)
req.cmd = _venv_safe_local_pip_install_cmd(
req.cmd,
local=not bool(req.remote_host),
@@ -1579,6 +1652,96 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' echo "ERROR: vLLM is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi')
runner_lines.append(f"ODYSSEUS_SERVE_CMD='{_bash_squote(req.cmd)}'")
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
runner_lines.append(' ODYSSEUS_VLLM_HELP_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print("vllm serve --help")')
runner_lines.append('else:')
runner_lines.append(' print(shlex.join(parts[:serve_i + 1] + ["--help"]))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' ODYSSEUS_VLLM_SUPPORTS_SWAP=0')
runner_lines.append(' if eval "$ODYSSEUS_VLLM_HELP_CMD" 2>&1 | grep -q -- "--swap-space"; then ODYSSEUS_VLLM_SUPPORTS_SWAP=1; fi')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" = "1" ] && ! printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="${ODYSSEUS_SERVE_CMD} --swap-space 0"')
runner_lines.append('fi')
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" != "1" ]; then')
runner_lines.append(' if printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
runner_lines.append(' echo "[odysseus] vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0."')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('out = []')
runner_lines.append('skip = False')
runner_lines.append('for part in parts:')
runner_lines.append(' if skip:')
runner_lines.append(' skip = False')
runner_lines.append(' continue')
runner_lines.append(' if part == "--swap-space":')
runner_lines.append(' skip = True')
runner_lines.append(' continue')
runner_lines.append(' if part.startswith("--swap-space="):')
runner_lines.append(' continue')
runner_lines.append(' out.append(part)')
runner_lines.append('print(shlex.join(out))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' fi')
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
runner_lines.append('import shlex, sys')
runner_lines.append('parts = shlex.split(sys.argv[1])')
runner_lines.append('patch = r"""import inspect, sys')
runner_lines.append('from vllm.engine.arg_utils import EngineArgs, AsyncEngineArgs')
runner_lines.append('def _odysseus_swap0(cls):')
runner_lines.append(' params = list(inspect.signature(cls).parameters)')
runner_lines.append(' if "swap_space" not in params:')
runner_lines.append(' return')
runner_lines.append(' idx = params.index("swap_space")')
runner_lines.append(' defaults = list(cls.__init__.__defaults__ or ())')
runner_lines.append(' if idx < len(defaults):')
runner_lines.append(' defaults[idx] = 0')
runner_lines.append(' cls.__init__.__defaults__ = tuple(defaults)')
runner_lines.append(' fields = getattr(cls, "__dataclass_fields__", {})')
runner_lines.append(' if "swap_space" in fields:')
runner_lines.append(' fields["swap_space"].default = 0')
runner_lines.append('_odysseus_swap0(EngineArgs)')
runner_lines.append('_odysseus_swap0(AsyncEngineArgs)')
runner_lines.append('try:')
runner_lines.append(' from vllm.config import CacheConfig')
runner_lines.append(' CacheConfig.swap_space = 0')
runner_lines.append('except Exception:')
runner_lines.append(' pass')
runner_lines.append('_orig_create_engine_config = EngineArgs.create_engine_config')
runner_lines.append('def _odysseus_create_engine_config(self, *args, **kwargs):')
runner_lines.append(' self.swap_space = 0')
runner_lines.append(' return _orig_create_engine_config(self, *args, **kwargs)')
runner_lines.append('EngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('AsyncEngineArgs.create_engine_config = _odysseus_create_engine_config')
runner_lines.append('from vllm.entrypoints.cli.main import main')
runner_lines.append('sys.exit(main())"""')
runner_lines.append('try:')
runner_lines.append(' serve_i = parts.index("serve")')
runner_lines.append('except ValueError:')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('else:')
runner_lines.append(' exe_i = serve_i - 1')
runner_lines.append(' exe = parts[exe_i] if exe_i >= 0 else "vllm"')
runner_lines.append(' py = "python3"')
runner_lines.append(' if exe.endswith("/bin/vllm"):')
runner_lines.append(' py = exe[:-len("/bin/vllm")] + "/bin/python"')
runner_lines.append(' parts[exe_i:serve_i] = [py, "-c", patch]')
runner_lines.append(' print(shlex.join(parts))')
runner_lines.append('PY')
runner_lines.append(')"')
runner_lines.append(' echo "[odysseus] Patched vLLM internal swap_space default to 0 for this runtime."')
runner_lines.append('fi')
elif "sglang.launch_server" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! command -v sglang &>/dev/null; then')
@@ -1620,7 +1783,10 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines,
keep_shell_open=not local_windows,
)
runner_lines.append(req.cmd)
if "vllm serve" in req.cmd:
runner_lines.append('eval "$ODYSSEUS_SERVE_CMD"')
else:
runner_lines.append(req.cmd)
if local_windows:
# Detached background process — no interactive shell to keep open.
# Print the exit marker the status poller looks for, then stop.
@@ -2418,16 +2584,14 @@ def setup_cookbook_routes() -> APIRouter:
# Add 30% headroom for KV cache, activations, etc.
needed_vram = (est_vram * 1.3) if est_vram else None
if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb:
continue
# Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no
# "NB" in the repo id, so the regex above can't extract their
# param count. Previously we dropped them entirely, which made
# brand-new flagship releases silently vanish from this list even
# on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already
# filtered by _is_excluded(), so what falls through here is
# overwhelmingly full models — keep them, just without a size
# badge (the frontend handles needed_vram_gb=null gracefully).
if vram_gb > 0:
if needed_vram is None:
# The "trending models that fit" list must be conservative:
# if we cannot estimate size from the repo id/tags, do not
# present it as runnable on this hardware.
continue
if needed_vram > vram_gb:
continue
out.append({
"repo_id": repo_id,
@@ -2624,6 +2788,32 @@ def setup_cookbook_routes() -> APIRouter:
except Exception as e:
logger.warning(f"orphan sweep: state write failed: {e}")
@router.get("/api/cookbook/hf-gguf-files")
async def hf_gguf_files(repo_id: str, owner: str = Depends(require_user)):
"""List GGUF files in a HuggingFace repo for the direct-download picker."""
import httpx
repo_id = _validate_repo_id(repo_id)
url = f"https://huggingface.co/api/models/{repo_id}"
try:
headers = {}
token = _load_stored_hf_token()
if token:
headers["Authorization"] = f"Bearer {token}"
async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
resp = await client.get(url, headers=headers)
if resp.status_code != 200:
return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
data = resp.json()
except Exception as e:
return {"ok": False, "files": [], "error": str(e)}
files = [
str(s.get("rfilename") or "")
for s in data.get("siblings", [])
if str(s.get("rfilename") or "").lower().endswith(".gguf")
]
return {"ok": True, "repo_id": repo_id, "files": files}
# In-memory cache for the Ollama library scrape. ollama.com is a public
# site, but it doesn't expose a stable JSON listing — we fetch the HTML
# search page and regex out the model cards. Cached for 1 h so a busy
+45 -8
View File
@@ -1109,22 +1109,30 @@ def _list_attachments_from_msg(msg):
return attachments
idx = 0
for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
# Skip text/html body parts (only consider real attachments)
if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue
filename = part.get_filename()
if filename:
filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else:
# Inline images, etc. - generate a name
ext = ct.split("/")[-1] if "/" in ct else "bin"
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}"
payload = part.get_payload(decode=True)
size = len(payload) if payload else 0
if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
size = len(payload) if payload is not None else 0
attachments.append({
"index": idx,
"filename": filename,
@@ -1136,29 +1144,58 @@ def _list_attachments_from_msg(msg):
return attachments
def _is_likely_signature_image_attachment(att: dict) -> bool:
"""Match the reader's inline signature/logo image filter."""
filename = str((att or {}).get("filename") or "").lower()
if not re.search(r"\.(png|jpe?g|gif|bmp|svg|webp)$", filename):
return False
size = int((att or {}).get("size") or 0)
if re.search(r"^image\d{3,}\.(png|jpe?g|gif)$", filename):
return True
if re.search(r"^(signature|logo|sig|footer|banner)[-_\d]*\.(png|jpe?g|gif|svg)$", filename):
return True
return 0 < size < 30 * 1024
def _has_visible_attachments(msg) -> bool:
"""Return True only for attachments the reader will render as chips."""
return any(
not _is_likely_signature_image_attachment(att)
for att in _list_attachments_from_msg(msg)
)
def _extract_attachment_to_disk(msg, index, target_dir):
"""Extract a specific attachment to disk and return the file path."""
if not msg.is_multipart():
return None
idx = 0
for part in msg.walk():
if part.is_multipart():
continue
cd = str(part.get("Content-Disposition", ""))
ct = part.get_content_type()
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
if part.is_multipart() and not is_attached_email:
continue
if ct in ("text/plain", "text/html") and "attachment" not in cd:
continue
if idx == index:
filename = part.get_filename()
if filename:
filename = _decode_header(filename)
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
filename = f"{filename}.eml"
else:
ext = ct.split("/")[-1] if "/" in ct else "bin"
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
filename = f"attachment_{idx}.{ext}"
# Sanitize
safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip()
payload = part.get_payload(decode=True)
if not payload:
if payload is None and ct == "message/rfc822":
try:
payload = part.as_bytes()
except Exception:
payload = b""
if payload is None:
return None
target_dir.mkdir(parents=True, exist_ok=True)
filepath = target_dir / safe_name
+165 -39
View File
@@ -44,7 +44,7 @@ from routes.email_helpers import (
_send_smtp_message, _smtp_security_mode,
_IMAP_TIMEOUT_SECONDS, _open_imap_connection,
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
_extract_attachment_text, _list_attachments_from_msg,
_extract_attachment_text, _list_attachments_from_msg, _has_visible_attachments, _is_likely_signature_image_attachment,
_extract_attachment_to_disk, _extract_html, _extract_text,
_fetch_sender_thread_context, _pre_retrieve_context,
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
@@ -58,6 +58,7 @@ from routes.email_pollers import _start_poller
logger = logging.getLogger(__name__)
ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
EMAIL_READ_ATTACHMENT_VERSION = 2
def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
@@ -244,6 +245,21 @@ def _imap_uid_fetch(conn, uid_set: str | bytes, query: str):
return conn.uid("FETCH", _uid_bytes(uid_set), query)
def _imap_search_quote(value: str) -> str:
return '"' + str(value or "").replace("\\", "\\\\").replace('"', '\\"') + '"'
def _message_id_chain(*values: str) -> list[str]:
seen = set()
out = []
for value in values:
for mid in re.findall(r"<[^>]+>", value or ""):
if mid not in seen:
seen.add(mid)
out.append(mid)
return out
def _uid_from_fetch_meta(meta_b: bytes) -> str:
m = re.search(rb"\bUID\s+(\d+)\b", meta_b)
return m.group(1).decode() if m else ""
@@ -1003,6 +1019,65 @@ def setup_email_routes():
except Exception:
pass
def _related_thread_attachments_sync(
folder: str,
account_id: str | None,
owner: str,
current_uid: str,
current_message_id: str,
in_reply_to: str,
references: str,
limit: int = 12,
) -> list[dict]:
"""Return visible attachments from referenced messages in this folder."""
wanted_ids = _message_id_chain(references, in_reply_to)
current_mid = (current_message_id or "").strip()
wanted_ids = [mid for mid in wanted_ids if mid and mid != current_mid]
if not wanted_ids:
return []
related: list[dict] = []
try:
with _imap(account_id, owner=owner) as conn:
conn.select(_q(folder), readonly=True)
# Search newest referenced messages first; cap work so opening
# a long thread stays bounded.
for mid in reversed(wanted_ids[-10:]):
if len(related) >= limit:
break
status, data = _imap_uid_search(conn, f'(HEADER Message-ID {_imap_search_quote(mid)})')
if status != "OK" or not data or not data[0]:
continue
for uid_b in reversed(data[0].split()[-3:]):
source_uid = uid_b.decode(errors="ignore")
if not source_uid or source_uid == str(current_uid):
continue
st2, msg_data = _imap_uid_fetch(conn, source_uid, "(BODY.PEEK[])")
if st2 != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
continue
msg = email_mod.message_from_bytes(msg_data[0][1])
source_from = _decode_header(msg.get("From", ""))
source_subject = _decode_header(msg.get("Subject", ""))
source_date = msg.get("Date", "")
for att in _list_attachments_from_msg(msg):
if _is_likely_signature_image_attachment(att):
continue
enriched = dict(att)
enriched.update({
"source_uid": source_uid,
"source_folder": folder,
"source_message_id": (msg.get("Message-ID") or "").strip(),
"source_from": source_from,
"source_subject": source_subject,
"source_date": source_date,
})
related.append(enriched)
if len(related) >= limit:
break
except Exception as e:
logger.debug(f"related thread attachment lookup failed uid={current_uid}: {e}")
return related
@router.get("/list")
async def list_emails(
folder: str = Query("INBOX"),
@@ -1273,6 +1348,17 @@ def setup_email_routes():
sender_name, sender_addr = email.utils.parseaddr(sender)
parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None
attachments = _list_attachments_from_msg(msg)
related_attachments = []
if not _has_visible_attachments(msg):
related_attachments = _related_thread_attachments_sync(
folder,
account_id,
owner,
uid,
message_id,
in_reply_to,
references,
)
if mark_seen:
# Set \Seen in a separate readwrite session so concurrent reads
@@ -1381,6 +1467,8 @@ def setup_email_routes():
"body": body,
"body_html": body_html,
"attachments": attachments,
"related_attachments": related_attachments,
"attachment_version": EMAIL_READ_ATTACHMENT_VERSION,
"cached_summary": cached_summary,
"cached_ai_reply": cached_ai_reply,
"boundaries": cached_boundaries,
@@ -1411,6 +1499,12 @@ def setup_email_routes():
"""Read email body. Cached for 30m, sync IMAP work runs in a thread."""
ck = _read_cache_key(account_id, folder, uid, owner=owner)
cached = _read_cache_get(ck)
if cached is not None:
# Older cached read responses lack the thread-attachment fallback.
# Fetch once so replies that reference prior attachments can show
# those files without waiting for cache expiry.
if cached.get("attachment_version") != EMAIL_READ_ATTACHMENT_VERSION:
cached = None
if cached is not None:
if mark_seen:
try:
@@ -1599,6 +1693,65 @@ def setup_email_routes():
return None
doc_session_id = _resolve_doc_session()
def _create_markdown_doc(content: str, summary: str):
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary=summary, source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
return doc_id
def _attached_email_markdown(path):
raw_bytes = path.read_bytes()
if not raw_bytes:
return f"# Attached email: {base}\n\n_(empty email attachment)_"
try:
attached_msg = email_mod.message_from_bytes(raw_bytes)
except Exception as e:
return f"# Attached email: {base}\n\nCould not parse this email attachment: {e}"
attached_subject = _decode_header(attached_msg.get("Subject", "")) or base
attached_from = _decode_header(attached_msg.get("From", ""))
attached_to = _decode_header(attached_msg.get("To", ""))
attached_cc = _decode_header(attached_msg.get("Cc", ""))
attached_date = attached_msg.get("Date", "")
attached_body = _extract_text(attached_msg).strip()
attached_atts = _list_attachments_from_msg(attached_msg)
lines = [f"# Attached email: {attached_subject}", ""]
if attached_from:
lines.append(f"**From:** {attached_from}")
if attached_to:
lines.append(f"**To:** {attached_to}")
if attached_cc:
lines.append(f"**Cc:** {attached_cc}")
if attached_date:
lines.append(f"**Date:** {attached_date}")
lines.extend(["", "## Body", "", attached_body or "_(no readable body)_"])
if attached_atts:
lines.extend(["", "## Attachments", ""])
for att in attached_atts:
size = int(att.get("size") or 0)
size_label = f"{size} B" if size < 1024 else f"{round(size / 1024)} KB"
name = att.get("filename") or f"attachment_{att.get('index', '')}"
ctype = att.get("content_type") or "application/octet-stream"
lines.append(f"- {name} ({ctype}, {size_label})")
return "\n".join(lines).strip()
# ── PDF path (existing) ────────────────────────────────────
if ext == ".pdf":
import shutil as _shutil
@@ -1645,6 +1798,15 @@ def setup_email_routes():
_tag_doc_with_source(doc_id)
return {"doc_id": doc_id, "filename": filepath.name}
# ── Attached email (.eml / message/rfc822) ────────────────
if ext == ".eml":
try:
content = _attached_email_markdown(filepath)
except Exception as e:
return {"error": f"Failed to read email attachment: {e}", "filename": base}
doc_id = _create_markdown_doc(content, "Imported attached email")
return {"doc_id": doc_id, "filename": filepath.name}
# ── DOCX path: extract text → markdown document ───────────
if ext == ".docx":
try:
@@ -1682,25 +1844,7 @@ def setup_email_routes():
lines.append("")
content = "\n".join(lines).strip() or f"_(empty {base})_"
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from DOCX", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
doc_id = _create_markdown_doc(content, "Imported from DOCX")
return {"doc_id": doc_id, "filename": filepath.name}
# ── Plain text / markdown ────────────────────────────────
@@ -1709,25 +1853,7 @@ def setup_email_routes():
content = filepath.read_text(encoding="utf-8", errors="replace")
except Exception as e:
return {"error": f"Failed to read text file: {e}", "filename": base}
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
_db = _SL()
try:
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
_db.add(_Doc(
id=doc_id, session_id=doc_session_id, title=title,
language="markdown", current_content=content,
version_count=1, is_active=True,
))
_db.add(_DV(
id=ver_id, document_id=doc_id, version_number=1,
content=content, summary="Imported from email attachment", source="upload",
))
_db.commit()
finally:
_db.close()
_tag_doc_with_source(doc_id)
doc_id = _create_markdown_doc(content, "Imported from email attachment")
return {"doc_id": doc_id, "filename": filepath.name}
return {"error": f"Unsupported attachment type: {ext}", "filename": base}
+94 -2
View File
@@ -1,8 +1,13 @@
import json
import os
import re
import shlex
import subprocess
from copy import deepcopy
from fastapi import APIRouter, HTTPException
from core.platform_compat import run_ssh_command
from routes._validators import validate_remote_host, validate_ssh_port
@@ -107,6 +112,73 @@ def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_v
return system
def _run_model_probe(host: str, ssh_port: str, cmd: str) -> str:
try:
if host:
r = run_ssh_command(
host,
ssh_port or None,
cmd,
timeout=15,
connect_timeout=5,
strict_host_key_checking=False,
text=True,
)
else:
r = subprocess.run(["bash", "-lc", cmd], capture_output=True, text=True, timeout=15)
if r.returncode == 0:
return (r.stdout or "").strip()
except Exception:
return ""
return ""
def _inspect_model_path(model_path: str, host: str = "", ssh_port: str = "") -> dict:
"""Read lightweight metadata from a local or SSH-visible HF model folder."""
path = (model_path or "").strip()
if not path or path.startswith(("http://", "https://")):
return {}
if not (path.startswith("/") or path.startswith("~")):
return {}
qpath = shlex.quote(path)
qconfig = shlex.quote(os.path.join(path, "config.json"))
out = {}
exists = _run_model_probe(host, ssh_port, f"test -d {qpath} && printf found || printf missing")
if exists != "found":
target = host or "local container"
out["model_probe_error"] = f"Model path is not visible on {target}: {path}"
return out
raw_config = _run_model_probe(host, ssh_port, f"test -f {qconfig} && sed -n '1,240p' {qconfig}")
if raw_config:
try:
cfg = json.loads(raw_config)
except Exception:
cfg = {}
for key in ("context_length", "max_position_embeddings", "n_ctx_train", "model_max_length", "max_seq_len"):
value = cfg.get(key)
if isinstance(value, (int, float)) and value > 0:
out["model_ctx_max"] = int(value)
break
else:
out["model_probe_error"] = f"config.json not found in model path: {path}"
size_cmd = (
f"find {qpath} -type f \\( -name '*.safetensors' -o -name '*.bin' -o -name '*.gguf' \\) "
"-printf '%s\\n' 2>/dev/null | awk '{s+=$1} END {if (s>0) printf \"%.6f\", s/1073741824}'"
)
weights = _run_model_probe(host, ssh_port, size_cmd)
try:
weights_gb = float(weights)
except Exception:
weights_gb = 0.0
if weights_gb > 0:
out["model_weights_gb"] = round(weights_gb, 3)
elif "model_probe_error" not in out:
out["model_probe_error"] = f"No model weight files found in: {path}"
return out
def setup_hwfit_routes():
router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
@@ -235,7 +307,7 @@ def setup_hwfit_routes():
return {"system": system, "models": results}
@router.get("/profiles")
def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
def get_serve_profiles(model: str = "", model_path: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
"""Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
against the detected hardware on `host` (or local). Returns concrete
flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
@@ -272,8 +344,16 @@ def setup_hwfit_routes():
if nn and (nn == want or want.endswith(nn) or nn.endswith(want)):
m = entry
break
path_meta = _inspect_model_path(model_path or model, host=host, ssh_port=ssh_port)
if m is None:
return {"system": system, "profiles": [], "error": "model not in catalog"}
return {
"system": system,
"profiles": [],
"error": "model not in catalog",
"model_ctx_max": int(path_meta.get("model_ctx_max") or 0),
"model_weights_gb": float(path_meta.get("model_weights_gb") or 0),
"model_probe_error": path_meta.get("model_probe_error") or "",
}
# Surface the model's trained context limit so the serve UI can clamp a
# user-typed context down to it (asking for ctx > n_ctx_train overflows
# and, with a quantized KV cache, can crash the GPU).
@@ -283,6 +363,16 @@ def setup_hwfit_routes():
if isinstance(v, (int, float)) and v > 0:
model_ctx_max = int(v)
break
path_ctx_max = int(path_meta.get("model_ctx_max") or 0)
if path_ctx_max > 0:
model_ctx_max = max(model_ctx_max, path_ctx_max)
model_weights_gb = float(path_meta.get("model_weights_gb") or 0)
if model_weights_gb <= 0:
for k in ("min_vram_gb", "required_gb", "size_gb", "recommended_ram_gb", "min_ram_gb"):
v = m.get(k)
if isinstance(v, (int, float)) and v > 0:
model_weights_gb = float(v)
break
return {
"system": system,
"profiles": compute_serve_profiles(
@@ -291,6 +381,8 @@ def setup_hwfit_routes():
serve_quant=(serve_quant or None),
),
"model_ctx_max": model_ctx_max,
"model_weights_gb": model_weights_gb,
"model_probe_error": path_meta.get("model_probe_error") or "",
}
@router.get("/image-models")
+9 -2
View File
@@ -1064,9 +1064,11 @@ def setup_model_routes(model_discovery):
except Exception:
return 0.0
def _failure_delay(fails: int) -> float:
def _failure_delay(fails: int, *, empty_local: bool = False) -> float:
if fails <= 0:
return 0.0
if empty_local:
return min(5.0 * (2 ** max(0, fails - 1)), 30.0)
return min(_REFRESH_FAILURE_BASE * (2 ** max(0, fails - 1)), _REFRESH_FAILURE_MAX)
def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]:
@@ -1097,7 +1099,12 @@ def setup_model_routes(model_discovery):
fails = int(state.get("fail_count") or 0)
if fails and not force:
last_failure = float(state.get("last_failure") or 0.0)
if now - last_failure < _failure_delay(fails):
empty_local = (
not cached
and category == "local"
and str(getattr(ep, "id", "") or "").startswith("local-")
)
if now - last_failure < _failure_delay(fails, empty_local=empty_local):
return False, info
if cached and not force:
interval = _endpoint_refresh_interval(ep, category)
+53 -5
View File
@@ -330,6 +330,9 @@ def add_user_install_bins_to_path():
candidates.append(os.path.join(site.USER_BASE, 'bin'))
except Exception:
pass
candidates.append(os.path.expanduser('~/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build/bin'))
candidates.append(os.path.expanduser('~/llama.cpp/build-vulkan/bin'))
candidates.append(os.path.expanduser('~/.local/bin'))
parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
changed = False
@@ -1188,6 +1191,7 @@ def setup_shell_routes() -> APIRouter:
# venv over SSH so a remote `pip install` actually reflects here.
remote_status: dict = {}
remote_details: dict = {}
remote_probe_error = ""
remote_names = [
p["name"]
for p in packages
@@ -1226,8 +1230,34 @@ def setup_shell_routes() -> APIRouter:
break
except ValueError as e:
raise HTTPException(400, str(e))
except Exception:
except Exception as e:
remote_status = {}
remote_probe_error = f"SSH package probe failed: {str(e)[:160]}"
if "llama_cpp" in remote_names:
try:
inner = (
'export PATH="$HOME/.local/bin:$HOME/bin:'
'$HOME/llama.cpp/build/bin:$HOME/llama.cpp/build-vulkan/bin:$PATH"; '
"command -v llama-server 2>/dev/null || true"
)
argv = _ssh_base_argv(host, ssh_port) + [inner]
proc = await asyncio.create_subprocess_exec(
*argv,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
out, _err = await asyncio.wait_for(proc.communicate(), timeout=8)
llama_server_path = out.decode("utf-8", errors="replace").strip().splitlines()
llama_server_path = llama_server_path[-1].strip() if llama_server_path else ""
if llama_server_path:
remote_status["llama_cpp"] = True
probe = remote_details.setdefault("llama_cpp", {})
if isinstance(probe, dict):
probe.setdefault("binaries", {})["llama-server"] = llama_server_path
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH llama-server probe failed: {str(e)[:160]}"
pass
# Union of system_names + every package's system_prereqs. Probing
# the prereqs alongside the main system deps in a single SSH call
# avoids a second round-trip per Cookbook → Dependencies refresh.
@@ -1272,7 +1302,9 @@ def setup_shell_routes() -> APIRouter:
target_os_id = _os_id_from_release("\n".join(_osrel_lines))
except ValueError as e:
raise HTTPException(400, str(e))
except Exception:
except Exception as e:
if not remote_probe_error:
remote_probe_error = f"SSH system probe failed: {str(e)[:160]}"
pass
elif not host:
# Local target — probe in-process so the inline install command
@@ -1290,7 +1322,12 @@ def setup_shell_routes() -> APIRouter:
on_remote = bool(host and pkg.get("target") == "remote")
probe = None
if on_remote:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
if remote_probe_error and pkg["name"] not in remote_status:
pkg["installed"] = None
pkg["probe_error"] = remote_probe_error
pkg["status_note"] = remote_probe_error
else:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
probe = remote_details.get(pkg["name"])
if isinstance(probe, dict):
pkg["details"] = probe
@@ -1353,9 +1390,19 @@ def setup_shell_routes() -> APIRouter:
# reads "ready" green while inference runs at 3 tok/s on GPU
# silicon — actively misleading.
if pkg["name"] == "llama_cpp" and pkg.get("installed"):
_native_llama_server = bool(
isinstance(probe, dict)
and isinstance(probe.get("binaries"), dict)
and probe["binaries"].get("llama-server")
)
_gpu_capable = False
_has_nvidia_target = False
if on_remote and host:
if _native_llama_server:
# Native llama-server is the launcher path Cookbook now
# prefers. Do not mark this as a CPU-only Python wheel just
# because llama-cpp-python is absent from the selected venv.
_gpu_capable = True
elif on_remote and host:
try:
# Activate the configured venv FIRST so the probe
# runs against the same python the launch script
@@ -1609,7 +1656,8 @@ def setup_shell_routes() -> APIRouter:
return {"ok": False, "error": f"Unsupported engine: {engine}"}
host = str(body.get("remote_host") or "").strip()
ssh_port = body.get("ssh_port")
cmd = _llama_cpp_rebuild_cmd()
update_source = bool(body.get("update_source"))
cmd = _llama_cpp_rebuild_cmd(update_source=update_source)
try:
argv = (
(_ssh_base_argv(host, ssh_port) + [cmd])