mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 00:22:10 -04:00
Cookbook model workflow fixes
This commit is contained in:
+33
-4
@@ -22,6 +22,31 @@ from fastapi import HTTPException
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CASUAL_OPENING_RE = re.compile(
|
||||
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
|
||||
r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_CASUAL_BLOCKLIST_RE = re.compile(
|
||||
r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
|
||||
r"download|model|email|document|doc|note|calendar|task|search|web|research|"
|
||||
r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _is_casual_low_signal(text: str) -> bool:
|
||||
"""Short greetings/slang should not pull memory, skills, RAG, or docs."""
|
||||
s = str(text or "").strip()
|
||||
m = _CASUAL_OPENING_RE.match(s)
|
||||
if not m:
|
||||
return False
|
||||
tail = m.group("tail") or ""
|
||||
if _CASUAL_BLOCKLIST_RE.search(tail):
|
||||
return False
|
||||
tail_words = re.findall(r"[A-Za-z0-9_'-]+", tail)
|
||||
return len(tail_words) <= 2
|
||||
|
||||
|
||||
# ── Data containers ────────────────────────────────────────────────────── #
|
||||
|
||||
@@ -579,6 +604,7 @@ async def build_chat_context(
|
||||
# Resolve user prefs
|
||||
user = get_current_user(request)
|
||||
uprefs = load_prefs_for_user(user)
|
||||
casual_low_signal = _is_casual_low_signal(message)
|
||||
|
||||
# Memory enabled?
|
||||
mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True)
|
||||
@@ -588,6 +614,9 @@ async def build_chat_context(
|
||||
if not allow_tool_preprocessing:
|
||||
mem_enabled = False
|
||||
skills_enabled = False
|
||||
if casual_low_signal:
|
||||
mem_enabled = False
|
||||
skills_enabled = False
|
||||
logger.debug(
|
||||
"Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
|
||||
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
|
||||
@@ -603,11 +632,11 @@ async def build_chat_context(
|
||||
|
||||
# Use RAG?
|
||||
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
|
||||
if incognito or not allow_tool_preprocessing or is_research_spinoff:
|
||||
if incognito or not allow_tool_preprocessing or is_research_spinoff or casual_low_signal:
|
||||
use_rag_val = False
|
||||
|
||||
# If pre-fetched search context was provided (compare mode), skip live web search
|
||||
skip_web = bool(search_context) or not allow_tool_preprocessing
|
||||
skip_web = bool(search_context) or not allow_tool_preprocessing or casual_low_signal
|
||||
|
||||
# Build context preface
|
||||
# The stream path uses enhanced_message (with CoT/preprocessing applied),
|
||||
@@ -626,7 +655,7 @@ async def build_chat_context(
|
||||
incognito=incognito,
|
||||
use_skills=skills_enabled,
|
||||
)
|
||||
if use_rag is not None or is_research_spinoff:
|
||||
if use_rag is not None or is_research_spinoff or casual_low_signal:
|
||||
_preface_kwargs["use_rag"] = use_rag_val
|
||||
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
|
||||
|
||||
@@ -634,7 +663,7 @@ async def build_chat_context(
|
||||
used_memories = getattr(chat_processor, '_last_used_memories', [])
|
||||
|
||||
# Inject pre-fetched search context (compare mode)
|
||||
if search_context and allow_tool_preprocessing:
|
||||
if search_context and allow_tool_preprocessing and not casual_low_signal:
|
||||
preface.append(untrusted_context_message("prefetched search context", search_context))
|
||||
|
||||
# YouTube transcripts
|
||||
|
||||
+10
-1
@@ -826,7 +826,11 @@ def setup_chat_routes(
|
||||
from src.settings import get_setting
|
||||
_global_disabled = get_setting("disabled_tools", [])
|
||||
if _global_disabled and isinstance(_global_disabled, list):
|
||||
disabled_tools.update(_global_disabled)
|
||||
explicit_web_allowed = allow_web_search is not None and str(allow_web_search).lower() == "true"
|
||||
if explicit_web_allowed:
|
||||
disabled_tools.update(t for t in _global_disabled if t not in {"web_search", "web_fetch"})
|
||||
else:
|
||||
disabled_tools.update(_global_disabled)
|
||||
|
||||
# Light auto-escalation: the user is in chat mode and just expressed a
|
||||
# notes/calendar/email intent. Grant the relevant managers but withhold
|
||||
@@ -1256,6 +1260,10 @@ def setup_chat_routes(
|
||||
_max_rounds = _DEFAULT_ROUNDS
|
||||
_max_rounds = max(1, min(_max_rounds, 200))
|
||||
|
||||
_forced_tools = None
|
||||
if allow_web_search is not None and str(allow_web_search).lower() == "true":
|
||||
_forced_tools = {"web_search", "web_fetch"}
|
||||
|
||||
async for chunk in stream_agent_loop(
|
||||
sess.endpoint_url,
|
||||
sess.model,
|
||||
@@ -1277,6 +1285,7 @@ def setup_chat_routes(
|
||||
plan_mode=plan_mode,
|
||||
approved_plan=approved_plan or None,
|
||||
workspace=workspace or None,
|
||||
forced_tools=_forced_tools,
|
||||
):
|
||||
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
|
||||
try:
|
||||
|
||||
@@ -964,18 +964,31 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
|
||||
runner_lines.append(' fi # end _odysseus_have_prebuilt guard')
|
||||
|
||||
|
||||
def _llama_cpp_rebuild_cmd() -> str:
|
||||
def _llama_cpp_rebuild_cmd(update_source: bool = False) -> str:
|
||||
"""Shell command that clears the Cookbook-managed llama.cpp build.
|
||||
|
||||
Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build*``
|
||||
directory so the next llama.cpp serve recompiles from source, picking up a
|
||||
CUDA or HIP toolchain if one is now available. The serve bootstrap only
|
||||
builds when ``llama-server`` is missing from PATH, so without this an
|
||||
existing CPU-only build is reused forever. It deliberately installs and
|
||||
downloads nothing; the rebuild itself happens on the next serve.
|
||||
existing CPU-only build is reused forever. When ``update_source`` is true,
|
||||
the command also fast-forwards the Cookbook-managed ``~/llama.cpp`` checkout
|
||||
if it exists. The rebuild itself happens on the next serve.
|
||||
"""
|
||||
update_cmd = ''
|
||||
if update_source:
|
||||
update_cmd = (
|
||||
'if [ -d "$HOME/llama.cpp/.git" ]; then '
|
||||
'git -C "$HOME/llama.cpp" pull --ff-only --depth 1 || '
|
||||
'echo "[odysseus] WARNING: llama.cpp source update failed; clearing cached build anyway."; '
|
||||
'elif command -v git >/dev/null 2>&1; then '
|
||||
'git clone --depth 1 https://github.com/ggml-org/llama.cpp "$HOME/llama.cpp" || '
|
||||
'echo "[odysseus] WARNING: llama.cpp clone failed; clearing cached build anyway."; '
|
||||
'fi && '
|
||||
)
|
||||
return (
|
||||
'mkdir -p "$HOME/bin" && '
|
||||
f'{update_cmd}'
|
||||
'rm -f "$HOME/bin/llama-server" && '
|
||||
'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && '
|
||||
'echo "[odysseus] Cleared the cached llama.cpp build. '
|
||||
|
||||
+201
-11
@@ -273,6 +273,78 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
def _load_stored_hf_token() -> str:
|
||||
return load_stored_hf_token(state_path=_cookbook_state_path)
|
||||
|
||||
def _normalize_minimax_m3_vllm_cmd(cmd: str) -> str:
|
||||
"""Patch MiniMax M3 vLLM launches into the known-good local form.
|
||||
|
||||
The browser form can be stale or omit advanced-only fields. MiniMax M3
|
||||
is sensitive to several flags: using the HF repo id with block-size 128
|
||||
fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
|
||||
system nvcc. Normalize server-side before writing the tmux runner.
|
||||
"""
|
||||
if not cmd or "vllm serve" not in cmd or not re.search(r"minimax.*m3", cmd, re.I):
|
||||
return cmd
|
||||
try:
|
||||
parts = shlex.split(cmd)
|
||||
except ValueError:
|
||||
return cmd
|
||||
if "serve" not in parts:
|
||||
return cmd
|
||||
|
||||
env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
|
||||
env_parts = [p for p in parts if env_re.match(p)]
|
||||
body = [p for p in parts if not env_re.match(p)]
|
||||
try:
|
||||
serve_i = body.index("serve")
|
||||
except ValueError:
|
||||
return cmd
|
||||
if serve_i + 1 >= len(body):
|
||||
return cmd
|
||||
|
||||
repo_id = "cyankiwi/MiniMax-M3-AWQ-INT4"
|
||||
snapshot = (
|
||||
"/home/pewds/.cache/huggingface/hub/"
|
||||
"models--cyankiwi--MiniMax-M3-AWQ-INT4/"
|
||||
"snapshots/4082acbbec1236d21828d55b6bb0fe02ade4ab5b"
|
||||
)
|
||||
if body[serve_i + 1] == repo_id:
|
||||
body[serve_i + 1] = snapshot
|
||||
|
||||
def add_env(key: str, value: str) -> None:
|
||||
if not any(p.startswith(f"{key}=") for p in env_parts):
|
||||
env_parts.append(f"{key}={value}")
|
||||
|
||||
def has_flag(flag: str) -> bool:
|
||||
return any(p == flag or p.startswith(flag + "=") for p in body)
|
||||
|
||||
def set_flag(flag: str, value: str) -> None:
|
||||
for i, part in enumerate(body):
|
||||
if part == flag:
|
||||
if i + 1 < len(body):
|
||||
body[i + 1] = value
|
||||
else:
|
||||
body.append(value)
|
||||
return
|
||||
if part.startswith(flag + "="):
|
||||
body[i] = f"{flag}={value}"
|
||||
return
|
||||
body.extend([flag, value])
|
||||
|
||||
def add_bool(flag: str) -> None:
|
||||
if not has_flag(flag):
|
||||
body.append(flag)
|
||||
|
||||
add_env("VLLM_TARGET_DEVICE", "cuda")
|
||||
add_env("VLLM_USE_FLASHINFER_SAMPLER", "0")
|
||||
set_flag("--served-model-name", repo_id)
|
||||
set_flag("--tool-call-parser", "minimax_m3")
|
||||
set_flag("--reasoning-parser", "minimax_m3")
|
||||
set_flag("--attention-backend", "TRITON_ATTN")
|
||||
set_flag("--block-size", "128")
|
||||
add_bool("--language-model-only")
|
||||
add_bool("--disable-custom-all-reduce")
|
||||
add_bool("--enable-expert-parallel")
|
||||
return shlex.join(env_parts + body)
|
||||
|
||||
def _cookbook_ssh_dir() -> Path:
|
||||
# The Docker image keeps cookbook keys under /app/.ssh; that path only
|
||||
# exists inside the container. On Windows (and any non-container host)
|
||||
@@ -1249,6 +1321,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
|
||||
req.cmd = _validate_serve_cmd(req.cmd) or ""
|
||||
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
|
||||
req.cmd = _normalize_minimax_m3_vllm_cmd(req.cmd)
|
||||
req.cmd = _venv_safe_local_pip_install_cmd(
|
||||
req.cmd,
|
||||
local=not bool(req.remote_host),
|
||||
@@ -1579,6 +1652,96 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
runner_lines.append(' echo "ERROR: vLLM is not installed."')
|
||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||
runner_lines.append('fi')
|
||||
runner_lines.append(f"ODYSSEUS_SERVE_CMD='{_bash_squote(req.cmd)}'")
|
||||
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
|
||||
runner_lines.append(' ODYSSEUS_VLLM_HELP_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
|
||||
runner_lines.append('import shlex, sys')
|
||||
runner_lines.append('parts = shlex.split(sys.argv[1])')
|
||||
runner_lines.append('try:')
|
||||
runner_lines.append(' serve_i = parts.index("serve")')
|
||||
runner_lines.append('except ValueError:')
|
||||
runner_lines.append(' print("vllm serve --help")')
|
||||
runner_lines.append('else:')
|
||||
runner_lines.append(' print(shlex.join(parts[:serve_i + 1] + ["--help"]))')
|
||||
runner_lines.append('PY')
|
||||
runner_lines.append(')"')
|
||||
runner_lines.append(' ODYSSEUS_VLLM_SUPPORTS_SWAP=0')
|
||||
runner_lines.append(' if eval "$ODYSSEUS_VLLM_HELP_CMD" 2>&1 | grep -q -- "--swap-space"; then ODYSSEUS_VLLM_SUPPORTS_SWAP=1; fi')
|
||||
runner_lines.append('fi')
|
||||
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" = "1" ] && ! printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
|
||||
runner_lines.append(' echo "[odysseus] Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU."')
|
||||
runner_lines.append(' ODYSSEUS_SERVE_CMD="${ODYSSEUS_SERVE_CMD} --swap-space 0"')
|
||||
runner_lines.append('fi')
|
||||
runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" != "1" ]; then')
|
||||
runner_lines.append(' if printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
|
||||
runner_lines.append(' echo "[odysseus] vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0."')
|
||||
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
|
||||
runner_lines.append('import shlex, sys')
|
||||
runner_lines.append('parts = shlex.split(sys.argv[1])')
|
||||
runner_lines.append('out = []')
|
||||
runner_lines.append('skip = False')
|
||||
runner_lines.append('for part in parts:')
|
||||
runner_lines.append(' if skip:')
|
||||
runner_lines.append(' skip = False')
|
||||
runner_lines.append(' continue')
|
||||
runner_lines.append(' if part == "--swap-space":')
|
||||
runner_lines.append(' skip = True')
|
||||
runner_lines.append(' continue')
|
||||
runner_lines.append(' if part.startswith("--swap-space="):')
|
||||
runner_lines.append(' continue')
|
||||
runner_lines.append(' out.append(part)')
|
||||
runner_lines.append('print(shlex.join(out))')
|
||||
runner_lines.append('PY')
|
||||
runner_lines.append(')"')
|
||||
runner_lines.append(' fi')
|
||||
runner_lines.append(' ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
|
||||
runner_lines.append('import shlex, sys')
|
||||
runner_lines.append('parts = shlex.split(sys.argv[1])')
|
||||
runner_lines.append('patch = r"""import inspect, sys')
|
||||
runner_lines.append('from vllm.engine.arg_utils import EngineArgs, AsyncEngineArgs')
|
||||
runner_lines.append('def _odysseus_swap0(cls):')
|
||||
runner_lines.append(' params = list(inspect.signature(cls).parameters)')
|
||||
runner_lines.append(' if "swap_space" not in params:')
|
||||
runner_lines.append(' return')
|
||||
runner_lines.append(' idx = params.index("swap_space")')
|
||||
runner_lines.append(' defaults = list(cls.__init__.__defaults__ or ())')
|
||||
runner_lines.append(' if idx < len(defaults):')
|
||||
runner_lines.append(' defaults[idx] = 0')
|
||||
runner_lines.append(' cls.__init__.__defaults__ = tuple(defaults)')
|
||||
runner_lines.append(' fields = getattr(cls, "__dataclass_fields__", {})')
|
||||
runner_lines.append(' if "swap_space" in fields:')
|
||||
runner_lines.append(' fields["swap_space"].default = 0')
|
||||
runner_lines.append('_odysseus_swap0(EngineArgs)')
|
||||
runner_lines.append('_odysseus_swap0(AsyncEngineArgs)')
|
||||
runner_lines.append('try:')
|
||||
runner_lines.append(' from vllm.config import CacheConfig')
|
||||
runner_lines.append(' CacheConfig.swap_space = 0')
|
||||
runner_lines.append('except Exception:')
|
||||
runner_lines.append(' pass')
|
||||
runner_lines.append('_orig_create_engine_config = EngineArgs.create_engine_config')
|
||||
runner_lines.append('def _odysseus_create_engine_config(self, *args, **kwargs):')
|
||||
runner_lines.append(' self.swap_space = 0')
|
||||
runner_lines.append(' return _orig_create_engine_config(self, *args, **kwargs)')
|
||||
runner_lines.append('EngineArgs.create_engine_config = _odysseus_create_engine_config')
|
||||
runner_lines.append('AsyncEngineArgs.create_engine_config = _odysseus_create_engine_config')
|
||||
runner_lines.append('from vllm.entrypoints.cli.main import main')
|
||||
runner_lines.append('sys.exit(main())"""')
|
||||
runner_lines.append('try:')
|
||||
runner_lines.append(' serve_i = parts.index("serve")')
|
||||
runner_lines.append('except ValueError:')
|
||||
runner_lines.append(' print(shlex.join(parts))')
|
||||
runner_lines.append('else:')
|
||||
runner_lines.append(' exe_i = serve_i - 1')
|
||||
runner_lines.append(' exe = parts[exe_i] if exe_i >= 0 else "vllm"')
|
||||
runner_lines.append(' py = "python3"')
|
||||
runner_lines.append(' if exe.endswith("/bin/vllm"):')
|
||||
runner_lines.append(' py = exe[:-len("/bin/vllm")] + "/bin/python"')
|
||||
runner_lines.append(' parts[exe_i:serve_i] = [py, "-c", patch]')
|
||||
runner_lines.append(' print(shlex.join(parts))')
|
||||
runner_lines.append('PY')
|
||||
runner_lines.append(')"')
|
||||
runner_lines.append(' echo "[odysseus] Patched vLLM internal swap_space default to 0 for this runtime."')
|
||||
runner_lines.append('fi')
|
||||
elif "sglang.launch_server" in req.cmd:
|
||||
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
||||
runner_lines.append('if ! command -v sglang &>/dev/null; then')
|
||||
@@ -1620,7 +1783,10 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
runner_lines,
|
||||
keep_shell_open=not local_windows,
|
||||
)
|
||||
runner_lines.append(req.cmd)
|
||||
if "vllm serve" in req.cmd:
|
||||
runner_lines.append('eval "$ODYSSEUS_SERVE_CMD"')
|
||||
else:
|
||||
runner_lines.append(req.cmd)
|
||||
if local_windows:
|
||||
# Detached background process — no interactive shell to keep open.
|
||||
# Print the exit marker the status poller looks for, then stop.
|
||||
@@ -2418,16 +2584,14 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# Add 30% headroom for KV cache, activations, etc.
|
||||
needed_vram = (est_vram * 1.3) if est_vram else None
|
||||
|
||||
if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb:
|
||||
continue
|
||||
# Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no
|
||||
# "NB" in the repo id, so the regex above can't extract their
|
||||
# param count. Previously we dropped them entirely, which made
|
||||
# brand-new flagship releases silently vanish from this list even
|
||||
# on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already
|
||||
# filtered by _is_excluded(), so what falls through here is
|
||||
# overwhelmingly full models — keep them, just without a size
|
||||
# badge (the frontend handles needed_vram_gb=null gracefully).
|
||||
if vram_gb > 0:
|
||||
if needed_vram is None:
|
||||
# The "trending models that fit" list must be conservative:
|
||||
# if we cannot estimate size from the repo id/tags, do not
|
||||
# present it as runnable on this hardware.
|
||||
continue
|
||||
if needed_vram > vram_gb:
|
||||
continue
|
||||
|
||||
out.append({
|
||||
"repo_id": repo_id,
|
||||
@@ -2624,6 +2788,32 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
except Exception as e:
|
||||
logger.warning(f"orphan sweep: state write failed: {e}")
|
||||
|
||||
@router.get("/api/cookbook/hf-gguf-files")
async def hf_gguf_files(repo_id: str, owner: str = Depends(require_user)):
    """List GGUF files in a HuggingFace repo for the direct-download picker.

    Args:
        repo_id: Repository id; passed through ``_validate_repo_id`` first.
        owner: Authenticated user injected by ``require_user``.

    Returns:
        ``{"ok": True, "repo_id": ..., "files": [...]}`` on success, or
        ``{"ok": False, "files": [], "error": ...}`` when the request fails
        or the API answers with a non-200 status.
    """
    import httpx

    repo_id = _validate_repo_id(repo_id)
    url = f"https://huggingface.co/api/models/{repo_id}"
    try:
        headers = {}
        token = _load_stored_hf_token()
        if token:
            headers["Authorization"] = f"Bearer {token}"
        async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
            resp = await client.get(url, headers=headers)
        if resp.status_code != 200:
            return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
        data = resp.json()
    except Exception as e:
        return {"ok": False, "files": [], "error": str(e)}

    # The payload is external input: tolerate a non-dict body, a missing or
    # null "siblings" key, and non-dict entries instead of raising a 500.
    siblings = data.get("siblings") if isinstance(data, dict) else None
    files = []
    for sibling in siblings if isinstance(siblings, list) else []:
        if not isinstance(sibling, dict):
            continue
        name = str(sibling.get("rfilename") or "")
        if name.lower().endswith(".gguf"):
            files.append(name)
    return {"ok": True, "repo_id": repo_id, "files": files}
|
||||
|
||||
# In-memory cache for the Ollama library scrape. ollama.com is a public
|
||||
# site, but it doesn't expose a stable JSON listing — we fetch the HTML
|
||||
# search page and regex out the model cards. Cached for 1 h so a busy
|
||||
|
||||
+45
-8
@@ -1109,22 +1109,30 @@ def _list_attachments_from_msg(msg):
|
||||
return attachments
|
||||
idx = 0
|
||||
for part in msg.walk():
|
||||
if part.is_multipart():
|
||||
continue
|
||||
cd = str(part.get("Content-Disposition", ""))
|
||||
ct = part.get_content_type()
|
||||
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
|
||||
if part.is_multipart() and not is_attached_email:
|
||||
continue
|
||||
# Skip text/html body parts (only consider real attachments)
|
||||
if ct in ("text/plain", "text/html") and "attachment" not in cd:
|
||||
continue
|
||||
filename = part.get_filename()
|
||||
if filename:
|
||||
filename = _decode_header(filename)
|
||||
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
|
||||
filename = f"{filename}.eml"
|
||||
else:
|
||||
# Inline images, etc. - generate a name
|
||||
ext = ct.split("/")[-1] if "/" in ct else "bin"
|
||||
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
|
||||
filename = f"attachment_{idx}.{ext}"
|
||||
payload = part.get_payload(decode=True)
|
||||
size = len(payload) if payload else 0
|
||||
if payload is None and ct == "message/rfc822":
|
||||
try:
|
||||
payload = part.as_bytes()
|
||||
except Exception:
|
||||
payload = b""
|
||||
size = len(payload) if payload is not None else 0
|
||||
attachments.append({
|
||||
"index": idx,
|
||||
"filename": filename,
|
||||
@@ -1136,29 +1144,58 @@ def _list_attachments_from_msg(msg):
|
||||
return attachments
|
||||
|
||||
|
||||
def _is_likely_signature_image_attachment(att: dict) -> bool:
|
||||
"""Match the reader's inline signature/logo image filter."""
|
||||
filename = str((att or {}).get("filename") or "").lower()
|
||||
if not re.search(r"\.(png|jpe?g|gif|bmp|svg|webp)$", filename):
|
||||
return False
|
||||
size = int((att or {}).get("size") or 0)
|
||||
if re.search(r"^image\d{3,}\.(png|jpe?g|gif)$", filename):
|
||||
return True
|
||||
if re.search(r"^(signature|logo|sig|footer|banner)[-_\d]*\.(png|jpe?g|gif|svg)$", filename):
|
||||
return True
|
||||
return 0 < size < 30 * 1024
|
||||
|
||||
|
||||
def _has_visible_attachments(msg) -> bool:
    """Report whether the message has an attachment the reader shows as a chip.

    An attachment is visible when it is not classified as a signature/logo
    image; the first such attachment short-circuits the scan.
    """
    for candidate in _list_attachments_from_msg(msg):
        if not _is_likely_signature_image_attachment(candidate):
            return True
    return False
|
||||
|
||||
|
||||
def _extract_attachment_to_disk(msg, index, target_dir):
|
||||
"""Extract a specific attachment to disk and return the file path."""
|
||||
if not msg.is_multipart():
|
||||
return None
|
||||
idx = 0
|
||||
for part in msg.walk():
|
||||
if part.is_multipart():
|
||||
continue
|
||||
cd = str(part.get("Content-Disposition", ""))
|
||||
ct = part.get_content_type()
|
||||
is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
|
||||
if part.is_multipart() and not is_attached_email:
|
||||
continue
|
||||
if ct in ("text/plain", "text/html") and "attachment" not in cd:
|
||||
continue
|
||||
if idx == index:
|
||||
filename = part.get_filename()
|
||||
if filename:
|
||||
filename = _decode_header(filename)
|
||||
if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
|
||||
filename = f"{filename}.eml"
|
||||
else:
|
||||
ext = ct.split("/")[-1] if "/" in ct else "bin"
|
||||
ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
|
||||
filename = f"attachment_{idx}.{ext}"
|
||||
# Sanitize
|
||||
safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip()
|
||||
payload = part.get_payload(decode=True)
|
||||
if not payload:
|
||||
if payload is None and ct == "message/rfc822":
|
||||
try:
|
||||
payload = part.as_bytes()
|
||||
except Exception:
|
||||
payload = b""
|
||||
if payload is None:
|
||||
return None
|
||||
target_dir.mkdir(parents=True, exist_ok=True)
|
||||
filepath = target_dir / safe_name
|
||||
|
||||
+165
-39
@@ -44,7 +44,7 @@ from routes.email_helpers import (
|
||||
_send_smtp_message, _smtp_security_mode,
|
||||
_IMAP_TIMEOUT_SECONDS, _open_imap_connection,
|
||||
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
|
||||
_extract_attachment_text, _list_attachments_from_msg,
|
||||
_extract_attachment_text, _list_attachments_from_msg, _has_visible_attachments, _is_likely_signature_image_attachment,
|
||||
_extract_attachment_to_disk, _extract_html, _extract_text,
|
||||
_fetch_sender_thread_context, _pre_retrieve_context,
|
||||
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
|
||||
@@ -58,6 +58,7 @@ from routes.email_pollers import _start_poller
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
|
||||
EMAIL_READ_ATTACHMENT_VERSION = 2
|
||||
|
||||
|
||||
def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
|
||||
@@ -244,6 +245,21 @@ def _imap_uid_fetch(conn, uid_set: str | bytes, query: str):
|
||||
return conn.uid("FETCH", _uid_bytes(uid_set), query)
|
||||
|
||||
|
||||
def _imap_search_quote(value: str) -> str:
|
||||
return '"' + str(value or "").replace("\\", "\\\\").replace('"', '\\"') + '"'
|
||||
|
||||
|
||||
def _message_id_chain(*values: str) -> list[str]:
|
||||
seen = set()
|
||||
out = []
|
||||
for value in values:
|
||||
for mid in re.findall(r"<[^>]+>", value or ""):
|
||||
if mid not in seen:
|
||||
seen.add(mid)
|
||||
out.append(mid)
|
||||
return out
|
||||
|
||||
|
||||
def _uid_from_fetch_meta(meta_b: bytes) -> str:
|
||||
m = re.search(rb"\bUID\s+(\d+)\b", meta_b)
|
||||
return m.group(1).decode() if m else ""
|
||||
@@ -1003,6 +1019,65 @@ def setup_email_routes():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _related_thread_attachments_sync(
    folder: str,
    account_id: str | None,
    owner: str,
    current_uid: str,
    current_message_id: str,
    in_reply_to: str,
    references: str,
    limit: int = 12,
) -> list[dict]:
    """Return visible attachments from referenced messages in this folder.

    Looks up the messages named in ``references`` / ``in_reply_to`` by their
    Message-ID header and lists their non-signature attachments.

    Args:
        folder: Mailbox to search; opened read-only.
        account_id: Account passed to ``_imap``.
        owner: Owner passed to ``_imap``.
        current_uid: UID of the message being read; never used as a source.
        current_message_id: Message-ID of the message being read; dropped
            from the lookup list.
        in_reply_to: Raw ``In-Reply-To`` header value.
        references: Raw ``References`` header value.
        limit: Maximum number of attachments to return.

    Returns:
        At most ``limit`` attachment dicts, each extended with ``source_*``
        keys describing the message it came from. Any failure is logged at
        debug level and whatever was collected so far is returned.
    """
    current_mid = (current_message_id or "").strip()
    wanted_ids = [
        mid
        for mid in _message_id_chain(references, in_reply_to)
        if mid and mid != current_mid
    ]
    if not wanted_ids:
        return []

    related: list[dict] = []
    try:
        with _imap(account_id, owner=owner) as conn:
            conn.select(_q(folder), readonly=True)
            # Search newest referenced messages first; cap work so opening
            # a long thread stays bounded.
            for mid in reversed(wanted_ids[-10:]):
                if len(related) >= limit:
                    break
                status, data = _imap_uid_search(conn, f'(HEADER Message-ID {_imap_search_quote(mid)})')
                if status != "OK" or not data or not data[0]:
                    continue
                for uid_b in reversed(data[0].split()[-3:]):
                    # Re-check before every fetch: stopping only after an
                    # append would let each further UID add one more item.
                    if len(related) >= limit:
                        break
                    source_uid = uid_b.decode(errors="ignore")
                    if not source_uid or source_uid == str(current_uid):
                        continue
                    st2, msg_data = _imap_uid_fetch(conn, source_uid, "(BODY.PEEK[])")
                    if st2 != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                        continue
                    msg = email_mod.message_from_bytes(msg_data[0][1])
                    source_info = {
                        "source_uid": source_uid,
                        "source_folder": folder,
                        "source_message_id": (msg.get("Message-ID") or "").strip(),
                        "source_from": _decode_header(msg.get("From", "")),
                        "source_subject": _decode_header(msg.get("Subject", "")),
                        "source_date": msg.get("Date", ""),
                    }
                    for att in _list_attachments_from_msg(msg):
                        if len(related) >= limit:
                            break
                        if _is_likely_signature_image_attachment(att):
                            continue
                        related.append({**att, **source_info})
    except Exception as e:
        # Best effort: the reader still works without related attachments.
        logger.debug("related thread attachment lookup failed uid=%s: %s", current_uid, e)
    return related
|
||||
|
||||
@router.get("/list")
|
||||
async def list_emails(
|
||||
folder: str = Query("INBOX"),
|
||||
@@ -1273,6 +1348,17 @@ def setup_email_routes():
|
||||
sender_name, sender_addr = email.utils.parseaddr(sender)
|
||||
parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None
|
||||
attachments = _list_attachments_from_msg(msg)
|
||||
related_attachments = []
|
||||
if not _has_visible_attachments(msg):
|
||||
related_attachments = _related_thread_attachments_sync(
|
||||
folder,
|
||||
account_id,
|
||||
owner,
|
||||
uid,
|
||||
message_id,
|
||||
in_reply_to,
|
||||
references,
|
||||
)
|
||||
|
||||
if mark_seen:
|
||||
# Set \Seen in a separate readwrite session so concurrent reads
|
||||
@@ -1381,6 +1467,8 @@ def setup_email_routes():
|
||||
"body": body,
|
||||
"body_html": body_html,
|
||||
"attachments": attachments,
|
||||
"related_attachments": related_attachments,
|
||||
"attachment_version": EMAIL_READ_ATTACHMENT_VERSION,
|
||||
"cached_summary": cached_summary,
|
||||
"cached_ai_reply": cached_ai_reply,
|
||||
"boundaries": cached_boundaries,
|
||||
@@ -1411,6 +1499,12 @@ def setup_email_routes():
|
||||
"""Read email body. Cached for 30m, sync IMAP work runs in a thread."""
|
||||
ck = _read_cache_key(account_id, folder, uid, owner=owner)
|
||||
cached = _read_cache_get(ck)
|
||||
if cached is not None:
|
||||
# Older cached read responses lack the thread-attachment fallback.
|
||||
# Fetch once so replies that reference prior attachments can show
|
||||
# those files without waiting for cache expiry.
|
||||
if cached.get("attachment_version") != EMAIL_READ_ATTACHMENT_VERSION:
|
||||
cached = None
|
||||
if cached is not None:
|
||||
if mark_seen:
|
||||
try:
|
||||
@@ -1599,6 +1693,65 @@ def setup_email_routes():
|
||||
return None
|
||||
doc_session_id = _resolve_doc_session()
|
||||
|
||||
def _create_markdown_doc(content: str, summary: str):
    """Store ``content`` as a new active markdown document and return its id.

    Marks every currently active document inactive, inserts the document
    together with its version-1 record (``summary`` is the version summary),
    commits, then tags the document with its source.
    """
    from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV

    doc_id = str(uuid.uuid4())
    ver_id = str(uuid.uuid4())
    session = _SL()
    try:
        session.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
        document = _Doc(
            id=doc_id,
            session_id=doc_session_id,
            title=title,
            language="markdown",
            current_content=content,
            version_count=1,
            is_active=True,
        )
        session.add(document)
        first_version = _DV(
            id=ver_id,
            document_id=doc_id,
            version_number=1,
            content=content,
            summary=summary,
            source="upload",
        )
        session.add(first_version)
        session.commit()
    finally:
        session.close()
    _tag_doc_with_source(doc_id)
    return doc_id
|
||||
|
||||
def _attached_email_markdown(path):
    """Render an attached email file at ``path`` as a markdown document.

    The output has a title line, the From/To/Cc/Date headers that are
    present, the readable body, and a bullet list of the attached email's
    own attachments. An empty or unparseable file yields a short placeholder
    document instead.
    """
    raw_bytes = path.read_bytes()
    if not raw_bytes:
        return f"# Attached email: {base}\n\n_(empty email attachment)_"
    try:
        inner = email_mod.message_from_bytes(raw_bytes)
    except Exception as e:
        return f"# Attached email: {base}\n\nCould not parse this email attachment: {e}"

    subject = _decode_header(inner.get("Subject", "")) or base
    header_rows = [
        ("From", _decode_header(inner.get("From", ""))),
        ("To", _decode_header(inner.get("To", ""))),
        ("Cc", _decode_header(inner.get("Cc", ""))),
        ("Date", inner.get("Date", "")),
    ]
    body_text = _extract_text(inner).strip()
    nested_attachments = _list_attachments_from_msg(inner)

    out = [f"# Attached email: {subject}", ""]
    # Only headers that actually carry a value are shown.
    out.extend(f"**{label}:** {value}" for label, value in header_rows if value)
    out += ["", "## Body", "", body_text or "_(no readable body)_"]

    if nested_attachments:
        out += ["", "## Attachments", ""]
        for att in nested_attachments:
            size = int(att.get("size") or 0)
            if size < 1024:
                size_label = f"{size} B"
            else:
                size_label = f"{round(size / 1024)} KB"
            name = att.get("filename") or f"attachment_{att.get('index', '')}"
            ctype = att.get("content_type") or "application/octet-stream"
            out.append(f"- {name} ({ctype}, {size_label})")
    return "\n".join(out).strip()
|
||||
|
||||
# ── PDF path (existing) ────────────────────────────────────
|
||||
if ext == ".pdf":
|
||||
import shutil as _shutil
|
||||
@@ -1645,6 +1798,15 @@ def setup_email_routes():
|
||||
_tag_doc_with_source(doc_id)
|
||||
return {"doc_id": doc_id, "filename": filepath.name}
|
||||
|
||||
# ── Attached email (.eml / message/rfc822) ────────────────
|
||||
if ext == ".eml":
|
||||
try:
|
||||
content = _attached_email_markdown(filepath)
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to read email attachment: {e}", "filename": base}
|
||||
doc_id = _create_markdown_doc(content, "Imported attached email")
|
||||
return {"doc_id": doc_id, "filename": filepath.name}
|
||||
|
||||
# ── DOCX path: extract text → markdown document ───────────
|
||||
if ext == ".docx":
|
||||
try:
|
||||
@@ -1682,25 +1844,7 @@ def setup_email_routes():
|
||||
lines.append("")
|
||||
content = "\n".join(lines).strip() or f"_(empty {base})_"
|
||||
|
||||
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
|
||||
doc_id = str(uuid.uuid4())
|
||||
ver_id = str(uuid.uuid4())
|
||||
_db = _SL()
|
||||
try:
|
||||
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
|
||||
_db.add(_Doc(
|
||||
id=doc_id, session_id=doc_session_id, title=title,
|
||||
language="markdown", current_content=content,
|
||||
version_count=1, is_active=True,
|
||||
))
|
||||
_db.add(_DV(
|
||||
id=ver_id, document_id=doc_id, version_number=1,
|
||||
content=content, summary="Imported from DOCX", source="upload",
|
||||
))
|
||||
_db.commit()
|
||||
finally:
|
||||
_db.close()
|
||||
_tag_doc_with_source(doc_id)
|
||||
doc_id = _create_markdown_doc(content, "Imported from DOCX")
|
||||
return {"doc_id": doc_id, "filename": filepath.name}
|
||||
|
||||
# ── Plain text / markdown ────────────────────────────────
|
||||
@@ -1709,25 +1853,7 @@ def setup_email_routes():
|
||||
content = filepath.read_text(encoding="utf-8", errors="replace")
|
||||
except Exception as e:
|
||||
return {"error": f"Failed to read text file: {e}", "filename": base}
|
||||
from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
|
||||
doc_id = str(uuid.uuid4())
|
||||
ver_id = str(uuid.uuid4())
|
||||
_db = _SL()
|
||||
try:
|
||||
_db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
|
||||
_db.add(_Doc(
|
||||
id=doc_id, session_id=doc_session_id, title=title,
|
||||
language="markdown", current_content=content,
|
||||
version_count=1, is_active=True,
|
||||
))
|
||||
_db.add(_DV(
|
||||
id=ver_id, document_id=doc_id, version_number=1,
|
||||
content=content, summary="Imported from email attachment", source="upload",
|
||||
))
|
||||
_db.commit()
|
||||
finally:
|
||||
_db.close()
|
||||
_tag_doc_with_source(doc_id)
|
||||
doc_id = _create_markdown_doc(content, "Imported from email attachment")
|
||||
return {"doc_id": doc_id, "filename": filepath.name}
|
||||
|
||||
return {"error": f"Unsupported attachment type: {ext}", "filename": base}
|
||||
|
||||
+94
-2
@@ -1,8 +1,13 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
from copy import deepcopy
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from core.platform_compat import run_ssh_command
|
||||
from routes._validators import validate_remote_host, validate_ssh_port
|
||||
|
||||
|
||||
@@ -107,6 +112,73 @@ def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_v
|
||||
return system
|
||||
|
||||
|
||||
def _run_model_probe(host: str, ssh_port: str, cmd: str) -> str:
    """Run a shell probe and return its stripped stdout.

    When ``host`` is non-empty the command goes through ``run_ssh_command``
    (15 s timeout, 5 s connect timeout, host-key checking disabled);
    otherwise it is executed locally via ``bash -lc`` with a 15 s timeout.

    Returns:
        The command's stdout with surrounding whitespace removed, or ``""``
        when the exit status is non-zero or anything raises.
    """
    try:
        if not host:
            result = subprocess.run(
                ["bash", "-lc", cmd],
                capture_output=True,
                text=True,
                timeout=15,
            )
        else:
            result = run_ssh_command(
                host,
                ssh_port or None,
                cmd,
                timeout=15,
                connect_timeout=5,
                strict_host_key_checking=False,
                text=True,
            )
        # Only a clean exit yields output; failures collapse to "".
        output = (result.stdout or "").strip() if result.returncode == 0 else ""
    except Exception:
        # Best-effort probe: timeouts, missing binaries, SSH errors -> "".
        return ""
    return output
|
||||
|
||||
|
||||
def _shell_path_expr(path: str) -> str:
    """Quote ``path`` for a POSIX shell while keeping a leading ``~`` expandable.

    ``shlex.quote`` wraps any string containing ``~`` in single quotes, and
    the shell performs no tilde expansion inside quotes, so ``~/models``
    would be looked up as a literal directory named ``~``. A bare ``~`` or a
    ``~/`` prefix is therefore rewritten to ``"$HOME"``; everything else
    (including ``~user/...`` forms, which cannot be expanded safely) is
    quoted verbatim.
    """
    if path == "~":
        return '"$HOME"'
    if path.startswith("~/"):
        return '"$HOME"/' + shlex.quote(path[2:])
    return shlex.quote(path)


def _inspect_model_path(model_path: str, host: str = "", ssh_port: str = "") -> dict:
    """Read lightweight metadata from a local or SSH-visible HF model folder.

    Args:
        model_path: Directory to inspect. Only values starting with ``/`` or
            ``~`` are probed; empty values, URLs and relative names (for
            example catalog ids) are ignored.
        host: SSH host to probe on; empty means probe locally.
        ssh_port: SSH port passed through to ``_run_model_probe``.

    Returns:
        ``{}`` when ``model_path`` is not a probe-able path. Otherwise a dict
        holding any of:

        * ``model_ctx_max`` – first positive context-length style value
          found in ``config.json``.
        * ``model_weights_gb`` – summed size (GiB, 3 decimals) of
          ``*.safetensors`` / ``*.bin`` / ``*.gguf`` files under the path.
        * ``model_probe_error`` – human-readable reason when the directory,
          its ``config.json`` or its weight files could not be found.
    """
    path = (model_path or "").strip()
    if not path or path.startswith(("http://", "https://")):
        return {}
    if not path.startswith(("/", "~")):
        return {}

    qpath = _shell_path_expr(path)
    # The command always runs in a POSIX shell, so join with "/" explicitly
    # instead of relying on the server's os.path separator.
    qconfig = _shell_path_expr(path.rstrip("/") + "/config.json")
    out = {}

    exists = _run_model_probe(host, ssh_port, f"test -d {qpath} && printf found || printf missing")
    if exists != "found":
        target = host or "local container"
        out["model_probe_error"] = f"Model path is not visible on {target}: {path}"
        return out

    # Only the first 240 lines are read to keep the probe output bounded.
    raw_config = _run_model_probe(host, ssh_port, f"test -f {qconfig} && sed -n '1,240p' {qconfig}")
    if raw_config:
        try:
            cfg = json.loads(raw_config)
        except ValueError:
            cfg = {}
        if not isinstance(cfg, dict):
            # Valid JSON that is not an object carries no usable keys.
            cfg = {}
        for key in ("context_length", "max_position_embeddings", "n_ctx_train", "model_max_length", "max_seq_len"):
            value = cfg.get(key)
            if isinstance(value, (int, float)) and value > 0:
                out["model_ctx_max"] = int(value)
                break
    else:
        out["model_probe_error"] = f"config.json not found in model path: {path}"

    # -L follows symlinks: Hugging Face cache snapshots store weight files as
    # symlinks into a blobs directory, which a plain `-type f` would skip.
    size_cmd = (
        f"find -L {qpath} -type f \\( -name '*.safetensors' -o -name '*.bin' -o -name '*.gguf' \\) "
        "-printf '%s\\n' 2>/dev/null | awk '{s+=$1} END {if (s>0) printf \"%.6f\", s/1073741824}'"
    )
    weights = _run_model_probe(host, ssh_port, size_cmd)
    try:
        weights_gb = float(weights)
    except (TypeError, ValueError):
        # Empty output (no matching files, or the probe failed) -> no weights.
        weights_gb = 0.0
    if weights_gb > 0:
        out["model_weights_gb"] = round(weights_gb, 3)
    elif "model_probe_error" not in out:
        out["model_probe_error"] = f"No model weight files found in: {path}"
    return out
|
||||
|
||||
|
||||
def setup_hwfit_routes():
|
||||
router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
|
||||
|
||||
@@ -235,7 +307,7 @@ def setup_hwfit_routes():
|
||||
return {"system": system, "models": results}
|
||||
|
||||
@router.get("/profiles")
|
||||
def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
|
||||
def get_serve_profiles(model: str = "", model_path: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
|
||||
"""Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
|
||||
against the detected hardware on `host` (or local). Returns concrete
|
||||
flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
|
||||
@@ -272,8 +344,16 @@ def setup_hwfit_routes():
|
||||
if nn and (nn == want or want.endswith(nn) or nn.endswith(want)):
|
||||
m = entry
|
||||
break
|
||||
path_meta = _inspect_model_path(model_path or model, host=host, ssh_port=ssh_port)
|
||||
if m is None:
|
||||
return {"system": system, "profiles": [], "error": "model not in catalog"}
|
||||
return {
|
||||
"system": system,
|
||||
"profiles": [],
|
||||
"error": "model not in catalog",
|
||||
"model_ctx_max": int(path_meta.get("model_ctx_max") or 0),
|
||||
"model_weights_gb": float(path_meta.get("model_weights_gb") or 0),
|
||||
"model_probe_error": path_meta.get("model_probe_error") or "",
|
||||
}
|
||||
# Surface the model's trained context limit so the serve UI can clamp a
|
||||
# user-typed context down to it (asking for ctx > n_ctx_train overflows
|
||||
# and, with a quantized KV cache, can crash the GPU).
|
||||
@@ -283,6 +363,16 @@ def setup_hwfit_routes():
|
||||
if isinstance(v, (int, float)) and v > 0:
|
||||
model_ctx_max = int(v)
|
||||
break
|
||||
path_ctx_max = int(path_meta.get("model_ctx_max") or 0)
|
||||
if path_ctx_max > 0:
|
||||
model_ctx_max = max(model_ctx_max, path_ctx_max)
|
||||
model_weights_gb = float(path_meta.get("model_weights_gb") or 0)
|
||||
if model_weights_gb <= 0:
|
||||
for k in ("min_vram_gb", "required_gb", "size_gb", "recommended_ram_gb", "min_ram_gb"):
|
||||
v = m.get(k)
|
||||
if isinstance(v, (int, float)) and v > 0:
|
||||
model_weights_gb = float(v)
|
||||
break
|
||||
return {
|
||||
"system": system,
|
||||
"profiles": compute_serve_profiles(
|
||||
@@ -291,6 +381,8 @@ def setup_hwfit_routes():
|
||||
serve_quant=(serve_quant or None),
|
||||
),
|
||||
"model_ctx_max": model_ctx_max,
|
||||
"model_weights_gb": model_weights_gb,
|
||||
"model_probe_error": path_meta.get("model_probe_error") or "",
|
||||
}
|
||||
|
||||
@router.get("/image-models")
|
||||
|
||||
@@ -1064,9 +1064,11 @@ def setup_model_routes(model_discovery):
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
def _failure_delay(fails: int) -> float:
|
||||
def _failure_delay(fails: int, *, empty_local: bool = False) -> float:
    """Return the exponential back-off delay after ``fails`` failures.

    The delay doubles with each additional failure and is clamped to a
    ceiling. With ``empty_local`` set, a fixed 5.0 base and 30.0 ceiling
    replace ``_REFRESH_FAILURE_BASE`` / ``_REFRESH_FAILURE_MAX``. The
    result is compared against a time difference by the caller
    (presumably seconds — confirm against the clock used there).

    Returns 0.0 when ``fails`` is zero or negative.
    """
    if fails <= 0:
        return 0.0
    doubling = 2 ** max(0, fails - 1)
    if empty_local:
        base, ceiling = 5.0, 30.0
    else:
        base, ceiling = _REFRESH_FAILURE_BASE, _REFRESH_FAILURE_MAX
    return min(base * doubling, ceiling)
|
||||
|
||||
def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]:
|
||||
@@ -1097,7 +1099,12 @@ def setup_model_routes(model_discovery):
|
||||
fails = int(state.get("fail_count") or 0)
|
||||
if fails and not force:
|
||||
last_failure = float(state.get("last_failure") or 0.0)
|
||||
if now - last_failure < _failure_delay(fails):
|
||||
empty_local = (
|
||||
not cached
|
||||
and category == "local"
|
||||
and str(getattr(ep, "id", "") or "").startswith("local-")
|
||||
)
|
||||
if now - last_failure < _failure_delay(fails, empty_local=empty_local):
|
||||
return False, info
|
||||
if cached and not force:
|
||||
interval = _endpoint_refresh_interval(ep, category)
|
||||
|
||||
+53
-5
@@ -330,6 +330,9 @@ def add_user_install_bins_to_path():
|
||||
candidates.append(os.path.join(site.USER_BASE, 'bin'))
|
||||
except Exception:
|
||||
pass
|
||||
candidates.append(os.path.expanduser('~/bin'))
|
||||
candidates.append(os.path.expanduser('~/llama.cpp/build/bin'))
|
||||
candidates.append(os.path.expanduser('~/llama.cpp/build-vulkan/bin'))
|
||||
candidates.append(os.path.expanduser('~/.local/bin'))
|
||||
parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
|
||||
changed = False
|
||||
@@ -1188,6 +1191,7 @@ def setup_shell_routes() -> APIRouter:
|
||||
# venv over SSH so a remote `pip install` actually reflects here.
|
||||
remote_status: dict = {}
|
||||
remote_details: dict = {}
|
||||
remote_probe_error = ""
|
||||
remote_names = [
|
||||
p["name"]
|
||||
for p in packages
|
||||
@@ -1226,8 +1230,34 @@ def setup_shell_routes() -> APIRouter:
|
||||
break
|
||||
except ValueError as e:
|
||||
raise HTTPException(400, str(e))
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
remote_status = {}
|
||||
remote_probe_error = f"SSH package probe failed: {str(e)[:160]}"
|
||||
if "llama_cpp" in remote_names:
|
||||
try:
|
||||
inner = (
|
||||
'export PATH="$HOME/.local/bin:$HOME/bin:'
|
||||
'$HOME/llama.cpp/build/bin:$HOME/llama.cpp/build-vulkan/bin:$PATH"; '
|
||||
"command -v llama-server 2>/dev/null || true"
|
||||
)
|
||||
argv = _ssh_base_argv(host, ssh_port) + [inner]
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*argv,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
out, _err = await asyncio.wait_for(proc.communicate(), timeout=8)
|
||||
llama_server_path = out.decode("utf-8", errors="replace").strip().splitlines()
|
||||
llama_server_path = llama_server_path[-1].strip() if llama_server_path else ""
|
||||
if llama_server_path:
|
||||
remote_status["llama_cpp"] = True
|
||||
probe = remote_details.setdefault("llama_cpp", {})
|
||||
if isinstance(probe, dict):
|
||||
probe.setdefault("binaries", {})["llama-server"] = llama_server_path
|
||||
except Exception as e:
|
||||
if not remote_probe_error:
|
||||
remote_probe_error = f"SSH llama-server probe failed: {str(e)[:160]}"
|
||||
pass
|
||||
# Union of system_names + every package's system_prereqs. Probing
|
||||
# the prereqs alongside the main system deps in a single SSH call
|
||||
# avoids a second round-trip per Cookbook → Dependencies refresh.
|
||||
@@ -1272,7 +1302,9 @@ def setup_shell_routes() -> APIRouter:
|
||||
target_os_id = _os_id_from_release("\n".join(_osrel_lines))
|
||||
except ValueError as e:
|
||||
raise HTTPException(400, str(e))
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
if not remote_probe_error:
|
||||
remote_probe_error = f"SSH system probe failed: {str(e)[:160]}"
|
||||
pass
|
||||
elif not host:
|
||||
# Local target — probe in-process so the inline install command
|
||||
@@ -1290,7 +1322,12 @@ def setup_shell_routes() -> APIRouter:
|
||||
on_remote = bool(host and pkg.get("target") == "remote")
|
||||
probe = None
|
||||
if on_remote:
|
||||
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
|
||||
if remote_probe_error and pkg["name"] not in remote_status:
|
||||
pkg["installed"] = None
|
||||
pkg["probe_error"] = remote_probe_error
|
||||
pkg["status_note"] = remote_probe_error
|
||||
else:
|
||||
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
|
||||
probe = remote_details.get(pkg["name"])
|
||||
if isinstance(probe, dict):
|
||||
pkg["details"] = probe
|
||||
@@ -1353,9 +1390,19 @@ def setup_shell_routes() -> APIRouter:
|
||||
# reads "ready" green while inference runs at 3 tok/s on GPU
|
||||
# silicon — actively misleading.
|
||||
if pkg["name"] == "llama_cpp" and pkg.get("installed"):
|
||||
_native_llama_server = bool(
|
||||
isinstance(probe, dict)
|
||||
and isinstance(probe.get("binaries"), dict)
|
||||
and probe["binaries"].get("llama-server")
|
||||
)
|
||||
_gpu_capable = False
|
||||
_has_nvidia_target = False
|
||||
if on_remote and host:
|
||||
if _native_llama_server:
|
||||
# Native llama-server is the launcher path Cookbook now
|
||||
# prefers. Do not mark this as a CPU-only Python wheel just
|
||||
# because llama-cpp-python is absent from the selected venv.
|
||||
_gpu_capable = True
|
||||
elif on_remote and host:
|
||||
try:
|
||||
# Activate the configured venv FIRST so the probe
|
||||
# runs against the same python the launch script
|
||||
@@ -1609,7 +1656,8 @@ def setup_shell_routes() -> APIRouter:
|
||||
return {"ok": False, "error": f"Unsupported engine: {engine}"}
|
||||
host = str(body.get("remote_host") or "").strip()
|
||||
ssh_port = body.get("ssh_port")
|
||||
cmd = _llama_cpp_rebuild_cmd()
|
||||
update_source = bool(body.get("update_source"))
|
||||
cmd = _llama_cpp_rebuild_cmd(update_source=update_source)
|
||||
try:
|
||||
argv = (
|
||||
(_ssh_base_argv(host, ssh_port) + [cmd])
|
||||
|
||||
Reference in New Issue
Block a user