Merge remote-tracking branch 'upstream/dev' into feat/llm-self-eval

This commit is contained in:
GeekLuffy
2026-06-24 13:07:10 +05:30
332 changed files with 30741 additions and 5444 deletions
+543 -46
View File
@@ -267,6 +267,10 @@ _DOMAIN_RULES = {
- Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
- Use `manage_contact` to list, add, update, or delete contacts in the address book.
- Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
"integrations": """\
## Integration/API rules
- To query or control a configured service integration (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, or any other registered service), use `api_call` with the integration name, HTTP method, path, and optional JSON body.
- Do not use shell, curl, or `app_api` to reach a user's connected integration when `api_call` is available.""",
}
_DOMAIN_TOOL_MAP = {
@@ -277,9 +281,10 @@ _DOMAIN_TOOL_MAP = {
"notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
"ui": {"ui_control"},
"sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace", "manage_bg_jobs"},
"settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
"contacts": {"resolve_contact", "manage_contact"},
"integrations": {"api_call"},
}
def _domain_rules_for_tools(tool_names: set) -> list[str]:
@@ -408,7 +413,7 @@ Generate an image. Line 1 = description, line 2 = model name, line 3 = WxH (e.g.
"ask_teacher": "- ```ask_teacher``` — Escalate a hard question to a more capable model. Line 1 = model name or 'auto', rest = the question. Use when stuck or need expert knowledge.",
"list_models": "- ```list_models``` — Show all available AI models across all endpoints. Use when user asks what models are available.",
"manage_session": "- ```manage_session``` — Rename, archive, delete, fork, switch, or `list` chats (the UI calls them 'chats'; 'session' is internal). Line 1 = action (list/switch/rename/archive/unarchive/delete/important/unimportant/truncate/fork), Line 2 = exact chat id from `list_sessions` (or `current` where supported). For delete/archive/truncate, always list first and reuse the exact id; never invent placeholder ids. `switch`/`open` returns a clickable anchor link the user can tap to open the chat — use for \"open my X chat\".",
"manage_memory": "- ```manage_memory``` — Manage the user's persistent memory (facts, identity, preferences, context that persists across chats). Line 1 = action (list/add/edit/delete/search), rest = content. Use when user says 'remember this', states identity facts like 'my name is <name>' / 'call me <name>' / 'I live in <place>', or asks about stored memories.",
"manage_memory": "- ```manage_memory``` — Manage the user's persistent memory (facts about the USER themselves, their preferences, context that persists across chats). Line 1 = action (list/add/edit/delete/search), rest = content. Use when user says 'remember this' about themselves, states identity facts like 'my name is <name>' / 'call me <name>' / 'I live in <place>', or asks about stored memories. DO NOT use for info about another person (their address, phone, email, birthday) — that goes in `manage_contact`. If the user pastes an address/phone with a name and says 'save this for <person>', use `manage_contact add` with the address arg, NOT manage_memory.",
"manage_skills": "- ```manage_skills``` — Skill registry (SKILL.md format). Args (JSON): {\"action\": \"list|view|view_ref|search|add|edit|patch|publish|delete\", ...}. `list` returns the index of available skills (published + teacher-escalation drafts); `view name=foo` fetches the full SKILL.md; `view_ref name=foo path=...` loads a reference file under the skill directory. For `add`, provide an explicit kebab-case `name` and only report the exact returned name, because storage may normalize or dedupe it. Use this BEFORE doing domain work — there may already be a procedure (published or draft) that prescribes the correct steps. Drafts written by the teacher loop are authoritative guidance even though they're not yet published.",
"manage_tasks": "- ```manage_tasks``` — Create and manage scheduled background tasks (recurring AI jobs). Args (JSON): {\"action\": \"list|create|edit|delete|pause|resume|run\", ...}",
"manage_endpoints": "- ```manage_endpoints``` — Add, remove, or configure AI model API endpoints. Args (JSON): {\"action\": \"list|add|delete|enable|disable\", ...}. Use when user wants to add a new AI provider.",
@@ -428,7 +433,9 @@ Notes, checklists, AND user reminders. Use this for "create/add/write a note", t
```send_email
{"to": "recipient@example.com", "subject": "Re: Your question", "body": "Hi, ...", "account": "gmail"}
```
Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.""",
Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.
CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
"list_emails": """\
```list_emails
{"folder": "INBOX", "max_results": 20, "unread_only": false, "account": "gmail"}
@@ -439,7 +446,9 @@ List recent emails from a folder, newest first, including read messages by defau
```reply_to_email
{"uid": "1234", "body": "Sounds good — talk Friday.", "account": "gmail"}
```
SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).""",
SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).
CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
"bulk_email": """\
```bulk_email
{"action": "delete", "uids": ["10997", "10998"], "folder": "INBOX", "account": "Gmail"}
@@ -449,7 +458,7 @@ Bulk delete/archive/mark emails. Use this for "delete all those" after listing e
"archive_email": "- ```archive_email``` — Archive one email by UID. Args (JSON): {\"uid\":\"...\", \"folder\":\"INBOX\", \"account\":\"Gmail\"}. For multiple messages use bulk_email.",
"mark_email_read": "- ```mark_email_read``` — Mark one email read/unread. Args (JSON): {\"uid\":\"...\", \"read\":true, \"folder\":\"INBOX\", \"account\":\"Gmail\"}. For multiple messages use bulk_email.",
"resolve_contact": "- ```resolve_contact``` — Look up a contact's email by name. Searches CardDAV address book + sent email history. Args (JSON): {\"name\": \"...\"}. Use BEFORE send_email when the user gives only a name.",
"manage_contact": "- ```manage_contact``` — Create/update/delete/list CardDAV contacts. Args (JSON): {\"action\": \"list|add|update|delete\", \"name\": \"...\", \"email\": \"...\", \"uid\": \"...\"}. Use only for explicit address-book/contact requests with contact details. Do NOT use for user identity facts like 'my name is <name>'; save those with manage_memory. For update/delete, call action=list first to get the uid.",
"manage_contact": "- ```manage_contact``` — Create/update/delete/list CardDAV contacts. Args (JSON): {\"action\": \"list|add|update|delete\", \"name\": \"...\", \"email\": \"...\", \"phones\": [...], \"address\": \"...\", \"uid\": \"...\"}. Use for info about another person: email, phone, postal address. For 'save this for <person>' / address paste / phone next to a name, use this — NOT manage_memory. Do NOT use for user identity facts ('my name is X'); those are manage_memory. For update/delete, call action=list first for the uid.",
"manage_calendar": """\
```manage_calendar
{"action": "create_event", "summary": "<event title>", "dtstart": "<natural language or ISO datetime>"}
@@ -520,7 +529,7 @@ def get_builtin_overrides() -> dict:
ov = get_setting("builtin_tool_overrides", {})
return ov if isinstance(ov, dict) else {}
except Exception as e:
logger.warning('Failed to load builtin tool overrides: %s', e)
logger.warning("Failed to load builtin tool overrides, using defaults", exc_info=e)
return {}
@@ -532,17 +541,44 @@ def _section_text(name: str, default: str) -> str:
return val if isinstance(val, str) and val.strip() else default
def _compact_tool_line(name: str, section: str) -> str:
"""One-line fenced-tool usage hint for compact/local prompts."""
text = (section or "").strip()
if not text:
return f"- `{name}`"
if text.startswith("- "):
return text
lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
usage = []
in_fence = False
for ln in lines:
if ln.startswith("```"):
usage.append(ln)
in_fence = not in_fence
if len(usage) >= 3:
break
continue
if in_fence and len(usage) < 3:
usage.append(ln)
if usage:
return f"- `{name}` — " + " ".join(usage)
return f"- `{name}` — " + lines[0][:160]
def _assemble_prompt(tool_names: set, disabled_tools: set = None, compact: bool = False) -> str:
"""Build the system prompt with only the specified tools included."""
disabled = disabled_tools or set()
included = tool_names - disabled
if compact:
tool_list = ", ".join(sorted(included)) if included else "none"
tool_lines = []
for name, _default_section in TOOL_SECTIONS.items():
if name in included:
tool_lines.append(_compact_tool_line(name, _section_text(name, _default_section)))
parts = [
"You are an AI assistant with tool access.",
f"Available tools: {tool_list}.",
_API_AGENT_RULES,
_AGENT_PREAMBLE,
"## Available tools\n" + ("\n".join(tool_lines) if tool_lines else "none"),
_AGENT_RULES,
]
parts.extend(_domain_rules_for_tools(included))
return "\n\n".join(parts)
@@ -608,11 +644,6 @@ _API_HOSTS = frozenset([
"api.perplexity.ai", "api.x.ai",
"ollama.com", "api.venice.ai", "api.kimi.com",
"api.githubcopilot.com",
# Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
# Without these, `_is_api_model` falls back to keyword sniffing on the
# model name, so well-behaved local servers don't get native tool
# schemas and the agent silently degrades to fenced-block parsing.
"localhost", "127.0.0.1", "host.docker.internal",
])
_MCP_KEYWORDS = frozenset(["mcp", "browse", "browser", "website", "calendar", "event", "email",
"gmail", "screenshot", "navigate", "click", "miniflux", "rss", "feed"])
@@ -640,6 +671,28 @@ def _is_ollama_openai_compat_url(endpoint_url: str) -> bool:
return parsed.port == 11434 and (path == "/v1" or path.startswith("/v1/"))
def _is_local_openai_compat_url(endpoint_url: str) -> bool:
try:
parsed = urlparse(endpoint_url or "")
except Exception:
return False
host = (parsed.hostname or "").lower()
path = (parsed.path or "").rstrip("/")
if not (path == "/v1" or path.startswith("/v1/")):
return False
if host in {"localhost", "127.0.0.1", "0.0.0.0", "host.docker.internal"}:
return True
if host.startswith("192.168.") or host.startswith("10."):
return True
if host.startswith("172."):
try:
second = int(host.split(".")[1])
return 16 <= second <= 31
except Exception:
return False
return False
def _endpoint_lookup_keys(endpoint_url: str) -> List[str]:
"""Candidate ModelEndpoint.base_url keys for a runtime chat URL."""
raw = (endpoint_url or "").strip()
@@ -703,6 +756,17 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
# Matches text that is empty or consists solely of non-word characters and
# underscores (punctuation, whitespace, symbols).
_LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
# Greeting/slang opener anchored at the start of the message, followed by a
# word boundary. Everything after the opener is captured in the named group
# `tail`. Because `.` does not match newlines and `$` is not in MULTILINE
# mode, text with an embedded newline after the opener does not match.
_CASUAL_OPENING_RE = re.compile(
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
re.IGNORECASE,
)
# Task-related words; _is_casual_low_signal searches the opener's `tail` with
# this pattern and refuses to classify the message as casual on a hit.
_CASUAL_BLOCKLIST_RE = re.compile(
r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
r"download|model|email|document|doc|note|calendar|task|search|web|research|"
r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
re.IGNORECASE,
)
_EXPLICIT_CONTINUATION_RE = re.compile(
r"^\s*(?:"
r"yes|y|yeah|yep|ok|okay|sure|do it|go ahead|continue|carry on|"
@@ -712,6 +776,17 @@ _EXPLICIT_CONTINUATION_RE = re.compile(
r")\s*[.!?]*\s*$",
re.IGNORECASE,
)
# Retry / failure-report vocabulary ("try again", "it failed", "crashed").
# Searched anywhere in the latest user message by
# _is_contextual_retry_continuation.
_RETRY_CONTINUATION_RE = re.compile(
r"\b(?:try again|retry|again|rerun|re-run|run it again|launch it again|"
r"start it again|failed|fails?|died|crashed|broke|insta|instantly)\b",
re.IGNORECASE,
)
# Model-serving ("Cookbook") vocabulary: serving engines, launch verbs and
# model-family names. _is_contextual_retry_continuation searches the recent
# user context with this to decide whether a retry phrase refers to such work.
_COOKBOOK_CONTEXT_RE = re.compile(
r"\b(?:cookbook|serve|serving|served|launch|start|preset|vllm|sglang|"
r"llama\.?cpp|ollama|download|cached models?|model servers?|running models?|"
r"gpu box|ajax|qwen|gemma|llama|mistral|minimax)\b",
re.IGNORECASE,
)
def _is_explicit_continuation(text: str) -> bool:
@@ -719,6 +794,37 @@ def _is_explicit_continuation(text: str) -> bool:
return bool(_EXPLICIT_CONTINUATION_RE.match(str(text or "").strip()))
def _is_casual_low_signal(text: str) -> bool:
    """Report whether *text* is a brief greeting/slang message.

    Such messages should not inherit stale context from earlier turns.

    Args:
        text: The latest user message; ``None`` or empty is tolerated.

    Returns:
        ``True`` only when the message starts with a casual opener, the text
        after the opener contains no blocklisted task word, and that text has
        at most two word tokens.
    """
    opener = _CASUAL_OPENING_RE.match(str(text or "").strip())
    if opener is None:
        return False

    remainder = opener.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(remainder) is not None:
        return False

    # A short vocative after the opener is tolerated without listing specific
    # address terms ("hey man", "yo dude", "sup <name>"). A longer remainder
    # is more likely a real request and should get normal context/tooling.
    return len(re.findall(r"[A-Za-z0-9_'-]+", remainder)) <= 2
def _is_contextual_retry_continuation(messages: List[Dict], text: str) -> bool:
    """Decide whether a retry/failure follow-up continues active tool work.

    After a Cookbook launch the newest user turn is often just "try again it
    failed", while the model/host/command details sit one or two turns
    earlier. The check is kept deliberately narrow so that ordinary chat does
    not pick up stale Cookbook context.

    Args:
        messages: Conversation history passed to the recent-context helper.
        text: The latest user message.

    Returns:
        ``True`` when *text* contains a retry/failure phrase and the recent
        user context (up to 5 user turns, 1200 characters) mentions Cookbook
        vocabulary; ``False`` otherwise.
    """
    candidate = str(text or "").strip()
    mentions_retry = bool(candidate) and _RETRY_CONTINUATION_RE.search(candidate) is not None
    if not mentions_retry:
        return False

    history = _recent_context_for_retrieval(messages, max_user=5, max_chars=1200)
    return _COOKBOOK_CONTEXT_RE.search(history) is not None
def _assistant_requested_followup(messages: List[Dict]) -> bool:
"""True when the previous assistant turn asked for missing task details.
@@ -760,11 +866,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
which domain rule packs get appended to the system prompt.
"""
text = str(last_user or "").strip()
continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages)
retry_continuation = _is_contextual_retry_continuation(messages, text)
continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages) or retry_continuation
retrieval_query = _recent_context_for_retrieval(messages) if continuation else text
q = retrieval_query.lower()
if not text or bool(_LOW_SIGNAL_RE.match(text)):
if not text or bool(_LOW_SIGNAL_RE.match(text)) or _is_casual_low_signal(text):
return {
"low_signal": True,
"continuation": False,
@@ -807,10 +914,25 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
domains.add("sessions")
if has(r"\b(file|folder|directory|repo|git|grep|find in files|read file|edit file|shell|terminal|bash|python)\b"):
domains.add("files")
# Managing detached bash jobs: "kill the background job", "stop the job",
# "kill that job", "check the job output", "is the bg job done".
if (has(r"\b(background|bg)\s+(jobs?|task)\b")
or has(r"\b(kill|stop|cancel|terminate|check|tail|show|list)\b.{0,16}\bjobs?\b")
or has(r"\bjobs?\b.{0,16}\b(output|status|done|finished|running)\b")):
domains.add("files")
if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
domains.add("settings")
if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
domains.add("contacts")
# API-integration intent — calling a configured service via the api_call
# tool. Without this the #3794 repro ("Use the api_call tool to call Home
# Assistant GET /api/states") matched no domain, classified as low-signal,
# and the tool never reached the schema filter. Detect it explicitly so the
# "integrations" domain seeds api_call deterministically (see
# _DOMAIN_TOOL_MAP), independent of embedding retrieval.
if has(r"\bapi[ _]call\b", r"\bintegrations?\b",
r"\b(?:home ?assistant|miniflux|gitea|linkding|jellyfin)\b"):
domains.add("integrations")
low_signal = not continuation and not domains
return {
@@ -839,8 +961,11 @@ def _recent_context_for_retrieval(messages: List[Dict], max_user: int = 3, max_c
if isinstance(content, list):
content = " ".join(b.get("text", "") for b in content if isinstance(b, dict))
content = (content or "").strip()
# Skip injected tool-result envelopes — role=user but not human intent.
if not content or content.startswith("[Tool execution results]"):
# Skip injected envelopes — role=user but not human intent. Tool results
# are now wrapped via untrusted_context_message (metadata.trusted=False);
# keep the legacy "[Tool execution results]" prefix for older histories.
meta = msg.get("metadata") or {}
if not content or meta.get("trusted") is False or content.startswith("[Tool execution results]"):
continue
collected.append(content)
if len(collected) >= max_user:
@@ -859,6 +984,8 @@ def _build_system_prompt(
compact: bool = False,
owner: Optional[str] = None,
suppress_local_context: bool = False,
suppress_skills: bool = False,
active_email: Optional[Dict[str, str]] = None,
) -> List[Dict]:
"""Build agent system prompt, inject MCP/document context, merge consecutive system msgs."""
global _cached_base_prompt, _cached_base_prompt_key
@@ -875,7 +1002,7 @@ def _build_system_prompt(
_ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
except Exception:
_ov_sig = ""
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context)
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context, suppress_skills)
if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
agent_prompt = _cached_base_prompt
# Skill index is user-editable (name + description), so it must never
@@ -885,6 +1012,7 @@ def _build_system_prompt(
disabled_tools, mcp_mgr, needs_admin, relevant_tools,
mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
suppress_local_context=suppress_local_context,
suppress_skills=suppress_skills,
)
else:
agent_prompt, _skill_index_block = _build_base_prompt(
@@ -896,6 +1024,7 @@ def _build_system_prompt(
compact=compact,
owner=owner,
suppress_local_context=suppress_local_context,
suppress_skills=suppress_skills,
)
if not active_document:
_cached_base_prompt = agent_prompt
@@ -924,8 +1053,8 @@ def _build_system_prompt(
try:
from src.user_time import current_datetime_context_message
_datetime_message = current_datetime_context_message()
except Exception:
pass
except Exception as e:
logger.warning("Failed to build datetime context message", exc_info=e)
# Document context is kept as a SEPARATE message (not merged into the tool
# prompt) so the context trimmer doesn't destroy it when truncating the
@@ -968,8 +1097,8 @@ def _build_system_prompt(
try:
from src.pdf_form_doc import find_source_upload_id
_is_form_backed = bool(find_source_upload_id(active_document.current_content or ""))
except Exception:
pass
except Exception as e:
logger.warning("Failed to detect if document is form-backed, assuming plain", exc_info=e)
if _is_form_backed:
doc_ctx = (
@@ -1051,6 +1180,66 @@ def _build_system_prompt(
else:
set_active_document(None)
# Active email reader — frontend told us the user has an email open.
# Inject a context block so "reply", "summarize this", "what does it say"
# resolve to the real UID instead of the agent inventing a fresh .md
# draft with fake headers. This is the email equivalent of _doc_message.
_email_message = None
if active_email and active_email.get("uid"):
_em_uid = active_email.get("uid", "")
_em_folder = active_email.get("folder", "INBOX")
_em_account = active_email.get("account", "")
_em_subject = active_email.get("subject", "") or "(no subject)"
_em_from = active_email.get("from", "") or "(unknown sender)"
_em_preview = (active_email.get("body_preview", "") or "").strip()
_preview_block = f"\nBody preview:\n```\n{_em_preview[:1800]}\n```" if _em_preview else ""
_acct_arg = f" {_em_account}" if _em_account else ""
email_ctx = (
f"ACTIVE EMAIL OPEN (the user has this email open in a reader window right now)\n"
f"UID: {_em_uid}\n"
f"Folder: {_em_folder}\n"
f"Account: {_em_account or '(default)'}\n"
f"From: {_em_from}\n"
f"Subject: {_em_subject}{_preview_block}\n\n"
f"CRITICAL DEFAULT — every request about email this turn refers to "
f"THIS email unless the user names a DIFFERENT specific recipient "
f"(a name, an email address, or another thread). Examples that "
f"ALL mean reply-to-the-open-email:\n"
f"'reply' / 'reply to this' / 'respond'\n"
f"'write email saying X' / 'send email saying X' / 'draft something'\n"
f"'tell them X' / 'say hi' / 'thanks' / 'ack' / 'lmk'\n"
f"'summarize it' / 'what does it say' / 'tldr'\n"
f"'forward this' / 'forward to <addr>'\n"
f"DO NOT ASK THE USER 'who do you want to send this to?' — the "
f"answer is ALWAYS the sender of the open email (above) unless they "
f"named someone else. Asking that is the wrong move every time.\n\n"
f"RULES for the open email:\n"
f"1. DRAFT a reply (default for any 'write/send/reply/tell them' "
f"request without a different recipient): call `ui_control` with "
f"`action=\"open_email_reply\"` and `extra=\"{_em_uid} {_em_folder} "
f"reply\"`. This opens the proper reply doc with To/Subject/"
f"In-Reply-To pre-filled by the backend. The user will see and edit "
f"it before sending. DO NOT `create_document` a markdown file with "
f"hand-written `To:` / `Subject:` / `In-Reply-To:` headers — that "
f"is wrong every time.\n"
f"2. SEND a reply immediately (skip the draft): call "
f"`reply_to_email` with the UID above. Only do this when the user "
f"explicitly says 'send' / 'send the reply' / 'reply and send'.\n"
f"3. READ the full body (the preview above may be truncated): "
f"call `read_email` with the UID/folder/account above.\n"
f"4. SUMMARIZE / answer questions about it: read it first, then "
f"answer in chat. Don't create a document for a summary unless "
f"the user explicitly asks for one.\n"
f"5. Never ask the user to paste the email or 'share it with you' "
f"— you already have its identity above and can read the full body.\n"
f"6. The ONLY time you ask 'who to send to?' is when the user "
f"explicitly says 'send a NEW email to someone else' or names a "
f"recipient you can't identify. A bare 'send email saying X' = the "
f"open email's sender.\n"
)
_email_message = untrusted_context_message("active email reader", email_ctx)
_email_message["_protected"] = True
# Inject writing style for any email writing path. This is deliberately
# broader than read/list: models may compose via send_email, reply_to_email,
# or ui_control open_email_reply after the first tool round.
@@ -1119,7 +1308,7 @@ def _build_system_prompt(
# few. If the teacher wrote a procedure for "open my X chat" last
# time the student failed, this is where the student finds it
# before deciding which tool to call.
if not suppress_local_context:
if not suppress_local_context and not suppress_skills:
try:
last_user = _extract_last_user_message(messages)
# Respect the user's skills-enabled toggle (mirrors memory_enabled).
@@ -1258,6 +1447,9 @@ def _build_system_prompt(
if _doc_message:
merged.insert(last_user_idx, _doc_message)
last_user_idx += 1 # the document message is now at last_user_idx
if _email_message:
merged.insert(last_user_idx, _email_message)
last_user_idx += 1
if _skills_message:
merged.insert(last_user_idx, _skills_message)
last_user_idx += 1
@@ -1283,6 +1475,7 @@ def _build_base_prompt(
compact: bool = False,
owner: Optional[str] = None,
suppress_local_context: bool = False,
suppress_skills: bool = False,
):
"""Build the agent prompt with only relevant tools included.
@@ -1292,12 +1485,18 @@ def _build_base_prompt(
from src.tool_index import ALWAYS_AVAILABLE
disabled = set(disabled_tools or [])
if not get_setting("image_gen_enabled", True):
if not get_setting("image_gen_enabled", False):
disabled.add("generate_image")
if relevant_tools is not None:
# RAG mode: include always-available + retrieved + admin (if needed)
tool_names = set(ALWAYS_AVAILABLE) | set(relevant_tools)
# RAG mode: trust the relevant_tools set as already-composed.
# get_tools_for_query starts from ALWAYS_AVAILABLE and may
# *discard* tools that conflict with the query's intent (e.g.
# drop manage_memory for clear contact-save patterns). Unioning
# ALWAYS_AVAILABLE back in here used to silently undo those
# drops. Only force-include the irreducible loop primitives
# (ask_user, update_plan) as belt-and-suspenders.
tool_names = set(relevant_tools) | {"ask_user", "update_plan"}
if needs_admin:
tool_names |= _ADMIN_TOOLS
agent_prompt = _assemble_prompt(tool_names, disabled, compact=compact)
@@ -1329,7 +1528,7 @@ def _build_base_prompt(
# The caller wraps it in untrusted_context_message and ships it as a
# user-role message — same treatment as the matched-skills block.
skill_index_block = ""
if not suppress_local_context:
if not suppress_local_context and not suppress_skills:
try:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
@@ -1488,8 +1687,14 @@ def _append_tool_results(
if round_reasoning:
msg["reasoning_content"] = round_reasoning
messages.append(msg)
# Tool output (shell/python stdout, file reads, fetched pages, email
# bodies, MCP results) is sourced from outside the server. Wrap it as
# untrusted data so prompt-injection inside a tool result is treated as
# data, not instructions — same hardening as skills (#788) and the
# web/RAG context. THREAT_MODEL.md lists tool output as a surface that
# must go through untrusted_context_message.
messages.append(
{"role": "user", "content": f"[Tool execution results]\n\n{tool_output_text}"}
untrusted_context_message("tool execution results", tool_output_text)
)
@@ -1738,6 +1943,7 @@ async def stream_agent_loop(
max_tool_calls: int = 0,
context_length: int = 0,
active_document=None,
active_email: Optional[Dict[str, str]] = None,
session_id: Optional[str] = None,
disabled_tools: Optional[Set[str]] = None,
owner: Optional[str] = None,
@@ -1747,6 +1953,7 @@ async def stream_agent_loop(
approved_plan: Optional[str] = None,
tool_policy: Optional[ToolPolicy] = None,
workspace: Optional[str] = None,
forced_tools: Optional[Set[str]] = None,
_is_teacher_run: bool = False,
) -> AsyncGenerator[str, None]:
"""Streaming agent loop generator.
@@ -1786,6 +1993,20 @@ async def stream_agent_loop(
_needs_admin = _detect_admin_intent(messages)
_last_user = _extract_last_user_message(messages)
_intent = _classify_agent_request(messages, _last_user)
_low_signal_turn = bool(_intent.get("low_signal"))
_casual_low_signal_turn = _is_casual_low_signal(_last_user)
_direct_low_signal = (
_low_signal_turn
and not bool(_intent.get("continuation"))
and not plan_mode
and not approved_plan
and not guide_only
and (_casual_low_signal_turn or active_document is None)
and (_casual_low_signal_turn or not active_email)
and (_casual_low_signal_turn or not workspace)
and not forced_tools
and not relevant_tools
)
# Tool retrieval uses the latest message by default. It may inherit recent
# user turns only for explicit continuations ("yes", "do it", "1").
_retrieval_query = str(_intent.get("retrieval_query") or _last_user)
@@ -1793,11 +2014,86 @@ async def stream_agent_loop(
"[agent-intent] latest=%r continuation=%s low_signal=%s domains=%s retrieval_query=%r",
_last_user[:120],
bool(_intent.get("continuation")),
bool(_intent.get("low_signal")),
_low_signal_turn,
sorted(_intent.get("domains") or []),
_retrieval_query[:200],
)
_mcp_disabled_map = _load_mcp_disabled_map() if mcp_mgr else {}
if _direct_low_signal:
logger.info("[agent] direct low-signal reply path for latest=%r", _last_user[:80])
direct_messages = [{"role": "user", "content": _last_user}]
direct_response = ""
direct_start = time.time()
direct_actual_model = model
real_input_tokens = 0
real_output_tokens = 0
try:
async for chunk in stream_llm_with_fallback(
[(endpoint_url, model, headers)] + list(fallbacks or []),
direct_messages,
temperature=temperature,
max_tokens=min(max_tokens or 128, 128),
prompt_type=None,
tools=None,
timeout=int(get_setting("agent_stream_timeout_seconds", 300) or 300),
session_id=session_id,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
data = json.loads(chunk[6:])
except json.JSONDecodeError:
yield chunk
continue
if data.get("type") == "usage":
usage = data.get("data", {}) or {}
direct_actual_model = usage.get("model") or direct_actual_model
real_input_tokens += usage.get("input_tokens", 0) or 0
real_output_tokens += usage.get("output_tokens", 0) or 0
continue
if data.get("type") == "model_actual":
direct_actual_model = data.get("model") or direct_actual_model
data["requested_model"] = model
yield f"data: {json.dumps(data)}\n\n"
continue
if data.get("type") == "fallback":
direct_actual_model = data.get("answered_by") or direct_actual_model
yield chunk
continue
if "delta" in data:
if not data.get("thinking"):
direct_response += data.get("delta", "")
yield chunk
continue
yield chunk
elif chunk.startswith("event: "):
yield chunk
except Exception as _direct_err:
logger.warning("[agent] direct low-signal path failed: %s", _direct_err)
fallback = "Hey."
direct_response += fallback
yield f"data: {json.dumps({'delta': fallback})}\n\n"
if not direct_response.strip():
fallback = "Hey."
direct_response = fallback
yield f"data: {json.dumps({'delta': fallback})}\n\n"
duration = time.time() - direct_start
metrics = {
"model": direct_actual_model,
"requested_model": model,
"input_tokens": real_input_tokens or estimate_tokens(direct_messages),
"output_tokens": real_output_tokens or max(len(direct_response) // 4, 1),
"total_time": round(duration, 2),
"response_time": round(duration, 2),
"agent_rounds": 0,
"tool_calls": 0,
"direct_low_signal": True,
}
yield f"data: {json.dumps({'type': 'metrics', 'data': metrics})}\n\n"
yield "data: [DONE]\n\n"
return
if plan_mode and mcp_mgr:
# Allow read-only MCP tools to investigate, block write/unknown ones:
# hide them from the schemas AND reject them at runtime by qualified name.
@@ -1809,11 +2105,11 @@ async def stream_agent_loop(
# RAG-based tool selection: retrieve relevant tools for this query.
# If caller provided a pre-computed set (e.g. task_scheduler), use that.
_relevant_tools = set() if guide_only else relevant_tools
_relevant_tools = relevant_tools
_t1 = time.time()
if _relevant_tools:
logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
if not guide_only and not _relevant_tools and _low_signal_turn:
from src.tool_index import ALWAYS_AVAILABLE
if workspace:
# An active workspace IS the file-work signal: a vague "look at the
@@ -1904,6 +2200,53 @@ async def stream_agent_loop(
if _relevant_tools is not None and active_document is not None:
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
# Per-request UI toggles are stronger than retrieval. If the user turns on
# Search, the model must see the search tools even when the latest text is a
# typo or otherwise low-signal for tool RAG.
if not guide_only and forced_tools:
if _relevant_tools is None:
from src.tool_index import ALWAYS_AVAILABLE
_relevant_tools = set(ALWAYS_AVAILABLE)
_relevant_tools.update(t for t in forced_tools if t not in disabled_tools)
# The skill index injected by _build_system_prompt tells the model to
# call `manage_skills action=view`, and Jaccard-matched skills are pasted
# into the prompt as procedures to follow — but neither path goes through
# tool selection, so the model can be handed a procedure naming tools
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas
# in lockstep: manage_skills is callable whenever any skill is indexed,
# and a matched skill's declared requires_toolsets ride along with it.
if not guide_only and _relevant_tools is not None and not _low_signal_turn:
try:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
_skills_on = True
try:
from routes.prefs_routes import _load_for_user as _load_prefs
_skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
except Exception:
pass
_sm = SkillsManager(DATA_DIR)
_owner_skills = _sm.load(owner=owner) if _skills_on else []
if _owner_skills:
_relevant_tools.add("manage_skills")
if _retrieval_query:
# Validate against every known executable tool, not just
# TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
# schemas without a prompt-prose section.
from src.tool_policy import known_tool_names
_known = known_tool_names()
for _sk in _sm.get_relevant_skills(
_retrieval_query, skills=_owner_skills,
threshold=0.25, max_items=3,
):
_relevant_tools.update(
t for t in (_sk.get("requires_toolsets") or [])
if t in _known
)
except Exception as _e:
logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
if _relevant_tools is not None:
logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])
@@ -1938,7 +2281,7 @@ async def stream_agent_loop(
_model_supports_tools = any(kw in _model_lc for kw in (
"gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
"qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
"llama-3.3", "llama-4",
"llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
# Local-served models that follow OpenAI-style function calling
# via vLLM's `--enable-auto-tool-choice`. Belt-and-suspenders
# with the per-endpoint flag above.
@@ -1980,13 +2323,16 @@ async def stream_agent_loop(
_is_api_model = False
else:
_is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
_compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat
messages, mcp_schemas = _build_system_prompt(
messages, model, active_document, mcp_mgr, disabled_tools,
needs_admin=_needs_admin, relevant_tools=_relevant_tools,
mcp_disabled_map=_mcp_disabled_map,
compact=_is_api_model,
compact=_compact_agent_prompt,
owner=owner,
suppress_local_context=guide_only,
suppress_skills=_low_signal_turn,
active_email=active_email,
)
if plan_mode and not guide_only:
# Steer the model to investigate-then-propose. Hard tool gating handles
@@ -2071,6 +2417,14 @@ async def stream_agent_loop(
# Strip internal metadata keys before sending to the LLM API
messages = [{k: v for k, v in msg.items() if k != "_protected"} for msg in messages]
agent_prompt_tokens = estimate_tokens(messages)
logger.info(
"[agent-timing] prep_done model=%s prompt_tokens=%s context_length=%s prep=%s",
model,
agent_prompt_tokens,
context_length,
{k: round(v, 3) for k, v in prep_timings.items()},
)
yield f"data: {json.dumps({'type': 'agent_prep', 'data': {k: round(v, 3) for k, v in prep_timings.items()}})}\n\n"
full_response = ""
@@ -2167,9 +2521,17 @@ async def stream_agent_loop(
elif _is_api_model:
# Filter schemas by RAG-selected tools (if available)
if _relevant_tools:
# _build_base_prompt unions _ADMIN_TOOLS into the prompt
# sections when admin intent fires — the schema list must
# offer the same names, or the model reads prose describing
# tools it cannot call and substitutes the nearest schema
# it does have (e.g. manage_memory for manage_skills).
_schema_names = set(_relevant_tools)
if _needs_admin:
_schema_names |= _ADMIN_TOOLS
base_schemas = [
s for s in FUNCTION_TOOL_SCHEMAS
if s.get("function", {}).get("name") in _relevant_tools
if s.get("function", {}).get("name") in _schema_names
]
_mcp_filtered = [
s for s in mcp_schemas
@@ -2207,6 +2569,19 @@ async def stream_agent_loop(
# complementary cap for the rare stream that trickles bytes forever and
# so never trips the inactivity timeout. Generous — only catches runaway.
_round_deadline = time.time() + max(agent_stream_timeout * 4, 1200)
_round_start = time.time()
_round_first_event_logged = False
_round_first_token_logged = False
logger.info(
"[agent-timing] round_start round=%s model=%s endpoint=%s prompt_tokens=%s tools=%s native_tools=%s timeout=%s",
round_num,
model,
endpoint_url,
estimate_tokens(messages),
len(_tool_names_sent),
bool(all_tool_schemas),
agent_stream_timeout,
)
async for chunk in stream_llm_with_fallback(
_candidates,
messages,
@@ -2217,11 +2592,30 @@ async def stream_agent_loop(
timeout=agent_stream_timeout,
session_id=session_id,
):
if not _round_first_event_logged:
_round_first_event_logged = True
logger.info(
"[agent-timing] first_event round=%s elapsed=%.3fs kind=%s",
round_num,
time.time() - _round_start,
"error" if chunk.startswith("event: error") else "data",
)
if time.time() > _round_deadline:
logger.warning(f"[agent] round {round_num} stream exceeded wall-clock deadline; cutting off")
logger.warning(
"[agent-timing] round_deadline round=%s elapsed=%.3fs deadline_s=%s",
round_num,
time.time() - _round_start,
max(agent_stream_timeout * 4, 1200),
)
break
# Forward error events from stream_llm to the frontend
if chunk.startswith("event: error"):
logger.warning(
"[agent-timing] stream_error round=%s elapsed=%.3fs chunk=%r",
round_num,
time.time() - _round_start,
chunk[:500],
)
yield chunk
continue
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
@@ -2301,6 +2695,15 @@ async def stream_agent_loop(
if not first_token_received:
time_to_first_token = time.time() - total_start
first_token_received = True
if not _round_first_token_logged:
_round_first_token_logged = True
logger.info(
"[agent-timing] first_visible_token round=%s elapsed=%.3fs total_elapsed=%.3fs thinking=%s",
round_num,
time.time() - _round_start,
time.time() - total_start,
bool(data.get("thinking")),
)
# Keep reasoning deltas in a separate accumulator so
# we can echo them back via `reasoning_content` on the
# next request (DeepSeek requires this; harmless for
@@ -2370,7 +2773,21 @@ async def stream_agent_loop(
yield chunk
# Intercept [DONE] — don't forward until all rounds finish
tool_blocks, used_native = _resolve_tool_blocks(round_response, native_tool_calls, round_num, is_api_model=_is_api_model)
logger.info(
"[agent-timing] round_stream_done round=%s elapsed=%.3fs text_chars=%s tool_calls=%s first_event=%s first_token=%s",
round_num,
time.time() - _round_start,
len(round_response),
len(native_tool_calls),
_round_first_event_logged,
_round_first_token_logged,
)
tool_blocks, used_native = _resolve_tool_blocks(
round_response,
native_tool_calls,
round_num,
is_api_model=(_is_api_model and not guide_only),
)
# Force-answer round: we told the model to STOP calling tools and
# answer. If it ignored that and emitted a (possibly DSML) tool
@@ -2454,7 +2871,7 @@ async def stream_agent_loop(
# model with no real native_tool_calls) must not be stripped from the
# persisted text either — otherwise it streams once and then disappears
# on reload (#3222 follow-up).
cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native)).strip()
cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native and not guide_only)).strip()
round_texts.append(cleaned_round)
if not tool_blocks:
@@ -2526,6 +2943,15 @@ async def stream_agent_loop(
_intent_nudge_count += 1
_matched_phrase = _intent_match.group(0).strip()
logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
_lower_phrase = _matched_phrase.lower()
_cookbook_log_hint = ""
if any(_word in _lower_phrase for _word in ("log", "logs", "output", "tail", "status")):
_cookbook_log_hint = (
" If this is about a Cookbook/model serve, the concrete calls are: "
"`list_served_models` first, then `tail_serve_output` with the "
"session_id from the serve/list result. Never answer with "
"\"check logs\" when those tools are available."
)
messages.append({
"role": "system",
"content": (
@@ -2534,6 +2960,7 @@ async def stream_agent_loop(
"see you announced the action but didn't run it, which "
"is the most frustrating thing you can do. "
"DO IT NOW: emit the actual function call this turn. "
f"{_cookbook_log_hint}"
"If you decided not to do it after all, say so plainly in "
"one sentence instead of restating the plan."
),
@@ -2705,6 +3132,46 @@ async def stream_agent_loop(
)
desc, result = await _tool_task
# A skill the model just loaded can prescribe tools that weren't
# RAG-selected this turn (declared via requires_toolsets in its
# frontmatter). Union them into the selection so the NEXT round's
# schema list includes them — otherwise the model reads "use
# grep" from the skill it fetched but has no grep schema to call.
if (
block.tool_type == "manage_skills"
and _relevant_tools is not None
and not result.get("error")
):
_ms_args = {}
_ms_raw = (block.content or "").strip()
if _ms_raw.startswith("{"):
try:
_ms_args = json.loads(_ms_raw)
except json.JSONDecodeError:
_ms_args = {}
_ms_name = str(_ms_args.get("name", "") or "").strip()
if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
try:
from services.memory.skills import SkillsManager as _SkM
from src.constants import DATA_DIR as _DD
from src.tool_policy import known_tool_names as _ktn
_known = _ktn()
for _sk in _SkM(_DD).load(owner=owner):
if _sk.get("name") == _ms_name:
_new = {
t for t in (_sk.get("requires_toolsets") or [])
if t in _known and t not in _relevant_tools
}
if _new:
_relevant_tools.update(_new)
logger.info(
"[tool-rag] skill '%s' unlocked tools for next round: %s",
_ms_name, sorted(_new),
)
break
except Exception as _e:
logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
# Extract structured web sources from web_search tool output.
# web_search returns {"output": ..., "exit_code": 0}; check "output"
# first so the <!-- SOURCES:…--> marker is found and stripped even
@@ -2748,9 +3215,12 @@ async def stream_agent_loop(
f'data: {json.dumps({"type": "ui_control", "data": result})}\n\n'
)
# ask_user: the agent posed a multiple-choice question. Emit it so the
# frontend renders clickable options, then end the turn (below) and
# wait — the user's pick becomes the next message.
# ask_user: remember the payload now, but emit the interactive event
# only *after* tool_output below. Emitting it before tool_output let
# the subsequent tool-card rewrite/scroll push the choices out of
# view. The payload is also copied into the persisted tool event so
# history reload can reconstruct an unanswered card.
_pending_ask_user_event = None
if "ask_user" in result:
# The question lives in the tool args. ChatMessage.to_dict()
# replays only role+content to the model next turn — tool_event
@@ -2765,9 +3235,7 @@ async def stream_agent_loop(
_auq_delta = ("\n\n" if full_response.strip() else "") + _auq_q
full_response += _auq_delta
yield 'data: ' + json.dumps({"delta": _auq_delta}) + '\n\n'
yield (
f'data: {json.dumps({"type": "ask_user", "data": result["ask_user"]})}\n\n'
)
_pending_ask_user_event = _auq
_awaiting_user = True
# update_plan: agent wrote back to the plan (ticked a step / revised).
@@ -2822,9 +3290,25 @@ async def stream_agent_loop(
# Emit tool_output (include ui_event data if present)
tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
if _pending_ask_user_event:
# Keep enough state in the streamed tool result for alternate
# clients to render the prompt without depending on event order.
tool_output_data["ask_user"] = _pending_ask_user_event
if "ui_event" in result:
tool_output_data["ui_event"] = result["ui_event"]
for k in ("toggle_name", "state", "mode", "model", "endpoint_url", "theme_name", "colors"):
for k in (
"toggle_name", "state", "mode", "model", "endpoint_url",
"theme_name", "colors",
# ui_control open_email_reply payload — without these the
# frontend openReplyDraft bails on undefined uid and the
# reply window silently never opens.
"uid", "folder", "account_id",
# Optional pre-filled body for open_email_reply so the
# agent can compose-and-open in one tool call.
"body",
# ui_control open_panel payload
"panel",
):
if k in result:
tool_output_data[k] = result[k]
# Forward image data from generate_image tool
@@ -2840,6 +3324,14 @@ async def stream_agent_loop(
tool_output_data["diff"] = result["diff"]
yield f'data: {json.dumps(tool_output_data)}\n\n'
# This must be the final UI event for ask_user: the frontend appends
# the card below the now-settled tool node and cancels any between-
# round spinner. The turn ends after the current tool batch.
if _pending_ask_user_event:
yield (
f'data: {json.dumps({"type": "ask_user", "data": _pending_ask_user_event})}\n\n'
)
# Native document tools open in the editor + carry the REAL doc id.
# Emit a doc_update so the frontend opens/activates it and sends it
# back as active_doc_id next turn (otherwise the agent can't "see"
@@ -2897,6 +3389,11 @@ async def stream_agent_loop(
# this the diff shows live but vanishes from saved history.
if result.get("diff"):
tool_event["diff"] = result["diff"]
if _pending_ask_user_event:
# Persist the structured question with the tool event. On a
# reload, chatRenderer can restore the card; a later user
# message removes it as answered.
tool_event["ask_user"] = _pending_ask_user_event
tool_events.append(tool_event)
if block.tool_type in _VERIFIER_EFFECTFUL_TOOLS:
_effectful_used = True
+13 -1
View File
@@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
next_seq += 1
if run.status != "running":
return
heartbeat_idx = 0
while True:
seq, ev = await q.get()
try:
seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
except asyncio.TimeoutError:
# Keep slow local models/proxies alive while they prefill before
# the first token. SSE comments are ignored by the UI but reset
# browser/proxy idle timers, which prevents "empty response"
# disconnects on llama.cpp first-token latencies of 30s+.
if run.status == "running":
heartbeat_idx += 1
yield f": heartbeat {heartbeat_idx}\n\n"
continue
seq, ev = (None, None)
if seq is None: # end sentinel
while next_seq < len(run.buffer): # flush any tail the sentinel raced
yield run.buffer[next_seq]
+19 -6
View File
@@ -22,6 +22,14 @@ from .subprocess_tools import BashTool, PythonTool
from .web_tools import WebSearchTool, WebFetchTool
from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
from .model_interaction_tools import ChatWithModelTool, AskTeacherTool, ListModelsTool
from .bg_job_tools import ManageBgJobsTool
from .session_tools import CreateSessionTool, ListSessionsTool, SendToSessionTool, ManageSessionTool
from .admin_tools import (
ADMIN_TOOL_HANDLERS,
do_manage_endpoints, do_manage_mcp, do_manage_webhooks,
do_manage_tokens, do_manage_settings,
)
TOOL_HANDLERS = {
"bash": BashTool().execute,
@@ -40,7 +48,17 @@ TOOL_HANDLERS = {
"suggest_document": SuggestDocumentTool().execute,
"manage_documents": ManageDocumentTool().execute,
"get_workspace": GetWorkspaceTool().execute,
"chat_with_model": ChatWithModelTool().execute,
"ask_teacher": AskTeacherTool().execute,
"list_models": ListModelsTool().execute,
"manage_bg_jobs": ManageBgJobsTool().execute,
"create_session": CreateSessionTool().execute,
"list_sessions": ListSessionsTool().execute,
"send_to_session": SendToSessionTool().execute,
"manage_session": ManageSessionTool().execute,
}
# Config/integration admin tools (manage_endpoints/mcp/webhooks/tokens/settings).
TOOL_HANDLERS.update(ADMIN_TOOL_HANDLERS)
# ---------------------------------------------------------------------------
# Constants (re-exported for backward compatibility — single source of truth
@@ -52,7 +70,7 @@ PYTHON_TIMEOUT = 30
# Tool types that trigger execution
TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
"grep", "glob", "ls", "get_workspace",
"grep", "glob", "ls", "get_workspace", "manage_bg_jobs",
"create_document", "update_document", "edit_document",
"search_chats",
"chat_with_model", "create_session", "list_sessions",
@@ -127,10 +145,5 @@ from src.tool_implementations import ( # noqa: E402, F401
do_search_chats,
do_manage_skills,
do_manage_tasks,
do_manage_endpoints,
do_manage_mcp,
do_manage_webhooks,
do_manage_tokens,
do_manage_settings,
do_api_call,
)
+784
View File
@@ -0,0 +1,784 @@
"""Config/integration admin agent tools (TOOL_HANDLERS).
Moved verbatim from tool_implementations.py as part of the tool-registry
migration (#3629, the `admin_tools.py` bullet): manage_endpoints / manage_mcp /
manage_webhooks / manage_tokens / manage_settings, plus manage_mcp's
command-allowlist guard. Each impl keeps its `do_*(content, owner)` shape;
ADMIN_TOOL_HANDLERS wraps them into registry `execute(content, ctx)` adapters
via one factory.
"""
import json
import os
import re
import logging
from typing import Optional, Dict
from src.tool_utils import get_mcp_manager, _parse_tool_args
logger = logging.getLogger(__name__)
async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict:
    """Manage model endpoints: list, add, delete, enable, disable.

    Args:
        content: Tool-call arguments, parsed with ``_parse_tool_args``. Keys
            read: ``action`` (defaults to ``"list"``); ``name``, ``base_url``
            and ``api_key`` for ``add``; ``endpoint_id`` for ``delete``,
            ``enable`` and ``disable``.
        owner: Part of the shared ``do_*(content, owner)`` signature; this tool
            does not read it.

    Returns:
        A dict with ``exit_code`` 0 and a ``response`` summary on success
        (``list`` additionally carries ``endpoints``), or ``exit_code`` 1 and
        an ``error`` message on failure. Exceptions raised while talking to the
        database are logged and converted to the error form, never raised.
    """
    from core.database import SessionLocal, ModelEndpoint

    try:
        args = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = args.get("action", "list")
    db = SessionLocal()
    try:
        if action == "list":
            eps = db.query(ModelEndpoint).all()
            items = [
                {"id": e.id, "name": e.name, "base_url": e.base_url,
                 "is_enabled": e.is_enabled}
                for e in eps
            ]
            return {"response": f"{len(items)} endpoints", "endpoints": items, "exit_code": 0}

        elif action == "add":
            import uuid as _uuid
            from datetime import datetime

            name = args.get("name", "")
            base_url = args.get("base_url", "")
            api_key = args.get("api_key", "")
            if not base_url:
                return {"error": "base_url is required", "exit_code": 1}
            # Short id: first 8 characters of a random UUID.
            eid = str(_uuid.uuid4())[:8]
            ep = ModelEndpoint(id=eid, name=name or base_url, base_url=base_url,
                               api_key=api_key, is_enabled=True,
                               created_at=datetime.utcnow(), updated_at=datetime.utcnow())
            db.add(ep)
            db.commit()
            return {"response": f"Added endpoint '{name or base_url}' (id: {eid})", "exit_code": 0}

        elif action in ("delete", "enable", "disable"):
            eid = args.get("endpoint_id", "")
            # Without this guard a missing id falls through to the lookup and
            # reports the confusing "Endpoint  not found".
            if not eid:
                return {"error": "endpoint_id is required", "exit_code": 1}
            ep = db.query(ModelEndpoint).filter(ModelEndpoint.id == eid).first()
            if not ep:
                return {"error": f"Endpoint {eid} not found", "exit_code": 1}
            if action == "delete":
                name = ep.name
                db.delete(ep)
                db.commit()
                return {"response": f"Deleted endpoint '{name}'", "exit_code": 0}
            ep.is_enabled = (action == "enable")
            db.commit()
            return {"response": f"Endpoint '{ep.name}' {action}d", "exit_code": 0}

        else:
            return {"error": f"Unknown action: {action}", "exit_code": 1}
    except Exception as e:
        # logger.exception keeps the traceback; the caller still only sees the
        # message, exactly as before.
        logger.exception("manage_endpoints error: %s", e)
        return {"error": str(e), "exit_code": 1}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# MCP server management tool
# ---------------------------------------------------------------------------
# Parallel to routes/cookbook_helpers._validate_serve_cmd but deliberately the
# opposite policy: that gate guards an admin-only serve command and allows
# interpreters (python3/etc) because model-serving needs them, whereas this is
# the model/prompt-injection-reachable manage_mcp path, so interpreters and
# runners are denied here.
#
# Commands that can execute arbitrary code regardless of their arguments. These
# are NEVER accepted on the manage_mcp agent path, even if an operator lists one
# in ODYSSEUS_MCP_ALLOWED_COMMANDS -- a stdio server that genuinely needs an
# interpreter or package runner must be registered via the trusted admin route.
# Entries are lowercase basenames: _validate_mcp_command lowercases the command,
# drops a .exe/.cmd/.bat extension, and checks both that name and the name with
# a trailing version suffix removed against this set.
_MCP_DENIED_COMMANDS = frozenset({
    "sh", "bash", "zsh", "fish", "dash", "ksh", "csh", "tcsh", "ash", "busybox",
    "cmd", "command.com", "powershell", "pwsh",
    "python", "pypy", "node", "nodejs", "deno", "bun", "ruby", "jruby",
    "perl", "raku", "php", "lua", "luajit", "tclsh", "wish", "expect", "rscript",
    "groovy", "scala", "elixir", "erl", "iex", "java", "javac", "jshell", "jbang",
    "kotlin", "kotlinc", "dotnet", "mono", "swift", "osascript", "tsx", "ts-node",
    "npx", "bunx", "uvx", "pipx", "npm", "pnpm", "yarn", "pip", "uv",
    "gem", "cargo", "go", "bundle", "poetry", "conda", "mamba", "brew",
    "apt", "apt-get", "yum", "dnf", "pacman", "apk",
    "env", "xargs", "nohup", "setsid", "nice", "ionice", "time", "timeout",
    "watch", "stdbuf", "unbuffer", "script", "ssh", "scp", "sshpass", "sudo",
    "doas", "su", "make", "cmake", "docker", "podman", "kubectl", "find",
    "awk", "gawk", "sed", "vi", "vim", "nvim", "emacs", "ed", "tee", "eval",
})
# Argv flags that make even an allowlisted binary execute inline code.
# Short flags are matched case-sensitively as a prefix of the stripped arg, so
# glued forms (-cimport os) are caught -- and so is any other arg that merely
# begins with -c, -e or -m. Long flags are matched on the lowercased arg, either
# exactly or followed by "=" (--eval=...).
_MCP_CODE_EXEC_SHORT_FLAGS = ("-c", "-e", "-m")
_MCP_CODE_EXEC_LONG_FLAGS = ("--eval", "--exec", "--print", "--module", "--command", "--require")
# URL-scheme prefixes; an arg whose lowercased form starts with one of these is
# refused as a URL.
_MCP_URL_SCHEMES = ("http://", "https://", "ftp://", "ftps://", "file://", "data:", "jar:", "blob:")
# Shell metacharacters refused in command/args. Args are passed as an argv list
# (no shell), but refusing these keeps the surface narrow and obvious.
_MCP_SHELL_METACHARS = set(";|&$`><\n\r")
# Env vars that let a child process load attacker-supplied code before main().
# Names are uppercase; _validate_mcp_command strips and uppercases each supplied
# key before the lookup, so the check is case-insensitive.
_MCP_DANGEROUS_ENV = frozenset({
    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "DYLD_INSERT_LIBRARIES",
    "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH", "PYTHONPATH", "PYTHONSTARTUP",
    "PYTHONHOME", "PYTHONEXECUTABLE", "NODE_OPTIONS", "NODE_PATH", "BASH_ENV",
    "ENV", "SHELLOPTS", "PERL5LIB", "PERL5OPT", "RUBYOPT", "RUBYLIB", "GEM_PATH",
    "R_PROFILE", "R_HOME", "PATH", "IFS", "PROMPT_COMMAND",
})
def _mcp_allowed_commands() -> set:
    """Return the operator-approved MCP launcher basenames for the agent path.

    Reads ODYSSEUS_MCP_ALLOWED_COMMANDS on every call, splits it on commas,
    trims and lowercases each entry, and drops blanks. An unset or empty
    variable yields an empty set, so nothing is allowed by default. Names on
    the deny list are still rejected by the caller even when listed here.
    """
    configured = os.environ.get("ODYSSEUS_MCP_ALLOWED_COMMANDS", "")
    trimmed = (entry.strip() for entry in configured.split(","))
    return {entry.lower() for entry in trimmed if entry}
def _validate_mcp_command(command, args, env) -> Optional[str]:
    """Vet a model-supplied stdio MCP registration before it is stored or spawned.

    Guards the manage_mcp 'add' path, whose command/args/env can be steered by
    prompt injection (issue #438): text smuggled into a skill description,
    memory entry, fetched page, or email body must not be able to register a
    stdio server that runs arbitrary code as the app UID.

    Args:
        command: Launcher name. Must be a non-blank string; it is stripped
            before every check below.
        args: ``None``, a list of strings, or a JSON string encoding such a list.
        env: Falsy, a dict, or a JSON string encoding a dict.

    Returns:
        ``None`` when the registration is acceptable, otherwise a string
        describing the first rule it broke.
    """
    if not (isinstance(command, str) and command.strip()):
        return "command must be a non-empty string"
    cmd = command.strip()
    if "/" in cmd or "\\" in cmd:
        return "command must be a bare executable name, not a path"
    if not _MCP_SHELL_METACHARS.isdisjoint(cmd):
        return "command contains shell metacharacters"

    base = cmd.lower()
    if base.endswith((".exe", ".cmd", ".bat")):
        base = base.rsplit(".", 1)[0]
    # Collapse versioned aliases to the family name (python3.11 -> python,
    # node18 -> node, pip3 -> pip). Both spellings are checked against the deny
    # list so an operator cannot allowlist a runtime alias back in by accident.
    canon = re.sub(r"[-_.]?\d+(?:\.\d+)*$", "", base)
    if base in _MCP_DENIED_COMMANDS or canon in _MCP_DENIED_COMMANDS:
        return (
            f"command '{cmd}' is not allowed on the agent MCP path: "
            "interpreters, runtimes, package runners, and shells can execute "
            "arbitrary code. Register such a server via the admin route instead."
        )
    if base not in _mcp_allowed_commands():
        return (
            f"command '{cmd}' is not in the MCP allowlist. Add it to "
            "ODYSSEUS_MCP_ALLOWED_COMMANDS if you trust it, or register the "
            "server via the admin route."
        )

    if args is not None:
        arg_list = args
        if isinstance(arg_list, str):
            try:
                arg_list = json.loads(arg_list)
            except Exception:
                return "args must be a JSON list"
        if not isinstance(arg_list, list):
            return "args must be a list"
        for raw in arg_list:
            if not isinstance(raw, str):
                return "args must all be strings"
            trimmed = raw.strip()
            lowered = trimmed.lower()
            # Prefix match on the short flags also covers the exact-token form.
            if any(trimmed.startswith(flag) for flag in _MCP_CODE_EXEC_SHORT_FLAGS):
                return f"arg '{raw}' is a code-execution flag and is not allowed"
            for flag in _MCP_CODE_EXEC_LONG_FLAGS:
                if lowered == flag or lowered.startswith(flag + "="):
                    return f"arg '{raw}' is a code-execution flag and is not allowed"
            if any(lowered.startswith(scheme) for scheme in _MCP_URL_SCHEMES):
                return f"arg '{raw}' is a remote URL and is not allowed"
            # Metacharacters are checked on the untrimmed arg, so a leading or
            # trailing newline is refused too.
            if not _MCP_SHELL_METACHARS.isdisjoint(raw):
                return f"arg '{raw}' contains shell metacharacters"

    if env:
        env_map = env
        if isinstance(env_map, str):
            try:
                env_map = json.loads(env_map)
            except Exception:
                return "env must be a JSON object"
        if not isinstance(env_map, dict):
            return "env must be an object"
        for key in env_map:
            if str(key).strip().upper() in _MCP_DANGEROUS_ENV:
                return f"env var '{key}' can inject code into the child process and is not allowed"

    return None
async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
    """Manage MCP servers: list, add, delete, enable, disable, reconnect, list_tools.

    Args:
        content: Tool-call arguments, parsed with ``_parse_tool_args``.
            ``action`` selects the operation (defaults to ``"list"``). ``add``
            reads ``name``, ``command``, ``args`` and ``env``; ``delete``,
            ``reconnect``, ``enable`` and ``disable`` read ``server_id``.
        owner: Part of the shared ``do_*(content, owner)`` signature; this tool
            does not read it.

    Returns:
        A dict with ``exit_code`` 0 and a ``response`` string on success
        (``list`` adds ``servers``, ``list_tools`` adds ``tools``), or
        ``exit_code`` 1 and an ``error`` string on failure.
    """
    try:
        args = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = args.get("action", "list")

    if action == "list":
        mcp = get_mcp_manager()
        if not mcp:
            return {"response": "No MCP manager available", "servers": [], "exit_code": 0}
        from core.database import SessionLocal, McpServer
        db = SessionLocal()
        try:
            servers = db.query(McpServer).all()
            items = []
            for s in servers:
                st = mcp.get_server_status(s.id)
                status = st.get("status", "disconnected")
                tool_count = st.get("tool_count", 0)
                items.append({"id": s.id, "name": s.name, "transport": s.transport,
                              "is_enabled": s.is_enabled, "status": status,
                              "tool_count": tool_count})
            return {"response": f"{len(items)} MCP servers", "servers": items, "exit_code": 0}
        finally:
            db.close()

    elif action == "add":
        from core.database import SessionLocal, McpServer
        import uuid as _uuid
        from datetime import datetime

        name = args.get("name", "")
        command = args.get("command", "")
        cmd_args = args.get("args", [])
        env = args.get("env", {})
        if not name or not command:
            return {"error": "name and command are required", "exit_code": 1}

        # Validate BEFORE any DB write or spawn: a rejected registration must
        # leave no enabled row (which would otherwise auto-reconnect on restart)
        # and must not attempt a connection.
        _mcp_err = _validate_mcp_command(command, cmd_args, env)
        if _mcp_err:
            return {"error": f"manage_mcp: refused unsafe server registration: {_mcp_err}", "exit_code": 1}

        # Normalise to exactly what the validator inspected, so the stored and
        # spawned values cannot differ from the vetted ones:
        #  - the validator checks the stripped command, so use the stripped form;
        #  - args may legitimately be None or a JSON string encoding a list;
        #  - env may legitimately be falsy (e.g. null) or a JSON string encoding
        #    a dict.
        # Previously a null args/env was persisted as-is and then blew up in
        # json.loads() at connect time, reported as a connection failure.
        command = command.strip()
        if cmd_args is None:
            cmd_args = []
        elif isinstance(cmd_args, str):
            cmd_args = json.loads(cmd_args)
        if not env:
            env = {}
        elif isinstance(env, str):
            env = json.loads(env)

        # Short id: first 8 characters of a random UUID.
        sid = str(_uuid.uuid4())[:8]
        db = SessionLocal()
        try:
            srv = McpServer(id=sid, name=name, transport="stdio", command=command,
                            args=json.dumps(cmd_args),
                            env=json.dumps(env),
                            is_enabled=True, created_at=datetime.utcnow(), updated_at=datetime.utcnow())
            db.add(srv)
            db.commit()
        finally:
            db.close()

        # Try to connect. Best-effort: the row stays registered even when the
        # first connection attempt fails, and the response reports 0 tools.
        mcp = get_mcp_manager()
        tool_count = 0
        if mcp:
            try:
                await mcp.connect_server(
                    sid, name, "stdio", command=command,
                    args=cmd_args,
                    env=env,
                )
                st = mcp.get_server_status(sid)
                tool_count = st.get("tool_count", 0)
            except Exception as e:
                logger.warning("MCP connect failed for %s: %s", name, e)
        return {"response": f"Added MCP server '{name}' ({tool_count} tools)", "exit_code": 0}

    elif action == "delete":
        sid = args.get("server_id", "")
        if not sid:
            return {"error": "server_id is required", "exit_code": 1}
        from core.database import SessionLocal, McpServer
        db = SessionLocal()
        try:
            srv = db.query(McpServer).filter(McpServer.id == sid).first()
            if not srv:
                return {"error": f"Server {sid} not found", "exit_code": 1}
            name = srv.name
            mcp = get_mcp_manager()
            if mcp:
                try:
                    await mcp.disconnect_server(sid)
                except Exception as e:
                    # Still best-effort (the row is deleted regardless), but
                    # leave a trace instead of swallowing the failure silently.
                    logger.warning("MCP disconnect failed for %s during delete: %s", name, e)
            db.delete(srv)
            db.commit()
            return {"response": f"Deleted MCP server '{name}'", "exit_code": 0}
        finally:
            db.close()

    elif action == "reconnect":
        sid = args.get("server_id", "")
        if not sid:
            return {"error": "server_id is required", "exit_code": 1}
        mcp = get_mcp_manager()
        if not mcp:
            return {"error": "MCP manager not available", "exit_code": 1}
        try:
            await mcp.disconnect_server(sid)
            from core.database import SessionLocal, McpServer
            db2 = SessionLocal()
            try:
                srv = db2.query(McpServer).filter(McpServer.id == sid).first()
                if srv:
                    # args/env are stored as JSON text; empty columns mean none.
                    _args = json.loads(srv.args) if srv.args else []
                    _env = json.loads(srv.env) if srv.env else {}
                    await mcp.connect_server(
                        server_id=sid,
                        name=srv.name,
                        transport=srv.transport,
                        command=srv.command,
                        args=_args,
                        env=_env,
                        url=srv.url,
                    )
                    st = mcp.get_server_status(sid)
                    return {"response": f"Reconnected '{srv.name}' ({st.get('tool_count', 0)} tools)", "exit_code": 0}
                return {"error": f"Server {sid} not found", "exit_code": 1}
            finally:
                db2.close()
        except Exception as e:
            return {"error": str(e), "exit_code": 1}

    elif action in ("enable", "disable"):
        sid = args.get("server_id", "")
        if not sid:
            return {"error": "server_id is required", "exit_code": 1}
        from core.database import SessionLocal, McpServer
        db = SessionLocal()
        try:
            srv = db.query(McpServer).filter(McpServer.id == sid).first()
            if not srv:
                return {"error": f"Server {sid} not found", "exit_code": 1}
            # Only the stored flag is flipped here; no connect/disconnect is
            # issued by this branch.
            srv.is_enabled = (action == "enable")
            db.commit()
            return {"response": f"MCP server '{srv.name}' {action}d", "exit_code": 0}
        finally:
            db.close()

    elif action == "list_tools":
        mcp = get_mcp_manager()
        if not mcp:
            return {"response": "No MCP manager", "tools": [], "exit_code": 0}
        tools = mcp.get_all_tools()
        # Descriptions are truncated to 100 characters to keep the result small.
        items = [{"name": t["name"], "server": t["server_name"],
                  "description": t.get("description", "")[:100]} for t in tools]
        return {"response": f"{len(items)} MCP tools available", "tools": items, "exit_code": 0}

    else:
        return {"error": f"Unknown action: {action}", "exit_code": 1}
# ---------------------------------------------------------------------------
# Webhook management tool
# ---------------------------------------------------------------------------
async def do_manage_webhooks(content: str, owner: Optional[str] = None) -> Dict:
    """Handle the `manage_webhooks` tool: list, add, delete, enable, disable.

    `content` is the raw tool-call argument blob (decoded by
    `_parse_tool_args`); its `action` field selects the operation and
    defaults to "list". `owner` is accepted for handler-signature
    compatibility and is not consulted here.

    Returns a dict that always carries `exit_code`, plus `response` (and
    `webhooks` for "list") on success or `error` on failure.
    """
    from core.database import SessionLocal

    try:
        params = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = params.get("action", "list")
    session = SessionLocal()
    try:
        from core.database import Webhook

        if action == "list":
            listing = []
            for row in session.query(Webhook).all():
                listing.append({
                    "id": row.id,
                    "name": row.name,
                    "url": row.url,
                    "events": row.events,
                    "is_active": row.is_active,
                })
            return {"response": f"{len(listing)} webhooks", "webhooks": listing, "exit_code": 0}

        if action == "add":
            import uuid as _uuid
            from datetime import datetime
            from src.webhook_manager import validate_events, validate_webhook_url

            label = params.get("name", "")
            target = params.get("url", "")
            subscribed = params.get("events", "chat.completed")
            if not target:
                return {"error": "url is required", "exit_code": 1}
            # Both validators signal bad input with ValueError; surface the
            # message to the caller instead of logging it as a server error.
            try:
                target = validate_webhook_url(target)
                subscribed = validate_events(subscribed)
            except ValueError as exc:
                return {"error": str(exc), "exit_code": 1}
            new_id = str(_uuid.uuid4())[:8]
            display = label or target
            record = Webhook(
                id=new_id,
                name=display,
                url=target,
                events=subscribed,
                is_active=True,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
            )
            session.add(record)
            session.commit()
            return {"response": f"Added webhook '{display}'", "exit_code": 0}

        if action in ("delete", "enable", "disable"):
            # All three operate on one existing row, looked up by id.
            hook_id = params.get("webhook_id", "")
            found = session.query(Webhook).filter(Webhook.id == hook_id).first()
            if not found:
                return {"error": f"Webhook {hook_id} not found", "exit_code": 1}
            if action == "delete":
                removed_name = found.name
                session.delete(found)
                session.commit()
                return {"response": f"Deleted webhook '{removed_name}'", "exit_code": 0}
            found.is_active = (action == "enable")
            session.commit()
            return {"response": f"Webhook '{found.name}' {action}d", "exit_code": 0}

        return {"error": f"Unknown action: {action}", "exit_code": 1}
    except Exception as e:
        logger.error(f"manage_webhooks error: {e}")
        return {"error": str(e), "exit_code": 1}
    finally:
        session.close()
# ---------------------------------------------------------------------------
# API token management tool
# ---------------------------------------------------------------------------
async def do_manage_tokens(content: str, owner: Optional[str] = None) -> Dict:
    """Handle the `manage_tokens` tool: list, create or delete API tokens.

    `content` is the raw tool-call argument blob; its `action` field
    defaults to "list". `owner` is accepted for handler-signature
    compatibility and is not consulted here.

    "create" returns the raw token in the `token` field; only its bcrypt
    hash and first 8 characters are stored on the row.
    """
    from core.database import SessionLocal, ApiToken

    try:
        params = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = params.get("action", "list")
    session = SessionLocal()
    try:
        if action == "list":
            summary = []
            for row in session.query(ApiToken).all():
                summary.append({
                    "id": row.id,
                    "name": row.name,
                    "token_prefix": row.token_prefix + "...",
                    "is_active": row.is_active,
                })
            return {"response": f"{len(summary)} API tokens", "tokens": summary, "exit_code": 0}

        if action == "create":
            import uuid as _uuid
            import secrets
            import bcrypt
            from datetime import datetime

            label = params.get("name", "API Token")
            plaintext = secrets.token_urlsafe(32)
            hashed = bcrypt.hashpw(plaintext.encode(), bcrypt.gensalt()).decode()
            token_id = str(_uuid.uuid4())[:8]
            record = ApiToken(
                id=token_id,
                name=label,
                token_hash=hashed,
                token_prefix=plaintext[:8],
                is_active=True,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
            )
            session.add(record)
            session.commit()
            return {"response": f"Created token '{label}'", "token": plaintext, "exit_code": 0}

        if action == "delete":
            token_id = params.get("token_id", "")
            found = session.query(ApiToken).filter(ApiToken.id == token_id).first()
            if not found:
                return {"error": f"Token {token_id} not found", "exit_code": 1}
            removed_name = found.name
            session.delete(found)
            session.commit()
            return {"response": f"Deleted token '{removed_name}'", "exit_code": 0}

        return {"error": f"Unknown action: {action}", "exit_code": 1}
    except Exception as e:
        logger.error(f"manage_tokens error: {e}")
        return {"error": str(e), "exit_code": 1}
    finally:
        session.close()
# ---------------------------------------------------------------------------
# Settings/preferences management tool
# ---------------------------------------------------------------------------
async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
"""Handle the `manage_settings` tool.

`content` is the raw tool-call argument blob; its `action` field selects
the operation and defaults to "list". Supported actions:

- "list": every scalar/list setting known to DEFAULT_SETTINGS, secrets masked.
- "get": one setting by key or friendly alias, secret values masked.
- "set": coerce and store one setting; secrets and structured (dict/list)
  settings are refused with an explanatory `response`.
- "delete" / "reset": restore one setting to its DEFAULT_SETTINGS value.
- "disable_tool" / "enable_tool" / "list_tools": edit or show the
  `disabled_tools` list in the same settings store.

`owner` is accepted for handler-signature compatibility and is not used.
Returns a dict with `exit_code` plus `response` (and action-specific
fields) or `error`.
"""
try:
args = _parse_tool_args(content)
except ValueError:
return {"error": "Invalid JSON arguments", "exit_code": 1}
action = args.get("action", "list")
from core.database import SessionLocal
# The DB session is only used by _endpoint_model_from_cache below; it is
# opened up front so the outer `finally` can always close it.
db = SessionLocal()
try:
# set/get/list/delete operate on the REAL app settings (the same store
# the Settings panel writes), so changing a model / voice / search
# engine / reminder channel from chat actually takes effect.
from src.settings import load_settings, save_settings, DEFAULT_SETTINGS
# Secrets/credentials the agent must NOT write: kept read-only (masked)
# so API keys never flow through chat. User sets these in the panel.
_SECRET_KEYS = {
"brave_api_key", "google_pse_key", "google_pse_cx",
"tavily_api_key", "serper_api_key", "app_public_url",
}
def _is_secret(k):
# `token` must be a suffix, not a substring: otherwise the int
# setting `agent_input_token_budget` (which even has a "token budget"
# alias to set it from chat) is wrongly classified as a credential.
return (
k in _SECRET_KEYS
or k.endswith("token")
or any(t in k for t in ("api_key", "_key", "secret", "password"))
)
# Friendly aliases → real keys, so natural phrasing resolves.
_ALIASES_SET = {
"voice": "tts_voice", "tts voice": "tts_voice", "tts": "tts_enabled",
"text to speech": "tts_enabled", "tts provider": "tts_provider",
"speech speed": "tts_speed", "voice speed": "tts_speed",
"stt": "stt_enabled", "speech to text": "stt_enabled", "transcription": "stt_enabled",
"search engine": "search_provider", "search provider": "search_provider",
"search results": "search_result_count", "result count": "search_result_count",
"default model": "default_model", "chat model": "default_model",
"default endpoint": "default_endpoint_id",
"task model": "task_model", "background model": "task_model",
"teacher model": "teacher_model", "teacher": "teacher_enabled",
"utility model": "utility_model", "research model": "research_model",
"research max tokens": "research_max_tokens",
"vision model": "vision_model", "vision": "vision_enabled",
"image model": "image_model", "image quality": "image_quality",
"image gen": "image_gen_enabled", "image generation": "image_gen_enabled",
"reminder channel": "reminder_channel", "reminders": "reminder_channel",
"ntfy topic": "reminder_ntfy_topic",
"webhook integration": "reminder_webhook_integration_id",
"webhook template": "reminder_webhook_payload_template", "webhook payload": "reminder_webhook_payload_template",
"agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls",
"agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds",
"token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget",
"hard max": "agent_input_token_hard_max",
"token budget cap": "agent_input_token_hard_max",
"input budget cap": "agent_input_token_hard_max",
}
def _resolve(k):
# Real setting keys win over aliases; an unknown name is returned
# stripped but otherwise unchanged so callers can report it as unknown.
k2 = (k or "").strip().lower()
if k2 in DEFAULT_SETTINGS:
return k2
return _ALIASES_SET.get(k2, (k or "").strip())
_ENUMS = {
"image_quality": ["low", "medium", "high"],
"reminder_channel": ["browser", "email", "ntfy", "webhook"],
}
def _coerce(value, default):
# Convert `value` to the type of the setting's default. The bool check
# must come first because bool is a subclass of int. Only bool and int
# defaults are converted; any other default type returns `value` as-is.
if isinstance(default, bool):
return value if isinstance(value, bool) else str(value).strip().lower() in ("true", "on", "yes", "1", "enable", "enabled")
if isinstance(default, int):
return int(value)
return value
def _model_slug(value: str) -> str:
# Lowercase and drop everything except a-z / 0-9 for fuzzy comparison.
import re as _re
return _re.sub(r"[^a-z0-9]+", "", (value or "").lower())
def _endpoint_model_from_cache(model_query: str):
"""Resolve friendly model text to an enabled endpoint + real model id.
The Settings UI stores both `<prefix>_endpoint_id` and
`<prefix>_model`; writing only the model leaves the runtime on the
old endpoint. Prefer cached model lists so this stays fast/offline.
"""
import json as _json
import re as _re
from core.database import ModelEndpoint
wanted = (model_query or "").strip()
wanted_slug = _model_slug(wanted)
wanted_tokens = [_model_slug(t) for t in _re.findall(r"[A-Za-z0-9]+", wanted)]
wanted_tokens = [t for t in wanted_tokens if t]
if not wanted_slug:
return None
# `best` is (score, endpoint_id, model_id); a later candidate replaces it
# only with a strictly higher score, so the first match wins ties.
best = None
for ep in db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all():
raw_models = []
try:
raw_models = _json.loads(ep.cached_models or "[]") or []
except Exception:
raw_models = []
# If cache is empty, still allow matching against endpoint name
# for callers using model@endpoint elsewhere later.
for mid in raw_models:
mid = str(mid)
mid_slug = _model_slug(mid)
if not mid_slug:
continue
# Score: 3 = case-insensitive exact id, 2 = one slug contains the
# other, 1 = every alphanumeric token of the query appears in the slug.
exact = mid.lower() == wanted.lower()
compact_match = wanted_slug in mid_slug or mid_slug in wanted_slug
token_match = bool(wanted_tokens) and all(tok in mid_slug for tok in wanted_tokens)
if exact or compact_match or token_match:
score = 3 if exact else (2 if compact_match else 1)
if not best or score > best[0]:
best = (score, ep.id, mid)
if best:
return {"endpoint_id": best[1], "model": best[2]}
return None
def _mask(k, v):
# Only non-empty secret values are masked; empty/unset secrets show as-is.
return "••••• (set in panel)" if _is_secret(k) and v else v
if action == "list":
s = load_settings()
# Dict-valued settings are left out of the listing.
shown = {k: _mask(k, v) for k, v in s.items() if k in DEFAULT_SETTINGS and not isinstance(v, dict)}
return {"response": f"{len(shown)} settings (use get/set with a key)", "settings": shown, "exit_code": 0}
elif action == "get":
key = _resolve(args.get("key", ""))
if not key:
return {"error": "key is required", "exit_code": 1}
if key not in DEFAULT_SETTINGS:
return {"error": f"Unknown setting '{args.get('key')}'. Use action='list' to see them.", "exit_code": 1}
val = load_settings().get(key, DEFAULT_SETTINGS.get(key))
return {"response": f"{key} = {_mask(key, val)}", "value": _mask(key, val), "exit_code": 0}
elif action == "set":
raw = args.get("key", "")
value = args.get("value")
if not raw:
return {"error": "key is required", "exit_code": 1}
key = _resolve(raw)
if key not in DEFAULT_SETTINGS:
return {"error": f"Unknown setting '{raw}'. Use action='list' to see available settings.", "exit_code": 1}
# Refusals below are reported with exit_code 0 and a `response`, not
# as an `error`.
if _is_secret(key):
return {"response": f"'{key}' is a credential/secret. For security I can't set it from chat. Open Settings and set it there.", "exit_code": 0}
# Structured settings (dicts/lists like keybinds, default_model_fallbacks)
# have no safe scalar coercion; _coerce would pass a bare string
# straight through and clobber the structure. Refuse them here; they're
# edited in their dedicated panels. (reset/delete still restore the
# default structure, which is safe.)
if isinstance(DEFAULT_SETTINGS[key], (dict, list)):
return {"response": f"'{key}' is a structured setting. Edit it in its panel, not from chat. (You can reset it to default here.)", "exit_code": 0}
try:
value = _coerce(value, DEFAULT_SETTINGS[key])
except (ValueError, TypeError):
return {"error": f"'{value}' isn't a valid value for {key} (expected {type(DEFAULT_SETTINGS[key]).__name__}).", "exit_code": 1}
# NOTE(review): the membership test lowercases the value, but the value
# stored below keeps its original case — confirm consumers of these
# settings compare case-insensitively.
if key in _ENUMS and str(value).lower() not in _ENUMS[key]:
return {"error": f"{key} must be one of: {', '.join(_ENUMS[key])}.", "exit_code": 1}
s = load_settings()
s[key] = value
# For model settings, also pin the matching `<prefix>_endpoint_id` and
# replace the friendly text with the real model id when a cached match exists.
if key in {"default_model", "research_model", "utility_model", "task_model", "vision_model", "image_model"}:
resolved = _endpoint_model_from_cache(str(value))
if resolved:
# Strip the trailing "_model" (6 chars) to get the setting prefix.
prefix = key[:-6]
s[f"{prefix}_endpoint_id"] = resolved["endpoint_id"]
s[key] = resolved["model"]
value = resolved["model"]
save_settings(s)
# NOTE(review): this reports whatever endpoint id is stored for the prefix,
# including a pre-existing one when no cached match was found above.
if key.endswith("_model") and s.get(f"{key[:-6]}_endpoint_id"):
return {"response": f"Set {key} = {value} (endpoint {s.get(f'{key[:-6]}_endpoint_id')}).", "exit_code": 0}
return {"response": f"Set {key} = {value}.", "exit_code": 0}
elif action == "delete" or action == "reset":
key = _resolve(args.get("key", ""))
if key not in DEFAULT_SETTINGS:
return {"error": f"Unknown setting '{args.get('key')}'.", "exit_code": 1}
if _is_secret(key):
return {"response": f"'{key}' is a credential. Reset it in the panel.", "exit_code": 0}
s = load_settings()
s[key] = DEFAULT_SETTINGS[key]
save_settings(s)
return {"response": f"Reset {key} to default ({DEFAULT_SETTINGS[key]}).", "exit_code": 0}
elif action in ("disable_tool", "enable_tool", "list_tools"):
# Tool-toggle actions. These edit settings.json:disabled_tools
# (the global list read on every chat request) rather than
# prefs.json. Friendly aliases accepted: "shell" -> "bash",
# "search" -> "web_search", "browser" -> "builtin_browser",
# "documents" -> the document tool set, "memory" ->
# manage_memory, etc.
from src.settings import get_setting, save_settings, load_settings
_ALIASES = {
"shell": ["bash"],
"terminal": ["bash"],
"search": ["web_search", "web_fetch"],
"web": ["web_search", "web_fetch"],
"browser": ["builtin_browser"],
"documents": ["create_document", "edit_document", "update_document", "suggest_document"],
"doc": ["create_document", "edit_document", "update_document", "suggest_document"],
"memory": ["manage_memory"],
"skills": ["manage_skills"],
"images": ["generate_image"],
"image": ["generate_image"],
"tasks": ["manage_tasks"],
"notes": ["manage_notes"],
"calendar": ["manage_calendar"],
"email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
"research": ["web_search", "web_fetch"], # research is a per-request flag, not a tool (closest analog)
}
if action == "list_tools":
current = get_setting("disabled_tools", []) or []
return {
"response": (
f"Currently disabled: {', '.join(current) if current else '(none)'}.\n"
"Common toggles: shell (bash), search (web_search), browser, documents, "
"memory, skills, images, tasks, notes, calendar, email."
),
"disabled": list(current),
"exit_code": 0,
}
# The tool may be passed as either `tool` or `name`.
tool_name = (args.get("tool") or args.get("name") or "").strip().lower()
if not tool_name:
return {"error": "tool name required (e.g. 'shell', 'search', 'bash')", "exit_code": 1}
# A name that is not an alias is treated as a literal tool name.
targets = _ALIASES.get(tool_name, [tool_name])
settings = load_settings()
current = list(settings.get("disabled_tools") or [])
before = set(current)
if action == "disable_tool":
for t in targets:
if t not in current:
current.append(t)
else: # enable_tool
current = [t for t in current if t not in targets]
after = set(current)
settings["disabled_tools"] = current
save_settings(settings)
verb = "Disabled" if action == "disable_tool" else "Enabled"
# `changed` lists only the tool names whose disabled state actually flipped.
changed = sorted(after.symmetric_difference(before))
return {
"response": (
f"{verb} {tool_name} ({', '.join(targets)}). "
f"Now disabled: {', '.join(current) if current else '(none)'}."
),
"changed": changed,
"disabled": list(current),
"exit_code": 0,
}
else:
return {"error": f"Unknown action: {action}", "exit_code": 1}
except Exception as e:
logger.error(f"manage_settings error: {e}")
return {"error": str(e), "exit_code": 1}
finally:
db.close()
# ---------------------------------------------------------------------------
# API call tool
# ---------------------------------------------------------------------------
# ── registry adapters ────────────────────────────────────────────────────────
def _owner_adapter(fn):
"""Wrap a do_*(content, owner) impl as a registry execute(content, ctx)."""
async def _execute(content: str, ctx: dict) -> dict:
return await fn(content, ctx.get("owner"))
return _execute
# Maps each admin tool name to a registry-style `execute(content, ctx)`
# coroutine built by wrapping the matching do_* implementation with
# _owner_adapter, which forwards ctx["owner"] as the `owner` argument.
ADMIN_TOOL_HANDLERS = {
"manage_endpoints": _owner_adapter(do_manage_endpoints),
"manage_mcp": _owner_adapter(do_manage_mcp),
"manage_webhooks": _owner_adapter(do_manage_webhooks),
"manage_tokens": _owner_adapter(do_manage_tokens),
"manage_settings": _owner_adapter(do_manage_settings),
}
+98
View File
@@ -0,0 +1,98 @@
"""Agent tool to inspect and control detached background `bash` jobs.
`bash` blocks prefixed with a `#!bg` marker run detached via `src.bg_jobs`; the
agent is auto-re-invoked with the output when they finish. This tool covers the
gaps in that flow: list the jobs in the current chat, read a still-running job's
output on demand, and kill a runaway job instead of waiting out its max-runtime.
Registry tool (`TOOL_HANDLERS["manage_bg_jobs"]`). Jobs are scoped to the chat
that launched them, so every action requires the caller's `session_id` and a job
from another session is treated as not found.
"""
import json
import time
from typing import Any, Dict, List
# Accepted spellings of the `action` argument, grouped by the operation they
# select. The caller lowercases and strips `action` before testing membership.
_LIST_ACTIONS = {"list", "ls", "jobs"}
_OUTPUT_ACTIONS = {"output", "get", "read", "tail", "status", "show"}
_KILL_ACTIONS = {"kill", "stop", "cancel", "terminate"}
def _age(rec: Dict[str, Any]) -> str:
start = rec.get("started_at")
if not start:
return "?"
secs = int(time.time() - start)
if secs < 60:
return f"{secs}s"
if secs < 3600:
return f"{secs // 60}m"
return f"{secs // 3600}h{(secs % 3600) // 60}m"
def _status_label(rec: Dict[str, Any]) -> str:
status = rec.get("status", "?")
if rec.get("killed"):
return "killed"
if rec.get("timed_out"):
return "timed out"
if rec.get("died"):
return "died"
if status in ("done", "failed"):
return f"{status} (exit {rec.get('exit_code')})"
return status
def _row(rec: Dict[str, Any]) -> str:
    """Render one job record as a single listing line.

    Format: ``[<id>] <status label> | <age> | <first line of command>``,
    with the command truncated to 80 characters.
    """
    command = (rec.get("command") or "").strip()
    # An empty or whitespace-only command has no lines at all, so indexing
    # splitlines()[0] would raise IndexError; show an empty command instead.
    first_line = command.splitlines()[0][:80] if command else ""
    return f"[{rec.get('id')}] {_status_label(rec)} | {_age(rec)} | {first_line}"
class ManageBgJobsTool:
    """Registry handler that lists, reads and kills background `bash` jobs."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Run one `manage_bg_jobs` action for the calling chat.

        `content` is a JSON object with `action` (default "list") and, for
        output/kill actions, `job_id` (or `id`). Unparseable or non-object
        content is treated as an empty argument set. `ctx["session_id"]`
        is required: only jobs whose record carries that session id are
        visible.

        Returns `{"output": ..., "exit_code": 0}` on success or
        `{"error": ..., "exit_code": 1}` on failure.
        """
        from src import bg_jobs

        session_id = ctx.get("session_id")
        raw = (content or "").strip()
        try:
            args = json.loads(raw) if raw else {}
        except (ValueError, TypeError):
            args = {}
        if not isinstance(args, dict):
            args = {}
        action = str(args.get("action", "list")).strip().lower()
        job_id = str(args.get("job_id") or args.get("id") or "").strip()

        if not session_id:
            return {"error": "manage_bg_jobs: no active chat session; background jobs are scoped to a chat.", "exit_code": 1}

        if action in _LIST_ACTIONS:
            jobs: List[Dict[str, Any]] = bg_jobs.list_for_session(session_id)
            if not jobs:
                return {"output": "No background jobs in this chat.", "exit_code": 0}
            # Newest first; records without a start time sort last.
            jobs.sort(key=lambda r: r.get("started_at") or 0, reverse=True)
            lines = "\n".join(_row(r) for r in jobs)
            return {"output": f"{len(jobs)} background job(s):\n{lines}", "exit_code": 0}

        if action in _OUTPUT_ACTIONS or action in _KILL_ACTIONS:
            if not job_id:
                return {"error": f"manage_bg_jobs: action '{action}' requires a job_id (see action='list').", "exit_code": 1}
            rec = bg_jobs.get(job_id)
            # Scope: only the chat that launched a job may see or control it.
            if rec is None or rec.get("session_id") != session_id:
                return {"error": f"manage_bg_jobs: no background job '{job_id}' in this chat.", "exit_code": 1}

            if action in _KILL_ACTIONS:
                if rec.get("status") != "running":
                    return {"output": f"Job `{job_id}` already {_status_label(rec)}; nothing to kill.", "exit_code": 0}
                killed = bg_jobs.kill(job_id)
                # kill() may return nothing, or a record with an empty/None
                # command; indexing splitlines()[0] on that would raise after
                # the job was already killed. Fall back to the record read
                # above, and finally to an empty string.
                killed_rec = killed if isinstance(killed, dict) else {}
                command = str(killed_rec.get("command") or rec.get("command") or "").strip()
                first_line = command.splitlines()[0][:80] if command else ""
                return {"output": f"Killed background job `{job_id}` ({first_line}).", "exit_code": 0}

            out = rec.get("output") or "(no output yet)"
            return {
                "output": f"Job `{job_id}` [{_status_label(rec)}, {_age(rec)}]\nCommand: {rec.get('command')}\n\nOutput:\n{out}",
                "exit_code": 0,
            }

        return {"error": f"manage_bg_jobs: unknown action '{action}'. Use list, output, or kill.", "exit_code": 1}
+1 -33
View File
@@ -1,8 +1,8 @@
from typing import Any, Dict, List, Optional
import logging
import re
import json
from src.constants import MAX_READ_CHARS
from src.tool_utils import _parse_tool_args
logger = logging.getLogger(__name__)
@@ -154,38 +154,6 @@ def _coerce_email_document_content(existing: str, incoming: str) -> str:
body = new
return header.rstrip() + "\n---\n" + body
def _parse_tool_args(content):
"""Parse a tool-call argument blob.
Accepts either a JSON string or an already-decoded dict. Unwraps the
common `{"body": {...}}` envelope that smaller models emit when they
read tool descriptions like "Body is JSON: {...}" literally — they
pass `body` as a field name rather than treating it as a noun.
Returns a dict on success, raises ValueError on bad JSON.
"""
if isinstance(content, str):
try:
args = json.loads(content) if content.strip() else {}
except (json.JSONDecodeError, TypeError) as e:
raise ValueError(str(e))
elif isinstance(content, dict):
args = content
else:
args = {}
# Unwrap {"body": {...}} envelope — but only if `body` is the sole key
# and points at a dict. We don't want to clobber a legitimate `body`
# field on tools where it's a real arg (e.g. send_email body text).
if (
isinstance(args, dict)
and len(args) == 1
and "body" in args
and isinstance(args["body"], dict)
and "action" in args["body"] # extra safety: only unwrap if the inner dict looks like a tool call
):
args = args["body"]
return args
def parse_edit_blocks(content: str) -> list:
"""Parse <<<FIND>>>...<<<REPLACE>>>...<<<END>>> blocks."""
edits = []
+54 -13
View File
@@ -1,6 +1,7 @@
import asyncio
import json
import os
import re
import difflib
import fnmatch
import shutil
@@ -16,6 +17,31 @@ _CODENAV_SKIP_DIRS = frozenset({
_CODENAV_MAX_HITS = 200
_CODENAV_MAX_LINE = 400
def _glob_to_regex(pat: str) -> "re.Pattern":
"""Translate a forward-slash glob (**, *, ?) into a compiled regex.
`**/` matches zero or more complete directories.
`*` matches within a single path segment (does not cross /).
"""
i, n, out = 0, len(pat), []
while i < n:
if pat[i : i + 3] == "**/":
out.append("(?:[^/]+/)*")
i += 3
elif pat[i : i + 2] == "**":
out.append(".*")
i += 2
elif pat[i] == "*":
out.append("[^/]*")
i += 1
elif pat[i] == "?":
out.append("[^/]")
i += 1
else:
out.append(re.escape(pat[i]))
i += 1
return re.compile("".join(out))
def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
if old == new:
return None
@@ -259,23 +285,38 @@ class GlobTool:
return {"error": f"glob: {e}", "exit_code": 1}
def _glob():
from pathlib import Path
base = Path(root)
if not base.is_dir():
base = os.path.abspath(root)
if not os.path.isdir(base):
return None, f"glob: {root}: not a directory"
norm_pat = pattern.replace("\\", "/")
# Fast path: literal pattern (no wildcards) → direct path lookup.
if not any(c in norm_pat for c in "*?["):
cand = os.path.normpath(os.path.join(base, norm_pat))
if os.path.exists(cand):
return [cand], None
# Literal not at exact path — fall through to walk so
# e.g. "foo.py" still matches at any depth (like rglob).
# Compile glob to regex: * stays within one segment, **/ spans dirs.
regex = _glob_to_regex(norm_pat)
matched = []
cap = _CODENAV_MAX_HITS * 5
try:
for p in base.rglob(pattern):
if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
continue
try:
mtime = p.stat().st_mtime
except OSError:
mtime = 0
matched.append((mtime, str(p)))
if len(matched) > _CODENAV_MAX_HITS * 5:
for dp, dns, fns in os.walk(base):
# Prune skipped dirs before descending (unlike rglob which
# descends first then filters — fatal on large node_modules).
dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
for name in fns + dns:
full = os.path.join(dp, name)
rel = os.path.relpath(full, base).replace(os.sep, "/")
if regex.fullmatch(rel) or regex.fullmatch(name):
try:
mtime = os.stat(full).st_mtime
except OSError:
mtime = 0
matched.append((mtime, full))
if len(matched) > cap:
break
except (OSError, ValueError) as _e:
except OSError as _e:
return None, f"glob: {_e}"
matched.sort(key=lambda t: t[0], reverse=True)
return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None
+208
View File
@@ -0,0 +1,208 @@
"""model_interaction_tools.py - agent tools for talking to other models.
Owns the model-interaction tool implementations (chat_with_model, ask_teacher,
list_models) and their handler classes, registered in ``TOOL_HANDLERS``. Part
of the tool -> registry migration (#3629): the implementations were moved here
out of ``src.ai_interaction`` so dispatch flows through the registry instead of
the elif chain / dispatch_ai_tool in tool_execution.py.
Shared helpers that still live in ``src.ai_interaction`` and are used by tools
not yet migrated (``_resolve_model``, ``AI_CHAT_TIMEOUT``) are imported lazily
inside the functions to avoid an import cycle at module load.
"""
import logging
from typing import Dict, Optional
logger = logging.getLogger(__name__)
# System prompt sent ahead of the user's problem when a teacher model is asked
# for help: it frames the callee as a mentor and requests a short analysis,
# a step-by-step approach and pitfalls, with no preamble.
_TEACHER_SYSTEM_PROMPT = (
"You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
"Provide clear, actionable guidance:\n"
"1. Brief analysis of the problem\n"
"2. Recommended approach (step by step)\n"
"3. Key things to watch out for\n\n"
"Be concise and practical. No preamble."
)
async def chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Relay one user message to a named model and return its reply.

    Content format:
        Line 1: model_name (or model_name@endpoint_name)
        Line 2+: the message to send

    Returns `{"model": ..., "response": ...}` on success (responses longer
    than 10000 characters are truncated) or `{"error": ...}` otherwise.
    `session_id` is accepted for handler-signature compatibility and is
    not used.
    """
    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
    from src.llm_core import llm_call_async

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}

    message = remainder.strip()
    if not message:
        return {"error": "No message provided (line 2+ is the message)"}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    try:
        reply = await llm_call_async(
            url,
            model,
            [{"role": "user", "content": message}],
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Truncate very long responses
        limit = 10000
        if len(reply) > limit:
            reply = reply[:limit] + "\n... (truncated)"
        return {"model": model, "response": reply}
    except Exception as e:
        logger.error(f"chat_with_model failed: {e}")
        return {"error": f"Failed to get response from {model_spec}: {e}"}
async def ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Escalate a problem to a more capable "teacher" model.

    Content format:
        Line 1: model_name (or 'auto')
        Line 2+: the problem description

    With 'auto' (or an empty first line) the model comes from the
    `teacher_model` setting. Returns
    `{"model": ..., "response": ..., "teacher": True}` on success
    (responses longer than 8000 characters are truncated) or
    `{"error": ...}` otherwise. `session_id` is accepted for
    handler-signature compatibility and is not used.
    """
    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
    from src.llm_core import llm_call_async
    from src.settings import get_setting

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    problem = remainder.strip()
    if not problem:
        return {"error": "No problem description provided"}

    if model_spec.lower() in ("auto", ""):
        model_spec = get_setting("teacher_model", "")
        if not model_spec:
            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    conversation = [
        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem:\n{problem}"},
    ]
    try:
        advice = await llm_call_async(
            url,
            model,
            conversation,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        limit = 8000
        if len(advice) > limit:
            advice = advice[:limit] + "\n... (truncated)"
        return {"model": model, "response": advice, "teacher": True}
    except Exception as e:
        logger.error(f"ask_teacher failed: {e}")
        return {"error": f"Teacher call failed ({model_spec}): {e}"}
async def list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List all available models across configured endpoints.

    Content = optional filter keyword, matched case-insensitively against
    each model id and the endpoint name.

    Returns `{"results": <markdown listing>}` on success or
    `{"error": ...}` if the lookup itself fails. An endpoint whose model
    list cannot be fetched is shown with a single "(endpoint offline)"
    entry. `session_id` is accepted for handler-signature compatibility
    and is not used.
    """
    import asyncio
    import functools
    import json
    import httpx
    from src.database import SessionLocal, ModelEndpoint
    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
    from src.auth_helpers import owner_filter
    from src.endpoint_resolver import resolve_endpoint_runtime, build_headers, build_models_url

    keyword = content.strip().lower() if content.strip() else None
    loop = asyncio.get_running_loop()
    db = SessionLocal()
    try:
        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
        if owner:
            query = owner_filter(query, ModelEndpoint, owner)
        endpoints = query.all()
        if not endpoints:
            return {"results": "No enabled model endpoints configured."}

        result_lines = []
        total_models = 0
        for ep in endpoints:
            try:
                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
            except Exception:
                # Endpoint cannot be resolved for this caller; leave it out.
                continue
            provider = _detect_provider(base)
            headers = build_headers(api_key, base)
            model_ids = []
            if provider == "anthropic":
                model_ids = list(ANTHROPIC_MODELS)
            else:
                try:
                    models_url = build_models_url(base)
                    if models_url:
                        # httpx.get is synchronous; calling it directly here
                        # would stall the event loop for up to the 5 s timeout
                        # per endpoint. Run it on the default executor instead.
                        r = await loop.run_in_executor(
                            None,
                            functools.partial(httpx.get, models_url, headers=headers, timeout=5),
                        )
                        r.raise_for_status()
                        data = r.json()
                        # OpenAI-style payloads list models under "data";
                        # fall back to a "models" array keyed by name/model.
                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
                        if not model_ids:
                            model_ids = [
                                m.get("name") or m.get("model")
                                for m in (data.get("models") or [])
                                if m.get("name") or m.get("model")
                            ]
                    else:
                        model_ids = json.loads(ep.cached_models or "[]")
                except Exception:
                    model_ids = ["(endpoint offline)"]

            if keyword:
                model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
            if model_ids:
                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
                for mid in model_ids:
                    result_lines.append(f" - `{mid}`")
                    total_models += 1

        if not result_lines:
            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
        header = f"Available models ({total_models} total):"
        return {"results": header + "\n".join(result_lines)}
    except Exception as e:
        logger.error(f"list_models failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# Handler classes registered in TOOL_HANDLERS
# ---------------------------------------------------------------------------
class ChatWithModelTool:
    """Registry handler that forwards to `chat_with_model`."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call `chat_with_model` with the session id and owner taken from `ctx`."""
        session_id = ctx.get("session_id")
        caller = ctx.get("owner")
        return await chat_with_model(content, session_id, owner=caller)
class AskTeacherTool:
    """Registry handler that forwards to `ask_teacher`."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call `ask_teacher` with the session id and owner taken from `ctx`."""
        session_id = ctx.get("session_id")
        caller = ctx.get("owner")
        return await ask_teacher(content, session_id, owner=caller)
class ListModelsTool:
    """Registry handler that forwards to `list_models`."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call `list_models` with the session id and owner taken from `ctx`."""
        session_id = ctx.get("session_id")
        caller = ctx.get("owner")
        return await list_models(content, session_id, owner=caller)
+464
View File
@@ -0,0 +1,464 @@
"""session_tools.py - agent tools for AI-to-AI session management.
Owns create_session, list_sessions, send_to_session and manage_session, moved
out of src.ai_interaction as part of the tool -> registry migration (#3629), and
their handler classes registered in TOOL_HANDLERS.
The session manager is a runtime-set singleton in src.ai_interaction, so each
function fetches it via get_session_manager() (imported here); _resolve_model and
AI_CHAT_TIMEOUT are reused from there too.
"""
import json
import logging
import uuid
from typing import Dict, Optional
from src.ai_interaction import get_session_manager, _resolve_model, AI_CHAT_TIMEOUT
logger = logging.getLogger(__name__)
async def create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Create a new chat session bound to a resolved model endpoint.

    Content format:
        Line 1: session name
        Line 2: model_name (or model_name@endpoint_name)

    Returns `{"session_id", "name", "model", "endpoint_url"}` on success
    or `{"error": ...}` otherwise. The `session_id` parameter is accepted
    for handler-signature compatibility and is not used; the new session
    gets a fresh 8-character id.
    """
    manager = get_session_manager()
    if not manager:
        return {"error": "Session manager not available"}

    parts = content.strip().split("\n")
    if len(parts) < 2:
        return {"error": "Need 2 lines: session name, then model spec"}
    name, model_spec = parts[0].strip(), parts[1].strip()
    if not name:
        return {"error": "Session name cannot be empty"}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    new_id = str(uuid.uuid4())[:8]
    try:
        manager.create_session(
            session_id=new_id,
            name=name,
            endpoint_url=url,
            model=model,
            rag=False,
            owner=owner,
        )
        # Store headers on session for future calls
        created = manager.get_session(new_id)
        if created and headers:
            created.headers = headers
        # Event dispatch is best-effort: a failure is logged at debug level
        # and does not fail the session creation.
        try:
            from src.event_bus import fire_event
            fire_event("session_created", owner)
        except Exception:
            logger.debug("session_created event dispatch failed", exc_info=True)
        return {"session_id": new_id, "name": name, "model": model, "endpoint_url": url}
    except Exception as e:
        logger.error(f"create_session failed: {e}")
        return {"error": f"Failed to create session: {e}"}
async def list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the caller's sessions, most-recently-active first.

    Output includes a relative "last active" timestamp per row so the
    agent can answer "open my last chat" without guessing from titles.
    The most-recent session is always first in the list.

    Args:
        content: Optional filter keyword (case-insensitive substring match
            against the session name).
        session_id: The caller's active session; not used here.
        owner: User whose sessions are listed.

    Returns:
        ``{"results": <markdown list>}`` or ``{"error": <message>}``.
    """
    _session_manager = get_session_manager()
    if not _session_manager:
        return {"error": "Session manager not available"}

    keyword = content.strip().lower() or None
    try:
        from core.database import SessionLocal, Session as DbSession
        from datetime import datetime, timezone

        def _as_utc(ts):
            """Return ``ts`` as an aware UTC datetime, or None if it is not a datetime.

            Naive values are taken to be UTC, matching the previous
            ``utcnow()``-based arithmetic.
            """
            if not isinstance(ts, datetime):
                return None
            if ts.tzinfo is None:
                return ts.replace(tzinfo=timezone.utc)
            return ts.astimezone(timezone.utc)

        # Pull every session's last_accessed from the DB so we can sort
        # by recency. In-memory sessions hold name + model + msg_count;
        # the DB row holds the timestamps.
        db = SessionLocal()
        try:
            db_rows = {r.id: r for r in db.query(DbSession).all()}
        finally:
            db.close()

        # SECURITY: scope to the caller's sessions. Passing None returned
        # every user's sessions, which the agent tool then exposed via the
        # "list my chats" reply.
        sessions = _session_manager.get_sessions_for_user(owner)

        rows = []
        for sid, sess in sessions.items():
            if keyword and keyword not in (sess.name or "").lower():
                continue
            db_row = db_rows.get(sid)
            # Prefer last_accessed; fall back to updated_at, then created_at.
            ts = None
            if db_row:
                ts = (
                    getattr(db_row, 'last_accessed', None)
                    or getattr(db_row, 'updated_at', None)
                    or getattr(db_row, 'created_at', None)
                )
            rows.append((ts, sid, sess))

        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
        # Every key is normalised to aware UTC: comparing a naive sentinel
        # with an aware timestamp raises TypeError and would fail the listing.
        oldest = datetime.min.replace(tzinfo=timezone.utc)
        rows.sort(key=lambda r: _as_utc(r[0]) or oldest, reverse=True)

        def _rel(ts):
            """Format ``ts`` as a short relative age ("5m ago", "2d ago", ...)."""
            if not ts:
                return 'never'
            aware = _as_utc(ts)
            if aware is None:
                return 'unknown'
            diff = (datetime.now(timezone.utc) - aware).total_seconds()
            if diff < 60:
                return 'just now'
            if diff < 3600:
                return f'{int(diff / 60)}m ago'
            if diff < 86400:
                return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7:
                return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')

        lines = []
        for i, (ts, sid, sess) in enumerate(rows[:50]):
            # Escape brackets so the name cannot break the markdown link.
            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
            msg_count = getattr(sess, "message_count", 0) or 0
            model = getattr(sess, "model", "unknown")
            marker = " ← most recent" if i == 0 else ""
            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
        if len(rows) > 50:
            lines.append(f"... and {len(rows) - 50} more (showing first 50)")

        if not lines:
            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
        return {
            "results": (
                f"Found {len(rows)} session(s), sorted most-recent first:\n"
                + "\n".join(lines)
                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
            )
        }
    except Exception as e:
        logger.error(f"list_sessions failed: {e}")
        return {"error": str(e)}
async def send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send a message to an existing session and return the model's reply.

    Content format:
        Line 1: session_id
        Line 2+: message

    The user message and the reply are both appended to the target session.
    The reply returned to the caller is capped at 10000 characters; the
    stored copy is not.

    Returns:
        ``{"session_id", "session_name", "response"}`` on success, or
        ``{"error": <message>}``. ``session_id`` (the caller's active
        session) is accepted but not used.
    """
    _session_manager = get_session_manager()
    from src.llm_core import llm_call_async
    from core.models import ChatMessage

    if not _session_manager:
        return {"error": "Session manager not available"}

    lines = content.strip().split("\n", 1)
    if len(lines) < 2:
        return {"error": "Need 2 lines: session_id, then message"}
    target_sid = lines[0].strip()
    message = lines[1].strip()

    sess = _session_manager.get_session(target_sid)
    if not sess:
        return {"error": f"Session '{target_sid}' not found"}
    # Owner-scope: reject access to another user's session. The error is
    # identical to the not-found case so the id's existence is not revealed.
    if owner and getattr(sess, "owner", None) and sess.owner != owner:
        return {"error": f"Session '{target_sid}' not found"}
    if not message:
        return {"error": "No message provided"}

    try:
        # Build context from session history
        context = sess.get_context_messages()
        context.append({"role": "user", "content": message})
        response = await llm_call_async(
            sess.endpoint_url, sess.model, context,
            # create_session only sets `headers` when the resolved headers
            # are non-empty, so read it defensively rather than assuming
            # the attribute exists.
            headers=getattr(sess, "headers", None),
            timeout=AI_CHAT_TIMEOUT,
        )
        # Save both messages to session
        sess.add_message(ChatMessage("user", message))
        sess.add_message(ChatMessage("assistant", response))
        # Truncate for tool output
        if len(response) > 10000:
            response = response[:10000] + "\n... (truncated)"
        return {
            "session_id": target_sid,
            "session_name": sess.name,
            "response": response,
        }
    except Exception as e:
        logger.error(f"send_to_session failed: {e}")
        return {"error": f"Failed to send to session: {e}"}
async def manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Manage sessions: list, switch, rename, archive, delete, important, truncate, fork.

    Accepted content formats:
        JSON object: ``{"action": ..., "session_id": ..., "value": ...}``.
            ``session`` / ``id`` are accepted for the target, ``name`` /
            ``new_name`` / ``title`` / ``keep_count`` for the value, and
            ``filter`` for the ``list`` keyword.
        Legacy lines:
            Line 1: action
            Line 2: target session_id (or "current" to use the active session)
            Line 3: action-specific param (new name for rename, keep_count
                for truncate / fork)

    Args:
        content: The request in one of the formats above.
        session_id: The caller's active session; used to resolve "current"
            and to refuse deleting the chat currently in use.
        owner: When not None, DB lookups are restricted to this owner.

    Returns:
        A result dict carrying ``results`` on success, or ``{"error": ...}``.
    """
    _session_manager = get_session_manager()
    if not _session_manager:
        return {"error": "Session manager not available"}
    from src.database import SessionLocal, Session as DbSession

    # Accept BOTH the structured JSON args the tool schema advertises
    # ({action, session_id, value}) AND the legacy line-based format
    # (line1=action, line2=session_id, line3=value). Native function-calling
    # models send JSON; fenced-block callers send lines.
    _raw = (content or "").strip()
    action = ""
    target_sid = ""
    value = None  # the action param: new name (rename) / keep_count (truncate, fork)
    _list_filter = ""
    _parsed = None
    if _raw.startswith("{"):
        try:
            _parsed = json.loads(_raw)
        except Exception:
            # Not valid JSON: fall through to the line-based parser.
            _parsed = None
    if isinstance(_parsed, dict):
        action = str(_parsed.get("action") or "").strip().lower()
        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
        _v = _parsed.get("value")
        if _v is None:
            _v = (_parsed.get("name") or _parsed.get("new_name")
                  or _parsed.get("title") or _parsed.get("keep_count"))
        value = None if _v is None else str(_v).strip()
        _list_filter = str(_parsed.get("filter") or "").strip()
    else:
        lines = _raw.split("\n")
        if not lines or not lines[0].strip():
            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
        action = lines[0].strip().lower()
        target_sid = lines[1].strip() if len(lines) >= 2 else ""
        value = lines[2].strip() if len(lines) >= 3 else None
        _list_filter = "\n".join(lines[1:]).strip()
    if not action:
        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}

    # `list` alias - dispatch to list_sessions so the agent's natural
    # first guess (every other manage_* tool has a `list` action) works.
    if action == "list":
        return await list_sessions(_list_filter, session_id, owner=owner)

    if not target_sid:
        return {"error": "Need a session_id (or 'current' for the active chat)"}

    # Allow "current" to refer to the active session. Without an active
    # session there is nothing to resolve it to, so say so instead of
    # looking up a session literally called "current".
    if target_sid.lower() == "current":
        if not session_id:
            return {"error": "No active session to resolve 'current'; pass an explicit session_id."}
        target_sid = session_id

    def _session_query(db):
        """Query for the target row, restricted to ``owner`` when one is given."""
        query = db.query(DbSession).filter(DbSession.id == target_sid)
        if owner is not None:
            query = query.filter(DbSession.owner == owner)
        return query

    not_found = {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}

    db = SessionLocal()
    try:
        # `switch` / `open` / `select` / `view` - the agent reaches for
        # these when the user asks to "open" or "switch to" a session.
        # There's no server-side way to make the browser navigate, so we
        # just return a clickable anchor link. The frontend's chat-history
        # click delegate routes `#session-<id>` to selectSession().
        if action in ("switch", "open", "select", "view"):
            db_sess = _session_query(db).first()
            if not db_sess:
                return not_found
            name = db_sess.name or target_sid
            # Escape brackets so the name cannot break the markdown link
            # (same escaping list_sessions applies).
            link_text = name.replace("[", "\\[").replace("]", "\\]")
            return {
                "action": action,
                "session_id": target_sid,
                "name": name,
                "results": f"[{link_text}](#session-{target_sid}) - click to open.",
            }

        if action == "rename":
            if not value:
                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
            new_name = value
            db_sess = _session_query(db).first()
            if not db_sess:
                return not_found
            db_sess.name = new_name
            db.commit()
            _session_manager.update_session_name(target_sid, new_name)
            return {"action": "rename", "session_id": target_sid, "name": new_name,
                    "results": f"Session renamed to '{new_name}'"}

        if action in ("archive", "unarchive"):
            db_sess = _session_query(db).first()
            if not db_sess:
                return not_found
            db_sess.archived = action == "archive"
            db.commit()
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' {action}d"}

        if action == "delete":
            if target_sid == session_id:
                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
            if db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
            # Read the display name before the row is removed.
            display_name = db_sess.name or target_sid
            try:
                ok = _session_manager.delete_session(target_sid)
            except Exception as e:
                return {"error": f"Failed to delete session: {e}"}
            if not ok:
                return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
            return {"action": "delete", "session_id": target_sid,
                    "results": f"Session '{display_name}' deleted"}

        if action in ("important", "unimportant"):
            is_important = action == "important"
            db_sess = _session_query(db).first()
            if not db_sess:
                return not_found
            # Prevent AI from unstarring sessions - only the user can do that manually
            if not is_important and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
            db_sess.is_important = is_important
            db.commit()
            status = "marked as important" if is_important else "unmarked as important"
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' {status}"}

        if action == "truncate":
            db_sess = _session_query(db).first()
            if not db_sess:
                return not_found
            keep_count = 10
            if value:
                # Truncation is destructive: refuse an unparseable count
                # rather than silently falling back to the default.
                try:
                    keep_count = int(value)
                except ValueError:
                    return {"error": f"truncate needs an integer keep_count, got '{value}'"}
            success = _session_manager.truncate_messages(target_sid, keep_count)
            if success:
                return {"action": "truncate", "session_id": target_sid,
                        "results": f"Session truncated to last {keep_count} messages"}
            return {"error": f"Failed to truncate session '{target_sid}'"}

        if action == "fork":
            db_sess = _session_query(db).first()
            if not db_sess:
                return not_found
            keep_count = 0  # 0 = all messages
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    # Non-destructive: an unparseable count copies everything.
                    pass
            source = _session_manager.get_session(target_sid)
            if not source:
                return {"error": f"Session '{target_sid}' not found"}
            new_sid = str(uuid.uuid4())[:8]
            _session_manager.create_session(
                session_id=new_sid,
                name=f"Fork: {source.name}",
                endpoint_url=source.endpoint_url,
                model=source.model,
                rag=False,
                owner=owner,
            )
            new_sess = _session_manager.get_session(new_sid)
            if not new_sess:
                return {"error": f"Failed to fork session '{target_sid}': new session {new_sid} was not created"}
            # Carry over the stored request headers, as create_session does
            # for new sessions; without them the fork talks to the same
            # endpoint with no headers.
            source_headers = getattr(source, "headers", None)
            if source_headers:
                new_sess.headers = dict(source_headers)
            # Copy messages
            history = source.get_context_messages()
            if keep_count > 0:
                history = history[:keep_count]
            from core.models import ChatMessage as InMemoryMsg
            for msg in history:
                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
            # Event dispatch is best-effort.
            try:
                from src.event_bus import fire_event
                fire_event("session_created", owner)
            except Exception:
                logger.debug("session_created event dispatch failed", exc_info=True)
            return {"action": "fork", "session_id": new_sid,
                    "source_session": target_sid, "messages_copied": len(history),
                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}

        return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
    except Exception as e:
        logger.error(f"manage_session failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# Handler classes registered in TOOL_HANDLERS
# ---------------------------------------------------------------------------
class CreateSessionTool:
    """Handler that forwards a tool call to ``create_session``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``create_session`` with the session id and owner read from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await create_session(content, active_sid, owner=caller)
class ListSessionsTool:
    """Handler that forwards a tool call to ``list_sessions``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``list_sessions`` with the session id and owner read from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await list_sessions(content, active_sid, owner=caller)
class SendToSessionTool:
    """Handler that forwards a tool call to ``send_to_session``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``send_to_session`` with the session id and owner read from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await send_to_session(content, active_sid, owner=caller)
class ManageSessionTool:
    """Handler that forwards a tool call to ``manage_session``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``manage_session`` with the session id and owner read from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await manage_session(content, active_sid, owner=caller)
+65 -13
View File
@@ -7,6 +7,7 @@ from src.constants import MAX_OUTPUT_CHARS
class WebSearchTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.search import comprehensive_web_search
progress_cb = ctx.get("progress_cb") if isinstance(ctx, dict) else None
raw = content.strip()
query = raw
time_filter = None
@@ -37,18 +38,39 @@ class WebSearchTool:
elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"):
time_filter = "week"
loop = asyncio.get_running_loop()
text, sources = await asyncio.wait_for(
loop.run_in_executor(
None,
lambda: comprehensive_web_search(
query,
max_pages=max_pages,
time_filter=time_filter,
return_sources=True,
if progress_cb:
await progress_cb({
"elapsed_s": 0,
"tail": f"Searching web for: {query[:160]}",
})
try:
text, sources = await asyncio.wait_for(
loop.run_in_executor(
None,
lambda: comprehensive_web_search(
query,
max_pages=max_pages,
time_filter=time_filter,
return_sources=True,
),
),
),
timeout=30,
)
timeout=30,
)
except asyncio.TimeoutError:
return {
"error": f"web_search timed out after 30s: {query[:200]}",
"exit_code": 1,
}
except Exception as e:
return {
"error": f"web_search failed: {type(e).__name__}: {str(e) or 'no details'}",
"exit_code": 1,
}
if progress_cb:
await progress_cb({
"elapsed_s": 30,
"tail": "Search completed; preparing sources.",
})
output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text
if sources:
output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
@@ -57,13 +79,23 @@ class WebSearchTool:
class WebFetchTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.search.content import fetch_webpage_content
from src.constants import WEB_FETCH_HARD_MAX_BYTES
raw = content.strip()
url = ""
max_bytes = None
if raw.startswith("{"):
try:
parsed = json.loads(raw)
if isinstance(parsed, dict):
url = str(parsed.get("url") or "").strip()
# Download-budget override (#3812): "full": true raises the
# budget to the hard cap; an explicit max_bytes is clamped
# to the hard cap downstream. Default stays the soft cap.
if parsed.get("full") is True:
max_bytes = WEB_FETCH_HARD_MAX_BYTES
mb = parsed.get("max_bytes")
if isinstance(mb, int) and mb > 0:
max_bytes = mb
except json.JSONDecodeError:
url = ""
if not url:
@@ -78,7 +110,7 @@ class WebFetchTool:
loop = asyncio.get_running_loop()
try:
result = await asyncio.wait_for(
loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10)),
loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10, max_bytes=max_bytes)),
timeout=30,
)
except asyncio.TimeoutError:
@@ -94,8 +126,28 @@ class WebFetchTool:
return {"error": f"web_fetch: {url}: {err}", "exit_code": 1}
return {"error": f"web_fetch: {url}: no readable text content (not HTML, or the page needs JS/login)", "exit_code": 1}
# Tell the model when the download budget cut the body short and how
# to get the rest, instead of silently presenting a partial page as
# the whole thing.
size_note = ""
if result.get("truncated"):
fetched = result.get("fetched_bytes") or 0
total = result.get("total_bytes")
total_txt = f" of {total:,} bytes" if total else ""
size_note = (
f"[partial content: download stopped at {fetched:,} bytes{total_txt}. "
f'Re-call with {{"url": "{url}", "full": true}} to fetch up to '
f"{WEB_FETCH_HARD_MAX_BYTES:,} bytes.]\n\n"
)
# The notice must lead the output so the MAX_OUTPUT_CHARS trim below can
# never drop it. The title is untrusted, uncapped page content, so a
# giant title ahead of the notice could push it out of range; keep the
# notice first and cap the title as a second guard.
if len(title) > 300:
title = title[:300] + "..."
header = (f"# {title}\n" if title else "") + f"Source: {url}\n\n"
output = header + text
output = size_note + header + text
if len(output) > MAX_OUTPUT_CHARS:
output = output[:MAX_OUTPUT_CHARS] + "\n\n[...truncated]"
return {"output": output, "exit_code": 0}
+64 -786
View File
@@ -1,8 +1,14 @@
"""
ai_interaction.py
AI-to-AI interaction tools: chat_with_model, create_session, list_sessions,
send_to_session, pipeline.
AI-to-AI interaction tools: pipeline and manage_memory, plus shared model
resolution (_resolve_model), the session-manager singleton, and dispatch_ai_tool.
As part of the tool -> registry migration (#3629), chat_with_model, ask_teacher
and list_models moved to src/agent_tools/model_interaction_tools.py, and
create_session, list_sessions, send_to_session and manage_session moved to
src/agent_tools/session_tools.py. Those modules reuse get_session_manager /
_resolve_model / AI_CHAT_TIMEOUT from here.
These are agent tools — the LLM writes fenced code blocks and they execute
through the standard agent_tools.py pipeline.
@@ -159,440 +165,6 @@ def _resolve_model(spec: str, owner: Optional[str] = None) -> Tuple[str, str, Di
# Tool implementations
# ---------------------------------------------------------------------------
async def do_chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send a one-off message to a specific model and return its reply.

    Content format:
        Line 1: model_name (or model_name@endpoint_name)
        Line 2+: the message to send

    Returns ``{"model", "response"}`` (response capped at 10000 characters)
    or ``{"error": ...}``. ``session_id`` is accepted but not used.
    """
    from src.llm_core import llm_call_async

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}

    message = remainder.strip()
    if not message:
        return {"error": "No message provided (line 2+ is the message)"}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    try:
        reply = await llm_call_async(
            url, model,
            [{"role": "user", "content": message}],
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
    except Exception as e:
        logger.error(f"chat_with_model failed: {e}")
        return {"error": f"Failed to get response from {model_spec}: {e}"}

    # Truncate very long responses
    if len(reply) > 10000:
        reply = reply[:10000] + "\n... (truncated)"
    return {"model": model, "response": reply}
# System prompt sent by do_ask_teacher ahead of the problem description.
_TEACHER_SYSTEM_PROMPT = (
    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
    "Provide clear, actionable guidance:\n"
    "1. Brief analysis of the problem\n"
    "2. Recommended approach (step by step)\n"
    "3. Key things to watch out for\n\n"
    "Be concise and practical. No preamble."
)
async def do_ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Ask a more capable model for guidance on a problem.

    Content format:
        Line 1: model_name (or 'auto' to use the ``teacher_model`` setting)
        Line 2+: the problem description

    Returns ``{"model", "response", "teacher": True}`` (response capped at
    8000 characters) or ``{"error": ...}``. ``session_id`` is accepted but
    not used.
    """
    from src.llm_core import llm_call_async
    from src.settings import get_setting

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    problem = remainder.strip()
    if not problem:
        return {"error": "No problem description provided"}

    # 'auto' (or a blank spec) defers to the configured teacher model.
    if model_spec.lower() in ("auto", ""):
        model_spec = get_setting("teacher_model", "")
        if not model_spec:
            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    prompt = [
        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem:\n{problem}"},
    ]
    try:
        answer = await llm_call_async(
            url, model,
            prompt,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
    except Exception as e:
        logger.error(f"ask_teacher failed: {e}")
        return {"error": f"Teacher call failed ({model_spec}): {e}"}

    if len(answer) > 8000:
        answer = answer[:8000] + "\n... (truncated)"
    return {"model": model, "response": answer, "teacher": True}
async def do_second_opinion(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Get a second opinion from another model, then have the session's own
    model evaluate that feedback and produce a unified version.

    Content format:
        Line 1: model_name (or model_name@endpoint_name)
        Line 2+ (optional): specific question or focus area

    Flow:
        1. Pull recent conversation context from the current session
        2. Send it to the reviewer model and collect its feedback
        3. Send the feedback to the session's own model to evaluate and unify
        4. Return the review and the unified response as one combined text
    """
    from src.llm_core import llm_call_async

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    focus = remainder.strip()

    try:
        reviewer_url, reviewer_model, reviewer_headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    # Pull recent conversation context from current session: the last 15
    # messages, each clipped to 2000 characters.
    context_text = ""
    sess = None
    if session_id and _session_manager:
        sess = _session_manager.get_session(session_id)
        if sess:
            rendered = []
            for entry in sess.get_context_messages()[-15:]:
                role = entry.get("role", "unknown").upper()
                text = entry.get("content", "")
                if isinstance(text, list):
                    # Multi-part content: keep only the text of dict parts.
                    text = " ".join(
                        part.get("text", "") for part in text if isinstance(part, dict)
                    )
                if text:
                    rendered.append(f"[{role}]: {text[:2000]}")
            context_text = "\n\n".join(rendered)

    if not context_text:
        return {"error": "No conversation context found to review"}

    # Step 1: get the reviewer's feedback
    reviewer_system = (
        "You are giving a second opinion on a conversation between a user and an AI assistant. "
        "Your job is to be genuinely helpful and honest — not a yes-man, but not a contrarian either.\n\n"
        "Guidelines:\n"
        "- If the plan/idea is solid, say so clearly. Don't manufacture problems that aren't there.\n"
        "- If you spot a real flaw, blind spot, or simpler approach — call it out directly.\n"
        "- Be practical. Don't over-engineer or over-analyze. Real-world tradeoffs matter.\n"
        "- If there's a meaningfully better way to do something, suggest it concretely.\n"
        "- Give credit where it's due — highlight what's working well.\n"
        "- Keep it concise and actionable. No fluff.\n"
        "- You're a second pair of eyes, not a professor grading a paper."
    )
    if focus:
        closing = f"\n\n---\nSpecifically, I want your take on: {focus}"
    else:
        closing = "\n\n---\nGive me your honest second opinion on what's being discussed."
    reviewer_message = f"Here's the conversation so far:\n\n{context_text}" + closing

    try:
        review = await llm_call_async(
            reviewer_url, reviewer_model,
            [
                {"role": "system", "content": reviewer_system},
                {"role": "user", "content": reviewer_message},
            ],
            headers=reviewer_headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        if len(review) > 8000:
            review = review[:8000] + "\n... (truncated)"
    except Exception as e:
        logger.error(f"second_opinion reviewer call failed: {e}")
        return {"error": f"Failed to get second opinion from {model_spec}: {e}"}

    # Step 2: send the review back to the session's own model for evaluation
    unified = ""
    original_model = "unknown"
    if sess:
        original_url = sess.endpoint_url
        original_model = sess.model
        original_headers = getattr(sess, "headers", None) or {}
        unify_system = (
            "Another AI model just reviewed the conversation you've been having with the user. "
            "Read their feedback carefully, then respond with:\n\n"
            "1. **What you agree with** — acknowledge valid points honestly.\n"
            "2. **What you disagree with** — explain why, briefly.\n"
            "3. **Unified version** — produce an updated/refined version of whatever was being discussed, "
            "incorporating the feedback you found valid. Don't accept every note blindly — "
            "use your judgment on what actually improves things vs what's unnecessary.\n\n"
            "Be concise and practical. The user wants a better result, not a meta-discussion."
        )
        unify_message = (
            f"Here's the conversation context:\n\n{context_text}\n\n"
            "---\n\n"
            f"**Review from {reviewer_model}:**\n\n{review}\n\n"
            "---\n\n"
            "Evaluate this feedback and produce a unified improved version."
        )
        try:
            unified = await llm_call_async(
                original_url, original_model,
                [
                    {"role": "system", "content": unify_system},
                    {"role": "user", "content": unify_message},
                ],
                headers=original_headers,
                timeout=AI_CHAT_TIMEOUT,
            )
            if len(unified) > 10000:
                unified = unified[:10000] + "\n... (truncated)"
        except Exception as e:
            # A failed unify step still returns the review; the failure is
            # reported inline in place of the unified text.
            logger.error(f"second_opinion unify call failed: {e}")
            unified = f"(Failed to get unified response: {e})"

    # Build combined result
    combined = (
        f"## Second Opinion from {reviewer_model}\n\n{review}"
        "\n\n---\n\n"
        f"## {original_model}'s Response\n\n{unified}"
    )
    return {
        "model": reviewer_model,
        "response": combined,
        "instruction": "Present these results to the user exactly as they are. Do NOT call second_opinion again. The user can continue the conversation from here.",
    }
async def do_create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Create a new chat session from a two-line spec.

    Content format:
        Line 1: session name
        Line 2: model_name (or model_name@endpoint_name)

    Returns a dict with ``session_id``, ``name``, ``model`` and
    ``endpoint_url`` on success, or ``{"error": ...}`` on any failure.
    ``session_id`` (the caller's active session) is accepted but not used.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}

    spec_lines = content.strip().split("\n")
    if len(spec_lines) < 2:
        return {"error": "Need 2 lines: session name, then model spec"}

    name, model_spec = spec_lines[0].strip(), spec_lines[1].strip()
    if not name:
        return {"error": "Session name cannot be empty"}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    sid = str(uuid.uuid4())[:8]
    try:
        _session_manager.create_session(
            session_id=sid,
            name=name,
            endpoint_url=url,
            model=model,
            rag=False,
            owner=owner,
        )

        # Keep the resolved headers on the session so later calls can reuse them.
        created = _session_manager.get_session(sid)
        if created and headers:
            created.headers = headers

        # Event dispatch is best-effort: a failure here must not fail the tool.
        try:
            from src.event_bus import fire_event
            fire_event("session_created", owner)
        except Exception:
            logger.debug("session_created event dispatch failed", exc_info=True)

        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
    except Exception as e:
        logger.error(f"create_session failed: {e}")
        return {"error": f"Failed to create session: {e}"}
async def do_list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the caller's sessions, most-recently-active first.

    Output includes a relative "last active" timestamp per row so the
    agent can answer "open my last chat" without guessing from titles.
    The most-recent session is always first in the list.

    Args:
        content: Optional filter keyword (case-insensitive substring match
            against the session name).
        session_id: The caller's active session; not used here.
        owner: User whose sessions are listed.

    Returns:
        ``{"results": <markdown list>}`` or ``{"error": <message>}``.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}

    keyword = content.strip().lower() or None
    try:
        from core.database import SessionLocal, Session as DbSession
        from datetime import datetime, timezone

        def _as_utc(ts):
            """Return ``ts`` as an aware UTC datetime, or None if it is not a datetime.

            Naive values are taken to be UTC, matching the previous
            ``utcnow()``-based arithmetic.
            """
            if not isinstance(ts, datetime):
                return None
            if ts.tzinfo is None:
                return ts.replace(tzinfo=timezone.utc)
            return ts.astimezone(timezone.utc)

        # Pull every session's last_accessed from the DB so we can sort
        # by recency. In-memory sessions hold name + model + msg_count;
        # the DB row holds the timestamps.
        db = SessionLocal()
        try:
            db_rows = {r.id: r for r in db.query(DbSession).all()}
        finally:
            db.close()

        # SECURITY: scope to the caller's sessions. Passing None returned
        # every user's sessions, which the agent tool then exposed via the
        # "list my chats" reply.
        sessions = _session_manager.get_sessions_for_user(owner)

        rows = []
        for sid, sess in sessions.items():
            if keyword and keyword not in (sess.name or "").lower():
                continue
            db_row = db_rows.get(sid)
            # Prefer last_accessed; fall back to updated_at, then created_at.
            ts = None
            if db_row:
                ts = (
                    getattr(db_row, 'last_accessed', None)
                    or getattr(db_row, 'updated_at', None)
                    or getattr(db_row, 'created_at', None)
                )
            rows.append((ts, sid, sess))

        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
        # Every key is normalised to aware UTC: comparing a naive sentinel
        # with an aware timestamp raises TypeError and would fail the listing.
        oldest = datetime.min.replace(tzinfo=timezone.utc)
        rows.sort(key=lambda r: _as_utc(r[0]) or oldest, reverse=True)

        def _rel(ts):
            """Format ``ts`` as a short relative age ("5m ago", "2d ago", ...)."""
            if not ts:
                return 'never'
            aware = _as_utc(ts)
            if aware is None:
                return 'unknown'
            diff = (datetime.now(timezone.utc) - aware).total_seconds()
            if diff < 60:
                return 'just now'
            if diff < 3600:
                return f'{int(diff / 60)}m ago'
            if diff < 86400:
                return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7:
                return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')

        lines = []
        for i, (ts, sid, sess) in enumerate(rows[:50]):
            # Escape brackets so the name cannot break the markdown link.
            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
            msg_count = getattr(sess, "message_count", 0) or 0
            model = getattr(sess, "model", "unknown")
            marker = " ← most recent" if i == 0 else ""
            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
        if len(rows) > 50:
            lines.append(f"... and {len(rows) - 50} more (showing first 50)")

        if not lines:
            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
        return {
            "results": (
                f"Found {len(rows)} session(s), sorted most-recent first:\n"
                + "\n".join(lines)
                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
            )
        }
    except Exception as e:
        logger.error(f"list_sessions failed: {e}")
        return {"error": str(e)}
async def do_send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send a message to an existing session and return the model's reply.

    Content format:
        Line 1: session_id
        Line 2+: message

    The user message and the reply are both appended to the target session.
    The reply returned to the caller is capped at 10000 characters; the
    stored copy is not.

    Returns:
        ``{"session_id", "session_name", "response"}`` on success, or
        ``{"error": <message>}``. ``session_id`` (the caller's active
        session) is accepted but not used.
    """
    from src.llm_core import llm_call_async
    from core.models import ChatMessage

    if not _session_manager:
        return {"error": "Session manager not available"}

    lines = content.strip().split("\n", 1)
    if len(lines) < 2:
        return {"error": "Need 2 lines: session_id, then message"}
    target_sid = lines[0].strip()
    message = lines[1].strip()

    sess = _session_manager.get_session(target_sid)
    if not sess:
        return {"error": f"Session '{target_sid}' not found"}
    # Owner-scope: reject access to another user's session. The error is
    # identical to the not-found case so the id's existence is not revealed.
    if owner and getattr(sess, "owner", None) and sess.owner != owner:
        return {"error": f"Session '{target_sid}' not found"}
    if not message:
        return {"error": "No message provided"}

    try:
        # Build context from session history
        context = sess.get_context_messages()
        context.append({"role": "user", "content": message})
        response = await llm_call_async(
            sess.endpoint_url, sess.model, context,
            # do_create_session only sets `headers` when the resolved
            # headers are non-empty, so read it defensively rather than
            # assuming the attribute exists.
            headers=getattr(sess, "headers", None),
            timeout=AI_CHAT_TIMEOUT,
        )
        # Save both messages to session
        sess.add_message(ChatMessage("user", message))
        sess.add_message(ChatMessage("assistant", response))
        # Truncate for tool output
        if len(response) > 10000:
            response = response[:10000] + "\n... (truncated)"
        return {
            "session_id": target_sid,
            "session_name": sess.name,
            "response": response,
        }
    except Exception as e:
        logger.error(f"send_to_session failed: {e}")
        return {"error": f"Failed to send to session: {e}"}
async def stream_ai_tool(tool: str, content: str, session_id: Optional[str] = None, owner: Optional[str] = None):
@@ -715,229 +287,6 @@ async def do_pipeline(content: str, session_id: Optional[str] = None, owner: Opt
# Session management tool
# ---------------------------------------------------------------------------
async def do_manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Run a single management action against a chat session.

    ``content`` may arrive in either of two shapes:

    * a JSON object, e.g. ``{"action": ..., "session_id": ..., "value": ...}``.
      ``session`` / ``id`` are also accepted for the target, ``name`` /
      ``new_name`` / ``title`` / ``keep_count`` for the value, and ``filter``
      for the ``list`` action;
    * the legacy line format: line 1 = action, line 2 = target session id
      (or "current" for the active session), line 3 = action parameter.

    Supported actions: list, switch/open/select/view, rename, archive,
    unarchive, delete, important, unimportant, truncate, fork.

    Returns a dict holding ``results`` (plus action metadata) on success or
    ``error`` on failure; exceptions raised while acting are logged and
    converted to an ``error`` dict.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}

    from src.database import SessionLocal, Session as DbSession

    # --- Parse the request -------------------------------------------------
    # Function-calling models send JSON, fenced-block callers send lines.
    # Anything that is not a JSON object falls through to the line parser.
    raw_text = (content or "").strip()
    payload = None
    if raw_text.startswith("{"):
        try:
            payload = json.loads(raw_text)
        except Exception:
            payload = None

    if isinstance(payload, dict):
        action = str(payload.get("action") or "").strip().lower()
        target_sid = str(
            payload.get("session_id") or payload.get("session") or payload.get("id") or ""
        ).strip()
        param = payload.get("value")
        if param is None:
            param = (payload.get("name") or payload.get("new_name")
                     or payload.get("title") or payload.get("keep_count"))
        # The action parameter: new name (rename) / keep_count (truncate, fork).
        value = str(param).strip() if param is not None else None
        list_filter = str(payload.get("filter") or "").strip()
    else:
        rows = raw_text.split("\n")
        if not rows or not rows[0].strip():
            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
        action = rows[0].strip().lower()
        target_sid = rows[1].strip() if len(rows) >= 2 else ""
        value = rows[2].strip() if len(rows) >= 3 else None
        list_filter = "\n".join(rows[1:]).strip()

    if not action:
        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}

    # `list` is an alias for the list_sessions tool, matching the other manage_* tools.
    if action == "list":
        return await do_list_sessions(list_filter, session_id, owner=owner)

    if not target_sid:
        return {"error": "Need a session_id (or 'current' for the active chat)"}

    # "current" resolves to the active session when one is known.
    if target_sid.lower() == "current" and session_id:
        target_sid = session_id

    def _find_row(db):
        """Fetch the target session row, restricted to ``owner`` when one is given."""
        lookup = db.query(DbSession).filter(DbSession.id == target_sid)
        if owner is not None:
            lookup = lookup.filter(DbSession.owner == owner)
        return lookup.first()

    def _keep_count(default):
        """Parse ``value`` as an int; fall back to ``default`` when absent or not numeric."""
        if value:
            try:
                return int(value)
            except ValueError:
                pass
        return default

    not_found = f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."

    # --- switch / open / select / view --------------------------------------
    # Nothing is changed server-side: the result is a `#session-<id>` anchor
    # the user can click to open the chat.
    if action in ("switch", "open", "select", "view"):
        db = SessionLocal()
        try:
            row = _find_row(db)
            if not row:
                return {"error": not_found}
            name = row.name or target_sid
        finally:
            db.close()
        return {
            "action": action,
            "session_id": target_sid,
            "name": name,
            "results": f"[{name}](#session-{target_sid}) — click to open.",
        }

    # --- Mutating actions ----------------------------------------------------
    db = SessionLocal()
    try:
        if action == "rename":
            if not value:
                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
            new_name = value
            row = _find_row(db)
            if not row:
                return {"error": not_found}
            row.name = new_name
            db.commit()
            # Keep the in-memory session manager in step with the database.
            _session_manager.update_session_name(target_sid, new_name)
            return {"action": "rename", "session_id": target_sid, "name": new_name,
                    "results": f"Session renamed to '{new_name}'"}

        if action in ("archive", "unarchive"):
            row = _find_row(db)
            if not row:
                return {"error": not_found}
            archiving = action == "archive"
            row.archived = archiving
            db.commit()
            if archiving:
                summary = f"Session '{row.name}' archived"
            else:
                summary = f"Session '{row.name}' unarchived"
            return {"action": action, "session_id": target_sid, "results": summary}

        if action == "delete":
            if target_sid == session_id:
                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
            row = _find_row(db)
            if not row:
                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
            if row.is_important:
                return {"error": f"Session '{row.name}' is starred/favorited. Unstar it first before deleting."}
            try:
                ok = _session_manager.delete_session(target_sid)
                if not ok:
                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
                return {"action": "delete", "session_id": target_sid,
                        "results": f"Session '{row.name or target_sid}' deleted"}
            except Exception as e:
                return {"error": f"Failed to delete session: {e}"}

        if action in ("important", "unimportant"):
            is_important = action == "important"
            row = _find_row(db)
            if not row:
                return {"error": not_found}
            # A starred session may only be unstarred by the user, never by the agent.
            if row.is_important and not is_important:
                return {"error": f"Session '{row.name}' is starred by the user. Only the user can unstar sessions manually."}
            row.is_important = is_important
            db.commit()
            status = "marked as important" if is_important else "unmarked as important"
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{row.name}' {status}"}

        if action == "truncate":
            row = _find_row(db)
            if not row:
                return {"error": not_found}
            keep_count = _keep_count(10)
            if _session_manager.truncate_messages(target_sid, keep_count):
                return {"action": "truncate", "session_id": target_sid,
                        "results": f"Session truncated to last {keep_count} messages"}
            return {"error": f"Failed to truncate session '{target_sid}'"}

        if action == "fork":
            row = _find_row(db)
            if not row:
                return {"error": not_found}
            keep_count = _keep_count(0)  # 0 = copy every message
            source = _session_manager.get_session(target_sid)
            if not source:
                return {"error": f"Session '{target_sid}' not found"}

            new_sid = str(uuid.uuid4())[:8]
            _session_manager.create_session(
                session_id=new_sid,
                name=f"Fork: {source.name}",
                endpoint_url=source.endpoint_url,
                model=source.model,
                rag=False,
                owner=owner,
            )

            # Copy the history (or its first keep_count entries) into the fork.
            history = source.get_context_messages()
            if keep_count > 0:
                history = history[:keep_count]
            from core.models import ChatMessage as InMemoryMsg
            new_sess = _session_manager.get_session(new_sid)
            for entry in history:
                new_sess.add_message(InMemoryMsg(entry["role"], entry["content"]))

            # Event dispatch is best-effort; a failure must not undo the fork.
            try:
                from src.event_bus import fire_event
                fire_event("session_created", owner)
            except Exception:
                logger.debug("session_created event dispatch failed", exc_info=True)

            return {"action": "fork", "session_id": new_sid,
                    "source_session": target_sid, "messages_copied": len(history),
                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}

        return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
    except Exception as e:
        logger.error(f"manage_session failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# Memory management tool
# ---------------------------------------------------------------------------
@@ -972,16 +321,15 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
memories = [m for m in memories if m.get("category", "").lower() == category_filter]
if not memories:
return {"results": "No memories found" + (f" in category '{category_filter}'" if category_filter else "") + "."}
result_lines = [f"Found {len(memories)} memory entries:\n"]
for m in memories[:100]:
for m in memories:
cat = m.get("category", "fact")
mid = m.get("id", "?")[:8]
text = m.get("text", "")
if len(text) > 150:
text = text[:150] + "..."
result_lines.append(f"- [{cat}] `{mid}` — {text}")
if len(memories) > 100:
result_lines.append(f"... and {len(memories) - 100} more")
return {"results": "\n".join(result_lines)}
elif action == "add":
@@ -1105,83 +453,6 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
return {"error": f"Unknown action '{action}'. Use: list, add, edit, delete, search"}
# ---------------------------------------------------------------------------
# List models tool
# ---------------------------------------------------------------------------
async def do_list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the models available on every enabled endpoint.

    Args:
        content: Optional filter keyword (case-insensitive). A model is kept
            when the keyword appears in its id or in its endpoint's name.
        session_id: Unused; accepted for signature parity with the other tools.
        owner: When set, only endpoints passing ``owner_filter`` are queried.

    Returns:
        ``{"results": <markdown listing>}`` on success (including the
        "nothing configured" / "nothing matched" messages), or
        ``{"error": <message>}`` if an unexpected exception occurs.
    """
    import asyncio
    import httpx
    from src.database import SessionLocal, ModelEndpoint
    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
    from src.auth_helpers import owner_filter

    # Tolerate a missing/None payload instead of raising AttributeError.
    keyword = (content or "").strip().lower() or None

    db = SessionLocal()
    try:
        # `== True` is intentional: it builds a SQL expression, not a Python bool.
        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)  # noqa: E712
        if owner:
            query = owner_filter(query, ModelEndpoint, owner)
        endpoints = query.all()

        if not endpoints:
            return {"results": "No enabled model endpoints configured."}

        result_lines = []
        total_models = 0

        for ep in endpoints:
            # An endpoint whose runtime config cannot be resolved is skipped silently.
            try:
                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
            except Exception:
                continue
            provider = _detect_provider(base)
            headers = build_headers(api_key, base)

            model_ids = []
            if provider == "anthropic":
                # Anthropic models come from the static list rather than a models request.
                model_ids = list(ANTHROPIC_MODELS)
            else:
                try:
                    models_url = build_models_url(base)
                    if models_url:
                        # httpx.get is synchronous; run it in a worker thread so a
                        # slow or unreachable endpoint (up to the 5s timeout each)
                        # does not stall the event loop for every other request.
                        r = await asyncio.to_thread(
                            httpx.get, models_url, headers=headers, timeout=5
                        )
                        r.raise_for_status()
                        data = r.json()
                        # OpenAI-style payload: {"data": [{"id": ...}, ...]}
                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
                        if not model_ids:
                            # Alternate payload shape: {"models": [{"name"|"model": ...}, ...]}
                            model_ids = [
                                m.get("name") or m.get("model")
                                for m in (data.get("models") or [])
                                if m.get("name") or m.get("model")
                            ]
                    else:
                        # No models URL for this endpoint: fall back to the cached list.
                        model_ids = json.loads(ep.cached_models or "[]")
                except Exception:
                    model_ids = ["(endpoint offline)"]

            if keyword:
                model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]

            if model_ids:
                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
                for mid in model_ids:
                    result_lines.append(f"  - `{mid}`")
                    total_models += 1

        if not result_lines:
            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}

        # Each endpoint heading already begins with "\n", so the header needs no separator.
        header = f"Available models ({total_models} total):"
        return {"results": header + "\n".join(result_lines)}
    except Exception as e:
        logger.error(f"list_models failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
# ---------------------------------------------------------------------------
@@ -1293,7 +564,7 @@ async def do_ui_control(content: str, session_id: Optional[str] = None, owner: O
set_theme <preset> — Apply a built-in theme preset (dark, light, midnight, paper, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, organs, lavender, gpt, claude, cute)
create_theme <name> <bg> <fg> <panel> <border> <accent> [key=val ...] — Create custom theme. Optional key=val: advanced color overrides AND background effects: bgPattern=<none|dots|synapse|rain|constellations|perlin-flow|petals|sparkles|embers>, bgEffectColor=#RRGGBB, bgEffectIntensity=<num>, bgEffectSize=<num>, frosted=true|false
open_panel <name> — Open a panel (documents, gallery, email, sessions, notes, memories, skills, settings, cookbook)
open_email_reply <uid> [folder] [reply|reply-all|ai-reply] — Open a reply draft document for an email; does not send
open_email_reply <uid> [folder] [reply|reply-all|ai-reply] [body text] — Open a reply draft document for an email; does not send. ALWAYS append the body text when the user told you what to say (one-shot draft); only omit body when the user just asked to "open a reply" without content.
get_toggles — Return current toggle states (server-side knowledge)
"""
lines = content.strip().split("\n")
@@ -1537,21 +808,54 @@ async def do_ui_control(content: str, session_id: Optional[str] = None, owner: O
}
elif action == "open_email_reply":
reply_parts = lines[0].strip().split()
uid = reply_parts[1].strip() if len(reply_parts) > 1 else ""
folder = reply_parts[2].strip() if len(reply_parts) > 2 else "INBOX"
mode = reply_parts[3].strip().lower() if len(reply_parts) > 3 else "reply"
# Two forms supported:
# open_email_reply <uid> [folder] [reply|reply-all|ai-reply]
# open_email_reply <uid> [folder] [reply|reply-all|ai-reply]
# <body text on subsequent lines or after the mode token>
# The body text (if any) gets pre-filled into the reply draft so the
# agent can compose-and-open in one tool call instead of opening an
# empty draft and leaving the user to wonder what happened.
first_line = lines[0].strip()
parts = first_line.split(maxsplit=4)
uid = parts[1].strip() if len(parts) > 1 else ""
folder = parts[2].strip() if len(parts) > 2 else "INBOX"
mode = parts[3].strip().lower() if len(parts) > 3 else "reply"
# Body: everything on the first line after the mode token, plus any
# subsequent lines. Allows multi-line bodies.
inline_body = parts[4] if len(parts) > 4 else ""
rest_lines = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
body = (inline_body + ("\n" + rest_lines if rest_lines else "")).strip()
if not uid:
return {"error": "open_email_reply needs: open_email_reply <uid> [folder] [reply|reply-all|ai-reply]"}
return {"error": "open_email_reply needs: open_email_reply <uid> [folder] [reply|reply-all|ai-reply] [body text]"}
if mode not in ("reply", "reply-all", "ai-reply"):
mode = "reply"
return {
# Body is REQUIRED for the agent path. Opening an empty draft is what
# users do by clicking the Reply button — they don't ask the agent
# for that. Every agent invocation of open_email_reply MUST include
# the body. Reject empty so the agent retries with the content the
# user asked for. Exception: ai-reply mode triggers the existing
# AI-Reply path on the frontend which generates its own body.
if not body and mode != "ai-reply":
return {
"error": (
"open_email_reply called without body. The agent path REQUIRES a body — "
"opening an empty draft is the wrong response when the user asked you to write. "
"Re-call with the reply text included: "
f"`open_email_reply {uid} {folder or 'INBOX'} {mode} <your reply text here>`. "
"Compose the reply now based on the open email's content and the user's request, "
"then call this tool again with the body. Do NOT call create_document instead."
),
}
result = {
"ui_event": "open_email_reply",
"uid": uid,
"folder": folder or "INBOX",
"mode": mode,
"results": f"Opening reply draft for email UID {uid}",
"results": f"Opening reply draft for email UID {uid}" + (" with pre-filled body" if body else ""),
}
if body:
result["body"] = body
return result
elif action == "get_toggles":
return {
@@ -1581,7 +885,9 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
"""
import base64
import httpx
import os
from pathlib import Path
from src.url_safety import check_outbound_url
lines = content.strip().split("\n")
prompt = lines[0].strip() if lines else ""
@@ -1747,8 +1053,15 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
elif img.get("url"):
# Download external URL and save locally (DALL-E returns temp URLs)
result_url = img["url"]
ok, reason = check_outbound_url(
result_url,
block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
)
if not ok:
return {"error": f"Image API returned unsafe image URL: {reason}"}
try:
dl_resp = httpx.get(img["url"], timeout=60)
dl_resp = httpx.get(result_url, timeout=60)
if dl_resp.status_code == 200:
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
@@ -1758,10 +1071,10 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
image_url = f"/api/generated-image/{filename}"
image_id = _save_to_gallery(filename)
else:
image_url = img["url"] # fallback to external URL
image_url = result_url # fallback to external URL
except Exception as _dl_e:
logger.warning(f"Failed to download DALL-E image: {_dl_e}")
image_url = img["url"] # fallback to external URL
image_url = result_url # fallback to external URL
else:
return {"error": "Image API returned unexpected format (no b64_json or url)"}
@@ -1790,55 +1103,20 @@ async def dispatch_ai_tool(
) -> Tuple[str, Dict]:
"""Dispatch an AI interaction tool. Returns (description, result_dict)."""
if tool == "chat_with_model":
model_spec = content.split("\n")[0].strip()[:60]
desc = f"chat_with_model: {model_spec}"
result = await do_chat_with_model(content, session_id, owner=owner)
elif tool == "create_session":
name = content.split("\n")[0].strip()[:60]
desc = f"create_session: {name}"
result = await do_create_session(content, session_id, owner=owner)
elif tool == "list_sessions":
keyword = content.strip()[:40]
desc = f"list_sessions{': ' + keyword if keyword else ''}"
result = await do_list_sessions(content, session_id, owner=owner)
elif tool == "send_to_session":
sid = content.split("\n")[0].strip()[:20]
desc = f"send_to_session: {sid}"
result = await do_send_to_session(content, session_id, owner=owner)
elif tool == "pipeline":
if tool == "pipeline":
desc = "pipeline: running steps"
result = await do_pipeline(content, session_id, owner=owner)
elif tool == "manage_session":
action = content.split("\n")[0].strip()[:40]
desc = f"manage_session: {action}"
result = await do_manage_session(content, session_id, owner=owner)
elif tool == "manage_memory":
action = content.split("\n")[0].strip()[:40]
desc = f"manage_memory: {action}"
result = await do_manage_memory(content, session_id, owner=owner)
elif tool == "list_models":
keyword = content.strip()[:40]
desc = f"list_models{': ' + keyword if keyword else ''}"
result = await do_list_models(content, session_id, owner=owner)
elif tool == "ui_control":
action = content.split("\n")[0].strip()[:60]
desc = f"ui_control: {action}"
result = await do_ui_control(content, session_id, owner=owner)
elif tool == "ask_teacher":
problem = content.split("\n", 1)[-1].strip()[:60]
desc = f"ask_teacher: {problem}"
result = await do_ask_teacher(content, session_id, owner=owner)
else:
desc = f"unknown ai tool: {tool}"
result = {"error": f"Unknown AI interaction tool: {tool}"}
+17 -2
View File
@@ -81,11 +81,26 @@ class APIKeyManager:
keys stay encrypted. Loading via load() first would decrypt them and
write them back as plaintext, which then fails to decrypt on the next
load() and silently drops those providers.
Uses atomic write (temp file + os.replace) so a crash, disk-full, or
mid-write error never truncates the existing keys file.
"""
keys = self._load_raw()
keys[provider] = self.encrypt_api_key(api_key)
with open(self.api_keys_file, 'w', encoding="utf-8") as f:
json.dump(keys, f)
tmp_file = self.api_keys_file + ".tmp"
try:
with open(tmp_file, 'w', encoding="utf-8") as f:
json.dump(keys, f)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_file, self.api_keys_file)
except OSError:
# Clean up temp file on failure; re-raise so callers see the error
try:
os.remove(tmp_file)
except OSError:
pass
raise
def load(self) -> Dict[str, str]:
"""Load and decrypt API keys"""
+30 -1
View File
@@ -1,6 +1,13 @@
# src/app_helpers.py
import os
import base64
import logging
import os
from fastapi import HTTPException
from fastapi.responses import HTMLResponse
from starlette.requests import Request
logger = logging.getLogger(__name__)
def read_if_exists(path: str) -> str:
"""Read file if it exists, return empty string otherwise."""
@@ -20,6 +27,28 @@ def abs_join(base_dir: str, rel: str) -> str:
"""Join paths and return absolute path."""
return os.path.abspath(os.path.join(base_dir, rel))
def serve_html_with_nonce(request: Request, file_path: str) -> HTMLResponse:
    """Read an app-bundled HTML page and inject the CSP nonce into inline <script> tags.

    Every ``{{CSP_NONCE}}`` placeholder in the file is replaced with
    ``request.state.csp_nonce`` (an empty string when the attribute is absent
    or ``None``).

    Callers pass fixed, server-owned template paths (index/login/backgrounds),
    never a client-supplied path. So any read failure here — a missing file
    (broken deployment), a permission/IO error, or a file that is not valid
    UTF-8 — is a server fault, not a client "not found": map all of them to a
    logged 500 so a missing core template surfaces in 5xx alerting instead of
    hiding behind a 404. If a future caller serves a client-influenced path
    where 404 is correct, branch that at the call site rather than defaulting
    this shared helper to 404.

    Raises:
        HTTPException: status 500 when the file cannot be read or decoded.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            html = f.read()
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # named explicitly for a mis-encoded template to take the logged path.
        logger.exception("Failed to read page %s", file_path)
        raise HTTPException(500, "Internal server error") from exc
    # `or ""` guards against the attribute being set to None, which would
    # make str.replace raise TypeError.
    nonce = getattr(request.state, "csp_nonce", "") or ""
    return HTMLResponse(html.replace("{{CSP_NONCE}}", nonce))
def inside_base_dir(base_dir: str, path: str) -> bool:
"""Check if path is inside base directory."""
if not isinstance(base_dir, str) or not isinstance(path, str):
+23 -1
View File
@@ -263,10 +263,32 @@ def list_for_session(session_id: str) -> List[Dict[str, Any]]:
return [r for r in refresh().values() if r.get("session_id") == session_id]
def kill(job_id: str) -> Optional[Dict[str, Any]]:
    """Stop a running background job and record it as killed.

    Returns the job's record after the update, or ``None`` when ``job_id`` is
    not known. Calling this on a job that is no longer running is a no-op that
    returns the record untouched, so repeated calls are safe.

    A killed job is stored as ``failed`` with exit code ``-1`` and
    ``killed=True``. ``followed_up`` is set as well so the monitor does not
    also fire an auto-continue for a job the agent deliberately stopped.
    """
    registry = _load()
    record = registry.get(job_id)
    if record is None:
        return None
    if record.get("status") != "running":
        return record

    # Terminate first, then stamp the record so ended_at reflects the kill.
    _kill(record.get("pid"))
    record.update({
        "status": "failed",
        "exit_code": -1,
        "ended_at": time.time(),
        "killed": True,
        "followed_up": True,
    })
    _save(registry)
    return record
def result_text(rec: Dict[str, Any]) -> str:
"""Human/agent-readable summary of a finished job, for the follow-up."""
out = _read_output(rec)
if rec.get("timed_out"):
if rec.get("killed"):
head = "Background job was killed."
elif rec.get("timed_out"):
head = f"Background job timed out after {rec.get('max_runtime_s')}s."
elif rec.get("died"):
head = "Background job process died unexpectedly (no exit code)."
+50 -48
View File
@@ -76,8 +76,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
import json
import re
from src.constants import DATA_DIR
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
from src.llm_core import llm_call_async_with_fallback
from src.memory import MemoryManager
manager = MemoryManager(DATA_DIR)
@@ -116,10 +115,9 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
if len(group_memories) < 2:
return False
url, model, headers = resolve_endpoint("utility", owner=group_owner or None)
if not url or not model:
url, model, headers = resolve_endpoint("default", owner=group_owner or None)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=group_owner or None)
if not candidates:
return False
try:
@@ -147,13 +145,11 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
"\"drop\":[{\"id\":\"existing id\",\"reason\":\"short reason\"}]}\n\n"
f"MEMORIES:\n{json.dumps(items, ensure_ascii=False)}"
)
raw = await llm_call_async(
url=url,
model=model,
raw = await llm_call_async_with_fallback(
candidates,
messages=[{"role": "user", "content": prompt}],
temperature=0.0,
max_tokens=4096,
headers=headers,
timeout=120,
)
from src.text_helpers import strip_think
@@ -604,8 +600,7 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
try:
from datetime import timedelta
from core.database import SessionLocal, CalendarEvent
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
from src.llm_core import llm_call_async_with_fallback
import re as _re, json as _json
db = SessionLocal()
@@ -620,10 +615,9 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
if not events:
return "No upcoming events to classify", True
llm_url, llm_model, llm_headers = resolve_endpoint("utility", owner=owner)
if not llm_url:
llm_url, llm_model, llm_headers = resolve_endpoint("default", owner=owner)
llm_available = bool(llm_url and llm_model)
from src.task_endpoint import resolve_task_candidates
llm_candidates = resolve_task_candidates(owner=owner)
llm_available = bool(llm_candidates)
# Pull user memories so the LLM has personal context (relationships,
# job, hobbies). Helps it know e.g. "<name> is your spouse" so their
@@ -699,11 +693,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
f"EVENTS: {_json.dumps(items)}"
)
try:
raw = await llm_call_async(
url=llm_url, model=llm_model,
raw = await llm_call_async_with_fallback(
llm_candidates,
messages=[{"role": "user", "content": prompt}],
temperature=0.1, max_tokens=16384,
headers=llm_headers, timeout=180,
timeout=180,
)
from src.text_helpers import strip_think as _st
raw = _st(raw or "", prose=False, prompt_echo=False)
@@ -810,8 +804,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
import asyncio as _aio
from datetime import datetime as _dt, timedelta as _td
from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
from src.llm_core import llm_call_async_with_fallback
# 1. Pull recent UIDs + From headers cheaply (header-only fetch).
def _pull_headers():
@@ -891,11 +884,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
if not eligible:
return "All sender sigs already cached (or no eligible senders)", True
url, model, headers = resolve_endpoint("utility", owner=owner)
if not url or not model:
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No LLM endpoint available", False
model = candidates[0][1]
analyzed = 0
no_sig = 0
@@ -949,11 +942,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
)
try:
raw = await llm_call_async(
url=url, model=model,
raw = await llm_call_async_with_fallback(
candidates,
messages=[{"role": "user", "content": prompt}],
temperature=0.0, max_tokens=600,
headers=headers, timeout=60,
timeout=60,
)
from src.text_helpers import strip_think as _st
sig = _st(raw or "", prose=False, prompt_echo=False).strip()
@@ -1137,7 +1130,6 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
from routes.skills_routes import _run_skill_test_once, _skill_test_task
from src.endpoint_resolver import resolve_endpoint
# #3 SCOPE GUARD: refuse to run on a None/empty owner — otherwise
# `sm.load(owner=None)` returns every user's skills and we'd cross-
@@ -1152,27 +1144,40 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
if not names:
raise TaskNoop("no skills to test")
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No Default/Utility model configured — set one in Settings.", False
# #2 NO SILENT MODEL SWAP: if the configured model isn't served by the
# endpoint, try a basename match — but fail loudly instead of grabbing
# `avail[0]` which could be an embedding-only model and produce 36
# garbage transcripts → 36 'unknown' verdicts with no hint why.
url, model, headers = candidates[0]
try:
from src.llm_core import list_model_ids
avail = list_model_ids(url, headers=headers)
if avail and model not in avail:
import os as _os
base = _os.path.basename((model or "").rstrip("/"))
m = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
if m:
model = m
else:
return (f"Default model '{model}' not served by endpoint {url}. "
f"Available: {', '.join(avail[:8])}{'' if len(avail) > 8 else ''}. "
"Set a valid Default model in Settings."), False
import os as _os
selected = None
mismatch_notes = []
for cand_url, cand_model, cand_headers in candidates:
avail = list_model_ids(cand_url, headers=cand_headers)
if not avail or cand_model in avail:
selected = (cand_url, cand_model, cand_headers)
break
base = _os.path.basename((cand_model or "").rstrip("/"))
matched = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
if matched:
selected = (cand_url, matched, cand_headers)
break
mismatch_notes.append(
f"{cand_model} not served by {cand_url}; available: "
f"{', '.join(avail[:8])}{'...' if len(avail) > 8 else ''}"
)
if selected:
url, model, headers = selected
elif mismatch_notes:
return "No configured task fallback model is served. " + " | ".join(mismatch_notes[:3]), False
except Exception as _e:
logger.warning(f"test_skills model resolve check failed (continuing): {_e}")
@@ -1483,7 +1488,6 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
from pathlib import Path as _P
from core.database import SessionLocal as _SL, EmailAccount as _EA
from routes.email_helpers import _imap_connect, _decode_header
from src.endpoint_resolver import resolve_endpoint, resolve_utility_fallback_candidates
from src.llm_core import llm_call_async_with_fallback
# Per-owner state file so multi-user runs don't clobber each other's
@@ -1505,12 +1509,10 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
# ── 1. Resolve LLM candidates (utility primary + utility fallbacks; fall
# through to default chat as a last resort).
url, model, headers = resolve_endpoint("utility", owner=owner)
if not url or not model:
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
from src.task_endpoint import resolve_task_candidates
candidates = resolve_task_candidates(owner=owner)
if not candidates:
return "No LLM endpoint available", False
candidates = [(url, model, headers)] + resolve_utility_fallback_candidates(owner=owner)
# ── 2. Enumerate enabled accounts. Match this task's owner AND fall
# back to the legacy "unowned account whose imap_user / from_address
+3 -2
View File
@@ -14,6 +14,7 @@ import subprocess
import sys
from core.platform_compat import IS_WINDOWS, which_tool
from src.runtime_paths import get_app_root
logger = logging.getLogger(__name__)
@@ -81,7 +82,7 @@ _BUILTIN_NPX_SERVERS = {
"name": "Built-in: Browser",
"command": "npx",
"args": ["-y", "@playwright/mcp@latest", "--headless", "--caps", "vision"],
},
}
}
# Global flag to disable MCP if there are compatibility issues
@@ -94,7 +95,7 @@ async def register_builtin_servers(mcp_manager):
logger.info("Built-in MCP servers disabled via ODYSSEUS_DISABLE_MCP")
return
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
base_dir = get_app_root()
python = sys.executable
async def _connect_python_server(server_id: str, script_path: str, name: str):
+3 -2
View File
@@ -5,6 +5,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field, field_validator
from src.constants import DATA_DIR as _DATA_DIR_CONST
from src.runtime_paths import get_app_root
# Cross-platform OS flag, exposed here so callers can `from src.config import
# IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
@@ -19,7 +20,7 @@ IS_WINDOWS = os.name == "nt"
class DataConfig(BaseSettings):
"""Configuration for data storage and file handling."""
# Base directory
base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application")
base_dir: Path = Field(default=Path(get_app_root()), description="Base directory for the application")
# Data paths
data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory")
@@ -138,7 +139,7 @@ class AppConfig(BaseSettings):
if isinstance(v, dict) and "base_dir" in v:
base_dir = v["base_dir"]
else:
base_dir = Path(__file__).parent.parent
base_dir = Path(get_app_root())
# Convert string paths to Path objects relative to base_dir
data_dir = Path(_DATA_DIR_CONST)
+30 -4
View File
@@ -2,12 +2,14 @@
"""Application-wide constants and configuration values."""
import os
APP_VERSION = "1.0.0"
from src.runtime_paths import get_app_root, get_default_data_dir
APP_VERSION = "1.0.1"
# Base paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
BASE_DIR = os.path.join(get_app_root(), "")
STATIC_DIR = os.path.join(BASE_DIR, "static")
DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", os.path.join(BASE_DIR, "data"))
DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", get_default_data_dir())
# Data file paths
# Single source of truth: every persisted file/dir lives under DATA_DIR, which
@@ -55,7 +57,13 @@ MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")
# Paths with an intentional dedicated env override, defaulting under DATA_DIR.
MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments"))
FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache"))
# `or` (not os.getenv's default arg) so a PRESENT-but-EMPTY value falls back to
# the default. docker-compose.yml injects `FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}`,
# which sets the var to "" when the host hasn't defined it. os.getenv(name, default)
# only returns the default when the var is ABSENT, so the empty string would win →
# os.makedirs("") raises [Errno 2] No such file or directory: '' → FastEmbed fails to
# init and all vector features (RAG, semantic memory, tool index) silently degrade.
FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")
# Agent tool output limits (single source of truth — imported by tool_execution.py,
# tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -63,11 +71,26 @@ MAX_OUTPUT_CHARS = 10_000 # cap for bash/python/web_search/web_fetch outpu
MAX_READ_CHARS = 20_000 # cap for read_file / document preview
MAX_DIFF_LINES = 400 # cap for edit_file unified-diff display
# web_fetch response-size policy (#3812). MAX_OUTPUT_CHARS above only trims
# what the agent SEES; these caps bound what the server downloads, parses,
# and writes to the content cache. The soft cap is the default download
# budget; the agent can raise it per call (full/max_bytes) but never past
# the hard cap, so a model can't decide to pull a multi-GB file.
WEB_FETCH_SOFT_MAX_BYTES = 2_000_000 # default download budget (2 MB)
WEB_FETCH_HARD_MAX_BYTES = 20_000_000 # absolute ceiling, even with override (20 MB)
# API Configuration
MAX_CONTEXT_MESSAGES = 90
REQUEST_TIMEOUT = 20
OPENAI_COMPAT_PATH = "/v1/chat/completions"
# Outbound UA for web_fetch / web_search scraping; common desktop UA so pages serve normal HTML.
WEB_FETCH_USER_AGENT = os.environ.get(
"WEB_FETCH_USER_AGENT",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
)
# Environment variables with defaults
DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
@@ -79,6 +102,9 @@ SEARXNG_INSTANCE = os.getenv("SEARXNG_INSTANCE", "http://localhost:8080")
CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
# Auth policy
PASSWORD_MIN_LENGTH = 8
# Default parameters
DEFAULT_TEMPERATURE = 1.0
DEFAULT_MAX_TOKENS = 0
+11 -3
View File
@@ -244,9 +244,17 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
protected_tokens = estimate_tokens(protected_msgs)
budget -= protected_tokens
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo)
essential_system = system_msgs[:1] if system_msgs else []
extra_system = system_msgs[1:]
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo).
# Exception: a research-spinoff primer (the seeded report that grounds a
# "Discuss" chat) must never be dropped — it is the conversation's whole
# knowledge base. Treat any system message carrying research_spinoff_from
# metadata as essential alongside the leading system prompt.
def _is_research_primer(m):
return bool((m.get("metadata") or {}).get("research_spinoff_from"))
_primers = [m for m in system_msgs if _is_research_primer(m)]
_non_primer = [m for m in system_msgs if not _is_research_primer(m)]
essential_system = (_non_primer[:1] if _non_primer else []) + _primers
extra_system = _non_primer[1:]
# Try dropping extra system messages one by one (from the end)
trimmed = essential_system + convo_msgs
+3 -2
View File
@@ -161,11 +161,13 @@ async def _tick() -> None:
# Re-read state once before writing so we capture any updates from
# concurrent UI syncs.
stopped_any = False
successfully_stopped_sids = set()
for sid, host, port in to_stop:
ok = await _stop_serve(sid, host, port)
logger.info(f"cookbook_serve_lifecycle: stop {sid} (host={host or 'local'}): {'ok' if ok else 'failed'}")
if ok:
stopped_any = True
successfully_stopped_sids.add(sid)
# Drop the auto-registered endpoint so the model picker and
# the chat router don't keep pointing at a dead server.
for t in tasks:
@@ -188,12 +190,11 @@ async def _tick() -> None:
except Exception:
fresh = state
fresh_tasks = tasks
stopped_sids = {sid for sid, _, _ in to_stop}
for ft in fresh_tasks:
if not isinstance(ft, dict):
continue
ft_sid = ft.get("sessionId") or ft.get("id")
if ft_sid in stopped_sids:
if ft_sid in successfully_stopped_sids:
ft["status"] = "stopped"
ft["_scheduledStopAtMs"] = None
ft["_lastStatusFlipAt"] = now_ms
+58 -3
View File
@@ -199,11 +199,20 @@ def _fit_inline_attachment_text(
return text[:remaining] + marker, 0
def _process_office_document(path: str, display_name: str) -> str:
def _process_office_document(
path: str,
display_name: str,
session_id: str | None = None,
auto_opened_docs: list[Dict[str, Any]] | None = None,
owner: str | None = None,
) -> str:
"""Extract an Office/EPUB document to Markdown via the optional markitdown dep.
Falls back to a friendly banner when markitdown is unavailable or finds no
text, so a missing optional dependency never breaks the chat path.
text, so a missing optional dependency never breaks the chat path. When a
session_id is provided AND the extraction succeeded, the FULL text is also
saved as a Document so the agent can page through it via
`manage_documents action=read offset=` after the inline copy is capped.
"""
from src.markitdown_runtime import (
is_markitdown_format,
@@ -218,6 +227,46 @@ def _process_office_document(path: str, display_name: str) -> str:
if markdown and markdown.strip():
title = os.path.splitext(os.path.basename(path))[0]
body, marker = _truncate_inline(markdown)
# Persist the full extracted text as a Document. The agent's existing
# manage_documents tool can then read past the inline cap with offset.
doc_id = None
if session_id:
try:
from src.office_doc import create_office_document
doc_id = create_office_document(
session_id=session_id,
upload_id=os.path.basename(path),
title=title,
body_text=markdown,
)
if doc_id and auto_opened_docs is not None:
from src.database import SessionLocal, Document
_db = SessionLocal()
try:
_d = _db.query(Document).filter(Document.id == doc_id).first()
if _d:
auto_opened_docs.append({
"doc_id": _d.id,
"title": _d.title,
"language": _d.language,
"content": _d.current_content,
"version": _d.version_count,
})
finally:
_db.close()
except Exception as e:
logger.warning("Office auto-doc creation failed for %s: %s", path, e)
# Upgrade the truncation marker with a hint pointing at the full doc so
# the agent knows it can read the rest.
if doc_id and marker:
marker = (
f"\n[…truncated for inline context — full {len(markdown):,} chars "
f"saved as document `{doc_id}`. Use `manage_documents` with "
f"action=read, document_id={doc_id}, offset=<N> to page through.]"
)
return f"\n\n[Document content — {title}]:\n{body}{marker}"
# No content: tell the user whether to install the optional dep or whether
@@ -521,7 +570,13 @@ def build_user_content(
elif mime.startswith("text/") or _is_text_file(path):
extracted_text = _process_text_file(path)
else:
extracted_text = _process_office_document(path, display_name)
extracted_text = _process_office_document(
path,
display_name,
session_id=session_id,
auto_opened_docs=auto_opened_docs,
owner=owner,
)
extracted_text, inline_attachment_remaining = _fit_inline_attachment_text(
extracted_text,
+2
View File
@@ -31,6 +31,8 @@ import numpy as np
import httpx
from typing import List, Optional
from src.runtime_paths import get_app_root
logger = logging.getLogger(__name__)
_DEFAULT_MODEL = "all-minilm:l6-v2"
+50 -15
View File
@@ -161,6 +161,32 @@ def normalize_base(url: str) -> str:
return url
def _validated_endpoint_base(url: str) -> str:
"""Return a base URL that is safe for endpoint path appends."""
base = (url or "").strip().rstrip("/")
if "?" in base or "#" in base:
raise ValueError("Endpoint base URL must not include query or fragment")
return urlunparse(urlparse(base)._replace(query="", fragment="")).rstrip("/")
def _prepare_endpoint_base(base: str) -> str:
    """Normalize, resolve and validate *base* before any endpoint path is appended.

    The input is normalized and validated first, then passed through
    ``resolve_url`` and normalized/validated a second time, so both the
    caller-supplied and the resolved form must be free of query/fragment.

    Raises:
        ValueError: propagated from ``_validated_endpoint_base`` when either
            form contains ``?`` or ``#``.
    """
    checked_input = _validated_endpoint_base(normalize_base(base))
    resolved = normalize_base(resolve_url(checked_input))
    return _validated_endpoint_base(resolved)
def _append_endpoint_path(base: str, suffix: str) -> str:
parsed = urlparse(base)
current = (parsed.path or "").rstrip("/")
extra = "/" + suffix.lstrip("/")
path = f"{current}{extra}" if current else extra
return urlunparse(parsed._replace(path=path, query="", fragment=""))
def _pathless_host(base: str, host: str) -> bool:
parsed = urlparse(base)
return (parsed.hostname or "").lower() == host and not (parsed.path or "").strip("/")
def _anthropic_api_root(base: str) -> str:
"""Return Anthropic's API root, preserving /v1 for OpenAI-compatible APIs elsewhere."""
base = (base or "").strip().rstrip("/")
@@ -171,15 +197,17 @@ def _anthropic_api_root(base: str) -> str:
def build_chat_url(base: str) -> str:
    """Return the correct chat endpoint URL for a given base.

    The base is normalized, resolved and validated once via
    ``_prepare_endpoint_base``; every provider-specific suffix is then attached
    with ``_append_endpoint_path`` so the result never carries a doubled slash,
    query string or fragment.

    Args:
        base: Caller-supplied endpoint base URL.

    Returns:
        The provider-specific chat URL: ``/v1/messages`` for Anthropic,
        ``/chat`` under the Ollama API root, ``/responses`` for the ChatGPT
        subscription backend, and ``/chat/completions`` otherwise.

    Raises:
        ValueError: if the base URL contains a query string or fragment.
    """
    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
        return _append_endpoint_path(_anthropic_api_root(base), "/v1/messages")
    if provider == "ollama":
        return _append_endpoint_path(_ollama_api_root(base), "/chat")
    if provider == "chatgpt-subscription":
        return _append_endpoint_path(base, "/responses")
    # A bare api.openai.com base has no version prefix; add /v1 so the final
    # URL lands on /v1/chat/completions. Bases that already carry a path are
    # left as the caller wrote them.
    if _pathless_host(base, "api.openai.com"):
        base = _append_endpoint_path(base, "/v1")
    return _append_endpoint_path(base, "/chat/completions")
def build_models_url(base: str) -> Optional[str]:
@@ -193,21 +221,25 @@ def build_models_url(base: str) -> Optional[str]:
untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
their semantics).
"""
base = normalize_base(resolve_url(base))
base = _prepare_endpoint_base(base)
provider = _detect_provider(base)
if provider == "anthropic":
return _anthropic_api_root(base) + "/v1/models"
return _append_endpoint_path(_anthropic_api_root(base), "/v1/models")
if provider == "ollama":
return _ollama_api_root(base) + "/tags"
return _append_endpoint_path(_ollama_api_root(base), "/tags")
if provider == "chatgpt-subscription":
return None
# Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
# when the user omitted a path entirely. If a non-empty path is already
# present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
# /models suffix is appended as-is and the caller's prefix is preserved.
if not urlparse(base).path:
base = base + "/v1"
return base + "/models"
# Generic OpenAI-compatible fallback: local model servers with no explicit
# path conventionally expose `/v1/models` (LM Studio, llama.cpp, vLLM).
# For non-local unknown hosts, do not invent `/v1`; append `/models` to the
# caller's base so look-alike provider hosts stay generic.
parsed = urlparse(base)
host = (parsed.hostname or "").lower()
is_local = host in {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
uses_v1_models_by_default = is_local or host in {"api.deepseek.com", "api.openai.com"}
if not parsed.path and uses_v1_models_by_default:
base = _append_endpoint_path(base, "/v1")
return _append_endpoint_path(base, "/models")
def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
@@ -392,6 +424,9 @@ def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list:
settings = load_settings()
utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip()
if not utility_ep:
utility_chain = get_user_setting("utility_model_fallbacks", owner or "", settings.get("utility_model_fallbacks") or []) or []
if utility_chain:
return _resolve_fallback_candidates("utility_model_fallbacks", owner=owner)
return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
except Exception:
pass
+35 -8
View File
@@ -4,6 +4,7 @@ import uuid
import logging
import re
from typing import Dict, List, Optional, Any
from urllib.parse import urljoin, urlparse, urlunparse
import httpx
from fastapi import HTTPException
@@ -202,6 +203,22 @@ def mask_integration_secret(integration: Dict[str, Any]) -> Dict[str, Any]:
return safe
def _normalize_integration_base_url(base_url: Any) -> str:
if not isinstance(base_url, str) or not base_url.strip():
raise ValueError("Integration base URL is required")
cleaned = base_url.strip().rstrip("/")
if "?" in cleaned or "#" in cleaned:
raise ValueError("Integration base URL must not include query or fragment")
parsed = urlparse(cleaned)
if parsed.scheme.lower() not in ("http", "https") or not parsed.hostname:
raise ValueError("Integration base URL must be an HTTP(S) URL")
return urlunparse(parsed._replace(scheme=parsed.scheme.lower(), query="", fragment="")).rstrip("/")
def _join_integration_url(base_url: str, path: str) -> str:
return urljoin(base_url.rstrip("/") + "/", path.lstrip("/"))
def load_integrations() -> List[Dict[str, Any]]:
"""Load all integrations from disk with secrets decrypted for runtime use."""
if not os.path.exists(DATA_FILE):
@@ -261,8 +278,10 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
if not isinstance(integration.get("name"), str) or not integration["name"].strip():
raise HTTPException(400, "Integration name is required")
if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
raise HTTPException(400, "Integration base URL is required")
try:
integration["base_url"] = _normalize_integration_base_url(integration.get("base_url"))
except ValueError as exc:
raise HTTPException(400, str(exc)) from exc
integrations = load_integrations()
integrations.append(integration)
@@ -272,10 +291,14 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Update fields on an existing integration. Returns updated integration or None."""
data = dict(data)
if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
raise HTTPException(400, "Integration name is required")
if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
raise HTTPException(400, "Integration base URL is required")
if "base_url" in data:
try:
data["base_url"] = _normalize_integration_base_url(data["base_url"])
except ValueError as exc:
raise HTTPException(400, str(exc)) from exc
integrations = load_integrations()
for item in integrations:
@@ -341,9 +364,10 @@ async def execute_api_call(
if not integration.get("enabled", True):
return {"error": f"Integration '{integration.get('name')}' is disabled", "exit_code": 1}
base_url = integration.get("base_url", "").rstrip("/")
if not base_url:
return {"error": "Integration has no base_url configured", "exit_code": 1}
try:
base_url = _normalize_integration_base_url(integration.get("base_url", ""))
except ValueError as exc:
return {"error": str(exc), "exit_code": 1}
# Strip common API path suffixes users might accidentally include
# (e.g. "http://host/v1/" → "http://host"). The integration's preset
@@ -366,7 +390,10 @@ async def execute_api_call(
if re.search(r"^https?://", path) or "://" in path:
return {"error": "Path must not contain a protocol scheme", "exit_code": 1}
url = base_url + path
if "#" in path:
return {"error": "Path must not contain a fragment", "exit_code": 1}
url = _join_integration_url(base_url, path)
method = method.upper()
# Build headers
+206 -38
View File
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
_model_activity: Dict[str, float] = {}
_HARMONY_MARKER_RE = re.compile(
r"<\|channel\|>(analysis|final)"
r"<\|channel\|>(analysis|commentary|final)"
r"|<\|start\|>(?:assistant|system|user|tool)?"
r"|<\|message\|>"
r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
)
_HARMONY_MARKERS = (
"<|channel|>analysis",
"<|channel|>commentary",
"<|channel|>final",
"<|start|>assistant",
"<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
out.append((text, False))
return
if self._in_message:
out.append((text, self._channel == "analysis"))
# analysis + commentary (tool-call preambles / function-arg bodies)
# are internal, not user-facing — route them to thinking so they
# don't leak into the visible answer; only `final` is visible.
out.append((text, self._channel in ("analysis", "commentary")))
def _handle_marker(self, match: re.Match[str]) -> None:
marker = match.group(0)
@@ -283,7 +287,8 @@ def _is_ollama_native_url(url: str) -> bool:
"""Return True for native Ollama API URLs, including Ollama Cloud."""
try:
parsed = urlparse(url or "")
except Exception:
except Exception as e:
logger.warning("Failed to parse URL for Ollama detection", exc_info=e)
return False
host = parsed.hostname or ""
path = (parsed.path or "").rstrip("/")
@@ -340,43 +345,102 @@ def _normalize_ollama_url(url: str) -> str:
return base.rstrip("/") + "/chat"
def _ollama_normalize_tool_messages(messages: List[Dict]) -> List[Dict]:
def _ollama_normalize_messages(messages: List[Dict]) -> List[Dict]:
"""Adapt Odysseus' canonical OpenAI-style messages to native Ollama /api/chat.
Odysseus carries assistant tool calls in the OpenAI shape, where
`function.arguments` is a JSON *string*. Native Ollama expects it to be a
JSON *object*; given the string it fails the whole request with HTTP 400
"Value looks like object, but can't find closing '}' symbol", which aborts
every follow-up (tool-result) round. Parse the arguments back into an object
here, on a shallow copy, leaving non-tool messages untouched. The opaque
Gemini `extra_content` (thought_signature) is dropped it is meaningless to
Ollama and only matters when the conversation is replayed to Gemini.
Two shape mismatches silently break requests:
1. Tool calls: Odysseus carries `function.arguments` as a JSON *string*.
Native Ollama expects a JSON *object* and rejects the string form with
HTTP 400 ("Value looks like object, but can't find closing '}' symbol"),
aborting every follow-up (tool-result) round. Parse the arguments back
into an object here, on a shallow copy, leaving non-tool messages
untouched. The opaque Gemini `extra_content` (thought_signature) is
dropped it is meaningless to Ollama and only matters when the
conversation is replayed to Gemini.
2. Images (issue #4723): Odysseus carries multimodal user content as an
OpenAI-style list ``[{type: "text", ...}, {type: "image_url",
image_url: {url: "data:image/...;base64,XXX"}}, ...]``. Native Ollama
does not accept a list for ``content`` it wants ``content`` as a
string plus a separate ``images`` array of raw base64 strings (no
``data:`` prefix). Without this conversion the image blocks pass
through untouched, the vision-capable model never sees the picture,
and the user gets "I can't see any image" even though the request
succeeded.
"""
out: List[Dict] = []
for m in messages or []:
tcs = m.get("tool_calls") if isinstance(m, dict) else None
if not tcs:
if not isinstance(m, dict):
out.append(m)
continue
new_calls = []
for tc in tcs:
fn = tc.get("function") or {}
args = fn.get("arguments")
if isinstance(args, str):
try:
args = json.loads(args) if args.strip() else {}
except (json.JSONDecodeError, TypeError):
args = {}
call: Dict = {"function": {"name": fn.get("name", ""), "arguments": args or {}}}
if tc.get("id"):
call["id"] = tc["id"]
new_calls.append(call)
nm = dict(m)
nm["tool_calls"] = new_calls
# 1. Tool-call argument strings -> objects.
tcs = nm.get("tool_calls")
if tcs:
new_calls = []
for tc in tcs:
fn = tc.get("function") or {}
args = fn.get("arguments")
if isinstance(args, str):
try:
args = json.loads(args) if args.strip() else {}
except (json.JSONDecodeError, TypeError):
args = {}
call: Dict = {"function": {"name": fn.get("name", ""), "arguments": args or {}}}
if tc.get("id"):
call["id"] = tc["id"]
new_calls.append(call)
nm["tool_calls"] = new_calls
# 2. Multimodal content list -> native content string + images array.
content = nm.get("content")
if isinstance(content, list):
text_parts: List[str] = []
images: List[str] = list(nm.get("images") or [])
for block in content:
if not isinstance(block, dict):
continue
btype = block.get("type")
if btype == "text":
t = block.get("text")
if t:
text_parts.append(str(t))
elif btype == "image_url":
url = (block.get("image_url") or {}).get("url", "")
if not url:
continue
if url.startswith("data:"):
# Strip the ``data:[...];base64,`` prefix — native
# Ollama wants only the base64 bytes.
_, _, b64 = url.partition(",")
if b64:
images.append(b64)
else:
# Native Ollama images[] is base64-only; it does
# not fetch HTTP URLs. Skip unsupported schemes
# rather than sending a non-base64 string that the
# model silently ignores.
logger.warning(
"Skipping non-data image_url (Ollama images[] "
"requires base64): %s",
url[:80],
)
nm["content"] = "\n".join(text_parts).strip()
if images:
nm["images"] = images
out.append(nm)
return out
# Backward-compatible alias for callers/tests that imported the older name
# (it only handled tool messages originally — issue #4723 broadened scope).
_ollama_normalize_tool_messages = _ollama_normalize_messages
def _build_ollama_payload(
model: str,
messages: List[Dict],
@@ -399,7 +463,7 @@ def _build_ollama_payload(
"""
payload: Dict = {
"model": model,
"messages": _ollama_normalize_tool_messages(messages),
"messages": _ollama_normalize_messages(messages),
"stream": stream,
}
options: Dict = {}
@@ -605,12 +669,16 @@ def _detect_provider(url: str) -> str:
return "groq"
if _host_match(url, "nvidia.com"):
return "nvidia"
if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
return "moonshot"
from src.chatgpt_subscription import is_chatgpt_subscription_base
if is_chatgpt_subscription_base(url):
return "chatgpt-subscription"
from src.copilot import is_copilot_base
if is_copilot_base(url):
return "copilot"
if _host_match(url, "mistral.ai"):
return "mistral"
return "openai"
@@ -709,10 +777,17 @@ def _provider_label(url: str) -> str:
pass
if _is_ollama_native_url(url): return "Ollama"
try:
host = (urlparse(url).hostname or "").lower()
_parsed_local = urlparse(url)
host = (_parsed_local.hostname or "").lower()
port = _parsed_local.port
except Exception:
return "provider"
if host in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}:
# A port alone is not authoritative: vLLM, SGLang, llama.cpp and plain
# OpenAI-compatible servers all routinely share 8000/8080, so naming the
# serving tool from the port here would mislabel real setups. The tool is
# identified by probing llama-server's native /props endpoint during
# discovery (see ModelDiscovery._fingerprint_provider); this stays neutral.
return "local endpoint"
return host or "provider"
@@ -856,6 +931,28 @@ def _restricts_temperature(model: str) -> bool:
m = model.lower()
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
# control, so omit temperature and let Moonshot use its default thinking mode.
# Keep the gate provider-specific: self-hosted Kimi deployments may accept
# custom sampling values, and older Moonshot models have different defaults.
def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
"""Check if the official Moonshot API fixes temperature for this model."""
if provider != "moonshot" or not isinstance(model, str):
return False
model_id = model.lower().rsplit("/", 1)[-1]
return bool(re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", model_id))
def _omit_temperature(provider: str, model: str) -> bool:
    """Decide whether the request should leave ``temperature`` to the provider default.

    True when the model is covered by ``_restricts_temperature`` or when the
    provider/model pair is rejected by ``_moonshot_rejects_custom_temperature``.
    """
    restricted = _restricts_temperature(model)
    if restricted:
        return restricted
    return _moonshot_rejects_custom_temperature(provider, model)
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
@@ -877,8 +974,18 @@ def _anthropic_rejects_temperature(model: str) -> bool:
return False
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
# Reasoning effort level sent to Mistral thinking-capable models. Mistral's
# API accepts "high", "medium", "low", "none" — see
# https://docs.mistral.ai/capabilities/reasoning/. Override via env var
# ODYSSEUS_MISTRAL_REASONING_EFFORT (e.g. set to "medium" for cheaper chat).
_MISTRAL_REASONING_EFFORT = os.getenv("ODYSSEUS_MISTRAL_REASONING_EFFORT", "high")
# Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
_THINKING_MODEL_PATTERNS = (
"qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax",
"m2-reap", "gemma", "stepfun", "step-3", "step3",
"magistral", "mistral-small", "mistral-medium",
)
def _supports_thinking(model: str) -> bool:
"""Check if model supports structured thinking output."""
@@ -887,6 +994,38 @@ def _supports_thinking(model: str) -> bool:
m = model.lower()
return any(p in m for p in _THINKING_MODEL_PATTERNS)
def _normalize_mistral_content(content):
"""Mistral returns content as a structured array when reasoning is on:
[{"type": "thinking", "thinking": [{"type": "text", "text": "..."}], "closed": true},
{"type": "text", "text": "...final answer..."}]
Convert to (text, thinking) tuple of plain strings. Pass through strings
unchanged so non-Mistral OpenAI-compat endpoints are unaffected.
"""
if isinstance(content, str):
return content, ""
if not isinstance(content, list):
return "", ""
text_parts = []
thinking_parts = []
for block in content:
if not isinstance(block, dict):
continue
btype = block.get("type")
if btype == "text":
t = block.get("text", "")
if t:
text_parts.append(t)
elif btype == "thinking":
inner = block.get("thinking", [])
if isinstance(inner, list):
for tb in inner:
if isinstance(tb, dict) and tb.get("text"):
thinking_parts.append(tb["text"])
elif isinstance(inner, str):
thinking_parts.append(inner)
return "".join(text_parts), "".join(thinking_parts)
def _convert_openai_content_to_anthropic(content):
"""Convert OpenAI multimodal content blocks to Anthropic format.
@@ -1321,8 +1460,8 @@ def list_model_ids(
r = httpx.get(root + "/api/tags", timeout=timeout)
r.raise_for_status()
return [m.get("name") or m.get("model") for m in (r.json().get("models") or []) if m.get("name") or m.get("model")]
except Exception:
pass
except Exception as e:
logger.warning("Failed to fetch model list from configured endpoint", exc_info=e)
return []
def normalize_model_id(
@@ -1404,11 +1543,13 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
"messages": messages_copy,
"temperature": temperature,
}
if _restricts_temperature(model):
if _omit_temperature(provider, model):
payload.pop("temperature", None)
if max_tokens and max_tokens > 0:
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
payload[tok_key] = max_tokens
if provider == "mistral" and _supports_thinking(model):
payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT
try:
note_model_activity(target_url, model)
r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout)
@@ -1424,7 +1565,16 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
response = _parse_ollama_response(data)
else:
msg = data["choices"][0]["message"]
response = msg.get("content") or msg.get("reasoning_content") or ""
content = msg.get("content")
if isinstance(content, list):
# Mistral structured content — extract thinking + text
text_part, thinking_part = _normalize_mistral_content(content)
if thinking_part:
response = thinking_part + "\n\n" + (text_part or "")
else:
response = text_part or msg.get("reasoning_content") or ""
else:
response = content or msg.get("reasoning_content") or ""
_set_cached_response(cache_key, response)
return response
except Exception:
@@ -1598,7 +1748,7 @@ async def llm_call_async(
"messages": messages_copy,
"temperature": temperature,
}
if _restricts_temperature(model):
if _omit_temperature(provider, model):
payload.pop("temperature", None)
if max_tokens and max_tokens > 0:
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
@@ -1606,6 +1756,8 @@ async def llm_call_async(
# Suppress thinking for qwen3/gemma4 on Ollama /v1 — same as stream_llm.
if _is_ollama_openai_compat_url(url) and _supports_thinking(model):
payload["think"] = False
if provider == "mistral" and _supports_thinking(model):
payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT
_apply_local_cache_affinity(payload, url, session_id)
if _is_host_dead(target_url):
@@ -1715,7 +1867,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
"temperature": temperature,
"stream": True,
}
if _restricts_temperature(model):
if _omit_temperature(provider, model):
payload.pop("temperature", None)
if provider not in {"openrouter", "groq"}:
payload["stream_options"] = {"include_usage": True}
@@ -1724,6 +1876,12 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
payload[tok_key] = max_tokens
if tools:
payload["tools"] = tools
# Mistral thinking-capable models — send reasoning_effort so Mistral
# activates thinking mode and returns structured reasoning_content.
# Effort level is configurable via ODYSSEUS_MISTRAL_REASONING_EFFORT
# (high / medium / low / none); default "high".
if provider == "mistral" and _supports_thinking(model):
payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT
# For Ollama's OpenAI-compat /v1 endpoint with thinking models (qwen3,
# gemma4, etc.), suppress thinking so tool calls aren't swallowed inside
# <think> blocks. Ollama /v1 accepts "think": false as a top-level param.
@@ -2102,10 +2260,20 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
# Text content
# Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Some OpenAI-compatible Ollama builds use `thinking`.
reasoning = delta.get("reasoning_content") or delta.get("reasoning") or delta.get("thinking") or ""
content = delta.get("content") or ""
# Mistral structured content: content is a list of typed blocks
# ({"type": "thinking", ...}, {"type": "text", ...}). Split into
# reasoning + text so thinking streams into the thinking panel.
if isinstance(content, list):
text_part, thinking_part = _normalize_mistral_content(content)
if thinking_part:
reasoning = (reasoning + thinking_part) if reasoning else thinking_part
content = text_part
if reasoning:
yield _stream_delta_event(reasoning, thinking=True)
content = delta.get("content") or ""
if content:
content = re.sub(r"<mm:think(\s+[^>]*)?>", r"<think\1>", content, flags=re.IGNORECASE)
content = re.sub(r"</mm:think>", "</think>", content, flags=re.IGNORECASE)
stripped = content.lstrip()
# gpt-oss harmony format (<|channel|>analysis/final): route via the harmony
# stream router. Sticky once the first marker appears — distinct from the
+44
View File
@@ -40,15 +40,59 @@ def load_markitdown():
return MarkItDown
def _extract_docx_native(path: str) -> str | None:
"""Pure-Python .docx text extractor — no external deps.
A .docx file is just a zip of XML. The body prose lives in <w:t> runs
inside <w:p> paragraphs. Iterating with ElementTree (rather than
re.findall) keeps paragraph breaks intact and lets the XML parser handle
namespaces + entity unescaping. Loses tables, footnotes, images and
list bullets keeps ~95% of "summarize this doc" content, which is the
case people hit when markitdown isn't installed.
"""
import zipfile
import xml.etree.ElementTree as ET
ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
try:
with zipfile.ZipFile(path) as z:
xml_bytes = z.read("word/document.xml")
except (zipfile.BadZipFile, KeyError, OSError):
return None
try:
root = ET.fromstring(xml_bytes)
except ET.ParseError:
return None
paragraphs: list[str] = []
for para in root.iter(f"{ns}p"):
runs = [t.text or "" for t in para.iter(f"{ns}t")]
line = "".join(runs).strip()
if line:
paragraphs.append(line)
return "\n\n".join(paragraphs) if paragraphs else None
def convert_to_markdown(path: str) -> str | None:
"""Convert a document to Markdown text via markitdown.
Returns the extracted Markdown, or ``None`` if markitdown is unavailable or
the conversion fails callers degrade gracefully rather than erroring.
Fallback: when markitdown isn't installed and the file is a .docx, run
the bundled pure-Python extractor so the most common case (Word docs)
works out of the box. Other Office/EPUB formats still need markitdown.
"""
try:
markitdown_cls = load_markitdown()
except RuntimeError:
if isinstance(path, str) and path.lower().endswith(".docx"):
text = _extract_docx_native(path)
if text:
logger.info(
"markitdown not installed — used native .docx extractor for %s",
path,
)
return text
logger.warning("markitdown not installed; cannot extract %s", path)
return None
try:
+3 -1
View File
@@ -11,6 +11,8 @@ import os
import re
from typing import Any, Dict, List, Optional, Set, Tuple
from src.runtime_paths import get_app_root
logger = logging.getLogger(__name__)
def _format_mcp_connection_error(name: str, command: str = "", args: Optional[List[str]] = None, error: Exception = None) -> str:
@@ -508,7 +510,7 @@ class McpManager:
return False
script_rel, name = _BUILTIN_SERVERS[server_id]
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
base_dir = get_app_root()
script_path = os.path.join(base_dir, script_rel)
# Clean up old connection
+14 -5
View File
@@ -17,10 +17,11 @@ import httpx
logger = logging.getLogger(__name__)
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"}
_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
"172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
"172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
"172.30.", "172.31.", "192.168.")
_PRIVATE_NETWORKS = (
ipaddress.ip_network("10.0.0.0/8"),
ipaddress.ip_network("172.16.0.0/12"),
ipaddress.ip_network("192.168.0.0/16"),
)
# Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
# A bare "100." prefix would classify public addresses (e.g. AWS ranges
@@ -36,6 +37,14 @@ def _in_tailscale_range(host: str) -> bool:
return False
def _is_private_ip_literal(host: str) -> bool:
    """Return True when *host* is an IP literal inside one of `_PRIVATE_NETWORKS`.

    Anything that `ipaddress.ip_address` cannot parse (hostnames, empty
    strings, ...) is reported as not private.
    """
    try:
        address = ipaddress.ip_address(host)
    except ValueError:
        return False
    for network in _PRIVATE_NETWORKS:
        if address in network:
            return True
    return False
def _normalize_base_for_compare(url: str) -> str:
url = (url or "").strip().rstrip("/")
for suffix in ("/chat/completions", "/models", "/completions", "/v1/messages"):
@@ -87,7 +96,7 @@ def is_local_endpoint(url: str) -> bool:
return True
try:
host = urlparse(url).hostname or ""
return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
return host in _LOCAL_HOSTS or _is_private_ip_literal(host) or _in_tailscale_range(host)
except Exception:
return False
+20 -4
View File
@@ -163,6 +163,21 @@ class ModelDiscovery:
return "lmstudio"
except Exception:
pass
# llama.cpp's llama-server exposes a native /props endpoint (no /v1 prefix)
# describing the loaded model, slots, and chat template — distinct from
# LM Studio (/api/v1/models) and vLLM (/version, /metrics).
try:
r = httpx.get(f"http://{host}:{port}/props", timeout=1.5)
if r.is_success:
props = r.json() or {}
if isinstance(props, dict) and (
"default_generation_settings" in props
or "total_slots" in props
or "chat_template" in props
):
return "llamacpp"
except Exception:
pass
return None
def _check_port(self, host: str, port: int) -> Optional[Dict[str, Any]]:
@@ -194,10 +209,11 @@ class ModelDiscovery:
logger.info(f"Scanning {len(hosts)} hosts for models: {hosts}")
# Well-known ports: 8000-8020 (vLLM, llama.cpp, SGLang, Cookbook),
# 1234 (LM Studio), 11434 (Ollama), 11435 for APFEL as its default port is
# occupied by Ollama. The env vars can add more ports which will be merged in.
ports = list(range(8000, 8021)) + [1234, 11434, 11435]
# Well-known ports: 8000-8020 (vLLM, SGLang, Cookbook), 8080 (llama.cpp /
# llama-server default), 1234 (LM Studio), 11434 (Ollama), 11435 for APFEL
# as its default port is occupied by Ollama. The env vars can add more
# ports which will be merged in.
ports = list(range(8000, 8021)) + [8080, 1234, 11434, 11435]
ports += [p for p in sorted(self._extra_ports) if p not in ports]
targets = [(h, p) for h in hosts for p in ports]
+73
View File
@@ -0,0 +1,73 @@
"""Auto-create a Document row from an Office attachment.
When a .docx (and friends) lands in chat, the full extracted text is stored
as a Document so the agent can page through it with `manage_documents
action=read offset=` even after the inline chat payload was capped. Mirrors
the PDF auto-doc pattern in `src.pdf_form_doc`.
"""
import logging
import uuid
from typing import Optional
logger = logging.getLogger(__name__)
def create_office_document(
    session_id: str,
    upload_id: str,
    title: str,
    body_text: Optional[str] = None,
) -> Optional[str]:
    """Store an Office attachment's extracted text as a markdown Document.

    The whole extracted body is written to the document's `current_content`
    and to a first `DocumentVersion`, and the new document is then marked
    active via `set_active_document`.

    Args:
        session_id: Session the document is attached to; also used to look
            up the session row whose `owner` is copied onto the document.
        upload_id: Accepted for the caller's benefit; not read here.
        title: Title stored on the Document row.
        body_text: Extracted text of the attachment.

    Returns:
        The new document id, or None when `body_text` is empty/whitespace or
        any step raises (the failure is logged and the transaction rolled
        back).
    """
    from src.database import (
        SessionLocal,
        Document,
        DocumentVersion,
        Session as DbSession,
    )
    from src.agent_tools.document_tools import set_active_document

    has_body = bool(body_text) and bool(body_text.strip())
    if not has_body:
        return None

    db = SessionLocal()
    try:
        doc_id = str(uuid.uuid4())
        ver_id = str(uuid.uuid4())
        session_row = (
            db.query(DbSession).filter(DbSession.id == session_id).first()
        )
        # The document inherits its owner from the session row, when found.
        owner = session_row.owner if session_row else None

        document = Document(
            id=doc_id,
            session_id=session_id,
            title=title,
            language="markdown",
            current_content=body_text,
            version_count=1,
            is_active=True,
            owner=owner,
        )
        first_version = DocumentVersion(
            id=ver_id,
            document_id=doc_id,
            version_number=1,
            content=body_text,
            summary="Imported from Office attachment",
            source="upload",
        )
        db.add(document)
        db.add(first_version)
        db.commit()

        set_active_document(doc_id)
        return doc_id
    except Exception as e:
        db.rollback()
        logger.error("Failed to create office document: %s", e)
        return None
    finally:
        db.close()
+41
View File
@@ -322,6 +322,47 @@ class PersonalDocsManager:
else:
logger.info(f"Directory not in index: {directory}")
def rename_directory(self, old_directory: str, new_directory: str, *, path_map: Dict[str, str] = None):
    """Rewrite tracked directories and excluded-file paths after an owner rename.

    Every stored path is made absolute and then relocated: an exact entry in
    *path_map* wins, otherwise a path equal to or underneath *old_directory*
    is moved to the same position under *new_directory*. The directory list
    and the excluded set are each saved only if at least one of their
    entries moved, and the index is refreshed if either one changed.

    Args:
        old_directory: Directory root being renamed.
        new_directory: Directory root it was renamed to.
        path_map: Optional explicit old-path -> new-path overrides.
    """
    old_root = os.path.abspath(old_directory)
    new_root = os.path.abspath(new_directory)
    overrides = {
        os.path.abspath(src): os.path.abspath(dst)
        for src, dst in (path_map or {}).items()
    }
    old_root_prefix = old_root + os.sep

    def relocate(path: str) -> str:
        current = os.path.abspath(path)
        override = overrides.get(current)
        if override:
            return override
        if current == old_root:
            return new_root
        if current.startswith(old_root_prefix):
            return new_root + current[len(old_root):]
        return current

    # Indexed directories: keep order, drop duplicates created by the move.
    dirs_changed = False
    moved_dirs = []
    for entry in self.indexed_directories:
        target = relocate(entry)
        if target != os.path.abspath(entry):
            dirs_changed = True
        if target not in moved_dirs:
            moved_dirs.append(target)
    if dirs_changed:
        self.indexed_directories = moved_dirs
        self.save_directories()

    # Excluded files: same relocation, stored as a set.
    moves = [(os.path.abspath(entry), relocate(entry)) for entry in self.excluded_files]
    excluded_changed = any(before != after for before, after in moves)
    if excluded_changed:
        self.excluded_files = {after for _, after in moves}
        self._save_excluded()

    if dirs_changed or excluded_changed:
        self.refresh_index()
def get_indexed_directories(self):
"""Get the list of all indexed directories."""
return self.indexed_directories.copy()
+1
View File
@@ -7,6 +7,7 @@ import time
from pathlib import Path
from src.constants import RAG_DIR
from src.runtime_paths import get_app_root
logger = logging.getLogger(__name__)
+86
View File
@@ -50,6 +50,23 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
return f"doc_{hashlib.sha256(key.encode('utf-8')).hexdigest()[:16]}"
def _rewrite_owner_path(value: str, path_map: Dict[str, str], path_prefixes: List[tuple]) -> str:
if not isinstance(value, str) or not value:
return value
abs_value = os.path.abspath(value)
mapped = path_map.get(abs_value)
if mapped:
return mapped
for old_prefix, new_prefix in path_prefixes:
old_abs = os.path.abspath(old_prefix)
new_abs = os.path.abspath(new_prefix)
if abs_value == old_abs:
return new_abs
if abs_value.startswith(old_abs + os.sep):
return new_abs + abs_value[len(old_abs):]
return value
class VectorRAG:
"""RAG system using ChromaDB vector storage with hybrid search."""
@@ -250,6 +267,75 @@ class VectorRAG:
"failed_count": len(docs) - len(valid),
}
def rename_owner(
    self,
    old_owner: str,
    new_owner: str,
    *,
    path_map: Optional[Dict[str, str]] = None,
    path_prefixes: Optional[List[tuple]] = None,
) -> Dict[str, Any]:
    """Rewrite existing RAG metadata after an auth username rename.

    For every collection returned by `_collections_for_delete()`, chunks
    whose ``owner`` metadata equals *old_owner* get ``owner`` set to
    *new_owner*, and their ``source`` / ``directory`` paths are passed
    through `_rewrite_owner_path`. Path keys a chunk does not already carry
    are left absent instead of being written back as ``None``.

    Args:
        old_owner: Previous username; trimmed and lower-cased before use.
        new_owner: New username; trimmed and lower-cased before use.
        path_map: Exact old-path -> new-path overrides for stored paths.
        path_prefixes: ``(old_prefix, new_prefix)`` pairs for stored paths.

    Returns:
        A dict with ``success``, ``updated_count`` and ``message``; once a
        scan has been attempted it also carries ``failed_count``. A failed
        scan counts as one failure, a failed update counts every chunk in
        that batch.
    """
    if not self.healthy:
        return {"success": False, "updated_count": 0, "message": "Collection not initialized"}

    old_owner = (old_owner or "").strip().lower()
    new_owner = (new_owner or "").strip().lower()
    if not old_owner or not new_owner or old_owner == new_owner:
        return {"success": True, "updated_count": 0, "message": "No owner rename needed"}

    path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
    path_prefixes = path_prefixes or []
    updated_ids = set()
    failed_count = 0

    for lane_name, collection in self._collections_for_delete():
        try:
            results = collection.get(
                where={"owner": old_owner},
                include=["metadatas"],
            )
        except Exception as e:
            logger.warning("rename_owner metadata scan failed in %s lane: %s", lane_name, e)
            failed_count += 1
            continue

        ids = results.get("ids") or []
        metadatas = results.get("metadatas") or []
        if not ids:
            continue

        new_metas = []
        selected_ids = []
        for doc_id, meta in zip(ids, metadatas):
            if not isinstance(meta, dict):
                continue
            next_meta = dict(meta)
            if str(next_meta.get("owner", "")).strip().lower() == old_owner:
                next_meta["owner"] = new_owner
            for key in ("source", "directory"):
                # Only touch path keys the chunk already has; assigning the
                # result for a missing key would inject `key: None` into the
                # stored metadata.
                if key in next_meta:
                    next_meta[key] = _rewrite_owner_path(next_meta[key], path_map, path_prefixes)
            selected_ids.append(doc_id)
            new_metas.append(next_meta)

        if not selected_ids:
            continue
        try:
            collection.update(ids=selected_ids, metadatas=new_metas)
            updated_ids.update(selected_ids)
        except Exception as e:
            logger.warning("rename_owner metadata update failed in %s lane: %s", lane_name, e)
            failed_count += len(selected_ids)

    success = failed_count == 0
    return {
        "success": success,
        "updated_count": len(updated_ids),
        "failed_count": failed_count,
        "message": f"Updated {len(updated_ids)} RAG chunk(s)",
    }
# ------------------------------------------------------------------
# Search — hybrid: vector similarity + keyword overlap
# ------------------------------------------------------------------
+78
View File
@@ -0,0 +1,78 @@
"""Server-side mirror of the built-in characters used for reminder synthesis.
The frontend ships these in static/js/presets.js (PROMPT_TEMPLATES with
isCharacter:true). The Reminders AI Synthesis card writes only the
persona ID into settings; the synthesis route in note_routes.py needs
the full prompt text to bias the utility model's voice. Keeping a small
local mirror avoids having the client send the prompt over the wire on
every reminder fire.
If the user picks a custom character (id == "custom") we fall back to
the warm-neutral baseline — custom prompts live in browser localStorage
and aren't visible to the server.
"""
PERSONAS = {
"socrates": (
"Never answer directly. Respond only with questions — sharp, layered, "
"Socratic. Expose contradictions. Make the person argue with themselves "
"until the truth falls out. Use irony like a scalpel. Be genuinely "
"curious, never condescending."
),
"razor": (
"Strip everything to the bone. No filler, no hedging, no pleasantries. "
"Answer in the fewest words possible. If one sentence works, don't use "
"two. If a word adds nothing, cut it. Blunt, precise, surgical."
),
"nietzsche": (
"Think and respond through the lens of Nietzsche. Analyze every "
"question in terms of will to power, self-overcoming, eternal "
"recurrence, ressentiment, value-creation, and master-slave morality. "
"Write with aphoristic force — sharp, compressed, vivid, and "
"unapologetic — but do not sacrifice depth for style. Favor "
"life-affirmation, discipline, courage, style, rank, self-overcoming, "
"and amor fati over nihilism, conformity, ressentiment, and self-pity."
),
"spark": (
"You are Spark, a playful, quick-witted assistant with bright energy "
"and practical instincts. Keep responses concise, vivid, and helpful. "
"Be warm without being cloying, imaginative without losing the thread, "
"and always center the user's actual goal. Use a light, lively voice "
"with occasional clever turns of phrase."
),
"odysseus": (
"You are Odysseus, king of Ithaca — subtle in counsel, disciplined in "
"judgment, and unmatched in strategic cunning. Speak in a voice that "
"is ancient, noble, and composed, yet intelligible to modern readers. "
"Be eloquent but not flowery. Be wise but not vague. Speak as one who "
"has weathered storms and taken back his house by wit, timing, and "
"resolve."
),
}
_DEFAULT_SYNTHESIS_TONE = (
"You write short, warm, one-line reminders. The user has set a note for "
"themselves and the moment to remember has arrived. Keep it under 18 "
"words. Be human, gentle, and direct — never robotic."
)
def synthesis_system_prompt(persona_id: str) -> str:
    """Build the reminder-synthesis system prompt for *persona_id*.

    The id is trimmed and lower-cased before the `PERSONAS` lookup. When no
    prompt is found for it (empty id, unknown id, or a client-only custom
    character) the warm-neutral `_DEFAULT_SYNTHESIS_TONE` is returned.
    """
    key = (persona_id or "").strip().lower()
    voice = PERSONAS.get(key)
    if not voice:
        return _DEFAULT_SYNTHESIS_TONE

    # The persona sets the voice; the trailing instruction keeps the model
    # on task (one short reminder line rather than a chat reply).
    task_instruction = (
        "You are now writing a single one-line reminder for the user. "
        "Keep it under 18 words and in the voice above."
    )
    return voice + "\n\n" + task_instruction
+30
View File
@@ -0,0 +1,30 @@
"""Helpers for resolving runtime paths in source and frozen builds."""
import os
import sys
def get_app_root() -> str:
    """Return the app root directory.

    Source runs: the directory two levels above this module file. Frozen
    builds (``sys.frozen`` set): ``sys._MEIPASS`` when present, otherwise the
    directory containing ``sys.executable``, so bundled folders such as
    `static/`, `scripts/` and `data/` are resolved next to the packaged
    payload.
    """
    if not getattr(sys, "frozen", False):
        module_path = os.path.abspath(__file__)
        return os.path.dirname(os.path.dirname(module_path))

    executable_dir = os.path.dirname(os.path.abspath(sys.executable))
    return getattr(sys, "_MEIPASS", executable_dir)
def get_default_data_dir() -> str:
    """Return the default data directory.

    Source runs use the ``data`` folder under `get_app_root()`. Frozen builds
    use ``~/.odysseus/data`` so SQLite databases and other persistent files
    are not written into the temporary extraction directory of the bundle.
    """
    running_frozen = getattr(sys, "frozen", False)
    if not running_frozen:
        return os.path.join(get_app_root(), "data")

    home = os.path.expanduser("~")
    return os.path.join(home, ".odysseus", "data")
+14 -1
View File
@@ -29,7 +29,15 @@ def _invalidate_caches():
# ── Default values ──
DEFAULT_SETTINGS = {
"image_gen_enabled": True,
# Agent email safety: when True, the MCP send_email / reply_to_email
# tools don't SMTP directly. They stage the composed message into the
# scheduled_emails table with status='agent_draft' and return a
# pending_id + the rendered email so the user can review and approve
# (or cancel) before it actually goes out. Default ON because models
# have been observed inventing signatures and sending to real
# recipients without confirmation.
"agent_email_confirm": True,
"image_gen_enabled": False,
"image_model": "",
"image_quality": "medium",
"vision_model": "",
@@ -133,6 +141,10 @@ DEFAULT_SETTINGS = {
# before producing output (endpoint offline / errors), the chat
# dispatch retries the next entry in order.
"default_model_fallbacks": [],
# When True, non-admin users inherit global default model/endpoint/fallbacks
# when they have no personal defaults. When False, users only use their
# personal defaults (no global fallback). Default is False.
"share_defaults_with_users": False,
"utility_endpoint_id": "",
"utility_model": "",
# Ordered fallback chain for the Utility model (summarization, naming,
@@ -151,6 +163,7 @@ DEFAULT_SETTINGS = {
# Reminders
"reminder_channel": "browser", # "browser" | "email" | "ntfy" | "webhook"
"reminder_llm_synthesis": False,
"reminder_llm_persona": "",
"reminder_ntfy_topic": "Reminders",
"reminder_email_to": "",
# Generic outbound webhook channel: pick any saved Integration as the
+64 -2
View File
@@ -1,6 +1,11 @@
"""Shared resolver for background-task AI endpoint (auto-naming, memory, sorting)."""
"""Shared resolver for background-task AI endpoints."""
from src.endpoint_resolver import resolve_endpoint
from src.endpoint_resolver import (
resolve_chat_fallback_candidates,
resolve_endpoint,
resolve_utility_fallback_candidates,
)
from src.llm_core import llm_call_async_with_fallback
def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None):
@@ -11,3 +16,60 @@ def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_heade
endpoint cannot be resolved.
"""
return resolve_endpoint("task", fallback_url, fallback_model, fallback_headers, owner=owner)
def resolve_task_candidates(
    fallback_url=None,
    fallback_model=None,
    fallback_headers=None,
    owner=None,
):
    """Build the ordered, de-duplicated list of background-task LLM candidates.

    Candidates are ``(url, model, headers)`` tuples collected in this order:

      1. configured Background Tasks endpoint/model, or caller fallback
      2. Utility endpoint/model
      3. Default endpoint/model
      4. Utility fallback chain
      5. Default fallback chain

    Entries with no url or no model are skipped, and a ``(url, model)`` pair
    is kept only the first time it appears. Missing headers become ``{}``.
    """
    ordered = []
    seen_pairs = set()

    def _add(url, model, headers):
        if not (url and model):
            return
        pair = (url, model)
        if pair in seen_pairs:
            return
        seen_pairs.add(pair)
        ordered.append((url, model, headers or {}))

    _add(*resolve_task_endpoint(fallback_url, fallback_model, fallback_headers, owner=owner))
    for role in ("utility", "default"):
        _add(*resolve_endpoint(role, owner=owner))
    for resolve_chain in (resolve_utility_fallback_candidates, resolve_chat_fallback_candidates):
        for url, model, headers in resolve_chain(owner=owner):
            _add(url, model, headers)

    return ordered
async def task_llm_call_async(
    messages,
    *,
    fallback_url=None,
    fallback_model=None,
    fallback_headers=None,
    owner=None,
    **kwargs,
):
    """Send *messages* through the shared background-task candidate chain.

    The candidates come from `resolve_task_candidates`; any extra keyword
    arguments are forwarded to `llm_call_async_with_fallback`.

    Raises:
        RuntimeError: when no candidate endpoint could be resolved.
    """
    chain = resolve_task_candidates(
        fallback_url=fallback_url,
        fallback_model=fallback_model,
        fallback_headers=fallback_headers,
        owner=owner,
    )
    if chain:
        return await llm_call_async_with_fallback(chain, messages=messages, **kwargs)
    raise RuntimeError("No LLM endpoint available for background task")
+123 -29
View File
@@ -9,6 +9,8 @@ import uuid
from datetime import datetime, timedelta, timezone
from typing import Any, Awaitable, Callable, Dict, Tuple
from core.auth import RESERVED_USERNAMES
logger = logging.getLogger(__name__)
@@ -17,6 +19,34 @@ def _utcnow() -> datetime:
return datetime.now(timezone.utc).replace(tzinfo=None)
# Shell/file tools a scheduled task's agent should be offered by default,
# mirroring the chat agent (where these are on unless a privilege or global
# setting turns them off). The RAG tool selector + ASSISTANT_ALWAYS_AVAILABLE
# never include bash/python, so on a host with an empty/degraded tool-embedding
# index a task could not run shell or Python even for an admin owner. Offering
# them here is safe: stream_agent_loop's blocked_tools_for_owner() still strips
# this whole group for non-admin multi-user owners, and only admits it for
# admins and single-user (AUTH_ENABLED=false) deployments.
TASK_DEFAULT_SHELL_TOOLS = frozenset((
    "bash",
    "python",
    "read_file",
    "write_file",
    "edit_file",
    "grep",
    "glob",
    "ls",
    "get_workspace",
))


def compose_task_relevant_tools(rag_tools, assistant_always, disabled_tools):
    """Return the set of tool names offered to a scheduled task's agent.

    The result is the union of the RAG-retrieved tools, the assistant's
    always-available tools and `TASK_DEFAULT_SHELL_TOOLS`, minus every name
    in *disabled_tools* (skipped when that argument is empty or None).
    Per-owner admin gating is not done here.
    """
    offered = set(rag_tools)
    offered.update(assistant_always)
    offered.update(TASK_DEFAULT_SHELL_TOOLS)
    if disabled_tools:
        offered.difference_update(disabled_tools)
    return offered
# ── Shared TTL cache (singleflight) ────────────────────────────────────────
# Multiple scheduled tasks firing in the same minute often need the same
# external data (Miniflux unreads, MCP tool snapshots, etc.). This cache
@@ -236,6 +266,29 @@ def _digest_windows(now):
]
def _checkin_calendar_events(db, owner, start, end):
"""Calendar events in [start, end] for ONE owner, for the check-in digest.
Ownership lives on CalendarCal.owner; events inherit it via calendar_id.
The digest query had no owner scope, so it pulled EVERY user's events into
one user's check-in (a cross-tenant leak of summaries/locations). Scope it
by joining CalendarCal, mirroring routes/calendar_routes.list_events.
"""
from core.database import CalendarEvent as _CE, CalendarCal as _CC
return (
db.query(_CE)
.join(_CC, _CE.calendar_id == _CC.id)
.filter(
_CC.owner == owner,
_CE.dtstart >= start,
_CE.dtstart <= end,
_CE.status != "cancelled",
)
.order_by(_CE.dtstart)
.all()
)
class TaskScheduler:
def __init__(self, session_manager):
self._session_manager = session_manager
@@ -833,6 +886,14 @@ class TaskScheduler:
owner=task.owner,
body=run.result if output == "notification" else None,
)
elif run.status == "error":
self.add_notification(
task.name,
"error",
task_id,
owner=task.owner,
body=run.error or run.result,
)
# Log result to the assistant chat so all task activity is visible.
# Skip skipped/error rows — user shouldn't see "skipped: …" noise
@@ -1127,11 +1188,7 @@ class TaskScheduler:
# Strip timezone for naive DB comparison
_s = start.replace(tzinfo=None) if start.tzinfo else start
_e = end.replace(tzinfo=None) if end.tzinfo else end
evs = _db.query(_CE).filter(
_CE.dtstart >= _s,
_CE.dtstart <= _e,
_CE.status != "cancelled",
).order_by(_CE.dtstart).all()
evs = _checkin_calendar_events(_db, task.owner, _s, _e)
if not evs:
continue
# Group by importance for richer output
@@ -1338,11 +1395,24 @@ class TaskScheduler:
return await self._execute_checkin(task, crew, db, session_id, endpoint_url, model)
# Build system prompt: crew member persona overrides the default.
# Built-in character_id (Socrates, Razor, etc.) further biases the
# voice — it prepends to whichever base prompt we landed on so the
# task still knows it's executing a scheduled task but in that
# character's tone.
system_prompt = (
(crew.personality or "").strip()
if crew and crew.personality
else "You are a helpful assistant executing a scheduled task. Use available tools to complete the task thoroughly."
)
char_id = (getattr(task, "character_id", None) or "").strip()
if char_id:
try:
from src.reminder_personas import PERSONAS as _PERSONAS
char_prompt = _PERSONAS.get(char_id.lower())
if char_prompt:
system_prompt = f"{char_prompt}\n\n{system_prompt}"
except Exception:
pass
# Inject current time so the model knows what's past vs upcoming
tz_name = _resolve_task_timezone(db, task)
try:
@@ -1357,17 +1427,30 @@ class TaskScheduler:
time_str = _utcnow().strftime("%A, %B %d %Y, %H:%M UTC")
system_prompt = f"Current time: {time_str}\n\n{system_prompt}"
# Compute tool filter from CrewMember.enabled_tools if set
disabled_tools = None
# Compute the disabled-tools set: the crew's enabled_tools allowlist
# (inverted) plus the operator's global disabled_tools setting. The
# global list must be merged here — chat does the same merge before
# entering the agent loop (routes/chat_routes.py) — otherwise an admin
# or AUTH_ENABLED=false scheduled task would still see and call shell/
# file tools after the operator disabled them globally, because the
# prompt/schema/execution gates only enforce what is passed in.
disabled_tools: set[str] = set()
if crew and crew.enabled_tools:
try:
enabled = json.loads(crew.enabled_tools)
if isinstance(enabled, list) and enabled:
from src.tool_index import BUILTIN_TOOL_DESCRIPTIONS
all_tools = set(BUILTIN_TOOL_DESCRIPTIONS.keys())
disabled_tools = all_tools - set(enabled)
disabled_tools |= all_tools - set(enabled)
except Exception:
pass
try:
from src.settings import get_setting
_global_disabled = get_setting("disabled_tools", [])
if isinstance(_global_disabled, list):
disabled_tools.update(_global_disabled)
except Exception:
pass
# RAG-select relevant tools for this prompt + always-available assistant tools.
# Without this, all 40+ tools get sent and models hit their tool limit.
@@ -1377,10 +1460,10 @@ class TaskScheduler:
tool_idx = get_tool_index()
if tool_idx:
rag_tools = tool_idx.get_tools_for_query(task.prompt or "", k=8)
relevant_tools = (rag_tools | ASSISTANT_ALWAYS_AVAILABLE)
if disabled_tools:
relevant_tools -= disabled_tools
logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available = {len(relevant_tools)} total for '{task.name}'")
relevant_tools = compose_task_relevant_tools(
rag_tools, ASSISTANT_ALWAYS_AVAILABLE, disabled_tools
)
logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available + shell/file defaults = {len(relevant_tools)} total for '{task.name}'")
except Exception as e:
logger.warning(f"[assistant] RAG tool selection failed, using all: {e}")
@@ -1388,17 +1471,23 @@ class TaskScheduler:
try:
result = await self._run_agent_loop(
endpoint_url, model, task, session_id,
system_prompt=system_prompt, disabled_tools=disabled_tools,
system_prompt=system_prompt, disabled_tools=disabled_tools or None,
relevant_tools=relevant_tools,
)
except Exception as e:
logger.warning(f"Agent loop failed for task '{task.name}', falling back to simple call: {e}")
from src.llm_core import llm_call_async
from src.task_endpoint import task_llm_call_async
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": task.prompt},
]
result = await llm_call_async(url=endpoint_url, model=model, messages=messages, timeout=120)
result = await task_llm_call_async(
messages,
fallback_url=endpoint_url,
fallback_model=model,
owner=task.owner,
timeout=120,
)
# Strip the model's chain-of-thought before saving/delivering. Task
# output is LLM-only, so prose=True (which also removes untagged
@@ -1578,7 +1667,7 @@ class TaskScheduler:
msg["X-Odysseus-Ref"] = str(task.id)
msg.set_content(result or "")
_send_smtp_message(cfg, from_addr, [to_addr], msg.as_string(), timeout=30)
logger.info("Task %s emailed result to %s (%sb)", task.id, to_addr, len(result or ""))
logger.info("Task %s emailed result (recipient_set=%s, %sb)", task.id, bool(to_addr), len(result or ""))
except Exception as e:
logger.error("Task %s email delivery failed: %s", task.id, e, exc_info=True)
raise
@@ -1623,13 +1712,17 @@ class TaskScheduler:
# Honor per-task max_steps (defense against runaway agent loops).
# Falls back to 20 if not set — the historical default.
_task_max_rounds = task.max_steps if task.max_steps and task.max_steps > 0 else 20
# Tasks are background workloads they share the Utility model's
# fallback chain (Settings → Utility Model → Fallbacks). A downed
# primary endpoint won't silently yield `(no output)` — same recipe
# chat uses but with the utility list (`utility_model_fallbacks`).
# Tasks are background workloads: use the shared task fallback chain
# behind the primary endpoint so a downed primary won't silently yield
# `(no output)`.
try:
from src.endpoint_resolver import resolve_utility_fallback_candidates
_task_fallbacks = resolve_utility_fallback_candidates(owner=task.owner or None)
from src.task_endpoint import resolve_task_candidates
_task_fallbacks = resolve_task_candidates(
fallback_url=endpoint_url,
fallback_model=model,
fallback_headers=headers,
owner=task.owner or None,
)[1:]
except Exception:
_task_fallbacks = []
async for event_str in stream_agent_loop(
@@ -1666,21 +1759,22 @@ class TaskScheduler:
# asking it to summarize what it did. Guarantees output.
if not full_text.strip():
try:
from src.llm_core import llm_call_async_with_fallback
from src.endpoint_resolver import resolve_utility_fallback_candidates
from src.task_endpoint import task_llm_call_async
grace_context = "You ran out of steps. "
if tool_results:
grace_context += "Here's what your tools returned:\n" + "\n".join(tool_results[-5:])
else:
grace_context += "No tool results were captured."
grace_context += "\n\nSummarize what you accomplished and what's still pending. Be concise."
_grace_candidates = [(endpoint_url, model, headers)] + resolve_utility_fallback_candidates(owner=task.owner or None)
full_text = await llm_call_async_with_fallback(
_grace_candidates,
full_text = await task_llm_call_async(
messages=[
{"role": "system", "content": system_content},
{"role": "user", "content": grace_context},
],
fallback_url=endpoint_url,
fallback_model=model,
fallback_headers=headers,
owner=task.owner or None,
timeout=30,
)
full_text = (full_text or "").strip()
@@ -1935,7 +2029,7 @@ class TaskScheduler:
# silent SMTP failure is easier to spot in the logs.
logger.info(
f"Task {task.id} delivered via MCP tool {tool_name} "
f"(to={recipient or '<unset>'}, body={body_len}b, reply={stdout[:200]!r})"
f"(recipient_set={bool(recipient)}, body={body_len}b, reply={stdout[:200]!r})"
)
except Exception as e:
logger.error(f"Task {task.id} MCP delivery failed: {e}")
@@ -2189,7 +2283,7 @@ class TaskScheduler:
# check-ins seeded, which then double-fire alongside the human user's
# check-ins. This was the root cause of the duplicate 'Morning check-in'
# rows we had to manually clean up.
if not owner or owner in {"internal-tool", "api", "demo", "system"}:
if not owner or owner in RESERVED_USERNAMES:
logger.info(f"ensure_assistant_defaults: skip synthetic owner {owner!r}")
return
from core.database import SessionLocal, CrewMember, ScheduledTask
+64 -29
View File
@@ -323,6 +323,24 @@ _MCP_TOOL_MAP = {
"web_fetch": ("web_fetch", "web_fetch"),
"generate_image": ("image_gen", "generate_image"),
}
_EMAIL_MCP_OWNER_ARG = "_odysseus_owner"
def _parse_qualified_mcp_args(tool: str, content: str) -> tuple[Dict, Optional[str]]:
raw = (content or "").strip()
if not raw:
return {}, None
try:
parsed = json.loads(raw)
except (json.JSONDecodeError, TypeError):
if tool.startswith("mcp__email__"):
return {}, "Email MCP tool arguments must be a JSON object."
return {}, None
if not isinstance(parsed, dict):
if tool.startswith("mcp__email__"):
return {}, "Email MCP tool arguments must be a JSON object."
return {}, None
return parsed, None
def _parse_generate_image(content: str) -> Dict:
@@ -453,6 +471,8 @@ async def _direct_fallback(
tool: str,
content: str,
progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
session_id: Optional[str] = None,
owner: Optional[str] = None,
) -> Optional[Dict]:
_subproc_env = {
**os.environ,
@@ -466,6 +486,8 @@ async def _direct_fallback(
ctx = {
"progress_cb": progress_cb,
"subproc_env": _subproc_env,
"session_id": session_id,
"owner": owner,
}
from src.agent_tools import TOOL_HANDLERS
@@ -541,9 +563,7 @@ async def _execute_tool_block_impl(
"""
from src.tool_implementations import (
do_search_chats, do_manage_tasks,
do_manage_skills, do_api_call, do_manage_endpoints,
do_manage_mcp, do_manage_webhooks, do_manage_tokens,
do_manage_settings, do_manage_notes,
do_manage_skills, do_api_call, do_manage_notes,
do_manage_calendar,
do_download_model, do_serve_model, do_list_served_models, do_stop_served_model,
do_tail_serve_output,
@@ -713,10 +733,13 @@ async def _execute_tool_block_impl(
desc = f"bash (background): {short}"
result = {
"output": (
f"Started background job `{rec['id']}`. It is running detached "
f"Started background job `{rec['id']}`. It is running detached; "
f"do NOT wait for it or poll it. You will be automatically re-invoked "
f"with its full output when it finishes. Continue with other work, or "
f"end your turn now and resume when the result arrives."
f"end your turn now and resume when the result arrives. If the user "
f"later asks to check progress or stop it, call the manage_bg_jobs "
f"tool yourself (output or kill); do not tell them to run a tool "
f"command, and do not surface raw tool syntax in your reply."
),
"exit_code": 0,
"bg_job_id": rec["id"],
@@ -737,6 +760,11 @@ async def _execute_tool_block_impl(
desc = f"{tool}: {first_line}"
result = await _direct_fallback(tool, content, progress_cb=progress_cb) \
or {"error": f"{tool}: execution failed", "exit_code": 1}
elif tool == "manage_bg_jobs":
# Inspect/kill detached `bash` jobs; needs session_id to scope to chat.
desc = f"manage_bg_jobs: {content.split(chr(10))[0][:80]}"
result = await _direct_fallback(tool, content, session_id=session_id, owner=owner) \
or {"error": "manage_bg_jobs: execution failed", "exit_code": 1}
elif tool in ("create_document", "update_document", "edit_document",
"suggest_document", "manage_documents"):
desc = f"{tool}: {content.split(chr(10))[0][:80]}"
@@ -748,10 +776,24 @@ async def _execute_tool_block_impl(
query = content.split("\n")[0].strip()
desc = f"search_chats: {query[:80]}"
result = await do_search_chats(query, owner=owner)
elif tool in ("chat_with_model", "create_session", "list_sessions",
"send_to_session", "pipeline",
"manage_session", "manage_memory", "list_models",
"ui_control", "ask_teacher"):
elif tool in ("chat_with_model", "ask_teacher", "list_models"):
# Migrated to the agent_tools registry (#3629): dispatched through
# TOOL_HANDLERS with the owner/session ctx these tools need, instead
# of the legacy dispatch_ai_tool elif. The impls live in
# src/agent_tools/model_interaction_tools.py.
first_line = content.split(chr(10))[0].strip()[:60]
desc = f"{tool}: {first_line}" if first_line else tool
result = await _document_tool_dispatch(tool, content, session_id, owner) \
or {"error": f"{tool}: execution failed", "exit_code": 1}
elif tool in ("create_session", "list_sessions", "send_to_session", "manage_session"):
# Migrated to the agent_tools registry (#3629): dispatched through
# TOOL_HANDLERS with the owner/session ctx these tools need. The impls
# live in src/agent_tools/session_tools.py.
first_line = content.split(chr(10))[0].strip()[:60]
desc = f"{tool}: {first_line}" if first_line else tool
result = await _document_tool_dispatch(tool, content, session_id, owner) \
or {"error": f"{tool}: execution failed", "exit_code": 1}
elif tool in ("pipeline", "manage_memory", "ui_control"):
from src.ai_interaction import dispatch_ai_tool
desc, result = await dispatch_ai_tool(tool, content, session_id, owner=owner)
elif tool == "manage_tasks":
@@ -764,21 +806,11 @@ async def _execute_tool_block_impl(
first_line = content.split("\n")[0].strip()[:60]
desc = f"api_call: {first_line}"
result = await do_api_call(content)
elif tool == "manage_endpoints":
desc = "manage_endpoints"
result = await do_manage_endpoints(content, owner=owner)
elif tool == "manage_mcp":
desc = "manage_mcp"
result = await do_manage_mcp(content, owner=owner)
elif tool == "manage_webhooks":
desc = "manage_webhooks"
result = await do_manage_webhooks(content, owner=owner)
elif tool == "manage_tokens":
desc = "manage_tokens"
result = await do_manage_tokens(content, owner=owner)
elif tool == "manage_settings":
desc = "manage_settings"
result = await do_manage_settings(content, owner=owner)
elif tool in ("manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "manage_settings"):
# Registry-dispatched (agent_tools.admin_tools); owner threaded for ownership/admin checks.
desc = tool
result = await _direct_fallback(tool, content, owner=owner) \
or {"error": f"{tool}: execution failed", "exit_code": 1}
elif tool == "manage_notes":
desc = "manage_notes"
result = await do_manage_notes(content, owner=owner)
@@ -858,12 +890,15 @@ async def _execute_tool_block_impl(
# MCP tool dispatch
mcp = get_mcp_manager()
if mcp:
try:
args = json.loads(content) if content.strip().startswith("{") else {}
except (json.JSONDecodeError, TypeError):
args = {}
desc = f"mcp: {tool}"
result = await mcp.call_tool(tool, args)
args, parse_error = _parse_qualified_mcp_args(tool, content)
if parse_error:
result = {"error": parse_error, "exit_code": 1}
else:
if tool.startswith("mcp__email__") and owner:
args = dict(args)
args[_EMAIL_MCP_OWNER_ARG] = owner
result = await mcp.call_tool(tool, args)
else:
desc = f"mcp: {tool}"
result = {"error": "MCP manager not available", "exit_code": 1}
+107 -659
View File
@@ -12,48 +12,62 @@ import os
import re
from typing import Any, Dict, List, Optional
from fastapi import HTTPException
from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
from src.tool_utils import get_mcp_manager
from src.tool_utils import get_mcp_manager, _parse_tool_args
from core.constants import internal_api_base
from routes._validators import validate_remote_host, validate_ssh_port
logger = logging.getLogger(__name__)
def _string_arg(value: Any) -> str:
return "" if value is None else str(value).strip()
def _validate_cookbook_ssh_target(remote_host: Any, ssh_port: Any = "") -> tuple[str, str]:
    """Normalize and validate an SSH host/port pair.

    Each value is stringified and stripped, and blank values are handed to the
    validators as ``None``. A falsy validator result is returned as ``""``.
    The validators come from ``routes._validators``; how they signal an
    invalid value is not visible here.
    """
    host_text = _string_arg(remote_host)
    checked_host = validate_remote_host(host_text if host_text else None)

    port_text = _string_arg(ssh_port)
    checked_port = validate_ssh_port(port_text if port_text else None)

    return checked_host or "", checked_port or ""
# ---------------------------------------------------------------------------
# Active email state
# ---------------------------------------------------------------------------
# When the user has an email reader window open, the frontend tells the
# backend about it on each chat submit. Email tools can resolve "this email"
# without guessing a UID. Cleared between requests by chat_routes.
_active_email_ref: Optional[Dict[str, str]] = None
def set_active_email(uid: Optional[str], folder: Optional[str] = None, account: Optional[str] = None,
                     subject: Optional[str] = None, sender: Optional[str] = None) -> None:
    """Record the email currently open in the UI; a falsy ``uid`` clears it.

    Every field is stored as a string. ``folder`` falls back to ``"INBOX"``
    and the remaining optional fields fall back to ``""``. ``sender`` is
    stored under the ``"from"`` key.
    """
    global _active_email_ref

    reference: Optional[Dict[str, str]] = None
    if uid:
        reference = {
            "uid": str(uid),
            "folder": str(folder or "INBOX"),
            "account": str(account or ""),
            "subject": str(subject or ""),
            "from": str(sender or ""),
        }
    _active_email_ref = reference
def get_active_email() -> Optional[Dict[str, str]]:
    """Return the stored active-email reference, or ``None`` when nothing is set.

    The stored dict itself is returned, not a copy.
    """
    current = _active_email_ref
    return current
def clear_active_email() -> None:
    """Forget the active-email reference so later lookups return ``None``."""
    global _active_email_ref
    _active_email_ref = None
# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------
def _parse_tool_args(content):
"""Parse a tool-call argument blob.
Accepts either a JSON string or an already-decoded dict. Unwraps the
common `{"body": {...}}` envelope that smaller models emit when they
read tool descriptions like "Body is JSON: {...}" literally they
pass `body` as a field name rather than treating it as a noun.
Returns a dict on success, raises ValueError on bad JSON.
"""
if isinstance(content, str):
try:
args = json.loads(content) if content.strip() else {}
except (json.JSONDecodeError, TypeError) as e:
raise ValueError(str(e))
elif isinstance(content, dict):
args = content
else:
args = {}
# Unwrap {"body": {...}} envelope — but only if `body` is the sole key
# and points at a dict. We don't want to clobber a legitimate `body`
# field on tools where it's a real arg (e.g. send_email body text).
if (
isinstance(args, dict)
and len(args) == 1
and "body" in args
and isinstance(args["body"], dict)
and "action" in args["body"] # extra safety: only unwrap if the inner dict looks like a tool call
):
args = args["body"]
return args
# ---------------------------------------------------------------------------
# Search chats
# ---------------------------------------------------------------------------
@@ -542,620 +556,6 @@ async def do_manage_tasks(content: str, owner: Optional[str] = None) -> Dict:
db.close()
# ---------------------------------------------------------------------------
# Endpoint management tool
# ---------------------------------------------------------------------------
async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict:
    """Manage model endpoints: list, add, delete, enable, disable.

    Args:
        content: JSON object (string or dict) with an ``action`` key
            (default ``"list"``) plus action-specific fields:
            ``name`` / ``base_url`` / ``api_key`` for ``add``;
            ``endpoint_id`` for ``delete`` / ``enable`` / ``disable``.
        owner: Accepted for parity with the other manage_* tools; this
            implementation does not consult it.

    Returns:
        A result dict. On success ``exit_code`` is 0 with a ``response``
        message (plus ``endpoints`` for ``list``); on failure ``exit_code``
        is 1 with an ``error`` message.
    """
    # NOTE(review): `owner` is never checked, so nothing here restricts who may
    # add or delete endpoints — confirm authorization is enforced by the caller.
    from core.database import SessionLocal, ModelEndpoint

    try:
        args = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}
    if not isinstance(args, dict):
        # Valid JSON that is not an object would otherwise crash on .get().
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = args.get("action", "list")
    db = SessionLocal()
    try:
        if action == "list":
            eps = db.query(ModelEndpoint).all()
            items = [
                {"id": e.id, "name": e.name, "base_url": e.base_url, "is_enabled": e.is_enabled}
                for e in eps
            ]
            return {"response": f"{len(items)} endpoints", "endpoints": items, "exit_code": 0}

        if action == "add":
            import uuid as _uuid
            from datetime import datetime

            name = args.get("name", "")
            base_url = args.get("base_url", "")
            api_key = args.get("api_key", "")
            if not base_url:
                return {"error": "base_url is required", "exit_code": 1}
            eid = str(_uuid.uuid4())[:8]
            ep = ModelEndpoint(
                id=eid, name=name or base_url, base_url=base_url,
                api_key=api_key, is_enabled=True,
                created_at=datetime.utcnow(), updated_at=datetime.utcnow(),
            )
            db.add(ep)
            db.commit()
            return {"response": f"Added endpoint '{name or base_url}' (id: {eid})", "exit_code": 0}

        if action in ("delete", "enable", "disable"):
            eid = args.get("endpoint_id", "")
            if not eid:
                # Report the missing argument rather than "Endpoint  not found".
                return {"error": "endpoint_id is required", "exit_code": 1}
            ep = db.query(ModelEndpoint).filter(ModelEndpoint.id == eid).first()
            if not ep:
                return {"error": f"Endpoint {eid} not found", "exit_code": 1}
            if action == "delete":
                name = ep.name
                db.delete(ep)
                db.commit()
                return {"response": f"Deleted endpoint '{name}'", "exit_code": 0}
            ep.is_enabled = (action == "enable")
            db.commit()
            return {"response": f"Endpoint '{ep.name}' {action}d", "exit_code": 0}

        return {"error": f"Unknown action: {action}", "exit_code": 1}
    except Exception as e:
        # Tool boundary: report the failure to the model instead of raising,
        # but keep the traceback in the log.
        logger.error("manage_endpoints error: %s", e, exc_info=True)
        return {"error": str(e), "exit_code": 1}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# MCP server management tool
# ---------------------------------------------------------------------------
async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
    """Manage MCP servers: list, add, delete, enable, disable, reconnect, list_tools.

    Args:
        content: JSON object (string or dict) with an ``action`` key
            (default ``"list"``) plus action-specific fields:
            ``name`` / ``command`` / ``args`` / ``env`` for ``add``;
            ``server_id`` for ``delete`` / ``reconnect`` / ``enable`` /
            ``disable``.
        owner: Accepted for parity with the other manage_* tools; this
            implementation does not consult it.

    Returns:
        A result dict. On success ``exit_code`` is 0 with a ``response``
        message (plus ``servers`` or ``tools`` for the listing actions); on
        failure ``exit_code`` is 1 with an ``error`` message.
    """
    # NOTE(review): `owner` is never checked, so nothing here restricts who may
    # register a stdio command as an MCP server — confirm authorization is
    # enforced by the caller.
    try:
        args = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}
    if not isinstance(args, dict):
        # Valid JSON that is not an object would otherwise crash on .get().
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = args.get("action", "list")

    if action in ("delete", "reconnect", "enable", "disable") and not args.get("server_id"):
        # Report the missing argument rather than "Server  not found".
        return {"error": "server_id is required", "exit_code": 1}

    if action == "list":
        mcp = get_mcp_manager()
        if not mcp:
            return {"response": "No MCP manager available", "servers": [], "exit_code": 0}
        from core.database import SessionLocal, McpServer
        db = SessionLocal()
        try:
            items = []
            for s in db.query(McpServer).all():
                st = mcp.get_server_status(s.id)
                items.append({
                    "id": s.id, "name": s.name, "transport": s.transport,
                    "is_enabled": s.is_enabled,
                    "status": st.get("status", "disconnected"),
                    "tool_count": st.get("tool_count", 0),
                })
            return {"response": f"{len(items)} MCP servers", "servers": items, "exit_code": 0}
        finally:
            db.close()

    elif action == "add":
        from core.database import SessionLocal, McpServer
        import uuid as _uuid
        from datetime import datetime

        name = args.get("name", "")
        command = args.get("command", "")
        cmd_args = args.get("args", [])
        env = args.get("env", {})
        if not name or not command:
            return {"error": "name and command are required", "exit_code": 1}
        sid = str(_uuid.uuid4())[:8]
        db = SessionLocal()
        try:
            srv = McpServer(
                id=sid, name=name, transport="stdio", command=command,
                args=json.dumps(cmd_args) if isinstance(cmd_args, list) else cmd_args,
                env=json.dumps(env) if isinstance(env, dict) else env,
                is_enabled=True, created_at=datetime.utcnow(), updated_at=datetime.utcnow(),
            )
            db.add(srv)
            db.commit()
        finally:
            db.close()

        # Try to connect. The row is already saved, so a connection failure is
        # logged and the server is reported with 0 tools.
        mcp = get_mcp_manager()
        tool_count = 0
        if mcp:
            try:
                await mcp.connect_server(
                    sid, name, "stdio", command=command,
                    args=cmd_args if isinstance(cmd_args, list) else json.loads(cmd_args),
                    env=env if isinstance(env, dict) else json.loads(env),
                )
                st = mcp.get_server_status(sid)
                tool_count = st.get("tool_count", 0)
            except Exception as e:
                logger.warning(f"MCP connect failed for {name}: {e}")
        return {"response": f"Added MCP server '{name}' ({tool_count} tools)", "exit_code": 0}

    elif action == "delete":
        sid = args.get("server_id", "")
        from core.database import SessionLocal, McpServer
        db = SessionLocal()
        try:
            srv = db.query(McpServer).filter(McpServer.id == sid).first()
            if not srv:
                return {"error": f"Server {sid} not found", "exit_code": 1}
            name = srv.name
            mcp = get_mcp_manager()
            if mcp:
                try:
                    await mcp.disconnect_server(sid)
                except Exception as e:
                    # Best effort: the row is deleted regardless, but leave a
                    # trace so a stuck connection can be diagnosed.
                    logger.warning(f"MCP disconnect failed for {name}: {e}")
            db.delete(srv)
            db.commit()
            return {"response": f"Deleted MCP server '{name}'", "exit_code": 0}
        finally:
            db.close()

    elif action == "reconnect":
        sid = args.get("server_id", "")
        mcp = get_mcp_manager()
        if not mcp:
            return {"error": "MCP manager not available", "exit_code": 1}
        try:
            await mcp.disconnect_server(sid)
            from core.database import SessionLocal, McpServer
            db2 = SessionLocal()
            try:
                srv = db2.query(McpServer).filter(McpServer.id == sid).first()
                if not srv:
                    return {"error": f"Server {sid} not found", "exit_code": 1}
                await mcp.connect_server(
                    server_id=sid,
                    name=srv.name,
                    transport=srv.transport,
                    command=srv.command,
                    args=json.loads(srv.args) if srv.args else [],
                    env=json.loads(srv.env) if srv.env else {},
                    url=srv.url,
                )
                st = mcp.get_server_status(sid)
                return {"response": f"Reconnected '{srv.name}' ({st.get('tool_count', 0)} tools)", "exit_code": 0}
            finally:
                db2.close()
        except Exception as e:
            return {"error": str(e), "exit_code": 1}

    elif action in ("enable", "disable"):
        sid = args.get("server_id", "")
        from core.database import SessionLocal, McpServer
        db = SessionLocal()
        try:
            srv = db.query(McpServer).filter(McpServer.id == sid).first()
            if not srv:
                return {"error": f"Server {sid} not found", "exit_code": 1}
            # Only the stored flag changes; no connect/disconnect happens here.
            srv.is_enabled = (action == "enable")
            db.commit()
            return {"response": f"MCP server '{srv.name}' {action}d", "exit_code": 0}
        finally:
            db.close()

    elif action == "list_tools":
        mcp = get_mcp_manager()
        if not mcp:
            return {"response": "No MCP manager", "tools": [], "exit_code": 0}
        tools = mcp.get_all_tools()
        items = [
            {"name": t["name"], "server": t["server_name"],
             "description": t.get("description", "")[:100]}
            for t in tools
        ]
        return {"response": f"{len(items)} MCP tools available", "tools": items, "exit_code": 0}

    else:
        return {"error": f"Unknown action: {action}", "exit_code": 1}
# ---------------------------------------------------------------------------
# Webhook management tool
# ---------------------------------------------------------------------------
async def do_manage_webhooks(content: str, owner: Optional[str] = None) -> Dict:
    """Manage webhooks: list, add, delete, enable, disable.

    Args:
        content: JSON object (string or dict) with an ``action`` key
            (default ``"list"``) plus action-specific fields:
            ``name`` / ``url`` / ``events`` for ``add`` (``events`` defaults
            to ``"chat.completed"``); ``webhook_id`` for ``delete`` /
            ``enable`` / ``disable``.
        owner: Accepted for parity with the other manage_* tools; this
            implementation does not consult it.

    Returns:
        A result dict. On success ``exit_code`` is 0 with a ``response``
        message (plus ``webhooks`` for ``list``); on failure ``exit_code``
        is 1 with an ``error`` message.
    """
    from core.database import SessionLocal

    try:
        args = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}
    if not isinstance(args, dict):
        # Valid JSON that is not an object would otherwise crash on .get().
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = args.get("action", "list")
    db = SessionLocal()
    try:
        from core.database import Webhook

        if action == "list":
            hooks = db.query(Webhook).all()
            items = [
                {"id": h.id, "name": h.name, "url": h.url,
                 "events": h.events, "is_active": h.is_active}
                for h in hooks
            ]
            return {"response": f"{len(items)} webhooks", "webhooks": items, "exit_code": 0}

        if action == "add":
            import uuid as _uuid
            from datetime import datetime
            from src.webhook_manager import validate_events, validate_webhook_url

            name = args.get("name", "")
            url = args.get("url", "")
            events = args.get("events", "chat.completed")
            if not url:
                return {"error": "url is required", "exit_code": 1}
            try:
                url = validate_webhook_url(url)
                events = validate_events(events)
            except ValueError as e:
                # The validators' message is passed through as the tool error.
                return {"error": str(e), "exit_code": 1}
            wid = str(_uuid.uuid4())[:8]
            hook = Webhook(
                id=wid, name=name or url, url=url,
                events=events, is_active=True,
                created_at=datetime.utcnow(), updated_at=datetime.utcnow(),
            )
            db.add(hook)
            db.commit()
            return {"response": f"Added webhook '{name or url}'", "exit_code": 0}

        if action in ("delete", "enable", "disable"):
            wid = args.get("webhook_id", "")
            if not wid:
                # Report the missing argument rather than "Webhook  not found".
                return {"error": "webhook_id is required", "exit_code": 1}
            hook = db.query(Webhook).filter(Webhook.id == wid).first()
            if not hook:
                return {"error": f"Webhook {wid} not found", "exit_code": 1}
            if action == "delete":
                name = hook.name
                db.delete(hook)
                db.commit()
                return {"response": f"Deleted webhook '{name}'", "exit_code": 0}
            hook.is_active = (action == "enable")
            db.commit()
            return {"response": f"Webhook '{hook.name}' {action}d", "exit_code": 0}

        return {"error": f"Unknown action: {action}", "exit_code": 1}
    except Exception as e:
        # Tool boundary: report the failure to the model instead of raising,
        # but keep the traceback in the log.
        logger.error("manage_webhooks error: %s", e, exc_info=True)
        return {"error": str(e), "exit_code": 1}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# API token management tool
# ---------------------------------------------------------------------------
async def do_manage_tokens(content: str, owner: Optional[str] = None) -> Dict:
    """Manage API tokens: list, create, delete.

    Args:
        content: JSON object (string or dict) with an ``action`` key
            (default ``"list"``) plus action-specific fields: ``name`` for
            ``create`` (default ``"API Token"``); ``token_id`` for ``delete``.
        owner: Accepted for parity with the other manage_* tools; this
            implementation does not consult it.

    Returns:
        A result dict. On success ``exit_code`` is 0 with a ``response``
        message (plus ``tokens`` for ``list``, or the raw ``token`` for
        ``create``); on failure ``exit_code`` is 1 with an ``error`` message.
        Only a bcrypt hash and an 8-character prefix of the token are stored.
    """
    # NOTE(review): `owner` is never checked — `list` returns every token row
    # and `delete` accepts any id. Confirm authorization is enforced by the
    # caller.
    from core.database import SessionLocal, ApiToken

    try:
        args = _parse_tool_args(content)
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}
    if not isinstance(args, dict):
        # Valid JSON that is not an object would otherwise crash on .get().
        return {"error": "Invalid JSON arguments", "exit_code": 1}

    action = args.get("action", "list")
    db = SessionLocal()
    try:
        if action == "list":
            tokens = db.query(ApiToken).all()
            items = [
                {
                    "id": t.id,
                    "name": t.name,
                    # A row with no stored prefix must not fail the whole listing.
                    "token_prefix": (t.token_prefix or "") + "...",
                    "is_active": t.is_active,
                }
                for t in tokens
            ]
            return {"response": f"{len(items)} API tokens", "tokens": items, "exit_code": 0}

        if action == "create":
            import secrets
            import uuid as _uuid
            from datetime import datetime

            import bcrypt

            name = args.get("name", "API Token")
            raw_token = secrets.token_urlsafe(32)
            token_hash = bcrypt.hashpw(raw_token.encode(), bcrypt.gensalt()).decode()
            tid = str(_uuid.uuid4())[:8]
            t = ApiToken(
                id=tid, name=name, token_hash=token_hash,
                token_prefix=raw_token[:8], is_active=True,
                created_at=datetime.utcnow(), updated_at=datetime.utcnow(),
            )
            db.add(t)
            db.commit()
            # The raw token is returned here and is not stored in plain form.
            return {"response": f"Created token '{name}'", "token": raw_token, "exit_code": 0}

        if action == "delete":
            tid = args.get("token_id", "")
            if not tid:
                # Report the missing argument rather than "Token  not found".
                return {"error": "token_id is required", "exit_code": 1}
            t = db.query(ApiToken).filter(ApiToken.id == tid).first()
            if not t:
                return {"error": f"Token {tid} not found", "exit_code": 1}
            name = t.name
            db.delete(t)
            db.commit()
            return {"response": f"Deleted token '{name}'", "exit_code": 0}

        return {"error": f"Unknown action: {action}", "exit_code": 1}
    except Exception as e:
        # Tool boundary: report the failure to the model instead of raising,
        # but keep the traceback in the log.
        logger.error("manage_tokens error: %s", e, exc_info=True)
        return {"error": str(e), "exit_code": 1}
    finally:
        db.close()
# ---------------------------------------------------------------------------
# Settings/preferences management tool
# ---------------------------------------------------------------------------
async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
"""Manage user settings and preferences."""
try:
args = _parse_tool_args(content)
except ValueError:
return {"error": "Invalid JSON arguments", "exit_code": 1}
action = args.get("action", "list")
from core.database import SessionLocal
db = SessionLocal()
try:
# set/get/list/delete operate on the REAL app settings (the same store
# the Settings panel writes), so changing a model / voice / search
# engine / reminder channel from chat actually takes effect.
from src.settings import load_settings, save_settings, DEFAULT_SETTINGS
# Secrets/credentials the agent must NOT write — kept read-only (masked)
# so API keys never flow through chat. User sets these in the panel.
_SECRET_KEYS = {
"brave_api_key", "google_pse_key", "google_pse_cx",
"tavily_api_key", "serper_api_key", "app_public_url",
}
def _is_secret(k):
# `token` must be a suffix, not a substring: otherwise the int
# setting `agent_input_token_budget` (which even has a "token budget"
# alias to set it from chat) is wrongly classified as a credential.
return (
k in _SECRET_KEYS
or k.endswith("token")
or any(t in k for t in ("api_key", "_key", "secret", "password"))
)
# Friendly aliases → real keys, so natural phrasing resolves.
_ALIASES_SET = {
"voice": "tts_voice", "tts voice": "tts_voice", "tts": "tts_enabled",
"text to speech": "tts_enabled", "tts provider": "tts_provider",
"speech speed": "tts_speed", "voice speed": "tts_speed",
"stt": "stt_enabled", "speech to text": "stt_enabled", "transcription": "stt_enabled",
"search engine": "search_provider", "search provider": "search_provider",
"search results": "search_result_count", "result count": "search_result_count",
"default model": "default_model", "chat model": "default_model",
"default endpoint": "default_endpoint_id",
"task model": "task_model", "background model": "task_model",
"teacher model": "teacher_model", "teacher": "teacher_enabled",
"utility model": "utility_model", "research model": "research_model",
"research max tokens": "research_max_tokens",
"vision model": "vision_model", "vision": "vision_enabled",
"image model": "image_model", "image quality": "image_quality",
"image gen": "image_gen_enabled", "image generation": "image_gen_enabled",
"reminder channel": "reminder_channel", "reminders": "reminder_channel",
"ntfy topic": "reminder_ntfy_topic",
"webhook integration": "reminder_webhook_integration_id",
"webhook template": "reminder_webhook_payload_template", "webhook payload": "reminder_webhook_payload_template",
"agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls",
"agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds",
"token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget",
"hard max": "agent_input_token_hard_max",
"token budget cap": "agent_input_token_hard_max",
"input budget cap": "agent_input_token_hard_max",
}
def _resolve(k):
k2 = (k or "").strip().lower()
if k2 in DEFAULT_SETTINGS:
return k2
return _ALIASES_SET.get(k2, (k or "").strip())
_ENUMS = {
"image_quality": ["low", "medium", "high"],
"reminder_channel": ["browser", "email", "ntfy", "webhook"],
}
def _coerce(value, default):
if isinstance(default, bool):
return value if isinstance(value, bool) else str(value).strip().lower() in ("true", "on", "yes", "1", "enable", "enabled")
if isinstance(default, int):
return int(value)
return value
def _model_slug(value: str) -> str:
import re as _re
return _re.sub(r"[^a-z0-9]+", "", (value or "").lower())
def _endpoint_model_from_cache(model_query: str):
"""Resolve friendly model text to an enabled endpoint + real model id.
The Settings UI stores both `<prefix>_endpoint_id` and
`<prefix>_model`; writing only the model leaves the runtime on the
old endpoint. Prefer cached model lists so this stays fast/offline.
"""
import json as _json
import re as _re
from core.database import ModelEndpoint
wanted = (model_query or "").strip()
wanted_slug = _model_slug(wanted)
wanted_tokens = [_model_slug(t) for t in _re.findall(r"[A-Za-z0-9]+", wanted)]
wanted_tokens = [t for t in wanted_tokens if t]
if not wanted_slug:
return None
best = None
for ep in db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all():
raw_models = []
try:
raw_models = _json.loads(ep.cached_models or "[]") or []
except Exception:
raw_models = []
# If cache is empty, still allow matching against endpoint name
# for callers using model@endpoint elsewhere later.
for mid in raw_models:
mid = str(mid)
mid_slug = _model_slug(mid)
if not mid_slug:
continue
exact = mid.lower() == wanted.lower()
compact_match = wanted_slug in mid_slug or mid_slug in wanted_slug
token_match = bool(wanted_tokens) and all(tok in mid_slug for tok in wanted_tokens)
if exact or compact_match or token_match:
score = 3 if exact else (2 if compact_match else 1)
if not best or score > best[0]:
best = (score, ep.id, mid)
if best:
return {"endpoint_id": best[1], "model": best[2]}
return None
def _mask(k, v):
return "••••• (set in panel)" if _is_secret(k) and v else v
if action == "list":
s = load_settings()
shown = {k: _mask(k, v) for k, v in s.items() if k in DEFAULT_SETTINGS and not isinstance(v, dict)}
return {"response": f"{len(shown)} settings (use get/set with a key)", "settings": shown, "exit_code": 0}
elif action == "get":
key = _resolve(args.get("key", ""))
if not key:
return {"error": "key is required", "exit_code": 1}
if key not in DEFAULT_SETTINGS:
return {"error": f"Unknown setting '{args.get('key')}'. Use action='list' to see them.", "exit_code": 1}
val = load_settings().get(key, DEFAULT_SETTINGS.get(key))
return {"response": f"{key} = {_mask(key, val)}", "value": _mask(key, val), "exit_code": 0}
elif action == "set":
raw = args.get("key", "")
value = args.get("value")
if not raw:
return {"error": "key is required", "exit_code": 1}
key = _resolve(raw)
if key not in DEFAULT_SETTINGS:
return {"error": f"Unknown setting '{raw}'. Use action='list' to see available settings.", "exit_code": 1}
if _is_secret(key):
return {"response": f"'{key}' is a credential/secret — for security I can't set it from chat. Open Settings and set it there.", "exit_code": 0}
# Structured settings (dicts/lists like keybinds, default_model_fallbacks)
# have no safe scalar coercion — _coerce would pass a bare string
# straight through and clobber the structure. Refuse them here; they're
# edited in their dedicated panels. (reset/delete still restore the
# default structure, which is safe.)
if isinstance(DEFAULT_SETTINGS[key], (dict, list)):
return {"response": f"'{key}' is a structured setting — edit it in its panel, not from chat. (You can reset it to default here.)", "exit_code": 0}
try:
value = _coerce(value, DEFAULT_SETTINGS[key])
except (ValueError, TypeError):
return {"error": f"'{value}' isn't a valid value for {key} (expected {type(DEFAULT_SETTINGS[key]).__name__}).", "exit_code": 1}
if key in _ENUMS and str(value).lower() not in _ENUMS[key]:
return {"error": f"{key} must be one of: {', '.join(_ENUMS[key])}.", "exit_code": 1}
s = load_settings()
s[key] = value
if key in {"default_model", "research_model", "utility_model", "task_model", "vision_model", "image_model"}:
resolved = _endpoint_model_from_cache(str(value))
if resolved:
prefix = key[:-6]
s[f"{prefix}_endpoint_id"] = resolved["endpoint_id"]
s[key] = resolved["model"]
value = resolved["model"]
save_settings(s)
if key.endswith("_model") and s.get(f"{key[:-6]}_endpoint_id"):
return {"response": f"Set {key} = {value} (endpoint {s.get(f'{key[:-6]}_endpoint_id')}).", "exit_code": 0}
return {"response": f"Set {key} = {value}.", "exit_code": 0}
elif action == "delete" or action == "reset":
key = _resolve(args.get("key", ""))
if key not in DEFAULT_SETTINGS:
return {"error": f"Unknown setting '{args.get('key')}'.", "exit_code": 1}
if _is_secret(key):
return {"response": f"'{key}' is a credential — reset it in the panel.", "exit_code": 0}
s = load_settings()
s[key] = DEFAULT_SETTINGS[key]
save_settings(s)
return {"response": f"Reset {key} to default ({DEFAULT_SETTINGS[key]}).", "exit_code": 0}
elif action in ("disable_tool", "enable_tool", "list_tools"):
# Tool-toggle actions. These edit settings.json:disabled_tools
# (the global list read on every chat request) rather than
# prefs.json. Friendly aliases accepted: "shell" -> "bash",
# "search" -> "web_search", "browser" -> "builtin_browser",
# "documents" -> the document tool set, "memory" ->
# manage_memory, etc.
from src.settings import get_setting, save_settings, load_settings
_ALIASES = {
"shell": ["bash"],
"terminal": ["bash"],
"search": ["web_search"],
"web": ["web_search"],
"browser": ["builtin_browser"],
"documents": ["create_document", "edit_document", "update_document", "suggest_document"],
"doc": ["create_document", "edit_document", "update_document", "suggest_document"],
"memory": ["manage_memory"],
"skills": ["manage_skills"],
"images": ["generate_image"],
"image": ["generate_image"],
"tasks": ["manage_tasks"],
"notes": ["manage_notes"],
"calendar": ["manage_calendar"],
"email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
"research": ["web_search"], # research is a per-request flag, not a tool — closest analog
}
if action == "list_tools":
current = get_setting("disabled_tools", []) or []
return {
"response": (
f"Currently disabled: {', '.join(current) if current else '(none)'}.\n"
"Common toggles: shell (bash), search (web_search), browser, documents, "
"memory, skills, images, tasks, notes, calendar, email."
),
"disabled": list(current),
"exit_code": 0,
}
tool_name = (args.get("tool") or args.get("name") or "").strip().lower()
if not tool_name:
return {"error": "tool name required (e.g. 'shell', 'search', 'bash')", "exit_code": 1}
targets = _ALIASES.get(tool_name, [tool_name])
settings = load_settings()
current = list(settings.get("disabled_tools") or [])
before = set(current)
if action == "disable_tool":
for t in targets:
if t not in current:
current.append(t)
else: # enable_tool
current = [t for t in current if t not in targets]
after = set(current)
settings["disabled_tools"] = current
save_settings(settings)
verb = "Disabled" if action == "disable_tool" else "Enabled"
changed = sorted(after.symmetric_difference(before))
return {
"response": (
f"{verb} {tool_name} ({', '.join(targets)}). "
f"Now disabled: {', '.join(current) if current else '(none)'}."
),
"changed": changed,
"disabled": list(current),
"exit_code": 0,
}
else:
return {"error": f"Unknown action: {action}", "exit_code": 1}
except Exception as e:
logger.error(f"manage_settings error: {e}")
return {"error": str(e), "exit_code": 1}
finally:
db.close()
# ---------------------------------------------------------------------------
# API call tool
# ---------------------------------------------------------------------------
async def do_api_call(content: str) -> Dict:
"""Execute an API call to a registered integration."""
from src.integrations import execute_api_call, load_integrations
@@ -1545,10 +945,10 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
text = str(raw).strip().lower()
if text in {"none", "no", "off", "false"}:
return None
m = re.search(r"(\d+)\s*(?:m|min|minute|minutes)\b", text)
m = re.search(r"(\d+)\s*(?:minutes?|mins?|m)\b", text)
if m:
return max(0, int(m.group(1)))
m = re.search(r"(\d+)\s*(?:h|hr|hour|hours)\b", text)
m = re.search(r"(\d+)\s*(?:hours?|hrs?|h)\b", text)
if m:
return max(0, int(m.group(1)) * 60)
if text.isdigit():
@@ -1561,7 +961,7 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
return desc
reminder_only = re.compile(
r"^\s*(?:remind(?:er)?|alarm)\s*:?\s*\d+\s*"
r"(?:m|min|minute|minutes|h|hr|hour|hours)\b.*$",
r"(?:minutes?|mins?|m|hours?|hrs?|h)\b.*$",
re.I,
)
return "" if reminder_only.match(desc) else desc
@@ -2680,13 +2080,25 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
)
note = "" if registered else " (state-write failed — task may not show in UI)"
where = host or "local"
log_path = f"/tmp/odysseus-tmux/{sid}.log"
return {
"output": f"Serving {repo_id} (session: {sid}){note}",
"output": (
f"Serving {repo_id} on {where} (session: {sid}){note}\n"
f"Next required check: call list_served_models. If this task is not ready, "
f"call tail_serve_output with session_id={sid} and tail=400 before answering. "
f"Do not tell the user to check logs; you have the log tool."
),
"session_id": sid,
"task_type": "serve",
"phase": "running",
"host": host,
"endpoint_id": endpoint_id,
"log_path": log_path,
"next_tools": [
{"name": "list_served_models", "arguments": {}},
{"name": "tail_serve_output", "arguments": {"session_id": sid, "tail": 400}},
],
"exit_code": 0,
}
# FastAPI HTTPException puts the message under `detail`, not `error`.
@@ -2854,6 +2266,10 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "",
break
if remote:
try:
remote, sport = _validate_cookbook_ssh_target(remote, sport)
except HTTPException as e:
return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
_pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else ""
cmd = (
f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
@@ -2942,8 +2358,8 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
tail = 400
tail = max(20, min(tail, 4000))
headers = _internal_headers()
remote = (args.get("remote_host") or args.get("host") or "").strip()
sport = (args.get("ssh_port") or "").strip()
remote = _string_arg(args.get("remote_host") or args.get("host"))
sport = _string_arg(args.get("ssh_port"))
# Resolve host from cookbook state if caller didn't pass one — same
# lookup _cookbook_kill_session uses.
if not remote:
@@ -2961,6 +2377,12 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
if not sport:
sport = t.get("sshPort") or ""
break
if remote:
try:
remote, sport = _validate_cookbook_ssh_target(remote, sport)
except HTTPException as e:
return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
# Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the
# live tmux pane. The pane is what the user would see scrolling on
# their screen — including the post-crash neofetch banner and the
@@ -3023,8 +2445,17 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
MAX_CHARS = 8000
if len(output_text) > MAX_CHARS:
output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:]
if not output_text:
output_text = (
f"No log output captured yet for {session_id} on {host_label}. "
"This usually means the tmux wrapper has started but the model process "
"has not printed anything yet. Do not stop here: call list_served_models "
"again to check whether it is still loading, ready, or crashed; if it is "
"still not ready, call tail_serve_output again with a larger tail after "
"the next status check."
)
return {
"output": output_text or "(empty pane)",
"output": output_text,
"session_id": session_id,
"host": host_label,
"tail_lines": tail,
@@ -3138,7 +2569,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
except ValueError:
return {"error": "Invalid JSON arguments", "exit_code": 1}
host = (args.get("host") or args.get("remote_host") or "").strip()
host = _string_arg(args.get("host") or args.get("remote_host"))
sess = (args.get("tmux_session") or args.get("session_id") or "").strip()
model = (args.get("model") or args.get("repo_id") or "").strip()
port = args.get("port") or 8000
@@ -3149,6 +2580,12 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
return {"error": "tmux_session and model are required", "exit_code": 1}
# Verify tmux session exists on the target host
if host:
try:
host, _ = _validate_cookbook_ssh_target(host)
except HTTPException as e:
return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
headers = _internal_headers()
if host:
check = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)} 2>&1'"
@@ -3232,7 +2669,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
host_only = host.split("@", 1)[-1] if host else "localhost"
endpoint_url = f"http://{host_only}:{int(port)}/v1"
try:
from src.tool_implementations import do_manage_endpoints # avoid forward ref issues
from src.agent_tools.admin_tools import do_manage_endpoints # moved in #3629
except Exception:
do_manage_endpoints = None
if do_manage_endpoints is not None:
@@ -3763,7 +3200,7 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
if not name:
return {"error": "name is required", "exit_code": 1}
contacts = {} # email -> {name, source}
contacts = {} # email_or_phone -> {name, source, phone?}
# 1. CardDAV (Radicale) — structured contacts. Call in-process: a
# server-side httpx GET to /api/contacts/search carries no session
@@ -3778,10 +3215,18 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
match = q in hay_name or any(q in (e or "").lower() for e in c.get("emails", []))
if not match:
continue
has_email = False
for email in (c.get("emails") or []):
email = (email or "").strip().lower()
if email and "@" in email:
contacts[email] = {"name": c.get("name") or email, "source": "contacts"}
has_email = True
# Fall back to phone numbers when the contact has no email address
if not has_email:
for phone in (c.get("phones") or []):
phone = (phone or "").strip()
if phone:
contacts[phone] = {"name": c.get("name") or phone, "source": "contacts", "phone": phone}
except Exception:
pass
@@ -3801,8 +3246,11 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
return {"output": f"No contacts found matching '{name}'.", "exit_code": 0}
lines = [f"Contacts matching '{name}':"]
for email, info in contacts.items():
lines.append(f"- {info['name']} <{email}> ({info['source']})")
for key, info in contacts.items():
if info.get("phone"):
lines.append(f"- {info['name']} — phone: {info['phone']} ({info['source']})")
else:
lines.append(f"- {info['name']} <{key}> ({info['source']})")
return {"output": "\n".join(lines), "exit_code": 0}
+81 -6
View File
@@ -88,23 +88,24 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
"pipeline": "Run a multi-step AI pipeline with multiple models. Chain tasks together in sequence.",
"list_models": "List all available AI models and their endpoints.",
"manage_session": "Chat management: rename, archive, delete, or fork chats (the UI calls these 'chats'; internally 'sessions'). Use for 'rename my chats', 'rename this chat', 'archive/delete a chat'.",
"manage_memory": "Memory management: list, add, edit, delete, or search persistent memories.",
"manage_memory": "Memory management: list, add, edit, delete, or search persistent memories. For facts about the USER (their name, preferences, where they live). NOT for info about ANOTHER person — addresses, phones, emails belonging to a contact go in manage_contact, not memory.",
"manage_skills": "Skill management: add, update, publish, or search reusable skills/presets.",
"manage_tasks": "Scheduled task management: list, create, edit, delete, pause, resume, or run cron tasks.",
"manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
"manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
"manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
"api_call": "Call a configured API integration by name (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, RSS reader, git forge, bookmark manager, smart home, or any other registered service). Make a GET/POST/PUT/PATCH/DELETE request to the integration's endpoint path, with an optional JSON body. Use whenever the user asks to query or control one of their connected integrations/services.",
"manage_tokens": "API token management: list, create, or delete API access tokens.",
"manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content. action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
"manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
"manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
"manage_settings": "Change ANY real app setting (the ones the Settings panel writes) so the user never has to open it: TTS voice/provider/speed, STT, search engine + result count, default/teacher/task/utility/vision/image/research models, image quality, reminder channel (browser/email/ntfy), agent timeout/tool-call budget, and more. action=set with key (friendly aliases ok: voice, 'search engine', 'default model', 'teacher model', 'image quality', 'reminder channel'...) + value; get/list/reset too. Also toggles tools on/off (disable_tool/enable_tool/list_tools). Secrets/API keys are read-only. Use for any 'change my…/set my…/use X for…/turn on…' preference request.",
"create_session": "Create a new chat with a name and model.",
"list_sessions": "List all chats with their metadata (the UI calls these 'chats'). Use for 'list my chats', 'rename all my chats' (list first, then manage_session to rename each).",
"send_to_session": "Send a message to another chat. Cross-chat communication.",
"search_chats": "Search past session transcripts across chats.",
"ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
"ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Omit `multi`/keep it false unless the question explicitly permits choosing multiple options. Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
"update_plan": "Write back to the ACTIVE PLAN while executing an approved plan: mark steps done or revise them. After finishing a step call this with the full checklist and that step marked done; when the user asks to change the plan call it with the revised checklist. Always pass the COMPLETE markdown checklist (`- [ ]` / `- [x]`), not a diff. The user's docked plan window updates live. No effect when there is no active plan.",
"ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
"ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. To pre-fill the reply body in one shot (USE THIS whenever the user told you what to say — opening an empty draft when they asked you to write is wrong), append the body after the mode: `open_email_reply <uid> <folder> reply <body text>`. Body can continue on subsequent lines for multi-line replies. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
"list_email_accounts": "List configured email accounts and default status. Use before reading or sending mail when the user mentions Gmail, work mail, custom domain mail, another mailbox, or asks to compare/check multiple inboxes.",
"list_emails": "List emails for a folder/account, newest first, including read messages by default. Shows subject, sender, date, UID, account, and AI summary. Check inbox, find emails needing replies. Supports account from list_email_accounts for Gmail/work/custom mailboxes. For last/latest/newest email, use max_results=1 and unread_only=false.",
"read_email": "Read the full content of a specific email by UID or Message-ID. View email body, check details. Supports account from list_email_accounts when the UID belongs to a non-default mailbox.",
@@ -115,7 +116,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
"mark_email_read": "Mark an email as read or unread by toggling the \\Seen flag.",
"bulk_email": "Perform one action on many emails at once. Use for delete all those, archive these, mark all read, move spam to junk. Takes explicit UIDs from list_emails or all_unread=true. Always pass account for Gmail/work/custom mailbox results.",
"resolve_contact": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]', 'email [name]', or 'send to [name]' without an email address.",
"manage_contact": "Create, update, delete, or list CardDAV contacts. Use to save a new contact, change an existing one's email/phone, or remove one. Action=list returns uids needed for update/delete. Use when the user says 'save this contact', 'add [name] to contacts', 'update [name]'s email', 'delete [name] from contacts'. Do not use for user identity facts like 'my name is <name>'; those are memory.",
"manage_contact": "Save / update / delete / list address-book contacts (CardDAV). Use for info about ANOTHER person — name, email, phone, postal address. Args: action=list|add|update|delete, name, email, phones, address, uid (from list). For 'save this for <person>' / address pastes / phone numbers next to a name, this is the right tool — NOT manage_memory. Do NOT use for facts about the USER ('my name is X'); those are manage_memory.",
"manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.",
"manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.",
"download_model": "Download a HuggingFace model to a local or remote server. Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.",
@@ -134,6 +135,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
"app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter=<keyword> lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.",
"edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.",
"trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.",
"manage_bg_jobs": "Inspect and control detached background `bash` jobs (the ones started with a `#!bg` marker). action='list' shows this chat's jobs (id/status/age/command); action='output' returns a job's captured output so far (check on a long-running job, or re-read a finished one); action='kill' stops a runaway job by id. Use for 'is the background job done', 'check on that job', 'show the build output', 'kill the background job', 'stop the bg task'. output/kill need a job_id from list.",
}
@@ -348,6 +350,12 @@ class ToolIndex:
{"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"},
frozenset({"calendar", "event", "meeting", "schedule", "appointment"}):
{"manage_calendar"},
# Detached background `bash` jobs (#!bg): check on / read output / kill.
frozenset({"background job", "background jobs", "bg job", "bg jobs",
"background task", "is the job done", "check the job",
"check on that job", "job output", "kill the job",
"kill the background", "stop the background", "running job"}):
{"manage_bg_jobs"},
frozenset({"note", "todo", "reminder", "remind", "checklist", "remember to"}):
{"manage_notes"},
# Chat/session management. "rename" alone maps to documents below, so a
@@ -372,7 +380,19 @@ class ToolIndex:
{"resolve_contact", "manage_contact"},
frozenset({"save contact", "add contact", "new contact", "update contact",
"edit contact", "delete contact", "remove contact",
"save this person", "add to contacts", "save to contacts"}):
"save this person", "add to contacts", "save to contacts",
# "add <name> to (my) contacts" — words between 'add' and
# 'contacts' break the literal phrase match above, so anchor
# on the tail.
"to my contacts", "to contacts", "to address book",
# "save this for <person>" / "save it for <person>" — the user
# is storing info on a known person without using the literal
# word 'contact'. Catches the address/phone-paste pattern.
"save this for", "save it for", "save for",
"save this one for", "save that for",
# Postal-address-like signals
"postal code", "zip code", "street address",
"mailing address", "their address"}):
{"manage_contact"},
# "Ask another model" intent → chat_with_model relays to a
# different model and returns its answer. ask_teacher escalates
@@ -402,6 +422,14 @@ class ToolIndex:
"my settings", "change setting", "change a setting", "set setting",
"preference", "preferences", "configure"}):
{"manage_settings", "ui_control"},
# API-integration intent → the api_call tool. Mirrors the agent-loop
# "integrations" domain so api_call still surfaces on the retrieval and
# keyword-fallback paths (not just the deterministic domain seed) when a
# user names a connected service.
frozenset({"api_call", "api call", "integration", "integrations",
"home assistant", "homeassistant", "miniflux", "gitea",
"linkding", "jellyfin"}):
{"api_call"},
# Managing EXISTING research in the Library — open/read/find/delete.
frozenset({"my research", "the research", "research on", "open research",
"read research", "find research", "delete research",
@@ -507,6 +535,53 @@ class ToolIndex:
# prompts do not drag web schemas into the agent context.
if self._WEB_RE.search(query):
base.update({"web_search", "web_fetch"})
# Hard steering: when the query is a clear "save info about a specific
# person" pattern (address paste + name, phone next to a name, etc.),
# the model has been observed defaulting to manage_memory even with
# manage_contact in the toolset. Pull memory out for these queries so
# the model literally cannot pick it. ALWAYS_AVAILABLE includes
# manage_memory by default; we override that here.
# The "for/to <word>" check needs to allow lowercase names (users
# don't always capitalize) but filter out timing/pronoun stopwords
# so "save this for later" / "save for tomorrow" don't trigger.
_CONTACT_STOPWORDS_AFTER_FOR = {
"later", "tomorrow", "yesterday", "now", "then", "today",
"tonight", "me", "us", "you", "him", "her", "them", "myself",
"yourself", "next", "this", "that", "the", "a", "an", "future",
"real", "use", "uses", "another", "future", "reference",
}
# Regex catches "save (this|it|the|her|...|<noun>) for <name>" / "to my
# contacts" patterns. More forgiving than literal-keyword matching —
# 'save this address for Alex' uses one extra word between 'save' and
# 'for' that breaks the contiguous 'save this for' phrase.
save_for_match = re.search(
r"\bsave\b(?:\s+\w+){0,3}\s+(?:for|to)\s+([A-Za-z]+)",
ql,
)
# "to my contacts", "into my contacts", "in my address book", etc.
to_contacts = re.search(r"\b(?:to|in|into)\s+(?:my\s+)?(?:contacts|address\s+book)\b", ql)
# Possessive: "save (his|her|their) (address|phone|email|number) ..."
# — strong contact signal even without "for <name>". Force-include
# manage_contact here too since the keyword fallback misses this
# construction.
possessive_contact = re.search(
r"\bsave\b(?:\s+\w+){0,2}\s+(?:his|her|their)\s+(?:address|phone|number|email|contact|details)",
ql,
)
word_after = (
save_for_match.group(1).lower() if save_for_match else None
)
contact_only_signal = (
(save_for_match is not None
and word_after is not None
and word_after not in _CONTACT_STOPWORDS_AFTER_FOR)
or to_contacts is not None
or possessive_contact is not None
)
if possessive_contact is not None:
base.add("manage_contact")
if contact_only_signal and "manage_contact" in base:
base.discard("manage_memory")
return base
+270 -4
View File
@@ -39,6 +39,10 @@ _XML_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>",
re.IGNORECASE,
)
_XML_OPEN_TOOL_CALL_RE = re.compile(
r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z",
re.IGNORECASE,
)
_XML_INVOKE_RE = re.compile(
r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>',
re.IGNORECASE,
@@ -47,6 +51,21 @@ _XML_PARAM_RE = re.compile(
r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>',
re.IGNORECASE,
)
_XML_DIRECT_TOOL_RE = re.compile(
r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
re.IGNORECASE,
)
# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
# <tool▁calls▁begin> ... <tool▁calls▁end>
# <tool▁call▁begin>tool_name<tool▁sep>{...}<tool▁call▁end>
# These can leak as text through llama.cpp/Ollama-style endpoints when the
# engine does not return structured OpenAI tool_calls.
_STEPFUN_CALL_BEGIN = "<tool▁call▁begin>"
_STEPFUN_CALL_SEP = "<tool▁sep>"
_STEPFUN_CALL_END = "<tool▁call▁end>"
_STEPFUN_CALLS_BEGIN = "<tool▁calls▁begin>"
_STEPFUN_CALLS_END = "<tool▁calls▁end>"
# Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
# {tool => 'tool_name', args => '<param>value</param>'}
@@ -175,6 +194,9 @@ _TOOL_NAME_MAP = {
"notes": "manage_notes",
"todo": "manage_notes",
"todos": "manage_notes",
"manage_bg_jobs": "manage_bg_jobs",
"bg_jobs": "manage_bg_jobs",
"background_jobs": "manage_bg_jobs",
}
_MISFENCED_WEB_TOOL_NAMES = {
@@ -286,6 +308,88 @@ def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]:
return ToolBlock("web_fetch", url)
def _is_non_negative_int(value) -> bool:
    """Return True only for a genuine non-negative ``int``.

    ``bool`` is a subclass of ``int`` in Python, so ``True``/``False`` would
    otherwise be accepted as 1/0 and serialized as JSON booleans.
    """
    return isinstance(value, int) and not isinstance(value, bool) and value >= 0


def _parse_misfenced_read_file_lookup(content: str, *, allow_shell_style: bool = False) -> Optional[ToolBlock]:
    """Recover simple read_file calls wrapped in python/bash fences.

    Two shapes are recognised:

    * Call style: a single expression such as
      ``read_file("path", offset=0, limit=100)``. At most one positional
      argument (the path) is allowed; keywords are limited to
      ``path``/``file``/``file_path``/``offset``/``limit``.
    * Shell style (only when ``allow_shell_style`` is true): a single
      non-empty line of the form ``read_file <path>`` or
      ``read_file {json-object}``.

    Args:
        content: Raw text found inside the fence.
        allow_shell_style: Also accept the one-line shell-like form.

    Returns:
        A tool block for ``read_file``, or ``None`` when the content does not
        match one of the supported shapes.
    """
    stripped = content.strip()
    if not stripped:
        return None

    try:
        module = ast.parse(stripped, mode="exec")
    except (SyntaxError, ValueError):
        # ValueError: older interpreters reject NUL bytes with ValueError
        # instead of SyntaxError; either way this is not a Python call.
        module = None

    if module and len(module.body) == 1 and isinstance(module.body[0], ast.Expr):
        call = module.body[0].value
        if isinstance(call, ast.Call) and isinstance(call.func, ast.Name):
            if call.func.id.lower() != "read_file" or len(call.args) > 1:
                return None

            args = {}
            if call.args:
                path = _literal_string(call.args[0])
                if not path:
                    return None
                args["path"] = path

            allowed = {"path", "file", "file_path", "offset", "limit"}
            for keyword in call.keywords:
                # ``**kwargs`` shows up with arg=None and is rejected here too.
                if keyword.arg not in allowed:
                    return None
                key = "path" if keyword.arg in ("file", "file_path") else keyword.arg
                if key == "path":
                    path = _literal_string(keyword.value)
                    if not path:
                        return None
                    args["path"] = path
                    continue
                try:
                    value = ast.literal_eval(keyword.value)
                except (ValueError, SyntaxError, TypeError):
                    return None
                # offset/limit must be real non-negative integers (not bools).
                if not _is_non_negative_int(value):
                    return None
                args[key] = value

            if not args.get("path"):
                return None
            from src.tool_schemas import function_call_to_tool_block
            return function_call_to_tool_block("read_file", json.dumps(args))

    if not allow_shell_style:
        return None

    lines = [line.strip() for line in stripped.splitlines() if line.strip()]
    if len(lines) != 1:
        return None
    match = re.fullmatch(r"read_file\s+(.+)", lines[0], re.IGNORECASE)
    if not match:
        return None
    path = match.group(1).strip()
    if not path:
        return None

    if path.startswith("{"):
        # JSON-object argument form: read_file {"path": "...", "offset": 0}
        try:
            args = json.loads(path)
        except json.JSONDecodeError:
            return None
        if not isinstance(args, dict):
            return None
        normalized = {}
        raw_path = args.get("path") or args.get("file") or args.get("file_path")
        if isinstance(raw_path, str) and raw_path.strip():
            normalized["path"] = raw_path.strip()
        for key in ("offset", "limit"):
            value = args.get(key)
            # Invalid offset/limit values are dropped rather than failing the call.
            if _is_non_negative_int(value):
                normalized[key] = value
        if not normalized.get("path"):
            return None
        from src.tool_schemas import function_call_to_tool_block
        return function_call_to_tool_block("read_file", json.dumps(normalized))

    # Bare path form; strip one layer of matching quotes if present.
    if len(path) >= 2 and path[0] == path[-1] and path[0] in "'\"":
        path = path[1:-1].strip()
    if not path:
        return None
    return ToolBlock("read_file", path)
def _coerce_raw_web_query(value) -> Optional[str]:
if isinstance(value, str) and value.strip():
return value.strip()
@@ -443,6 +547,138 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
return function_call_to_tool_block(tool_name, json.dumps(params))
def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
    """Parse a direct XML tool tag found inside <tool_call>.

    Some local models emit:
        <tool_call><web_search>query</web_search></tool_call>
    instead of the invoke/parameter shape:
        <tool_call><invoke name="web_search"><parameter name="query">query</parameter></invoke></tool_call>

    Keep this as an adapter to the canonical function-call converter so aliases
    and per-tool argument formatting stay in one place.

    Args:
        tool_match: Regex match whose group 1 is the tag name and whose
            group 2 is the tag body.

    Returns:
        Whatever ``function_call_to_tool_block`` returns for the resolved
        tool, or ``None`` when the tag is a structural wrapper, does not
        resolve to a known tool, or has an empty body.
    """
    tool_name = tool_match.group(1).lower().replace("-", "_")
    # Structural wrapper tags are handled by the invoke-style parsers.
    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
        return None
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = tool_match.group(2).strip()
    if not body:
        return None
    try:
        params = json.loads(body)
    except json.JSONDecodeError:
        params = None
    if not isinstance(params, dict):
        # The body is raw text rather than a JSON object. This also covers
        # text that merely happens to be valid JSON (``true``, ``42``,
        # ``"exact phrase"``, ``[1, 2]``): dropping it would run the tool
        # with empty arguments and silently lose the query/command/code.
        raw_key = {
            "web_search": "query",
            "web_fetch": "url",
            "bash": "command",
            "python": "code",
            "read_file": "path",
            "write_file": "path",
        }.get(mapped, "content")
        params = {raw_key: body}
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
def _iter_stepfun_tool_calls(text: str):
    """Yield StepFun native tool-call token bodies without regex backtracking.

    Scans ``text`` for ``_STEPFUN_CALL_BEGIN ... _STEPFUN_CALL_SEP ...
    _STEPFUN_CALL_END`` spans using literal ``str.find`` calls only.

    Yields:
        ``(raw_name, body)`` tuples: the stripped text between the begin and
        separator tokens, and the stripped text between the separator and
        end tokens. Spans whose name is empty or longer than 128 characters
        are skipped.
    """
    begin_len = len(_STEPFUN_CALL_BEGIN)
    sep_len = len(_STEPFUN_CALL_SEP)
    end_len = len(_STEPFUN_CALL_END)
    pos = 0
    while True:
        start = text.find(_STEPFUN_CALL_BEGIN, pos)
        if start < 0:
            return
        name_start = start + begin_len
        # Locate the end of *this* call first so the separator search cannot
        # run into a later call.
        end = text.find(_STEPFUN_CALL_END, name_start)
        if end < 0:
            # Unterminated (e.g. still streaming): nothing complete remains.
            return
        pos = end + end_len
        sep = text.find(_STEPFUN_CALL_SEP, name_start, end)
        if sep < 0:
            # Malformed call without a separator: skip only this span instead
            # of borrowing the next call's separator and swallowing it.
            continue
        raw_name = text[name_start:sep].strip()
        body = text[sep + sep_len:end].strip()
        if raw_name and len(raw_name) <= 128:
            yield raw_name, body
def _strip_stepfun_tool_markup(text: str) -> str:
    """Drop StepFun tool-call spans and their wrapper tokens via literal scans.

    Every complete ``_STEPFUN_CALL_BEGIN ... _STEPFUN_CALL_END`` span is
    removed. An unterminated begin token and everything after it is kept
    verbatim. Finally the ``_STEPFUN_CALLS_BEGIN`` / ``_STEPFUN_CALLS_END``
    wrapper tokens are deleted wherever they remain.
    """
    open_token = _STEPFUN_CALL_BEGIN
    close_token = _STEPFUN_CALL_END
    kept = []
    cursor = 0
    opened = text.find(open_token, cursor)
    while opened >= 0:
        closed = text.find(close_token, opened + len(open_token))
        if closed < 0:
            # No matching end token: leave the remainder untouched.
            break
        kept.append(text[cursor:opened])
        cursor = closed + len(close_token)
        opened = text.find(open_token, cursor)
    kept.append(text[cursor:])
    result = "".join(kept)
    for wrapper in (_STEPFUN_CALLS_BEGIN, _STEPFUN_CALLS_END):
        result = result.replace(wrapper, "")
    return result
def _strip_bare_invoke_markup(text: str) -> str:
"""Remove bare <invoke ...>...</invoke> blocks without regex backtracking."""
out = []
pos = 0
while True:
start = text.lower().find("<invoke", pos)
if start < 0:
out.append(text[pos:])
break
tag_end = text.find(">", start)
if tag_end < 0:
out.append(text[pos:])
break
close = text.lower().find("</invoke>", tag_end + 1)
if close < 0:
out.append(text[pos:])
break
out.append(text[pos:start])
pos = close + len("</invoke>")
return "".join(out)
def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
    """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.

    Args:
        tool_name: Raw tool name from the token stream; it is lower-cased and
            ``-`` / ``.`` are normalised to ``_`` before lookup.
        body: Raw call body: either a JSON object of arguments or plain text.

    Returns:
        Whatever ``function_call_to_tool_block`` returns for the resolved
        tool, or ``None`` when the name does not resolve to a known tool or
        the body is empty.
    """
    tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = (body or "").strip()
    if not body:
        return None
    try:
        params = json.loads(body)
    except json.JSONDecodeError:
        params = None
    if not isinstance(params, dict):
        # The body is raw text rather than a JSON object. This also covers
        # text that merely happens to be valid JSON (``true``, ``42``,
        # ``"exact phrase"``, ``[1, 2]``): dropping it would run the tool
        # with empty arguments and silently lose the query/command/code.
        raw_key = {
            "web_search": "query",
            "web_fetch": "url",
            "bash": "command",
            "python": "code",
            "read_file": "path",
            "write_file": "path",
        }.get(mapped, "content")
        params = {raw_key: body}
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
"""Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style)."""
# Extract tool name
@@ -508,8 +744,9 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models)
3. XML-style <tool_call>/<invoke> blocks
4. <tool_code> blocks (MiniMax-M2.5 style)
5. DeepSeek DSML markup (normalized to <invoke> first)
6. Non-native local model fallback: prose mentioning web_search followed by
5. StepFun Step-3 native <toolcallbegin> tokens
6. DeepSeek DSML markup (normalized to <invoke> first)
7. Non-native local model fallback: prose mentioning web_search followed by
bare JSON args, e.g. {"query":"...", "time_filter":"week"}
`skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code
@@ -549,7 +786,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code.
continue
if tag in ("python", "bash"):
block = _parse_misfenced_web_lookup(content)
block = (_parse_misfenced_web_lookup(content)
or _parse_misfenced_read_file_lookup(content, allow_shell_style=(tag == "bash")))
if block:
blocks.append(block)
continue
@@ -564,12 +802,38 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# Pattern 3: XML-style <tool_call>/<invoke> blocks
if not blocks:
for tool_name, body in _iter_stepfun_tool_calls(text):
block = _parse_stepfun_tool_call(tool_name, body)
if block:
blocks.append(block)
if blocks:
return blocks
# Try wrapped: <tool_call><invoke ...>...</invoke></tool_call>
for m in _XML_TOOL_CALL_RE.finditer(text):
for inv in _XML_INVOKE_RE.finditer(m.group(1)):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if not blocks:
for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Some local models stream an opening <tool_call> wrapper and a
# complete inner tool tag, but forget the closing </tool_call>.
if not blocks:
for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
body = m.group(1)
for inv in _XML_INVOKE_RE.finditer(body):
block = _parse_xml_invoke(inv)
if block:
blocks.append(block)
if blocks:
break
for direct in _XML_DIRECT_TOOL_RE.finditer(body):
block = _parse_xml_direct_tool(direct)
if block:
blocks.append(block)
# Try bare <invoke> without wrapper
if not blocks:
for inv in _XML_INVOKE_RE.finditer(text):
@@ -611,7 +875,9 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
text = _normalize_dsml(text)
cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
cleaned = _TOOL_CALL_RE.sub('', cleaned)
cleaned = _strip_stepfun_tool_markup(cleaned)
cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _TOOL_CODE_RE.sub('', cleaned)
if not skip_fenced:
raw_web_json = _parse_raw_web_json_lookup(cleaned)
@@ -619,6 +885,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
_, (start, end) = raw_web_json
cleaned = cleaned[:start] + cleaned[end:]
# Strip bare <invoke> blocks not wrapped in <tool_call>
cleaned = re.sub(r'<invoke\s+name=["\'].*?</invoke>', '', cleaned, flags=re.DOTALL | re.IGNORECASE)
cleaned = _strip_bare_invoke_markup(cleaned)
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
return cleaned.strip()
+39 -13
View File
@@ -68,11 +68,12 @@ FUNCTION_TOOL_SCHEMAS = [
"type": "function",
"function": {
"name": "web_fetch",
"description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research).",
"description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research). Downloads are size-budgeted; a '[partial content: ...]' notice in the result means the body was cut short and you can re-call with full=true for the rest.",
"parameters": {
"type": "object",
"properties": {
"url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"}
"url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"},
"full": {"type": "boolean", "description": "Raise the download budget to the hard cap for large pages/files. Use only after a result reported partial content."}
},
"required": ["url"]
}
@@ -466,7 +467,7 @@ FUNCTION_TOOL_SCHEMAS = [
"question": {"type": "string", "description": "The question to ask. Be specific and self-contained."},
"options": {
"type": "array",
"description": "2-6 mutually exclusive choices. Each is an object with a short `label` and an optional `description` explaining the trade-off.",
"description": "2-6 choices. Each is an object with a short `label` and an optional `description` explaining the trade-off.",
"items": {
"type": "object",
"properties": {
@@ -476,7 +477,7 @@ FUNCTION_TOOL_SCHEMAS = [
"required": ["label"]
}
},
"multi": {"type": "boolean", "description": "Set true to let the user select multiple options instead of one. Default false."}
"multi": {"type": "boolean", "description": "Set true ONLY when the question explicitly allows choosing more than one option. Otherwise omit it or set false. Default false."}
},
"required": ["question", "options"]
}
@@ -1008,7 +1009,7 @@ FUNCTION_TOOL_SCHEMAS = [
"type": "function",
"function": {
"name": "resolve_contact",
"description": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]' or 'email [name]' without an email address.",
"description": "Look up a contact by name. Searches CardDAV address book and sent email history. Returns email addresses (when available) or phone numbers. Use when the user says 'message [name]', 'email [name]', or asks for someone's contact details.",
"parameters": {
"type": "object",
"properties": {
@@ -1022,7 +1023,7 @@ FUNCTION_TOOL_SCHEMAS = [
"type": "function",
"function": {
"name": "manage_contact",
"description": "Create, update, delete, or list the user's CardDAV contacts. Use to save a new contact ('save Jonathan's email jon@x.com'), update an existing one ('change Maria's number'), or remove one. For update/delete you need the contact's uid — call action='list' first to find it. Writes go through the same dedupe + validation as the Contacts UI.",
"description": "Create, update, delete, or list the user's CardDAV contacts. Use to save a new contact, update an existing one (email/phone/address), or remove one. For update/delete you need the contact's uid — call action='list' first to find it. Writes go through the same dedupe + validation as the Contacts UI.",
"parameters": {
"type": "object",
"properties": {
@@ -1033,6 +1034,7 @@ FUNCTION_TOOL_SCHEMAS = [
"email": {"type": "string", "description": "Single email address (convenience for add, or the primary email for update)."},
"emails": {"type": "array", "items": {"type": "string"}, "description": "Full list of email addresses (for update; first is primary)."},
"phones": {"type": "array", "items": {"type": "string"}, "description": "Full list of phone numbers (for update)."},
"address": {"type": "string", "description": "Postal/mailing address as a single human-readable string."},
},
"required": ["action"]
}
@@ -1186,6 +1188,21 @@ FUNCTION_TOOL_SCHEMAS = [
}
}
},
{
"type": "function",
"function": {
"name": "manage_bg_jobs",
"description": "Inspect and control detached background `bash` jobs (started with the `#!bg` marker). action='list' shows this chat's jobs with id/status/age/command; action='output' returns a job's captured output so far (use for a still-running job, or to re-read a finished one); action='kill' terminates a runaway job's process tree instead of waiting out its max-runtime. output and kill need job_id from list.",
"parameters": {
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["list", "output", "kill"], "description": "list | output | kill (default: list)"},
"job_id": {"type": "string", "description": "Background job id (required for output/kill; from action='list')"},
},
"required": ["action"]
}
}
},
]
@@ -1204,23 +1221,26 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
logger.error(f"Failed to parse function call arguments for {name}: {arguments}")
return None
tool_type = _TOOL_NAME_MAP.get(name, name)
_BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
"archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
# Some models emit valid JSON that isn't an object (e.g. a bare array
# ["ls -la"], string, or number) as the function arguments. Every branch
# below assumes a dict and calls args.get(...), so a non-dict would raise
# AttributeError and abort the whole agent stream. Coerce to {} instead.
# ["ls -la"], string, or number) as function arguments. Most local tools keep
# the legacy empty-object coercion for stream robustness, but email MCP tools
# must fail closed so a malformed call cannot read the default mailbox.
if not isinstance(args, dict):
if tool_type.startswith("mcp__email__") or name in _BUILTIN_EMAIL_TOOLS:
logger.warning(f"Non-object email function call arguments for {name}: {args!r}; rejecting")
return None
logger.warning(f"Non-object function call arguments for {name}: {args!r}; treating as empty")
args = {}
tool_type = _TOOL_NAME_MAP.get(name, name)
# Allow MCP tools through (namespaced as mcp__serverid__toolname)
if tool_type.startswith("mcp__"):
content = json.dumps(args) if args else "{}"
return ToolBlock(tool_type, content)
# Email tools are implemented as MCP — route them to email
_BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
"archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
if name in _BUILTIN_EMAIL_TOOLS:
return ToolBlock(f"mcp__email__{name}", json.dumps(args) if args else "{}")
if tool_type not in TOOL_TAGS:
@@ -1386,6 +1406,12 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
content = json.dumps(args)
elif tool_type == "ask_teacher":
content = args.get("model", "auto") + "\n" + args.get("problem", "")
elif tool_type == "ask_user":
# Keep user-facing labels readable in the tool trace. The outer SSE
# JSON encoder will escape them for transport and JSON.parse restores
# them once; pre-escaping here caused literal ``\u00f1`` sequences to
# remain visible in the debug panel.
content = json.dumps(args, ensure_ascii=False)
else:
content = json.dumps(args)
+3
View File
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
NON_ADMIN_BLOCKED_TOOLS = {
"bash",
"python",
"manage_bg_jobs",
"read_file",
"write_file",
"edit_file",
@@ -114,6 +115,8 @@ _PLAN_MODE_KNOWN_MUTATORS = {
# Shell is never read-only-safe; block it explicitly so it stays out of plan
# mode even if the schema list fails to load.
"bash", "python",
# Controls shell processes (kill); plan mode can't run bash anyway.
"manage_bg_jobs",
}
+35
View File
@@ -4,6 +4,8 @@ src.constants which imports nothing from src). Adding a project import here
will reintroduce the circular dependency that this module exists to break.
"""
import json
from src.constants import MAX_OUTPUT_CHARS
_mcp_manager = None
@@ -37,3 +39,36 @@ def _truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str:
if len(text) > limit:
return text[:limit] + f"\n... (truncated, {len(text)} chars total)"
return text
def _parse_tool_args(content):
"""Parse a tool-call argument blob.
Accepts either a JSON string or an already-decoded dict. Unwraps the
common `{"body": {...}}` envelope that smaller models emit when they
read tool descriptions like "Body is JSON: {...}" literally and
pass `body` as a field name rather than treating it as a noun.
Returns a dict on success, raises ValueError on bad JSON.
"""
if isinstance(content, str):
try:
args = json.loads(content) if content.strip() else {}
except (json.JSONDecodeError, TypeError) as e:
raise ValueError(str(e))
elif isinstance(content, dict):
args = content
else:
args = {}
# Unwrap {"body": {...}} envelope, but only if `body` is the sole key
# and points at a dict. We don't want to clobber a legitimate `body`
# field on tools where it's a real arg (e.g. send_email body text).
if (
isinstance(args, dict)
and len(args) == 1
and "body" in args
and isinstance(args["body"], dict)
and "action" in args["body"] # extra safety: only unwrap if the inner dict looks like a tool call
):
args = args["body"]
return args