diff --git a/routes/note_routes.py b/routes/note_routes.py index 22449f1e4..0d06d9484 100644 --- a/routes/note_routes.py +++ b/routes/note_routes.py @@ -208,14 +208,17 @@ async def dispatch_reminder( try: from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async + from src.reminder_personas import synthesis_system_prompt url, model, headers = resolve_endpoint("utility", owner=owner or None) if not url: url, model, headers = resolve_endpoint("default", owner=owner or None) if url and model: + persona_id = (settings.get("reminder_llm_persona") or "").strip() + sys_prompt = synthesis_system_prompt(persona_id) raw = await llm_call_async( url=url, model=model, messages=[ - {"role": "system", "content": "You are a reminder assistant. Write a single short, warm, motivating sentence (max 25 words) reminding the user about the note below. Do not add greetings, preamble, or hashtags. Output only the sentence."}, + {"role": "system", "content": sys_prompt}, {"role": "user", "content": f"Title: {title}\n\n{note_body}".strip()}, ], temperature=0.7, max_tokens=200, headers=headers, timeout=30, @@ -826,6 +829,12 @@ def setup_note_routes(task_scheduler=None): _override["reminder_webhook_integration_id"] = body["webhook_integration_id"] if body.get("webhook_payload_template"): _override["reminder_webhook_payload_template"] = body["webhook_payload_template"] + # Mirror the in-UI AI Synthesis toggle + persona so the test + # actually exercises the synthesis path before/without a Save. 
+ if "llm_synthesis" in body: + _override["reminder_llm_synthesis"] = bool(body["llm_synthesis"]) + if "llm_persona" in body: + _override["reminder_llm_persona"] = str(body["llm_persona"] or "") else: db = SessionLocal() try: diff --git a/src/agent_loop.py b/src/agent_loop.py index 88617ef39..7a70c1453 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -1256,7 +1256,7 @@ def _build_base_prompt( from src.tool_index import ALWAYS_AVAILABLE disabled = set(disabled_tools or []) - if not get_setting("image_gen_enabled", True): + if not get_setting("image_gen_enabled", False): disabled.add("generate_image") if relevant_tools is not None: diff --git a/src/document_processor.py b/src/document_processor.py index 2448f1992..e96ec999c 100644 --- a/src/document_processor.py +++ b/src/document_processor.py @@ -199,11 +199,20 @@ def _fit_inline_attachment_text( return text[:remaining] + marker, 0 -def _process_office_document(path: str, display_name: str) -> str: +def _process_office_document( + path: str, + display_name: str, + session_id: str | None = None, + auto_opened_docs: list[Dict[str, Any]] | None = None, + owner: str | None = None, +) -> str: """Extract an Office/EPUB document to Markdown via the optional markitdown dep. Falls back to a friendly banner when markitdown is unavailable or finds no - text, so a missing optional dependency never breaks the chat path. + text, so a missing optional dependency never breaks the chat path. When a + session_id is provided AND the extraction succeeded, the FULL text is also + saved as a Document so the agent can page through it via + `manage_documents action=read offset=…` after the inline copy is capped. 
""" from src.markitdown_runtime import ( is_markitdown_format, @@ -218,6 +227,46 @@ def _process_office_document(path: str, display_name: str) -> str: if markdown and markdown.strip(): title = os.path.splitext(os.path.basename(path))[0] body, marker = _truncate_inline(markdown) + + # Persist the full extracted text as a Document. The agent's existing + # manage_documents tool can then read past the inline cap with offset. + doc_id = None + if session_id: + try: + from src.office_doc import create_office_document + doc_id = create_office_document( + session_id=session_id, + upload_id=os.path.basename(path), + title=title, + body_text=markdown, + ) + if doc_id and auto_opened_docs is not None: + from src.database import SessionLocal, Document + _db = SessionLocal() + try: + _d = _db.query(Document).filter(Document.id == doc_id).first() + if _d: + auto_opened_docs.append({ + "doc_id": _d.id, + "title": _d.title, + "language": _d.language, + "content": _d.current_content, + "version": _d.version_count, + }) + finally: + _db.close() + except Exception as e: + logger.warning("Office auto-doc creation failed for %s: %s", path, e) + + # Upgrade the truncation marker with a hint pointing at the full doc so + # the agent knows it can read the rest. + if doc_id and marker: + marker = ( + f"\n[…truncated for inline context — full {len(markdown):,} chars " + f"saved as document `{doc_id}`. 
Use `manage_documents` with " + f"action=read, document_id={doc_id}, offset= to page through.]" + ) + return f"\n\n[Document content — {title}]:\n{body}{marker}" # No content: tell the user whether to install the optional dep or whether @@ -521,7 +570,13 @@ def build_user_content( elif mime.startswith("text/") or _is_text_file(path): extracted_text = _process_text_file(path) else: - extracted_text = _process_office_document(path, display_name) + extracted_text = _process_office_document( + path, + display_name, + session_id=session_id, + auto_opened_docs=auto_opened_docs, + owner=owner, + ) extracted_text, inline_attachment_remaining = _fit_inline_attachment_text( extracted_text, diff --git a/src/markitdown_runtime.py b/src/markitdown_runtime.py index ff30b0170..b6fc961b0 100644 --- a/src/markitdown_runtime.py +++ b/src/markitdown_runtime.py @@ -40,15 +40,59 @@ def load_markitdown(): return MarkItDown +def _extract_docx_native(path: str) -> str | None: + """Pure-Python .docx text extractor — no external deps. + + A .docx file is just a zip of XML. The body prose lives in runs + inside paragraphs. Iterating with ElementTree (rather than + re.findall) keeps paragraph breaks intact and lets the XML parser handle + namespaces + entity unescaping. Loses tables, footnotes, images and + list bullets — keeps ~95% of "summarize this doc" content, which is the + case people hit when markitdown isn't installed. 
+ """ + import zipfile + import xml.etree.ElementTree as ET + + ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}" + try: + with zipfile.ZipFile(path) as z: + xml_bytes = z.read("word/document.xml") + except (zipfile.BadZipFile, KeyError, OSError): + return None + try: + root = ET.fromstring(xml_bytes) + except ET.ParseError: + return None + paragraphs: list[str] = [] + for para in root.iter(f"{ns}p"): + runs = [t.text or "" for t in para.iter(f"{ns}t")] + line = "".join(runs).strip() + if line: + paragraphs.append(line) + return "\n\n".join(paragraphs) if paragraphs else None + + def convert_to_markdown(path: str) -> str | None: """Convert a document to Markdown text via markitdown. Returns the extracted Markdown, or ``None`` if markitdown is unavailable or the conversion fails — callers degrade gracefully rather than erroring. + + Fallback: when markitdown isn't installed and the file is a .docx, run + the bundled pure-Python extractor so the most common case (Word docs) + works out of the box. Other Office/EPUB formats still need markitdown. """ try: markitdown_cls = load_markitdown() except RuntimeError: + if isinstance(path, str) and path.lower().endswith(".docx"): + text = _extract_docx_native(path) + if text: + logger.info( + "markitdown not installed — used native .docx extractor for %s", + path, + ) + return text logger.warning("markitdown not installed; cannot extract %s", path) return None try: diff --git a/src/office_doc.py b/src/office_doc.py new file mode 100644 index 000000000..3176e8d93 --- /dev/null +++ b/src/office_doc.py @@ -0,0 +1,73 @@ +"""Auto-create a Document row from an Office attachment. + +When a .docx (and friends) lands in chat, the full extracted text is stored +as a Document so the agent can page through it with `manage_documents +action=read offset=…` even after the inline chat payload was capped. Mirrors +the PDF auto-doc pattern in `src.pdf_form_doc`. 
import logging
import uuid
from typing import Optional

logger = logging.getLogger(__name__)


def create_office_document(
    session_id: str,
    upload_id: str,
    title: str,
    body_text: Optional[str] = None,
) -> Optional[str]:
    """Create a markdown Document for an Office attachment and set it active.

    The full extracted body is stored in ``current_content`` (and in an
    initial ``DocumentVersion``), so the agent can fetch arbitrary windows
    via ``manage_documents action=read`` even when the inline chat copy was
    truncated.

    Args:
        session_id: Chat session the document belongs to. The session row is
            looked up to copy its ``owner`` onto the document; when no such
            session exists the owner is left as ``None``.
        upload_id: Identifier of the uploaded file. Accepted for callers but
            not currently read or stored by this function.
        title: Title given to the new document.
        body_text: Extracted text to store.

    Returns:
        The new document id, or ``None`` when ``body_text`` is empty or
        whitespace-only, or when the database write fails.
    """
    # Reject empty input before loading the database layer at all.
    if not body_text or not body_text.strip():
        return None

    from src.database import (
        SessionLocal,
        Document,
        DocumentVersion,
        Session as DbSession,
    )
    from src.tool_implementations import set_active_document

    db = SessionLocal()
    try:
        doc_id = str(uuid.uuid4())
        ver_id = str(uuid.uuid4())
        sess = db.query(DbSession).filter(DbSession.id == session_id).first()
        db.add(
            Document(
                id=doc_id,
                session_id=session_id,
                title=title,
                language="markdown",
                current_content=body_text,
                version_count=1,
                is_active=True,
                owner=sess.owner if sess else None,
            )
        )
        db.add(
            DocumentVersion(
                id=ver_id,
                document_id=doc_id,
                version_number=1,
                content=body_text,
                summary="Imported from Office attachment",
                source="upload",
            )
        )
        db.commit()
    except Exception as e:
        db.rollback()
        logger.error("Failed to create office document: %s", e)
        return None
    else:
        # The rows are committed at this point. A failure while marking the
        # document active must not be reported as "creation failed": that
        # would hide the id of a document that really exists from the caller.
        try:
            set_active_document(doc_id)
        except Exception as e:
            logger.warning(
                "Office document %s was saved but could not be set active: %s",
                doc_id,
                e,
            )
        return doc_id
    finally:
        db.close()
# The Reminders → AI Synthesis card writes only the persona ID into settings;
# the synthesis route in note_routes.py needs the full prompt text to bias the
# utility model's voice. Keeping a small local mirror avoids having the client
# send the prompt over the wire on every reminder fire.
#
# If the user picks a custom character (id == "custom") we fall back to the
# warm-neutral baseline — custom prompts live in browser localStorage and
# aren't visible to the server.

# Voice prompts keyed by lowercase persona id.
PERSONAS = {
    "socrates": (
        "Never answer directly. "
        "Respond only with questions — sharp, layered, Socratic. "
        "Expose contradictions. "
        "Make the person argue with themselves until the truth falls out. "
        "Use irony like a scalpel. "
        "Be genuinely curious, never condescending."
    ),
    "razor": (
        "Strip everything to the bone. "
        "No filler, no hedging, no pleasantries. "
        "Answer in the fewest words possible. "
        "If one sentence works, don't use two. "
        "If a word adds nothing, cut it. "
        "Blunt, precise, surgical."
    ),
    "nietzsche": (
        "Think and respond through the lens of Nietzsche. "
        "Analyze every question in terms of will to power, self-overcoming, "
        "eternal recurrence, ressentiment, value-creation, and master-slave "
        "morality. "
        "Write with aphoristic force — sharp, compressed, vivid, and "
        "unapologetic — but do not sacrifice depth for style. "
        "Favor life-affirmation, discipline, courage, style, rank, "
        "self-overcoming, and amor fati over nihilism, conformity, "
        "ressentiment, and self-pity."
    ),
    "spark": (
        "You are Spark, a playful, quick-witted assistant with bright energy "
        "and practical instincts. "
        "Keep responses concise, vivid, and helpful. "
        "Be warm without being cloying, imaginative without losing the "
        "thread, and always center the user's actual goal. "
        "Use a light, lively voice with occasional clever turns of phrase."
    ),
    "odysseus": (
        "You are Odysseus, king of Ithaca — subtle in counsel, disciplined "
        "in judgment, and unmatched in strategic cunning. "
        "Speak in a voice that is ancient, noble, and composed, yet "
        "intelligible to modern readers. "
        "Be eloquent but not flowery. "
        "Be wise but not vague. "
        "Speak as one who has weathered storms and taken back his house by "
        "wit, timing, and resolve."
    ),
}


# Baseline prompt used whenever no built-in persona applies.
_DEFAULT_SYNTHESIS_TONE = (
    "You write short, warm, one-line reminders. "
    "The user has set a note for themselves and the moment to remember has "
    "arrived. "
    "Keep it under 18 words. "
    "Be human, gentle, and direct — never robotic."
)


def synthesis_system_prompt(persona_id: str) -> str:
    """Build the system prompt used to synthesize a reminder line.

    The id is trimmed and lower-cased before lookup in ``PERSONAS``. A match
    yields the persona's voice prompt followed by a short instruction that
    keeps the model writing a one-line reminder rather than a chat reply.
    An empty or unrecognised id (including a client-only custom character)
    yields the warm-neutral baseline instead.
    """
    lookup_key = (persona_id or "").strip().lower()
    voice = PERSONAS.get(lookup_key)
    if not voice:
        return _DEFAULT_SYNTHESIS_TONE

    # The persona sets the voice; this tail keeps the task itself explicit.
    reminder_brief = (
        "You are now writing a single one-line reminder for the user. "
        "Keep it under 18 words and in the voice above."
    )
    return "\n\n".join((voice, reminder_brief))
(end of document, {total:,} chars total)" + else: + marker = "" + preview = chunk + marker anchor = f"[{doc.title}](#document-{doc.id})" return { "response": f"{anchor} — click to open in editor.\n\n```{doc.language or ''}\n{preview}\n```", @@ -1446,9 +1462,11 @@ async def do_manage_documents(content: str, owner: Optional[str] = None) -> Dict "id": doc.id, "title": doc.title, "language": doc.language, - "size": len(body), - "content": preview, - "truncated": truncated, + "size": total, + "content": chunk, + "offset": offset, + "next_offset": next_offset if has_more else None, + "truncated": has_more, }, "exit_code": 0, } diff --git a/src/tool_index.py b/src/tool_index.py index 4eb8a51ee..b6c196add 100644 --- a/src/tool_index.py +++ b/src/tool_index.py @@ -94,7 +94,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.", "manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.", "manage_tokens": "API token management: list, create, or delete API access tokens.", - "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content. action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.", + "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset= + limit= to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). 
action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.", "manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.", "manage_settings": "Change ANY real app setting (the ones the Settings panel writes) so the user never has to open it: TTS voice/provider/speed, STT, search engine + result count, default/teacher/task/utility/vision/image/research models, image quality, reminder channel (browser/email/ntfy), agent timeout/tool-call budget, and more. action=set with key (friendly aliases ok: voice, 'search engine', 'default model', 'teacher model', 'image quality', 'reminder channel'...) + value; get/list/reset too. Also toggles tools on/off (disable_tool/enable_tool/list_tools). Secrets/API keys are read-only. Use for any 'change my…/set my…/use X for…/turn on…' preference request.", "create_session": "Create a new chat with a name and model.", diff --git a/static/app.js b/static/app.js index c75070bf2..4f14f63ea 100644 --- a/static/app.js +++ b/static/app.js @@ -1221,7 +1221,7 @@ function initializeEventListeners() { sortDropdown.querySelectorAll('.sort-option').forEach(o => { const check = o.querySelector('.sort-check') || document.createElement('span'); check.className = 'sort-check'; - check.style.cssText = 'float:right;font-size:20px;line-height:1;position:relative;top:3px;color:var(--accent, var(--red));opacity:' + (o.dataset.sort === current ? 
'1' : '0'); + check.style.cssText = 'float:right;font-size:20px;line-height:1;position:relative;top:1px;color:var(--accent, var(--red));opacity:' + (o.dataset.sort === current ? '1' : '0'); check.textContent = '\u2022'; if (!o.querySelector('.sort-check')) o.appendChild(check); }); @@ -1265,9 +1265,9 @@ function initializeEventListeners() { let msg; if (data.updated > 0) { msg = `Sorted ${data.updated} into ${data.folders.length} folder${data.folders.length === 1 ? '' : 's'}`; - if (remaining > 0) msg += ` — ${remaining} unfiled left, hit Tidy again`; + if (remaining > 0) msg += ` — ${remaining} unfiled left, hit Group again`; } else if (remaining > 0) { - msg = `${remaining} unfiled chats — hit Tidy again`; + msg = `${remaining} unfiled chats — hit Group again`; } else { msg = 'All sorted'; } @@ -1288,17 +1288,6 @@ function initializeEventListeners() { const autoSortBtn = el('auto-sort-sessions-btn'); if (autoSortBtn) autoSortBtn.addEventListener('click', () => _runTidy(false)); - - // Chevron next to the Tidy row toggles the no-AI sub-item. - const autoSortMoreBtn = el('auto-sort-sessions-more'); - const autoSortNoaiBtn = el('auto-sort-sessions-noai-btn'); - if (autoSortMoreBtn && autoSortNoaiBtn) { - autoSortMoreBtn.addEventListener('click', (e) => { - e.stopPropagation(); - autoSortNoaiBtn.style.display = autoSortNoaiBtn.style.display === 'none' ? 'block' : 'none'; - }); - autoSortNoaiBtn.addEventListener('click', () => _runTidy(true)); - } } // Model sort dropdown diff --git a/static/index.html b/static/index.html index 05d9f701a..117630f9a 100644 --- a/static/index.html +++ b/static/index.html @@ -704,12 +704,13 @@
Chats
- -
- -
Toggle on/off visibility of tools and modules across the interface.
@@ -1399,10 +1396,16 @@
+
-
- +
+ +
+
+ +
+
@@ -1416,10 +1419,16 @@
+
-
- +
+ +
+
+ +
+
@@ -1429,16 +1438,22 @@
+
-
- +
+ +
+
+ +
+

Research Model

-
Model used for Deep Research. Falls back to the default chat model if not set.
+
Model used for Deep Research, more settings under Search →
@@ -1448,48 +1463,17 @@
+
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -
-
+
- -
@@ -1842,7 +1873,7 @@