docs: correct spelling in README (#2235 )

* Doc: README spelling corrections * Doc: README spelling correction for server * Doc: README spelling correction fix * Doc: README spelling correction fix --------- Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
fix: read allow_bash/allow_web_search from JSON body (#3229 ) (#3281 )
2026-06-17 02:05:22 -04:00 · 2026-06-11 19:57:17 +01:00 · 2026-06-11 19:14:41 +01:00 · 2026-06-11 18:23:54 +01:00 · 2026-06-11 17:01:14 +00:00 · 2026-06-11 17:55:33 +01:00
90 changed files with 3044 additions and 236 deletions
@@ -218,7 +218,7 @@ docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls
 > the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
 > library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
 > tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
-> not a Docker passthrough failure. Re-install the serve engine via
+> not a Docker passthrough failure. Reinstall the serve engine via
 > **Cookbook → Dependencies** to get a CUDA-enabled build.
 >
 > The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
@@ -498,6 +498,7 @@ app.state.session_manager = session_manager
 memory_manager    = components["memory_manager"]
 memory_vector     = components.get("memory_vector")
 upload_handler    = components["upload_handler"]
+app.state.upload_handler = upload_handler
 personal_docs_mgr = components["personal_docs_manager"]
 api_key_manager   = components["api_key_manager"]
 preset_manager    = components["preset_manager"]
@@ -675,6 +676,9 @@ app.include_router(setup_shell_routes())
 from routes.cookbook_routes import setup_cookbook_routes
 app.include_router(setup_cookbook_routes())

+from routes.workspace_routes import setup_workspace_routes
+app.include_router(setup_workspace_routes())
+
 # Hardware model fitting (cookbook "What Fits?" tab)
 from routes.hwfit_routes import setup_hwfit_routes
 app.include_router(setup_hwfit_routes())
@@ -191,6 +191,8 @@ def _windows_bash_fallbacks() -> List[str]:
        base = os.environ.get(env_name)
        if base:
            roots.append(ntpath.join(base, "Git"))
+            if env_name == "LocalAppData":
+                roots.append(ntpath.join(base, "Programs", "Git"))
    roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS)

    paths: List[str] = []
@@ -30,14 +30,26 @@ function Fail($msg) {
    exit 1
 }

+# Returns $true when $path contains (case-insensitively) one of the listed
+# bash.exe stub locations, $false otherwise -- including for an empty/$null path.
+function Test-WindowsBashStub($path) {
+    if (-not $path) { return $false }
+    $haystack = $path.ToLowerInvariant()
+    $stubFragments = @("system32\bash.exe", "sysnative\bash.exe", "windowsapps\bash.exe")
+    foreach ($fragment in $stubFragments) {
+        if ($haystack.Contains($fragment)) { return $true }
+    }
+    return $false
+}
+
 function Find-GitBash {
    $cmd = Get-Command bash -ErrorAction SilentlyContinue
-    if ($cmd) { return $cmd.Source }
+    if ($cmd -and -not (Test-WindowsBashStub $cmd.Source)) { return $cmd.Source }

    $roots = @()
    foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) {
        $base = [Environment]::GetEnvironmentVariable($name)
-        if ($base) { $roots += (Join-Path $base "Git") }
+        if ($base) {
+            $roots += (Join-Path $base "Git")
+            if ($name -eq "LocalAppData") { $roots += (Join-Path $base "Programs\Git") }
+        }
    }
    $roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git")

@@ -43,3 +43,7 @@ qrcode[pil]
 croniter
 pytest
 pytest-asyncio
+# starlette.testclient prefers httpx2 since Starlette 1.2.0 and warns on every
+# TestClient import when only classic httpx is present. Runtime code keeps
+# using `httpx` above; this is test-client only.
+httpx2
@@ -154,6 +154,7 @@ def setup_api_token_routes() -> APIRouter:
    @router.patch("/tokens/{token_id}")
    async def update_token(request: Request, token_id: str):
        require_admin(request)
+        current_user = get_current_user(request)
        try:
            payload = await request.json()
        except Exception:
@@ -162,6 +163,8 @@ def setup_api_token_routes() -> APIRouter:
            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
            if not token:
                raise HTTPException(404, "Token not found")
+            if current_user and token.owner != current_user:
+                raise HTTPException(403, "Not your token")
            if isinstance(payload.get("name"), str) and payload["name"].strip():
                token.name = payload["name"].strip()[:MAX_NAME_LEN]
            # Only touch scopes when the caller actually sent them. A partial
@@ -189,10 +192,14 @@ def setup_api_token_routes() -> APIRouter:
    @router.delete("/tokens/{token_id}")
    def delete_token(request: Request, token_id: str):
        require_admin(request)
+        current_user = get_current_user(request)
        with get_db_session() as db:
-            deleted = db.query(ApiToken).filter(ApiToken.id == token_id).delete()
-            if not deleted:
+            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
+            if not token:
                raise HTTPException(404, "Token not found")
+            if current_user and token.owner != current_user:
+                raise HTTPException(403, "Not your token")
+            db.delete(token)
        _invalidate_cache(request)
        return {"status": "deleted"}

@@ -416,6 +416,17 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        except Exception as e:
            logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)

+        # uploads.json: upload rows use owner metadata for access checks and
+        # owner-prefixed index keys for dedupe. Rename both so attachments keep
+        # resolving after the account username changes.
+        try:
+            upload_handler = getattr(request.app.state, "upload_handler", None)
+            rename_owner = getattr(upload_handler, "rename_owner", None)
+            if callable(rename_owner):
+                rename_owner(old_username, new_username)
+        except Exception as e:
+            logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
+
        # skills: SKILL.md frontmatter carries owner: <username>; the usage
        # sidecar (_usage.json) keys entries as owner::skill-name. Both must
        # be updated or the renamed user's Skills panel goes empty.
@@ -62,6 +62,33 @@ def _stream_set(session_id: str, **fields) -> None:
    rec.update(fields)


+def _resolve_request_workspace(request, raw_value) -> tuple:
+    """Resolve the posted workspace for this request: (workspace, rejected).
+
+    Privilege is checked BEFORE the path ever touches the filesystem. Only
+    admin/single-user callers can use the workspace-backed file/shell tools,
+    so only they get vet_workspace() and the workspace_rejected signal. For
+    any other caller the submitted value is dropped uniformly, with no vetting
+    and no event: otherwise the presence/absence of workspace_rejected would
+    let a non-admin chat caller probe which host paths exist.
+
+    vet_workspace rejects non-directories, sensitive roots (.ssh, .gnupg,
+    ...), and filesystem roots; on rejection there is no confinement and the
+    default tool-path allowlist applies. The rejected value is surfaced so the
+    stream can tell an admin client (which believes a workspace is active)
+    that it was dropped.
+
+    Args:
+        request: Incoming request; only used to look up the current user.
+        raw_value: The posted ``workspace`` field. Anything that is not a
+            string (missing field, file part, number, ...) counts as "not set".
+
+    Returns:
+        ``(workspace, rejected)``. ``workspace`` is whatever vet_workspace()
+        returned, or ``""``. ``rejected`` is the stripped submitted value when
+        an admin/single-user caller's path failed vetting, otherwise ``""``.
+    """
+    # Form fields are not guaranteed to be strings (a multipart file part can
+    # be posted under the same field name). Treat those as "no workspace"
+    # instead of crashing on .strip() before the privilege check even runs.
+    if not isinstance(raw_value, str):
+        return "", ""
+    requested = raw_value.strip()
+    if not requested:
+        return "", ""
+    from src.tool_security import owner_is_admin_or_single_user
+    if not owner_is_admin_or_single_user(get_current_user(request)):
+        return "", ""
+    from src.tool_execution import vet_workspace
+    workspace = vet_workspace(requested) or ""
+    return workspace, (requested if not workspace else "")
+
+
 def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
    if not session_url or not endpoint_base:
        return False
@@ -447,8 +474,11 @@ def setup_chat_routes(
        use_research = form_data.get("use_research")
        time_filter = form_data.get("time_filter")
        preset_id = form_data.get("preset_id")
-        allow_bash = form_data.get("allow_bash")
-        allow_web_search = form_data.get("allow_web_search")
+        # Issue #3229: API callers send JSON, not FormData.  Read from the
+        # JSON body as fallback so callers who send {"allow_bash": true}
+        # actually get bash enabled.
+        allow_bash = form_data.get("allow_bash") or (body or {}).get("allow_bash")
+        allow_web_search = form_data.get("allow_web_search") or (body or {}).get("allow_web_search")
        use_rag = form_data.get("use_rag")
        search_context = form_data.get("search_context")  # pre-fetched web search results (compare mode)
        compare_mode = str(form_data.get("compare_mode", "")).lower() == "true"
@@ -457,6 +487,10 @@ def setup_chat_routes(
        # manual form posts that still send plan_mode=true.
        plan_mode = False
        chat_mode = str(form_data.get("mode", "")).lower()  # 'chat' or 'agent'
+        # Workspace: confine the agent's file/shell tools to this folder.
+        workspace, workspace_rejected = _resolve_request_workspace(
+            request, form_data.get("workspace")
+        )
        # Plan mode is a modifier on agent mode — it only makes sense with tools.
        if plan_mode:
            chat_mode = "agent"
@@ -656,9 +690,13 @@ def setup_chat_routes(

        # Build disabled-tools set from frontend toggles + user privileges
        disabled_tools = set()
-        if str(allow_bash).lower() != "true":
+        # Only disable bash/web_search when the caller *explicitly* set them
+        # to a falsy value.  When unset (None), defer to per-user privilege
+        # checks below — this lets admins with can_use_bash=True use bash
+        # by default without having to send allow_bash in every request.
+        if allow_bash is not None and str(allow_bash).lower() != "true":
            disabled_tools.add("bash")
-        if str(allow_web_search).lower() != "true":
+        if allow_web_search is not None and str(allow_web_search).lower() != "true":
            disabled_tools.add("web_search")
            disabled_tools.add("web_fetch")

@@ -761,6 +799,13 @@ def setup_chat_routes(
            # Register active stream for partial-save safety net
            _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode}

+            # The client sent a workspace the server refused to bind (deleted
+            # folder, file path, sensitive dir, filesystem root). Tell it up
+            # front so the UI can clear the pill instead of displaying a
+            # confinement that is not actually in effect.
+            if workspace_rejected:
+                yield f"data: {json.dumps({'type': 'workspace_rejected', 'data': {'path': workspace_rejected}})}\n\n"
+
            if ctx.preprocessed.attachment_meta:
                yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n"

@@ -1138,6 +1183,7 @@ def setup_chat_routes(
                        fallbacks=_fallback_candidates,
                        plan_mode=plan_mode,
                        approved_plan=approved_plan or None,
+                        workspace=workspace or None,
                    ):
                        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
                            try:
@@ -0,0 +1,19 @@
+"""Pure helpers for shaping cookbook task output for the status response.
+
+Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
+unit-tested without standing up the whole app.
+"""
+
+
+def error_aware_output_tail(full_snapshot: str, status: str) -> str:
+    """Slice the end of a task log for inclusion in the status response.
+
+    A task whose status is exactly ``"error"`` gets its final 50 lines, so the
+    "Copy last 50 lines" action carries the real failure context (stack
+    traces, build output). Every other status gets only the final 12 lines to
+    keep the payload small on the 10s polling interval. An empty snapshot
+    yields an empty string.
+    """
+    if full_snapshot:
+        keep = 12
+        if status == "error":
+            keep = 50
+        log_lines = full_snapshot.splitlines()
+        return "\n".join(log_lines[-keep:])
+    return ""
@@ -30,6 +30,7 @@ from core.platform_compat import (
    which_tool,
 )
 from routes.shell_routes import TMUX_LOG_DIR
+from routes.cookbook_output import error_aware_output_tail

 logger = logging.getLogger(__name__)

@@ -2873,6 +2874,7 @@ def setup_cookbook_routes() -> APIRouter:
            # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
            # when the PID is gone instead of blindly reporting "stopped".
            download_zero_files = False
+            exit_code = None
            status = "unknown"
            download_has_ok = task_type == "download" and "DOWNLOAD_OK" in full_snapshot
            download_has_failed = task_type == "download" and "DOWNLOAD_FAILED" in full_snapshot
@@ -2946,7 +2948,7 @@ def setup_cookbook_routes() -> APIRouter:
                status = "error"
            if download_zero_files:
                diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
-            output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
+            output_tail = error_aware_output_tail(full_snapshot, status)

            results.append({
                "session_id": session_id,
@@ -2957,6 +2959,7 @@ def setup_cookbook_routes() -> APIRouter:
                "phase": serve_phase,
                "diagnosis": diagnosis,
                "output_tail": output_tail,
+                "exit_code": exit_code,
                "cmd": _payload.get("_cmd") or "",
                "tps": phase_info.get("tps"),
                "reqs": phase_info.get("reqs"),
@@ -304,6 +304,7 @@ OWNER_SCOPED_EMAIL_CACHE_TABLES = {
    "email_ai_replies",
    "email_calendar_extractions",
    "email_urgency_alerts",
+    "sender_signatures",
 }


@@ -341,6 +342,55 @@ def _ensure_owner_scoped_email_cache_table(conn, table: str, create_sql: str, co
        _lg.getLogger(__name__).warning(f"{table} owner-migration skipped: {_mig_e}")


+def _ensure_sender_signatures_table(conn):
+    """Create/migrate learned sender signatures to an owner-scoped cache.
+
+    Ensures ``sender_signatures`` exists with the composite primary key
+    ``(from_address, owner)``. A table that lacks the ``owner`` column or has
+    a different primary key is rebuilt: it is renamed aside, the new table is
+    created, the known columns are copied across (``owner`` defaults to
+    ``''``), and the old table is dropped.
+
+    The rebuild runs inside a SAVEPOINT so it is all-or-nothing. Without it, a
+    failure after the rename would leave an empty new table next to a stranded
+    ``sender_signatures__old``; the next call would then see the correct
+    schema, return early, and never copy the old rows.
+
+    Args:
+        conn: Open sqlite3 connection. Committing is left to the caller.
+
+    Failures are logged as warnings and never raised.
+    """
+    create_sql = """
+        CREATE TABLE IF NOT EXISTS sender_signatures (
+            from_address TEXT,
+            owner TEXT DEFAULT '',
+            signature_text TEXT,
+            sample_count INTEGER,
+            last_built_at TEXT NOT NULL,
+            model_used TEXT,
+            source TEXT,
+            PRIMARY KEY (from_address, owner)
+        )
+    """
+    conn.execute(create_sql)
+    savepoint_open = False
+    try:
+        info = conn.execute("PRAGMA table_info(sender_signatures)").fetchall()
+        cols = [r[1] for r in info]
+        # PRAGMA table_info column 5 is the 1-based position within the primary
+        # key (0 = not part of it); sort by it to recover the key order.
+        pk_cols = [r[1] for r in sorted((r for r in info if r[5]), key=lambda r: r[5])]
+        if "owner" in cols and pk_cols == ["from_address", "owner"]:
+            return
+
+        conn.execute("SAVEPOINT sender_signatures_migration")
+        savepoint_open = True
+        conn.execute("ALTER TABLE sender_signatures RENAME TO sender_signatures__old")
+        conn.execute(create_sql)
+        old_cols = [r[1] for r in conn.execute("PRAGMA table_info(sender_signatures__old)").fetchall()]
+        # Copy only the columns the old table actually has.
+        copy_cols = [
+            c for c in (
+                "from_address",
+                "signature_text",
+                "sample_count",
+                "last_built_at",
+                "model_used",
+                "source",
+            )
+            if c in old_cols
+        ]
+        source_owner = "COALESCE(owner, '')" if "owner" in old_cols else "''"
+        conn.execute(
+            f"INSERT OR IGNORE INTO sender_signatures "
+            f"({', '.join([*copy_cols, 'owner'])}) "
+            f"SELECT {', '.join([*copy_cols, source_owner])} "
+            f"FROM sender_signatures__old"
+        )
+        conn.execute("DROP TABLE sender_signatures__old")
+        conn.execute("RELEASE SAVEPOINT sender_signatures_migration")
+        savepoint_open = False
+    except Exception as _mig_e:
+        import logging as _lg
+        if savepoint_open:
+            # Undo the rename/create/copy so the original table (and its rows)
+            # is back in place and the migration can be retried next start.
+            try:
+                conn.execute("ROLLBACK TO SAVEPOINT sender_signatures_migration")
+                conn.execute("RELEASE SAVEPOINT sender_signatures_migration")
+            except Exception as _rb_e:
+                _lg.getLogger(__name__).warning(f"sender_signatures owner-migration rollback failed: {_rb_e}")
+        _lg.getLogger(__name__).warning(f"sender_signatures owner-migration skipped: {_mig_e}")
+
+
 def attachment_extract_dir(folder: str, uid: str) -> Path:
    """Containment-safe extraction directory for an attachment.

@@ -559,20 +609,10 @@ def _init_scheduled_db():
            conn.execute("ALTER TABLE email_boundaries ADD COLUMN turns_json TEXT")
    except Exception:
        pass
-    # Per-sender signature cache. Populated by `learn_sender_signatures`
-    # action: the LLM extracts the common trailing block across N emails
-    # from each sender; the renderer folds it consistently for every
-    # future email from that address.
-    conn.execute("""
-        CREATE TABLE IF NOT EXISTS sender_signatures (
-            from_address TEXT PRIMARY KEY,
-            signature_text TEXT,
-            sample_count INTEGER,
-            last_built_at TEXT NOT NULL,
-            model_used TEXT,
-            source TEXT
-        )
-    """)
+    # Per-sender signature cache. Populated by `learn_sender_signatures`.
+    # Message sender addresses are global, so signatures must be scoped to the
+    # mailbox owner before `/read` returns them to the renderer.
+    _ensure_sender_signatures_table(conn)
    conn.commit()
    conn.close()

@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
    return m.group(1).decode() if m else ""


+# Matches the start of a FETCH record's meta: "<sequence number> (".
+_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")
+
+
+def _group_uid_fetch_records(msg_data) -> list:
+    """Collapse an imaplib UID FETCH response into (meta, payload) records.
+
+    imaplib returns a flat, interleaved list. Attributes that carry a literal
+    (``RFC822.HEADER {n}`` etc.) arrive as ``(meta, literal)`` tuples, and
+    whatever the server sends outside a literal arrives as bare ``bytes``.
+    Which form an attribute takes depends on the server: Dovecot emits FLAGS
+    ahead of the header literal (so it is part of the tuple meta), whereas
+    Gmail emits FLAGS after it, as a bare element. Ignoring bare elements
+    would therefore lose FLAGS on Gmail and show every message as
+    unread/unflagged.
+
+    Grouping rule: a tuple whose meta begins with a sequence number starts a
+    new record. Anything else -- a continuation tuple or bare bytes -- is
+    appended (space-separated) to the meta of the record currently being
+    built, so attribute regexes can scan one complete meta string. A
+    continuation tuple also supplies the payload if the record has none yet.
+    Bare ``b')'`` terminators are appended as well, which is harmless. Parts
+    seen before the first record are dropped.
+    """
+    records: list = []  # each item: (meta_bytes, payload_bytes_or_None)
+    for item in msg_data or []:
+        if isinstance(item, (bytes, bytearray)):
+            # Bare element: only meaningful once a record has been opened.
+            if records:
+                meta, payload = records[-1]
+                records[-1] = (meta + b" " + bytes(item), payload)
+            continue
+        if not isinstance(item, tuple):
+            continue
+        head = item[0]
+        if not isinstance(head, (bytes, bytearray)):
+            head = str(head).encode()
+        if _FETCH_SEQ_RE.match(head):
+            records.append((head, item[1]))
+        elif records:
+            meta, payload = records[-1]
+            records[-1] = (meta + b" " + head, payload or item[1])
+    return records
+
+
 def _smtp_ready(cfg: dict) -> bool:
    return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))

@@ -799,20 +834,11 @@ def setup_email_routes():
                except Exception as e:
                    logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
                    status, msg_data = "NO", []
-                # imaplib batch responses interleave (meta, payload) tuples and
-                # `b')'` terminators. Group by message: each tuple where the
-                # meta begins with a seq number starts a new message record.
-                seq_re = re.compile(rb'^(\d+)\s+\(')
-                grouped = []  # list of (meta_str, payload_bytes)
-                for part in (msg_data or []):
-                    if isinstance(part, tuple):
-                        meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
-                        if seq_re.match(meta_b):
-                            grouped.append((meta_b, part[1]))
-                        elif grouped:
-                            # continuation of previous message — concatenate meta info if any
-                            cur_meta, cur_payload = grouped[-1]
-                            grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
+                # Group the batched response into per-message (meta, payload)
+                # records. Bare bytes parts must be kept: Gmail returns FLAGS
+                # after the header literal as a bare element, and dropping it
+                # rendered every Gmail message as unread/unflagged.
+                grouped = _group_uid_fetch_records(msg_data)

                if status != "OK" and not grouped:
                    conn.logout()
@@ -1098,14 +1124,15 @@ def setup_email_routes():
                            continue
                        raw_header = None
                        flags = ""
-                        for part in msg_data:
-                            if isinstance(part, tuple):
-                                meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
-                                if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
-                                    raw_header = part[1]
-                                flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
-                                if flag_match:
-                                    flags = flag_match.group(1)
+                        # Same Gmail caveat as the list route: FLAGS may
+                        # arrive after the header literal, so group bare
+                        # parts back into the message meta before scanning.
+                        for meta_b, payload in _group_uid_fetch_records(msg_data):
+                            if payload and b"RFC822.HEADER" in meta_b:
+                                raw_header = payload
+                            flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
+                            if flag_match:
+                                flags = flag_match.group(1).decode(errors="replace")
                        if not raw_header:
                            continue
                        msg = email_mod.message_from_bytes(raw_header)
@@ -1247,8 +1274,9 @@ def setup_email_routes():
                try:
                    if sender_addr:
                        _rs = _c.execute(
-                            "SELECT signature_text FROM sender_signatures WHERE from_address = ?",
-                            (sender_addr.lower().strip(),),
+                            f"SELECT signature_text FROM sender_signatures "
+                            f"WHERE from_address = ? AND {owner_clause}",
+                            (sender_addr.lower().strip(), *owner_params),
                        ).fetchone()
                        if _rs and _rs[0]:
                            cached_sender_sig = _rs[0]
@@ -105,6 +105,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        if memory_manager.find_duplicates(text, user_mem):
            return {"ok": True, "count": len(user_mem), "message": "Memory already exists"}

+        if memory_data.session_id:
+            try:
+                session_obj = session_manager.get_session(memory_data.session_id)
+            except KeyError:
+                raise HTTPException(404, "Session not found")
+            _assert_session_owner(session_obj, user)
+
        new_entry = memory_manager.add_entry(text, memory_data.source, memory_data.category, owner=user)
        if memory_data.session_id:
            new_entry["session_id"] = memory_data.session_id
@@ -163,8 +170,17 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM

            session_id = memory.get("session_id")
            if session_id and session_id in session_manager.sessions:
-                session = session_manager.get_session(session_id)
-                memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
+                try:
+                    session = session_manager.get_session(session_id)
+                    if session:
+                        _assert_session_owner(session, user)
+                    memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
+                except KeyError:
+                    memory["session_name"] = "Unknown"
+                except HTTPException as exc:
+                    if exc.status_code != 404:
+                        raise
+                    memory["session_name"] = "Unknown"
            else:
                memory["session_name"] = "Unknown"

@@ -0,0 +1,85 @@
+"""Workspace API - browse server directories to pick a tool workspace folder."""
+import os
+from fastapi import APIRouter, Request, HTTPException, Query
+
+from src.auth_helpers import get_current_user
+from src.tool_security import owner_is_admin_or_single_user
+
+# Cap entries returned per directory (mirrors filesystem_tools._CODENAV_MAX_HITS).
+# A huge directory shouldn't dump thousands of rows into the picker; the user can
+# type/paste a path to jump straight in instead.
+_MAX_BROWSE_DIRS = 500
+
+
+def setup_workspace_routes():
+    """Build the ``/api/workspace`` router (``/browse`` and ``/vet``).
+
+    Both endpoints are restricted to admin/single-user callers and answer
+    with HTTP 403 for anyone else.
+    """
+    router = APIRouter(prefix="/api/workspace", tags=["workspace"])
+
+    @router.get("/browse")
+    def browse(request: Request, path: str = Query(default="")):
+        """List subdirectories of `path` (default: home) so the UI can navigate
+        the server filesystem and pick a workspace folder. Directories only.
+
+        ADMIN-ONLY: this enumerates the server filesystem, so it is gated the
+        same way the file/shell tools are (read_file/write_file/bash are in
+        NON_ADMIN_BLOCKED_TOOLS). A non-admin who can't use those tools must not
+        be able to map the host's directory tree either.
+        """
+        if not owner_is_admin_or_single_user(get_current_user(request)):
+            raise HTTPException(status_code=403, detail="Workspace browsing is admin-only")
+
+        # Canonicalise (symlinks resolved) so the path reported back is the
+        # real directory; anything that is not a directory falls back to home.
+        target = os.path.realpath(os.path.expanduser(path.strip() or "~"))
+        if not os.path.isdir(target):
+            target = os.path.realpath(os.path.expanduser("~"))
+
+        found = []
+        try:
+            with os.scandir(target) as entries:
+                for entry in entries:
+                    try:
+                        # Symlinks are not followed: a symlinked directory is
+                        # skipped rather than offered for navigation.
+                        is_real_dir = entry.is_dir(follow_symlinks=False)
+                    except OSError:
+                        continue
+                    # Hidden (dot-prefixed) entries are left out.
+                    if is_real_dir and not entry.name.startswith("."):
+                        # Child path is joined server-side so separators are
+                        # right for the host OS.
+                        found.append({"name": entry.name, "path": os.path.join(target, entry.name)})
+        except OSError:
+            # Unreadable directory (PermissionError included): report it empty.
+            found = []
+
+        found.sort(key=lambda d: d["name"].lower())
+        parent = os.path.dirname(target)
+        from src.tool_execution import vet_workspace
+        return {
+            "path": target,
+            # No parent link once dirname() stops changing the path.
+            "parent": None if (not parent or parent == target) else parent,
+            "dirs": found[:_MAX_BROWSE_DIRS],
+            "truncated": len(found) > _MAX_BROWSE_DIRS,
+            # Whether this directory may be bound as a workspace (filesystem
+            # roots and sensitive dirs may be browsed through but not chosen).
+            "selectable": vet_workspace(target) is not None,
+        }
+
+    @router.get("/vet")
+    def vet(request: Request, path: str = Query(default="")):
+        """Validate a workspace path without binding it.
+
+        The UI calls this before persisting a manually typed path (/workspace
+        set) so a typo, file path, deleted folder, sensitive dir, or filesystem
+        root is rejected up front with the canonical path returned on success,
+        instead of being stored client-side and silently dropped at chat time.
+        Admin-gated like /browse: it confirms path existence on the host.
+        """
+        if not owner_is_admin_or_single_user(get_current_user(request)):
+            raise HTTPException(status_code=403, detail="Workspace selection is admin-only")
+        from src.tool_execution import vet_workspace
+        canonical = vet_workspace(path)
+        return {"ok": canonical is not None, "path": canonical}
+
+    return router
@@ -299,6 +299,40 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0) ->
        _cache_result(cache_file, cache_key, result, url)
        return result

+    # Plain-text / Markdown / JSON handling. Sources like
+    # raw.githubusercontent.com serve Markdown as `text/plain`, JSON APIs and
+    # raw config files serve `application/json`, and a lot of code and tool
+    # docs live in `.md` / `.txt`. These have no HTML structure, so the HTML
+    # branch below would extract nothing and report "no readable text content".
+    # Return the body verbatim instead. The `is_html` guard keeps real HTML
+    # (including `application/xhtml+xml`) on the parsing path; the `json` check
+    # covers `application/json` and `+json` suffixes; the URL-suffix fallback
+    # catches servers that mislabel text files as `application/octet-stream`.
+    is_html = "html" in content_type
+    is_json = "json" in content_type
+    url_path = url.lower().split("?", 1)[0].split("#", 1)[0]
+    looks_like_text_file = url_path.endswith(
+        (".md", ".markdown", ".txt", ".text", ".json", ".jsonl")
+    )
+    if not is_html and (content_type.startswith("text/") or is_json or looks_like_text_file):
+        text_body = (response.text or "").strip()
+        result = {
+            "url": url,
+            "title": os.path.basename(url_path) or url,
+            "content": text_body,
+            "lists": [],
+            "tables": [],
+            "code_blocks": [],
+            "meta_description": "",
+            "meta_keywords": "",
+            "js_rendered": False,
+            "js_message": "",
+            "success": bool(text_body),
+            "error": "" if text_body else "Empty response body",
+        }
+        _cache_result(cache_file, cache_key, result, url)
+        return result
+
    # HTML handling
    try:
        soup = BeautifulSoup(response.text, "html.parser")
@@ -21,7 +21,7 @@ from src.settings import get_setting
 from src.prompt_security import untrusted_context_message
 from src.tool_security import blocked_tools_for_owner, plan_mode_disabled_tools
 from src.tool_policy import GUIDE_ONLY_DIRECTIVE, ToolPolicy
-from src.tool_utils import get_mcp_manager
+from src.tool_utils import _truncate, get_mcp_manager
 from src.agent_tools import (
    parse_tool_blocks,
    strip_tool_blocks,
@@ -272,7 +272,7 @@ _DOMAIN_TOOL_MAP = {
    "notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
    "ui": {"ui_control"},
    "sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
-    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls"},
+    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
    "settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
 }

@@ -309,6 +309,7 @@ NEVER pipe multi-line Python through `python -c "..."` — shell quoting eats re
 <python code>
 ```
 Execute Python code. Use for computation, data processing, scripting. NOT for writing code for the user (use create_document for that). Same sandbox limits as bash — no TTY, no GUI, no `input()`; for anything the user should interact with, generate a single HTML file with inline JS instead.
+Prefer a dedicated tool whenever one fits the job (reading, searching, or writing files); use python only for computation/processing no dedicated tool covers - not for reading or writing files.
 Do NOT use Python/requests for web lookup/search/latest/current requests when `web_search` or `web_fetch` is available.""",

    "web_search": """\
@@ -347,6 +348,11 @@ Write content to a file. First line is the path, rest is the content.""",
 ```
 Edit an EXISTING file by exact string replacement. PREFER this over bash (sed/echo/redirects) for changing files — it shows a before/after diff. `old_string` must match the file exactly and be unique unless `replace_all` is true. Use write_file to create a new file.""",

+    "get_workspace": """\
+```get_workspace
+```
+Return the absolute path of the active workspace folder. File tools are CONFINED to it (paths can be RELATIVE to it); the shell starts there (cwd) but is NOT sandboxed. Call this first when the user says "the project"/"the code"/"this folder" without a path, instead of asking them. No arguments.""",
+
    "create_document": """\
 ```create_document
 <title>
@@ -1726,6 +1732,7 @@ async def stream_agent_loop(
    plan_mode: bool = False,
    approved_plan: Optional[str] = None,
    tool_policy: Optional[ToolPolicy] = None,
+    workspace: Optional[str] = None,
    _is_teacher_run: bool = False,
 ) -> AsyncGenerator[str, None]:
    """Streaming agent loop generator.
@@ -1795,7 +1802,17 @@ async def stream_agent_loop(
    if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
        from src.tool_index import ALWAYS_AVAILABLE
        _relevant_tools = set(ALWAYS_AVAILABLE)
-        logger.info("[tool-rag] Low-signal agent message; skipping retrieval and using always-available tools only")
+        if workspace:
+            # An active workspace IS the file-work signal: a vague "look at the
+            # project" means explore this folder. Surface only the READ-ONLY file
+            # tools (intersection with the plan-mode read-only allowlist) so the
+            # agent can investigate; write/shell tools stay out until the request
+            # actually calls for them (RAG retrieval adds those on a real ask).
+            from src.tool_security import PLAN_MODE_READONLY_TOOLS
+            _relevant_tools |= (_DOMAIN_TOOL_MAP["files"] & PLAN_MODE_READONLY_TOOLS)
+            logger.info("[tool-rag] Low-signal but workspace active; including read-only file tools")
+        else:
+            logger.info("[tool-rag] Low-signal agent message; skipping retrieval and using always-available tools only")
    if not guide_only and not _relevant_tools:
        try:
            from src.tool_index import get_tool_index, ALWAYS_AVAILABLE
@@ -2644,6 +2661,7 @@ async def stream_agent_loop(
                            tool_policy=tool_policy,
                            owner=owner,
                            progress_cb=_push_progress,
+                            workspace=workspace,
                        )
                    finally:
                        # Sentinel so the drainer knows to stop.
@@ -2751,18 +2769,20 @@ async def stream_agent_loop(
                # On a bash/python timeout the result carries error + (often
                # empty) stdout/stderr; fall back to the error so the "timed
                # out" reason reaches the UI instead of a blank result.
-                output_text = (result["stdout"] or result["stderr"] or result.get("error", ""))[:2000]
+                raw = result["stdout"] or result["stderr"] or result.get("error", "")
+                output_text = _truncate(raw)
            elif "output" in result:
                # bash / python canonical result: {"output": ..., "exit_code": ...}
-                output_text = (result["output"] or "")[:2000]
+                raw = result["output"] or ""
+                output_text = _truncate(raw)
            elif "response" in result:
                # AI interaction tools (chat_with_model, send_to_session)
                label = result.get("model", result.get("session_name", "AI"))
-                output_text = f"{label}: {result['response']}"[:4000]
+                output_text = _truncate(f"{label}: {result['response']}")
            elif "content" in result:
-                output_text = result["content"][:2000]
+                output_text = _truncate(result["content"])
            elif "results" in result:
-                output_text = result["results"][:4000]
+                output_text = _truncate(result["results"])
            elif "session_id" in result and "name" in result:
                output_text = f"Session created: {result['name']} (id: {result['session_id']})"
            elif "success" in result:
@@ -2772,7 +2792,7 @@ async def stream_agent_loop(
                    else f"Error: {result.get('error', '')}"
                )
            elif "error" in result:
-                output_text = result["error"][:2000]
+                output_text = _truncate(result["error"])

            # Emit tool_output (include ui_event data if present)
            tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)

 from .subprocess_tools import BashTool, PythonTool
 from .web_tools import WebSearchTool, WebFetchTool
-from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool
+from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
 from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool

 TOOL_HANDLERS = {
@@ -39,6 +39,7 @@ TOOL_HANDLERS = {
    "edit_document": EditDocumentTool().execute,
    "suggest_document": SuggestDocumentTool().execute,
    "manage_documents": ManageDocumentTool().execute,
+    "get_workspace": GetWorkspaceTool().execute,
 }

 # ---------------------------------------------------------------------------
@@ -51,7 +52,7 @@ PYTHON_TIMEOUT = 30

 # Tool types that trigger execution
 TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
-             "grep", "glob", "ls",
+             "grep", "glob", "ls", "get_workspace",
             "create_document", "update_document", "edit_document",
             "search_chats",
             "chat_with_model", "create_session", "list_sessions",
@@ -46,13 +46,7 @@ def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:

 class EditFileTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import (
-                    _resolve_tool_path,
-                    _resolve_tool_path_in_workspace,
-                    _resolve_search_root,
-                    _truncate
-                )
-        workspace = ctx.get("workspace")
+        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
        try:
            args = json.loads(content) if content.strip().startswith("{") else {}
        except (json.JSONDecodeError, TypeError):
@@ -64,8 +58,7 @@ class EditFileTool:
        if not raw_path:
            return {"error": "edit_file: path required", "exit_code": 1}
        try:
-            path = (_resolve_tool_path_in_workspace(workspace, raw_path)
-                    if workspace else _resolve_tool_path(raw_path))
+            path = _resolve_tool_path(raw_path)
        except ValueError as e:
            return {"error": f"edit_file: {e}", "exit_code": 1}
        if old == "":
@@ -113,13 +106,7 @@ class EditFileTool:

 class ReadFileTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import (
-                    _resolve_tool_path,
-                    _resolve_tool_path_in_workspace,
-                    _resolve_search_root,
-                    _truncate
-                )
-        workspace = ctx.get("workspace")
+        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
        raw_path, offset, limit = content.split("\n", 1)[0].strip(), 0, 0
        _stripped = content.strip()
        if _stripped.startswith("{"):
@@ -131,8 +118,7 @@ class ReadFileTool:
            except (json.JSONDecodeError, TypeError, ValueError):
                pass
        try:
-            path = (_resolve_tool_path_in_workspace(workspace, raw_path)
-                    if workspace else _resolve_tool_path(raw_path))
+            path = _resolve_tool_path(raw_path)
        except ValueError as e:
            return {"error": f"read_file: {e}", "exit_code": 1}
        try:
@@ -170,19 +156,12 @@ class ReadFileTool:

 class WriteFileTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import (
-                    _resolve_tool_path,
-                    _resolve_tool_path_in_workspace,
-                    _resolve_search_root,
-                    _truncate
-                )
-        workspace = ctx.get("workspace")
+        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
        lines = content.split("\n", 1)
        raw_path = lines[0].strip()
        body = lines[1] if len(lines) > 1 else ""
        try:
-            path = (_resolve_tool_path_in_workspace(workspace, raw_path)
-                    if workspace else _resolve_tool_path(raw_path))
+            path = _resolve_tool_path(raw_path)
        except ValueError as e:
            return {"error": f"write_file: {e}", "exit_code": 1}
        try:
@@ -212,13 +191,7 @@ class WriteFileTool:

 class LsTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import (
-                    _resolve_tool_path,
-                    _resolve_tool_path_in_workspace,
-                    _resolve_search_root,
-                    _truncate
-                )
-        workspace = ctx.get("workspace")
+        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
        raw_path = ""
        _s = (content or "").strip()
        if _s.startswith("{"):
@@ -267,13 +240,7 @@ class LsTool:

 class GlobTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import (
-                    _resolve_tool_path,
-                    _resolve_tool_path_in_workspace,
-                    _resolve_search_root,
-                    _truncate
-                )
-        workspace = ctx.get("workspace")
+        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
        args = {}
        _s = (content or "").strip()
        if _s.startswith("{"):
@@ -325,13 +292,7 @@ class GlobTool:

 class GrepTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import (
-                    _resolve_tool_path,
-                    _resolve_tool_path_in_workspace,
-                    _resolve_search_root,
-                    _truncate
-                )
-        workspace = ctx.get("workspace")
+        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
        args: Dict[str, Any] = {}
        _s = (content or "").strip()
        if _s.startswith("{"):
@@ -417,3 +378,21 @@ class GrepTool:
        if len(lines) >= max_hits:
            out += f"\n... [capped at {max_hits} matches]"
        return {"output": _truncate(out), "exit_code": 0}
+
+class GetWorkspaceTool:
+    """Tool handler that reports the active workspace folder; takes no arguments.
+
+    The reply also reminds the model that file tools are confined to the
+    workspace while the shell merely starts there and is not sandboxed.
+    """
+
+    async def execute(self, content: str, ctx: dict) -> dict:
+        """Return the active workspace path, or a notice that none is set.
+
+        `content` and `ctx` are accepted for handler-signature parity and are
+        not read. The result always carries `exit_code` 0.
+        """
+        from src.tool_execution import get_active_workspace
+
+        active = get_active_workspace()
+        if not active:
+            message = (
+                "No workspace is set. File tools use the default allowed roots; "
+                "resolve paths from the user or use absolute paths."
+            )
+        else:
+            message = (
+                f"{active}\n(File tools are confined to this folder; the shell starts "
+                f"here but is not sandboxed and can reach outside it.)"
+            )
+        return {"output": message, "exit_code": 0}
@@ -102,16 +102,15 @@ async def _run_subprocess_streaming(

 class BashTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import _AGENT_WORKDIR, _truncate
+        from src.tool_execution import agent_cwd, _truncate
        progress_cb = ctx.get("progress_cb")
-        workspace = ctx.get("workspace")
        _subproc_env = ctx.get("subproc_env")
        proc = await asyncio.create_subprocess_shell(
            content,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=_subproc_env,
-            cwd=workspace or _AGENT_WORKDIR,
+            cwd=agent_cwd(),
        )
        stdout, stderr, rc, timed_out = await _run_subprocess_streaming(
            proc,
@@ -129,16 +128,15 @@ class BashTool:

 class PythonTool:
    async def execute(self, content: str, ctx: dict) -> dict:
-        from src.tool_execution import _AGENT_WORKDIR, _truncate
+        from src.tool_execution import agent_cwd, _truncate
        progress_cb = ctx.get("progress_cb")
-        workspace = ctx.get("workspace")
        _subproc_env = ctx.get("subproc_env")
        proc = await asyncio.create_subprocess_exec(
            (sys.executable or "python"), "-I", "-c", content,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=_subproc_env,
-            cwd=workspace or _AGENT_WORKDIR,
+            cwd=agent_cwd(),
        )
        stdout, stderr, rc, timed_out = await _run_subprocess_streaming(
            proc,
@@ -57,7 +57,12 @@ class APIKeyManager:
            # Legacy/wrong shape (e.g. a list) — .items() would raise. Ignore it.
            logger.warning("API keys file has unexpected shape (%s); ignoring", type(encrypted_keys).__name__)
            return {}
-        return encrypted_keys
+
+        return {
+            str(provider): key
+            for provider, key in encrypted_keys.items()
+            if isinstance(key, str)
+        }

    def save(self, provider: str, api_key: str):
        """Save encrypted API key to file.
@@ -82,4 +87,3 @@ class APIKeyManager:
            except (InvalidToken, ValueError) as e:
                logger.warning("Failed to decrypt API key for %s: %s", provider, e)
        return decrypted
-
@@ -809,14 +809,14 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
        import email as _email_mod
        import asyncio as _aio
        from datetime import datetime as _dt, timedelta as _td
-        from routes.email_helpers import _imap_connect, SCHEDULED_DB
+        from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async

        # 1. Pull recent UIDs + From headers cheaply (header-only fetch).
        def _pull_headers():
            results = []
-            conn = _imap_connect(None)
+            conn = _imap_connect(None, owner=owner)
            try:
                conn.select("INBOX", readonly=True)
                status, data = conn.search(None, "ALL")
@@ -868,9 +868,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
        # 3. Eligibility: ≥3 emails AND (no cache OR cache > 30 days old).
        try:
            conn = _sql3.connect(SCHEDULED_DB)
+            owner_clause, owner_params = _email_cache_owner_clause(owner)
            cached = {
                r[0]: r[1] for r in conn.execute(
-                    "SELECT from_address, last_built_at FROM sender_signatures"
+                    f"SELECT from_address, last_built_at FROM sender_signatures WHERE {owner_clause}",
+                    owner_params,
                ).fetchall()
            }
            conn.close()
@@ -901,7 +903,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo

            def _fetch_bodies(_msgs):
                bodies = []
-                conn2 = _imap_connect(None)
+                conn2 = _imap_connect(None, owner=owner)
                try:
                    conn2.select("INBOX", readonly=True)
                    for mm in _msgs:
@@ -978,11 +980,12 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo

            try:
                conn = _sql3.connect(SCHEDULED_DB)
+                owner_value = (owner or "").strip()
                conn.execute(
                    "INSERT OR REPLACE INTO sender_signatures "
-                    "(from_address, signature_text, sample_count, last_built_at, model_used, source) "
-                    "VALUES (?, ?, ?, ?, ?, ?)",
-                    (addr, cached_sig, len(bodies), _dt.utcnow().isoformat(), model, "llm"),
+                    "(from_address, owner, signature_text, sample_count, last_built_at, model_used, source) "
+                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
+                    (addr, owner_value, cached_sig, len(bodies), _dt.utcnow().isoformat(), model, "llm"),
                )
                conn.commit()
                conn.close()
@@ -457,15 +457,25 @@ def _detect_provider(url: str) -> str:

 def _is_self_hosted_openai_compatible(url: str) -> bool:
    """True for custom/local OpenAI-compatible servers (llama.cpp, LM Studio,
-    vLLM, text-generation-webui, etc.) as opposed to api.openai.com itself.
+    vLLM, text-generation-webui, etc.) as opposed to cloud APIs.

    Used to gate llama.cpp-server-specific payload extras (``session_id``,
-    ``cache_prompt``) — sending unrecognized top-level fields to OpenAI's
-    actual API returns a 400 ("Unrecognized request argument"), but
-    self-hosted servers generally ignore unknown fields and many (notably
-    llama.cpp's server) use them for KV-cache slot affinity (issue #2927).
+    ``cache_prompt``) used for KV-cache slot affinity (issue #2927). Strict
+    cloud providers reject unrecognized top-level fields (api.openai.com
+    returns 400, Mistral returns 422 "extra_forbidden", issue #3793), and any
+    unknown OpenAI-compatible host used to be treated as self-hosted, so those
+    fields leaked to every strict provider added as a custom endpoint.
+
+    A server only counts as self-hosted when it also resolves as local:
+    loopback/private/tailscale host, or the endpoint explicitly configured
+    with kind "local". A self-hosted server exposed via a public hostname
+    loses the affinity hint unless its endpoint kind is set to "local" -
+    a lost perf hint, versus a hard 4xx on every request the other way.
    """
-    return _detect_provider(url) == "openai" and not _host_match(url, "openai.com")
+    if _detect_provider(url) != "openai" or _host_match(url, "openai.com"):
+        return False
+    from src.model_context import is_local_endpoint
+    return is_local_endpoint(url)


 def _apply_local_cache_affinity(payload: Dict, url: str, session_id: Optional[str]) -> None:
@@ -681,6 +691,27 @@ def _restricts_temperature(model: str) -> bool:
    m = model.lower()
    return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)

+# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
+# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
+# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
+# Sonnet/Haiku) still accept temperature in [0.0, 1.0], so the omission must be
+# version-gated rather than applied to all `claude-*` models.
+def _anthropic_rejects_temperature(model: str) -> bool:
+    """Check if a native-Anthropic model rejects the temperature field (Opus 4.7+).
+
+    Args:
+        model: Model id as sent to the API (e.g. ``claude-opus-4-7``). A
+            non-string or empty value is treated as "does not reject".
+
+    Returns:
+        True when the id names an Opus release at or above 4.7, including a
+        major-only id of a later major (``claude-opus-5``); False otherwise.
+    """
+    if not isinstance(model, str) or not model:
+        return False
+    # `(?<![a-z])` anchors "opus" to a word boundary so a substring match like
+    # `oct-opus`/`octopus-4-8` can't be read as Opus.
+    #
+    # Both version parts are capped at 1-2 digits and must not be followed by
+    # another digit, so an 8-digit date is never read as a version number:
+    #   - `claude-3-opus-20240229`   -> no match (date is not a major), kept
+    #   - `claude-opus-4-20250514`   -> major 4, no minor -> (4, 0), kept
+    #   - `claude-opus-4-7-20260201` -> (4, 7), temperature omitted
+    #
+    # The minor is optional and defaults to 0 so a major-only id of a later
+    # major (`claude-opus-5`) is still recognised as ">= 4.7". Requiring a
+    # minor would keep sending `temperature` to such a model, which is the
+    # hard-failure direction; omitting it only falls back to the API default.
+    match = re.search(
+        r"(?<![a-z])opus[-_]?(\d{1,2})(?!\d)(?:[-_.](\d{1,2})(?!\d))?",
+        model.lower(),
+    )
+    if not match:
+        return False
+    major = int(match.group(1))
+    minor = int(match.group(2) or 0)
+    return (major, minor) >= (4, 7)
+
 # Models that support structured thinking — may output </think> without opening tag
 _THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")

@@ -784,8 +815,11 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa
        "model": model,
        "messages": chat_messages,
        "max_tokens": max_tokens if max_tokens and max_tokens > 0 else 4096,
-        "temperature": temperature,
    }
+    # Opus 4.7+ removed the sampling parameters — sending `temperature` (even 0.0)
+    # returns HTTP 400. Omit it for those models; older Claude models still take it.
+    if not _anthropic_rejects_temperature(model):
+        payload["temperature"] = temperature
    if system_parts:
        system_text = "\n\n".join(system_parts)
        # Send `system` as a structured text block so we can attach a prompt-cache
@@ -5,6 +5,7 @@ Query and cache model context window sizes from OpenAI-compatible APIs.
 Provides token estimation for context usage tracking.
 """

+import ipaddress
 import logging
 import sys
 from typing import Dict, List, Optional, Tuple
@@ -19,7 +20,20 @@ _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.interna
 _PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
-                     "172.30.", "172.31.", "192.168.", "100.")
+                     "172.30.", "172.31.", "192.168.")
+
+# Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
+# A bare "100." prefix would classify public addresses (e.g. AWS ranges
+# under 100.x outside the CGNAT block) as local; routes/model_routes.py
+# already narrows this the same way for endpoint classification.
+_TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
+
+
+def _in_tailscale_range(host: str) -> bool:
+    """Return True when *host* is an IP literal inside 100.64.0.0/10.
+
+    Anything `ipaddress.ip_address` cannot parse (hostnames, empty strings)
+    yields False rather than raising.
+    """
+    try:
+        address = ipaddress.ip_address(host)
+    except ValueError:
+        # Not an IP literal at all, so it cannot be in the CGNAT block.
+        return False
+    return address in _TAILSCALE_CGNAT


 def _normalize_base_for_compare(url: str) -> str:
@@ -64,7 +78,7 @@ def _configured_endpoint_kind(url: str) -> Optional[str]:
        return None


-def _is_local_endpoint(url: str) -> bool:
+def is_local_endpoint(url: str) -> bool:
    """Check if URL points to a local/private/tailscale address."""
    kind = _configured_endpoint_kind(url)
    if kind in ("api", "proxy"):
@@ -73,7 +87,7 @@ def _is_local_endpoint(url: str) -> bool:
        return True
    try:
        host = urlparse(url).hostname or ""
-        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES)
+        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
    except Exception:
        return False

@@ -219,7 +233,7 @@ def get_context_length(endpoint_url: str, model: str) -> int:
    Falls back to DEFAULT_CONTEXT if unavailable.
    """
    configured_kind = _configured_endpoint_kind(endpoint_url)
-    is_local = _is_local_endpoint(endpoint_url)
+    is_local = is_local_endpoint(endpoint_url)
    # Key on (endpoint_url, model): the same model id can be served by two
    # different remote endpoints with different real context windows (e.g. a
    # capped proxy vs. the full provider), so caching by model id alone would
@@ -273,7 +287,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
        return DEFAULT_CONTEXT

    # Try llama.cpp /slots endpoint first — reports actual serving context
-    if _is_local_endpoint(endpoint_url):
+    if is_local_endpoint(endpoint_url):
        try:
            base = endpoint_url.split("/v1")[0] if "/v1" in endpoint_url else endpoint_url.rsplit("/", 1)[0]
            r = httpx.get(f"{base}/slots", timeout=REQUEST_TIMEOUT)
@@ -337,7 +351,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
    # For local/self-hosted endpoints, trust the API value (user set --max-model-len)
    # For cloud APIs, use the larger value (API can report low defaults)
    if api_ctx and known:
-        _is_local = _is_local_endpoint(endpoint_url)
+        _is_local = is_local_endpoint(endpoint_url)
        if _is_local and api_ctx < known:
            logger.info(f"Local endpoint reports {api_ctx} for {model} (known max: {known}) — using API value")
            return api_ctx
@@ -214,6 +214,24 @@ def _search_like(
    return _rows_to_results(db, shaped, query, context_messages)


+def _fetch_messages_by_id(db, message_ids):
+    """Load many messages (with their session names) in one round trip.
+
+    Replaces a per-hit SELECT (N+1) with a single ``IN (...)`` query.
+
+    Args:
+        db: Session object exposing ``query(...)``.
+        message_ids: Ids of the messages to load; may be empty.
+
+    Returns:
+        A dict mapping message id -> ``(message, session_name)``. Ids with no
+        matching row are simply absent, and an empty input returns ``{}``
+        without querying. The caller restores hit (relevance) order itself.
+    """
+    if not message_ids:
+        return {}
+    joined = db.query(DBChatMessage, DBSession.name).join(
+        DBSession, DBChatMessage.session_id == DBSession.id
+    )
+    lookup = {}
+    for message, session_name in joined.filter(DBChatMessage.id.in_(message_ids)).all():
+        lookup[message.id] = (message, session_name)
+    return lookup
+
+
 def _search_fts(
    db,
    query: str,
@@ -267,19 +285,13 @@ def _search_fts(
    if not hits:
        return None

+    by_id = _fetch_messages_by_id(db, [hit[0] for hit in hits])
    rows = []
    for hit in hits:
-        message_id = hit[0]
-        snippet = hit[1] or ""
-        row = (
-            db.query(DBChatMessage, DBSession.name)
-            .join(DBSession, DBChatMessage.session_id == DBSession.id)
-            .filter(DBChatMessage.id == message_id)
-            .first()
-        )
-        if row:
-            msg, session_name = row
-            rows.append((msg, session_name, snippet))
+        found = by_id.get(hit[0])
+        if found:
+            msg, session_name = found
+            rows.append((msg, session_name, hit[1] or ""))
    return _rows_to_results(db, rows, query, context_messages)


@@ -9,6 +9,7 @@ Extracted from agent_tools.py.

 import asyncio
 import collections
+import contextvars
 import json
 import logging
 import os
@@ -146,7 +147,13 @@ def _resolve_tool_path(raw_path: str) -> str:

    Returns the realpath on success. Raises ValueError on rejection.
    Symlinks are resolved before comparison.
+
+    When a workspace is active for this turn, paths are confined to it instead
+    of the default allowlist (see _resolve_tool_path_in_workspace).
    """
+    ws = get_active_workspace()
+    if ws:
+        return _resolve_tool_path_in_workspace(ws, raw_path)
    if raw_path is None or not str(raw_path).strip():
        raise ValueError("path is required")
    expanded = os.path.expanduser(str(raw_path).strip())
@@ -207,6 +214,55 @@ def _resolve_tool_path_in_workspace(workspace: str, raw_path: str) -> str:



+# ---------------------------------------------------------------------------
+# Active workspace (per-turn, context-local)
+# ---------------------------------------------------------------------------
+# Set ONCE in execute_tool_block from the request's `workspace`. The path
+# resolvers (_resolve_tool_path / _resolve_search_root) and the subprocess cwd
+# helper (agent_cwd) read it from here, so confinement is enforced in a single
+# place: any tool that resolves paths through these helpers is confined
+# automatically and cannot accidentally bypass the workspace. contextvars are
+# task-local, so concurrent turns don't leak into each other.
+_active_workspace: contextvars.ContextVar = contextvars.ContextVar(
+    "agent_active_workspace", default=None
+)
+
+
+def get_active_workspace() -> Optional[str]:
+    """Return the workspace bound in the current context, or None if unset."""
+    bound = _active_workspace.get()
+    return bound
+
+
+def vet_workspace(raw: str) -> Optional[str]:
+    """Canonicalise a requested workspace path, or reject it with None.
+
+    The path is stripped, `~`-expanded and resolved with `realpath`. It is
+    rejected (None) when it is blank, is not an existing directory, is itself
+    a sensitive path per `_is_sensitive_path`, or is a filesystem root.
+
+    The workspace root needs its own check at bind time: with an empty search
+    path the workspace itself is the search root, so a sensitive or
+    host-wide root must never be bound in the first place.
+    """
+    candidate = (raw or "").strip()
+    if not candidate:
+        return None
+    resolved = os.path.realpath(os.path.expanduser(candidate))
+    if not os.path.isdir(resolved):
+        return None
+    if _is_sensitive_path(resolved):
+        return None
+    # A filesystem root is its own dirname (/, C:\, \\server\share). Binding
+    # one would put every absolute path "inside" the workspace and turn the
+    # confinement into host-wide file access.
+    is_filesystem_root = os.path.dirname(resolved) == resolved
+    return None if is_filesystem_root else resolved
+
+
+def agent_cwd() -> str:
+    """Pick the working directory for agent subprocesses.
+
+    Returns the active workspace when one is set, otherwise `_AGENT_WORKDIR`.
+    """
+    workspace = get_active_workspace()
+    if workspace:
+        return workspace
+    return _AGENT_WORKDIR
+
+
 def get_mcp_manager():
    from src import agent_tools
    return agent_tools.get_mcp_manager()
@@ -217,10 +273,15 @@ def get_mcp_manager():
 def _resolve_search_root(raw_path: str) -> str:
    """Resolve + confine a code-nav path (grep/glob/ls).

-    An empty path defaults to the agent's primary root (project data dir) and a
-    supplied path is confined by the global allowlist + sensitive-file policy.
+    With a workspace active, the workspace folder is the root and a supplied
+    path is confined inside it. Otherwise an empty path defaults to the agent's
+    primary root (project data dir) and a supplied path is confined by the
+    global allowlist + sensitive-file policy.
    """
    raw = (raw_path or "").strip()
+    ws = get_active_workspace()
+    if ws:
+        return os.path.realpath(ws) if not raw else _resolve_tool_path_in_workspace(ws, raw)
    if not raw:
        roots = _tool_path_roots()
        return roots[0] if roots else os.path.realpath(".")
@@ -392,7 +453,6 @@ async def _direct_fallback(
    tool: str,
    content: str,
    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
-    workspace: Optional[str] = None,
 ) -> Optional[Dict]:
    _subproc_env = {
        **os.environ,
@@ -405,7 +465,6 @@ async def _direct_fallback(
    try:
        ctx = {
            "progress_cb": progress_cb,
-            "workspace": workspace,
            "subproc_env": _subproc_env,
        }

@@ -448,6 +507,34 @@ async def execute_tool_block(
 ) -> Tuple[str, Dict]:
    """Execute a single tool block. Returns (description, result_dict).

+    Thin wrapper: bind the per-turn workspace (so the path resolvers + subprocess
+    cwd confine to it) for the duration of this call, then delegate. Reset on the
+    way out so the binding never leaks to the next tool call.
+    """
+    token = _active_workspace.set(workspace or None)
+    try:
+        return await _execute_tool_block_impl(
+            block,
+            session_id=session_id,
+            disabled_tools=disabled_tools,
+            owner=owner,
+            progress_cb=progress_cb,
+            tool_policy=tool_policy,
+        )
+    finally:
+        _active_workspace.reset(token)
+
+
+async def _execute_tool_block_impl(
+    block: Any,
+    session_id: Optional[str] = None,
+    disabled_tools: Optional[set] = None,
+    owner: Optional[str] = None,
+    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
+    tool_policy: Optional[Any] = None,
+) -> Tuple[str, Dict]:
+    """Execute a single tool block. Returns (description, result_dict).
+
    `progress_cb` is forwarded to long-running subprocess tools
    (bash, python) so the agent loop can emit `tool_progress` SSE
    events while the command is in flight. Ignored by other tools.
@@ -621,7 +708,7 @@ async def execute_tool_block(
        _is_bg, _bg_cmd = _split_bg_marker(content)
        if _is_bg and _bg_cmd:
            from src import bg_jobs
-            rec = bg_jobs.launch(_bg_cmd, session_id=session_id, cwd=_AGENT_WORKDIR)
+            rec = bg_jobs.launch(_bg_cmd, session_id=session_id, cwd=agent_cwd())
            short = _bg_cmd.strip().split(chr(10))[0][:80]
            desc = f"bash (background): {short}"
            result = {
@@ -644,7 +731,7 @@ async def execute_tool_block(
        first_line = content.split(chr(10))[0][:80]
        desc = f"{tool}: {first_line}"
        result = await _call_mcp_tool(tool, content, progress_cb=progress_cb)
-    elif tool in ("grep", "glob", "ls"):
+    elif tool in ("grep", "glob", "ls", "get_workspace"):
        # Code-navigation tools — no MCP server; run the direct implementation.
        first_line = content.split(chr(10))[0][:80]
        desc = f"{tool}: {first_line}"
@@ -744,7 +831,7 @@ async def execute_tool_block(
        desc = "edit_image"
        result = await do_edit_image(content, owner=owner)
    elif tool == "edit_file":
-        result = await _direct_fallback(tool, content, workspace=workspace) or {"error": "edit failed", "exit_code": 1}
+        result = await _direct_fallback(tool, content) or {"error": "edit failed", "exit_code": 1}
        desc = result.get("output") or result.get("error") or "edit_file"
    elif tool == "trigger_research":
        desc = "trigger_research"
@@ -67,14 +67,15 @@ COLLECTION_NAME = "odysseus_tool_index"
 # Each tool gets a searchable description that helps retrieval.
 # These are richer than the system prompt one-liners — they're for embedding.
 BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
-    "bash": "Run shell commands on the server. Install packages, check files, git operations, system info, and process management. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.",
-    "python": "Execute Python code for computation, data processing, math, scripting, and parsing. Not for writing code for the user. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.",
+    "bash": "Run shell commands on the server. Install packages, git operations, builds, system info, process management. Prefer a dedicated tool whenever one fits the job (file read/write/edit, search, listing); use bash only for what no dedicated tool covers. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.",
+    "python": "Execute Python code for computation, data processing, math, scripting, and parsing. Not for writing code for the user. Prefer a dedicated tool for reading, writing, or searching files; use python only for what no dedicated tool covers. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.",
    "web_search": "Quick single web lookup for a fact, current event, latest/current information, or doc mid-task. Use this instead of bash/curl/python/requests for web searches. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.",
    "web_fetch": "Fetch and read the text content of a specific URL/website the user names (e.g. 'check example.com', 'open this link'). Use when you have a concrete URL; for open-ended lookups use web_search instead.",
    "read_file": "Read a file from disk and return its contents. View source code, config files, logs. Supports an optional line range (offset/limit) for large files.",
    "grep": "Search file CONTENTS for a regex across a directory tree (ripgrep-backed, honours .gitignore). Returns file:line:match. Use to find where code/symbols/strings live — prefer over bash grep.",
    "glob": "Find FILES by glob pattern (e.g. '**/*.py'), newest first. Use to locate files by name/extension — prefer over bash find/ls.",
    "ls": "List a directory's entries (folders then files with sizes). Use to see what's in a folder — prefer over bash ls.",
+    "get_workspace": "Return the absolute path of the active workspace folder the user is working in. File tools are confined to it; the shell starts there but is not sandboxed. Call this first when the user refers to 'the project'/'the code'/'this folder' without giving a path, instead of asking them.",
    "write_file": "Write/create or fully rewrite a file ON DISK (source code, configs, project files). Use for new files or full rewrites — NOT create_document (editor panel) and NOT a bash heredoc.",
    "edit_file": "Edit an existing file ON DISK by exact string replacement (fix a bug, change a function). Shows a diff. The tool for changing files on disk — NOT edit_document (editor panel) and NOT bash sed/heredoc.",
    "create_document": "Create a new document in the editor panel. For code, articles, text content longer than 15 lines, unless an already-open document/email draft is the obvious target. If an email compose draft is open, edit that draft instead of creating another document.",
@@ -25,7 +25,7 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "bash",
-            "description": "Run a shell command (full access)",
+            "description": "Run a shell command (full access). Prefer a dedicated tool whenever one fits the job (reading, writing, editing, searching, or listing files); use bash only for what no dedicated tool covers (installs, git, builds, running programs, system info). Do NOT create or edit files via bash redirects/heredocs/sed -- use the dedicated file tools.",
            "parameters": {
                "type": "object",
                "properties": {
@@ -39,7 +39,7 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "python",
-            "description": "Execute Python code to compute a result or test something",
+            "description": "Execute Python code to compute a result or test something. Prefer a dedicated tool whenever one fits the job (reading, writing, or searching files); use python only for computation, data processing, or scripting no dedicated tool covers.",
            "parameters": {
                "type": "object",
                "properties": {
@@ -141,6 +141,14 @@ FUNCTION_TOOL_SCHEMAS = [
            }
        }
    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_workspace",
+            "description": "Return the absolute path of the active workspace folder the user is working in. File tools are confined to it; the shell starts there but is not sandboxed. Call this first when the user refers to 'the project'/'the code'/'this folder' without a path, instead of asking them. Takes no arguments.",
+            "parameters": {"type": "object", "properties": {}, "required": []}
+        }
+    },
    {
        "type": "function",
        "function": {
@@ -1246,6 +1254,8 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
            content = args.get("path", "")
    elif tool_type in ("grep", "glob", "ls"):
        content = json.dumps(args) if args else "{}"
+    elif tool_type == "get_workspace":
+        content = ""
    elif tool_type == "write_file":
        content = args.get("path", "") + "\n" + args.get("content", "")
    elif tool_type == "edit_file":
@@ -20,6 +20,7 @@ NON_ADMIN_BLOCKED_TOOLS = {
    "grep",
    "glob",
    "ls",
+    "get_workspace",
    "search_chats",
    "manage_memory",
    "manage_skills",
@@ -66,6 +67,7 @@ PLAN_MODE_READONLY_TOOLS = {
    "grep",
    "glob",
    "ls",
+    "get_workspace",
    "web_search",
    "web_fetch",
    "search_chats",
@@ -352,6 +352,86 @@ class UploadHandler:
                return dict(info)
        return None

+    def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str:
+        """Return the storage key to use after renaming an owned upload row."""
+        if isinstance(key, str) and ":" in key:
+            owner_part, rest = key.split(":", 1)
+            if owner_part.strip().lower() == old_owner:
+                return f"{new_owner}:{rest}"
+        file_hash = info.get("hash")
+        if file_hash:
+            return f"{new_owner}:{file_hash}"
+        return key
+
+    def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str:
+        """Choose a deterministic collision key without overwriting an existing row."""
+        if base_key not in used_keys and base_key not in reserved_keys:
+            return base_key
+
+        upload_id = str(info.get("id") or "renamed").strip() or "renamed"
+        candidate = f"{base_key}:{upload_id}"
+        if candidate not in used_keys and candidate not in reserved_keys:
+            return candidate
+
+        index = 2
+        while True:
+            candidate = f"{base_key}:{upload_id}:{index}"
+            if candidate not in used_keys and candidate not in reserved_keys:
+                return candidate
+            index += 1
+
+    def rename_owner(self, old_owner: str, new_owner: str) -> int:
+        """Rename upload metadata ownership from old_owner to new_owner.
+
+        Upload rows are keyed by owner-qualified hashes for dedupe and also
+        carry an `owner` field for access checks. Both must move together when
+        usernames change.
+        """
+        old_owner_normalized = str(old_owner or "").strip().lower()
+        new_owner = str(new_owner or "").strip()
+        if not old_owner_normalized or not new_owner:
+            return 0
+        if old_owner_normalized == new_owner.lower():
+            return 0
+
+        uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
+        with self._index_lock:
+            current = self._load_upload_index()
+            if not current:
+                return 0
+
+            updated = {}
+            renamed = 0
+            original_keys = set(current.keys())
+
+            for key, info in current.items():
+                new_key = key
+                new_info = info
+                if isinstance(info, dict) and str(info.get("owner", "")).strip().lower() == old_owner_normalized:
+                    new_info = dict(info)
+                    new_info["owner"] = new_owner
+                    base_key = self._renamed_upload_index_key(key, new_info, old_owner_normalized, new_owner)
+                    new_key = self._unique_upload_index_key(
+                        base_key,
+                        set(updated.keys()),
+                        original_keys - {key},
+                        new_info,
+                    )
+                    if new_key != base_key:
+                        logger.warning(
+                            "Upload owner rename key collision for %s -> %s at %s; preserving row as %s",
+                            old_owner_normalized,
+                            new_owner,
+                            base_key,
+                            new_key,
+                        )
+                    renamed += 1
+                updated[new_key] = new_info
+
+            if renamed:
+                self._atomic_write_json(uploads_db_path, updated)
+            return renamed
+
    def _find_upload_path(self, upload_id: str) -> Optional[str]:
        """Find an upload file by ID while staying inside upload_dir."""
        if not self.validate_upload_id(upload_id):
@@ -202,6 +202,18 @@ class WebhookManager:
        self._client = httpx.AsyncClient(timeout=10, follow_redirects=False)
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._api_key_manager = api_key_manager
+        # Strong references to in-flight fire-and-forget tasks. asyncio only
+        # keeps weak references to tasks, so without this the GC can collect a
+        # delivery task mid-flight and the webhook is silently never sent.
+        self._bg_tasks: set = set()
+
+    def _spawn_tracked(self, coro):
+        """Schedule a background task and hold a strong reference until it
+        finishes, so it can't be garbage-collected before delivery completes."""
+        task = asyncio.ensure_future(coro)
+        self._bg_tasks.add(task)
+        task.add_done_callback(self._bg_tasks.discard)
+        return task

    def set_loop(self, loop: asyncio.AbstractEventLoop):
        self._loop = loop
@@ -223,8 +235,8 @@ class WebhookManager:
        if event not in ALLOWED_EVENTS:
            return
        try:
-            loop = asyncio.get_running_loop()
-            loop.create_task(self.fire(event, payload))
+            asyncio.get_running_loop()
+            self._spawn_tracked(self.fire(event, payload))
        except RuntimeError:
            # Called from a sync thread (e.g. sync FastAPI route in threadpool)
            if self._loop and self._loop.is_running():
@@ -243,7 +255,7 @@ class WebhookManager:

        for wh in matching:
            decrypted_secret = self._decrypt_secret(wh.secret)
-            asyncio.create_task(self._deliver(wh.id, wh.url, decrypted_secret, event, payload))
+            self._spawn_tracked(self._deliver(wh.id, wh.url, decrypted_secret, event, payload))

    async def deliver_test(self, webhook_id: str, url: str, encrypted_secret: Optional[str]):
        """Public method for the test-webhook route."""
@@ -4,6 +4,7 @@
 // ============================================
 import Storage from './js/storage.js';
 import uiModule from './js/ui.js';
+import workspaceModule from './js/workspace.js';
 import fileHandlerModule from './js/fileHandler.js';
 import modelsModule from './js/models.js';
 import ragModule from './js/rag.js';
@@ -1159,7 +1160,7 @@ function initializeEventListeners() {
        if (!p.can_use_bash) {
          const bashToggle = document.getElementById('bash-toggle');
          if (bashToggle) bashToggle.closest('.chat-input-toggle')?.style.setProperty('display', 'none');
-          const bashBtn = document.getElementById('tool-bash-btn');
+          const bashBtn = document.getElementById('bash-toggle-btn');
          if (bashBtn) bashBtn.style.display = 'none';
        }
        // Hide document button
@@ -1176,11 +1177,7 @@ function initializeEventListeners() {
          const resOverflow = document.getElementById('overflow-research-btn');
          if (resOverflow) resOverflow.style.display = 'none';
        }
-        // Hide image generation options
-        if (!p.can_generate_images) {
-          const imgBtn = document.getElementById('tool-image-btn');
-          if (imgBtn) imgBtn.style.display = 'none';
-        }
+
      }
    })
    .catch(() => {});
@@ -1626,6 +1623,8 @@ function initializeEventListeners() {
      // Slide the pill to the active button
      const toggle = agentBtn.closest('.mode-toggle');
      if (toggle) toggle.classList.toggle('mode-chat', mode === 'chat');
+      // Workspace pill + overflow entry are agent-only - hide immediately (no flash).
+      try { workspaceModule.applyMode(mode); } catch (_) {}
      // Delay tool glow-up for a staggered effect
      setTimeout(() => applyModeToToggles(mode), 500);
    }
@@ -1701,6 +1700,7 @@ function initializeEventListeners() {
  }
  setupToggle('web-toggle-btn', 'web-toggle', 'web');
  setupToggle('bash-toggle-btn', 'bash-toggle', 'bash');
+  try { workspaceModule.initWorkspace(); } catch (_) {}

  // Document editor toggle (special: uses module panel, not a checkbox)
  const overflowDocBtn = el('overflow-doc-btn');
@@ -1040,6 +1040,13 @@
                <span>RAG</span>
                <span class="overflow-active-dot"></span>
              </button>
+              <button type="button" class="overflow-menu-item" id="overflow-workspace-btn">
+                <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+                  <path d="M3 7a2 2 0 0 1 2-2h4l2 2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/>
+                </svg>
+                <span>Workspace</span>
+                <span class="overflow-active-dot"></span>
+              </button>
              <!-- Inline "deep research mode" toggle removed (superseded by the
                   Deep Research sidebar / trigger_research). The hidden
                   #research-toggle checkbox is kept inert so existing JS refs
@@ -1071,6 +1078,12 @@
              <polyline points="4 17 10 11 4 5"/><line x1="12" y1="19" x2="20" y2="19"/>
            </svg>
          </button>
+          <!-- Workspace indicator (hidden until a folder is set) -->
+          <button type="button" class="input-icon-btn tool-indicator" title="Workspace - click to clear" id="workspace-indicator-btn" aria-label="Clear workspace" style="display:none;">
+            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M3 7a2 2 0 0 1 2-2h4l2 2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/></svg>
+            <span style="font-size:11px;margin-left:2px;max-width:120px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;" id="workspace-indicator-name"></span>
+            <svg class="tool-indicator-x" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round"><line x1="6" y1="6" x2="18" y2="18"/><line x1="18" y1="6" x2="6" y2="18"/></svg>
+          </button>
          <!-- RAG toolbar indicator (hidden until active) -->
          <button type="button" class="input-icon-btn tool-indicator" title="RAG active — click to deactivate" id="rag-indicator-btn" style="display:none;">
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
@@ -2342,7 +2355,7 @@
 <script type="module" src="/static/js/chatRenderer.js"></script>
 <script type="module" src="/static/js/codeRunner.js"></script>
 <script type="module" src="/static/js/chatStream.js"></script>
-<script type="module" src="/static/js/chat.js?v=20260604s"></script>
+<script type="module" src="/static/js/chat.js?v=20260609ws"></script>
 <script type="module" src="/static/js/cookbook.js"></script>
 <script src="/static/js/cookbookSchedule.js"></script>
 <script type="module" src="/static/js/search-chat.js"></script>
@@ -802,15 +802,15 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
        } else {
          fd.append('use_web', 'true');
        }
+      } else if (isAgentMode) {
+        fd.append('allow_web_search', 'false');
      }
      if (el('research-toggle').checked) {
        fd.append('use_research', 'true');
        // Research always runs in chat mode — override agent if set
        fd.set('mode', 'chat');
      }
-      if (el('bash-toggle').checked) {
-        fd.append('allow_bash', 'true');
-      }
+      fd.append('allow_bash', el('bash-toggle').checked ? 'true' : 'false');
      const ragChk = el('rag-toggle');
      if (ragChk && !ragChk.checked) {
        fd.append('use_rag', 'false');
@@ -819,6 +819,10 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
      if (incognitoChk && incognitoChk.checked) {
        fd.append('incognito', 'true');
      }
+      const _ws = (Storage.KEYS && Storage.get(Storage.KEYS.WORKSPACE, '')) || '';
+      if (_ws) {
+        fd.append('workspace', _ws);
+      }
      if (presetsModule.getSelectedPreset()) {
        fd.append('preset_id', presetsModule.getSelectedPreset());
      }
@@ -1781,6 +1785,21 @@ import { wireArrowUpRecall, getLastUserMessageFromChatHistory } from './composer
                  _sourcesData = json.data; _sourcesType = 'web';
                  _sourcesHtml = _buildSourcesBox(json.data, 'web');
                }
+              } else if (json.type === 'workspace_rejected') {
+                // Server refused to bind the posted workspace (deleted folder,
+                // file path, sensitive dir, filesystem root). Clear the stored
+                // value so the pill stops claiming a confinement that is not in
+                // effect, and tell the user.
+                const _wsPath = (json.data && json.data.path) || '';
+                import('./workspace.js').then((m) => {
+                  const ws = m.default || m;
+                  if (ws && ws.setWorkspace) ws.setWorkspace('');
+                });
+                uiModule.showToast(
+                  `Workspace ${_wsPath || '(unknown)'} is no longer usable; running without confinement`,
+                  6000
+                );
+                continue;
              } else if (json.type === 'model_fallback') {
                // Model went offline — switched to fallback
                var _fbData = json.data || {};
@@ -406,7 +406,7 @@ export const ERROR_PATTERNS = [
      { label: 'Repair kernel package', action: () => {
        const _vp = (_envState.env === 'venv' && _envState.envPath)
          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
-        _launchServeTask('repair-kernels', 'pip-update', `${_vp} -m pip install --user --break-system-packages kernels<0.15`);
+        _launchServeTask('repair-kernels', 'pip-update', `${_vp} -m pip install --user --break-system-packages "kernels<0.15"`);
      }},
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
    ],
@@ -3547,6 +3547,7 @@ async function _pollBackgroundStatus() {
          updates.status = live.status === 'ready' ? 'ready' : 'running';
        }
        if (live.progress && live.progress !== task.progress) updates.progress = live.progress;
+        if (live.exit_code != null && live.exit_code !== task.exit_code) updates.exit_code = live.exit_code;
        if (live.output_tail) {
          const previous = String(task.output || '');
          const tail = String(live.output_tail || '');
@@ -17,6 +17,7 @@ import chatRenderer from './chatRenderer.js';
 import spinnerModule from './spinner.js';
 import themeModule from './theme.js';
 import documentModule from './document.js';
+import workspaceModule from './workspace.js';
 import settingsModule from './settings.js';
 import cookbookModule from './cookbook.js';
 import { EVAL_PROMPTS } from './compare/index.js';
@@ -1229,6 +1230,40 @@ async function _cmdToggleDoc(args, ctx) {
  return true;
 }

+// Workspace: confine the agent's file/shell tools to a folder. Not a boolean -
+// show / set <path> / clear / pick (open the directory browser).
+async function _cmdWorkspace(args, ctx) {
+  const sub = (args[0] || '').toLowerCase();
+  const rest = args.slice(1).join(' ').trim();
+  const cur = workspaceModule.getWorkspace();
+  if (!sub || sub === 'show' || sub === 'status' || sub === 'info') {
+    slashReply(cur ? `Workspace: <code>${uiModule.esc(cur)}</code>` : 'No workspace set. <code>/workspace pick</code> or <code>/workspace set /path</code>.');
+    return true;
+  }
+  if (sub === 'set' || sub === 'cd' || sub === 'use') {
+    if (!rest) { slashReply('Usage: <code>/workspace set /absolute/path</code>'); return true; }
+    // Validate server-side before persisting so the pill never claims a
+    // workspace the backend will refuse to bind (typo, file path, deleted
+    // folder, sensitive dir, filesystem root).
+    workspaceModule.vetAndSetWorkspace(rest).then(({ ok, path }) => {
+      if (ok) slashReply(`Workspace set: <code>${uiModule.esc(path)}</code>`);
+      else slashReply(`Not a usable workspace folder: <code>${uiModule.esc(rest)}</code>. It must be an existing directory, not a filesystem root or sensitive path.`);
+    });
+    return true;
+  }
+  if (sub === 'clear' || sub === 'off' || sub === 'none' || sub === 'unset') {
+    workspaceModule.clearWorkspace();
+    slashReply('Workspace cleared.');
+    return true;
+  }
+  if (sub === 'pick' || sub === 'browse' || sub === 'open') {
+    workspaceModule.openWorkspaceBrowser();
+    return true;
+  }
+  slashReply('Usage: <code>/workspace</code> · <code>set /path</code> · <code>clear</code> · <code>pick</code>');
+  return true;
+}
+
 async function _cmdToggleShow(args, ctx) {
  const name = (args[0] || '').toLowerCase();
  const val = (args[1] || '').toLowerCase();
@@ -5731,6 +5766,14 @@ const COMMANDS = {
      '_show':     { handler: _cmdToggleShow,      alias: [],     help: 'Show all toggle states',  usage: '/toggle' }
    }
  },
+  workspace: {
+    alias: ['ws'],
+    category: 'Agent',
+    help: 'Set the folder the agent works in',
+    handler: _cmdWorkspace,
+    noUserBubble: true,
+    usage: '/workspace [set <path> | clear | pick]',
+  },
  memory: {
    alias: ['m'],
    category: 'Memory',
@@ -23,7 +23,8 @@ export const KEYS = {
  MCP_ACTIVE: 'odysseus-mcp-active',
  SECTION_ORDER: 'sidebar-section-order',
  ADMIN_LAST_TAB: 'admin-last-tab',
-  DENSITY: 'odysseus-density'
+  DENSITY: 'odysseus-density',
+  WORKSPACE: 'odysseus-workspace'
 };

 /**
@@ -0,0 +1,208 @@
+// static/js/workspace.js
+//
+// Workspace picker: browse server directories in a draggable modal, choose a
+// folder, and show it as a removable pill in the chat input bar. While set, the
+// chat request sends `workspace` so the agent's file/shell tools are confined
+// to that folder (see routes/chat_routes.py + src/tool_execution.py).
+
+import Storage, { KEYS } from './storage.js';
+import uiModule from './ui.js';
+import { makeWindowDraggable } from './windowDrag.js';
+
+const API_BASE = window.location.origin;
+// Same folder glyph as the overflow menu item + pill (not an emoji).
+const _FOLDER_SVG = '<svg class="workspace-row-icon" width="15" height="15" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M3 7a2 2 0 0 1 2-2h4l2 2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/></svg>';
+let _modal = null;
+let _curPath = '';
+
+export function getWorkspace() {
+  return Storage.get(KEYS.WORKSPACE, '') || '';
+}
+
+function _basename(p) {
+  if (!p) return '';
+  // Handle both POSIX (/) and Windows (\) separators.
+  const parts = p.replace(/[\\/]+$/, '').split(/[\\/]/);
+  return parts[parts.length - 1] || p;
+}
+
+// Workspace only applies to agent mode (it scopes the file/shell tools), so the
+// pill + overflow entry are hidden in chat mode, like the bash toggle.
+function _isChatMode() {
+  const b = document.getElementById('mode-chat-btn');
+  return !!(b && b.classList.contains('active'));
+}
+
+export function syncWorkspaceIndicator(path) {
+  const chat = _isChatMode();
+  const pill = document.getElementById('workspace-indicator-btn');
+  const name = document.getElementById('workspace-indicator-name');
+  const overflow = document.getElementById('overflow-workspace-btn');
+  if (pill) {
+    pill.style.display = (path && !chat) ? '' : 'none';
+    pill.classList.toggle('active', !!path);
+    if (path) pill.title = `Workspace: ${path}\nFile tools are confined here; shell commands start here but are not sandboxed and can reach outside it.\nClick to clear.`;
+  }
+  if (name) name.textContent = path ? _basename(path) : '';
+  if (overflow) {
+    overflow.style.display = chat ? 'none' : '';
+    overflow.classList.toggle('active', !!path);
+  }
+  // Recompute the "+" overflow dot (app.js owns updatePlusDot via this event).
+  try { document.dispatchEvent(new CustomEvent('overflow-state-change')); } catch (_) {}
+}
+
+// Called by the agent/chat mode toggle so the pill + overflow entry follow mode.
+export function applyMode(_mode) {
+  syncWorkspaceIndicator(getWorkspace());
+}
+
+export function setWorkspace(path) {
+  if (path) Storage.set(KEYS.WORKSPACE, path);
+  else Storage.remove(KEYS.WORKSPACE);
+  syncWorkspaceIndicator(path || '');
+}
+
+/**
+ * Validate a manually entered path server-side, then persist the canonical
+ * form. Returns {ok, path|null}. Without this, a typo / file path / deleted
+ * folder / filesystem root would be stored and shown as active while the
+ * backend silently refuses to bind it on every send.
+ */
+export async function vetAndSetWorkspace(path) {
+  try {
+    const res = await fetch(`${API_BASE}/api/workspace/vet?path=${encodeURIComponent(path)}`, { credentials: 'same-origin' });
+    if (!res.ok) return { ok: false, path: null };
+    const data = await res.json();
+    if (data.ok && data.path) {
+      setWorkspace(data.path);
+      return { ok: true, path: data.path };
+    }
+    return { ok: false, path: null };
+  } catch (e) {
+    return { ok: false, path: null };
+  }
+}
+
+export function clearWorkspace() {
+  setWorkspace('');
+  if (uiModule && uiModule.showToast) uiModule.showToast('Workspace cleared');
+}
+
+async function _load(path) {
+  const url = `${API_BASE}/api/workspace/browse${path ? `?path=${encodeURIComponent(path)}` : ''}`;
+  const res = await fetch(url, { credentials: 'same-origin' });
+  if (!res.ok) throw new Error(`browse failed: ${res.status}`);
+  return res.json();
+}
+
+function _render(data) {
+  _curPath = data.path;
+  const body = _modal.querySelector('#workspace-body');
+  const pathEl = _modal.querySelector('#workspace-cur-path');
+  if (pathEl) {
+    // Reflect the resolved (realpath) location back into the editable field.
+    pathEl.value = data.path;
+    pathEl.title = data.path;
+  }
+  let rows = '';
+  if (data.parent) {
+    rows += `<div class="workspace-row workspace-up" data-path="${encodeURIComponent(data.parent)}">↑ ..</div>`;
+  }
+  for (const d of data.dirs) {
+    // Backend supplies the full child path (os.path.join → cross-platform).
+    rows += `<div class="workspace-row" data-path="${encodeURIComponent(d.path)}">${_FOLDER_SVG}<span>${uiModule.esc(d.name)}</span></div>`;
+  }
+  if (data.truncated) {
+    rows += '<div class="workspace-empty">Too many folders to list. Type or paste a path above to jump in.</div>';
+  }
+  if (!data.dirs.length && !data.parent) rows = '<div class="workspace-empty">No subfolders</div>';
+  body.innerHTML = rows || '<div class="workspace-empty">No subfolders</div>';
+  body.querySelectorAll('.workspace-row').forEach((row) => {
+    row.addEventListener('click', () => _navigate(decodeURIComponent(row.dataset.path)));
+  });
+  // Filesystem roots (and sensitive dirs) can be browsed through but never
+  // bound as the workspace; the backend rejects them too.
+  const useBtn = _modal.querySelector('#workspace-use');
+  if (useBtn) {
+    useBtn.disabled = data.selectable === false;
+    useBtn.title = data.selectable === false ? 'This folder cannot be used as a workspace' : '';
+  }
+}
+
+async function _navigate(path) {
+  try {
+    _render(await _load(path));
+  } catch (e) {
+    if (uiModule && uiModule.showError) uiModule.showError('Could not open folder');
+  }
+}
+
+/**
+ * Return the workspace picker modal, building it on first use.
+ *
+ * The element is created once, appended to <body> hidden, cached in `_modal`,
+ * and reused on later calls. Building it wires up: the close and cancel
+ * buttons, Enter-to-navigate on the path field, the "Use this folder" button,
+ * and dragging via `makeWindowDraggable`.
+ */
+function _getModal() {
+  // Already built: hand back the cached element.
+  if (_modal) return _modal;
+  _modal = document.createElement('div');
+  _modal.id = 'workspace-modal';
+  _modal.className = 'modal';
+  // Created hidden; openWorkspaceBrowser switches it to 'flex'.
+  _modal.style.display = 'none';
+  _modal.innerHTML = `
+    <div class="modal-content">
+      <div class="modal-header">
+        <h4><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:6px"><path d="M3 7a2 2 0 0 1 2-2h4l2 2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/></svg>Select workspace</h4>
+        <button class="close-btn" id="workspace-close" aria-label="Close">✖</button>
+      </div>
+      <input type="text" class="styled-prompt-input workspace-cur" id="workspace-cur-path"
+             spellcheck="false" autocomplete="off" autocapitalize="off" autocorrect="off"
+             placeholder="Type or paste a folder path, then press Enter" />
+      <p class="muted workspace-note">File tools are <strong>confined</strong> to this folder. Shell commands start here but are <strong>not sandboxed</strong> and can reach outside it. A workspace scopes the tools; it is not a security boundary.</p>
+      <div class="modal-body workspace-body" id="workspace-body"></div>
+      <div class="modal-footer workspace-footer">
+        <button type="button" class="confirm-btn confirm-btn-secondary" id="workspace-cancel">Cancel</button>
+        <button type="button" class="confirm-btn confirm-btn-primary" id="workspace-use">Use this folder</button>
+      </div>
+    </div>`;
+  document.body.appendChild(_modal);
+  // Both the header "✖" and the footer "Cancel" just hide the modal.
+  _modal.querySelector('#workspace-close').addEventListener('click', closeWorkspaceBrowser);
+  _modal.querySelector('#workspace-cancel').addEventListener('click', closeWorkspaceBrowser);
+  // Editable path bar: Enter navigates to a typed/pasted folder.
+  _modal.querySelector('#workspace-cur-path').addEventListener('keydown', (e) => {
+    if (e.key === 'Enter') {
+      e.preventDefault();
+      const v = e.target.value.trim();
+      // Blank input is ignored rather than navigating to the default listing.
+      if (v) _navigate(v);
+    }
+  });
+  // Bind whatever folder was rendered last (`_curPath` is set by _render),
+  // confirm with a toast when available, then hide the modal.
+  _modal.querySelector('#workspace-use').addEventListener('click', () => {
+    setWorkspace(_curPath);
+    if (uiModule && uiModule.showToast) uiModule.showToast(`Workspace set: ${_basename(_curPath)}`);
+    closeWorkspaceBrowser();
+  });
+  const content = _modal.querySelector('.modal-content');
+  const header = _modal.querySelector('.modal-header');
+  // Dragging is only enabled when both the content and header elements exist.
+  if (content && header) makeWindowDraggable(_modal, { content, header });
+  return _modal;
+}
+
+export async function openWorkspaceBrowser() {
+  const modal = _getModal();
+  modal.style.display = 'flex';
+  try {
+    _render(await _load(getWorkspace() || ''));
+  } catch (e) {
+    if (uiModule && uiModule.showError) uiModule.showError('Could not browse folders');
+  }
+}
+
+export function closeWorkspaceBrowser() {
+  if (_modal) _modal.style.display = 'none';
+}
+
+export function initWorkspace() {
+  // Restore persisted workspace into the pill on load.
+  syncWorkspaceIndicator(getWorkspace());
+  const overflow = document.getElementById('overflow-workspace-btn');
+  if (overflow) overflow.addEventListener('click', openWorkspaceBrowser);
+  const pill = document.getElementById('workspace-indicator-btn');
+  if (pill) pill.addEventListener('click', clearWorkspace);
+}
+
+export default { initWorkspace, openWorkspaceBrowser, getWorkspace, setWorkspace, vetAndSetWorkspace, clearWorkspace, syncWorkspaceIndicator, applyMode };
@@ -36606,3 +36606,48 @@ body.theme-frosted .modal {
   the input beside it (.confirm-btn won't stretch on its own). */
 .ask-user-other-send { flex-shrink: 0; white-space: nowrap; min-height: 39px; }
 .ask-user-other-send:disabled { opacity: 0.5; cursor: default; }
+
+/* ── Workspace picker ───────────────────────────────────────────── */
+/* Layout (width/flex column/max-height) inherited from base .modal-content. */
+/* Editable path/address bar: reuses .styled-prompt-input for border/bg/radius/
+   focus ring (set in the element's class list). Overrides only the deltas:
+   mono font, and full-bleed via flex stretch with no horizontal margin (the
+   modal-content's 10px padding is the gutter) instead of the base width:100%,
+   which overflowed against the overflow:auto scrollbar. */
+.workspace-cur {
+  align-self: stretch;
+  width: auto;
+  min-width: 0;
+  margin: 4px 0 8px;
+  font-family: var(--mono, monospace);
+  font-size: 12px;
+}
+/* flex/overflow inherited from base .modal-body; only the padding differs. */
+.workspace-body { padding: 6px 0; }
+.workspace-row {
+  padding: 7px 18px;
+  cursor: pointer;
+  font-size: 13px;
+  display: flex;
+  align-items: center;
+  gap: 8px;
+}
+.workspace-row > span {
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+.workspace-row-icon { flex-shrink: 0; opacity: 0.75; }
+.workspace-row:hover {
+  background: color-mix(in srgb, var(--border) 20%, transparent);
+}
+.workspace-up { opacity: 0.7; }
+.workspace-empty { padding: 14px 18px; opacity: 0.5; font-size: 13px; }
+.workspace-footer {
+  display: flex;
+  justify-content: flex-end;
+  gap: 8px;
+  padding: 10px 18px;
+  border-top: 1px solid var(--border);
+}
+.workspace-note { margin: 0 0 8px; font-size: 11px; line-height: 1.4; }
@@ -0,0 +1,202 @@
+# Test Layout Inventory
+
+## Purpose
+
+Inventory for the first low-risk split of the flat `tests/` directory
+(issue #3712, parent #2523). This document only records *what* should move
+first and *why*; it moves nothing. The actual move is a separate, mechanical
+PR that relocates the listed files verbatim and changes no test content.
+
+The target layout and category definitions come from
+[`TESTING_STANDARD.md`](./TESTING_STANDARD.md); the collection-time markers
+come from [`_taxonomy.py`](./_taxonomy.py), which classifies by **filename
+tokens only** (paths are ignored, except the `tests/helpers/` rule). A file
+keeps its `area_*`/`sub_*` markers when moved into a subdirectory, and
+`conftest.py` discovers marker names recursively (`rglob`), so a move does not
+disturb marker registration or focused selection.
+
+## Current low-risk candidate groups
+
+Groups whose tests need no route/app setup and no real DB/session setup:
+
+1. **CLI / script tests** (`area_cli`, 28 files) - load `scripts/` entry
+   points via `tests.helpers.cli_loader.load_script`; DB access is stubbed
+   with `tests.helpers.db_stubs` (`SessionLocal` is a plain stub attribute).
+   No `TestClient`, no FastAPI app import, no SQLite files.
+2. **Helper self-tests** (`area_helpers`) - e.g. `test_helpers_import_state.py`,
+   `test_db_stubs_helper.py`. Safe but tiny (two files), and they test the
+   shared helpers from the #3685 audit (merged) that the rest of the suite
+   depends on; little payoff as a first slice.
+3. **Pure unit / parsing tests** (`area_unit`) - `*_nonstring.py`,
+   `*_nondict.py`, parsing tests. Large and heterogeneous; some touch
+   provider/session modules, so the boundary is less crisp.
+4. **Static checks** - e.g. `test_readme_ascii_fenced.py`,
+   `test_docs_no_orphan_images.py`. Safe but tiny and `uncategorized` in the
+   taxonomy, so a move buys little and matches no existing marker.
+
+Not candidates for the first move (per #3712 guidance): security/owner-scope
+tests, route/API tests, DB/session-heavy tests, auth/session concurrency
+tests, and the taxonomy/runner infrastructure tests that changed recently
+(#3491, #3556, #3659, #3711).
+
+## Recommended first move
+
+**CLI / script tests → `tests/cli/`**
+
+Why this group over the alternatives:
+
+- Lowest coupling: every file imports only the script under test (via
+  `cli_loader`) plus `tests.helpers` stubs - no app, no routes, no real DB.
+- Crisp, machine-checkable boundary: the set is exactly the files classified
+  `area_cli` by `_taxonomy.py`, so before/after selection counts can be
+  compared mechanically.
+- Already the planned target dir for this category in `TESTING_STANDARD.md`
+  (`tests/cli/`).
+- Absolute imports (`from tests.helpers...`) and unique basenames mean no
+  import-order or module-name collisions after the move.
+- Lower risk than helper self-tests (tiny group, little payoff), unit tests
+  (fuzzy boundary), or anything security/route/session-shaped.
+
+## Files included in the first move
+
+The 28 files classified `area_cli` (verified against `_taxonomy.py`):
+
+Note: this inventory was refreshed against current `dev` after `tests/test_research_cli_status.py` was added to the `area_cli` set.
+
+- `tests/test_calendar_cli_name.py`
+- `tests/test_contacts_cli_rows.py`
+- `tests/test_cookbook_cli_state.py`
+- `tests/test_docs_cli_content_length.py`
+- `tests/test_gallery_cli_album_count.py`
+- `tests/test_gallery_cli_preview.py`
+- `tests/test_logs_cli_resolve_nonstring.py`
+- `tests/test_mail_cli_read_empty_fetch.py`
+- `tests/test_mail_cli_recipients.py`
+- `tests/test_mcp_cli_env_serialize.py`
+- `tests/test_mcp_cli_json.py`
+- `tests/test_memory_cli_rows.py`
+- `tests/test_notes_cli_items.py`
+- `tests/test_personal_cli_rows.py`
+- `tests/test_preset_cli_invalid_entries.py`
+- `tests/test_preset_cli_set_corrupt_entry.py`
+- `tests/test_preset_cli_store.py`
+- `tests/test_research_cli_preview.py`
+- `tests/test_research_cli_status_filter.py`
+- `tests/test_research_cli_status.py`
+- `tests/test_research_cli_store.py`
+- `tests/test_sessions_cli.py`
+- `tests/test_signature_cli_export.py`
+- `tests/test_skills_cli_preview.py`
+- `tests/test_skills_cli_rows.py`
+- `tests/test_tasks_cli_preview.py`
+- `tests/test_theme_cli_store.py`
+- `tests/test_webhook_cli_mask.py`
+
+## Files intentionally excluded
+
+- `tests/test_backup_cli_security.py` - classifies as `area_security`
+  (security outranks cli in the taxonomy); moving it into `tests/cli/` would
+  make the directory disagree with its marker. It belongs with the security
+  group in a later phase.
+- `tests/test_run_focus.py`, `tests/test_taxonomy.py` - taxonomy/runner
+  infrastructure tests, recently changed (#3556, #3659); they also pin
+  flat-layout paths (e.g. `tests/test_auth_config_lock_concurrency.py` in
+  `test_run_focus.py`), so they stay put.
+- Script-like but `uncategorized` files - `test_pr_blocker_audit.py`,
+  `test_update_database_script.py`, `test_windows_update_script.py`,
+  `test_setup_admin_user.py`, `test_amd_gpu_check_args.py`, `test_hwfit_*.py`.
+  They exercise `scripts/` too, but moving them would make `tests/cli/`
+  diverge from the `area_cli` marker set. Reclassify or move them in a later,
+  separate slice.
+- Everything else (security, routes, services, unit, js, helpers) - out of
+  scope for the first move by design.
+
+## How this was verified
+
+Read-only checks, run from the repo root on this branch. Note the real API is
+`classify_test_path` (there is no `classify_test_file`).
+
+```bash
+# Compute the area_cli set and confirm test_backup_cli_security.py is
+# area_security. Expected: 28 files, then "security".
+.venv/bin/python - <<'PY'
+from pathlib import Path
+from tests._taxonomy import classify_test_path
+
+cli = [p for p in sorted(Path("tests").glob("test_*.py"))
+       if classify_test_path(p).area == "cli"]
+print(len(cli))
+for p in cli:
+    print(p)
+print(classify_test_path("tests/test_backup_cli_security.py").area)
+PY
+
+# Coupling check across the CLI files. Expected: the only hits are
+# "SessionLocal" as stub attribute names passed to tests.helpers.db_stubs;
+# no TestClient, FastAPI, create_app, sqlite, or dependency_overrides.
+rg -n "TestClient|FastAPI|create_app|SessionLocal|sqlite|dependency_overrides" \
+  tests/test_*cli*.py tests/test_sessions_cli.py
+
+# Hard-coded flat paths to the exact CLI files in CI config, scripts, and docs. Expected: no matches.
+.venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
+from pathlib import Path
+from tests._taxonomy import classify_test_path
+
+for path in sorted(Path("tests").glob("test_*.py")):
+    if classify_test_path(path).area == "cli":
+        print(path)
+PY2
+
+rg -n -F -f /tmp/area_cli_paths.txt .github scripts docs \
+  tests/README.md tests/TESTING_STANDARD.md pyproject.toml 2>/dev/null || true
+```
+
+Also checked by reading the code: `tests/conftest.py` registers sub-markers
+from a recursive `rglob` scan, and `tests/_taxonomy.py` classifies by filename
+tokens only (plus the `tests/helpers/` directory rule), so the markers of the
+28 files do not change when they move into `tests/cli/`.
+
+## Validation for the future move PR
+
+Run with the project venv (`.venv/bin/python`); system `python3` may miss
+pinned deps. Before the move, record the baseline; after, compare:
+
+```bash
+# Selection must match the 28 files before and after the move.
+.venv/bin/python tests/run_focus.py --dry-run --area cli
+.venv/bin/python -m pytest -m area_cli -q
+
+# Moved files pass when targeted directly.
+.venv/bin/python -m pytest tests/cli/ -q
+
+# Whole-suite collection still succeeds (catches import/path breakage).
+.venv/bin/python -m pytest --collect-only -q
+
+# Taxonomy/runner infrastructure is unaffected.
+.venv/bin/python -m pytest tests/test_taxonomy.py tests/test_run_focus.py -q
+
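+# No stale flat-path references to the moved files. Expected: no matches
+# outside tests/cli/ itself. Generate /tmp/area_cli_paths.txt BEFORE the move:
+# afterwards the flat glob below no longer finds the CLI files, the pattern
+# list comes out empty, and this check stops being meaningful.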
+.venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
+from pathlib import Path
+from tests._taxonomy import classify_test_path
+
+for path in sorted(Path("tests").glob("test_*.py")):
+    if classify_test_path(path).area == "cli":
+        print(path)
+PY2
+
+rg -n -F -f /tmp/area_cli_paths.txt .github scripts docs \
+  tests/README.md tests/TESTING_STANDARD.md pyproject.toml 2>/dev/null || true
+```
+
+Pass criteria: identical test counts for `-m area_cli` before/after, zero
+collection errors, and no changes outside the moved files.
+
+## Non-goals
+
+- No file moves, renames, or deletions in this PR.
+- No changes to `conftest.py`, `_taxonomy.py`, `run_focus.py`, helpers,
+  markers, CI workflows, or production code.
+- No recommendation to split the whole suite at once; later groups get their
+  own inventory-then-move slices.
@@ -51,10 +51,11 @@ Every new or refactored test should be:

 ## Test taxonomy

-Tests are classified by the categories below. Today the suite is flat under
-`tests/`; the **Target dir** column is the phased layout from #2523 that we move
-toward *after* helpers and determinism are stable. Until a category is moved,
-new tests in that category stay in flat `tests/` but should still follow this
+Tests are classified by the categories below. Today the suite is mostly flat
+under `tests/` (the current `area_cli` set has moved to `tests/cli/`); the
+**Target dir** column is the phased layout from #2523 that we move toward
+*after* helpers and determinism are stable. Until a category is moved, new
+tests in that category stay in flat `tests/` but should still follow this
 standard.

 | Category | What it covers | Examples today | Target dir |
@@ -0,0 +1,57 @@
+"""`odysseus-research list --status complete` must match completed runs.
+
+Completed research runs are persisted with status "done" (research_handler),
+but the user-facing CLI value is the friendlier "complete". The CLI offered
+"complete" yet filtered `status != args.status`, so `--status complete` never
+matched any record. The fix keeps "complete" as the CLI value and maps it to
+the stored "done" at filter time, so the on-disk corpus stays the source of
+truth and the documented CLI surface keeps working.
+"""
+import importlib.machinery
+import importlib.util
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+ROOT = Path(__file__).resolve().parents[2]
+
+
+def _load_cli():
+    path = ROOT / "scripts" / "odysseus-research"
+    loader = importlib.machinery.SourceFileLoader("odysseus_research_cli_status", str(path))
+    spec = importlib.util.spec_from_loader(loader.name, loader)
+    module = importlib.util.module_from_spec(spec)
+    loader.exec_module(module)
+    return module
+
+
+def test_complete_is_a_valid_status_choice():
+    cli = _load_cli()
+    parser = cli._build_parser()
+    ns = parser.parse_args(["list", "--status", "complete"])
+    assert ns.status == "complete"
+
+
+def test_filter_returns_completed_runs(tmp_path, monkeypatch):
+    cli = _load_cli(); cli._DATA_DIR = tmp_path
+    (tmp_path / "r1.json").write_text(json.dumps({"query": "q1", "status": "done"}))
+    (tmp_path / "r2.json").write_text(json.dumps({"query": "q2", "status": "running"}))
+    emitted = []
+    monkeypatch.setattr(cli, "emit", lambda value, args: emitted.append(value))
+    # CLI "complete" must map to the stored "done" and match r1.
+    cli.cmd_list(SimpleNamespace(status="complete", limit=50))
+    ids = [r["id"] for r in emitted[0]]
+    assert ids == ["r1"]  # only the completed run
+
+
+def test_verbatim_status_still_filters(tmp_path, monkeypatch):
+    cli = _load_cli(); cli._DATA_DIR = tmp_path
+    (tmp_path / "r1.json").write_text(json.dumps({"query": "q1", "status": "done"}))
+    (tmp_path / "r2.json").write_text(json.dumps({"query": "q2", "status": "running"}))
+    emitted = []
+    monkeypatch.setattr(cli, "emit", lambda value, args: emitted.append(value))
+    cli.cmd_list(SimpleNamespace(status="running", limit=50))
+    ids = [r["id"] for r in emitted[0]]
+    assert ids == ["r2"]  # verbatim choices pass through unchanged
@@ -21,7 +21,7 @@ import json
 from pathlib import Path
 from types import SimpleNamespace

-ROOT = Path(__file__).resolve().parents[1]
+ROOT = Path(__file__).resolve().parents[2]


 def _load_cli():
@@ -0,0 +1,43 @@
+"""Tool-output display truncation uses _truncate with an indicator.
+
+Previously agent_loop sliced tool output to a hard character limit ([:2000]
+or [:4000]) with no signal to the UI that data was lost.  Now it delegates to
+tool_utils._truncate which caps at MAX_OUTPUT_CHARS (10 000) and appends
+a ``... (truncated, N chars total)`` suffix so the frontend can show a
+truncation indicator in the tool bubble.
+"""
+from src.tool_utils import _truncate, MAX_OUTPUT_CHARS
+
+
+def test_short_output_unchanged():
+    """Outputs within the limit pass through verbatim."""
+    text = "hello world"
+    assert _truncate(text) == text
+
+
+def test_long_output_truncated_with_indicator():
+    """Outputs exceeding MAX_OUTPUT_CHARS are truncated with a suffix."""
+    text = "x" * (MAX_OUTPUT_CHARS + 500)
+    result = _truncate(text)
+    assert len(result) > MAX_OUTPUT_CHARS  # includes suffix
+    assert result.startswith("x" * MAX_OUTPUT_CHARS)
+    assert "truncated" in result
+    assert str(len(text)) in result  # original length reported
+
+
+def test_exact_limit_unchanged():
+    """An output exactly at the limit is not truncated."""
+    text = "a" * MAX_OUTPUT_CHARS
+    assert _truncate(text) == text
+
+
+def test_default_limit_matches_constant():
+    """_truncate default limit equals MAX_OUTPUT_CHARS (10 000)."""
+    assert MAX_OUTPUT_CHARS == 10_000
+    text = "y" * 10_001
+    result = _truncate(text)
+    assert "truncated" in result
+
+
+def test_empty_string():
+    assert _truncate("") == ""
@@ -33,3 +33,19 @@ def test_api_key_manager_load_resilience(tmp_path):
    assert loaded["good_provider"] == "good_value"
    assert "bad_provider" not in loaded
    assert "garbage_provider" not in loaded
+
+
+def test_load_ignores_non_string_raw_values(tmp_path):
+    mgr = APIKeyManager(str(tmp_path))
+
+    mgr.save("openai", "sk-openai")
+    with open(mgr.api_keys_file, "r", encoding="utf-8") as f:
+        keys = json.load(f)
+
+    keys["missing_provider"] = None
+    keys["numeric_provider"] = 42
+    keys["object_provider"] = {"encrypted": keys["openai"]}
+    with open(mgr.api_keys_file, "w", encoding="utf-8") as f:
+        json.dump(keys, f)
+
+    assert mgr.load() == {"openai": "sk-openai"}
@@ -287,8 +287,9 @@ def test_delete_token_deletes_and_invalidates_cache(monkeypatch, token_routes_mo
    monkeypatch.setattr(mod, "get_current_user", lambda req: req.state.current_user)
    monkeypatch.setattr(mod, "ApiToken", MagicMock())

+    fake_token = SimpleNamespace(id="abcd1234", owner="alice", name="test")
    fake_session = MagicMock()
-    fake_session.query.return_value.filter.return_value.delete.return_value = 1
+    fake_session.query.return_value.filter.return_value.first.return_value = fake_token
    monkeypatch.setattr(mod, "get_db_session", lambda: _db_ctx(fake_session))

    invalidator = MagicMock()
@@ -297,6 +298,7 @@ def test_delete_token_deletes_and_invalidates_cache(monkeypatch, token_routes_mo
    resp = delete_token(request=req, token_id="abcd1234")

    assert resp == {"status": "deleted"}
+    fake_session.delete.assert_called_once_with(fake_token)
    invalidator.assert_called_once()


@@ -312,7 +314,7 @@ def test_delete_missing_token_returns_404_without_invalidating_cache(monkeypatch
    monkeypatch.setattr(mod, "ApiToken", MagicMock())

    fake_session = MagicMock()
-    fake_session.query.return_value.filter.return_value.delete.return_value = 0
+    fake_session.query.return_value.filter.return_value.first.return_value = None
    monkeypatch.setattr(mod, "get_db_session", lambda: _db_ctx(fake_session))

    invalidator = MagicMock()
@@ -404,3 +406,99 @@ def test_update_missing_token_returns_404(monkeypatch, token_routes_mod):
    with pytest.raises(HTTPException) as exc:
        asyncio.run(update_token(request=req, token_id="missing99"))
    assert exc.value.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# 7. Owner check — update/delete reject a different admin's token with 403
+# ---------------------------------------------------------------------------
+
+
+def _bob_patch_request(invalidator, body):
+    """An admin request from bob whose async .json() yields `body`."""
+    req = _req("bob", is_admin=True, invalidator=invalidator)
+
+    async def _json():
+        return body
+
+    req.json = _json
+    return req
+
+
+def test_update_token_rejects_non_owner(monkeypatch, token_routes_mod):
+    monkeypatch.setenv("AUTH_ENABLED", "true")
+    mod = token_routes_mod
+    monkeypatch.setattr(mod, "get_current_user", lambda req: req.state.current_user)
+
+    token = SimpleNamespace(
+        id="tok123", name="alice-token", owner="alice",
+        token_prefix="ody_alic", scopes="chat", is_active=True,
+    )
+    fake_session = MagicMock()
+    fake_session.query.return_value.filter.return_value.first.return_value = token
+    monkeypatch.setattr(mod, "get_db_session", lambda: _db_ctx(fake_session))
+
+    req = _bob_patch_request(MagicMock(), {"name": "hijacked"})
+    update_token = _get_handler(mod, "PATCH", "/tokens/{token_id}")
+    with pytest.raises(HTTPException) as exc:
+        asyncio.run(update_token(request=req, token_id="tok123"))
+    assert exc.value.status_code == 403
+    assert token.name == "alice-token"
+
+
+def test_delete_token_rejects_non_owner(monkeypatch, token_routes_mod):
+    monkeypatch.setenv("AUTH_ENABLED", "true")
+    mod = token_routes_mod
+    monkeypatch.setattr(mod, "get_current_user", lambda req: req.state.current_user)
+    monkeypatch.setattr(mod, "ApiToken", MagicMock())
+
+    fake_token = SimpleNamespace(id="tok123", owner="alice", name="alice-token")
+    fake_session = MagicMock()
+    fake_session.query.return_value.filter.return_value.first.return_value = fake_token
+    monkeypatch.setattr(mod, "get_db_session", lambda: _db_ctx(fake_session))
+
+    invalidator = MagicMock()
+    req = _req("bob", is_admin=True, invalidator=invalidator)
+    delete_token = _get_handler(mod, "DELETE", "/tokens/{token_id}")
+    with pytest.raises(HTTPException) as exc:
+        delete_token(request=req, token_id="tok123")
+    assert exc.value.status_code == 403
+    fake_session.delete.assert_not_called()
+    invalidator.assert_not_called()
+
+
+def test_update_token_owner_check_skipped_when_auth_disabled(monkeypatch, token_routes_mod):
+    monkeypatch.setenv("AUTH_ENABLED", "false")
+    mod = token_routes_mod
+    monkeypatch.setattr(mod, "get_current_user", lambda req: None)
+
+    token = SimpleNamespace(
+        id="tok123", name="original", owner="alice",
+        token_prefix="ody_alic", scopes="chat", is_active=True,
+    )
+    fake_session = MagicMock()
+    fake_session.query.return_value.filter.return_value.first.return_value = token
+    monkeypatch.setattr(mod, "get_db_session", lambda: _db_ctx(fake_session))
+
+    req = _bob_patch_request(MagicMock(), {"name": "renamed-in-single-user"})
+    update_token = _get_handler(mod, "PATCH", "/tokens/{token_id}")
+    resp = asyncio.run(update_token(request=req, token_id="tok123"))
+    assert resp["name"] == "renamed-in-single-user"
+
+
+def test_delete_token_owner_check_skipped_when_auth_disabled(monkeypatch, token_routes_mod):
+    monkeypatch.setenv("AUTH_ENABLED", "false")
+    mod = token_routes_mod
+    monkeypatch.setattr(mod, "get_current_user", lambda req: None)
+    monkeypatch.setattr(mod, "ApiToken", MagicMock())
+
+    fake_token = SimpleNamespace(id="tok123", owner="alice", name="alice-token")
+    fake_session = MagicMock()
+    fake_session.query.return_value.filter.return_value.first.return_value = fake_token
+    monkeypatch.setattr(mod, "get_db_session", lambda: _db_ctx(fake_session))
+
+    invalidator = MagicMock()
+    req = _req("", is_admin=True, invalidator=invalidator)
+    delete_token = _get_handler(mod, "DELETE", "/tokens/{token_id}")
+    resp = delete_token(request=req, token_id="tok123")
+    assert resp == {"status": "deleted"}
+    fake_session.delete.assert_called_once_with(fake_token)
@@ -106,6 +106,9 @@ async def test_learn_sender_signatures_resolves_llm_for_task_owner(monkeypatch):
    from src.builtin_actions import action_learn_sender_signatures

    class FakeImap:
+        def __init__(self, owner=""):
+            self.owner = owner
+
        def select(self, *_args, **_kwargs):
            return "OK", []

@@ -119,13 +122,20 @@ async def test_learn_sender_signatures_resolves_llm_for_task_owner(monkeypatch):
            return None

    calls, _fallback_calls = _resolver_spy(monkeypatch, utility_result=("", "", {}), default_result=("", "", {}))
-    monkeypatch.setattr(email_helpers, "_imap_connect", lambda _account_id=None: FakeImap())
+    imap_owners = []
+
+    def fake_imap_connect(_account_id=None, owner=""):
+        imap_owners.append(owner)
+        return FakeImap(owner)
+
+    monkeypatch.setattr(email_helpers, "_imap_connect", fake_imap_connect)

    message, ok = await action_learn_sender_signatures("alice")

    assert ok is False
    assert message == "No LLM endpoint available"
    assert calls == [("utility", "alice"), ("default", "alice")]
+    assert imap_owners == ["alice"]


@pytest.mark.asyncio
@@ -0,0 +1,94 @@
+"""llama.cpp slot-affinity fields must never reach cloud providers (#3793).
+
+_apply_local_cache_affinity adds session_id + cache_prompt to outgoing
+payloads for KV-cache slot affinity (#2927). The old gate treated any unknown
+OpenAI-compatible host as self-hosted, so strict cloud APIs added as custom
+endpoints (Mistral at api.mistral.ai) received the extra fields and rejected
+every request with 422 extra_forbidden. Self-hosted now also requires the
+endpoint to resolve as local: loopback/private/tailscale host, or endpoint
+kind explicitly configured as "local".
+"""
+import pytest
+
+import src.llm_core as llm_core
+import src.model_context as model_context
+
+
+def _affinity_fields(url, monkeypatch, kind=None):
+    monkeypatch.setattr(model_context, "_configured_endpoint_kind", lambda _u: kind)
+    payload = {}
+    llm_core._apply_local_cache_affinity(payload, url, "sess-123")
+    return payload
+
+
+def test_mistral_cloud_api_gets_no_affinity_fields(monkeypatch):
+    # The #3793 repro: Mistral rejects unknown body fields with 422.
+    payload = _affinity_fields("https://api.mistral.ai/v1", monkeypatch)
+    assert payload == {}
+
+
+def test_openai_api_gets_no_affinity_fields(monkeypatch):
+    payload = _affinity_fields("https://api.openai.com/v1", monkeypatch)
+    assert payload == {}
+
+
+def test_unknown_public_host_gets_no_affinity_fields(monkeypatch):
+    # Any strict cloud provider added as a custom endpoint, not just Mistral.
+    payload = _affinity_fields("https://llm.example-cloud.com/v1", monkeypatch)
+    assert payload == {}
+
+
+def test_localhost_server_gets_affinity_fields(monkeypatch):
+    payload = _affinity_fields("http://localhost:8080/v1", monkeypatch)
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
+
+
+def test_private_lan_server_gets_affinity_fields(monkeypatch):
+    payload = _affinity_fields("http://192.168.1.50:8000/v1", monkeypatch)
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
+
+
+def test_public_host_with_local_kind_override_gets_affinity_fields(monkeypatch):
+    # Escape hatch: a self-hosted llama.cpp exposed via a tunnel keeps the
+    # slot-affinity hint when its endpoint kind is configured as "local".
+    payload = _affinity_fields("https://my-llama.example.com/v1", monkeypatch, kind="local")
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
+
+
+def test_no_session_id_is_a_noop(monkeypatch):
+    monkeypatch.setattr(model_context, "_configured_endpoint_kind", lambda _u: None)
+    payload = {}
+    llm_core._apply_local_cache_affinity(payload, "http://localhost:8080/v1", None)
+    assert payload == {}
+
+
+# Cloud-host sweep absorbed from #3839 (credit: Shabablinchikow) - every cloud
+# API that falls through provider detection to the OpenAI-compatible default
+# must stay clean, not just the Mistral host from the original report.
+@pytest.mark.parametrize("url", [
+    "https://api.mistral.ai/v1/chat/completions",
+    "https://api.deepseek.com/v1/chat/completions",
+    "https://api.x.ai/v1/chat/completions",
+    "https://api.together.xyz/v1/chat/completions",
+    "https://api.fireworks.ai/inference/v1/chat/completions",
+    "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+])
+def test_cloud_openai_compatible_hosts_get_no_affinity_fields(monkeypatch, url):
+    assert _affinity_fields(url, monkeypatch) == {}
+
+
+# Tailscale CGNAT boundaries (review finding on #3945): only 100.64.0.0/10 is
+# Tailscale; the rest of 100.0.0.0/8 contains public ranges, and a strict
+# provider addressed by one must not receive the llama.cpp extras.
+def test_host_just_below_cgnat_gets_no_affinity_fields(monkeypatch):
+    assert _affinity_fields("http://100.63.255.255/v1", monkeypatch) == {}
+
+
+def test_host_just_above_cgnat_gets_no_affinity_fields(monkeypatch):
+    assert _affinity_fields("http://100.128.0.1/v1", monkeypatch) == {}
+
+
+@pytest.mark.parametrize("host", ["100.64.0.1", "100.100.50.2", "100.127.255.254"])
+def test_hosts_inside_cgnat_get_affinity_fields(monkeypatch, host):
+    payload = _affinity_fields(f"http://{host}:8080/v1", monkeypatch)
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
@@ -1,50 +1,227 @@
+"""Issue #3229 — allow_bash / allow_web_search must work for JSON API callers
+and admin users must get bash enabled by default.
+
+Bug: allow_bash and allow_web_search were only read from form_data, so JSON
+API callers (Content-Type: application/json) always had bash disabled.
+
+Fix: (1) Read from JSON body as fallback.
+     (2) Only add bash/web_search to disabled_tools when explicitly set to a
+         falsy value; when unset (None), defer to per-user privilege checks.
+"""
+
+import ast
 from pathlib import Path

+import pytest

-CHAT_ROUTES = Path(__file__).resolve().parents[1] / "routes" / "chat_routes.py"
+_CHAT_ROUTES = Path(__file__).resolve().parent.parent / "routes" / "chat_routes.py"


-def _source() -> str:
-    return CHAT_ROUTES.read_text(encoding="utf-8")
+# ── Source-level guards ─────────────────────────────────────────


-def test_research_fast_path_respects_tool_policy():
-    src = _source()
-    assert "pre_context_tool_policy = build_effective_tool_policy(" in src
-    assert "allow_tool_preprocessing = not pre_context_tool_policy.block_all_tool_calls" in src
-    assert "allow_tool_preprocessing=allow_tool_preprocessing" in src
-    assert "research_blocked_by_policy = bool(" in src
-    assert 'tool_policy.blocks("trigger_research")' in src
-    assert 'tool_policy.blocks("manage_research")' in src
-    assert 'effective_do_research = bool(' in src
-    assert 'if effective_do_research:' in src
-    assert '"is_research": effective_do_research' in src
-    assert "_effective_mode = 'research' if effective_do_research else (chat_mode or 'chat')" in src
-    assert '_model_suffix = "Research" if effective_do_research else None' in src
-    assert "do_research=effective_do_research" in src
+def test_allow_bash_reads_from_body_as_fallback():
+    """chat_stream must read allow_bash from the JSON body, not just form_data."""
+    source = _CHAT_ROUTES.read_text(encoding="utf-8")
+    tree = ast.parse(source)
+
+    # Find the chat_stream function
+    chat_stream_func = None
+    for node in ast.walk(tree):
+        if isinstance(node, ast.AsyncFunctionDef) and node.name == "chat_stream":
+            chat_stream_func = node
+            break
+    assert chat_stream_func is not None, "chat_stream function not found"
+
+    # Look for an assignment to allow_bash that references 'body'
+    found_body_fallback = False
+    for node in ast.walk(chat_stream_func):
+        if isinstance(node, ast.Assign):
+            for target in node.targets:
+                if isinstance(target, ast.Name) and target.id == "allow_bash":
+                    # Check if 'body' appears in the value
+                    src_segment = ast.get_source_segment(source, node)
+                    if src_segment and "body" in src_segment:
+                        found_body_fallback = True
+    assert found_body_fallback, (
+        "allow_bash assignment in chat_stream must fall back to JSON body"
+    )


-def test_non_streaming_chat_path_uses_tool_policy_before_context_and_research():
-    src = _source()
-    chat_endpoint = src[src.index("async def chat_endpoint"):src.index("# ------------------------------------------------------------------ #", src.index("async def chat_endpoint"))]
-    assert "tool_policy = build_effective_tool_policy(last_user_message=message)" in chat_endpoint
-    assert "allow_tool_preprocessing = not tool_policy.block_all_tool_calls" in chat_endpoint
-    assert 'if not tool_policy.blocks("manage_memory"):' in chat_endpoint
-    assert "allow_tool_preprocessing=allow_tool_preprocessing" in chat_endpoint
-    assert 'tool_policy.blocks("trigger_research")' in chat_endpoint
-    assert "if use_research and not research_blocked_by_policy:" in chat_endpoint
-    assert "allow_background_extraction=not tool_policy.block_all_tool_calls" in chat_endpoint
+def test_allow_web_search_reads_from_body_as_fallback():
+    """chat_stream must read allow_web_search from the JSON body, not just form_data."""
+    source = _CHAT_ROUTES.read_text(encoding="utf-8")
+    tree = ast.parse(source)
+
+    chat_stream_func = None
+    for node in ast.walk(tree):
+        if isinstance(node, ast.AsyncFunctionDef) and node.name == "chat_stream":
+            chat_stream_func = node
+            break
+    assert chat_stream_func is not None
+
+    found_body_fallback = False
+    for node in ast.walk(chat_stream_func):
+        if isinstance(node, ast.Assign):
+            for target in node.targets:
+                if isinstance(target, ast.Name) and target.id == "allow_web_search":
+                    src_segment = ast.get_source_segment(source, node)
+                    if src_segment and "body" in src_segment:
+                        found_body_fallback = True
+    assert found_body_fallback, (
+        "allow_web_search assignment in chat_stream must fall back to JSON body"
+    )


-def test_image_generation_fast_path_checks_policy_before_tool_start():
-    src = _source()
-    policy_gate = src.index('if tool_policy.blocks("generate_image"):')
-    tool_start = src.index('"type": "tool_start", "tool": "generate_image"')
-    generator_call = src.index("do_generate_image(")
-    assert policy_gate < tool_start
-    assert policy_gate < generator_call
+def test_disabled_tools_does_not_bash_when_allow_bash_is_none():
+    """When allow_bash is not set (None), bash must NOT be unconditionally
+    added to disabled_tools.  The per-user privilege check handles it.
+    """
+    source = _CHAT_ROUTES.read_text(encoding="utf-8")
+
+    # The fix changes:
+    #   if str(allow_bash).lower() != "true":
+    # to:
+    #   if allow_bash is not None and str(allow_bash).lower() != "true":
+    assert "allow_bash is not None" in source, (
+        "disabled_tools check must guard against allow_bash being None"
+    )
+    assert "allow_web_search is not None" in source, (
+        "disabled_tools check must guard against allow_web_search being None"
+    )


-def test_streaming_chat_paths_disable_background_extraction_under_policy():
-    src = _source()
-    assert src.count("allow_background_extraction=not tool_policy.block_all_tool_calls") >= 3
+# ── Functional tests of the disabled-tools logic ───────────────
+
+
+def _build_disabled_tools(
+    allow_bash=None,
+    allow_web_search=None,
+    can_use_bash=True,
+    can_use_browser=True,
+):
+    """Replicate the disabled-tools logic from chat_stream for unit testing.
+
+    This is a local copy of the decision logic, not a call into the route,
+    so the tests built on it exercise this replica only.
+    NOTE(review): nothing here detects drift between this copy and
+    chat_stream beyond the source-level guards in this module - confirm
+    the two stay in sync when chat_stream changes.
+
+    Args:
+        allow_bash: Request flag. ``None`` means "not sent". Any other value
+            is stringified and compared case-insensitively with "true";
+            everything that is not "true" disables ``bash``.
+        allow_web_search: Same convention as ``allow_bash``; a non-"true"
+            value disables both ``web_search`` and ``web_fetch``.
+        can_use_bash: Per-user privilege. When falsy, ``bash``, ``python``,
+            ``read_file`` and ``write_file`` are disabled regardless of
+            ``allow_bash``.
+        can_use_browser: Per-user privilege. When falsy,
+            ``builtin_browser`` is disabled.
+
+    Returns the set of tool names that would be disabled.
+    """
+    disabled_tools = set()
+
+    # Issue #3229 fix: only disable when explicitly set to a falsy value.
+    # More precisely: None is skipped, and any non-None value whose
+    # lowercase string form is not "true" counts as "off".
+    if allow_bash is not None and str(allow_bash).lower() != "true":
+        disabled_tools.add("bash")
+    if allow_web_search is not None and str(allow_web_search).lower() != "true":
+        disabled_tools.add("web_search")
+        disabled_tools.add("web_fetch")
+
+    # Enforce per-user privileges
+    # These run after the request flags and only ever add entries, so a
+    # missing privilege wins over an explicit allow_*="true".
+    if not can_use_bash:
+        disabled_tools.update({"bash", "python", "read_file", "write_file"})
+    if not can_use_browser:
+        disabled_tools.add("builtin_browser")
+
+    return disabled_tools
+
+
+def test_json_body_allow_bash_true_enables_bash():
+    """API caller sending {"allow_bash": true} gets bash enabled."""
+    disabled = _build_disabled_tools(allow_bash="true")
+    assert "bash" not in disabled
+
+
+def test_json_body_allow_bash_false_disables_bash():
+    """API caller sending {"allow_bash": false} gets bash disabled."""
+    disabled = _build_disabled_tools(allow_bash="false")
+    assert "bash" in disabled
+
+
+def test_json_body_allow_web_search_true_enables_web():
+    """API caller sending {"allow_web_search": true} gets web tools enabled."""
+    disabled = _build_disabled_tools(allow_web_search="true")
+    assert "web_search" not in disabled
+    assert "web_fetch" not in disabled
+
+
+def test_json_body_allow_web_search_false_disables_web():
+    """API caller sending {"allow_web_search": false} gets web tools disabled."""
+    disabled = _build_disabled_tools(allow_web_search="false")
+    assert "web_search" in disabled
+    assert "web_fetch" in disabled
+
+
+def test_admin_user_gets_bash_enabled_by_default():
+    """When allow_bash is not set and user has can_use_bash privilege,
+    bash must NOT be disabled.
+    """
+    disabled = _build_disabled_tools(allow_bash=None, can_use_bash=True)
+    assert "bash" not in disabled
+
+
+def test_admin_user_gets_web_search_enabled_by_default():
+    """When allow_web_search is not set and user has normal privileges,
+    web_search must NOT be disabled.
+    """
+    disabled = _build_disabled_tools(allow_web_search=None)
+    assert "web_search" not in disabled
+    assert "web_fetch" not in disabled
+
+
+def test_non_privileged_user_without_explicit_flag_still_disabled():
+    """A user without can_use_bash privilege who doesn't send allow_bash
+    should still have bash disabled via the privilege check.
+    """
+    disabled = _build_disabled_tools(allow_bash=None, can_use_bash=False)
+    assert "bash" in disabled
+
+
+def test_non_privileged_user_explicit_true_overridden_by_privilege():
+    """Even if allow_bash=true is sent, a user without can_use_bash
+    privilege still gets bash disabled by the privilege gate.
+    """
+    disabled = _build_disabled_tools(allow_bash="true", can_use_bash=False)
+    assert "bash" in disabled
+
+
+def test_form_data_none_body_true_works():
+    """Simulates: form_data has no allow_bash, body has allow_bash=true.
+    After the fallback (`form_data.get(...) or body.get(...)`), allow_bash
+    should be "true".
+    """
+    # Simulate the fallback logic
+    form_data_val = None  # not in form_data
+    body_val = "true"     # from JSON body
+    allow_bash = form_data_val or body_val
+    assert str(allow_bash).lower() == "true"
+
+    disabled = _build_disabled_tools(allow_bash=allow_bash)
+    assert "bash" not in disabled
+
+
+def test_explicit_false_disables_even_for_admin():
+    """An admin who explicitly sends allow_bash=false should have bash disabled."""
+    disabled = _build_disabled_tools(
+        allow_bash="false", can_use_bash=True,
+    )
+    assert "bash" in disabled
+
+
+# ── Frontend source-level guards ──────────────────────────────
+
+_CHAT_JS = Path(__file__).resolve().parent.parent / "static" / "js" / "chat.js"
+
+
+def test_frontend_always_sends_explicit_allow_bash():
+    """chat.js must always send allow_bash (both true and false), not only on toggle ON."""
+    source = _CHAT_JS.read_text(encoding="utf-8")
+    # Must not only append 'true' — must also handle the false case
+    assert "allow_bash', el('bash-toggle').checked ? 'true' : 'false'" in source or \
+           "allow_bash', 'false'" in source, (
+        "Frontend must send explicit allow_bash=false when toggle is off"
+    )
+
+
+def test_frontend_sends_explicit_allow_web_search_false_in_agent_mode():
+    """chat.js must send allow_web_search=false when web toggle is off in agent mode."""
+    source = _CHAT_JS.read_text(encoding="utf-8")
+    assert "allow_web_search', 'false'" in source, (
+        "Frontend must send explicit allow_web_search=false in agent mode when toggle is off"
+    )
@@ -11,7 +11,7 @@ import src.model_context as mc

 def _setup(monkeypatch, windows):
    """windows: {endpoint_url: context_length}. Force the remote path."""
-    monkeypatch.setattr(mc, "_is_local_endpoint", lambda url: False)
+    monkeypatch.setattr(mc, "is_local_endpoint", lambda url: False)
    monkeypatch.setattr(mc, "_configured_endpoint_kind", lambda url: "api")
    monkeypatch.setattr(mc, "_query_context_length", lambda url, model: windows[url])
    mc._context_cache.clear()
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+DIAGNOSIS_JS = ROOT / "static" / "js" / "cookbook-diagnosis.js"
+
+
+def test_repair_kernels_pip_spec_is_shell_quoted():
+    source = DIAGNOSIS_JS.read_text(encoding="utf-8")
+
+    assert '"kernels<0.15"' in source
+    assert " --break-system-packages kernels<0.15" not in source
@@ -0,0 +1,56 @@
+"""Behavioral guard for the cookbook error output-tail expansion.
+
+When a task reaches status "error" the status endpoint previously returned
+only the last 12 lines of the subprocess log. The "Copy last 50 lines"
+context-menu action was therefore copying the same 12 lines — useless for
+diagnosing failures that emit long stack traces or build output.
+
+`error_aware_output_tail` now returns the last 50 lines on error and keeps
+the cheaper 12-line tail for running/other tasks.
+"""
+from routes.cookbook_output import error_aware_output_tail
+
+
+def _snapshot(n):
+    return "\n".join(f"line {i}" for i in range(n))
+
+
+def test_error_status_returns_last_50_lines():
+    snap = _snapshot(200)
+    tail = error_aware_output_tail(snap, "error")
+    lines = tail.splitlines()
+    assert len(lines) == 50, f"error tail should be 50 lines, got {len(lines)}"
+    assert lines[0] == "line 150"
+    assert lines[-1] == "line 199"
+
+
+def test_non_error_status_returns_last_12_lines():
+    snap = _snapshot(200)
+    for status in ("running", "ready", "completed", "stopped", "unknown"):
+        tail = error_aware_output_tail(snap, status)
+        lines = tail.splitlines()
+        assert len(lines) == 12, f"{status} tail should be 12 lines, got {len(lines)}"
+        assert lines[-1] == "line 199"
+
+
+def test_short_snapshot_returns_all_lines():
+    # Fewer lines than the cap — return everything, no padding.
+    snap = _snapshot(5)
+    assert error_aware_output_tail(snap, "error").splitlines() == [
+        "line 0", "line 1", "line 2", "line 3", "line 4",
+    ]
+    assert len(error_aware_output_tail(snap, "running").splitlines()) == 5
+
+
+def test_empty_snapshot_returns_empty_string():
+    assert error_aware_output_tail("", "error") == ""
+    assert error_aware_output_tail("", "running") == ""
+
+
+def test_error_tail_is_wider_than_non_error():
+    snap = _snapshot(100)
+    err = error_aware_output_tail(snap, "error").splitlines()
+    run = error_aware_output_tail(snap, "running").splitlines()
+    assert len(err) > len(run)
+    # The non-error tail is a strict suffix of the error tail.
+    assert err[-len(run):] == run
@@ -0,0 +1,71 @@
+"""Regression tests for _group_uid_fetch_records (Gmail FLAGS placement).
+
+imaplib hands back UID FETCH responses as an interleaved list of
+``(meta, literal)`` tuples and bare ``bytes`` elements. Dovecot sends FLAGS
+before the RFC822.HEADER literal, so they sit inside the tuple meta; Gmail
+sends FLAGS *after* the literal, as a bare ``b' FLAGS (\\Seen))'`` element.
+The old grouping loop only looked at tuples, so on Gmail every message lost
+its FLAGS and rendered as unread/unflagged in the email library.
+"""
+
+import re
+
+from routes.email_routes import _group_uid_fetch_records, _uid_from_fetch_meta
+
+
+def _flags(meta_b: bytes) -> str:
+    m = re.search(rb"FLAGS \(([^)]*)\)", meta_b)
+    return m.group(1).decode() if m else ""
+
+
+# Captured shape of a real Gmail response to
+# UID FETCH a,b (UID FLAGS RFC822.HEADER RFC822.SIZE):
+GMAIL_RESPONSE = [
+    (b"10779 (UID 18723 RFC822.SIZE 54308 RFC822.HEADER {24}", b"Subject: read one\r\n\r\n"),
+    rb" FLAGS (\Seen))",
+    (b"10780 (UID 18724 RFC822.SIZE 124310 RFC822.HEADER {26}", b"Subject: unread one\r\n\r\n"),
+    rb" FLAGS ())",
+]
+
+# Dovecot puts FLAGS before the literal and terminates with a bare b')'.
+DOVECOT_RESPONSE = [
+    (rb"1 (UID 5 FLAGS (\Seen) RFC822.SIZE 100 RFC822.HEADER {18}", b"Subject: hi\r\n\r\n"),
+    b")",
+    (b"2 (UID 6 FLAGS () RFC822.SIZE 90 RFC822.HEADER {19}", b"Subject: new\r\n\r\n"),
+    b")",
+]
+
+
+def test_gmail_post_literal_flags_attach_to_their_own_message():
+    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)
+
+    assert len(grouped) == 2
+    assert _uid_from_fetch_meta(grouped[0][0]) == "18723"
+    assert _flags(grouped[0][0]) == r"\Seen"
+    assert grouped[0][1] == b"Subject: read one\r\n\r\n"
+
+    assert _uid_from_fetch_meta(grouped[1][0]) == "18724"
+    assert _flags(grouped[1][0]) == ""
+    assert grouped[1][1] == b"Subject: unread one\r\n\r\n"
+
+
+def test_dovecot_pre_literal_flags_unchanged():
+    grouped = _group_uid_fetch_records(DOVECOT_RESPONSE)
+
+    assert len(grouped) == 2
+    assert _flags(grouped[0][0]) == r"\Seen"
+    assert _flags(grouped[1][0]) == ""
+    assert grouped[1][1] == b"Subject: new\r\n\r\n"
+
+
+def test_size_and_uid_survive_grouping():
+    grouped = _group_uid_fetch_records(GMAIL_RESPONSE)
+    sizes = [re.search(rb"RFC822\.SIZE (\d+)", m).group(1) for m, _ in grouped]
+    assert sizes == [b"54308", b"124310"]
+
+
+def test_empty_and_none_inputs():
+    assert _group_uid_fetch_records(None) == []
+    assert _group_uid_fetch_records([]) == []
+    # A stray bare element before any tuple opens no record and must not crash.
+    assert _group_uid_fetch_records([rb" FLAGS (\Seen))"]) == []
@@ -1,5 +1,7 @@
 import sqlite3
+from contextlib import contextmanager
 from datetime import datetime, timedelta, timezone
+from types import SimpleNamespace

 import pytest

@@ -117,6 +119,71 @@ def test_email_ai_cache_tables_are_owner_scoped_and_migrate_legacy_rows(tmp_path
        conn.close()


+def test_sender_signature_cache_is_owner_scoped_and_migrates_legacy_rows(tmp_path, monkeypatch):
+    import routes.email_helpers as email_helpers
+
+    db_path = tmp_path / "scheduled_emails.db"
+    monkeypatch.setattr(email_helpers, "SCHEDULED_DB", db_path)
+
+    conn = sqlite3.connect(db_path)
+    conn.execute(
+        """
+        CREATE TABLE sender_signatures (
+            from_address TEXT PRIMARY KEY,
+            signature_text TEXT,
+            sample_count INTEGER,
+            last_built_at TEXT NOT NULL,
+            model_used TEXT,
+            source TEXT
+        )
+        """
+    )
+    conn.execute(
+        """
+        INSERT INTO sender_signatures
+        (from_address, signature_text, sample_count, last_built_at, model_used, source)
+        VALUES ('writer@example.com', 'legacy sig', 3, '2026-01-01', 'm', 'llm')
+        """
+    )
+    conn.commit()
+    conn.close()
+
+    email_helpers._init_scheduled_db()
+
+    conn = sqlite3.connect(db_path)
+    try:
+        info = conn.execute("PRAGMA table_info(sender_signatures)").fetchall()
+        pk_cols = [r[1] for r in sorted((r for r in info if r[5]), key=lambda r: r[5])]
+        assert pk_cols == ["from_address", "owner"]
+        assert conn.execute(
+            "SELECT owner, signature_text FROM sender_signatures WHERE from_address=?",
+            ("writer@example.com",),
+        ).fetchone() == ("", "legacy sig")
+        conn.execute(
+            """
+            INSERT INTO sender_signatures
+            (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+            """,
+            ("writer@example.com", "alice", "alice sig", 3, "2026-01-02", "m", "llm"),
+        )
+        conn.execute(
+            """
+            INSERT INTO sender_signatures
+            (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+            """,
+            ("writer@example.com", "bob", "bob sig", 3, "2026-01-03", "m", "llm"),
+        )
+        rows = conn.execute(
+            "SELECT owner, signature_text FROM sender_signatures WHERE from_address=? ORDER BY owner",
+            ("writer@example.com",),
+        ).fetchall()
+        assert rows == [("", "legacy sig"), ("alice", "alice sig"), ("bob", "bob sig")]
+    finally:
+        conn.close()
+
+
@pytest.mark.asyncio
 async def test_ai_reply_cache_lookup_is_owner_scoped(tmp_path, monkeypatch):
    import routes.email_helpers as email_helpers
@@ -166,6 +233,136 @@ async def test_ai_reply_cache_lookup_is_owner_scoped(tmp_path, monkeypatch):
    assert result["model_used"] == "m-b"


+@pytest.mark.asyncio
+async def test_sender_signature_read_lookup_is_owner_scoped(tmp_path, monkeypatch):
+    import routes.email_helpers as email_helpers
+    import routes.email_routes as email_routes
+
+    db_path = tmp_path / "scheduled_emails.db"
+    monkeypatch.setattr(email_helpers, "SCHEDULED_DB", db_path)
+    monkeypatch.setattr(email_routes, "SCHEDULED_DB", db_path)
+    email_helpers._init_scheduled_db()
+
+    conn = sqlite3.connect(db_path)
+    conn.execute(
+        """
+        INSERT INTO sender_signatures
+        (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+        """,
+        ("writer@example.com", "alice", "alice private sig", 3, "2026-01-01", "m-a", "llm"),
+    )
+    conn.execute(
+        """
+        INSERT INTO sender_signatures
+        (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+        """,
+        ("writer@example.com", "bob", "bob private sig", 3, "2026-01-02", "m-b", "llm"),
+    )
+    conn.commit()
+    conn.close()
+
+    raw = (
+        b"From: Writer <writer@example.com>\r\n"
+        b"To: Bob <bob@example.com>\r\n"
+        b"Subject: Hello\r\n"
+        b"Message-ID: <shared@example.com>\r\n"
+        b"Date: Tue, 01 Jan 2026 12:00:00 +0000\r\n"
+        b"Content-Type: text/plain; charset=utf-8\r\n"
+        b"\r\n"
+        b"Body"
+    )
+
+    class FakeImap:
+        def select(self, *_args, **_kwargs):
+            return "OK", []
+
+        def uid(self, command, _uid, query):
+            assert command == "FETCH"
+            assert query == "(BODY.PEEK[])"
+            return "OK", [(b"1 (UID 1 BODY[])", raw)]
+
+    @contextmanager
+    def fake_imap(_account_id=None, owner=""):
+        assert owner == "bob"
+        yield FakeImap()
+
+    monkeypatch.setattr(email_routes, "_imap", fake_imap)
+    router = email_routes.setup_email_routes()
+    read_email = _route_endpoint(router, "/api/email/read/{uid}", "GET")
+
+    result = await read_email("1", folder="INBOX", account_id=None, owner="bob", mark_seen=False)
+
+    assert result["sender_signature"] == "bob private sig"
+
+
+@pytest.mark.asyncio
+async def test_sender_signature_clear_cache_keeps_other_owner_rows(tmp_path, monkeypatch):
+    import routes.email_helpers as email_helpers
+    import routes.task_routes as task_routes
+
+    db_path = tmp_path / "scheduled_emails.db"
+    monkeypatch.setattr(email_helpers, "SCHEDULED_DB", db_path)
+    email_helpers._init_scheduled_db()
+
+    conn = sqlite3.connect(db_path)
+    conn.execute(
+        """
+        INSERT INTO sender_signatures
+        (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+        """,
+        ("writer@example.com", "alice", "alice private sig", 3, "2026-01-01", "m-a", "llm"),
+    )
+    conn.execute(
+        """
+        INSERT INTO sender_signatures
+        (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+        """,
+        ("writer@example.com", "bob", "bob private sig", 3, "2026-01-02", "m-b", "llm"),
+    )
+    conn.commit()
+    conn.close()
+
+    class FakeQuery:
+        def filter(self, *_args):
+            return self
+
+        def first(self):
+            return SimpleNamespace(
+                id="task-1",
+                owner="alice",
+                action="learn_sender_signatures",
+            )
+
+    class FakeDb:
+        def query(self, _model):
+            return FakeQuery()
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr(task_routes, "SessionLocal", lambda: FakeDb())
+    monkeypatch.setattr(task_routes, "get_current_user", lambda _request: "alice")
+
+    router = task_routes.setup_task_routes(task_scheduler=SimpleNamespace(pop_notifications=lambda owner: []))
+    clear_cache = _route_endpoint(router, "/api/tasks/{task_id}/clear-cache", "POST")
+
+    result = await clear_cache(SimpleNamespace(), "task-1")
+
+    assert result["cleared"]["sender_signatures"] == 1
+    conn = sqlite3.connect(db_path)
+    try:
+        rows = conn.execute(
+            "SELECT owner, signature_text FROM sender_signatures ORDER BY owner",
+        ).fetchall()
+    finally:
+        conn.close()
+    assert rows == [("bob", "bob private sig")]
+
+
@pytest.mark.asyncio
 async def test_scheduled_email_routes_are_owner_scoped(tmp_path, monkeypatch):
    import routes.email_helpers as email_helpers
@@ -0,0 +1,94 @@
+"""Regression guard: Opus 4.7+ rejects the temperature field entirely.
+
+Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
+with Claude Opus 4.7 — sending `temperature` at all, even 0.0, returns HTTP 400.
+This broke every native-Anthropic call to Opus 4.7/4.8, including the research
+endpoint probe (temperature=0) and all DeepResearcher LLM calls, because
+_build_anthropic_payload sent `temperature` unconditionally.
+
+Earlier Claude models (Opus 4.6 and below, every Sonnet/Haiku) still accept
+temperature in [0.0, 1.0], so the omission is version-gated — the clamp-to-[0,1]
+behavior for those models (test_llm_core_anthropic_temp_clamp.py) is unchanged.
+"""
+import os
+
+os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")
+
+import pytest
+
+from src.llm_core import _anthropic_rejects_temperature, _build_anthropic_payload
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "claude-opus-4-7",
+        "claude-opus-4-8",
+        "claude-opus-4-8-20260101",  # tolerate a dated snapshot suffix
+        "claude-opus-4-7-20260201",  # dated 4.7 snapshot — explicit minor, still >= 4.7
+        "anthropic/claude-opus-4-7",  # tolerate a provider-prefixed id
+        "claude-opus-4-10",  # future minor still >= 4.7
+        "claude-opus-5-0",  # future major
+    ],
+)
+def test_opus_47_plus_rejects_temperature(model):
+    assert _anthropic_rejects_temperature(model) is True
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "claude-opus-4-6",
+        "claude-opus-4-5",
+        "claude-opus-4-1",
+        "claude-opus-4-0",
+        "claude-opus-4",  # bare major (no minor) — kept
+        "claude-opus-4-20250514",  # Opus 4.0 dated id — the date must NOT read as a 4.7+ minor
+        "claude-opus-4-1-20250805",  # Opus 4.1 dated id — explicit minor before the date
+        "claude-opus-4-6-20251201",  # dated 4.6 snapshot — older, still keeps temperature
+        "claude-sonnet-4-6",
+        "claude-3-5-sonnet",
+        "claude-3-opus-20240229",  # legacy Claude 3 Opus — no opus-N-M pattern, kept
+        "claude-haiku-4-5",
+        "claude-x",
+        "octopus-4-8",  # "opus" only as a substring of another word — must not match
+        "myproxy/octopus-4-8",  # same, behind a provider prefix
+        "",
+        None,
+    ],
+)
+def test_older_claude_models_keep_temperature(model):
+    assert _anthropic_rejects_temperature(model) is False
+
+
+@pytest.mark.parametrize("model", [123, 1.5, ["claude-opus-4-8"], {"a": 1}, object()])
+def test_non_string_model_is_handled_without_crashing(model):
+    # Defensive: the gate must not raise on a non-string model (the old builder
+    # never called .lower() on it). Truthy non-strings should classify as False.
+    assert _anthropic_rejects_temperature(model) is False
+
+
+def _payload(model, temperature=0.0):
+    return _build_anthropic_payload(
+        model, [{"role": "user", "content": "hi"}], temperature, 100
+    )
+
+
+def test_payload_omits_temperature_for_opus_47_plus():
+    # The endpoint probe sends temperature=0; on Opus 4.7+ that field must be gone.
+    payload = _payload("claude-opus-4-8", 0.0)
+    assert "temperature" not in payload
+
+
+def test_payload_keeps_temperature_for_older_models():
+    payload = _payload("claude-opus-4-6", 0.3)
+    assert payload["temperature"] == 0.3
+    # Older models retain the [0,1] clamp (Nietzsche preset at 1.2 -> 1.0).
+    assert _payload("claude-3-5-sonnet", 1.2)["temperature"] == 1.0
+
+
+def test_payload_keeps_temperature_for_dated_opus_4_0():
+    # Anthropic's dated id for Opus 4.0 (claude-opus-4-20250514) is in this repo's
+    # ANTHROPIC_MODELS list. The date must not be misread as a >= 4.7 minor, or the
+    # user's temperature would be silently dropped on a model that accepts it.
+    assert _payload("claude-opus-4-20250514", 0.5)["temperature"] == 0.5
@@ -14,6 +14,7 @@ import pytest
 from fastapi import HTTPException

 import routes.memory_routes as mr
+from src.request_models import MemoryAddRequest


 def _route(router, path, method):
@@ -38,6 +39,13 @@ def _router(monkeypatch, caller):
    return mr.setup_memory_routes(mem, sm)


+def _request(user):
+    return SimpleNamespace(
+        state=SimpleNamespace(current_user=user),
+        app=SimpleNamespace(state=SimpleNamespace(auth_manager=None)),
+    )
+
+
 def test_extract_rejects_other_users_session(monkeypatch):
    router = _router(monkeypatch, caller="bob")
    extract = _route(router, "/api/memory/extract", "POST")
@@ -59,3 +67,61 @@ def test_owner_can_access_own_session(monkeypatch):
    gbs = _route(router, "/api/memory/by-session/{session_id}", "GET")
    out = gbs(request=None, session_id="alice-sess")
    assert out["session_name"] == "Secret project"
+
+
+def test_add_memory_rejects_other_users_session(monkeypatch):
+    memory_manager = MagicMock()
+    session_manager = MagicMock()
+    memory_vector = MagicMock(healthy=True)
+    router = mr.setup_memory_routes(
+        memory_manager=memory_manager,
+        session_manager=session_manager,
+        memory_vector=memory_vector,
+    )
+    add_memory = _route(router, "/api/memory/add", "POST")
+
+    memory_manager.load.return_value = []
+    memory_manager.find_duplicates.return_value = False
+    session_manager.get_session.return_value = SimpleNamespace(owner="bob", name="Bob session")
+
+    with pytest.raises(HTTPException) as exc:
+        asyncio.run(
+            add_memory(
+                request=_request("alice"),
+                memory_data=MemoryAddRequest(
+                    text="Alice note",
+                    category="fact",
+                    source="user",
+                    session_id="bob-session",
+                ),
+            )
+        )
+
+    assert exc.value.status_code == 404
+    assert exc.value.detail == "Session not found"
+    session_manager.get_session.assert_called_once_with("bob-session")
+    memory_manager.add_entry.assert_not_called()
+    memory_manager.save.assert_not_called()
+    memory_vector.add.assert_not_called()
+
+
+def test_timeline_does_not_expose_other_users_session_name():
+    memory_manager = MagicMock()
+    session_manager = MagicMock()
+    session_manager.sessions = {"bob-session": object()}
+    session_manager.get_session.return_value = SimpleNamespace(owner="bob", name="Bob roadmap")
+    memory_manager.load.return_value = [
+        {
+            "id": "m1",
+            "text": "Alice note",
+            "owner": "alice",
+            "session_id": "bob-session",
+            "timestamp": 1,
+        }
+    ]
+    router = mr.setup_memory_routes(memory_manager, session_manager)
+    timeline = _route(router, "/api/memory/timeline", "GET")
+
+    out = timeline(request=_request("alice"))
+
+    assert out["timeline"][0]["session_name"] == "Unknown"
@@ -6,7 +6,7 @@ import types
 import pytest

 import src.model_context as model_context
-from src.model_context import _is_local_endpoint, estimate_tokens, _lookup_known
+from src.model_context import is_local_endpoint, estimate_tokens, _lookup_known


 class _Column:
@@ -56,20 +56,20 @@ def _install_endpoint_db(monkeypatch, rows):

 class TestIsLocalEndpoint:
    def test_localhost(self):
-        assert _is_local_endpoint("http://localhost:5000/v1/chat/completions") is True
+        assert is_local_endpoint("http://localhost:5000/v1/chat/completions") is True

    def test_loopback_ipv4(self):
-        assert _is_local_endpoint("http://127.0.0.1:8080/v1/chat/completions") is True
+        assert is_local_endpoint("http://127.0.0.1:8080/v1/chat/completions") is True

    def test_private_192_168(self):
-        assert _is_local_endpoint("http://192.168.1.1:11434/v1/chat/completions") is True
+        assert is_local_endpoint("http://192.168.1.1:11434/v1/chat/completions") is True

    def test_private_10(self):
-        assert _is_local_endpoint("http://10.0.0.5:8000/v1/chat/completions") is True
+        assert is_local_endpoint("http://10.0.0.5:8000/v1/chat/completions") is True

    def test_tailscale_100(self):
        # 100.64.0.0/10 is the CGNAT range Tailscale uses.
-        assert _is_local_endpoint("http://100.64.0.1:5000/v1/chat/completions") is True
+        assert is_local_endpoint("http://100.64.0.1:5000/v1/chat/completions") is True

    def test_configured_tailscale_proxy_is_remote(self, monkeypatch):
        _install_endpoint_db(monkeypatch, [
@@ -81,19 +81,19 @@ class TestIsLocalEndpoint:
            )
        ])

-        assert _is_local_endpoint("http://100.117.136.97:34521/v1/chat/completions") is False
+        assert is_local_endpoint("http://100.117.136.97:34521/v1/chat/completions") is False

    def test_openai_is_remote(self):
-        assert _is_local_endpoint("https://api.openai.com/v1/chat/completions") is False
+        assert is_local_endpoint("https://api.openai.com/v1/chat/completions") is False

    def test_anthropic_is_remote(self):
-        assert _is_local_endpoint("https://api.anthropic.com/v1/messages") is False
+        assert is_local_endpoint("https://api.anthropic.com/v1/messages") is False

    def test_empty_url(self):
-        assert _is_local_endpoint("") is False
+        assert is_local_endpoint("") is False

    def test_malformed_url(self):
-        assert _is_local_endpoint("not-a-url") is False
+        assert is_local_endpoint("not-a-url") is False


 class TestEstimateTokens:
@@ -47,6 +47,20 @@ def test_find_bash_checks_local_app_data_git_install(monkeypatch):
    assert platform_compat.find_bash() == expected


+def test_find_bash_checks_local_app_data_programs_git_install(monkeypatch):
+    _reset_bash_cache(monkeypatch)
+    monkeypatch.setattr(platform_compat, "IS_WINDOWS", True)
+    monkeypatch.setattr(platform_compat.shutil, "which", lambda _name: None)
+    for env_name in platform_compat._WINDOWS_BASH_ROOT_ENV_VARS:
+        monkeypatch.delenv(env_name, raising=False)
+    monkeypatch.setenv("LocalAppData", r"C:\Users\alice\AppData\Local")
+
+    expected = r"C:\Users\alice\AppData\Local\Programs\Git\bin\bash.exe"
+    monkeypatch.setattr(platform_compat.os.path, "exists", lambda path: path == expected)
+
+    assert platform_compat.find_bash() == expected
+
+
 def test_find_bash_skips_windows_wsl_stub(monkeypatch):
    _reset_bash_cache(monkeypatch)
    monkeypatch.setattr(platform_compat, "IS_WINDOWS", True)
@@ -1,4 +1,4 @@
-"""Renaming a user must update all three owner caches, not just the SQL DB.
+"""Renaming a user must update non-SQL owner stores, not just the SQL DB.

 The DB owner-rename loop in the rename_user route updates every SQL-backed
 owner column, but three file-backed / in-memory stores are left stale:
@@ -17,6 +17,9 @@ owner column, but three file-backed / in-memory stores are left stale:
 4. data/memory.json  — a flat array where every entry has an `owner` field;
   memory_manager.load(owner=user) filters on it, so all memories vanish.

+5. data/uploads/uploads.json — each upload row carries an `owner` field and an
+   owner-prefixed index key; stale metadata denies renamed users their uploads.
+
 Regression coverage: these bugs are invisible in unit tests that mock the DB
 loop but don't exercise the file/cache patches added to the route.
 """
@@ -67,11 +70,12 @@ def rename_endpoint(monkeypatch, tmp_path):
    return _route(ar.setup_auth_routes(am), "rename_user"), am, tmp_path


-def _request(tmp_path, session_manager=None, token="t", research_handler=None):
+def _request(tmp_path, session_manager=None, token="t", research_handler=None, upload_handler=None):
    state = SimpleNamespace(
        invalidate_token_cache=lambda: None,
        session_manager=session_manager,
        research_handler=research_handler,
+        upload_handler=upload_handler,
    )
    return SimpleNamespace(
        cookies={"odysseus_session": token},
@@ -415,7 +419,56 @@ def test_rename_no_memory_json_does_not_crash(rename_endpoint):


 # ---------------------------------------------------------------------------
-# 4. Skills (SKILL.md frontmatter + _usage.json sidecar)
+# 4. uploads.json
+# ---------------------------------------------------------------------------
+
+def test_rename_updates_upload_metadata_owner(rename_endpoint):
+    endpoint, _am, tmp_path = rename_endpoint
+    from src.upload_handler import UploadHandler
+
+    upload_dir = tmp_path / "uploads"
+    dated = upload_dir / "2026" / "06" / "09"
+    dated.mkdir(parents=True)
+    upload_id = "a" * 32 + ".txt"
+    upload_path = dated / upload_id
+    upload_path.write_text("alice private upload", encoding="utf-8")
+    handler = UploadHandler(str(tmp_path), str(upload_dir))
+    handler._atomic_write_json(
+        str(upload_dir / "uploads.json"),
+        {
+            "alice:hash-alice": {
+                "id": upload_id,
+                "path": str(upload_path),
+                "mime": "text/plain",
+                "size": upload_path.stat().st_size,
+                "name": "note.txt",
+                "hash": "hash-alice",
+                "original_name": "note.txt",
+                "uploaded_at": "2026-06-09T10:00:00",
+                "last_accessed": "2026-06-09T10:00:00",
+                "client_ip": "127.0.0.1",
+                "owner": "alice",
+            },
+        },
+    )
+
+    asyncio.run(
+        endpoint(
+            "alice",
+            SimpleNamespace(username="alice2"),
+            _request(tmp_path, upload_handler=handler),
+        )
+    )
+
+    updated = json.loads((upload_dir / "uploads.json").read_text(encoding="utf-8"))
+    assert "alice:hash-alice" not in updated
+    assert updated["alice2:hash-alice"]["owner"] == "alice2"
+    assert handler.resolve_upload(upload_id, owner="alice2")["path"] == str(upload_path)
+    assert handler.resolve_upload(upload_id, owner="alice") is None
+
+
+# ---------------------------------------------------------------------------
+# 5. Skills (SKILL.md frontmatter + _usage.json sidecar)
 # ---------------------------------------------------------------------------

 _SKILL_MD = """\
@@ -522,7 +575,7 @@ def test_rename_usage_keys_case_insensitive(rename_endpoint):


 # ---------------------------------------------------------------------------
-# 5. Rollback: auth rename must be restored if SQL owner migration fails
+# 6. Rollback: auth rename must be restored if SQL owner migration fails
 # ---------------------------------------------------------------------------

 def test_owner_migration_failure_rolls_back_auth_rename(monkeypatch, tmp_path):
@@ -583,7 +636,7 @@ def test_self_rename_owner_migration_failure_rolls_back_auth_session(monkeypatch


 # ---------------------------------------------------------------------------
-# 6. P1 regression: rejected auth rename must not mutate file-backed stores
+# 7. P1 regression: rejected auth rename must not mutate file-backed stores
 # ---------------------------------------------------------------------------

 def test_rejected_rename_does_not_mutate_files(monkeypatch, tmp_path):
@@ -0,0 +1,55 @@
+"""FTS session search must fetch hit rows in one query, not one per hit.
+
+_search_fts looked up each FTS hit's full row with its own
+db.query(...).filter(id == message_id).first(), an N+1 query. The lookup is now
+a single batched IN(...) query via _fetch_messages_by_id.
+"""
+from src.session_search import _fetch_messages_by_id
+
+
+class _Msg:
+    def __init__(self, mid):
+        self.id = mid
+
+
+class _Query:
+    def __init__(self, rows, calls):
+        self._rows = rows
+        self._calls = calls
+
+    def join(self, *a, **k):
+        return self
+
+    def filter(self, *a, **k):
+        return self
+
+    def all(self):
+        self._calls["all"] += 1
+        return self._rows
+
+
+class _DB:
+    def __init__(self, rows):
+        self._rows = rows
+        self.calls = {"query": 0, "all": 0}
+
+    def query(self, *a, **k):
+        self.calls["query"] += 1
+        return _Query(self._rows, self.calls)
+
+
+def test_batches_into_single_query():
+    rows = [(_Msg("m1"), "Session One"), (_Msg("m2"), "Session Two")]
+    db = _DB(rows)
+    out = _fetch_messages_by_id(db, ["m1", "m2"])
+    # One query for all hits, not one per hit.
+    assert db.calls["query"] == 1
+    assert db.calls["all"] == 1
+    assert out["m1"][1] == "Session One"
+    assert out["m2"][0].id == "m2"
+
+
+def test_empty_ids_does_no_query():
+    db = _DB([])
+    assert _fetch_messages_by_id(db, []) == {}
+    assert db.calls["query"] == 0
@@ -0,0 +1,101 @@
+import json
+import os
+from pathlib import Path
+
+from src.upload_handler import UploadHandler
+
+
+def _make_handler(tmp_path: Path) -> UploadHandler:
+    base = tmp_path / "base"
+    upload = tmp_path / "uploads"
+    base.mkdir()
+    upload.mkdir()
+    return UploadHandler(base_dir=str(base), upload_dir=str(upload))
+
+
+def _db_path(handler: UploadHandler) -> str:
+    return os.path.join(handler.upload_dir, "uploads.json")
+
+
+def _write_upload_file(handler: UploadHandler, file_id: str, content: bytes = b"content") -> str:
+    upload_day = Path(handler.upload_dir) / "2026" / "06" / "09"
+    upload_day.mkdir(parents=True, exist_ok=True)
+    path = upload_day / file_id
+    path.write_bytes(content)
+    return str(path)
+
+
+def _entry(handler: UploadHandler, owner: str, file_hash: str, file_id: str) -> dict:
+    path = _write_upload_file(handler, file_id, content=f"{owner}:{file_hash}".encode())
+    return {
+        "id": file_id,
+        "path": path,
+        "mime": "text/plain",
+        "size": os.path.getsize(path),
+        "name": f"{file_id}.txt",
+        "hash": file_hash,
+        "original_name": f"{file_id}.txt",
+        "uploaded_at": "2026-06-09T10:00:00",
+        "last_accessed": "2026-06-09T10:00:00",
+        "client_ip": "127.0.0.1",
+        "owner": owner,
+    }
+
+
+def test_rename_owner_updates_upload_metadata_key_and_resolver(tmp_path):
+    handler = _make_handler(tmp_path)
+    alice_id = "a" * 32 + ".txt"
+    alice_entry = _entry(handler, "Alice", "hash-alice", alice_id)
+    bob_entry = _entry(handler, "bob", "hash-bob", "b" * 32 + ".txt")
+    handler._atomic_write_json(
+        _db_path(handler),
+        {
+            "Alice:hash-alice": alice_entry,
+            "bob:hash-bob": bob_entry,
+        },
+    )
+
+    renamed = handler.rename_owner("alice", "alice2")
+
+    assert renamed == 1
+    updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8"))
+    assert "Alice:hash-alice" not in updated
+    assert "alice2:hash-alice" in updated
+    assert updated["alice2:hash-alice"]["owner"] == "alice2"
+    assert updated["alice2:hash-alice"]["path"] == alice_entry["path"]
+    assert updated["alice2:hash-alice"]["hash"] == alice_entry["hash"]
+    assert updated["alice2:hash-alice"]["uploaded_at"] == alice_entry["uploaded_at"]
+    assert updated["alice2:hash-alice"]["last_accessed"] == alice_entry["last_accessed"]
+    assert updated["bob:hash-bob"]["owner"] == "bob"
+
+    assert handler.resolve_upload(alice_id, owner="alice2")["id"] == alice_id
+    assert handler.resolve_upload(alice_id, owner="alice") is None
+
+
+def test_rename_owner_preserves_rows_when_target_key_collides(tmp_path):
+    handler = _make_handler(tmp_path)
+    migrated_id = "c" * 32 + ".txt"
+    existing_id = "d" * 32 + ".txt"
+    migrated = _entry(handler, "alice", "same-hash", migrated_id)
+    existing = _entry(handler, "alice2", "same-hash", existing_id)
+    unrelated = _entry(handler, "carol", "other-hash", "e" * 32 + ".txt")
+    handler._atomic_write_json(
+        _db_path(handler),
+        {
+            "alice:same-hash": migrated,
+            "alice2:same-hash": existing,
+            "carol:other-hash": unrelated,
+        },
+    )
+
+    renamed = handler.rename_owner("alice", "alice2")
+
+    assert renamed == 1
+    updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8"))
+    assert len(updated) == 3
+    assert updated["alice2:same-hash"]["id"] == existing_id
+    migrated_key = f"alice2:same-hash:{migrated_id}"
+    assert updated[migrated_key]["id"] == migrated_id
+    assert updated[migrated_key]["owner"] == "alice2"
+    assert updated[migrated_key]["path"] == migrated["path"]
+    assert updated["carol:other-hash"] == unrelated
@@ -0,0 +1,110 @@
+"""fetch_webpage_content must return plain-text and Markdown bodies verbatim.
+
+raw.githubusercontent.com serves Markdown as `text/plain`, and a lot of code
+and tool documentation lives in `.md` / `.txt`. Those have no HTML structure,
+so the HTML branch extracted nothing and web_fetch reported "no readable text
+content". The plain-text branch returns the body as-is apart from stripping
+surrounding whitespace. HTML stays on the parsing path.
+"""
+import types
+
+import pytest
+
+from services.search import content as content_mod
+
+
+class _FakeResponse:
+    def __init__(self, text, content_type, status_code=200):
+        self.text = text
+        self.content = text.encode("utf-8")
+        self.headers = {"Content-Type": content_type}
+        self.status_code = status_code
+
+    def raise_for_status(self):
+        return None
+
+
+@pytest.fixture
+def no_cache(monkeypatch, tmp_path):
+    # Force a cache miss and skip disk writes so the test is hermetic.
+    monkeypatch.setattr(content_mod, "CONTENT_CACHE_DIR", tmp_path)
+    monkeypatch.setattr(content_mod, "_cache_result", lambda *a, **k: None)
+
+
+def _patch_fetch(monkeypatch, text, content_type):
+    monkeypatch.setattr(
+        content_mod,
+        "_get_public_url",
+        lambda url, headers=None, timeout=5: _FakeResponse(text, content_type),
+    )
+
+
+MARKDOWN = "# Title\n\nSome **docs** with a [link](https://example.com).\n"
+
+
+def test_markdown_text_plain_returns_body(monkeypatch, no_cache):
+    _patch_fetch(monkeypatch, MARKDOWN, "text/plain; charset=utf-8")
+    r = content_mod.fetch_webpage_content(
+        "https://raw.githubusercontent.com/o/r/master/Documentation/Patterns.md"
+    )
+    assert r["success"] is True
+    assert r["content"] == MARKDOWN.strip()
+    assert r["title"] == "patterns.md"
+    assert r["error"] == ""
+
+
+def test_text_markdown_content_type_returns_body(monkeypatch, no_cache):
+    _patch_fetch(monkeypatch, MARKDOWN, "text/markdown")
+    r = content_mod.fetch_webpage_content("https://example.com/readme")
+    assert r["success"] is True
+    assert r["content"] == MARKDOWN.strip()
+
+
+def test_octet_stream_with_txt_suffix_returns_body(monkeypatch, no_cache):
+    # Some servers mislabel text files; the URL-suffix fallback still reads it.
+    _patch_fetch(monkeypatch, "plain notes\nline two\n", "application/octet-stream")
+    r = content_mod.fetch_webpage_content("https://example.com/notes.txt")
+    assert r["success"] is True
+    assert r["content"] == "plain notes\nline two"
+
+
+def test_application_json_returns_body(monkeypatch, no_cache):
+    # application/json is not text/*; it must still be returned verbatim
+    # instead of being fed to the HTML parser (which yields empty content).
+    body = '{"name": "odysseus", "items": [1, 2, 3]}'
+    _patch_fetch(monkeypatch, body, "application/json")
+    r = content_mod.fetch_webpage_content("https://api.example.com/data")
+    assert r["success"] is True
+    assert r["content"] == body
+
+
+def test_ld_json_suffix_content_type_returns_body(monkeypatch, no_cache):
+    body = '{"@context": "https://schema.org"}'
+    _patch_fetch(monkeypatch, body, "application/ld+json")
+    r = content_mod.fetch_webpage_content("https://example.com/meta")
+    assert r["success"] is True
+    assert r["content"] == body
+
+
+def test_json_suffix_with_octet_stream_returns_body(monkeypatch, no_cache):
+    body = '{"raw": true}'
+    _patch_fetch(monkeypatch, body, "application/octet-stream")
+    r = content_mod.fetch_webpage_content("https://example.com/package.json")
+    assert r["success"] is True
+    assert r["content"] == body
+
+
+def test_empty_text_body_is_not_success(monkeypatch, no_cache):
+    _patch_fetch(monkeypatch, "   \n  ", "text/plain")
+    r = content_mod.fetch_webpage_content("https://example.com/blank.txt")
+    assert r["success"] is False
+    assert r["content"] == ""
+
+
+def test_html_still_uses_parser(monkeypatch, no_cache):
+    # An HTML body must not be short-circuited by the text branch.
+    html = "<html><head><title>Hi</title></head><body><p>Hello world body text</p></body></html>"
+    _patch_fetch(monkeypatch, html, "text/html; charset=utf-8")
+    r = content_mod.fetch_webpage_content("https://example.com/page")
+    assert r["title"] == "Hi"
+    assert "Hello world body text" in r["content"]
@@ -0,0 +1,55 @@
+"""Fire-and-forget webhook tasks must be referenced until they finish.
+
+asyncio keeps only a weak reference to a bare create_task() result, so a
+delivery task could be garbage-collected before it ran and the webhook silently
+dropped. WebhookManager now holds a strong reference for the task's lifetime and
+releases it on completion.
+"""
+import asyncio
+import sys
+
+# webhook_manager does `from src.database import SessionLocal, Webhook` at import
+# time. The shared test harness stubs src.database without Webhook, so ensure the
+# attribute exists before importing the manager. These tests never touch the DB
+# (the manager is built via __new__), so a placeholder class is sufficient.
+_db = sys.modules.get("src.database")
+if _db is not None and not hasattr(_db, "Webhook"):
+    _db.Webhook = type("Webhook", (), {})
+
+from src.webhook_manager import WebhookManager  # noqa: E402
+
+
+def test_spawn_tracked_holds_then_releases_reference():
+    async def run():
+        wm = WebhookManager.__new__(WebhookManager)
+        wm._bg_tasks = set()
+
+        gate = asyncio.Event()
+
+        async def work():
+            await gate.wait()
+
+        task = wm._spawn_tracked(work())
+        # Referenced while in flight (this is what stops GC from collecting it).
+        assert task in wm._bg_tasks
+        gate.set()
+        await task
+        # Reference released once done, so the set does not grow unbounded.
+        assert task not in wm._bg_tasks
+
+    asyncio.run(run())
+
+
+def test_spawn_tracked_runs_the_coroutine():
+    async def run():
+        wm = WebhookManager.__new__(WebhookManager)
+        wm._bg_tasks = set()
+        ran = []
+
+        async def work():
+            ran.append(True)
+
+        await wm._spawn_tracked(work())
+        assert ran == [True]
+
+    asyncio.run(run())
@@ -0,0 +1,328 @@
+"""Workspace confinement.
+
+The agent's per-turn workspace is a single context-local binding set in
+execute_tool_block. The shared path resolvers (_resolve_tool_path /
+_resolve_search_root) and the subprocess cwd helper (agent_cwd) read it, so
+confinement is enforced in ONE place: a tool that uses the shared helpers is
+confined automatically and a new tool cannot accidentally bypass it.
+
+Covers: the resolver helper, the central binding (the safety net), end-to-end
+confinement of read/write/edit/grep/ls + subprocess cwd via execute_tool_block,
+the get_workspace tool, no-leak across calls, and the admin-gated browse route.
+"""
+import json
+import os
+import tempfile
+from types import SimpleNamespace
+
+import pytest
+
+from src.tool_execution import (
+    _AGENT_WORKDIR,
+    _active_workspace,
+    _resolve_search_root,
+    _resolve_tool_path,
+    _resolve_tool_path_in_workspace,
+    agent_cwd,
+    execute_tool_block,
+    get_active_workspace,
+)
+
+
+def _block(tool, content=""):
+    return SimpleNamespace(tool_type=tool, content=content)
+
+
+@pytest.fixture
+def ws():
+    d = tempfile.mkdtemp()
+    with open(os.path.join(d, "a.txt"), "w") as f:
+        f.write("x")
+    return d
+
+
+@pytest.fixture
+def admin(monkeypatch):
+    """Pass the public-tool gate so file tools dispatch in tests."""
+    monkeypatch.setattr(
+        "src.tool_execution.owner_is_admin_or_single_user", lambda owner: True
+    )
+
+
+# ── the resolver helper ────────────────────────────────────────────────
+
+def test_resolver_confines(ws):
+    real = os.path.realpath(os.path.join(ws, "a.txt"))
+    assert _resolve_tool_path_in_workspace(ws, "a.txt") == real          # relative
+    assert _resolve_tool_path_in_workspace(ws, os.path.join(ws, "a.txt")) == real  # abs inside
+    outside = tempfile.mkdtemp()
+    with pytest.raises(ValueError):                                       # abs outside
+        _resolve_tool_path_in_workspace(ws, os.path.join(outside, "x.txt"))
+    with pytest.raises(ValueError):                                       # parent escape
+        _resolve_tool_path_in_workspace(ws, os.path.join("..", "..", "escape.txt"))
+
+
+def test_resolver_blocks_sensitive_inside_workspace(ws):
+    os.makedirs(os.path.join(ws, ".ssh"), exist_ok=True)
+    with pytest.raises(ValueError):
+        _resolve_tool_path_in_workspace(ws, ".ssh/authorized_keys")
+
+
+# ── the central binding: the safety net ─────────────────────────────────
+
+def test_active_binding_confines_shared_resolvers(ws):
+    """ANY tool resolving paths through the shared helpers is confined while the
+    binding is active, without doing anything workspace-specific itself. This is
+    what stops a newly added tool from accidentally ignoring the workspace."""
+    token = _active_workspace.set(ws)
+    try:
+        assert get_active_workspace() == ws
+        assert agent_cwd() == ws
+        assert _resolve_tool_path("a.txt") == os.path.realpath(os.path.join(ws, "a.txt"))
+        with pytest.raises(ValueError):          # normally-allowed root, now outside ws
+            _resolve_tool_path("/tmp/whatever.txt")
+        assert _resolve_search_root("") == os.path.realpath(ws)
+    finally:
+        _active_workspace.reset(token)
+
+
+def test_no_binding_uses_default_roots():
+    assert get_active_workspace() is None
+    assert agent_cwd() == _AGENT_WORKDIR
+    with pytest.raises(ValueError):
+        _resolve_tool_path("/etc/hosts")
+
+
+# ── end-to-end via execute_tool_block (sets + resets the binding) ───────
+
+@pytest.mark.asyncio
+async def test_read_write_edit_confined_e2e(ws, admin):
+    _, r = await execute_tool_block(_block("write_file", "note.txt\nhello"), owner="a", workspace=ws)
+    assert r["exit_code"] == 0 and os.path.isfile(os.path.join(ws, "note.txt"))
+    _, r = await execute_tool_block(_block("read_file", "note.txt"), owner="a", workspace=ws)
+    assert r["exit_code"] == 0 and r["output"] == "hello"
+
+    with open(os.path.join(ws, "f.txt"), "w") as f:
+        f.write("foo bar")
+    _, r = await execute_tool_block(
+        _block("edit_file", json.dumps({"path": "f.txt", "old_string": "foo", "new_string": "baz"})),
+        owner="a", workspace=ws,
+    )
+    assert r["exit_code"] == 0
+    with open(os.path.join(ws, "f.txt")) as f:
+        assert f.read() == "baz bar"
+
+    # outside the workspace is rejected, and nothing is created
+    outside = tempfile.mkdtemp()
+    of = os.path.join(outside, "secret.txt")
+    with open(of, "w") as f:
+        f.write("nope")
+    _, r = await execute_tool_block(_block("read_file", of), owner="a", workspace=ws)
+    assert r["exit_code"] == 1 and "outside the workspace" in r["error"]
+    escape = os.path.join(outside, "_esc.txt")
+    _, r = await execute_tool_block(_block("write_file", f"{escape}\nx"), owner="a", workspace=ws)
+    assert r["exit_code"] == 1 and "outside the workspace" in r["error"]
+    assert not os.path.exists(escape)
+
+
+@pytest.mark.asyncio
+async def test_grep_and_ls_confined_e2e(ws, admin):
+    with open(os.path.join(ws, "doc.txt"), "w") as f:
+        f.write("hello workspace\n")
+    _, r = await execute_tool_block(_block("grep", json.dumps({"pattern": "hello"})), owner="a", workspace=ws)
+    assert r["exit_code"] == 0 and "doc.txt" in r["output"]
+    outside = tempfile.mkdtemp()
+    _, r = await execute_tool_block(_block("grep", json.dumps({"pattern": "x", "path": outside})), owner="a", workspace=ws)
+    assert r["exit_code"] == 1 and "outside the workspace" in r["error"]
+    _, r = await execute_tool_block(_block("ls", ""), owner="a", workspace=ws)
+    assert r["exit_code"] == 0 and "doc.txt" in r["output"]
+    _, r = await execute_tool_block(_block("ls", outside), owner="a", workspace=ws)
+    assert r["exit_code"] == 1 and "outside the workspace" in r["error"]
+
+
+@pytest.mark.asyncio
+async def test_subprocess_cwd_is_workspace_e2e(ws, admin):
+    """python tool runs with cwd = workspace (OS-agnostic probe)."""
+    _, r = await execute_tool_block(_block("python", "import os; print(os.getcwd())"), owner="a", workspace=ws)
+    assert r["exit_code"] == 0
+    assert os.path.realpath(r["output"].strip()) == os.path.realpath(ws)
+
+
+# ── get_workspace tool ──────────────────────────────────────────────────
+
+@pytest.mark.asyncio
+async def test_get_workspace_tool(ws, admin):
+    _, r = await execute_tool_block(_block("get_workspace", ""), owner="a", workspace=ws)
+    assert r["exit_code"] == 0 and r["output"].startswith(ws) and "not sandboxed" in r["output"]
+    _, r = await execute_tool_block(_block("get_workspace", ""), owner="a")  # none active
+    assert r["exit_code"] == 0 and "No workspace" in r["output"]
+
+
+# ── no leak across calls ────────────────────────────────────────────────
+
+@pytest.mark.asyncio
+async def test_binding_does_not_leak(ws, admin):
+    await execute_tool_block(_block("ls", ""), owner="a", workspace=ws)
+    assert get_active_workspace() is None
+
+
+# ── tool selection: an active workspace is the file-work signal ─────────
+# A vague ("low-signal") message like "look at the local project" matches no
+# domain keywords, so retrieval is normally skipped. When a workspace is set it
+# must still surface the file tools, otherwise the agent says it has no file
+# access (the bug this guards against).
+
+def _sent_tool_names(monkeypatch, *, workspace):
+    import asyncio
+    import src.agent_loop as al
+
+    monkeypatch.setattr(al, "get_setting", lambda key, default=None: default, raising=False)
+    monkeypatch.setattr(al, "get_mcp_manager", lambda: None, raising=False)
+    monkeypatch.setattr(al, "estimate_tokens", lambda *a, **k: 10, raising=False)
+    # Isolate the selection logic from owner gating (tested separately).
+    monkeypatch.setattr(al, "blocked_tools_for_owner", lambda owner: set(), raising=False)
+
+    captured = []
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        captured.append(kwargs.get("tools"))
+        yield "data: " + json.dumps({"delta": "ok"}) + "\n\n"
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+
+    async def _run():
+        gen = al.stream_agent_loop(
+            "https://api.openai.com/v1", "gpt-test",
+            [{"role": "user", "content": "look at the local project"}],
+            max_rounds=1, relevant_tools=None, owner="admin", workspace=workspace,
+        )
+        return [c async for c in gen]
+
+    asyncio.run(_run())
+    schemas = captured[0] or []
+    return {t["function"]["name"] for t in schemas if isinstance(t, dict) and "function" in t}
+
+
+def test_low_signal_with_workspace_surfaces_readonly_file_tools(monkeypatch):
+    names = _sent_tool_names(monkeypatch, workspace="/tmp")
+    # read-only nav tools surface so the agent can explore
+    assert "read_file" in names
+    assert "get_workspace" in names
+    assert "grep" in names
+    # write/shell tools do NOT surface on a vague message
+    assert "write_file" not in names
+    assert "edit_file" not in names
+    assert "bash" not in names
+    assert "python" not in names
+
+
+def test_low_signal_without_workspace_excludes_file_tools(monkeypatch):
+    names = _sent_tool_names(monkeypatch, workspace=None)
+    assert "read_file" not in names
+    assert "get_workspace" not in names
+
+
+# ── browse route is admin-gated ─────────────────────────────────────────
+
+def test_browse_is_admin_gated(monkeypatch):
+    from fastapi import HTTPException
+    import routes.workspace_routes as wr
+
+    router = wr.setup_workspace_routes()
+    browse = next(r.endpoint for r in router.routes if r.path == "/api/workspace/browse")
+
+    monkeypatch.setattr(wr, "get_current_user", lambda req: "bob")
+    monkeypatch.setattr(wr, "owner_is_admin_or_single_user", lambda owner: False)
+    with pytest.raises(HTTPException) as ei:
+        browse(request=object(), path="/")
+    assert ei.value.status_code == 403
+
+    monkeypatch.setattr(wr, "owner_is_admin_or_single_user", lambda owner: True)
+    out = browse(request=object(), path=os.path.expanduser("~"))
+    assert "dirs" in out and "path" in out
+    assert all("name" in d and "path" in d for d in out["dirs"])
+
+
+# ── bind-time vetting of the workspace root ─────────────────────────────
+
+def test_vet_workspace_accepts_normal_dir(ws):
+    from src.tool_execution import vet_workspace
+    assert vet_workspace(ws) == os.path.realpath(ws)
+
+
+def test_vet_workspace_rejects_sensitive_root(tmp_path):
+    # The resolver deny-lists sensitive paths inside the workspace, but the
+    # empty-path search root is the workspace itself - a sensitive root must
+    # be rejected before it is bound or `ls` with no path would list it.
+    from src.tool_execution import vet_workspace
+    ssh_dir = tmp_path / ".ssh"
+    ssh_dir.mkdir()
+    assert vet_workspace(str(ssh_dir)) is None
+
+
+def test_vet_workspace_rejects_nondir_and_empty(ws):
+    from src.tool_execution import vet_workspace
+    assert vet_workspace(os.path.join(ws, "a.txt")) is None  # file, not dir
+    assert vet_workspace("/nonexistent/path/xyz") is None
+    assert vet_workspace("") is None
+    assert vet_workspace("   ") is None
+
+
+def test_vet_workspace_rejects_filesystem_root():
+    # Binding / would make every absolute path "inside" the workspace,
+    # collapsing confinement into host-wide file access.
+    from src.tool_execution import vet_workspace
+    assert vet_workspace("/") is None
+
+
+def test_browse_marks_root_unselectable_and_vet_endpoint(monkeypatch):
+    import routes.workspace_routes as wr
+
+    router = wr.setup_workspace_routes()
+    browse = next(r.endpoint for r in router.routes if r.path == "/api/workspace/browse")
+    vet = next(r.endpoint for r in router.routes if r.path == "/api/workspace/vet")
+
+    monkeypatch.setattr(wr, "get_current_user", lambda req: "admin")
+    monkeypatch.setattr(wr, "owner_is_admin_or_single_user", lambda owner: True)
+
+    out = browse(request=object(), path="/")
+    assert out["selectable"] is False
+    out = browse(request=object(), path=os.path.expanduser("~"))
+    assert out["selectable"] is True
+
+    assert vet(request=object(), path="/") == {"ok": False, "path": None}
+    home = os.path.realpath(os.path.expanduser("~"))
+    assert vet(request=object(), path="~") == {"ok": True, "path": home}
+
+    from fastapi import HTTPException
+    monkeypatch.setattr(wr, "owner_is_admin_or_single_user", lambda owner: False)
+    with pytest.raises(HTTPException) as ei:
+        vet(request=object(), path="/tmp")
+    assert ei.value.status_code == 403
+
+
+# ── send-time privilege gate (no path oracle for non-admins) ────────────
+
+def test_request_workspace_gate(ws, monkeypatch):
+    """Non-admin chat callers must get a uniform drop with no vetting: the
+    workspace_rejected signal would otherwise reveal which host paths exist."""
+    import routes.chat_routes as cr
+
+    monkeypatch.setattr(cr, "get_current_user", lambda req: "bob")
+    vet_calls = []
+    import src.tool_execution as te
+    real_vet = te.vet_workspace
+    monkeypatch.setattr(te, "vet_workspace", lambda p: vet_calls.append(p) or real_vet(p))
+
+    import src.tool_security as ts
+    monkeypatch.setattr(ts, "owner_is_admin_or_single_user", lambda owner: False)
+    # Valid and invalid paths are indistinguishable for a non-admin: both
+    # drop silently, and the path never reaches the filesystem.
+    assert cr._resolve_request_workspace(object(), ws) == ("", "")
+    assert cr._resolve_request_workspace(object(), "/nonexistent/xyz") == ("", "")
+    assert vet_calls == []
+
+    monkeypatch.setattr(ts, "owner_is_admin_or_single_user", lambda owner: True)
+    assert cr._resolve_request_workspace(object(), ws) == (os.path.realpath(ws), "")
+    assert cr._resolve_request_workspace(object(), "/nonexistent/xyz") == ("", "/nonexistent/xyz")