Agent email safety: stage drafts for user approval instead of auto-send

Closes the auto-send hole that let earlier models invent signatures
(e.g. signing 'David' for a user named Felix) and SMTP them to real
recipients before the user could review.

New setting: agent_email_confirm (default True).

When on, the MCP send_email and reply_to_email tools no longer SMTP
directly — they write the composed email to scheduled_emails with a new
status 'agent_draft' (far-future send_at so the scheduled-send poller
ignores them) and return a {pending: true, pending_id, to, subject,
body, message: ...} payload. The model surfaces that to the user.

Backend endpoints to approve / cancel:
- GET    /api/email/pending          → list staged drafts for the owner
- POST   /api/email/pending/{id}/approve → flip status to 'pending' and
                                           set send_at to the current
                                           UTC time so the existing
                                           scheduled-send poller treats
                                           it as due right away
- DELETE /api/email/pending/{id}     → status = 'cancelled'

UI:
- Settings / AI Defaults gets a new 'Email Safety' card with the
  toggle, default on.
- Tool descriptions for send_email and reply_to_email now include the
  pending behavior + an explicit 'DO NOT invent a signature, do not
  type a person's name' guardrail.

Pass 2 (next): inline chat card with Send / Discard buttons so the user
doesn't have to type a confirmation reply. Today's prompt + the listing
endpoint give the model a clean path to surface drafts.
This commit is contained in:
pewdiepie-archdaemon
2026-06-11 08:50:06 +09:00
parent 2b1e2e9e20
commit bc2d934b94
6 changed files with 212 additions and 3 deletions
+102 -1
View File
@@ -885,8 +885,109 @@ def _smtp_connect(account=None, cfg=None):
return conn
def _read_agent_email_confirm_setting() -> bool:
"""True if the user wants agent send_email/reply_to_email calls to be
queued for manual approval instead of SMTPed immediately. Defaults to
True so a fresh install is safe — agents have been observed inventing
signatures and sending to real recipients without the user's review."""
try:
from src.settings import get_setting
return bool(get_setting("agent_email_confirm", True))
except Exception:
return True
def _stash_agent_draft(*, to, subject, body, in_reply_to=None, references=None,
cc=None, bcc=None, account=None) -> dict:
"""Insert the composed email into scheduled_emails with status
'agent_draft' and a far-future send_at so the scheduled-send poller
never picks it up. Returns the pending payload the model surfaces to
the user (and that the chat UI can render as an approval card)."""
try:
from src.constants import SCHEDULED_EMAILS_DB
except Exception:
return {"success": False, "error": "Pending-email storage unavailable"}
pending_id = uuid.uuid4().hex[:16]
far_future = "9999-12-31T00:00:00"
now = datetime.utcnow().isoformat()
try:
conn = sqlite3.connect(SCHEDULED_EMAILS_DB)
# Touch the schema in case the email-routes init hasn't run yet
# (MCP server can boot independently).
conn.execute("""
CREATE TABLE IF NOT EXISTS scheduled_emails (
id TEXT PRIMARY KEY,
to_addr TEXT NOT NULL,
cc TEXT,
bcc TEXT,
subject TEXT,
body TEXT NOT NULL,
in_reply_to TEXT,
references_hdr TEXT,
attachments TEXT,
send_at TEXT NOT NULL,
created_at TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
error TEXT,
owner TEXT DEFAULT '',
account_id TEXT,
odysseus_kind TEXT
)
""")
conn.execute("""
INSERT INTO scheduled_emails
(id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr,
attachments, send_at, created_at, status, account_id, odysseus_kind, owner)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'agent_draft', ?, ?, ?)
""", (
pending_id,
to if isinstance(to, str) else ", ".join(to),
cc if isinstance(cc, str) else (", ".join(cc) if cc else None),
bcc if isinstance(bcc, str) else (", ".join(bcc) if bcc else None),
subject or "",
body or "",
in_reply_to or None,
references if isinstance(references, str) else (" ".join(references) if references else None),
"[]",
far_future,
now,
account or None,
"agent_draft",
"",
))
conn.commit()
conn.close()
except Exception as e:
return {"success": False, "error": f"Failed to stash draft: {e}"}
return {
"success": True,
"pending": True,
"pending_id": pending_id,
"to": to if isinstance(to, str) else ", ".join(to),
"subject": subject or "",
"body": body or "",
"message": (
"✋ Draft staged for your approval — nothing has been sent yet.\n"
"Review the To/Subject/Body above. Reply 'send' to deliver, or "
"'cancel' to discard."
),
}
def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, bcc=None, account=None):
"""Send an email via SMTP. Returns dict with status."""
"""Send an email via SMTP. Returns dict with status.
When the `agent_email_confirm` setting is on (the default), the email
is NOT SMTPed — instead it lands in scheduled_emails as an
`agent_draft` row and the user reviews + approves it from the chat
UI. This closes the auto-send hole that let earlier models invent
signatures and ship them to real recipients without confirmation."""
if _read_agent_email_confirm_setting():
return _stash_agent_draft(
to=to, subject=subject, body=body,
in_reply_to=in_reply_to, references=references,
cc=cc, bcc=bcc, account=account,
)
send_account, cfg = _resolve_send_config(account)
msg = EmailMessage()
msg["From"] = _clean_header_value(cfg["from_address"])
+73
View File
@@ -2071,6 +2071,79 @@ def setup_email_routes():
logger.error(f"cancel_scheduled {sid!r} failed: {e}")
return {"success": False, "error": "Mail operation failed"}
# ── Agent send-confirm: list/approve/cancel ──────────────────────────
# When `agent_email_confirm` is on, the MCP send_email tool drops the
# composed email into scheduled_emails with status='agent_draft' (a
# far-future send_at so the poller never picks it up). These endpoints
# let the chat UI surface them for the user and either approve (flip
# to status='pending' with send_at=now so the poller delivers it) or
# cancel (status='cancelled').
@router.get("/pending")
async def list_pending_agent_drafts(owner: str = Depends(require_owner)):
    """List drafts the agent staged for approval, newest first.

    Returns `{"pending": [...]}` where each entry carries id, to_addr,
    subject, body, created_at and account_id. On failure the list is empty
    and an `error` key is added; the exception is logged, not raised.
    """
    import sqlite3
    from contextlib import closing
    try:
        # closing() releases the connection even if the query raises;
        # previously it was only closed on the success path.
        with closing(sqlite3.connect(SCHEDULED_DB)) as conn:
            conn.row_factory = sqlite3.Row
            # The MCP server can't easily set owner, so it stores '' — fall
            # back to those rows in addition to the caller's owner.
            # NOTE(review): this makes ownerless drafts visible to every
            # caller that passes require_owner — confirm that is acceptable
            # for multi-user installs.
            rows = conn.execute(
                """SELECT id, to_addr, subject, body, created_at, account_id
                   FROM scheduled_emails
                   WHERE status = 'agent_draft' AND (owner = ? OR owner = '')
                   ORDER BY created_at DESC""",
                (owner or "",),
            ).fetchall()
        return {"pending": [dict(r) for r in rows]}
    except Exception as e:
        logger.error(f"list_pending_agent_drafts failed: {e}")
        return {"pending": [], "error": "Mail operation failed"}
@router.post("/pending/{sid}/approve")
async def approve_agent_draft(sid: str, owner: str = Depends(require_owner)):
    """Approve a draft staged by the agent.

    Flips the row's status from 'agent_draft' to 'pending' and replaces the
    far-future send_at with the current UTC time, so the scheduled-send
    poller sees it as due.

    Returns `{"success": True}` when exactly such a draft was updated, and
    `{"success": False, "error": ...}` when no matching draft exists (wrong
    id, wrong owner, or already approved/cancelled) or the database
    operation fails.
    """
    import sqlite3
    from contextlib import closing
    try:
        # closing() releases the connection even if the UPDATE raises;
        # previously it was only closed on the success path.
        with closing(sqlite3.connect(SCHEDULED_DB)) as conn:
            cur = conn.execute(
                """UPDATE scheduled_emails
                   SET status = 'pending', send_at = ?
                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
                (datetime.utcnow().isoformat(), sid, owner or ""),
            )
            conn.commit()
            affected = cur.rowcount
        if not affected:
            return {"success": False, "error": "Draft not found or already handled"}
        return {"success": True}
    except Exception as e:
        logger.error(f"approve_agent_draft {sid!r} failed: {e}")
        return {"success": False, "error": "Mail operation failed"}
@router.delete("/pending/{sid}")
async def cancel_agent_draft(sid: str, owner: str = Depends(require_owner)):
    """Discard a draft the agent staged for approval.

    The row is kept but its status moves from 'agent_draft' to 'cancelled'.
    Returns `{"success": True}` when a matching draft was updated, and
    `{"success": False, "error": ...}` when none matched (wrong id, wrong
    owner, or already handled) or the database operation fails.
    """
    import sqlite3
    from contextlib import closing
    try:
        # closing() releases the connection even if the UPDATE raises;
        # previously it was only closed on the success path.
        with closing(sqlite3.connect(SCHEDULED_DB)) as conn:
            cur = conn.execute(
                """UPDATE scheduled_emails SET status = 'cancelled'
                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
                (sid, owner or ""),
            )
            conn.commit()
            affected = cur.rowcount
        if not affected:
            return {"success": False, "error": "Draft not found or already handled"}
        return {"success": True}
    except Exception as e:
        logger.error(f"cancel_agent_draft {sid!r} failed: {e}")
        return {"success": False, "error": "Mail operation failed"}
@router.get("/resolve-contact")
async def resolve_contact(name: str = Query(..., description="Name to search for"), owner: str = Depends(require_owner)):
"""Search Sent folder for a contact by name. Returns matching email addresses."""
+6 -2
View File
@@ -416,7 +416,9 @@ Notes, checklists, AND user reminders. Use this for "create/add/write a note", t
```send_email
{"to": "recipient@example.com", "subject": "Re: Your question", "body": "Hi, ...", "account": "gmail"}
```
Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.""",
Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.
CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
"list_emails": """\
```list_emails
{"folder": "INBOX", "max_results": 20, "unread_only": false, "account": "gmail"}
@@ -427,7 +429,9 @@ List recent emails from a folder, newest first, including read messages by defau
```reply_to_email
{"uid": "1234", "body": "Sounds good — talk Friday.", "account": "gmail"}
```
SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).""",
SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).
CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
"bulk_email": """\
```bulk_email
{"action": "delete", "uids": ["10997", "10998"], "folder": "INBOX", "account": "Gmail"}
+8
View File
@@ -29,6 +29,14 @@ def _invalidate_caches():
# ── Default values ──
DEFAULT_SETTINGS = {
# Agent email safety: when True, the MCP send_email / reply_to_email
# tools don't SMTP directly. They stage the composed message into the
# scheduled_emails table with status='agent_draft' and return a
# pending_id + the rendered email so the user can review and approve
# (or cancel) before it actually goes out. Default ON because models
# have been observed inventing signatures and sending to real
# recipients without confirmation.
"agent_email_confirm": True,
"image_gen_enabled": False,
"image_model": "",
"image_quality": "medium",
+4
View File
@@ -1547,6 +1547,10 @@
working for anyone who wired it via `manage_settings` /
settings backup. Re-add this card to surface the toggle
again once the core experience is faster. -->
<div class="admin-card">
<h2 style="display:flex;align-items:center;gap:6px;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="margin-right:1px;opacity:0.6;flex-shrink:0"><rect x="2" y="4" width="20" height="16" rx="2"/><polyline points="2 6 12 13 22 6"/></svg>Email Safety<span style="flex:1"></span><label class="admin-switch" title="When on, agent send_email and reply_to_email tools stage a draft for your approval instead of sending immediately."><input type="checkbox" id="set-agentEmailConfirm" checked><span class="admin-slider"></span></label></h2>
<div class="admin-toggle-sub" style="margin-bottom:8px">When on, agent <code>send_email</code> / <code>reply_to_email</code> tools stage a draft for your approval (in the chat) instead of SMTPing immediately. Stops models from inventing a signature and sending it to a real recipient before you can review.</div>
</div>
</div>
<!-- ═══ SEARCH TAB ═══ -->
+19
View File
@@ -1699,6 +1699,25 @@ async function initAgentSettings() {
msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') +
(curR != null ? ' · ' + curR + ' steps/message' : '') +
(supInput && supInput.checked ? ' · supervisor on' : '');
// Standalone Email Safety toggle (separate card on the AI Defaults tab).
// Default to ON if the setting isn't present so a fresh install is safe.
var emailConfirm = el('set-agentEmailConfirm');
if (emailConfirm) {
try {
var s = await fetch('/api/auth/settings', { credentials: 'same-origin' }).then(r => r.json());
emailConfirm.checked = s.agent_email_confirm !== false;
} catch (_) {}
emailConfirm.addEventListener('change', async () => {
try {
await fetch('/api/auth/settings', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ agent_email_confirm: !!emailConfirm.checked }),
});
} catch (_) {}
});
}
}
/*