mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-07-02 01:22:07 -04:00
Isolate untrusted context from visible user prompts (#3584)
Prevent untrusted source/context guard text from being merged into the current visible user request during provider message sanitization.

Changes:
- Detect untrusted context blocks during LLM message sanitization
- Insert a short assistant boundary before the current user request
- Keep the visible user prompt as its own user message
- Preserve normal consecutive user-message merging for non-untrusted cases
- Strengthen prompt-security wording to avoid mentioning guard wrappers
- Add regression coverage for untrusted context followed by a user prompt

Notes:
- Untrusted context remains role:user for safety
- This does not add prompt debug logging
- This does not change frontend draft persistence
This commit is contained in:
@@ -1196,6 +1196,25 @@ def _as_content_blocks(content) -> List[Dict]:
|
||||
return []
|
||||
|
||||
|
||||
def _is_untrusted_context_content(content) -> bool:
|
||||
if isinstance(content, str):
|
||||
return (
|
||||
content.startswith("UNTRUSTED SOURCE DATA\n")
|
||||
or "<<<UNTRUSTED_SOURCE_DATA>>>" in content
|
||||
)
|
||||
if isinstance(content, list):
|
||||
return any(
|
||||
isinstance(block, dict)
|
||||
and block.get("type") == "text"
|
||||
and _is_untrusted_context_content(block.get("text") or "")
|
||||
for block in content
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
# Content of the synthetic assistant message that _sanitize_llm_messages
# inserts between an untrusted-context user message and the user message
# that follows it, so the two are kept separate instead of being merged.
_REFERENCE_CONTEXT_BOUNDARY = "Reference context received."
|
||||
|
||||
|
||||
def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
||||
"""Strip Odysseus-only metadata before sending messages to providers.
|
||||
|
||||
@@ -1308,6 +1327,10 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
||||
|
||||
last = merged[-1]
|
||||
if last.get("role") == "user" and item.get("role") == "user":
|
||||
if _is_untrusted_context_content(last.get("content")):
|
||||
merged.append({"role": "assistant", "content": _REFERENCE_CONTEXT_BOUNDARY})
|
||||
merged.append(item)
|
||||
continue
|
||||
last_copy = dict(last)
|
||||
lc = last_copy.get("content")
|
||||
ic = item.get("content")
|
||||
|
||||
Reference in New Issue
Block a user