mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-07-02 01:22:07 -04:00
fix: improve uploaded document retrieval and deep research reuse (#4784)
* fix: improve uploaded document retrieval and deep research reuse
* test: add coverage for upload manifest and document pagination
* chore: rerun CI
* fix: restore _insert_before_latest_user helper
* fix(agent_loop): restore missing upload context helper
This commit is contained in:
@@ -755,6 +755,46 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _insert_before_latest_user(messages: List[Dict], context_msg: Dict) -> List[Dict]:
|
||||
"""Insert a context message immediately before the latest user turn."""
|
||||
out = list(messages or [])
|
||||
for idx in range(len(out) - 1, -1, -1):
|
||||
if out[idx].get("role") == "user":
|
||||
out.insert(idx, context_msg)
|
||||
return out
|
||||
out.append(context_msg)
|
||||
return out
|
||||
|
||||
|
||||
def _uploaded_files_context_message(uploaded_files: Optional[List[Dict]]) -> Optional[Dict]:
|
||||
if not uploaded_files:
|
||||
return None
|
||||
|
||||
lines = [
|
||||
"Uploaded files attached to the latest user turn:",
|
||||
]
|
||||
for item in uploaded_files[:20]:
|
||||
name = str(item.get("name") or item.get("id") or "upload")
|
||||
bits = [
|
||||
f"id={item.get('id', '')}",
|
||||
f"name={name}",
|
||||
]
|
||||
if item.get("mime"):
|
||||
bits.append(f"mime={item.get('mime')}")
|
||||
if item.get("size") is not None:
|
||||
bits.append(f"size={item.get('size')} bytes")
|
||||
if item.get("path"):
|
||||
bits.append(f"path={item.get('path')}")
|
||||
lines.append("- " + "; ".join(bits))
|
||||
if len(uploaded_files) > 20:
|
||||
lines.append(f"- ... {len(uploaded_files) - 20} more upload(s) omitted from this manifest")
|
||||
lines.extend([
|
||||
"",
|
||||
"The attachment contents may already be in the latest user message. If an attachment is marked truncated or omitted, read its listed path with `read_file` when that tool is available. Do not say uploaded files are undiscoverable when they are listed here.",
|
||||
])
|
||||
return untrusted_context_message("current chat uploaded files", "\n".join(lines))
|
||||
|
||||
|
||||
def _strip_think_blocks(text: str) -> str:
|
||||
"""Linear-time equivalent of
|
||||
``re.sub(r'<think>.*?</think>', '', text, flags=DOTALL|IGNORECASE)``.
|
||||
@@ -1986,6 +2026,7 @@ async def stream_agent_loop(
|
||||
tool_policy: Optional[ToolPolicy] = None,
|
||||
workspace: Optional[str] = None,
|
||||
forced_tools: Optional[Set[str]] = None,
|
||||
uploaded_files: Optional[List[Dict]] = None,
|
||||
_is_teacher_run: bool = False,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""Streaming agent loop generator.
|
||||
@@ -2021,6 +2062,11 @@ async def stream_agent_loop(
|
||||
# filtered to read-only tools below (after the disabled map is loaded).
|
||||
disabled_tools.update(plan_mode_disabled_tools())
|
||||
|
||||
uploaded_files = uploaded_files or []
|
||||
_upload_msg = _uploaded_files_context_message(uploaded_files)
|
||||
if _upload_msg:
|
||||
messages = _insert_before_latest_user(messages, _upload_msg)
|
||||
|
||||
_t0 = time.time()
|
||||
_needs_admin = _detect_admin_intent(messages)
|
||||
_last_user = _extract_last_user_message(messages)
|
||||
@@ -2232,6 +2278,15 @@ async def stream_agent_loop(
|
||||
if _relevant_tools is not None and active_document is not None:
|
||||
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
|
||||
|
||||
# Current-turn chat uploads are real files under the upload/data root. Make
|
||||
# the read-side file/document tools visible immediately so the agent can
|
||||
# inspect files whose inline text was truncated or omitted.
|
||||
if not guide_only and uploaded_files:
|
||||
if _relevant_tools is None:
|
||||
from src.tool_index import ALWAYS_AVAILABLE
|
||||
_relevant_tools = set(ALWAYS_AVAILABLE)
|
||||
_relevant_tools.update({"read_file", "grep", "ls", "manage_documents"})
|
||||
|
||||
# Per-request UI toggles are stronger than retrieval. If the user turns on
|
||||
# Search, the model must see the search tools even when the latest text is a
|
||||
# typo or otherwise low-signal for tool RAG.
|
||||
|
||||
@@ -564,9 +564,20 @@ class ManageDocumentTool:
|
||||
if not doc:
|
||||
return {"error": f"Document '{doc_id}' not found", "exit_code": 1}
|
||||
body = doc.current_content or ""
|
||||
preview_limit = int(args.get("limit", MAX_READ_CHARS))
|
||||
truncated = len(body) > preview_limit
|
||||
preview = body[:preview_limit] + (f"\n... (truncated, {len(body)} chars total)" if truncated else "")
|
||||
try:
|
||||
preview_limit = max(1, min(int(args.get("limit", MAX_READ_CHARS)), MAX_READ_CHARS))
|
||||
except (TypeError, ValueError):
|
||||
preview_limit = MAX_READ_CHARS
|
||||
try:
|
||||
offset = max(0, int(args.get("offset", 0) or 0))
|
||||
except (TypeError, ValueError):
|
||||
offset = 0
|
||||
offset = min(offset, len(body))
|
||||
end = min(offset + preview_limit, len(body))
|
||||
truncated = end < len(body)
|
||||
preview = body[offset:end]
|
||||
if truncated:
|
||||
preview += f"\n... (truncated, {len(body)} chars total; next_offset={end})"
|
||||
anchor = f"[{doc.title}](#document-{doc.id})"
|
||||
return {
|
||||
"response": f"{anchor} — click to open in editor.\n\n```{doc.language or ''}\n{preview}\n```",
|
||||
@@ -577,6 +588,8 @@ class ManageDocumentTool:
|
||||
"size": len(body),
|
||||
"content": preview,
|
||||
"truncated": truncated,
|
||||
"offset": offset,
|
||||
"next_offset": end if truncated else None,
|
||||
},
|
||||
"exit_code": 0,
|
||||
}
|
||||
@@ -609,4 +622,4 @@ class ManageDocumentTool:
|
||||
logger.error(f"manage_documents error: {e}")
|
||||
return {"error": str(e), "exit_code": 1}
|
||||
finally:
|
||||
db.close()
|
||||
db.close()
|
||||
|
||||
Reference in New Issue
Block a user