mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 07:35:27 -04:00
fix: improve uploaded document retrieval and deep research reuse (#4784)
* fix: improve uploaded document retrieval and deep research reuse
* test: add coverage for upload manifest and document pagination
* chore: rerun CI
* fix: restore _insert_before_latest_user helper
* fix(agent_loop): restore missing upload context helper
This commit is contained in:
@@ -39,6 +39,7 @@ try:
|
||||
_classify_agent_request,
|
||||
_compute_final_metrics,
|
||||
_append_tool_results,
|
||||
_insert_before_latest_user,
|
||||
_MCP_KEYWORDS,
|
||||
)
|
||||
_IMPORTED_AGENT_LOOP = sys.modules.get("src.agent_loop")
|
||||
@@ -73,6 +74,36 @@ def test_polish_internet_search_request_classifies_as_web():
|
||||
assert "web" in intent["domains"]
|
||||
|
||||
|
||||
def test_insert_before_latest_user_places_context_before_last_user_turn():
    """The context message lands directly before the final user turn.

    Also checks that the list passed in is left unchanged by the call.
    """

    def conversation():
        # Fresh dicts on every call so in-place mutation by the helper
        # cannot hide behind shared objects in the expected values.
        return [
            {"role": "user", "content": "first"},
            {"role": "assistant", "content": "reply"},
            {"role": "user", "content": "latest"},
        ]

    messages = conversation()
    context = {"role": "system", "content": "context"}

    out = _insert_before_latest_user(messages, context)

    untouched = conversation()
    expected = untouched[:2] + [context] + untouched[2:]
    assert out == expected
    assert messages == conversation()
|
||||
|
||||
|
||||
def test_insert_before_latest_user_appends_when_no_user_message_exists():
    """With no user turn present, the context message is appended at the end."""
    messages = [{"role": "assistant", "content": "reply"}]
    context = {"role": "system", "content": "context"}

    result = _insert_before_latest_user(messages, context)

    # Expected value is built after the call, from the (possibly touched) input.
    expected = [messages[0], context]
    assert result == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _detect_admin_intent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
@@ -10,6 +14,7 @@ from routes.chat_helpers import (
|
||||
_session_is_research_spinoff,
|
||||
auto_name_session,
|
||||
build_chat_context,
|
||||
build_uploaded_file_manifest,
|
||||
clean_thinking_for_save,
|
||||
needs_auto_name,
|
||||
PreprocessedMessage,
|
||||
@@ -145,6 +150,126 @@ class _FakeSession:
|
||||
self.history.append(message)
|
||||
|
||||
|
||||
class _ManifestUploadHandler:
    """In-memory stand-in for an upload handler used by the manifest tests.

    Stores a mapping of upload id -> row and records every ``resolve_upload``
    call as an ``(upload_id, owner)`` tuple in ``self.calls``.
    """

    def __init__(self, upload_dir, rows):
        self.calls = []
        self.rows = rows
        self.upload_dir = str(upload_dir)

    def _inside_upload_dir(self, path):
        """Return True when *path* resolves to the upload dir or below it."""
        root = os.path.realpath(self.upload_dir)
        target = os.path.realpath(path)
        try:
            shared = os.path.commonpath([root, target])
        except ValueError:
            # commonpath rejects path combinations it cannot compare.
            return False
        return shared == root

    def resolve_upload(self, upload_id, owner=None):
        """Log the lookup and return the stored row, or None on owner mismatch.

        Non-dict rows and rows without a truthy ``owner`` are returned as-is;
        unknown ids yield None.
        """
        self.calls.append((upload_id, owner))
        record = self.rows.get(upload_id)
        if not isinstance(record, dict):
            return record
        row_owner = record.get("owner")
        if row_owner and row_owner != owner:
            return None
        return record
|
||||
|
||||
|
||||
def _manifest_test_dir(name):
    """Create and return a fresh scratch directory for a manifest test.

    The directory is ``<grandparent of this file>/tmp_pytest_probe/<name>-<hex>``
    where ``<hex>`` is a random UUID; creation fails if it already exists.
    """
    base = Path(__file__).resolve().parents[1]
    unique_name = f"{name}-{uuid.uuid4().hex}"
    scratch = base / "tmp_pytest_probe" / unique_name
    scratch.mkdir(parents=True, exist_ok=False)
    return scratch
|
||||
|
||||
|
||||
def test_build_uploaded_file_manifest_filters_and_nulls_unreadable_paths(monkeypatch):
    """Manifest keeps only the caller's dict rows and blanks unusable paths.

    Expectations pinned here: the row owned by another user and the non-dict
    row are omitted; the file outside the upload dir and the file that does
    not exist keep their entry but get ``path`` set to None; every requested
    id is resolved once, in order, with the caller's owner.
    """
    root = _manifest_test_dir("manifest")
    try:
        upload_dir = root / "uploads"
        upload_dir.mkdir()

        good = upload_dir / "good.txt"
        good.write_text("hello", encoding="utf-8")

        outside = root / "outside.txt"
        outside.write_text("nope", encoding="utf-8")

        # Deliberately never written to disk.
        missing = upload_dir / "missing.txt"

        import src.settings as settings

        def fake_get_setting(key):
            # Only the extra tool roots are configured; all else is unset.
            if key == "tool_path_extra_roots":
                return [str(upload_dir)]
            return None

        monkeypatch.setattr(settings, "get_setting", fake_get_setting)

        rows = {
            "good": {
                "id": "good",
                "name": "good.txt",
                "mime": "text/plain",
                "size": 5,
                "path": str(good),
                "owner": "alice",
            },
            "bob": {
                "id": "bob",
                "name": "bob.txt",
                "path": str(good),
                "owner": "bob",
            },
            "outside": {
                "id": "outside",
                "name": "outside.txt",
                "path": str(outside),
                "owner": "alice",
            },
            "missing": {
                "id": "missing",
                "name": "missing.txt",
                "path": str(missing),
                "owner": "alice",
            },
            "bad": ["not", "a", "dict"],
        }
        handler = _ManifestUploadHandler(upload_dir, rows)
        requested = ("good", "bob", "outside", "missing", "bad")

        manifest = build_uploaded_file_manifest(
            list(requested),
            handler,
            owner="alice",
        )

        returned_ids = [entry["id"] for entry in manifest]
        assert returned_ids == ["good", "outside", "missing"]
        assert os.path.realpath(manifest[0]["path"]) == os.path.realpath(good)
        assert manifest[1]["path"] is None
        assert manifest[2]["path"] is None
        assert handler.calls == [(upload_id, "alice") for upload_id in requested]
    finally:
        shutil.rmtree(root, ignore_errors=True)
|
||||
|
||||
|
||||
def test_build_uploaded_file_manifest_hides_paths_read_file_cannot_open(monkeypatch):
    """A path rejected by ``_resolve_tool_path`` is reported as None."""
    root = _manifest_test_dir("manifest-unreadable")
    try:
        upload_dir = root / "uploads"
        upload_dir.mkdir()

        upload = upload_dir / "upload.txt"
        upload.write_text("hello", encoding="utf-8")

        rows = {
            "upload": {
                "id": "upload",
                "name": "upload.txt",
                "path": str(upload),
                "owner": "alice",
            },
        }
        handler = _ManifestUploadHandler(upload_dir, rows)

        def reject_path(_path):
            # Stand-in resolver that refuses every path it is given.
            raise ValueError("outside the allowed roots")

        monkeypatch.setattr("src.tool_execution._resolve_tool_path", reject_path)

        manifest = build_uploaded_file_manifest(["upload"], handler, owner="alice")

        first_entry = manifest[0]
        assert first_entry["path"] is None
    finally:
        shutil.rmtree(root, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,expected", [
|
||||
# 24h format (the bug this PR fixes)
|
||||
("deepseek-v4-flash 14:05:33", True),
|
||||
|
||||
@@ -100,6 +100,105 @@ def test_default_ssh_port_omits_flag():
|
||||
assert port_flag == ""
|
||||
|
||||
|
||||
def _documents_endpoint(total: int):
    """Build the codex documents endpoint on top of a stub document library.

    The stub ``/api/documents/library`` route records the arguments of each
    call and returns ``doc-<i>`` entries for ``offset <= i < offset + limit``,
    capped at *total*.

    Returns:
        A ``(endpoint, calls)`` pair: the ``GET /api/codex/documents`` endpoint
        obtained via ``_route_endpoint`` and the list the stub appends to.
    """
    calls = []
    document_router = APIRouter()

    @document_router.get("/api/documents/library")
    async def documents_library(
        request: Request,
        search=None,
        language=None,
        sort="recent",
        offset=0,
        limit=20,
        archived=False,
    ):
        seen = {
            "owner": request.state.current_user,
            "search": search,
            "language": language,
            "sort": sort,
            "offset": offset,
            "limit": limit,
            "archived": archived,
        }
        calls.append(seen)

        stop = min(offset + limit, total)
        page = [{"id": f"doc-{index}"} for index in range(offset, stop)]
        return {"documents": page, "total": total}

    codex_router = codex_routes.setup_codex_routes(document_router=document_router)
    endpoint = _route_endpoint("/api/codex/documents", "GET", router=codex_router)
    return endpoint, calls
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_documents_pagination_clamps_offset_and_limit():
    """offset=-10 reaches the library as 0 and limit=500 as 50."""
    endpoint, calls = _documents_endpoint(total=99)
    request = _codex_request(["documents:read"])

    page = await endpoint(request, offset=-10, limit=500)

    forwarded = calls[-1]
    assert forwarded["owner"] == "alice"
    assert forwarded["offset"] == 0
    assert forwarded["limit"] == 50
    assert len(page["documents"]) == 50
    assert page["next_offset"] == 50
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_documents_pagination_clamps_zero_limit_to_one():
    """limit=0 is forwarded as 1, yielding a single document."""
    endpoint, calls = _documents_endpoint(total=3)
    request = _codex_request(["documents:read"])

    page = await endpoint(request, offset=0, limit=0)

    forwarded = calls[-1]
    assert forwarded["limit"] == 1
    assert len(page["documents"]) == 1
    assert page["next_offset"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_documents_pagination_returns_next_offset_when_truncated():
    """A page that stops short of the total reports where the next one starts."""
    endpoint, _calls = _documents_endpoint(total=7)
    request = _codex_request(["documents:read"])

    page = await endpoint(request, offset=2, limit=3)

    returned_ids = [doc["id"] for doc in page["documents"]]
    assert returned_ids == ["doc-2", "doc-3", "doc-4"]
    assert page["next_offset"] == 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_documents_pagination_rejects_invalid_offset():
    """A non-numeric offset raises HTTP 400 with detail "Invalid offset"."""
    endpoint, _calls = _documents_endpoint(total=7)
    request = _codex_request(["documents:read"])

    with pytest.raises(HTTPException) as excinfo:
        await endpoint(request, offset="soon", limit=3)

    error = excinfo.value
    assert error.status_code == 400
    assert error.detail == "Invalid offset"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_documents_pagination_rejects_invalid_limit():
    """A non-numeric limit raises HTTP 400 with detail "Invalid limit"."""
    endpoint, _calls = _documents_endpoint(total=7)
    request = _codex_request(["documents:read"])

    with pytest.raises(HTTPException) as excinfo:
        await endpoint(request, offset=0, limit="many")

    error = excinfo.value
    assert error.status_code == 400
    assert error.detail == "Invalid limit"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_documents_pagination_out_of_range_offset_returns_empty_page():
    """An offset past the total is forwarded unchanged and yields no documents."""
    endpoint, calls = _documents_endpoint(total=3)
    request = _codex_request(["documents:read"])

    page = await endpoint(request, offset=10, limit=2)

    forwarded = calls[-1]
    assert forwarded["offset"] == 10
    assert forwarded["limit"] == 2
    assert page["documents"] == []
    assert page["next_offset"] is None
|
||||
|
||||
|
||||
def test_adopt_rejects_ssh_option_host_before_shell(monkeypatch):
|
||||
calls = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user