From 20691d60190d53ccab8a853e8db3ed7b2ecd3de3 Mon Sep 17 00:00:00 2001 From: Solanki Sumit <125974181+YAMRAJ13y@users.noreply.github.com> Date: Sat, 27 Jun 2026 23:29:28 +0530 Subject: [PATCH] fix(upload): handle corrupt uploads index and malformed vision JSON Use the upload handler's tolerant index loader when reading upload metadata so corrupt uploads.json degrades to missing metadata instead of a 500. Return 400 for malformed vision JSON request bodies and add regression coverage for both paths. --- routes/upload_routes.py | 31 ++++++++++++------------- tests/test_upload_routes_owner_scope.py | 29 +++++++++++++++++++++++ 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/routes/upload_routes.py b/routes/upload_routes.py index 8b8f2d292..781aaf031 100644 --- a/routes/upload_routes.py +++ b/routes/upload_routes.py @@ -201,14 +201,13 @@ def setup_upload_routes(upload_handler): import mimetypes as _mt # Look up original filename and owner from uploads.json original_name = file_id - info = None - uploads_db = os.path.join(_upload_root(), "uploads.json") - if os.path.exists(uploads_db): - with open(uploads_db, encoding="utf-8") as f: - db = json.load(f) - info = next((fi for fi in db.values() if fi.get("id") == file_id), None) - if info: - original_name = info.get("name", file_id) + # _load_upload_index() tolerates a missing/corrupt uploads.json (it falls + # back to the .bak sibling, then to {}), so a truncated DB degrades to + # "no metadata" instead of a 500 from an unhandled JSONDecodeError. + db = upload_handler._load_upload_index() + info = next((fi for fi in db.values() if fi.get("id") == file_id), None) + if info: + original_name = info.get("name", file_id) auth_mgr = getattr(request.app.state, "auth_manager", None) auth_configured = bool(auth_mgr and auth_mgr.is_configured) current_user = effective_user(request) @@ -254,13 +253,10 @@ def setup_upload_routes(upload_handler): def _load_upload_info(file_id: str): """Look up the uploads.json record for a file_id, with owner/auth checks.""" - info = None - uploads_db = os.path.join(_upload_root(), "uploads.json") - if os.path.exists(uploads_db): - with open(uploads_db, encoding="utf-8") as f: - db = json.load(f) - info = next((fi for fi in db.values() if fi.get("id") == file_id), None) - return info + # Corruption-tolerant load (see download_file): a bad uploads.json yields + # {} rather than raising JSONDecodeError out of the vision path. + db = upload_handler._load_upload_index() + return next((fi for fi in db.values() if fi.get("id") == file_id), None) def _vision_cache_path(file_id: str) -> str: cache_dir = os.path.join(_upload_root(), ".vision") @@ -328,7 +324,10 @@ def setup_upload_routes(upload_handler): if file_owner != current_user and not auth_mgr.is_admin(current_user): raise HTTPException(404, "File not found") _resolve_upload_path(file_id) - body = await request.json() + try: + body = await request.json() + except json.JSONDecodeError: + raise HTTPException(400, "Request body must be valid JSON") text = (body or {}).get("text", "") if not isinstance(text, str): raise HTTPException(400, "text must be a string") diff --git a/tests/test_upload_routes_owner_scope.py b/tests/test_upload_routes_owner_scope.py index a2647f580..a29b7c795 100644 --- a/tests/test_upload_routes_owner_scope.py +++ b/tests/test_upload_routes_owner_scope.py @@ -313,3 +313,32 @@ def test_put_vision_text_allows_same_owner_to_write_cache(tmp_path, monkeypatch) assert (upload_dir / ".vision" / f"{alice_id}.txt").read_text( encoding="utf-8" ) == "edited alice text" + + +def test_download_file_survives_corrupted_uploads_json(tmp_path, monkeypatch): + # A truncated/corrupt uploads.json must not 500 the download endpoint — + # metadata simply becomes unavailable and the file is still served. + handler, alice_id, _bob_id, upload_dir = _make_upload_store(tmp_path, monkeypatch) + download_file = _upload_endpoints(handler, monkeypatch)["download_file"] + (upload_dir / "uploads.json").write_text('{"alice:h1": {', encoding="utf-8") + + # No auth configured -> owner gate skipped. + response = asyncio.run(download_file(_Request(), alice_id)) + + assert str(response.path).endswith(alice_id) + # Metadata unreadable, so the display filename falls back to the file_id. + assert response.filename == alice_id + + +def test_put_vision_text_returns_400_on_malformed_json(tmp_path, monkeypatch): + # A non-JSON request body must yield 400, not an unhandled JSONDecodeError -> 500. + handler, alice_id, _bob_id, _upload_dir = _make_upload_store(tmp_path, monkeypatch) + put_vision_text = _upload_endpoints(handler, monkeypatch)["put_vision_text"] + + class _BadJsonRequest(_Request): + async def json(self): + raise json.JSONDecodeError("Expecting value", "not json", 0) + + with pytest.raises(HTTPException) as exc: + asyncio.run(put_vision_text(_BadJsonRequest(), alice_id)) + assert exc.value.status_code == 400