fix(upload): handle corrupt uploads index and malformed vision JSON

Use the upload handler's tolerant index loader when reading upload metadata so corrupt uploads.json degrades to missing metadata instead of a 500. Return 400 for malformed vision JSON request bodies and add regression coverage for both paths.
This commit is contained in:
Solanki Sumit
2026-06-27 23:29:28 +05:30
committed by GitHub
parent 228efbc70a
commit 20691d6019
2 changed files with 44 additions and 16 deletions
+15 -16
View File
@@ -201,14 +201,13 @@ def setup_upload_routes(upload_handler):
import mimetypes as _mt import mimetypes as _mt
# Look up original filename and owner from uploads.json # Look up original filename and owner from uploads.json
original_name = file_id original_name = file_id
info = None # _load_upload_index() tolerates a missing/corrupt uploads.json (it falls
uploads_db = os.path.join(_upload_root(), "uploads.json") # back to the .bak sibling, then to {}), so a truncated DB degrades to
if os.path.exists(uploads_db): # "no metadata" instead of a 500 from an unhandled JSONDecodeError.
with open(uploads_db, encoding="utf-8") as f: db = upload_handler._load_upload_index()
db = json.load(f) info = next((fi for fi in db.values() if fi.get("id") == file_id), None)
info = next((fi for fi in db.values() if fi.get("id") == file_id), None) if info:
if info: original_name = info.get("name", file_id)
original_name = info.get("name", file_id)
auth_mgr = getattr(request.app.state, "auth_manager", None) auth_mgr = getattr(request.app.state, "auth_manager", None)
auth_configured = bool(auth_mgr and auth_mgr.is_configured) auth_configured = bool(auth_mgr and auth_mgr.is_configured)
current_user = effective_user(request) current_user = effective_user(request)
@@ -254,13 +253,10 @@ def setup_upload_routes(upload_handler):
def _load_upload_info(file_id: str): def _load_upload_info(file_id: str):
"""Look up the uploads.json record for a file_id, with owner/auth checks.""" """Look up the uploads.json record for a file_id, with owner/auth checks."""
info = None # Corruption-tolerant load (see download_file): a bad uploads.json yields
uploads_db = os.path.join(_upload_root(), "uploads.json") # {} rather than raising JSONDecodeError out of the vision path.
if os.path.exists(uploads_db): db = upload_handler._load_upload_index()
with open(uploads_db, encoding="utf-8") as f: return next((fi for fi in db.values() if fi.get("id") == file_id), None)
db = json.load(f)
info = next((fi for fi in db.values() if fi.get("id") == file_id), None)
return info
def _vision_cache_path(file_id: str) -> str: def _vision_cache_path(file_id: str) -> str:
cache_dir = os.path.join(_upload_root(), ".vision") cache_dir = os.path.join(_upload_root(), ".vision")
@@ -328,7 +324,10 @@ def setup_upload_routes(upload_handler):
if file_owner != current_user and not auth_mgr.is_admin(current_user): if file_owner != current_user and not auth_mgr.is_admin(current_user):
raise HTTPException(404, "File not found") raise HTTPException(404, "File not found")
_resolve_upload_path(file_id) _resolve_upload_path(file_id)
body = await request.json() try:
body = await request.json()
except json.JSONDecodeError:
raise HTTPException(400, "Request body must be valid JSON")
text = (body or {}).get("text", "") text = (body or {}).get("text", "")
if not isinstance(text, str): if not isinstance(text, str):
raise HTTPException(400, "text must be a string") raise HTTPException(400, "text must be a string")
+29
View File
@@ -313,3 +313,32 @@ def test_put_vision_text_allows_same_owner_to_write_cache(tmp_path, monkeypatch)
assert (upload_dir / ".vision" / f"{alice_id}.txt").read_text( assert (upload_dir / ".vision" / f"{alice_id}.txt").read_text(
encoding="utf-8" encoding="utf-8"
) == "edited alice text" ) == "edited alice text"
def test_download_file_survives_corrupted_uploads_json(tmp_path, monkeypatch):
# A truncated/corrupt uploads.json must not 500 the download endpoint —
# metadata simply becomes unavailable and the file is still served.
handler, alice_id, _bob_id, upload_dir = _make_upload_store(tmp_path, monkeypatch)
download_file = _upload_endpoints(handler, monkeypatch)["download_file"]
(upload_dir / "uploads.json").write_text('{"alice:h1": {', encoding="utf-8")
# No auth configured -> owner gate skipped.
response = asyncio.run(download_file(_Request(), alice_id))
assert str(response.path).endswith(alice_id)
# Metadata unreadable, so the display filename falls back to the file_id.
assert response.filename == alice_id
def test_put_vision_text_returns_400_on_malformed_json(tmp_path, monkeypatch):
# A non-JSON request body must yield 400, not an unhandled JSONDecodeError -> 500.
handler, alice_id, _bob_id, _upload_dir = _make_upload_store(tmp_path, monkeypatch)
put_vision_text = _upload_endpoints(handler, monkeypatch)["put_vision_text"]
class _BadJsonRequest(_Request):
async def json(self):
raise json.JSONDecodeError("Expecting value", "not json", 0)
with pytest.raises(HTTPException) as exc:
asyncio.run(put_vision_text(_BadJsonRequest(), alice_id))
assert exc.value.status_code == 400