fix(uploads): migrate upload ownership on rename (#3617)

This commit is contained in:
RaresKeY
2026-06-11 17:01:04 +03:00
committed by GitHub
parent f7a3605b16
commit c500bcb47d
5 changed files with 251 additions and 5 deletions
+1
View File
@@ -498,6 +498,7 @@ app.state.session_manager = session_manager
memory_manager = components["memory_manager"]
memory_vector = components.get("memory_vector")
upload_handler = components["upload_handler"]
app.state.upload_handler = upload_handler
personal_docs_mgr = components["personal_docs_manager"]
api_key_manager = components["api_key_manager"]
preset_manager = components["preset_manager"]
+11
View File
@@ -416,6 +416,17 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
except Exception as e:
logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)
# uploads.json: upload rows use owner metadata for access checks and
# owner-prefixed index keys for dedupe. Rename both so attachments keep
# resolving after the account username changes.
try:
upload_handler = getattr(request.app.state, "upload_handler", None)
rename_owner = getattr(upload_handler, "rename_owner", None)
if callable(rename_owner):
rename_owner(old_username, new_username)
except Exception as e:
logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
# skills: SKILL.md frontmatter carries owner: <username>; the usage
# sidecar (_usage.json) keys entries as owner::skill-name. Both must
# be updated or the renamed user's Skills panel goes empty.
+80
View File
@@ -352,6 +352,86 @@ class UploadHandler:
return dict(info)
return None
def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str:
"""Return the storage key to use after renaming an owned upload row."""
if isinstance(key, str) and ":" in key:
owner_part, rest = key.split(":", 1)
if owner_part.strip().lower() == old_owner:
return f"{new_owner}:{rest}"
file_hash = info.get("hash")
if file_hash:
return f"{new_owner}:{file_hash}"
return key
def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str:
"""Choose a deterministic collision key without overwriting an existing row."""
if base_key not in used_keys and base_key not in reserved_keys:
return base_key
upload_id = str(info.get("id") or "renamed").strip() or "renamed"
candidate = f"{base_key}:{upload_id}"
if candidate not in used_keys and candidate not in reserved_keys:
return candidate
index = 2
while True:
candidate = f"{base_key}:{upload_id}:{index}"
if candidate not in used_keys and candidate not in reserved_keys:
return candidate
index += 1
def rename_owner(self, old_owner: str, new_owner: str) -> int:
"""Rename upload metadata ownership from old_owner to new_owner.
Upload rows are keyed by owner-qualified hashes for dedupe and also
carry an `owner` field for access checks. Both must move together when
usernames change.
"""
old_owner_normalized = str(old_owner or "").strip().lower()
new_owner = str(new_owner or "").strip()
if not old_owner_normalized or not new_owner:
return 0
if old_owner_normalized == new_owner.lower():
return 0
uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
with self._index_lock:
current = self._load_upload_index()
if not current:
return 0
updated = {}
renamed = 0
original_keys = set(current.keys())
for key, info in current.items():
new_key = key
new_info = info
if isinstance(info, dict) and str(info.get("owner", "")).strip().lower() == old_owner_normalized:
new_info = dict(info)
new_info["owner"] = new_owner
base_key = self._renamed_upload_index_key(key, new_info, old_owner_normalized, new_owner)
new_key = self._unique_upload_index_key(
base_key,
set(updated.keys()),
original_keys - {key},
new_info,
)
if new_key != base_key:
logger.warning(
"Upload owner rename key collision for %s -> %s at %s; preserving row as %s",
old_owner_normalized,
new_owner,
base_key,
new_key,
)
renamed += 1
updated[new_key] = new_info
if renamed:
self._atomic_write_json(uploads_db_path, updated)
return renamed
def _find_upload_path(self, upload_id: str) -> Optional[str]:
"""Find an upload file by ID while staying inside upload_dir."""
if not self.validate_upload_id(upload_id):
+58 -5
View File
@@ -1,4 +1,4 @@
"""Renaming a user must update all three owner caches, not just the SQL DB.
"""Renaming a user must update non-SQL owner stores, not just the SQL DB.
The DB owner-rename loop in the rename_user route updates every SQL-backed
owner column, but three file-backed / in-memory stores are left stale:
@@ -17,6 +17,9 @@ owner column, but three file-backed / in-memory stores are left stale:
4. data/memory.json — a flat array where every entry has an `owner` field;
memory_manager.load(owner=user) filters on it, so all memories vanish.
5. data/uploads/uploads.json — each upload row carries an `owner` field and
owner-prefixed index key; stale metadata denies renamed users their uploads.
Regression coverage: these bugs are invisible in unit tests that mock the DB
loop but don't exercise the file/cache patches added to the route.
"""
@@ -67,11 +70,12 @@ def rename_endpoint(monkeypatch, tmp_path):
return _route(ar.setup_auth_routes(am), "rename_user"), am, tmp_path
def _request(tmp_path, session_manager=None, token="t", research_handler=None):
def _request(tmp_path, session_manager=None, token="t", research_handler=None, upload_handler=None):
state = SimpleNamespace(
invalidate_token_cache=lambda: None,
session_manager=session_manager,
research_handler=research_handler,
upload_handler=upload_handler,
)
return SimpleNamespace(
cookies={"odysseus_session": token},
@@ -415,7 +419,56 @@ def test_rename_no_memory_json_does_not_crash(rename_endpoint):
# ---------------------------------------------------------------------------
# 4. Skills (SKILL.md frontmatter + _usage.json sidecar)
# 4. uploads.json
# ---------------------------------------------------------------------------
def test_rename_updates_upload_metadata_owner(rename_endpoint):
endpoint, _am, tmp_path = rename_endpoint
from src.upload_handler import UploadHandler
upload_dir = tmp_path / "uploads"
dated = upload_dir / "2026" / "06" / "09"
dated.mkdir(parents=True)
upload_id = "a" * 32 + ".txt"
upload_path = dated / upload_id
upload_path.write_text("alice private upload", encoding="utf-8")
handler = UploadHandler(str(tmp_path), str(upload_dir))
handler._atomic_write_json(
str(upload_dir / "uploads.json"),
{
"alice:hash-alice": {
"id": upload_id,
"path": str(upload_path),
"mime": "text/plain",
"size": upload_path.stat().st_size,
"name": "note.txt",
"hash": "hash-alice",
"original_name": "note.txt",
"uploaded_at": "2026-06-09T10:00:00",
"last_accessed": "2026-06-09T10:00:00",
"client_ip": "127.0.0.1",
"owner": "alice",
},
},
)
asyncio.run(
endpoint(
"alice",
SimpleNamespace(username="alice2"),
_request(tmp_path, upload_handler=handler),
)
)
updated = json.loads((upload_dir / "uploads.json").read_text(encoding="utf-8"))
assert "alice:hash-alice" not in updated
assert updated["alice2:hash-alice"]["owner"] == "alice2"
assert handler.resolve_upload(upload_id, owner="alice2")["path"] == str(upload_path)
assert handler.resolve_upload(upload_id, owner="alice") is None
# ---------------------------------------------------------------------------
# 5. Skills (SKILL.md frontmatter + _usage.json sidecar)
# ---------------------------------------------------------------------------
_SKILL_MD = """\
@@ -522,7 +575,7 @@ def test_rename_usage_keys_case_insensitive(rename_endpoint):
# ---------------------------------------------------------------------------
# 5. Rollback: auth rename must be restored if SQL owner migration fails
# 6. Rollback: auth rename must be restored if SQL owner migration fails
# ---------------------------------------------------------------------------
def test_owner_migration_failure_rolls_back_auth_rename(monkeypatch, tmp_path):
@@ -583,7 +636,7 @@ def test_self_rename_owner_migration_failure_rolls_back_auth_session(monkeypatch
# ---------------------------------------------------------------------------
# 6. P1 regression: rejected auth rename must not mutate file-backed stores
# 7. P1 regression: rejected auth rename must not mutate file-backed stores
# ---------------------------------------------------------------------------
def test_rejected_rename_does_not_mutate_files(monkeypatch, tmp_path):
+101
View File
@@ -0,0 +1,101 @@
import json
import os
from pathlib import Path
from src.upload_handler import UploadHandler
def _make_handler(tmp_path: Path) -> UploadHandler:
base = tmp_path / "base"
upload = tmp_path / "uploads"
base.mkdir()
upload.mkdir()
return UploadHandler(base_dir=str(base), upload_dir=str(upload))
def _db_path(handler: UploadHandler) -> str:
return os.path.join(handler.upload_dir, "uploads.json")
def _write_upload_file(handler: UploadHandler, file_id: str, content: bytes = b"content") -> str:
upload_day = Path(handler.upload_dir) / "2026" / "06" / "09"
upload_day.mkdir(parents=True, exist_ok=True)
path = upload_day / file_id
path.write_bytes(content)
return str(path)
def _entry(handler: UploadHandler, owner: str, file_hash: str, file_id: str) -> dict:
path = _write_upload_file(handler, file_id, content=f"{owner}:{file_hash}".encode())
return {
"id": file_id,
"path": path,
"mime": "text/plain",
"size": os.path.getsize(path),
"name": f"{file_id}.txt",
"hash": file_hash,
"original_name": f"{file_id}.txt",
"uploaded_at": "2026-06-09T10:00:00",
"last_accessed": "2026-06-09T10:00:00",
"client_ip": "127.0.0.1",
"owner": owner,
}
def test_rename_owner_updates_upload_metadata_key_and_resolver(tmp_path):
handler = _make_handler(tmp_path)
alice_id = "a" * 32 + ".txt"
alice_entry = _entry(handler, "Alice", "hash-alice", alice_id)
bob_entry = _entry(handler, "bob", "hash-bob", "b" * 32 + ".txt")
handler._atomic_write_json(
_db_path(handler),
{
"Alice:hash-alice": alice_entry,
"bob:hash-bob": bob_entry,
},
)
renamed = handler.rename_owner("alice", "alice2")
assert renamed == 1
updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8"))
assert "Alice:hash-alice" not in updated
assert "alice2:hash-alice" in updated
assert updated["alice2:hash-alice"]["owner"] == "alice2"
assert updated["alice2:hash-alice"]["path"] == alice_entry["path"]
assert updated["alice2:hash-alice"]["hash"] == alice_entry["hash"]
assert updated["alice2:hash-alice"]["uploaded_at"] == alice_entry["uploaded_at"]
assert updated["alice2:hash-alice"]["last_accessed"] == alice_entry["last_accessed"]
assert updated["bob:hash-bob"]["owner"] == "bob"
assert handler.resolve_upload(alice_id, owner="alice2")["id"] == alice_id
assert handler.resolve_upload(alice_id, owner="alice") is None
def test_rename_owner_preserves_rows_when_target_key_collides(tmp_path):
handler = _make_handler(tmp_path)
migrated_id = "c" * 32 + ".txt"
existing_id = "d" * 32 + ".txt"
migrated = _entry(handler, "alice", "same-hash", migrated_id)
existing = _entry(handler, "alice2", "same-hash", existing_id)
unrelated = _entry(handler, "carol", "other-hash", "e" * 32 + ".txt")
handler._atomic_write_json(
_db_path(handler),
{
"alice:same-hash": migrated,
"alice2:same-hash": existing,
"carol:other-hash": unrelated,
},
)
renamed = handler.rename_owner("alice", "alice2")
assert renamed == 1
updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8"))
assert len(updated) == 3
assert updated["alice2:same-hash"]["id"] == existing_id
migrated_key = f"alice2:same-hash:{migrated_id}"
assert updated[migrated_key]["id"] == migrated_id
assert updated[migrated_key]["owner"] == "alice2"
assert updated[migrated_key]["path"] == migrated["path"]
assert updated["carol:other-hash"] == unrelated