mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(uploads): migrate upload ownership on rename (#3617)
This commit is contained in:
@@ -498,6 +498,7 @@ app.state.session_manager = session_manager
|
||||
memory_manager = components["memory_manager"]
|
||||
memory_vector = components.get("memory_vector")
|
||||
upload_handler = components["upload_handler"]
|
||||
app.state.upload_handler = upload_handler
|
||||
personal_docs_mgr = components["personal_docs_manager"]
|
||||
api_key_manager = components["api_key_manager"]
|
||||
preset_manager = components["preset_manager"]
|
||||
|
||||
@@ -416,6 +416,17 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# uploads.json: upload rows use owner metadata for access checks and
|
||||
# owner-prefixed index keys for dedupe. Rename both so attachments keep
|
||||
# resolving after the account username changes.
|
||||
try:
|
||||
upload_handler = getattr(request.app.state, "upload_handler", None)
|
||||
rename_owner = getattr(upload_handler, "rename_owner", None)
|
||||
if callable(rename_owner):
|
||||
rename_owner(old_username, new_username)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# skills: SKILL.md frontmatter carries owner: <username>; the usage
|
||||
# sidecar (_usage.json) keys entries as owner::skill-name. Both must
|
||||
# be updated or the renamed user's Skills panel goes empty.
|
||||
|
||||
@@ -352,6 +352,86 @@ class UploadHandler:
|
||||
return dict(info)
|
||||
return None
|
||||
|
||||
def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str:
|
||||
"""Return the storage key to use after renaming an owned upload row."""
|
||||
if isinstance(key, str) and ":" in key:
|
||||
owner_part, rest = key.split(":", 1)
|
||||
if owner_part.strip().lower() == old_owner:
|
||||
return f"{new_owner}:{rest}"
|
||||
file_hash = info.get("hash")
|
||||
if file_hash:
|
||||
return f"{new_owner}:{file_hash}"
|
||||
return key
|
||||
|
||||
def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str:
|
||||
"""Choose a deterministic collision key without overwriting an existing row."""
|
||||
if base_key not in used_keys and base_key not in reserved_keys:
|
||||
return base_key
|
||||
|
||||
upload_id = str(info.get("id") or "renamed").strip() or "renamed"
|
||||
candidate = f"{base_key}:{upload_id}"
|
||||
if candidate not in used_keys and candidate not in reserved_keys:
|
||||
return candidate
|
||||
|
||||
index = 2
|
||||
while True:
|
||||
candidate = f"{base_key}:{upload_id}:{index}"
|
||||
if candidate not in used_keys and candidate not in reserved_keys:
|
||||
return candidate
|
||||
index += 1
|
||||
|
||||
def rename_owner(self, old_owner: str, new_owner: str) -> int:
|
||||
"""Rename upload metadata ownership from old_owner to new_owner.
|
||||
|
||||
Upload rows are keyed by owner-qualified hashes for dedupe and also
|
||||
carry an `owner` field for access checks. Both must move together when
|
||||
usernames change.
|
||||
"""
|
||||
old_owner_normalized = str(old_owner or "").strip().lower()
|
||||
new_owner = str(new_owner or "").strip()
|
||||
if not old_owner_normalized or not new_owner:
|
||||
return 0
|
||||
if old_owner_normalized == new_owner.lower():
|
||||
return 0
|
||||
|
||||
uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
|
||||
with self._index_lock:
|
||||
current = self._load_upload_index()
|
||||
if not current:
|
||||
return 0
|
||||
|
||||
updated = {}
|
||||
renamed = 0
|
||||
original_keys = set(current.keys())
|
||||
|
||||
for key, info in current.items():
|
||||
new_key = key
|
||||
new_info = info
|
||||
if isinstance(info, dict) and str(info.get("owner", "")).strip().lower() == old_owner_normalized:
|
||||
new_info = dict(info)
|
||||
new_info["owner"] = new_owner
|
||||
base_key = self._renamed_upload_index_key(key, new_info, old_owner_normalized, new_owner)
|
||||
new_key = self._unique_upload_index_key(
|
||||
base_key,
|
||||
set(updated.keys()),
|
||||
original_keys - {key},
|
||||
new_info,
|
||||
)
|
||||
if new_key != base_key:
|
||||
logger.warning(
|
||||
"Upload owner rename key collision for %s -> %s at %s; preserving row as %s",
|
||||
old_owner_normalized,
|
||||
new_owner,
|
||||
base_key,
|
||||
new_key,
|
||||
)
|
||||
renamed += 1
|
||||
updated[new_key] = new_info
|
||||
|
||||
if renamed:
|
||||
self._atomic_write_json(uploads_db_path, updated)
|
||||
return renamed
|
||||
|
||||
def _find_upload_path(self, upload_id: str) -> Optional[str]:
|
||||
"""Find an upload file by ID while staying inside upload_dir."""
|
||||
if not self.validate_upload_id(upload_id):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Renaming a user must update all three owner caches, not just the SQL DB.
|
||||
"""Renaming a user must update non-SQL owner stores, not just the SQL DB.
|
||||
|
||||
The DB owner-rename loop in the rename_user route updates every SQL-backed
|
||||
owner column, but three file-backed / in-memory stores are left stale:
|
||||
@@ -17,6 +17,9 @@ owner column, but three file-backed / in-memory stores are left stale:
|
||||
4. data/memory.json — a flat array where every entry has an `owner` field;
|
||||
memory_manager.load(owner=user) filters on it, so all memories vanish.
|
||||
|
||||
5. data/uploads/uploads.json — each upload row carries an `owner` field and
|
||||
owner-prefixed index key; stale metadata denies renamed users their uploads.
|
||||
|
||||
Regression coverage: these bugs are invisible in unit tests that mock the DB
|
||||
loop but don't exercise the file/cache patches added to the route.
|
||||
"""
|
||||
@@ -67,11 +70,12 @@ def rename_endpoint(monkeypatch, tmp_path):
|
||||
return _route(ar.setup_auth_routes(am), "rename_user"), am, tmp_path
|
||||
|
||||
|
||||
def _request(tmp_path, session_manager=None, token="t", research_handler=None):
|
||||
def _request(tmp_path, session_manager=None, token="t", research_handler=None, upload_handler=None):
|
||||
state = SimpleNamespace(
|
||||
invalidate_token_cache=lambda: None,
|
||||
session_manager=session_manager,
|
||||
research_handler=research_handler,
|
||||
upload_handler=upload_handler,
|
||||
)
|
||||
return SimpleNamespace(
|
||||
cookies={"odysseus_session": token},
|
||||
@@ -415,7 +419,56 @@ def test_rename_no_memory_json_does_not_crash(rename_endpoint):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. Skills (SKILL.md frontmatter + _usage.json sidecar)
|
||||
# 4. uploads.json
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_rename_updates_upload_metadata_owner(rename_endpoint):
|
||||
endpoint, _am, tmp_path = rename_endpoint
|
||||
from src.upload_handler import UploadHandler
|
||||
|
||||
upload_dir = tmp_path / "uploads"
|
||||
dated = upload_dir / "2026" / "06" / "09"
|
||||
dated.mkdir(parents=True)
|
||||
upload_id = "a" * 32 + ".txt"
|
||||
upload_path = dated / upload_id
|
||||
upload_path.write_text("alice private upload", encoding="utf-8")
|
||||
handler = UploadHandler(str(tmp_path), str(upload_dir))
|
||||
handler._atomic_write_json(
|
||||
str(upload_dir / "uploads.json"),
|
||||
{
|
||||
"alice:hash-alice": {
|
||||
"id": upload_id,
|
||||
"path": str(upload_path),
|
||||
"mime": "text/plain",
|
||||
"size": upload_path.stat().st_size,
|
||||
"name": "note.txt",
|
||||
"hash": "hash-alice",
|
||||
"original_name": "note.txt",
|
||||
"uploaded_at": "2026-06-09T10:00:00",
|
||||
"last_accessed": "2026-06-09T10:00:00",
|
||||
"client_ip": "127.0.0.1",
|
||||
"owner": "alice",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
asyncio.run(
|
||||
endpoint(
|
||||
"alice",
|
||||
SimpleNamespace(username="alice2"),
|
||||
_request(tmp_path, upload_handler=handler),
|
||||
)
|
||||
)
|
||||
|
||||
updated = json.loads((upload_dir / "uploads.json").read_text(encoding="utf-8"))
|
||||
assert "alice:hash-alice" not in updated
|
||||
assert updated["alice2:hash-alice"]["owner"] == "alice2"
|
||||
assert handler.resolve_upload(upload_id, owner="alice2")["path"] == str(upload_path)
|
||||
assert handler.resolve_upload(upload_id, owner="alice") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5. Skills (SKILL.md frontmatter + _usage.json sidecar)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SKILL_MD = """\
|
||||
@@ -522,7 +575,7 @@ def test_rename_usage_keys_case_insensitive(rename_endpoint):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5. Rollback: auth rename must be restored if SQL owner migration fails
|
||||
# 6. Rollback: auth rename must be restored if SQL owner migration fails
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_owner_migration_failure_rolls_back_auth_rename(monkeypatch, tmp_path):
|
||||
@@ -583,7 +636,7 @@ def test_self_rename_owner_migration_failure_rolls_back_auth_session(monkeypatch
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 6. P1 regression: rejected auth rename must not mutate file-backed stores
|
||||
# 7. P1 regression: rejected auth rename must not mutate file-backed stores
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_rejected_rename_does_not_mutate_files(monkeypatch, tmp_path):
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from src.upload_handler import UploadHandler
|
||||
|
||||
|
||||
def _make_handler(tmp_path: Path) -> UploadHandler:
|
||||
base = tmp_path / "base"
|
||||
upload = tmp_path / "uploads"
|
||||
base.mkdir()
|
||||
upload.mkdir()
|
||||
return UploadHandler(base_dir=str(base), upload_dir=str(upload))
|
||||
|
||||
|
||||
def _db_path(handler: UploadHandler) -> str:
|
||||
return os.path.join(handler.upload_dir, "uploads.json")
|
||||
|
||||
|
||||
def _write_upload_file(handler: UploadHandler, file_id: str, content: bytes = b"content") -> str:
|
||||
upload_day = Path(handler.upload_dir) / "2026" / "06" / "09"
|
||||
upload_day.mkdir(parents=True, exist_ok=True)
|
||||
path = upload_day / file_id
|
||||
path.write_bytes(content)
|
||||
return str(path)
|
||||
|
||||
|
||||
def _entry(handler: UploadHandler, owner: str, file_hash: str, file_id: str) -> dict:
|
||||
path = _write_upload_file(handler, file_id, content=f"{owner}:{file_hash}".encode())
|
||||
return {
|
||||
"id": file_id,
|
||||
"path": path,
|
||||
"mime": "text/plain",
|
||||
"size": os.path.getsize(path),
|
||||
"name": f"{file_id}.txt",
|
||||
"hash": file_hash,
|
||||
"original_name": f"{file_id}.txt",
|
||||
"uploaded_at": "2026-06-09T10:00:00",
|
||||
"last_accessed": "2026-06-09T10:00:00",
|
||||
"client_ip": "127.0.0.1",
|
||||
"owner": owner,
|
||||
}
|
||||
|
||||
|
||||
def test_rename_owner_updates_upload_metadata_key_and_resolver(tmp_path):
|
||||
handler = _make_handler(tmp_path)
|
||||
alice_id = "a" * 32 + ".txt"
|
||||
alice_entry = _entry(handler, "Alice", "hash-alice", alice_id)
|
||||
bob_entry = _entry(handler, "bob", "hash-bob", "b" * 32 + ".txt")
|
||||
handler._atomic_write_json(
|
||||
_db_path(handler),
|
||||
{
|
||||
"Alice:hash-alice": alice_entry,
|
||||
"bob:hash-bob": bob_entry,
|
||||
},
|
||||
)
|
||||
|
||||
renamed = handler.rename_owner("alice", "alice2")
|
||||
|
||||
assert renamed == 1
|
||||
updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8"))
|
||||
assert "Alice:hash-alice" not in updated
|
||||
assert "alice2:hash-alice" in updated
|
||||
assert updated["alice2:hash-alice"]["owner"] == "alice2"
|
||||
assert updated["alice2:hash-alice"]["path"] == alice_entry["path"]
|
||||
assert updated["alice2:hash-alice"]["hash"] == alice_entry["hash"]
|
||||
assert updated["alice2:hash-alice"]["uploaded_at"] == alice_entry["uploaded_at"]
|
||||
assert updated["alice2:hash-alice"]["last_accessed"] == alice_entry["last_accessed"]
|
||||
assert updated["bob:hash-bob"]["owner"] == "bob"
|
||||
|
||||
assert handler.resolve_upload(alice_id, owner="alice2")["id"] == alice_id
|
||||
assert handler.resolve_upload(alice_id, owner="alice") is None
|
||||
|
||||
|
||||
def test_rename_owner_preserves_rows_when_target_key_collides(tmp_path):
|
||||
handler = _make_handler(tmp_path)
|
||||
migrated_id = "c" * 32 + ".txt"
|
||||
existing_id = "d" * 32 + ".txt"
|
||||
migrated = _entry(handler, "alice", "same-hash", migrated_id)
|
||||
existing = _entry(handler, "alice2", "same-hash", existing_id)
|
||||
unrelated = _entry(handler, "carol", "other-hash", "e" * 32 + ".txt")
|
||||
handler._atomic_write_json(
|
||||
_db_path(handler),
|
||||
{
|
||||
"alice:same-hash": migrated,
|
||||
"alice2:same-hash": existing,
|
||||
"carol:other-hash": unrelated,
|
||||
},
|
||||
)
|
||||
|
||||
renamed = handler.rename_owner("alice", "alice2")
|
||||
|
||||
assert renamed == 1
|
||||
updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8"))
|
||||
assert len(updated) == 3
|
||||
assert updated["alice2:same-hash"]["id"] == existing_id
|
||||
migrated_key = f"alice2:same-hash:{migrated_id}"
|
||||
assert updated[migrated_key]["id"] == migrated_id
|
||||
assert updated[migrated_key]["owner"] == "alice2"
|
||||
assert updated[migrated_key]["path"] == migrated["path"]
|
||||
assert updated["carol:other-hash"] == unrelated
|
||||
Reference in New Issue
Block a user