diff --git a/app.py b/app.py index 365eee94a..755fc252e 100644 --- a/app.py +++ b/app.py @@ -498,6 +498,7 @@ app.state.session_manager = session_manager memory_manager = components["memory_manager"] memory_vector = components.get("memory_vector") upload_handler = components["upload_handler"] +app.state.upload_handler = upload_handler personal_docs_mgr = components["personal_docs_manager"] api_key_manager = components["api_key_manager"] preset_manager = components["preset_manager"] diff --git a/routes/auth_routes.py b/routes/auth_routes.py index b9158c93a..a9cc8ecb1 100644 --- a/routes/auth_routes.py +++ b/routes/auth_routes.py @@ -416,6 +416,17 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: except Exception as e: logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e) + # uploads.json: upload rows use owner metadata for access checks and + # owner-prefixed index keys for dedupe. Rename both so attachments keep + # resolving after the account username changes. + try: + upload_handler = getattr(request.app.state, "upload_handler", None) + rename_owner = getattr(upload_handler, "rename_owner", None) + if callable(rename_owner): + rename_owner(old_username, new_username) + except Exception as e: + logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e) + # skills: SKILL.md frontmatter carries owner: ; the usage # sidecar (_usage.json) keys entries as owner::skill-name. Both must # be updated or the renamed user's Skills panel goes empty. diff --git a/src/upload_handler.py b/src/upload_handler.py index 95bce306d..4c4e526bc 100644 --- a/src/upload_handler.py +++ b/src/upload_handler.py @@ -352,6 +352,86 @@ class UploadHandler: return dict(info) return None + def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str: + """Return the storage key to use after renaming an owned upload row.""" + if isinstance(key, str) and ":" in key: + owner_part, rest = key.split(":", 1) + if owner_part.strip().lower() == old_owner: + return f"{new_owner}:{rest}" + file_hash = info.get("hash") + if file_hash: + return f"{new_owner}:{file_hash}" + return key + + def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str: + """Choose a deterministic collision key without overwriting an existing row.""" + if base_key not in used_keys and base_key not in reserved_keys: + return base_key + + upload_id = str(info.get("id") or "renamed").strip() or "renamed" + candidate = f"{base_key}:{upload_id}" + if candidate not in used_keys and candidate not in reserved_keys: + return candidate + + index = 2 + while True: + candidate = f"{base_key}:{upload_id}:{index}" + if candidate not in used_keys and candidate not in reserved_keys: + return candidate + index += 1 + + def rename_owner(self, old_owner: str, new_owner: str) -> int: + """Rename upload metadata ownership from old_owner to new_owner. + + Upload rows are keyed by owner-qualified hashes for dedupe and also + carry an `owner` field for access checks. Both must move together when + usernames change. + """ + old_owner_normalized = str(old_owner or "").strip().lower() + new_owner = str(new_owner or "").strip() + if not old_owner_normalized or not new_owner: + return 0 + if old_owner_normalized == new_owner.lower(): + return 0 + + uploads_db_path = os.path.join(self.upload_dir, "uploads.json") + with self._index_lock: + current = self._load_upload_index() + if not current: + return 0 + + updated = {} + renamed = 0 + original_keys = set(current.keys()) + + for key, info in current.items(): + new_key = key + new_info = info + if isinstance(info, dict) and str(info.get("owner", "")).strip().lower() == old_owner_normalized: + new_info = dict(info) + new_info["owner"] = new_owner + base_key = self._renamed_upload_index_key(key, new_info, old_owner_normalized, new_owner) + new_key = self._unique_upload_index_key( + base_key, + set(updated.keys()), + original_keys - {key}, + new_info, + ) + if new_key != base_key: + logger.warning( + "Upload owner rename key collision for %s -> %s at %s; preserving row as %s", + old_owner_normalized, + new_owner, + base_key, + new_key, + ) + renamed += 1 + updated[new_key] = new_info + + if renamed: + self._atomic_write_json(uploads_db_path, updated) + return renamed + def _find_upload_path(self, upload_id: str) -> Optional[str]: """Find an upload file by ID while staying inside upload_dir.""" if not self.validate_upload_id(upload_id): diff --git a/tests/test_rename_user_owner_sync.py b/tests/test_rename_user_owner_sync.py index e5e89b4dc..721496bc3 100644 --- a/tests/test_rename_user_owner_sync.py +++ b/tests/test_rename_user_owner_sync.py @@ -1,4 +1,4 @@ -"""Renaming a user must update all three owner caches, not just the SQL DB. +"""Renaming a user must update non-SQL owner stores, not just the SQL DB. The DB owner-rename loop in the rename_user route updates every SQL-backed owner column, but three file-backed / in-memory stores are left stale: @@ -17,6 +17,9 @@ owner column, but three file-backed / in-memory stores are left stale: 4. data/memory.json — a flat array where every entry has an `owner` field; memory_manager.load(owner=user) filters on it, so all memories vanish. +5. data/uploads/uploads.json — each upload row carries an `owner` field and + owner-prefixed index key; stale metadata denies renamed users their uploads. + Regression coverage: these bugs are invisible in unit tests that mock the DB loop but don't exercise the file/cache patches added to the route. """ @@ -67,11 +70,12 @@ def rename_endpoint(monkeypatch, tmp_path): return _route(ar.setup_auth_routes(am), "rename_user"), am, tmp_path -def _request(tmp_path, session_manager=None, token="t", research_handler=None): +def _request(tmp_path, session_manager=None, token="t", research_handler=None, upload_handler=None): state = SimpleNamespace( invalidate_token_cache=lambda: None, session_manager=session_manager, research_handler=research_handler, + upload_handler=upload_handler, ) return SimpleNamespace( cookies={"odysseus_session": token}, @@ -415,7 +419,56 @@ def test_rename_no_memory_json_does_not_crash(rename_endpoint): # --------------------------------------------------------------------------- -# 4. Skills (SKILL.md frontmatter + _usage.json sidecar) +# 4. uploads.json +# --------------------------------------------------------------------------- + +def test_rename_updates_upload_metadata_owner(rename_endpoint): + endpoint, _am, tmp_path = rename_endpoint + from src.upload_handler import UploadHandler + + upload_dir = tmp_path / "uploads" + dated = upload_dir / "2026" / "06" / "09" + dated.mkdir(parents=True) + upload_id = "a" * 32 + ".txt" + upload_path = dated / upload_id + upload_path.write_text("alice private upload", encoding="utf-8") + handler = UploadHandler(str(tmp_path), str(upload_dir)) + handler._atomic_write_json( + str(upload_dir / "uploads.json"), + { + "alice:hash-alice": { + "id": upload_id, + "path": str(upload_path), + "mime": "text/plain", + "size": upload_path.stat().st_size, + "name": "note.txt", + "hash": "hash-alice", + "original_name": "note.txt", + "uploaded_at": "2026-06-09T10:00:00", + "last_accessed": "2026-06-09T10:00:00", + "client_ip": "127.0.0.1", + "owner": "alice", + }, + }, + ) + + asyncio.run( + endpoint( + "alice", + SimpleNamespace(username="alice2"), + _request(tmp_path, upload_handler=handler), + ) + ) + + updated = json.loads((upload_dir / "uploads.json").read_text(encoding="utf-8")) + assert "alice:hash-alice" not in updated + assert updated["alice2:hash-alice"]["owner"] == "alice2" + assert handler.resolve_upload(upload_id, owner="alice2")["path"] == str(upload_path) + assert handler.resolve_upload(upload_id, owner="alice") is None + + +# --------------------------------------------------------------------------- +# 5. Skills (SKILL.md frontmatter + _usage.json sidecar) # --------------------------------------------------------------------------- _SKILL_MD = """\ @@ -522,7 +575,7 @@ def test_rename_usage_keys_case_insensitive(rename_endpoint): # --------------------------------------------------------------------------- -# 5. Rollback: auth rename must be restored if SQL owner migration fails +# 6. Rollback: auth rename must be restored if SQL owner migration fails # --------------------------------------------------------------------------- def test_owner_migration_failure_rolls_back_auth_rename(monkeypatch, tmp_path): @@ -583,7 +636,7 @@ def test_self_rename_owner_migration_failure_rolls_back_auth_session(monkeypatch # --------------------------------------------------------------------------- -# 6. P1 regression: rejected auth rename must not mutate file-backed stores +# 7. P1 regression: rejected auth rename must not mutate file-backed stores # --------------------------------------------------------------------------- def test_rejected_rename_does_not_mutate_files(monkeypatch, tmp_path): diff --git a/tests/test_upload_handler_rename_owner.py b/tests/test_upload_handler_rename_owner.py new file mode 100644 index 000000000..08ce60308 --- /dev/null +++ b/tests/test_upload_handler_rename_owner.py @@ -0,0 +1,101 @@ +import json +import os +from pathlib import Path + +from src.upload_handler import UploadHandler + + +def _make_handler(tmp_path: Path) -> UploadHandler: + base = tmp_path / "base" + upload = tmp_path / "uploads" + base.mkdir() + upload.mkdir() + return UploadHandler(base_dir=str(base), upload_dir=str(upload)) + + +def _db_path(handler: UploadHandler) -> str: + return os.path.join(handler.upload_dir, "uploads.json") + + +def _write_upload_file(handler: UploadHandler, file_id: str, content: bytes = b"content") -> str: + upload_day = Path(handler.upload_dir) / "2026" / "06" / "09" + upload_day.mkdir(parents=True, exist_ok=True) + path = upload_day / file_id + path.write_bytes(content) + return str(path) + + +def _entry(handler: UploadHandler, owner: str, file_hash: str, file_id: str) -> dict: + path = _write_upload_file(handler, file_id, content=f"{owner}:{file_hash}".encode()) + return { + "id": file_id, + "path": path, + "mime": "text/plain", + "size": os.path.getsize(path), + "name": f"{file_id}.txt", + "hash": file_hash, + "original_name": f"{file_id}.txt", + "uploaded_at": "2026-06-09T10:00:00", + "last_accessed": "2026-06-09T10:00:00", + "client_ip": "127.0.0.1", + "owner": owner, + } + + +def test_rename_owner_updates_upload_metadata_key_and_resolver(tmp_path): + handler = _make_handler(tmp_path) + alice_id = "a" * 32 + ".txt" + alice_entry = _entry(handler, "Alice", "hash-alice", alice_id) + bob_entry = _entry(handler, "bob", "hash-bob", "b" * 32 + ".txt") + handler._atomic_write_json( + _db_path(handler), + { + "Alice:hash-alice": alice_entry, + "bob:hash-bob": bob_entry, + }, + ) + + renamed = handler.rename_owner("alice", "alice2") + + assert renamed == 1 + updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8")) + assert "Alice:hash-alice" not in updated + assert "alice2:hash-alice" in updated + assert updated["alice2:hash-alice"]["owner"] == "alice2" + assert updated["alice2:hash-alice"]["path"] == alice_entry["path"] + assert updated["alice2:hash-alice"]["hash"] == alice_entry["hash"] + assert updated["alice2:hash-alice"]["uploaded_at"] == alice_entry["uploaded_at"] + assert updated["alice2:hash-alice"]["last_accessed"] == alice_entry["last_accessed"] + assert updated["bob:hash-bob"]["owner"] == "bob" + + assert handler.resolve_upload(alice_id, owner="alice2")["id"] == alice_id + assert handler.resolve_upload(alice_id, owner="alice") is None + + +def test_rename_owner_preserves_rows_when_target_key_collides(tmp_path): + handler = _make_handler(tmp_path) + migrated_id = "c" * 32 + ".txt" + existing_id = "d" * 32 + ".txt" + migrated = _entry(handler, "alice", "same-hash", migrated_id) + existing = _entry(handler, "alice2", "same-hash", existing_id) + unrelated = _entry(handler, "carol", "other-hash", "e" * 32 + ".txt") + handler._atomic_write_json( + _db_path(handler), + { + "alice:same-hash": migrated, + "alice2:same-hash": existing, + "carol:other-hash": unrelated, + }, + ) + + renamed = handler.rename_owner("alice", "alice2") + + assert renamed == 1 + updated = json.loads(Path(_db_path(handler)).read_text(encoding="utf-8")) + assert len(updated) == 3 + assert updated["alice2:same-hash"]["id"] == existing_id + migrated_key = f"alice2:same-hash:{migrated_id}" + assert updated[migrated_key]["id"] == migrated_id + assert updated[migrated_key]["owner"] == "alice2" + assert updated[migrated_key]["path"] == migrated["path"] + assert updated["carol:other-hash"] == unrelated