fix: optimize upload manifest performance and fix owner rename bug

This commit is contained in:
Muhammad-Ikhwan-Fathulloh
2026-06-16 23:11:30 +07:00
parent 37da04e8b5
commit b3ed60e95a
+48 -13
View File
@@ -113,6 +113,10 @@ class UploadHandler:
self.file_detector = None self.file_detector = None
logger.warning("python-magic not available, falling back to basic detection") logger.warning("python-magic not available, falling back to basic detection")
# In-memory index cache to avoid O(N) disk I/O on every request
self._index_cache: Optional[Dict[str, Any]] = None
self._index_mtime: float = 0.0
def inside_base_dir(self, path: str) -> bool: def inside_base_dir(self, path: str) -> bool:
"""Check if path is inside base directory""" """Check if path is inside base directory"""
base = os.path.realpath(self.base_dir) base = os.path.realpath(self.base_dir)
@@ -317,6 +321,13 @@ class UploadHandler:
except OSError: except OSError:
pass pass
os.replace(tmp, path) os.replace(tmp, path)
# Update cache if this is the main index
if path.endswith("uploads.json"):
self._index_cache = data
try:
self._index_mtime = os.path.getmtime(path)
except OSError:
self._index_mtime = time.time()
except Exception: except Exception:
try: try:
os.unlink(tmp) os.unlink(tmp)
@@ -325,22 +336,40 @@ class UploadHandler:
raise raise
def _load_upload_index(self) -> Dict[str, Any]: def _load_upload_index(self) -> Dict[str, Any]:
"""Load the upload index from disk/cache. Uses mtime-based validation
to avoid redundant parsing on hot paths.
"""
uploads_db_path = os.path.join(self.upload_dir, "uploads.json") uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
if not os.path.exists(uploads_db_path): if not os.path.exists(uploads_db_path):
self._index_cache = {}
self._index_mtime = 0.0
return {} return {}
# Check cache validity
try:
mtime = os.path.getmtime(uploads_db_path)
if self._index_cache is not None and mtime <= self._index_mtime:
return self._index_cache
except OSError:
mtime = 0.0
# Try the live file first, fall back to the .bak sibling if the # Try the live file first, fall back to the .bak sibling if the
# live file is truncated/corrupted (e.g. a previous writer was # live file is truncated/corrupted.
# SIGKILL'd mid-rename before the new code path was deployed).
for candidate in (uploads_db_path, uploads_db_path + ".bak"): for candidate in (uploads_db_path, uploads_db_path + ".bak"):
if not os.path.exists(candidate): if not os.path.exists(candidate):
continue continue
try: try:
with open(candidate, "r", encoding="utf-8") as f: with open(candidate, "r", encoding="utf-8") as f:
data = json.load(f) data = json.load(f)
return data if isinstance(data, dict) else {} if isinstance(data, dict):
self._index_cache = data
self._index_mtime = mtime
return data
except Exception as e: except Exception as e:
logger.warning(f"Failed to read uploads database ({candidate}): {e}") logger.warning(f"Failed to read uploads database ({candidate}): {e}")
continue continue
self._index_cache = {}
return {} return {}
def get_upload_info(self, upload_id: str) -> Optional[Dict[str, Any]]: def get_upload_info(self, upload_id: str) -> Optional[Dict[str, Any]]:
@@ -353,14 +382,23 @@ class UploadHandler:
return None return None
def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str: def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str:
"""Return the storage key to use after renaming an owned upload row.""" """Return the storage key to use after renaming an owned upload row.
if isinstance(key, str) and ":" in key:
owner_part, rest = key.split(":", 1) Harden against usernames with colons by using the explicit metadata
if owner_part.strip().lower() == old_owner: fields instead of trying to parse the key string.
return f"{new_owner}:{rest}" """
file_hash = info.get("hash") file_hash = info.get("hash")
if file_hash: if file_hash:
return f"{new_owner}:{file_hash}" return f"{new_owner}:{file_hash}"
# Fallback for rows without an explicit hash (should not happen in modern Odysseus)
if isinstance(key, str) and ":" in key:
# Join all but the last part if there are multiple colons
parts = key.rsplit(":", 1)
if len(parts) == 2:
owner_part, rest = parts[0], parts[1]
if owner_part.strip().lower() == old_owner.strip().lower():
return f"{new_owner}:{rest}"
return key return key
def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str: def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str:
@@ -543,11 +581,8 @@ class UploadHandler:
total_size = 0 total_size = 0
file_types = {} file_types = {}
uploads_db_path = os.path.join(self.upload_dir, "uploads.json") files = self._load_upload_index()
if os.path.exists(uploads_db_path): if files:
with open(uploads_db_path, "r", encoding="utf-8") as f:
files = json.load(f)
total_files = len(files) total_files = len(files)
for file_info in files.values(): for file_info in files.values():
total_size += file_info.get("size", 0) total_size += file_info.get("size", 0)