fix: optimize upload manifest performance and fix owner rename bug

This commit is contained in:
Muhammad-Ikhwan-Fathulloh
2026-06-16 23:11:30 +07:00
parent 37da04e8b5
commit b3ed60e95a
+48 -13
View File
@@ -112,6 +112,10 @@ class UploadHandler:
except Exception:
self.file_detector = None
logger.warning("python-magic not available, falling back to basic detection")
# In-memory index cache to avoid O(N) disk I/O on every request
self._index_cache: Optional[Dict[str, Any]] = None
self._index_mtime: float = 0.0
def inside_base_dir(self, path: str) -> bool:
"""Check if path is inside base directory"""
@@ -317,6 +321,13 @@ class UploadHandler:
except OSError:
pass
os.replace(tmp, path)
# Update cache if this is the main index
if path.endswith("uploads.json"):
self._index_cache = data
try:
self._index_mtime = os.path.getmtime(path)
except OSError:
self._index_mtime = time.time()
except Exception:
try:
os.unlink(tmp)
@@ -325,22 +336,40 @@ class UploadHandler:
raise
def _load_upload_index(self) -> Dict[str, Any]:
"""Load the upload index from disk/cache. Uses mtime-based validation
to avoid redundant parsing on hot paths.
"""
uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
if not os.path.exists(uploads_db_path):
self._index_cache = {}
self._index_mtime = 0.0
return {}
# Check cache validity
try:
mtime = os.path.getmtime(uploads_db_path)
if self._index_cache is not None and mtime <= self._index_mtime:
return self._index_cache
except OSError:
mtime = 0.0
# Try the live file first, fall back to the .bak sibling if the
# live file is truncated/corrupted (e.g. a previous writer was
# SIGKILL'd mid-rename before the new code path was deployed).
# live file is truncated/corrupted.
for candidate in (uploads_db_path, uploads_db_path + ".bak"):
if not os.path.exists(candidate):
continue
try:
with open(candidate, "r", encoding="utf-8") as f:
data = json.load(f)
return data if isinstance(data, dict) else {}
if isinstance(data, dict):
self._index_cache = data
self._index_mtime = mtime
return data
except Exception as e:
logger.warning(f"Failed to read uploads database ({candidate}): {e}")
continue
self._index_cache = {}
return {}
def get_upload_info(self, upload_id: str) -> Optional[Dict[str, Any]]:
@@ -353,14 +382,23 @@ class UploadHandler:
return None
def _renamed_upload_index_key(self, key: str, info: Dict[str, Any], old_owner: str, new_owner: str) -> str:
"""Return the storage key to use after renaming an owned upload row."""
if isinstance(key, str) and ":" in key:
owner_part, rest = key.split(":", 1)
if owner_part.strip().lower() == old_owner:
return f"{new_owner}:{rest}"
"""Return the storage key to use after renaming an owned upload row.
Harden against usernames with colons by using the explicit metadata
fields instead of trying to parse the key string.
"""
file_hash = info.get("hash")
if file_hash:
return f"{new_owner}:{file_hash}"
# Fallback for rows without an explicit hash (should not happen in modern Odysseus)
if isinstance(key, str) and ":" in key:
# Join all but the last part if there are multiple colons
parts = key.rsplit(":", 1)
if len(parts) == 2:
owner_part, rest = parts[0], parts[1]
if owner_part.strip().lower() == old_owner.strip().lower():
return f"{new_owner}:{rest}"
return key
def _unique_upload_index_key(self, base_key: str, used_keys: set, reserved_keys: set, info: Dict[str, Any]) -> str:
@@ -543,11 +581,8 @@ class UploadHandler:
total_size = 0
file_types = {}
uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
if os.path.exists(uploads_db_path):
with open(uploads_db_path, "r", encoding="utf-8") as f:
files = json.load(f)
files = self._load_upload_index()
if files:
total_files = len(files)
for file_info in files.values():
total_size += file_info.get("size", 0)