mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-20 19:55:25 -04:00
fix(auth): clean up rename and null-owner ownership (#4340)
This commit is contained in:
@@ -322,6 +322,47 @@ class PersonalDocsManager:
|
||||
else:
|
||||
logger.info(f"Directory not in index: {directory}")
|
||||
|
||||
def rename_directory(self, old_directory: str, new_directory: str, *, path_map: Dict[str, str] = None):
|
||||
"""Rewrite tracked directory and excluded-file paths after an owner rename."""
|
||||
old_directory = os.path.abspath(old_directory)
|
||||
new_directory = os.path.abspath(new_directory)
|
||||
path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
|
||||
|
||||
def rewrite(path: str) -> str:
|
||||
abs_path = os.path.abspath(path)
|
||||
mapped = path_map.get(abs_path)
|
||||
if mapped:
|
||||
return mapped
|
||||
if abs_path == old_directory:
|
||||
return new_directory
|
||||
if abs_path.startswith(old_directory + os.sep):
|
||||
return new_directory + abs_path[len(old_directory):]
|
||||
return abs_path
|
||||
|
||||
changed_dirs = False
|
||||
rewritten_dirs = []
|
||||
for directory in self.indexed_directories:
|
||||
rewritten = rewrite(directory)
|
||||
changed_dirs = changed_dirs or rewritten != os.path.abspath(directory)
|
||||
if rewritten not in rewritten_dirs:
|
||||
rewritten_dirs.append(rewritten)
|
||||
if changed_dirs:
|
||||
self.indexed_directories = rewritten_dirs
|
||||
self.save_directories()
|
||||
|
||||
changed_excluded = False
|
||||
rewritten_excluded = set()
|
||||
for path in self.excluded_files:
|
||||
rewritten = rewrite(path)
|
||||
changed_excluded = changed_excluded or rewritten != os.path.abspath(path)
|
||||
rewritten_excluded.add(rewritten)
|
||||
if changed_excluded:
|
||||
self.excluded_files = rewritten_excluded
|
||||
self._save_excluded()
|
||||
|
||||
if changed_dirs or changed_excluded:
|
||||
self.refresh_index()
|
||||
|
||||
def get_indexed_directories(self):
|
||||
"""Get the list of all indexed directories."""
|
||||
return self.indexed_directories.copy()
|
||||
|
||||
@@ -50,6 +50,23 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
|
||||
return f"doc_{hashlib.sha256(key.encode('utf-8')).hexdigest()[:16]}"
|
||||
|
||||
|
||||
def _rewrite_owner_path(value: str, path_map: Dict[str, str], path_prefixes: List[tuple]) -> str:
|
||||
if not isinstance(value, str) or not value:
|
||||
return value
|
||||
abs_value = os.path.abspath(value)
|
||||
mapped = path_map.get(abs_value)
|
||||
if mapped:
|
||||
return mapped
|
||||
for old_prefix, new_prefix in path_prefixes:
|
||||
old_abs = os.path.abspath(old_prefix)
|
||||
new_abs = os.path.abspath(new_prefix)
|
||||
if abs_value == old_abs:
|
||||
return new_abs
|
||||
if abs_value.startswith(old_abs + os.sep):
|
||||
return new_abs + abs_value[len(old_abs):]
|
||||
return value
|
||||
|
||||
|
||||
class VectorRAG:
|
||||
"""RAG system using ChromaDB vector storage with hybrid search."""
|
||||
|
||||
@@ -250,6 +267,75 @@ class VectorRAG:
|
||||
"failed_count": len(docs) - len(valid),
|
||||
}
|
||||
|
||||
def rename_owner(
|
||||
self,
|
||||
old_owner: str,
|
||||
new_owner: str,
|
||||
*,
|
||||
path_map: Optional[Dict[str, str]] = None,
|
||||
path_prefixes: Optional[List[tuple]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Rewrite existing RAG metadata after an auth username rename."""
|
||||
if not self.healthy:
|
||||
return {"success": False, "updated_count": 0, "message": "Collection not initialized"}
|
||||
|
||||
old_owner = (old_owner or "").strip().lower()
|
||||
new_owner = (new_owner or "").strip().lower()
|
||||
if not old_owner or not new_owner or old_owner == new_owner:
|
||||
return {"success": True, "updated_count": 0, "message": "No owner rename needed"}
|
||||
|
||||
path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
|
||||
path_prefixes = path_prefixes or []
|
||||
updated_ids = set()
|
||||
failed_count = 0
|
||||
|
||||
for lane_name, collection in self._collections_for_delete():
|
||||
try:
|
||||
results = collection.get(
|
||||
where={"owner": old_owner},
|
||||
include=["metadatas"],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("rename_owner metadata scan failed in %s lane: %s", lane_name, e)
|
||||
failed_count += 1
|
||||
continue
|
||||
|
||||
ids = results.get("ids") or []
|
||||
metadatas = results.get("metadatas") or []
|
||||
if not ids:
|
||||
continue
|
||||
|
||||
new_metas = []
|
||||
selected_ids = []
|
||||
for doc_id, meta in zip(ids, metadatas):
|
||||
if not isinstance(meta, dict):
|
||||
continue
|
||||
next_meta = dict(meta)
|
||||
if str(next_meta.get("owner", "")).strip().lower() == old_owner:
|
||||
next_meta["owner"] = new_owner
|
||||
for key in ("source", "directory"):
|
||||
next_meta[key] = _rewrite_owner_path(next_meta.get(key), path_map, path_prefixes)
|
||||
selected_ids.append(doc_id)
|
||||
new_metas.append(next_meta)
|
||||
|
||||
if not selected_ids:
|
||||
continue
|
||||
|
||||
try:
|
||||
collection.update(ids=selected_ids, metadatas=new_metas)
|
||||
updated_ids.update(selected_ids)
|
||||
except Exception as e:
|
||||
logger.warning("rename_owner metadata update failed in %s lane: %s", lane_name, e)
|
||||
failed_count += len(selected_ids)
|
||||
|
||||
success = failed_count == 0
|
||||
return {
|
||||
"success": success,
|
||||
"updated_count": len(updated_ids),
|
||||
"failed_count": failed_count,
|
||||
"message": f"Updated {len(updated_ids)} RAG chunk(s)",
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Search — hybrid: vector similarity + keyword overlap
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user