mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
Re-enable VectorRAG init with lazy retry
Personal Docs (POST /api/personal/add_directory and friends) currently returns HTTP 503 'RAG system is not available' for every request, because get_rag_manager() and rag_manager are both hardcoded off.

The disablement was added when chromadb 1.4.1 and pydantic 2.12 were mutually incompatible at the client init layer. That compatibility issue is fixed in the current pins (chromadb 1.5.x + pydantic 2.13.x). Verified by calling the original lazy initializer against a running chroma server — VectorRAG instantiates, reports healthy=True, and indexes successfully.

This change:

1. src/rag_singleton.py — replace the hardcoded `return None` in get_rag_manager() with the original lazy init body. Keeps the 30s retry throttle so a missing chroma server doesn't busy-retry on every request.
2. app.py — replace the parallel `rag_manager = None` / `rag_available = False` hardcoding with a get_rag_manager() call. Logs the resolved state at startup. If chroma isn't reachable yet, rag_manager stays None and personal-doc routes still return 503, but later requests go through the retry-throttle path in get_rag_manager(), which tries to init again once the 30s retry interval has elapsed.

This change does not touch requirements.txt. Installs using docker-compose get chroma automatically; manual installs that want Personal Docs to work still need to either pip install chromadb (full package) and run `chroma run`, or point at an external chroma instance via environment variables. That can be a follow-up README / requirements-optional note.
This commit is contained in:
@@ -355,15 +355,26 @@ async def serve_generated_image(filename: str, request: Request):
|
|||||||
from services.youtube import init_youtube
|
from services.youtube import init_youtube
|
||||||
init_youtube()
|
init_youtube()
|
||||||
|
|
||||||
# ========= RAG (vector document RAG — DISABLED) =========
|
# ========= RAG (vector document RAG) =========
|
||||||
# VectorRAG (ChromaDB-backed personal-document semantic search) is unused
|
# VectorRAG (ChromaDB-backed personal-document semantic search). Initialized
|
||||||
# (0 directories ever indexed) and its chromadb 1.4.1 / pydantic 2.12 client
|
# lazily via get_rag_manager() — returns None if ChromaDB isn't reachable
|
||||||
# can't even instantiate — it threw at init and cost ~30s of startup waiting on
|
# (no server running on the configured host:port), in which case personal-doc
|
||||||
# the embedding probe. Disabled. All callers already guard on rag_available /
|
# routes return a clean 503 instead of busy-retrying every request.
|
||||||
# `if rag_manager`, so personal-doc routes degrade cleanly.
|
#
|
||||||
rag_manager = None
|
# Note: this was previously hardcoded off because chromadb 1.4.1 / pydantic
|
||||||
rag_available = False
|
# 2.12 were mutually incompatible at the time. With the current pins
|
||||||
logger.info("Vector document RAG disabled (unused)")
|
# (chromadb 1.5.x + pydantic 2.13.x) the init works and Personal Docs
|
||||||
|
# (POST /api/personal/add_directory etc.) is functional again.
|
||||||
|
from src.rag_singleton import get_rag_manager
|
||||||
|
rag_manager = get_rag_manager()
|
||||||
|
rag_available = rag_manager is not None
|
||||||
|
if rag_available:
|
||||||
|
logger.info("Vector document RAG initialized")
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"Vector document RAG not available at startup "
|
||||||
|
"(ChromaDB may not be reachable yet — routes will retry lazily)"
|
||||||
|
)
|
||||||
|
|
||||||
# ========= IMPORT CONFIG =========
|
# ========= IMPORT CONFIG =========
|
||||||
from src.config import config
|
from src.config import config
|
||||||
|
|||||||
+13
-8
@@ -12,16 +12,21 @@ rag_instance = None
|
|||||||
_last_attempt = 0.0
|
_last_attempt = 0.0
|
||||||
_RETRY_INTERVAL = 30 # seconds between re-init attempts
|
_RETRY_INTERVAL = 30 # seconds between re-init attempts
|
||||||
|
|
||||||
|
|
||||||
def get_rag_manager():
|
def get_rag_manager():
|
||||||
"""Disabled: vector document RAG (VectorRAG/ChromaDB) is unused and its
|
"""Lazy ChromaDB-backed VectorRAG initializer.
|
||||||
client is incompatible with the installed pydantic. Return None so personal-
|
|
||||||
doc routes fall back to non-vector behavior instead of re-attempting (and
|
|
||||||
re-hanging on) a broken ChromaDB init every 30s."""
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
Returns the VectorRAG instance on first successful init, None if ChromaDB
|
||||||
|
isn't reachable / available. Failed init attempts are throttled to once
|
||||||
|
per _RETRY_INTERVAL seconds so a missing ChromaDB doesn't busy-retry on
|
||||||
|
every request — callers (personal-doc routes etc.) get None back and
|
||||||
|
return a clean 503 to the user instead.
|
||||||
|
|
||||||
def _get_rag_manager_legacy():
|
Historical note: this used to be hardcoded to ``return None`` with a
|
||||||
"""Original lazy initializer, kept for reference / easy re-enable."""
|
comment about chromadb 1.4.1 / pydantic 2.12 being mutually incompatible.
|
||||||
|
That compat issue is resolved in current pinned versions
|
||||||
|
(chromadb 1.5.x + pydantic 2.13.x), so the real initializer is back.
|
||||||
|
"""
|
||||||
global rag_instance, _last_attempt
|
global rag_instance, _last_attempt
|
||||||
|
|
||||||
if rag_instance is not None:
|
if rag_instance is not None:
|
||||||
@@ -29,7 +34,7 @@ def _get_rag_manager_legacy():
|
|||||||
|
|
||||||
now = time.monotonic()
|
now = time.monotonic()
|
||||||
if now - _last_attempt < _RETRY_INTERVAL:
|
if now - _last_attempt < _RETRY_INTERVAL:
|
||||||
return None # too soon to retry
|
return None # too soon to retry — last attempt failed
|
||||||
|
|
||||||
_last_attempt = now
|
_last_attempt = now
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user