mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
Scope vision model resolution by owner (#3009)
This commit is contained in:
@@ -198,7 +198,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
|
||||
title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0]
|
||||
try:
|
||||
body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
|
||||
body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user))
|
||||
except Exception:
|
||||
body_text = None
|
||||
|
||||
@@ -437,7 +437,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
raise HTTPException(404, "Source PDF could not be located")
|
||||
|
||||
try:
|
||||
body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
|
||||
body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user))
|
||||
except Exception as e:
|
||||
logger.error(f"extract_pdf_text failed for {pdf_path}: {e}")
|
||||
raise HTTPException(500, f"Extraction failed: {e}")
|
||||
@@ -1156,7 +1156,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
settings = _load_vl_settings()
|
||||
vl_model = settings.get("vision_model", "")
|
||||
try:
|
||||
url, model_id, headers = _resolve_vl_model(vl_model)
|
||||
url, model_id, headers = _resolve_vl_model(vl_model, owner=user)
|
||||
except Exception as e:
|
||||
raise HTTPException(503, f"No vision model available: {e}")
|
||||
|
||||
|
||||
@@ -1760,7 +1760,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
return {"error": "Vision is disabled — enable it in Settings → Vision"}
|
||||
configured = vl_settings.get("vision_model", "")
|
||||
try:
|
||||
chat_url, model_name, headers = _resolve_vl_model(configured)
|
||||
chat_url, model_name, headers = _resolve_vl_model(configured, owner=user)
|
||||
except ValueError:
|
||||
return {"error": "No vision model configured — set one in Settings → Vision"}
|
||||
if not chat_url:
|
||||
|
||||
@@ -371,7 +371,7 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
tmp.write(content)
|
||||
tmp_path = tmp.name
|
||||
try:
|
||||
text = _process_pdf(tmp_path)
|
||||
text = _process_pdf(tmp_path, owner=_owner(request))
|
||||
finally:
|
||||
os.unlink(tmp_path)
|
||||
else:
|
||||
|
||||
@@ -225,7 +225,7 @@ def setup_upload_routes(upload_handler):
|
||||
logger.warning(f"Vision cache read failed for {file_id}: {e}")
|
||||
from src.document_processor import analyze_image_with_vl
|
||||
try:
|
||||
text = analyze_image_with_vl(path) or ""
|
||||
text = analyze_image_with_vl(path, owner=current_user) or ""
|
||||
except Exception as e:
|
||||
logger.error(f"Vision analysis failed for {file_id}: {e}")
|
||||
raise HTTPException(500, f"Vision analysis failed: {e}")
|
||||
|
||||
+1
-1
@@ -229,7 +229,7 @@ class ChatHandler:
|
||||
except Exception:
|
||||
vl_desc = None
|
||||
if not vl_desc:
|
||||
vl_result = analyze_image_with_vl_result(file_info["path"])
|
||||
vl_result = analyze_image_with_vl_result(file_info["path"], owner=owner)
|
||||
vl_desc = vl_result.get("text", "")
|
||||
vl_model = vl_result.get("model", "")
|
||||
if vl_desc and not vl_desc.startswith("["):
|
||||
|
||||
+12
-12
@@ -109,7 +109,7 @@ def _process_text_file(path: str) -> str:
|
||||
return result
|
||||
|
||||
|
||||
def _process_pdf(path: str) -> str:
|
||||
def _process_pdf(path: str, owner: str | None = None) -> str:
|
||||
"""Process PDF file with text extraction (pypdf). Uses VL model for image-heavy pages."""
|
||||
try:
|
||||
from pypdf import PdfReader
|
||||
@@ -133,7 +133,7 @@ def _process_pdf(path: str) -> str:
|
||||
temp_img_path = tmp.name
|
||||
try:
|
||||
img.image.save(temp_img_path, "PNG") # pypdf -> PIL image
|
||||
ocr_text = analyze_image_with_vl(temp_img_path)
|
||||
ocr_text = analyze_image_with_vl(temp_img_path, owner=owner)
|
||||
if ocr_text and "unavailable" not in ocr_text.lower():
|
||||
pdf_text += f"\n\n[Page {page_num + 1} image {img_index + 1} text]: {ocr_text}"
|
||||
finally:
|
||||
@@ -254,7 +254,7 @@ def _load_vl_settings() -> dict:
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_vl_model(configured: str) -> tuple:
|
||||
def _resolve_vl_model(configured: str, owner: str | None = None) -> tuple:
|
||||
"""Resolve the vision model to (url, model_id, headers).
|
||||
|
||||
Uses admin-configured model if set, otherwise tries auto-detection
|
||||
@@ -263,7 +263,7 @@ def _resolve_vl_model(configured: str) -> tuple:
|
||||
from src.ai_interaction import _resolve_model
|
||||
|
||||
if configured:
|
||||
return _resolve_model(configured)
|
||||
return _resolve_model(configured, owner=owner)
|
||||
|
||||
# Auto-detect: try known vision-capable models in priority order
|
||||
candidates = [
|
||||
@@ -274,14 +274,14 @@ def _resolve_vl_model(configured: str) -> tuple:
|
||||
]
|
||||
for candidate in candidates:
|
||||
try:
|
||||
return _resolve_model(candidate)
|
||||
return _resolve_model(candidate, owner=owner)
|
||||
except (ValueError, Exception):
|
||||
continue
|
||||
|
||||
raise ValueError("No vision model available")
|
||||
|
||||
|
||||
def analyze_image_with_vl_result(image_path: str) -> dict:
|
||||
def analyze_image_with_vl_result(image_path: str, owner: str | None = None) -> dict:
|
||||
"""Analyze an image and return both text and the model that produced it."""
|
||||
logger.info(f"Analyzing image with VL model: {image_path}")
|
||||
try:
|
||||
@@ -291,7 +291,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
|
||||
vl_model = settings.get("vision_model", "")
|
||||
|
||||
try:
|
||||
url, model_id, headers = _resolve_vl_model(vl_model)
|
||||
url, model_id, headers = _resolve_vl_model(vl_model, owner=owner)
|
||||
except ValueError:
|
||||
return {"text": "[No vision model configured — set one in Settings → Vision]", "model": vl_model or ""}
|
||||
|
||||
@@ -316,7 +316,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
|
||||
# — same shape as task/chat but its own list (`vision_model_fallbacks`).
|
||||
try:
|
||||
from src.endpoint_resolver import resolve_vision_fallback_candidates
|
||||
_vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates()
|
||||
_vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates(owner=owner)
|
||||
except Exception:
|
||||
_vl_candidates = [(url, model_id, headers)]
|
||||
|
||||
@@ -338,9 +338,9 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
|
||||
return {"text": "[VL model unavailable - image not analyzed]", "model": ""}
|
||||
|
||||
|
||||
def analyze_image_with_vl(image_path: str) -> str:
|
||||
def analyze_image_with_vl(image_path: str, owner: str | None = None) -> str:
|
||||
"""Analyze an image using the admin-configured Vision-Language model."""
|
||||
return analyze_image_with_vl_result(image_path).get("text", "")
|
||||
return analyze_image_with_vl_result(image_path, owner=owner).get("text", "")
|
||||
|
||||
|
||||
def build_user_content(
|
||||
@@ -434,7 +434,7 @@ def build_user_content(
|
||||
# Pull the PDF prose once — used as either intro_text
|
||||
# (form path) or the doc body (plain path).
|
||||
try:
|
||||
pdf_body_text = strip_pdf_content_marker(_process_pdf(path))
|
||||
pdf_body_text = strip_pdf_content_marker(_process_pdf(path, owner=owner))
|
||||
except Exception:
|
||||
pdf_body_text = None
|
||||
|
||||
@@ -517,7 +517,7 @@ def build_user_content(
|
||||
except Exception as e:
|
||||
logger.warning(f"PDF auto-doc creation failed for {path}: {e}")
|
||||
if extracted_text is None:
|
||||
extracted_text = _process_pdf(path)
|
||||
extracted_text = _process_pdf(path, owner=owner)
|
||||
elif mime.startswith("text/") or _is_text_file(path):
|
||||
extracted_text = _process_text_file(path)
|
||||
else:
|
||||
|
||||
@@ -35,7 +35,7 @@ def test_pdf_body_marker_stripped_without_eating_text(monkeypatch, tmp_path):
|
||||
|
||||
# Shape _process_pdf actually returns: marker, then a page-text marker, then body.
|
||||
raw = "\n\n[PDF content]:\n\n[Page 1 text]:\nto the board, the agenda is set"
|
||||
monkeypatch.setattr(dp, "_process_pdf", lambda path: raw)
|
||||
monkeypatch.setattr(dp, "_process_pdf", lambda path, owner=None: raw)
|
||||
monkeypatch.setattr(pdf_forms, "has_form_fields", lambda path: False)
|
||||
monkeypatch.setattr(pdf_form_doc, "create_plain_pdf_document", lambda **kw: "doc-123")
|
||||
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
from pathlib import Path
|
||||
|
||||
from src import ai_interaction
|
||||
from src import document_processor as dp
|
||||
|
||||
|
||||
# Repository root: the parent of the directory that contains this test file.
ROOT = Path(__file__).resolve().parents[1]
|
||||
|
||||
|
||||
def test_configured_vision_model_resolution_passes_owner(monkeypatch):
    """A configured vision model is resolved exactly once, with the owner forwarded."""
    calls = []

    def fake_resolve_model(spec, owner=None):
        # Record each (spec, owner) pair so the forwarding can be asserted below.
        calls.append((spec, owner))
        return (
            "http://example.test/chat/completions",
            spec,
            {"Authorization": "Bearer token"},
        )

    monkeypatch.setattr(ai_interaction, "_resolve_model", fake_resolve_model)

    resolved = dp._resolve_vl_model("gpt-4o", owner="alice")

    expected = (
        "http://example.test/chat/completions",
        "gpt-4o",
        {"Authorization": "Bearer token"},
    )
    assert resolved == expected
    assert calls == [("gpt-4o", "alice")]
|
||||
|
||||
|
||||
def test_auto_detected_vision_model_resolution_passes_owner(monkeypatch):
    """With no configured model, every resolution attempt carries the owner."""
    calls = []

    def fake_resolve_model(spec, owner=None):
        calls.append((spec, owner))
        # Only the "llava" spec resolves; every other spec is rejected.
        if spec != "llava":
            raise ValueError("not available")
        return ("http://example.test/chat/completions", spec, {})

    monkeypatch.setattr(ai_interaction, "_resolve_model", fake_resolve_model)

    resolved = dp._resolve_vl_model("", owner="alice")

    expected = ("http://example.test/chat/completions", "llava", {})
    assert resolved == expected
    # At least one attempt was made, and none of them dropped the owner.
    assert calls
    assert all(owner == "alice" for _spec, owner in calls)
|
||||
|
||||
|
||||
def test_vision_analysis_uses_owner_scoped_primary_and_fallback(monkeypatch, tmp_path):
    """Image analysis forwards the owner to both primary and fallback resolution."""
    recorded = {}

    def fake_settings():
        return {"vision_enabled": True, "vision_model": "gpt-4o"}

    def fake_resolve_vl_model(configured, owner=None):
        recorded["primary"] = (configured, owner)
        return ("http://primary.test/chat/completions", "vision-primary", {"X-Test": "1"})

    def fake_fallbacks(owner=None):
        recorded["fallback_owner"] = owner
        return []

    def fake_llm_call(url, model, messages, headers=None, timeout=None):
        recorded["llm"] = (url, model, headers, timeout, messages)
        return "description"

    monkeypatch.setattr(dp, "_load_vl_settings", fake_settings)
    monkeypatch.setattr(dp, "_resolve_vl_model", fake_resolve_vl_model)
    monkeypatch.setattr(dp, "llm_call", fake_llm_call)

    from src import endpoint_resolver

    monkeypatch.setattr(endpoint_resolver, "resolve_vision_fallback_candidates", fake_fallbacks)

    # The bytes are not a valid PNG; the LLM call is stubbed out above.
    image = tmp_path / "image.png"
    image.write_bytes(b"not-a-real-png-but-base64-is-enough")

    result = dp.analyze_image_with_vl_result(str(image), owner="alice")

    assert result == {
        "text": "description",
        "model": "vision-primary",
    }
    assert recorded["primary"] == ("gpt-4o", "alice")
    assert recorded["fallback_owner"] == "alice"
    # Compare only url/model/headers/timeout; the message payload is not pinned.
    assert recorded["llm"][:4] == (
        "http://primary.test/chat/completions",
        "vision-primary",
        {"X-Test": "1"},
        120,
    )
|
||||
|
||||
|
||||
def test_request_vision_call_sites_pass_owner():
    """Check that each request-level vision call site forwards an owner.

    This is a source-text check: it reads the listed modules under ``ROOT``
    and asserts that the exact owner-forwarding call expression appears in
    each of them.
    """
    expected_calls = {
        ("src", "chat_handler.py"): [
            'analyze_image_with_vl_result(file_info["path"], owner=owner)',
        ],
        ("routes", "upload_routes.py"): [
            "analyze_image_with_vl(path, owner=current_user)",
        ],
        ("src", "document_processor.py"): [
            "_process_pdf(path, owner=owner)",
        ],
        ("routes", "document_routes.py"): [
            "_process_pdf(pdf_path, owner=user)",
            "_resolve_vl_model(vl_model, owner=user)",
        ],
        ("routes", "gallery_routes.py"): [
            "_resolve_vl_model(configured, owner=user)",
        ],
        ("routes", "memory_routes.py"): [
            "_process_pdf(tmp_path, owner=_owner(request))",
        ],
    }

    for parts, snippets in expected_calls.items():
        # Decode explicitly as UTF-8: without an encoding, read_text() uses the
        # locale's preferred encoding, which varies by platform, and these
        # source files may contain non-ASCII characters.
        source = ROOT.joinpath(*parts).read_text(encoding="utf-8")
        for snippet in snippets:
            assert snippet in source, f"{'/'.join(parts)} is missing {snippet!r}"
|
||||
Reference in New Issue
Block a user