Scope vision model resolution by owner (#3009)

This commit is contained in:
Vykos
2026-06-07 12:39:02 +02:00
committed by GitHub
parent c11ce66e0e
commit ff4508d396
8 changed files with 121 additions and 20 deletions
+3 -3
View File
@@ -198,7 +198,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0] title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0]
try: try:
body_text = strip_pdf_content_marker(_process_pdf(pdf_path)) body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user))
except Exception: except Exception:
body_text = None body_text = None
@@ -437,7 +437,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
raise HTTPException(404, "Source PDF could not be located") raise HTTPException(404, "Source PDF could not be located")
try: try:
body_text = strip_pdf_content_marker(_process_pdf(pdf_path)) body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user))
except Exception as e: except Exception as e:
logger.error(f"extract_pdf_text failed for {pdf_path}: {e}") logger.error(f"extract_pdf_text failed for {pdf_path}: {e}")
raise HTTPException(500, f"Extraction failed: {e}") raise HTTPException(500, f"Extraction failed: {e}")
@@ -1156,7 +1156,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
settings = _load_vl_settings() settings = _load_vl_settings()
vl_model = settings.get("vision_model", "") vl_model = settings.get("vision_model", "")
try: try:
url, model_id, headers = _resolve_vl_model(vl_model) url, model_id, headers = _resolve_vl_model(vl_model, owner=user)
except Exception as e: except Exception as e:
raise HTTPException(503, f"No vision model available: {e}") raise HTTPException(503, f"No vision model available: {e}")
+1 -1
View File
@@ -1760,7 +1760,7 @@ def setup_gallery_routes() -> APIRouter:
return {"error": "Vision is disabled — enable it in Settings → Vision"} return {"error": "Vision is disabled — enable it in Settings → Vision"}
configured = vl_settings.get("vision_model", "") configured = vl_settings.get("vision_model", "")
try: try:
chat_url, model_name, headers = _resolve_vl_model(configured) chat_url, model_name, headers = _resolve_vl_model(configured, owner=user)
except ValueError: except ValueError:
return {"error": "No vision model configured — set one in Settings → Vision"} return {"error": "No vision model configured — set one in Settings → Vision"}
if not chat_url: if not chat_url:
+1 -1
View File
@@ -371,7 +371,7 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
tmp.write(content) tmp.write(content)
tmp_path = tmp.name tmp_path = tmp.name
try: try:
text = _process_pdf(tmp_path) text = _process_pdf(tmp_path, owner=_owner(request))
finally: finally:
os.unlink(tmp_path) os.unlink(tmp_path)
else: else:
+1 -1
View File
@@ -225,7 +225,7 @@ def setup_upload_routes(upload_handler):
logger.warning(f"Vision cache read failed for {file_id}: {e}") logger.warning(f"Vision cache read failed for {file_id}: {e}")
from src.document_processor import analyze_image_with_vl from src.document_processor import analyze_image_with_vl
try: try:
text = analyze_image_with_vl(path) or "" text = analyze_image_with_vl(path, owner=current_user) or ""
except Exception as e: except Exception as e:
logger.error(f"Vision analysis failed for {file_id}: {e}") logger.error(f"Vision analysis failed for {file_id}: {e}")
raise HTTPException(500, f"Vision analysis failed: {e}") raise HTTPException(500, f"Vision analysis failed: {e}")
+1 -1
View File
@@ -229,7 +229,7 @@ class ChatHandler:
except Exception: except Exception:
vl_desc = None vl_desc = None
if not vl_desc: if not vl_desc:
vl_result = analyze_image_with_vl_result(file_info["path"]) vl_result = analyze_image_with_vl_result(file_info["path"], owner=owner)
vl_desc = vl_result.get("text", "") vl_desc = vl_result.get("text", "")
vl_model = vl_result.get("model", "") vl_model = vl_result.get("model", "")
if vl_desc and not vl_desc.startswith("["): if vl_desc and not vl_desc.startswith("["):
+12 -12
View File
@@ -109,7 +109,7 @@ def _process_text_file(path: str) -> str:
return result return result
def _process_pdf(path: str) -> str: def _process_pdf(path: str, owner: str | None = None) -> str:
"""Process PDF file with text extraction (pypdf). Uses VL model for image-heavy pages.""" """Process PDF file with text extraction (pypdf). Uses VL model for image-heavy pages."""
try: try:
from pypdf import PdfReader from pypdf import PdfReader
@@ -133,7 +133,7 @@ def _process_pdf(path: str) -> str:
temp_img_path = tmp.name temp_img_path = tmp.name
try: try:
img.image.save(temp_img_path, "PNG") # pypdf -> PIL image img.image.save(temp_img_path, "PNG") # pypdf -> PIL image
ocr_text = analyze_image_with_vl(temp_img_path) ocr_text = analyze_image_with_vl(temp_img_path, owner=owner)
if ocr_text and "unavailable" not in ocr_text.lower(): if ocr_text and "unavailable" not in ocr_text.lower():
pdf_text += f"\n\n[Page {page_num + 1} image {img_index + 1} text]: {ocr_text}" pdf_text += f"\n\n[Page {page_num + 1} image {img_index + 1} text]: {ocr_text}"
finally: finally:
@@ -254,7 +254,7 @@ def _load_vl_settings() -> dict:
return {} return {}
def _resolve_vl_model(configured: str) -> tuple: def _resolve_vl_model(configured: str, owner: str | None = None) -> tuple:
"""Resolve the vision model to (url, model_id, headers). """Resolve the vision model to (url, model_id, headers).
Uses admin-configured model if set, otherwise tries auto-detection Uses admin-configured model if set, otherwise tries auto-detection
@@ -263,7 +263,7 @@ def _resolve_vl_model(configured: str) -> tuple:
from src.ai_interaction import _resolve_model from src.ai_interaction import _resolve_model
if configured: if configured:
return _resolve_model(configured) return _resolve_model(configured, owner=owner)
# Auto-detect: try known vision-capable models in priority order # Auto-detect: try known vision-capable models in priority order
candidates = [ candidates = [
@@ -274,14 +274,14 @@ def _resolve_vl_model(configured: str) -> tuple:
] ]
for candidate in candidates: for candidate in candidates:
try: try:
return _resolve_model(candidate) return _resolve_model(candidate, owner=owner)
except (ValueError, Exception): except (ValueError, Exception):
continue continue
raise ValueError("No vision model available") raise ValueError("No vision model available")
def analyze_image_with_vl_result(image_path: str) -> dict: def analyze_image_with_vl_result(image_path: str, owner: str | None = None) -> dict:
"""Analyze an image and return both text and the model that produced it.""" """Analyze an image and return both text and the model that produced it."""
logger.info(f"Analyzing image with VL model: {image_path}") logger.info(f"Analyzing image with VL model: {image_path}")
try: try:
@@ -291,7 +291,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
vl_model = settings.get("vision_model", "") vl_model = settings.get("vision_model", "")
try: try:
url, model_id, headers = _resolve_vl_model(vl_model) url, model_id, headers = _resolve_vl_model(vl_model, owner=owner)
except ValueError: except ValueError:
return {"text": "[No vision model configured — set one in Settings → Vision]", "model": vl_model or ""} return {"text": "[No vision model configured — set one in Settings → Vision]", "model": vl_model or ""}
@@ -316,7 +316,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
# — same shape as task/chat but its own list (`vision_model_fallbacks`). # — same shape as task/chat but its own list (`vision_model_fallbacks`).
try: try:
from src.endpoint_resolver import resolve_vision_fallback_candidates from src.endpoint_resolver import resolve_vision_fallback_candidates
_vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates() _vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates(owner=owner)
except Exception: except Exception:
_vl_candidates = [(url, model_id, headers)] _vl_candidates = [(url, model_id, headers)]
@@ -338,9 +338,9 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
return {"text": "[VL model unavailable - image not analyzed]", "model": ""} return {"text": "[VL model unavailable - image not analyzed]", "model": ""}
def analyze_image_with_vl(image_path: str) -> str: def analyze_image_with_vl(image_path: str, owner: str | None = None) -> str:
"""Analyze an image using the admin-configured Vision-Language model.""" """Analyze an image using the admin-configured Vision-Language model."""
return analyze_image_with_vl_result(image_path).get("text", "") return analyze_image_with_vl_result(image_path, owner=owner).get("text", "")
def build_user_content( def build_user_content(
@@ -434,7 +434,7 @@ def build_user_content(
# Pull the PDF prose once — used as either intro_text # Pull the PDF prose once — used as either intro_text
# (form path) or the doc body (plain path). # (form path) or the doc body (plain path).
try: try:
pdf_body_text = strip_pdf_content_marker(_process_pdf(path)) pdf_body_text = strip_pdf_content_marker(_process_pdf(path, owner=owner))
except Exception: except Exception:
pdf_body_text = None pdf_body_text = None
@@ -517,7 +517,7 @@ def build_user_content(
except Exception as e: except Exception as e:
logger.warning(f"PDF auto-doc creation failed for {path}: {e}") logger.warning(f"PDF auto-doc creation failed for {path}: {e}")
if extracted_text is None: if extracted_text is None:
extracted_text = _process_pdf(path) extracted_text = _process_pdf(path, owner=owner)
elif mime.startswith("text/") or _is_text_file(path): elif mime.startswith("text/") or _is_text_file(path):
extracted_text = _process_text_file(path) extracted_text = _process_text_file(path)
else: else:
+1 -1
View File
@@ -35,7 +35,7 @@ def test_pdf_body_marker_stripped_without_eating_text(monkeypatch, tmp_path):
# Shape _process_pdf actually returns: marker, then a page-text marker, then body. # Shape _process_pdf actually returns: marker, then a page-text marker, then body.
raw = "\n\n[PDF content]:\n\n[Page 1 text]:\nto the board, the agenda is set" raw = "\n\n[PDF content]:\n\n[Page 1 text]:\nto the board, the agenda is set"
monkeypatch.setattr(dp, "_process_pdf", lambda path: raw) monkeypatch.setattr(dp, "_process_pdf", lambda path, owner=None: raw)
monkeypatch.setattr(pdf_forms, "has_form_fields", lambda path: False) monkeypatch.setattr(pdf_forms, "has_form_fields", lambda path: False)
monkeypatch.setattr(pdf_form_doc, "create_plain_pdf_document", lambda **kw: "doc-123") monkeypatch.setattr(pdf_form_doc, "create_plain_pdf_document", lambda **kw: "doc-123")
+101
View File
@@ -0,0 +1,101 @@
from pathlib import Path
from src import ai_interaction
from src import document_processor as dp
# Base directory used to locate project sources: the parent of the directory
# that holds this test module. The tests below expect it to contain the
# src/ and routes/ directories.
ROOT = Path(__file__).resolve().parents[1]
def test_configured_vision_model_resolution_passes_owner(monkeypatch):
    """An explicitly configured vision model is resolved with the caller's owner."""
    calls = []

    def fake_resolve_model(spec, owner=None):
        # Record every lookup so the forwarded owner can be checked afterwards.
        calls.append((spec, owner))
        return ("http://example.test/chat/completions", spec, {"Authorization": "Bearer token"})

    monkeypatch.setattr(ai_interaction, "_resolve_model", fake_resolve_model)

    resolved = dp._resolve_vl_model("gpt-4o", owner="alice")

    expected = (
        "http://example.test/chat/completions",
        "gpt-4o",
        {"Authorization": "Bearer token"},
    )
    assert resolved == expected
    # Exactly one lookup, for the configured spec, scoped to the owner.
    assert calls == [("gpt-4o", "alice")]
def test_auto_detected_vision_model_resolution_passes_owner(monkeypatch):
    """With no configured model, every auto-detection lookup carries the owner."""
    lookups = []

    def fake_resolve_model(spec, owner=None):
        lookups.append((spec, owner))
        # Only "llava" resolves; any other candidate is reported as unavailable.
        if spec != "llava":
            raise ValueError("not available")
        return ("http://example.test/chat/completions", spec, {})

    monkeypatch.setattr(ai_interaction, "_resolve_model", fake_resolve_model)

    resolved = dp._resolve_vl_model("", owner="alice")

    assert resolved == ("http://example.test/chat/completions", "llava", {})
    # At least one candidate was tried, and none was tried without the owner.
    assert lookups
    assert all(owner == "alice" for _spec, owner in lookups)
def test_vision_analysis_uses_owner_scoped_primary_and_fallback(monkeypatch, tmp_path):
    """Image analysis forwards the owner to primary and fallback model resolution."""
    recorded = {}

    def fake_resolve_vl_model(configured, owner=None):
        recorded["primary"] = (configured, owner)
        return ("http://primary.test/chat/completions", "vision-primary", {"X-Test": "1"})

    def fake_fallbacks(owner=None):
        recorded["fallback_owner"] = owner
        return []

    def fake_llm_call(url, model, messages, headers=None, timeout=None):
        recorded["llm"] = (url, model, headers, timeout, messages)
        return "description"

    vl_settings = {"vision_enabled": True, "vision_model": "gpt-4o"}
    monkeypatch.setattr(dp, "_load_vl_settings", lambda: vl_settings)
    monkeypatch.setattr(dp, "_resolve_vl_model", fake_resolve_vl_model)
    monkeypatch.setattr(dp, "llm_call", fake_llm_call)

    from src import endpoint_resolver

    monkeypatch.setattr(endpoint_resolver, "resolve_vision_fallback_candidates", fake_fallbacks)

    # The bytes are not a valid PNG; the stubbed llm_call never decodes them.
    image = tmp_path / "image.png"
    image.write_bytes(b"not-a-real-png-but-base64-is-enough")

    result = dp.analyze_image_with_vl_result(str(image), owner="alice")

    assert result == {
        "text": "description",
        "model": "vision-primary",
    }
    assert recorded["primary"] == ("gpt-4o", "alice")
    assert recorded["fallback_owner"] == "alice"
    # Only url, model, headers and timeout are pinned; the messages payload is not.
    url_model_headers_timeout = recorded["llm"][:4]
    assert url_model_headers_timeout == (
        "http://primary.test/chat/completions",
        "vision-primary",
        {"X-Test": "1"},
        120,
    )
def test_request_vision_call_sites_pass_owner():
    """Check that each request-driven vision call site passes an owner.

    This is a source-text check: every listed module is read from disk and
    must contain the exact call expression(s) shown, including the
    ``owner=...`` keyword argument.
    """
    # Path parts relative to ROOT, mapped to the call snippets required in that file.
    expected_calls = {
        ("src", "chat_handler.py"): (
            'analyze_image_with_vl_result(file_info["path"], owner=owner)',
        ),
        ("src", "document_processor.py"): (
            "_process_pdf(path, owner=owner)",
        ),
        ("routes", "upload_routes.py"): (
            "analyze_image_with_vl(path, owner=current_user)",
        ),
        ("routes", "document_routes.py"): (
            "_process_pdf(pdf_path, owner=user)",
            "_resolve_vl_model(vl_model, owner=user)",
        ),
        ("routes", "gallery_routes.py"): (
            "_resolve_vl_model(configured, owner=user)",
        ),
        ("routes", "memory_routes.py"): (
            "_process_pdf(tmp_path, owner=_owner(request))",
        ),
    }

    for parts, snippets in expected_calls.items():
        # Decode explicitly as UTF-8: some of these modules contain non-ASCII
        # text in string literals, and the default encoding of read_text()
        # depends on the platform locale.
        source = ROOT.joinpath(*parts).read_text(encoding="utf-8")
        for snippet in snippets:
            assert snippet in source, f"{'/'.join(parts)} is missing {snippet!r}"