diff --git a/routes/document_routes.py b/routes/document_routes.py index e2b562159..b4f6aad77 100644 --- a/routes/document_routes.py +++ b/routes/document_routes.py @@ -198,7 +198,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0] try: - body_text = strip_pdf_content_marker(_process_pdf(pdf_path)) + body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user)) except Exception: body_text = None @@ -437,7 +437,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: raise HTTPException(404, "Source PDF could not be located") try: - body_text = strip_pdf_content_marker(_process_pdf(pdf_path)) + body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user)) except Exception as e: logger.error(f"extract_pdf_text failed for {pdf_path}: {e}") raise HTTPException(500, f"Extraction failed: {e}") @@ -1156,7 +1156,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: settings = _load_vl_settings() vl_model = settings.get("vision_model", "") try: - url, model_id, headers = _resolve_vl_model(vl_model) + url, model_id, headers = _resolve_vl_model(vl_model, owner=user) except Exception as e: raise HTTPException(503, f"No vision model available: {e}") diff --git a/routes/gallery_routes.py b/routes/gallery_routes.py index ce6f6271b..3b991e4ce 100644 --- a/routes/gallery_routes.py +++ b/routes/gallery_routes.py @@ -1760,7 +1760,7 @@ def setup_gallery_routes() -> APIRouter: return {"error": "Vision is disabled — enable it in Settings → Vision"} configured = vl_settings.get("vision_model", "") try: - chat_url, model_name, headers = _resolve_vl_model(configured) + chat_url, model_name, headers = _resolve_vl_model(configured, owner=user) except ValueError: return {"error": "No vision model configured — set one in Settings → Vision"} if not chat_url: diff --git a/routes/memory_routes.py b/routes/memory_routes.py index c71146e52..9da566fa7 100644 --- a/routes/memory_routes.py +++ b/routes/memory_routes.py @@ -371,7 +371,7 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM tmp.write(content) tmp_path = tmp.name try: - text = _process_pdf(tmp_path) + text = _process_pdf(tmp_path, owner=_owner(request)) finally: os.unlink(tmp_path) else: diff --git a/routes/upload_routes.py b/routes/upload_routes.py index f348453ac..489e4923a 100644 --- a/routes/upload_routes.py +++ b/routes/upload_routes.py @@ -225,7 +225,7 @@ def setup_upload_routes(upload_handler): logger.warning(f"Vision cache read failed for {file_id}: {e}") from src.document_processor import analyze_image_with_vl try: - text = analyze_image_with_vl(path) or "" + text = analyze_image_with_vl(path, owner=current_user) or "" except Exception as e: logger.error(f"Vision analysis failed for {file_id}: {e}") raise HTTPException(500, f"Vision analysis failed: {e}") diff --git a/src/chat_handler.py b/src/chat_handler.py index 330ffbe6b..45666dd8d 100644 --- a/src/chat_handler.py +++ b/src/chat_handler.py @@ -229,7 +229,7 @@ class ChatHandler: except Exception: vl_desc = None if not vl_desc: - vl_result = analyze_image_with_vl_result(file_info["path"]) + vl_result = analyze_image_with_vl_result(file_info["path"], owner=owner) vl_desc = vl_result.get("text", "") vl_model = vl_result.get("model", "") if vl_desc and not vl_desc.startswith("["): diff --git a/src/document_processor.py b/src/document_processor.py index 1d9a1ca9a..1d09673a1 100644 --- a/src/document_processor.py +++ b/src/document_processor.py @@ -109,7 +109,7 @@ def _process_text_file(path: str) -> str: return result -def _process_pdf(path: str) -> str: +def _process_pdf(path: str, owner: str | None = None) -> str: """Process PDF file with text extraction (pypdf). Uses VL model for image-heavy pages.""" try: from pypdf import PdfReader @@ -133,7 +133,7 @@ def _process_pdf(path: str) -> str: temp_img_path = tmp.name try: img.image.save(temp_img_path, "PNG") # pypdf -> PIL image - ocr_text = analyze_image_with_vl(temp_img_path) + ocr_text = analyze_image_with_vl(temp_img_path, owner=owner) if ocr_text and "unavailable" not in ocr_text.lower(): pdf_text += f"\n\n[Page {page_num + 1} image {img_index + 1} text]: {ocr_text}" finally: @@ -254,7 +254,7 @@ def _load_vl_settings() -> dict: return {} -def _resolve_vl_model(configured: str) -> tuple: +def _resolve_vl_model(configured: str, owner: str | None = None) -> tuple: """Resolve the vision model to (url, model_id, headers). Uses admin-configured model if set, otherwise tries auto-detection @@ -263,7 +263,7 @@ def _resolve_vl_model(configured: str) -> tuple: from src.ai_interaction import _resolve_model if configured: - return _resolve_model(configured) + return _resolve_model(configured, owner=owner) # Auto-detect: try known vision-capable models in priority order candidates = [ @@ -274,14 +274,14 @@ def _resolve_vl_model(configured: str) -> tuple: ] for candidate in candidates: try: - return _resolve_model(candidate) + return _resolve_model(candidate, owner=owner) except (ValueError, Exception): continue raise ValueError("No vision model available") -def analyze_image_with_vl_result(image_path: str) -> dict: +def analyze_image_with_vl_result(image_path: str, owner: str | None = None) -> dict: """Analyze an image and return both text and the model that produced it.""" logger.info(f"Analyzing image with VL model: {image_path}") try: @@ -291,7 +291,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict: vl_model = settings.get("vision_model", "") try: - url, model_id, headers = _resolve_vl_model(vl_model) + url, model_id, headers = _resolve_vl_model(vl_model, owner=owner) except ValueError: return {"text": "[No vision model configured — set one in Settings → Vision]", "model": vl_model or ""} @@ -316,7 +316,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict: # — same shape as task/chat but its own list (`vision_model_fallbacks`). try: from src.endpoint_resolver import resolve_vision_fallback_candidates - _vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates() + _vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates(owner=owner) except Exception: _vl_candidates = [(url, model_id, headers)] @@ -338,9 +338,9 @@ def analyze_image_with_vl_result(image_path: str) -> dict: return {"text": "[VL model unavailable - image not analyzed]", "model": ""} -def analyze_image_with_vl(image_path: str) -> str: +def analyze_image_with_vl(image_path: str, owner: str | None = None) -> str: """Analyze an image using the admin-configured Vision-Language model.""" - return analyze_image_with_vl_result(image_path).get("text", "") + return analyze_image_with_vl_result(image_path, owner=owner).get("text", "") def build_user_content( @@ -434,7 +434,7 @@ def build_user_content( # Pull the PDF prose once — used as either intro_text # (form path) or the doc body (plain path). try: - pdf_body_text = strip_pdf_content_marker(_process_pdf(path)) + pdf_body_text = strip_pdf_content_marker(_process_pdf(path, owner=owner)) except Exception: pdf_body_text = None @@ -517,7 +517,7 @@ def build_user_content( except Exception as e: logger.warning(f"PDF auto-doc creation failed for {path}: {e}") if extracted_text is None: - extracted_text = _process_pdf(path) + extracted_text = _process_pdf(path, owner=owner) elif mime.startswith("text/") or _is_text_file(path): extracted_text = _process_text_file(path) else: diff --git a/tests/test_build_user_content_pdf_marker.py b/tests/test_build_user_content_pdf_marker.py index d57e0eff8..d2bb5b421 100644 --- a/tests/test_build_user_content_pdf_marker.py +++ b/tests/test_build_user_content_pdf_marker.py @@ -35,7 +35,7 @@ def test_pdf_body_marker_stripped_without_eating_text(monkeypatch, tmp_path): # Shape _process_pdf actually returns: marker, then a page-text marker, then body. raw = "\n\n[PDF content]:\n\n[Page 1 text]:\nto the board, the agenda is set" - monkeypatch.setattr(dp, "_process_pdf", lambda path: raw) + monkeypatch.setattr(dp, "_process_pdf", lambda path, owner=None: raw) monkeypatch.setattr(pdf_forms, "has_form_fields", lambda path: False) monkeypatch.setattr(pdf_form_doc, "create_plain_pdf_document", lambda **kw: "doc-123") diff --git a/tests/test_vision_owner_scope.py b/tests/test_vision_owner_scope.py new file mode 100644 index 000000000..90a17adb3 --- /dev/null +++ b/tests/test_vision_owner_scope.py @@ -0,0 +1,101 @@ +from pathlib import Path + +from src import ai_interaction +from src import document_processor as dp + + +ROOT = Path(__file__).resolve().parents[1] + + +def test_configured_vision_model_resolution_passes_owner(monkeypatch): + seen = [] + + def fake_resolve_model(spec, owner=None): + seen.append((spec, owner)) + return ("http://example.test/chat/completions", spec, {"Authorization": "Bearer token"}) + + monkeypatch.setattr(ai_interaction, "_resolve_model", fake_resolve_model) + + assert dp._resolve_vl_model("gpt-4o", owner="alice") == ( + "http://example.test/chat/completions", + "gpt-4o", + {"Authorization": "Bearer token"}, + ) + assert seen == [("gpt-4o", "alice")] + + +def test_auto_detected_vision_model_resolution_passes_owner(monkeypatch): + seen = [] + + def fake_resolve_model(spec, owner=None): + seen.append((spec, owner)) + if spec == "llava": + return ("http://example.test/chat/completions", spec, {}) + raise ValueError("not available") + + monkeypatch.setattr(ai_interaction, "_resolve_model", fake_resolve_model) + + assert dp._resolve_vl_model("", owner="alice") == ( + "http://example.test/chat/completions", + "llava", + {}, + ) + assert seen + assert all(owner == "alice" for _spec, owner in seen) + + +def test_vision_analysis_uses_owner_scoped_primary_and_fallback(monkeypatch, tmp_path): + seen = {} + + def fake_resolve_vl_model(configured, owner=None): + seen["primary"] = (configured, owner) + return ("http://primary.test/chat/completions", "vision-primary", {"X-Test": "1"}) + + def fake_fallbacks(owner=None): + seen["fallback_owner"] = owner + return [] + + def fake_llm_call(url, model, messages, headers=None, timeout=None): + seen["llm"] = (url, model, headers, timeout, messages) + return "description" + + monkeypatch.setattr(dp, "_load_vl_settings", lambda: {"vision_enabled": True, "vision_model": "gpt-4o"}) + monkeypatch.setattr(dp, "_resolve_vl_model", fake_resolve_vl_model) + monkeypatch.setattr(dp, "llm_call", fake_llm_call) + + from src import endpoint_resolver + + monkeypatch.setattr(endpoint_resolver, "resolve_vision_fallback_candidates", fake_fallbacks) + + image = tmp_path / "image.png" + image.write_bytes(b"not-a-real-png-but-base64-is-enough") + + assert dp.analyze_image_with_vl_result(str(image), owner="alice") == { + "text": "description", + "model": "vision-primary", + } + assert seen["primary"] == ("gpt-4o", "alice") + assert seen["fallback_owner"] == "alice" + assert seen["llm"][:4] == ( + "http://primary.test/chat/completions", + "vision-primary", + {"X-Test": "1"}, + 120, + ) + + +def test_request_vision_call_sites_pass_owner(): + chat_source = (ROOT / "src" / "chat_handler.py").read_text() + processor_source = (ROOT / "src" / "document_processor.py").read_text() + upload_source = (ROOT / "routes" / "upload_routes.py").read_text() + document_source = (ROOT / "routes" / "document_routes.py").read_text() + gallery_source = (ROOT / "routes" / "gallery_routes.py").read_text() + memory_source = (ROOT / "routes" / "memory_routes.py").read_text() + + assert 'analyze_image_with_vl_result(file_info["path"], owner=owner)' in chat_source + assert "analyze_image_with_vl(path, owner=current_user)" in upload_source + assert "_process_pdf(path, owner=owner)" in processor_source + assert "_process_pdf(pdf_path, owner=user)" in document_source + assert "_resolve_vl_model(vl_model, owner=user)" in document_source + assert "_resolve_vl_model(configured, owner=user)" in gallery_source + assert "_process_pdf(tmp_path, owner=_owner(request))" in memory_source