Scope vision model resolution by owner (#3009)

2026-06-16 09:45:24 -04:00 · 2026-06-07 12:39:02 +02:00
parent c11ce66e0e
commit ff4508d396
8 changed files with 121 additions and 20 deletions
@@ -229,7 +229,7 @@ class ChatHandler:
                            except Exception:
                                vl_desc = None
                        if not vl_desc:
-                            vl_result = analyze_image_with_vl_result(file_info["path"])
+                            vl_result = analyze_image_with_vl_result(file_info["path"], owner=owner)
                            vl_desc = vl_result.get("text", "")
                            vl_model = vl_result.get("model", "")
                            if vl_desc and not vl_desc.startswith("["):
@@ -109,7 +109,7 @@ def _process_text_file(path: str) -> str:
        return result


-def _process_pdf(path: str) -> str:
+def _process_pdf(path: str, owner: str | None = None) -> str:
    """Process PDF file with text extraction (pypdf). Uses VL model for image-heavy pages."""
    try:
        from pypdf import PdfReader
@@ -133,7 +133,7 @@ def _process_pdf(path: str) -> str:
                            temp_img_path = tmp.name
                        try:
                            img.image.save(temp_img_path, "PNG")  # pypdf -> PIL image
-                            ocr_text = analyze_image_with_vl(temp_img_path)
+                            ocr_text = analyze_image_with_vl(temp_img_path, owner=owner)
                            if ocr_text and "unavailable" not in ocr_text.lower():
                                pdf_text += f"\n\n[Page {page_num + 1} image {img_index + 1} text]: {ocr_text}"
                        finally:
@@ -254,7 +254,7 @@ def _load_vl_settings() -> dict:
        return {}


-def _resolve_vl_model(configured: str) -> tuple:
+def _resolve_vl_model(configured: str, owner: str | None = None) -> tuple:
    """Resolve the vision model to (url, model_id, headers).

    Uses admin-configured model if set, otherwise tries auto-detection
@@ -263,7 +263,7 @@ def _resolve_vl_model(configured: str) -> tuple:
    from src.ai_interaction import _resolve_model

    if configured:
-        return _resolve_model(configured)
+        return _resolve_model(configured, owner=owner)

    # Auto-detect: try known vision-capable models in priority order
    candidates = [
@@ -274,14 +274,14 @@ def _resolve_vl_model(configured: str) -> tuple:
    ]
    for candidate in candidates:
        try:
-            return _resolve_model(candidate)
+            return _resolve_model(candidate, owner=owner)
        except (ValueError, Exception):
            continue

    raise ValueError("No vision model available")


-def analyze_image_with_vl_result(image_path: str) -> dict:
+def analyze_image_with_vl_result(image_path: str, owner: str | None = None) -> dict:
    """Analyze an image and return both text and the model that produced it."""
    logger.info(f"Analyzing image with VL model: {image_path}")
    try:
@@ -291,7 +291,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
        vl_model = settings.get("vision_model", "")

        try:
-            url, model_id, headers = _resolve_vl_model(vl_model)
+            url, model_id, headers = _resolve_vl_model(vl_model, owner=owner)
        except ValueError:
            return {"text": "[No vision model configured — set one in Settings → Vision]", "model": vl_model or ""}

@@ -316,7 +316,7 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
        # — same shape as task/chat but its own list (`vision_model_fallbacks`).
        try:
            from src.endpoint_resolver import resolve_vision_fallback_candidates
-            _vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates()
+            _vl_candidates = [(url, model_id, headers)] + resolve_vision_fallback_candidates(owner=owner)
        except Exception:
            _vl_candidates = [(url, model_id, headers)]

@@ -338,9 +338,9 @@ def analyze_image_with_vl_result(image_path: str) -> dict:
        return {"text": "[VL model unavailable - image not analyzed]", "model": ""}


-def analyze_image_with_vl(image_path: str) -> str:
+def analyze_image_with_vl(image_path: str, owner: str | None = None) -> str:
    """Analyze an image using the admin-configured Vision-Language model."""
-    return analyze_image_with_vl_result(image_path).get("text", "")
+    return analyze_image_with_vl_result(image_path, owner=owner).get("text", "")


 def build_user_content(
@@ -434,7 +434,7 @@ def build_user_content(
                        # Pull the PDF prose once — used as either intro_text
                        # (form path) or the doc body (plain path).
                        try:
-                            pdf_body_text = strip_pdf_content_marker(_process_pdf(path))
+                            pdf_body_text = strip_pdf_content_marker(_process_pdf(path, owner=owner))
                        except Exception:
                            pdf_body_text = None

@@ -517,7 +517,7 @@ def build_user_content(
                    except Exception as e:
                        logger.warning(f"PDF auto-doc creation failed for {path}: {e}")
                if extracted_text is None:
-                    extracted_text = _process_pdf(path)
+                    extracted_text = _process_pdf(path, owner=owner)
            elif mime.startswith("text/") or _is_text_file(path):
                extracted_text = _process_text_file(path)
            else: