Recognize local vision models so their images aren't dropped (#185)

An image attachment only got through if the model name was on a short
built-in list. Anything else was treated as text-only and the image was
quietly dropped, so the model never saw it. That left out a lot of the
smaller vision models you can run locally (moondream was the one I hit).

Pulled the check into is_vision_model() in chat_helpers, broadened it to
cover those, and added a test. Models that already worked are unaffected.

Fixes #124.
This commit is contained in:
Håkon Julius Størholt
2026-06-01 06:09:21 +02:00
committed by GitHub
parent 32e7cec362
commit 91d3511580
3 changed files with 62 additions and 12 deletions
+30
View File
@@ -23,6 +23,36 @@ def extract_urls(text: str) -> List[str]:
return cleaned_urls
# Model-name substrings that signal native image input. A missed match here
# silently drops the image from the chat request (it gets swapped for a text
# caption), so the model never sees it. Keep this broad, especially for local
# models (Ollama/llama.cpp) that ship under many names. See issue #124.
_VISION_MODEL_KEYWORDS = (
    # hosted
    "gpt-4o", "gpt-4.1", "gpt-4.5", "gpt-4-turbo", "gpt-4-vision",
    # Claude 3.x IDs put the version before the tier
    # ("claude-3-5-sonnet-...", "claude-3-opus-..."), so the tier-first
    # keywords on the next line never match them; "claude-3" does.
    "claude-3",
    "claude-sonnet", "claude-opus", "claude-haiku", "gemini",
    # open / local
    "vision", "llava", "bakllava", "moondream", "pixtral", "minicpm",
    "internvl", "cogvlm", "qwen-vl", "qwen2-vl", "qwen3-vl", "qwen3vl",
)

# Catches the "*-VL-*" / "*VL*" family not covered by a literal keyword above
# (e.g. Qwen2.5-VL and various tags): a standalone "vl" token, plus "vlm".
# The lookarounds only reject ASCII lowercase letters, so the pattern must be
# applied to an already-lowercased name; digits and punctuation next to "vl"
# (as in "qwen2.5vl" or "deepseek-vl2") still count as a match.
_VISION_VL_RE = re.compile(r'(?<![a-z])vl(?![a-z])|vlm')


def is_vision_model(model_name: str) -> bool:
    """Best-effort check of whether a model can natively accept images.

    Decides whether image attachments get passed through to the model or
    swapped for a separate caption. Err toward True, since a false negative
    drops the image entirely. See issue #124.

    Args:
        model_name: Model identifier as configured or reported by the
            backend. Matching is case-insensitive. ``None``, an empty string,
            or a non-string value is treated as "not a vision model".

    Returns:
        True if the name contains any known vision keyword or a "vl"/"vlm"
        marker, False otherwise.
    """
    # This is a best-effort classifier: unusable input should answer "no"
    # rather than raise from .lower() in the middle of building a request.
    if not isinstance(model_name, str):
        return False
    lowered = model_name.lower()
    if any(keyword in lowered for keyword in _VISION_MODEL_KEYWORDS):
        return True
    return _VISION_VL_RE.search(lowered) is not None
def validate_message(message: str) -> str:
"""Validate message input."""
if not message: