mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 01:35:36 -04:00
Recognize gemma3/llama4/mistral-small3.1+/multimodal as vision models (#1430)
is_vision_model() classified several genuinely multimodal families as text-only because their names contain neither "vision" nor "vl": Gemma 3 (4b+), Llama 4, Mistral Small 3.1/3.2, and *-multimodal models (e.g. phi-4-multimodal). For those, the attached image was stripped before the request, so the model never saw it — a "can't read the image" report (issue #1274), common with Ollama tags like gemma3:4b.

Add those keywords (plus a generic "multimodal"). Per the file's err-toward-True policy (#124), a rare text-only tag treated as vision is a safer failure than dropping a real image. Guard tests confirm the text-only siblings (gemma2, plain gemma, mistral-small, phi-3) are not over-matched.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,3 +28,21 @@ def test_text_only_models_not_flagged():
|
||||
|
||||
def test_none_is_safe():
    """A missing model name (``None``) must yield exactly ``False``."""
    verdict = is_vision_model(None)
    # Identity check: the result must be the bool False, not merely falsy.
    assert verdict is False
|
||||
|
||||
|
||||
def test_recognizes_multimodal_families_without_vision_in_name():
    """Vision-capable families lacking "vision"/"vl" in the name are detected.

    Regression guard for issue #1274: these names were previously treated as
    text-only, so the attached image was dropped and the model never saw it.
    """
    vision_capable_names = (
        # Gemma 3
        "gemma3:4b",
        "gemma3",
        "gemma-3-27b-it",
        # Llama 4
        "llama4:scout",
        "llama4",
        "llama-4-maverick",
        # Mistral Small 3.1 / 3.2
        "mistral-small3.1",
        "mistral-small-3.2",
        # Generic "multimodal" suffix
        "phi-4-multimodal",
        "phi4-multimodal",
    )
    for name in vision_capable_names:
        assert is_vision_model(name), f"{name!r} should be detected as vision-capable"
|
||||
|
||||
|
||||
def test_new_keywords_do_not_overmatch_text_models():
    """Text-only siblings of the added families must not be flagged as vision."""
    text_only_names = (
        "gemma2:9b",
        "gemma:7b",
        "llama3.3",
        "mistral-small",
        "phi-3-mini",
    )
    for name in text_only_names:
        assert not is_vision_model(name), f"{name!r} should not be flagged as vision"
|
||||
|
||||
Reference in New Issue
Block a user