mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-24 05:35:31 -04:00
e8175c9535
* fix: Images cannot be seen by model that is vision capable * fix: skip http(s) image_url for Ollama (images[] is base64-only) --------- Co-authored-by: michaelxer <michaelxer@users.noreply.github.com>
110 lines
4.2 KiB
Python
110 lines
4.2 KiB
Python
"""Regression tests for Ollama-native multimodal image routing (issue #4723).

Odysseus builds user messages in OpenAI style::

    {"role": "user", "content": [
        {"type": "text", "text": "..."},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
    ]}

Native Ollama ``/api/chat`` does **not** accept a list for ``content``. It
expects ``content`` to be a string and images carried separately on
``images`` (a list of raw base64 strings, no ``data:`` prefix). Without
this conversion the image block silently never reaches the vision model —
the model reports "I can't see the image" even though it is vision-capable
and the request succeeded.
"""
|
|
from src import llm_core
|
|
|
|
|
|
def _multimodal_msg():
    """Return a fresh OpenAI-style user message with text and two images.

    The ``content`` list holds one ``text`` part followed by two
    ``image_url`` parts. Both images are ``data:`` URIs (PNG, then JPEG)
    whose base64 payloads are ``AAAA`` and ``BBBB`` respectively.
    """
    return {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,BBBB"}},
        ],
    }
|
|
|
|
|
|
def test_ollama_payload_converts_openai_image_blocks_to_native_images_array():
    """OpenAI ``image_url`` data URIs end up in the message's ``images`` list.

    Builds a payload from the two-image fixture message and checks that the
    resulting message has string ``content`` containing the prompt text and
    an ``images`` list holding the bare base64 payloads in order.
    """
    payload = llm_core._build_ollama_payload(
        "gemma4:e4b", [_multimodal_msg()], temperature=0.0, max_tokens=0,
    )
    msg = payload["messages"][0]
    # Content must be a string, not a list — native Ollama rejects lists.
    assert isinstance(msg["content"], str)
    assert "What is in this picture?" in msg["content"]
    # Base64 data extracted into the native images array (no data: prefix).
    assert msg["images"] == ["AAAA", "BBBB"]
|
|
|
|
|
|
def test_ollama_payload_skips_http_image_url():
    """Non-data-URI image_url values are skipped with a warning because
    native Ollama images[] accepts base64 only.

    NOTE(review): only the skip is asserted below; the warning itself is not
    captured or checked by this test.
    """
    msg = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Look"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }
    payload = llm_core._build_ollama_payload(
        "gemma4:e4b", [msg], temperature=0.0, max_tokens=0,
    )
    out = payload["messages"][0]
    assert out["content"] == "Look"
    # HTTP URL is NOT added to images — Ollama cannot fetch it.
    assert "images" not in out
|
|
|
|
|
|
def test_ollama_payload_preserves_native_images_array():
    """If the caller already used Ollama's native shape, leave it alone.

    A message with string ``content`` and an existing ``images`` list must
    come out of the payload builder with both values unchanged.
    """
    msg = {
        "role": "user",
        "content": "Describe",
        "images": ["XXXX"],
    }
    payload = llm_core._build_ollama_payload(
        "gemma4:e4b", [msg], temperature=0.0, max_tokens=0,
    )
    out = payload["messages"][0]
    assert out["content"] == "Describe"
    assert out["images"] == ["XXXX"]
|
|
|
|
|
|
def test_ollama_payload_merges_native_and_openai_images():
    """A message that carries both native ``images`` and OpenAI ``image_url``
    blocks (e.g. assembled by different code paths) must produce one combined
    list rather than drop either half.

    The expected order is the pre-existing native entries first, followed by
    the payloads extracted from the ``image_url`` blocks.
    """
    msg = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Hi"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,OPENAI"}},
        ],
        "images": ["NATIVE"],
    }
    payload = llm_core._build_ollama_payload(
        "gemma4:e4b", [msg], temperature=0.0, max_tokens=0,
    )
    out = payload["messages"][0]
    assert out["content"] == "Hi"
    assert out["images"] == ["NATIVE", "OPENAI"]
|
|
|
|
|
|
def test_ollama_payload_text_only_message_untouched():
    """A plain string-content message passes through with no extra keys."""
    msgs = [{"role": "user", "content": "hello"}]
    payload = llm_core._build_ollama_payload(
        "gemma4:e4b", msgs, temperature=0.0, max_tokens=0,
    )
    # Exact equality: in particular, no ``images`` key may be introduced.
    assert payload["messages"][0] == {"role": "user", "content": "hello"}
|
|
|
|
|
|
def test_ollama_payload_string_content_with_only_image_block():
    """A message whose content list has only image_url blocks (no text part)
    still yields a string ``content`` alongside the extracted image.

    NOTE(review): earlier wording promised a *non-empty* content string, but
    only the type is asserted here. Tighten the assertion if the payload
    builder is meant to guarantee placeholder text.
    """
    msg = {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,QQ=="}},
        ],
    }
    payload = llm_core._build_ollama_payload(
        "gemma4:e4b", [msg], temperature=0.0, max_tokens=0,
    )
    out = payload["messages"][0]
    assert isinstance(out["content"], str)
    # Trailing ``=`` padding is part of the base64 payload and must survive.
    assert out["images"] == ["QQ=="]
|