feat(ollama): pass base64 image blocks through to Ollama payload

This commit is contained in:
Md.Nahin Alam
2026-04-03 03:29:00 +06:00
parent 63ad0196d6
commit 0e7a2446c7
2 changed files with 82 additions and 2 deletions

View File

@@ -49,6 +49,18 @@ def normalize_ollama_model(model_name: str) -> str:
return model_name
def _extract_ollama_image_data(block: dict) -> str | None:
source = block.get("source")
if not isinstance(source, dict):
return None
if source.get("type") != "base64":
return None
data = source.get("data")
if isinstance(data, str) and data:
return data
return None
def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
ollama_messages = []
for msg in messages:
@@ -58,15 +70,23 @@ def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
ollama_messages.append({"role": role, "content": content})
elif isinstance(content, list):
text_parts = []
image_parts = []
for block in content:
if isinstance(block, dict):
if block.get("type") == "text":
text_parts.append(block.get("text", ""))
elif block.get("type") == "image":
text_parts.append("[image]")
image_data = _extract_ollama_image_data(block)
if image_data:
image_parts.append(image_data)
else:
text_parts.append("[image]")
elif isinstance(block, str):
text_parts.append(block)
ollama_messages.append({"role": role, "content": "\n".join(text_parts)})
ollama_message = {"role": role, "content": "\n".join(text_parts)}
if image_parts:
ollama_message["images"] = image_parts
ollama_messages.append(ollama_message)
return ollama_messages

View File

@@ -38,6 +38,26 @@ def test_converts_image_block_to_placeholder():
assert "[image]" in result[0]["content"]
assert "Describe this" in result[0]["content"]
def test_converts_base64_image_block_to_ollama_images():
    """A base64 image block ends up in ``images``; the text stays in ``content``."""
    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": "YWJjMTIz",
        },
    }
    prompt_block = {"type": "text", "text": "Describe this"}
    conversation = [{"role": "user", "content": [image_block, prompt_block]}]

    converted = anthropic_to_ollama_messages(conversation)

    first_message = converted[0]
    assert first_message["images"] == ["YWJjMTIz"]
    assert "Describe this" in first_message["content"]
def test_converts_multi_turn():
messages = [
{"role": "user", "content": "Hi"},
@@ -118,3 +138,43 @@ async def test_ollama_chat_prepends_system():
)
assert captured["messages"][0]["role"] == "system"
assert "helpful" in captured["messages"][0]["content"]
@pytest.mark.asyncio
async def test_ollama_chat_includes_base64_images_in_payload():
    """The JSON body posted by ``ollama_chat`` carries the base64 image data."""
    sent_payload = {}

    async def mock_post(url, json=None, **kwargs):
        # Record the request body so it can be inspected after the call.
        sent_payload.update(json or {})
        response = MagicMock()
        response.raise_for_status = MagicMock()
        response.json.return_value = {
            "message": {"content": "ok"},
            "created_at": "",
            "prompt_eval_count": 1,
            "eval_count": 1,
        }
        return response

    request_messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": "ZHVtbXk=",
                    },
                },
                {"type": "text", "text": "What is in this image?"},
            ],
        }
    ]

    with patch("ollama_provider.httpx.AsyncClient") as client_cls:
        # Swap the async client's post method for the recording stub above.
        client_cls.return_value.__aenter__.return_value.post = mock_post
        await ollama_chat(model="llama3:8b", messages=request_messages)

    first_message = sent_payload["messages"][0]
    assert first_message["images"] == ["ZHVtbXk="]
    assert "What is in this image?" in first_message["content"]