Merge pull request #206 from alamnahin/feat/ollama-image-passthrough
feat(ollama): pass Anthropic base64 image blocks to Ollama images payload
This commit is contained in:
@@ -49,6 +49,18 @@ def normalize_ollama_model(model_name: str) -> str:
|
|||||||
return model_name
|
return model_name
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_ollama_image_data(block: dict) -> str | None:
|
||||||
|
source = block.get("source")
|
||||||
|
if not isinstance(source, dict):
|
||||||
|
return None
|
||||||
|
if source.get("type") != "base64":
|
||||||
|
return None
|
||||||
|
data = source.get("data")
|
||||||
|
if isinstance(data, str) and data:
|
||||||
|
return data
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
|
def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
|
||||||
ollama_messages = []
|
ollama_messages = []
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
@@ -58,15 +70,23 @@ def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
|
|||||||
ollama_messages.append({"role": role, "content": content})
|
ollama_messages.append({"role": role, "content": content})
|
||||||
elif isinstance(content, list):
|
elif isinstance(content, list):
|
||||||
text_parts = []
|
text_parts = []
|
||||||
|
image_parts = []
|
||||||
for block in content:
|
for block in content:
|
||||||
if isinstance(block, dict):
|
if isinstance(block, dict):
|
||||||
if block.get("type") == "text":
|
if block.get("type") == "text":
|
||||||
text_parts.append(block.get("text", ""))
|
text_parts.append(block.get("text", ""))
|
||||||
elif block.get("type") == "image":
|
elif block.get("type") == "image":
|
||||||
|
image_data = _extract_ollama_image_data(block)
|
||||||
|
if image_data:
|
||||||
|
image_parts.append(image_data)
|
||||||
|
else:
|
||||||
text_parts.append("[image]")
|
text_parts.append("[image]")
|
||||||
elif isinstance(block, str):
|
elif isinstance(block, str):
|
||||||
text_parts.append(block)
|
text_parts.append(block)
|
||||||
ollama_messages.append({"role": role, "content": "\n".join(text_parts)})
|
ollama_message = {"role": role, "content": "\n".join(text_parts)}
|
||||||
|
if image_parts:
|
||||||
|
ollama_message["images"] = image_parts
|
||||||
|
ollama_messages.append(ollama_message)
|
||||||
return ollama_messages
|
return ollama_messages
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -38,6 +38,26 @@ def test_converts_image_block_to_placeholder():
|
|||||||
assert "[image]" in result[0]["content"]
|
assert "[image]" in result[0]["content"]
|
||||||
assert "Describe this" in result[0]["content"]
|
assert "Describe this" in result[0]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_converts_base64_image_block_to_ollama_images():
    """A base64 image block must surface in the Ollama ``images`` list."""
    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": "YWJjMTIz",
        },
    }
    text_block = {"type": "text", "text": "Describe this"}
    messages = [{"role": "user", "content": [image_block, text_block]}]

    result = anthropic_to_ollama_messages(messages)

    assert result[0]["images"] == ["YWJjMTIz"]
    assert "Describe this" in result[0]["content"]
|
|
||||||
def test_converts_multi_turn():
|
def test_converts_multi_turn():
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "user", "content": "Hi"},
|
{"role": "user", "content": "Hi"},
|
||||||
@@ -118,3 +138,43 @@ async def test_ollama_chat_prepends_system():
|
|||||||
)
|
)
|
||||||
assert captured["messages"][0]["role"] == "system"
|
assert captured["messages"][0]["role"] == "system"
|
||||||
assert "helpful" in captured["messages"][0]["content"]
|
assert "helpful" in captured["messages"][0]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_ollama_chat_includes_base64_images_in_payload():
    """base64 image data should reach the outgoing Ollama request payload."""
    captured = {}

    async def mock_post(url, json=None, **kwargs):
        # Record the outgoing body, then hand back a minimal valid reply.
        captured.update(json or {})
        m = MagicMock()
        m.raise_for_status = MagicMock()
        m.json.return_value = {
            "message": {"content": "ok"},
            "created_at": "",
            "prompt_eval_count": 1,
            "eval_count": 1,
        }
        return m

    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": "ZHVtbXk=",
        },
    }
    text_block = {"type": "text", "text": "What is in this image?"}

    with patch("ollama_provider.httpx.AsyncClient") as MockClient:
        MockClient.return_value.__aenter__.return_value.post = mock_post
        await ollama_chat(
            model="llama3:8b",
            messages=[{"role": "user", "content": [image_block, text_block]}],
        )

    assert captured["messages"][0]["images"] == ["ZHVtbXk="]
    assert "What is in this image?" in captured["messages"][0]["content"]
||||||
|
|||||||
Reference in New Issue
Block a user