diff --git a/ollama_provider.py b/ollama_provider.py
index de19265d..73e5965d 100644
--- a/ollama_provider.py
+++ b/ollama_provider.py
@@ -49,6 +49,18 @@ def normalize_ollama_model(model_name: str) -> str:
     return model_name
 
 
+def _extract_ollama_image_data(block: dict) -> str | None:
+    source = block.get("source")
+    if not isinstance(source, dict):
+        return None
+    if source.get("type") != "base64":
+        return None
+    data = source.get("data")
+    if isinstance(data, str) and data:
+        return data
+    return None
+
+
 def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
     ollama_messages = []
     for msg in messages:
@@ -58,15 +70,23 @@ def anthropic_to_ollama_messages(messages: list[dict]) -> list[dict]:
             ollama_messages.append({"role": role, "content": content})
         elif isinstance(content, list):
             text_parts = []
+            image_parts = []
             for block in content:
                 if isinstance(block, dict):
                     if block.get("type") == "text":
                         text_parts.append(block.get("text", ""))
                     elif block.get("type") == "image":
-                        text_parts.append("[image]")
+                        image_data = _extract_ollama_image_data(block)
+                        if image_data:
+                            image_parts.append(image_data)
+                        else:
+                            text_parts.append("[image]")
                 elif isinstance(block, str):
                     text_parts.append(block)
-            ollama_messages.append({"role": role, "content": "\n".join(text_parts)})
+            ollama_message = {"role": role, "content": "\n".join(text_parts)}
+            if image_parts:
+                ollama_message["images"] = image_parts
+            ollama_messages.append(ollama_message)
     return ollama_messages
 
 
diff --git a/test_ollama_provider.py b/test_ollama_provider.py
index 8028e761..fb57de13 100644
--- a/test_ollama_provider.py
+++ b/test_ollama_provider.py
@@ -38,6 +38,26 @@ def test_converts_image_block_to_placeholder():
     assert "[image]" in result[0]["content"]
     assert "Describe this" in result[0]["content"]
 
+
+def test_converts_base64_image_block_to_ollama_images():
+    messages = [{
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": "image/png",
+                    "data": "YWJjMTIz",
+                },
+            },
+            {"type": "text", "text": "Describe this"},
+        ],
+    }]
+    result = anthropic_to_ollama_messages(messages)
+    assert result[0]["images"] == ["YWJjMTIz"]
+    assert "Describe this" in result[0]["content"]
+
 def test_converts_multi_turn():
     messages = [
         {"role": "user", "content": "Hi"},
@@ -118,3 +138,43 @@ async def test_ollama_chat_prepends_system():
     )
     assert captured["messages"][0]["role"] == "system"
     assert "helpful" in captured["messages"][0]["content"]
+
+
+@pytest.mark.asyncio
+async def test_ollama_chat_includes_base64_images_in_payload():
+    captured = {}
+
+    async def mock_post(url, json=None, **kwargs):
+        captured.update(json or {})
+        m = MagicMock()
+        m.raise_for_status = MagicMock()
+        m.json.return_value = {
+            "message": {"content": "ok"},
+            "created_at": "",
+            "prompt_eval_count": 1,
+            "eval_count": 1,
+        }
+        return m
+
+    with patch("ollama_provider.httpx.AsyncClient") as MockClient:
+        MockClient.return_value.__aenter__.return_value.post = mock_post
+        await ollama_chat(
+            model="llama3:8b",
+            messages=[{
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": "ZHVtbXk=",
+                        },
+                    },
+                    {"type": "text", "text": "What is in this image?"},
+                ],
+            }],
+        )
+
+    assert captured["messages"][0]["images"] == ["ZHVtbXk="]
+    assert "What is in this image?" in captured["messages"][0]["content"]