fix: restore image paste and image tool-result handling (#308)

This commit is contained in:
KRATOS
2026-04-04 11:40:26 +05:30
committed by GitHub
parent 365bd3102d
commit c52245fc0a
7 changed files with 228 additions and 11 deletions

View File

@@ -0,0 +1,22 @@
import { expect, test } from 'bun:test'
import { supportsClipboardImageFallback } from './usePasteHandler.ts'
// Each row is [test title, platform identifier, expected fallback support].
// One bun test is registered per row, in this order.
const clipboardFallbackCases = [
  ['supports clipboard image fallback on Windows', 'windows', true],
  ['supports clipboard image fallback on macOS', 'macos', true],
  ['supports clipboard image fallback on Linux', 'linux', true],
  ['does not support clipboard image fallback on WSL', 'wsl', false],
  [
    'does not support clipboard image fallback on unknown platforms',
    'unknown',
    false,
  ],
] as const

for (const [title, platform, expected] of clipboardFallbackCases) {
  test(title, () => {
    expect(supportsClipboardImageFallback(platform)).toBe(expected)
  })
}

View File

@@ -15,6 +15,14 @@ import { getPlatform } from '../utils/platform.js'
const CLIPBOARD_CHECK_DEBOUNCE_MS = 50
const PASTE_COMPLETION_TIMEOUT_MS = 100
/**
 * Tells whether an empty paste on the given platform may be followed by a
 * clipboard lookup for image data.
 *
 * Only the `'macos'`, `'windows'` and `'linux'` platform identifiers are
 * accepted; every other value (for example `'wsl'` or `'unknown'`) yields
 * `false`.
 *
 * @param platform - Platform identifier as returned by `getPlatform()`.
 * @returns `true` when the clipboard image fallback is supported.
 */
export function supportsClipboardImageFallback(
  platform: ReturnType<typeof getPlatform>,
): boolean {
  switch (platform) {
    case 'macos':
    case 'windows':
    case 'linux':
      return true
    default:
      return false
  }
}
type PasteHandlerProps = {
onPaste?: (text: string) => void
onInput: (input: string, key: Key) => void
@@ -52,7 +60,9 @@ export function usePasteHandler({
// that key is Enter, it submits the old input and the paste is lost.
const pastePendingRef = React.useRef(false)
const isMacOS = React.useMemo(() => getPlatform() === 'macos', [])
const platform = React.useMemo(() => getPlatform(), [])
const isMacOS = platform === 'macos'
const canFallbackToClipboardImage = supportsClipboardImageFallback(platform)
React.useEffect(() => {
return () => {
@@ -178,7 +188,11 @@ export function usePasteHandler({
// If paste is empty (common when trying to paste images with Cmd+V),
// check if clipboard has an image (only on platforms where
// supportsClipboardImageFallback() is true: macOS, Windows, Linux)
if (isMacOS && onImagePaste && pastedText.length === 0) {
if (
canFallbackToClipboardImage &&
onImagePaste &&
pastedText.length === 0
) {
checkClipboardForImage()
return { chunks: [], timeoutId: null }
}
@@ -202,7 +216,13 @@ export function usePasteHandler({
pastePendingRef,
)
},
[checkClipboardForImage, isMacOS, onImagePaste, onPaste],
[
checkClipboardForImage,
canFallbackToClipboardImage,
isMacOS,
onImagePaste,
onPaste,
],
)
// Paste detection is now done via the InputEvent's keypress.isPasted flag,
@@ -242,7 +262,12 @@ export function usePasteHandler({
// When the user pastes an image with Cmd+V, the terminal sends an empty
// bracketed paste sequence. The keypress parser emits this as isPasted=true
// with empty input.
if (isFromPaste && input.length === 0 && isMacOS && onImagePaste) {
if (
isFromPaste &&
input.length === 0 &&
canFallbackToClipboardImage &&
onImagePaste
) {
checkClipboardForImage()
// Reset isPasting since there's no text content to process
setIsPasting(false)

View File

@@ -226,6 +226,88 @@ test('preserves Gemini tool call extra_content in follow-up requests', async ()
})
})
// Verifies that a tool_result whose content is a base64 image block is sent
// to the backend as a textual placeholder in the `tool` role message.
test('preserves image tool results as placeholders in follow-up requests', async () => {
// Captures the JSON body of the outgoing request so it can be inspected below.
let requestBody: Record<string, unknown> | undefined
// Stub fetch: record the request body and reply with a minimal, non-streaming
// chat completion so the client call resolves.
globalThis.fetch = (async (_input, init) => {
requestBody = JSON.parse(String(init?.body))
return new Response(
JSON.stringify({
id: 'chatcmpl-1',
model: 'qwen/qwen3.6-plus',
choices: [
{
message: {
role: 'assistant',
content: 'done',
},
finish_reason: 'stop',
},
],
usage: {
prompt_tokens: 12,
completion_tokens: 4,
total_tokens: 16,
},
}),
{
headers: {
'Content-Type': 'application/json',
},
},
)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
// Conversation: user prompt -> assistant tool_use (Read) -> user tool_result
// whose content is a single base64-encoded PNG image block.
await client.beta.messages.create({
model: 'qwen/qwen3.6-plus',
system: 'test system',
messages: [
{ role: 'user', content: 'Read this screenshot' },
{
role: 'assistant',
content: [
{
type: 'tool_use',
id: 'call_image_1',
name: 'Read',
input: { file_path: 'C:\\temp\\screenshot.png' },
},
],
},
{
role: 'user',
content: [
{
type: 'tool_result',
tool_use_id: 'call_image_1',
content: [
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: 'ZmFrZQ==',
},
},
],
},
],
},
],
max_tokens: 64,
stream: false,
})
// Find the message with role 'tool' in the captured request; its content is
// expected to contain the image placeholder rather than being empty.
const toolMessage = (requestBody?.messages as Array<Record<string, unknown>>).find(
message => message.role === 'tool',
) as { content?: string } | undefined
expect(toolMessage?.content).toContain('[image:image/png]')
})
test('preserves Gemini tool call extra_content from streaming chunks', async () => {
globalThis.fetch = (async (_input, _init) => {
const chunks = makeStreamChunks([

View File

@@ -113,6 +113,37 @@ function convertSystemPrompt(
return String(system)
}
/**
 * Flattens the `content` of a tool result into a single string.
 *
 * - A string is returned unchanged.
 * - Any non-array value is serialized with `JSON.stringify`; `null` and
 *   `undefined` are serialized as the empty string, i.e. `'""'`.
 * - An array is reduced block by block and the pieces are joined with `'\n'`:
 *   - `image` blocks become a placeholder: `[Image](<url>)` for a URL source
 *     with a non-empty url, `[image:<media_type>]` for a base64 source
 *     (`unknown` when the media type is missing), otherwise `[image]`;
 *   - any other block with a string `text` property contributes that text;
 *   - everything else contributes nothing.
 *
 * @param content - Raw tool result content (string, block array, or other).
 * @returns The flattened textual representation.
 */
function convertToolResultContent(content: unknown): string {
  if (typeof content === 'string') {
    return content
  }
  if (!Array.isArray(content)) {
    return JSON.stringify(content ?? '')
  }

  // Renders an image source as a short textual stand-in.
  const describeImage = (
    source:
      | { type?: unknown; url?: unknown; media_type?: unknown }
      | null
      | undefined,
  ): string => {
    if (source?.type === 'url' && source.url) {
      return `[Image](${source.url})`
    }
    if (source?.type === 'base64') {
      return `[image:${source.media_type ?? 'unknown'}]`
    }
    return '[image]'
  }

  const pieces: string[] = content.flatMap(block => {
    // Image blocks always map to a placeholder, even if they carry text.
    if (block?.type === 'image') {
      return [describeImage(block.source)]
    }
    // Text blocks and any other block exposing string text are kept verbatim.
    return typeof block?.text === 'string' ? [block.text] : []
  })

  return pieces.join('\n')
}
function convertContentBlocks(
content: unknown,
): string | Array<{ type: string; text?: string; image_url?: { url: string } }> {
@@ -189,11 +220,7 @@ function convertMessages(
// Emit tool results as tool messages
for (const tr of toolResults) {
const trContent = Array.isArray(tr.content)
? tr.content.map((c: { text?: string }) => c.text ?? '').join('\n')
: typeof tr.content === 'string'
? tr.content
: JSON.stringify(tr.content ?? '')
const trContent = convertToolResultContent(tr.content)
result.push({
role: 'tool',
tool_call_id: tr.tool_use_id ?? 'unknown',