fix: restore image paste and image tool-result handling (#308)
This commit is contained in:
22
src/hooks/usePasteHandler.test.ts
Normal file
22
src/hooks/usePasteHandler.test.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import { expect, test } from 'bun:test'
|
||||
import { supportsClipboardImageFallback } from './usePasteHandler.ts'
|
||||
|
||||
// Each entry is [test title, platform passed in, expected result]. The titles
// are the exact names the cases are registered under.
const fallbackCases = [
  ['supports clipboard image fallback on Windows', 'windows', true],
  ['supports clipboard image fallback on macOS', 'macos', true],
  ['supports clipboard image fallback on Linux', 'linux', true],
  ['does not support clipboard image fallback on WSL', 'wsl', false],
  [
    'does not support clipboard image fallback on unknown platforms',
    'unknown',
    false,
  ],
] as const

// Register one test per table row, in the order listed above.
for (const [title, platform, expected] of fallbackCases) {
  test(title, () => {
    expect(supportsClipboardImageFallback(platform)).toBe(expected)
  })
}
|
||||
@@ -15,6 +15,14 @@ import { getPlatform } from '../utils/platform.js'
|
||||
const CLIPBOARD_CHECK_DEBOUNCE_MS = 50
|
||||
const PASTE_COMPLETION_TIMEOUT_MS = 100
|
||||
|
||||
/**
 * Reports whether an empty paste on the given platform may fall back to
 * checking the clipboard for an image.
 *
 * @param platform - Platform identifier, typed as the return of `getPlatform()`.
 * @returns `true` for `'macos'`, `'windows'` and `'linux'`; `false` for every
 *   other value.
 */
export function supportsClipboardImageFallback(
  platform: ReturnType<typeof getPlatform>,
): boolean {
  switch (platform) {
    case 'macos':
    case 'windows':
    case 'linux':
      return true
    default:
      return false
  }
}
|
||||
|
||||
type PasteHandlerProps = {
|
||||
onPaste?: (text: string) => void
|
||||
onInput: (input: string, key: Key) => void
|
||||
@@ -52,7 +60,9 @@ export function usePasteHandler({
|
||||
// that key is Enter, it submits the old input and the paste is lost.
|
||||
const pastePendingRef = React.useRef(false)
|
||||
|
||||
const isMacOS = React.useMemo(() => getPlatform() === 'macos', [])
|
||||
const platform = React.useMemo(() => getPlatform(), [])
|
||||
const isMacOS = platform === 'macos'
|
||||
const canFallbackToClipboardImage = supportsClipboardImageFallback(platform)
|
||||
|
||||
React.useEffect(() => {
|
||||
return () => {
|
||||
@@ -178,7 +188,11 @@ export function usePasteHandler({
|
||||
|
||||
// If paste is empty (common when trying to paste images with Cmd+V),
|
||||
// check if clipboard has an image (only on platforms with clipboard image fallback)
|
||||
if (isMacOS && onImagePaste && pastedText.length === 0) {
|
||||
if (
|
||||
canFallbackToClipboardImage &&
|
||||
onImagePaste &&
|
||||
pastedText.length === 0
|
||||
) {
|
||||
checkClipboardForImage()
|
||||
return { chunks: [], timeoutId: null }
|
||||
}
|
||||
@@ -202,7 +216,13 @@ export function usePasteHandler({
|
||||
pastePendingRef,
|
||||
)
|
||||
},
|
||||
[checkClipboardForImage, isMacOS, onImagePaste, onPaste],
|
||||
[
|
||||
checkClipboardForImage,
|
||||
canFallbackToClipboardImage,
|
||||
isMacOS,
|
||||
onImagePaste,
|
||||
onPaste,
|
||||
],
|
||||
)
|
||||
|
||||
// Paste detection is now done via the InputEvent's keypress.isPasted flag,
|
||||
@@ -242,7 +262,12 @@ export function usePasteHandler({
|
||||
// When the user pastes an image with Cmd+V, the terminal sends an empty
|
||||
// bracketed paste sequence. The keypress parser emits this as isPasted=true
|
||||
// with empty input.
|
||||
if (isFromPaste && input.length === 0 && isMacOS && onImagePaste) {
|
||||
if (
|
||||
isFromPaste &&
|
||||
input.length === 0 &&
|
||||
canFallbackToClipboardImage &&
|
||||
onImagePaste
|
||||
) {
|
||||
checkClipboardForImage()
|
||||
// Reset isPasting since there's no text content to process
|
||||
setIsPasting(false)
|
||||
|
||||
@@ -226,6 +226,88 @@ test('preserves Gemini tool call extra_content in follow-up requests', async ()
|
||||
})
|
||||
})
|
||||
|
||||
test('preserves image tool results as placeholders in follow-up requests', async () => {
  // Canned chat-completion payload that the stubbed fetch replies with.
  const cannedCompletion = {
    id: 'chatcmpl-1',
    model: 'qwen/qwen3.6-plus',
    choices: [
      {
        message: { role: 'assistant', content: 'done' },
        finish_reason: 'stop',
      },
    ],
    usage: { prompt_tokens: 12, completion_tokens: 4, total_tokens: 16 },
  }

  // Filled in by the fetch stub with the parsed JSON body of the outgoing request.
  let requestBody: Record<string, unknown> | undefined

  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))

    return new Response(JSON.stringify(cannedCompletion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  // The conversation ends with a tool_result whose only content is a base64 image.
  await client.beta.messages.create({
    model: 'qwen/qwen3.6-plus',
    system: 'test system',
    messages: [
      { role: 'user', content: 'Read this screenshot' },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool_use',
            id: 'call_image_1',
            name: 'Read',
            input: { file_path: 'C:\\temp\\screenshot.png' },
          },
        ],
      },
      {
        role: 'user',
        content: [
          {
            type: 'tool_result',
            tool_use_id: 'call_image_1',
            content: [
              {
                type: 'image',
                source: {
                  type: 'base64',
                  media_type: 'image/png',
                  data: 'ZmFrZQ==',
                },
              },
            ],
          },
        ],
      },
    ],
    max_tokens: 64,
    stream: false,
  })

  // Find the message with role 'tool' in the captured request and check that
  // the image is represented by a textual placeholder.
  const sentMessages = requestBody?.messages as Array<Record<string, unknown>>
  const toolMessage = sentMessages.find(message => message.role === 'tool') as
    | { content?: string }
    | undefined

  expect(toolMessage?.content).toContain('[image:image/png]')
})
|
||||
|
||||
test('preserves Gemini tool call extra_content from streaming chunks', async () => {
|
||||
globalThis.fetch = (async (_input, _init) => {
|
||||
const chunks = makeStreamChunks([
|
||||
|
||||
@@ -113,6 +113,37 @@ function convertSystemPrompt(
|
||||
return String(system)
|
||||
}
|
||||
|
||||
/**
 * Flattens the content of a tool result into a single string.
 *
 * - A string is returned unchanged.
 * - Any other non-array value is JSON-stringified; `null`/`undefined` are
 *   stringified as the empty string, which yields the two-character text `""`.
 * - An array is walked entry by entry and the pieces are joined with newlines:
 *   - image entries become a placeholder: `[Image](<url>)` for a `url` source
 *     with a truthy `url`, `[image:<media_type>]` for a `base64` source
 *     (`unknown` when `media_type` is nullish), and `[image]` otherwise;
 *   - any other entry with a string `text` property contributes that text;
 *   - everything else is skipped.
 *
 * @param content - Raw tool-result content of unknown shape.
 * @returns The flattened textual representation.
 */
function convertToolResultContent(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return JSON.stringify(content ?? '')

  return content
    .flatMap((entry): string[] => {
      if (entry?.type === 'image') {
        // Image bytes are never inlined; only a short marker is emitted.
        const src = entry.source
        if (src?.type === 'url' && src.url) return [`[Image](${src.url})`]
        if (src?.type === 'base64') {
          return [`[image:${src.media_type ?? 'unknown'}]`]
        }
        return ['[image]']
      }

      // Text blocks, and any non-image entry that carries a string `text`.
      return typeof entry?.text === 'string' ? [entry.text] : []
    })
    .join('\n')
}
|
||||
|
||||
function convertContentBlocks(
|
||||
content: unknown,
|
||||
): string | Array<{ type: string; text?: string; image_url?: { url: string } }> {
|
||||
@@ -189,11 +220,7 @@ function convertMessages(
|
||||
|
||||
// Emit tool results as tool messages
|
||||
for (const tr of toolResults) {
|
||||
const trContent = Array.isArray(tr.content)
|
||||
? tr.content.map((c: { text?: string }) => c.text ?? '').join('\n')
|
||||
: typeof tr.content === 'string'
|
||||
? tr.content
|
||||
: JSON.stringify(tr.content ?? '')
|
||||
const trContent = convertToolResultContent(tr.content)
|
||||
result.push({
|
||||
role: 'tool',
|
||||
tool_call_id: tr.tool_use_id ?? 'unknown',
|
||||
|
||||
Reference in New Issue
Block a user