Compare commits

..

5 Commits

Author SHA1 Message Date
Juan Camilo
98f38d8bfc test: trim extra blank lines in conversation recovery test
Keep the focused provider-resume test diff clean so the regression branch stays easy to review.

Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>
2026-04-07 15:27:39 +02:00
Juan Camilo
279cd1a7e1 test: move provider-sensitive resume coverage behind module mocks 2026-04-07 15:02:41 +02:00
Juan Camilo
5c13223aa4 test: isolate provider env in conversation recovery tests 2026-04-07 15:02:41 +02:00
Juan Camilo
2c8842f87c test: align resume stripping expectation with orphan-thinking filter 2026-04-07 15:02:41 +02:00
Juan Camilo
858f06d964 fix: strip Anthropic-specific params from 3P provider paths
Three silent failure modes affecting all third-party provider users:

1. Thinking blocks serialized as <thinking> text corrupt multi-turn
   context — strip them instead of converting to raw text tags.

2. Unknown models fall through to 200k context window default, so
   auto-compact never triggers — use conservative 8k for unknown
   3P models with a warning log.

3. Session resume with thinking blocks causes 400 or context corruption
   on 3P providers — strip thinking/redacted_thinking content blocks
   from deserialized messages when resuming against a non-Anthropic
   provider.

Addresses findings 2, 3, and 5 from #248.
2026-04-07 15:02:13 +02:00
13 changed files with 34 additions and 278 deletions

View File

@@ -68,11 +68,11 @@ When a user describes what they want an agent to do, you will:
assistant: "Now let me use the test-runner agent to run the tests"
</example>
- <example>
Context: User is creating an agent for Claude Code product questions.
user: "How do I configure Claude Code hooks?"
assistant: "I'm going to use the ${AGENT_TOOL_NAME} tool to launch the claude-code-guide agent to answer the question"
Context: User is creating an agent to respond to the word "hello" with a friendly joke.
user: "Hello"
assistant: "I'm going to use the ${AGENT_TOOL_NAME} tool to launch the greeting-responder agent to respond with a friendly joke"
<commentary>
Since the user is asking how to use Claude Code, use the claude-code-guide agent.
Since the user is greeting, use the greeting-responder agent to respond with a friendly joke.
</commentary>
</example>
- If the user mentioned or implied that the agent should be used proactively, you should include examples of this.

View File

@@ -238,7 +238,6 @@ import { usePromptsFromClaudeInChrome } from 'src/hooks/usePromptsFromClaudeInCh
import { getTipToShowOnSpinner, recordShownTip } from 'src/services/tips/tipScheduler.js';
import type { Theme } from 'src/utils/theme.js';
import { isPromptTypingSuppressionActive } from './replInputSuppression.js';
import { shouldRunStartupChecks } from './replStartupGates.js';
import { checkAndDisableBypassPermissionsIfNeeded, checkAndDisableAutoModeIfNeeded, useKickOffCheckAndDisableBypassPermissionsIfNeeded, useKickOffCheckAndDisableAutoModeIfNeeded } from 'src/utils/permissions/bypassPermissionsKillswitch.js';
import { SandboxManager } from 'src/utils/sandbox/sandbox-adapter.js';
import { SANDBOX_NETWORK_ACCESS_TOOL_NAME } from 'src/cli/structuredIO.js';
@@ -793,8 +792,10 @@ export function REPL({
// accepts, and only then is the REPL component mounted and this effect runs.
// This ensures that plugin installations from repository and user settings only
// happen after explicit user consent to trust the current working directory.
// Deferring startup checks is handled below (after promptTypingSuppressionActive
// is declared) to avoid temporal dead zone issues.
useEffect(() => {
if (isRemoteSession) return;
void performStartupChecks(setAppState);
}, [setAppState, isRemoteSession]);
// Allow Claude in Chrome MCP to send prompts through MCP notifications
// and sync permission mode changes to the Chrome extension
@@ -1428,25 +1429,6 @@ export function REPL({
const activeRemote = sshRemote.isRemoteMode ? sshRemote : directConnect.isRemoteMode ? directConnect : remoteSession;
const [pastedContents, setPastedContents] = useState<Record<number, PastedContent>>({});
const [submitCount, setSubmitCount] = useState(0);
// Defer startup checks until the user has submitted their first message.
// A timeout or grace period is insufficient (issue #363): if the user pauses
// before typing, startup checks can still fire and recommendation dialogs
// steal focus. Only the user's first submission guarantees the prompt was
// the first thing they interacted with.
const startupChecksStartedRef = React.useRef(false);
const hasHadFirstSubmission = (submitCount ?? 0) > 0;
useEffect(() => {
if (isRemoteSession) return;
if (startupChecksStartedRef.current) return;
if (!shouldRunStartupChecks({
isRemoteSession,
hasStarted: startupChecksStartedRef.current,
hasHadFirstSubmission,
})) return;
startupChecksStartedRef.current = true;
void performStartupChecks(setAppState);
}, [setAppState, isRemoteSession, hasHadFirstSubmission]);
// Ref instead of state to avoid triggering React re-renders on every
// streaming text_delta. The spinner reads this via its animation timer.
const responseLengthRef = useRef(0);
@@ -2079,14 +2061,13 @@ export function REPL({
if (allowDialogsWithAnimation && showRemoteCallout) return 'remote-callout';
// LSP plugin recommendation (lowest priority - non-blocking suggestion)
// Suppress during startup window to prevent stealing focus from the prompt (issue #363)
if (allowDialogsWithAnimation && lspRecommendation && startupChecksStartedRef.current) return 'lsp-recommendation';
if (allowDialogsWithAnimation && lspRecommendation) return 'lsp-recommendation';
// Plugin hint from CLI/SDK stderr (same priority band as LSP rec)
if (allowDialogsWithAnimation && hintRecommendation && startupChecksStartedRef.current) return 'plugin-hint';
if (allowDialogsWithAnimation && hintRecommendation) return 'plugin-hint';
// Desktop app upsell (max 3 launches, lowest priority)
if (allowDialogsWithAnimation && showDesktopUpsellStartup && startupChecksStartedRef.current) return 'desktop-upsell';
if (allowDialogsWithAnimation && showDesktopUpsellStartup) return 'desktop-upsell';
return undefined;
}
const focusedInputDialog = getFocusedInputDialog();

View File

@@ -1,53 +0,0 @@
import { describe, expect, test } from 'bun:test'
import { shouldRunStartupChecks } from './replStartupGates.js'
describe('shouldRunStartupChecks', () => {
  // Each row is one test: its title, the gate inputs, and the expected verdict.
  // The fourth and fifth rows use identical inputs; both are kept under their
  // original titles so the reported test list does not change.
  const cases = [
    {
      title: 'runs checks after first message submission',
      options: { isRemoteSession: false, hasStarted: false, hasHadFirstSubmission: true },
      expected: true,
    },
    {
      title: 'skips checks in remote sessions even after submission',
      options: { isRemoteSession: true, hasStarted: false, hasHadFirstSubmission: true },
      expected: false,
    },
    {
      title: 'skips checks if already started',
      options: { isRemoteSession: false, hasStarted: true, hasHadFirstSubmission: true },
      expected: false,
    },
    {
      title: 'does not run checks before first submission',
      options: { isRemoteSession: false, hasStarted: false, hasHadFirstSubmission: false },
      expected: false,
    },
    {
      title: 'does not run checks when idle before first submission',
      options: { isRemoteSession: false, hasStarted: false, hasHadFirstSubmission: false },
      expected: false,
    },
    {
      title: 'skips checks in remote session regardless of other conditions',
      options: { isRemoteSession: true, hasStarted: false, hasHadFirstSubmission: false },
      expected: false,
    },
  ]

  for (const { title, options, expected } of cases) {
    test(title, () => {
      expect(shouldRunStartupChecks(options)).toBe(expected)
    })
  }
})

View File

@@ -1,35 +0,0 @@
/**
* Startup gates for the REPL.
*
* Prevents startup plugin checks and recommendation dialogs from stealing
* focus before the user has interacted with the prompt.
*
* This addresses the root cause of issue #363: on mount, performStartupChecks
* triggers plugin loading, which populates trackedFiles, which triggers
* useLspPluginRecommendation to surface an LSP recommendation dialog. Since
* promptTypingSuppressionActive is false before the user has typed anything,
* getFocusedInputDialog() returns the dialog, unmounting PromptInput entirely.
*
* The fix gates startup checks on actual prompt interaction. A pure timeout
* or grace period is insufficient because pausing before typing would still
* allow dialogs to steal focus. Only the user's first submission guarantees
* the prompt is no longer in the vulnerable pre-interaction window.
*/
/**
 * Decides whether the REPL startup checks may run now.
 *
 * The checks are held back until the user has submitted a first message, so
 * that the prompt is the first thing the user interacts with and no
 * recommendation dialog can take focus before then.
 *
 * @param options.isRemoteSession - When true, the checks never run.
 * @param options.hasStarted - When true, the checks were already kicked off
 *   and must not run again.
 * @param options.hasHadFirstSubmission - Whether the user has submitted at
 *   least one message.
 * @returns `true` only for a non-remote session whose checks have not started
 *   and whose user has already submitted once; `false` otherwise.
 */
export function shouldRunStartupChecks(options: {
  isRemoteSession: boolean;
  hasStarted: boolean;
  hasHadFirstSubmission: boolean;
}): boolean {
  const { isRemoteSession, hasStarted, hasHadFirstSubmission } = options;
  // Any single blocking condition vetoes the run.
  return !(isRemoteSession || hasStarted || !hasHadFirstSubmission);
}

View File

@@ -1,33 +0,0 @@
import { describe, expect, test } from 'bun:test'
import { SkillTool } from '../../tools/SkillTool/SkillTool.js'
import {
getSchemaValidationErrorOverride,
getSchemaValidationToolUseResult,
} from './toolExecution.js'
describe('getSchemaValidationErrorOverride', () => {
  // Expected override text, shared by the plain and the prefixed assertions.
  const missingSkillMessage =
    'Missing skill name. Pass the slash command name as the skill parameter (e.g., skill: "commit" for /commit, skill: "review-pr" for /review-pr).'

  test('returns actionable missing-skill error for SkillTool', () => {
    const override = getSchemaValidationErrorOverride(SkillTool, {})
    expect(override).toBe(missingSkillMessage)
  })

  test('does not override unrelated tool schema failures', () => {
    // Only the `name` field matters here, so a minimal stand-in tool is used.
    const unrelatedTool = { name: 'Read' } as never
    expect(getSchemaValidationErrorOverride(unrelatedTool, {})).toBe(null)
  })

  test('does not override SkillTool when skill is present', () => {
    const override = getSchemaValidationErrorOverride(SkillTool, {
      skill: 'commit',
    })
    expect(override).toBe(null)
  })

  test('uses the actionable override for structured toolUseResult too', () => {
    const toolUseResult = getSchemaValidationToolUseResult(
      SkillTool,
      {} as never,
    )
    expect(toolUseResult).toBe(`InputValidationError: ${missingSkillMessage}`)
  })
})

View File

@@ -43,7 +43,6 @@ import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js'
import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js'
import { NOTEBOOK_EDIT_TOOL_NAME } from '../../tools/NotebookEditTool/constants.js'
import { POWERSHELL_TOOL_NAME } from '../../tools/PowerShellTool/toolName.js'
import { SKILL_TOOL_NAME } from '../../tools/SkillTool/constants.js'
import { parseGitCommitId } from '../../tools/shared/gitOperationTracking.js'
import {
isDeferredTool,
@@ -597,31 +596,6 @@ export function buildSchemaNotSentHint(
)
}
/**
 * Supplies a hand-written validation message for one known schema failure:
 * the skill tool being invoked without a `skill` value.
 *
 * @param tool - The tool whose input failed validation; only `name` is read.
 * @param input - The raw, unvalidated tool input.
 * @returns The replacement message when `tool.name` equals `SKILL_TOOL_NAME`,
 *   `input` is a non-null object, and its `skill` property is `undefined` or
 *   `null`; otherwise `null`, meaning no override applies.
 */
export function getSchemaValidationErrorOverride(
  tool: Tool,
  input: unknown,
): string | null {
  const isSkillToolObjectInput =
    tool.name === SKILL_TOOL_NAME && Boolean(input) && typeof input === 'object'
  if (!isSkillToolObjectInput) {
    return null
  }

  const { skill } = input as { skill?: unknown }
  // `== null` matches both undefined and null, and nothing else.
  return skill == null
    ? 'Missing skill name. Pass the slash command name as the skill parameter (e.g., skill: "commit" for /commit, skill: "review-pr" for /review-pr).'
    : null
}
/**
 * Builds the `InputValidationError: …` string for a failed schema validation.
 *
 * The detail after the prefix is chosen in this order: the override from
 * `getSchemaValidationErrorOverride`, then `fallbackMessage`, then an empty
 * string.
 *
 * @param tool - The tool whose input failed validation.
 * @param input - The raw, unvalidated tool input.
 * @param fallbackMessage - Message to use when no override applies.
 * @returns The prefixed error string.
 */
export function getSchemaValidationToolUseResult(
  tool: Tool,
  input: unknown,
  fallbackMessage?: string,
): string {
  const detail =
    getSchemaValidationErrorOverride(tool, input) ?? fallbackMessage ?? ''
  return 'InputValidationError: ' + detail
}
async function checkPermissionsAndCallTool(
tool: Tool,
toolUseID: string,
@@ -640,9 +614,7 @@ async function checkPermissionsAndCallTool(
// Validate input types with zod (surprisingly, the model is not great at generating valid input)
const parsedInput = tool.inputSchema.safeParse(input)
if (!parsedInput.success) {
const fallbackErrorContent = formatZodValidationError(tool.name, parsedInput.error)
let errorContent =
getSchemaValidationErrorOverride(tool, input) ?? fallbackErrorContent
let errorContent = formatZodValidationError(tool.name, parsedInput.error)
const schemaHint = buildSchemaNotSentHint(
tool,
@@ -700,11 +672,7 @@ async function checkPermissionsAndCallTool(
tool_use_id: toolUseID,
},
],
toolUseResult: getSchemaValidationToolUseResult(
tool,
input,
parsedInput.error.message,
),
toolUseResult: `InputValidationError: ${parsedInput.error.message}`,
sourceToolAssistantUUID: assistantMessage.uuid,
}),
},

View File

@@ -156,24 +156,34 @@ ${AGENT_TOOL_NAME}({
const currentExamples = `Example usage:
<example_agent_descriptions>
"claude-code-guide": use this agent when the user asks how Claude Code works or how to use its features
"statusline-setup": use this agent to configure the user's Claude Code status line setting
"test-runner": use this agent after you are done writing code to run tests
"greeting-responder": use this agent to respond to user greetings with a friendly joke
</example_agent_descriptions>
<example>
user: "How do I configure Claude Code hooks?"
user: "Please write a function that checks if a number is prime"
assistant: I'm going to use the ${FILE_WRITE_TOOL_NAME} tool to write the following code:
<code>
function isPrime(n) {
if (n <= 1) return false
for (let i = 2; i * i <= n; i++) {
if (n % i === 0) return false
}
return true
}
</code>
<commentary>
This is a Claude Code usage question, so use the claude-code-guide agent
Since a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests
</commentary>
assistant: Uses the ${AGENT_TOOL_NAME} tool to launch the claude-code-guide agent
assistant: Uses the ${AGENT_TOOL_NAME} tool to launch the test-runner agent
</example>
<example>
user: "Set up my Claude Code status line"
user: "Hello"
<commentary>
This matches the statusline-setup agent, so use it to configure the setting
Since the user is greeting, use the greeting-responder agent to respond with a friendly joke
</commentary>
assistant: "I'm going to use the ${AGENT_TOOL_NAME} tool to launch the statusline-setup agent"
assistant: "I'm going to use the ${AGENT_TOOL_NAME} tool to launch the greeting-responder agent"
</example>
`

View File

@@ -1,31 +0,0 @@
import { describe, expect, test } from 'bun:test'
import { SkillTool } from './SkillTool.js'
describe('SkillTool missing parameter handling', () => {
  // Expected result of validateInput when no skill is supplied.
  const missingSkillResult = {
    result: false,
    message:
      'Missing skill name. Pass the slash command name as the skill parameter (e.g., skill: "commit" for /commit, skill: "review-pr" for /review-pr).',
    errorCode: 1,
  }

  test('missing skill stays required at the schema level', async () => {
    const { success } = SkillTool.inputSchema.safeParse({})
    expect(success).toBe(false)
  })

  test('validateInput still returns an actionable error when called with missing skill', async () => {
    // Minimal stand-in context; cast because only a partial shape is supplied.
    const stubContext = {
      options: { tools: [] },
      messages: [],
    } as never
    const outcome = await SkillTool.validateInput?.({} as never, stubContext)
    expect(outcome).toEqual(missingSkillResult)
  })

  test('valid skill input still parses and validates', async () => {
    const { success } = SkillTool.inputSchema.safeParse({ skill: 'commit' })
    expect(success).toBe(true)
  })
})

View File

@@ -352,16 +352,6 @@ export const SkillTool: Tool<InputSchema, Output, Progress> = buildTool({
toAutoClassifierInput: ({ skill }) => skill ?? '',
async validateInput({ skill }, context): Promise<ValidationResult> {
if (!skill || typeof skill !== 'string') {
return {
result: false,
message:
'Missing skill name. Pass the slash command name as the skill parameter ' +
'(e.g., skill: "commit" for /commit, skill: "review-pr" for /review-pr).',
errorCode: 1,
}
}
// Skills are just skill names, no arguments
const trimmed = skill.trim()
if (!trimmed) {
@@ -444,7 +434,7 @@ export const SkillTool: Tool<InputSchema, Output, Progress> = buildTool({
context,
): Promise<PermissionDecision> {
// Skills are just skill names, no arguments
const trimmed = skill ?? ''
const trimmed = skill.trim()
// Remove leading slash if present (for compatibility)
const commandName = trimmed.startsWith('/') ? trimmed.substring(1) : trimmed
@@ -602,7 +592,7 @@ export const SkillTool: Tool<InputSchema, Output, Progress> = buildTool({
// - Skill is a prompt-based skill
// Skills are just names, with optional arguments
const trimmed = skill ?? ''
const trimmed = skill.trim()
// Remove leading slash if present (for compatibility)
const commandName = trimmed.startsWith('/') ? trimmed.substring(1) : trimmed

View File

@@ -1,7 +1,6 @@
import { expect, test } from 'bun:test'
import { z } from 'zod/v4'
import { getEmptyToolPermissionContext, type Tool, type Tools } from '../Tool.js'
import { SkillTool } from '../tools/SkillTool/SkillTool.js'
import { toolToAPISchema } from './api.js'
test('toolToAPISchema preserves provider-specific schema keywords in input_schema', async () => {
@@ -65,16 +64,3 @@ test('toolToAPISchema preserves provider-specific schema keywords in input_schem
},
})
})
// The API schema generated for SkillTool must list `skill` as required.
test('toolToAPISchema keeps skill required for SkillTool', async () => {
  const noTools = [] as unknown as Tools
  const apiSchema = await toolToAPISchema(SkillTool, {
    getToolPermissionContext: async () => getEmptyToolPermissionContext(),
    tools: noTools,
    agents: [],
  })

  const { input_schema: inputSchema } = apiSchema as { input_schema: unknown }
  expect(inputSchema).toMatchObject({
    type: 'object',
    required: ['skill'],
  })
})

View File

@@ -94,22 +94,3 @@ test('gpt-5.4 family keeps large max output overrides within provider limits', (
expect(getMaxOutputTokensForModel('gpt-5.4-mini')).toBe(128_000)
expect(getMaxOutputTokensForModel('gpt-5.4-nano')).toBe(128_000)
})
// Pins the context window and max-output values expected for MiniMax-M2.7
// when the OpenAI-compatible provider flag is set and no output override is set.
test('MiniMax-M2.7 uses explicit provider-specific context and output caps', () => {
  const model = 'MiniMax-M2.7'
  const outputCap = 131_072

  process.env.CLAUDE_CODE_USE_OPENAI = '1'
  delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS

  expect(getContextWindowForModel(model)).toBe(204_800)
  expect(getModelMaxOutputTokens(model)).toEqual({
    default: outputCap,
    upperLimit: outputCap,
  })
  expect(getMaxOutputTokensForModel(model)).toBe(outputCap)
})
// A model name with no known entry is expected to report the 8k fallback window.
test('unknown openai-compatible models still use the conservative fallback window', () => {
  const unknownModel = 'some-unknown-3p-model'

  process.env.CLAUDE_CODE_USE_OPENAI = '1'
  delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS

  const contextWindow = getContextWindowForModel(unknownModel)
  expect(contextWindow).toBe(8_000)
})

View File

@@ -74,7 +74,7 @@ export function getContextWindowForModel(
// OpenAI-compatible provider — use known context windows for the model.
// Unknown models get a conservative 8k default so auto-compact triggers
// before hitting a hard context_window_exceeded error.
// before hitting a hard context_window_exceeded error (issue #248 finding 3).
const isOpenAIProvider =
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||

View File

@@ -44,10 +44,6 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
'mistral-large-latest': 131_072,
'mistral-small-latest': 131_072,
// MiniMax
'MiniMax-M2.7': 204_800,
'minimax-m2.7': 204_800,
// Google (via OpenRouter)
'google/gemini-2.0-flash':1_048_576,
'google/gemini-2.5-pro': 1_048_576,
@@ -114,10 +110,6 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
'mistral-large-latest': 32_768,
'mistral-small-latest': 32_768,
// MiniMax
'MiniMax-M2.7': 131_072,
'minimax-m2.7': 131_072,
// Google (via OpenRouter)
'google/gemini-2.0-flash': 8_192,
'google/gemini-2.5-pro': 65_536,