feat: implement Hook Chains runtime integration for self-healing agent mesh MVP (#711)

* feat: implement Hook Chains runtime integration for self-healing agent mesh MVP

- Add Hook Chains config loader, evaluator, and dispatcher in src/utils/hookChains.ts
- Wire PostToolUseFailure hook dispatch in executePostToolUseFailureHooks()
- Wire TaskCompleted hook dispatch in executeTaskCompletedHooks()
- Integrate fallback-agent launcher with permission preservation (canUseTool threading)
- Add safety hardening for config-read errors (try-catch protection)
- Update docs with MVP runtime trigger explanation
- Add 10 unit tests and 4 integration tests covering config, rules, guards, and actions

This completes the self-healing agent mesh MVP by enabling declarative rule-based
responses to tool failures and task completions, with fallback agent spawning,
team notification, and capacity warming actions.

* Update docs/hook-chains.md

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/utils/hookChains.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: address PR #711 review blockers for Hook Chains

- Gate hook-chain dispatch behind feature('HOOK_CHAINS') and default env gate to off
- Remove committed local artifact (agent.log) and ignore it in .gitignore
- Revert hook dispatcher signature threading changes for canUseTool
- Use ToolUseContext metadata hookChainsCanUseTool for fallback launch permissions
- Make spawn_fallback_agent fail explicitly when launcher context is unavailable
- Add config cache max age and guard map size limits to bound runtime memory
- Update docs and tests for default-off gating and explicit fallback failure

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Urvish L.
2026-04-22 13:40:23 +02:00
committed by GitHub
parent 5b9cd21e37
commit 44a2c30d5f
9 changed files with 2905 additions and 22 deletions

View File

@@ -10,6 +10,7 @@ import { wrapSpawn } from './ShellCommand.js'
import { TaskOutput } from './task/TaskOutput.js'
import { getCwd } from './cwd.js'
import { randomUUID } from 'crypto'
import { feature } from 'bun:bundle'
import { formatShellPrefixCommand } from './bash/shellPrefix.js'
import {
getHookEnvFilePath,
@@ -134,6 +135,7 @@ import { registerPendingAsyncHook } from './hooks/AsyncHookRegistry.js'
import { enqueuePendingNotification } from './messageQueueManager.js'
import {
extractTextContent,
createAssistantMessage,
getLastAssistantMessage,
wrapInSystemReminder,
} from './messages.js'
@@ -145,6 +147,7 @@ import {
import { createAttachmentMessage } from './attachments.js'
import { all } from './generators.js'
import { findToolByName, type Tools, type ToolUseContext } from '../Tool.js'
import type { CanUseToolFn } from '../hooks/useCanUseTool.js'
import { execPromptHook } from './hooks/execPromptHook.js'
import type { Message, AssistantMessage } from '../types/message.js'
import { execAgentHook } from './hooks/execAgentHook.js'
@@ -162,9 +165,147 @@ import type { AppState } from '../state/AppState.js'
import { jsonStringify, jsonParse } from './slowOperations.js'
import { isEnvTruthy } from './envUtils.js'
import { errorMessage, getErrnoCode } from './errors.js'
import { getAgentName, getTeamName, getTeammateColor } from './teammate.js'
import type {
HookChainOutcome,
HookChainRuntimeContext,
SpawnFallbackAgentRequest,
SpawnFallbackAgentResponse,
} from './hookChains.js'
// 10 minutes, expressed in milliseconds (10 * 60 s * 1000 ms).
// NOTE(review): per its name this is the timeout applied to tool hook
// execution; the call sites are outside this view — confirm there.
const TOOL_HOOK_EXECUTION_TIMEOUT_MS = 10 * 60 * 1000
/**
 * Restricts a free-form model string to one of the three aliases this module
 * forwards to the fallback agent.
 *
 * @param model - Model name taken from the fallback request; may be absent.
 * @returns The same value when it is exactly `'sonnet'`, `'opus'` or
 *   `'haiku'` (case-sensitive, no trimming); otherwise `undefined`.
 */
function normalizeFallbackAgentModel(
  model: string | undefined,
): 'sonnet' | 'opus' | 'haiku' | undefined {
  switch (model) {
    case 'sonnet':
    case 'opus':
    case 'haiku':
      return model
    default:
      // Anything else (including undefined and '') is not forwarded.
      return undefined
  }
}
/**
 * Launches a background fallback agent on behalf of a hook chain action.
 *
 * The AgentTool module is imported lazily and invoked with
 * `run_in_background: true`. `request.agentType` is forwarded as
 * `subagent_type` when set, and `request.model` is forwarded only when it is
 * one of the aliases accepted by `normalizeFallbackAgentModel`.
 *
 * @param request - Prompt, description and optional agent type / model.
 * @param toolUseContext - Context handed through to `AgentTool.call`.
 * @param canUseTool - Permission callback handed through to `AgentTool.call`.
 * @returns `launched: true` with the agent id for the recognised statuses;
 *   `launched: true` with an explanatory `reason` for any other status;
 *   `launched: false` with a `reason` when the import or the call throws.
 *   This function never rejects.
 */
async function launchFallbackAgentFromHookChains(
  request: SpawnFallbackAgentRequest,
  toolUseContext: ToolUseContext,
  canUseTool: CanUseToolFn,
): Promise<SpawnFallbackAgentResponse> {
  try {
    const agentToolModule = await import('../tools/AgentTool/AgentTool.js')
    const modelAlias = normalizeFallbackAgentModel(request.model)

    const callResult = await agentToolModule.AgentTool.call(
      {
        prompt: request.prompt,
        description: request.description,
        run_in_background: true,
        ...(request.agentType ? { subagent_type: request.agentType } : {}),
        ...(modelAlias ? { model: modelAlias } : {}),
      },
      toolUseContext,
      canUseTool,
      // An assistant message with empty content is passed as the fourth argument.
      createAssistantMessage({ content: [] }),
    )

    // Only these fields of the tool result are inspected here.
    const launchInfo = callResult.data as
      | {
          status?: string
          agentId?: string
          agent_id?: string
        }
      | undefined
    const launchStatus = launchInfo?.status

    switch (launchStatus) {
      case 'async_launched':
      case 'completed':
      case 'remote_launched':
      case 'teammate_spawned':
        return {
          launched: true,
          // The id is read from either the camelCase or the snake_case field.
          agentId: launchInfo?.agentId ?? launchInfo?.agent_id,
        }
      default:
        // Unrecognised or missing status: the call did not throw, so it is
        // still reported as launched, with the status surfaced when present.
        return {
          launched: true,
          reason:
            launchStatus === undefined
              ? undefined
              : `Fallback launched with status ${launchStatus}`,
        }
    }
  } catch (error) {
    return {
      launched: false,
      reason: `Fallback launch failed: ${errorMessage(error)}`,
    }
  }
}
/**
 * Forwards a hook event to the Hook Chains dispatcher.
 *
 * Does nothing unless the `HOOK_CHAINS` feature is enabled. Otherwise it lazily
 * imports `./hookChains.js`, builds a runtime context (abort signal, sender
 * name/colour, team name, optional chain depth and optional fallback-agent
 * launcher) and calls `dispatchHookChainsForEvent`.
 *
 * Any error raised along the way is caught and written to the debug log, so
 * this function never rejects.
 *
 * @param args.eventName - Hook event that triggered the dispatch.
 * @param args.outcome - Outcome reported to the hook chain rules.
 * @param args.payload - Event payload passed through unchanged.
 * @param args.signal - Optional abort signal exposed to the dispatcher.
 * @param args.toolUseContext - Optional context; when present a fallback
 *   launcher is installed on the runtime.
 */
async function dispatchHookChainFromHookRuntime(args: {
  eventName: 'PostToolUseFailure' | 'TaskCompleted'
  outcome: HookChainOutcome
  payload: Record<string, unknown>
  signal?: AbortSignal
  toolUseContext?: ToolUseContext
}): Promise<void> {
  try {
    // Kept as a direct `if` on the feature() call, exactly as in the original.
    if (!feature('HOOK_CHAINS')) {
      return
    }

    const hookChainsModule = await import('./hookChains.js')
    const context = args.toolUseContext

    const runtime: HookChainRuntimeContext = {
      signal: args.signal,
      senderName: getAgentName() ?? undefined,
      senderColor: getTeammateColor() ?? undefined,
      teamName: getTeamName() ?? undefined,
    }

    // Chain depth is copied over only when it is a finite number.
    const depth = context?.queryTracking?.depth
    if (typeof depth === 'number' && Number.isFinite(depth)) {
      runtime.chainDepth = depth
    }

    // The permission callback is read from an optional `hookChainsCanUseTool`
    // property on the context, which the ToolUseContext type does not declare.
    const permissionCheck = (
      context as
        | (ToolUseContext & { hookChainsCanUseTool?: CanUseToolFn })
        | undefined
    )?.hookChainsCanUseTool

    if (context) {
      runtime.onSpawnFallbackAgent = permissionCheck
        ? request =>
            launchFallbackAgentFromHookChains(request, context, permissionCheck)
        : // Without a permission callback the action is refused explicitly.
          () =>
            Promise.resolve({
              launched: false,
              reason:
                'Fallback action requires canUseTool in this hook runtime context',
            })
    }

    await hookChainsModule.dispatchHookChainsForEvent({
      event: {
        eventName: args.eventName,
        outcome: args.outcome,
        payload: args.payload,
      },
      runtime,
    })
  } catch (error) {
    // Failures are logged for debugging and not rethrown.
    logForDebugging(
      `[hook-chains] Dispatch failed for ${args.eventName}: ${errorMessage(error)}`,
    )
  }
}
/**
* SessionEnd hooks run during shutdown/clear and need a much tighter bound
* than TOOL_HOOK_EXECUTION_TIMEOUT_MS. This value is used by callers as both
@@ -3502,9 +3643,11 @@ export async function* executePostToolUseFailureHooks<ToolInput>(
): AsyncGenerator<AggregatedHookResult> {
const appState = toolUseContext.getAppState()
const sessionId = toolUseContext.agentId ?? getSessionId()
if (!hasHookForEvent('PostToolUseFailure', appState, sessionId)) {
return
}
const hasPostToolFailureHooks = hasHookForEvent(
'PostToolUseFailure',
appState,
sessionId,
)
const hookInput: PostToolUseFailureHookInput = {
...createBaseHookInput(permissionMode, undefined, toolUseContext),
@@ -3516,12 +3659,33 @@ export async function* executePostToolUseFailureHooks<ToolInput>(
is_interrupt: isInterrupt,
}
yield* executeHooks({
hookInput,
toolUseID,
matchQuery: toolName,
let blockingHookCount = 0
if (hasPostToolFailureHooks) {
for await (const result of executeHooks({
hookInput,
toolUseID,
matchQuery: toolName,
signal,
timeoutMs,
toolUseContext,
})) {
if (result.blockingError) {
blockingHookCount++
}
yield result
}
}
await dispatchHookChainFromHookRuntime({
eventName: 'PostToolUseFailure',
outcome: 'failed',
payload: {
...hookInput,
hook_blocking_error_count: blockingHookCount,
hook_execution_skipped: !hasPostToolFailureHooks,
},
signal,
timeoutMs,
toolUseContext,
})
}
@@ -3807,12 +3971,36 @@ export async function* executeTaskCompletedHooks(
team_name: teamName,
}
yield* executeHooks({
let blockingHookCount = 0
let preventedContinuation = false
for await (const result of executeHooks({
hookInput,
toolUseID: randomUUID(),
signal,
timeoutMs,
toolUseContext,
})) {
if (result.blockingError) {
blockingHookCount++
}
if (result.preventContinuation) {
preventedContinuation = true
}
yield result
}
await dispatchHookChainFromHookRuntime({
eventName: 'TaskCompleted',
outcome:
blockingHookCount > 0 || preventedContinuation ? 'failed' : 'success',
payload: {
...hookInput,
hook_blocking_error_count: blockingHookCount,
hook_prevented_continuation: preventedContinuation,
},
signal,
toolUseContext,
})
}