feat: implement Hook Chains runtime integration for self-healing agent mesh MVP (#711)

* feat: implement Hook Chains runtime integration for self-healing agent mesh MVP

  - Add Hook Chains config loader, evaluator, and dispatcher in src/utils/hookChains.ts
  - Wire PostToolUseFailure hook dispatch in executePostToolUseFailureHooks()
  - Wire TaskCompleted hook dispatch in executeTaskCompletedHooks()
  - Integrate fallback-agent launcher with permission preservation (canUseTool threading)
  - Add safety hardening for config-read errors (try-catch protection)
  - Update docs with MVP runtime trigger explanation
  - Add 10 unit tests and 4 integration tests covering config, rules, guards, and actions

  This completes the self-healing agent mesh MVP by enabling declarative rule-based responses to tool failures and task completions, with fallback agent spawning, team notification, and capacity warming actions.

* Update docs/hook-chains.md

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/utils/hookChains.ts

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: address PR #711 review blockers for Hook Chains

  - Gate hook-chain dispatch behind feature('HOOK_CHAINS') and default env gate to off
  - Remove committed local artifact (agent.log) and ignore it in .gitignore
  - Revert hook dispatcher signature threading changes for canUseTool
  - Use ToolUseContext metadata hookChainsCanUseTool for fallback launch permissions
  - Make spawn_fallback_agent fail explicitly when launcher context is unavailable
  - Add config cache max age and guard map size limits to bound runtime memory
  - Update docs and tests for default-off gating and explicit fallback failure

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-04-22 13:40:23 +02:00
parent 5b9cd21e37
commit 44a2c30d5f
9 changed files with 2905 additions and 22 deletions
--- a/src/utils/hooks.ts
+++ b/src/utils/hooks.ts
@@ -10,6 +10,7 @@ import { wrapSpawn } from './ShellCommand.js'
 import { TaskOutput } from './task/TaskOutput.js'
 import { getCwd } from './cwd.js'
 import { randomUUID } from 'crypto'
+import { feature } from 'bun:bundle'
 import { formatShellPrefixCommand } from './bash/shellPrefix.js'
 import {
  getHookEnvFilePath,
@@ -134,6 +135,7 @@ import { registerPendingAsyncHook } from './hooks/AsyncHookRegistry.js'
 import { enqueuePendingNotification } from './messageQueueManager.js'
 import {
  extractTextContent,
+  createAssistantMessage,
  getLastAssistantMessage,
  wrapInSystemReminder,
 } from './messages.js'
@@ -145,6 +147,7 @@ import {
 import { createAttachmentMessage } from './attachments.js'
 import { all } from './generators.js'
 import { findToolByName, type Tools, type ToolUseContext } from '../Tool.js'
+import type { CanUseToolFn } from '../hooks/useCanUseTool.js'
 import { execPromptHook } from './hooks/execPromptHook.js'
 import type { Message, AssistantMessage } from '../types/message.js'
 import { execAgentHook } from './hooks/execAgentHook.js'
@@ -162,9 +165,147 @@ import type { AppState } from '../state/AppState.js'
 import { jsonStringify, jsonParse } from './slowOperations.js'
 import { isEnvTruthy } from './envUtils.js'
 import { errorMessage, getErrnoCode } from './errors.js'
+import { getAgentName, getTeamName, getTeammateColor } from './teammate.js'
+import type {
+  HookChainOutcome,
+  HookChainRuntimeContext,
+  SpawnFallbackAgentRequest,
+  SpawnFallbackAgentResponse,
+} from './hookChains.js'

 const TOOL_HOOK_EXECUTION_TIMEOUT_MS = 10 * 60 * 1000

+/**
+ * Narrows a free-form model string to one of the model aliases that the
+ * fallback-agent launcher forwards to the agent tool.
+ *
+ * @param model - Model name taken from the fallback request, if any.
+ * @returns The input unchanged when it is exactly 'sonnet', 'opus' or
+ * 'haiku'; otherwise undefined.
+ */
+function normalizeFallbackAgentModel(
+  model: string | undefined,
+): 'sonnet' | 'opus' | 'haiku' | undefined {
+  switch (model) {
+    case 'sonnet':
+    case 'opus':
+    case 'haiku':
+      return model
+    default:
+      return undefined
+  }
+}
+
+/**
+ * Starts a background fallback agent through AgentTool on behalf of a
+ * hook-chain action and reports the outcome. Errors are converted into a
+ * `launched: false` response rather than thrown.
+ *
+ * @param request - Prompt and description for the agent, plus an optional
+ * agent type and model.
+ * @param toolUseContext - Context forwarded unchanged to AgentTool.call.
+ * @param canUseTool - Permission callback forwarded to AgentTool.call.
+ * @returns `launched: true` whenever AgentTool.call resolves: with the agent
+ * id for the recognised statuses, otherwise with a reason naming the status
+ * (or no reason when the status is missing). `launched: false` with the
+ * error text when the import or the call throws.
+ */
+async function launchFallbackAgentFromHookChains(
+  request: SpawnFallbackAgentRequest,
+  toolUseContext: ToolUseContext,
+  canUseTool: CanUseToolFn,
+): Promise<SpawnFallbackAgentResponse> {
+  try {
+    // AgentTool is loaded on demand, at launch time, via a dynamic import.
+    const { AgentTool } = await import('../tools/AgentTool/AgentTool.js')
+    const model = normalizeFallbackAgentModel(request.model)
+
+    const launchResult = await AgentTool.call(
+      {
+        prompt: request.prompt,
+        description: request.description,
+        run_in_background: true,
+        // Optional fields are only included when they carry a value.
+        ...(request.agentType ? { subagent_type: request.agentType } : {}),
+        ...(model ? { model } : {}),
+      },
+      toolUseContext,
+      canUseTool,
+      createAssistantMessage({ content: [] }),
+    )
+
+    // Both agentId spellings are read; agentId wins when both are present.
+    const launchData = launchResult.data as
+      | { status?: string; agentId?: string; agent_id?: string }
+      | undefined
+    const launchStatus = launchData?.status
+
+    switch (launchStatus) {
+      case 'async_launched':
+      case 'completed':
+      case 'remote_launched':
+      case 'teammate_spawned':
+        return {
+          launched: true,
+          agentId: launchData?.agentId ?? launchData?.agent_id,
+        }
+      default:
+        // NOTE(review): an unrecognised or missing status is still reported
+        // as launched - confirm this is intended.
+        return {
+          launched: true,
+          reason:
+            launchStatus === undefined
+              ? undefined
+              : `Fallback launched with status ${launchStatus}`,
+        }
+    }
+  } catch (launchError) {
+    return {
+      launched: false,
+      reason: `Fallback launch failed: ${errorMessage(launchError)}`,
+    }
+  }
+}
+
+/**
+ * Best-effort bridge from the hook runtime to the Hook Chains dispatcher.
+ *
+ * Does nothing unless feature('HOOK_CHAINS') is enabled. Otherwise it builds
+ * a runtime context (abort signal, sender/team identity, optional chain
+ * depth, optional fallback-agent launcher) and forwards the event to
+ * dispatchHookChainsForEvent. Any error is logged with logForDebugging and
+ * swallowed, so this function does not reject.
+ *
+ * @param args.eventName - Hook event that triggered the dispatch.
+ * @param args.outcome - Outcome reported to the hook chain.
+ * @param args.payload - Event payload handed to the dispatcher as-is.
+ * @param args.signal - Optional abort signal placed on the runtime context.
+ * @param args.toolUseContext - Optional tool context; when present it
+ * supplies the chain depth and enables the fallback-agent launcher.
+ */
+async function dispatchHookChainFromHookRuntime(args: {
+  eventName: 'PostToolUseFailure' | 'TaskCompleted'
+  outcome: HookChainOutcome
+  payload: Record<string, unknown>
+  signal?: AbortSignal
+  toolUseContext?: ToolUseContext
+}): Promise<void> {
+  try {
+    if (!feature('HOOK_CHAINS')) {
+      return
+    }
+
+    // Imported only after the gate, so the module is not loaded when off.
+    const { dispatchHookChainsForEvent } = await import('./hookChains.js')
+
+    const runtime: HookChainRuntimeContext = {
+      signal: args.signal,
+      senderName: getAgentName() ?? undefined,
+      senderColor: getTeammateColor() ?? undefined,
+      teamName: getTeamName() ?? undefined,
+    }
+
+    // Bind once to a const so the narrowing below also holds inside the
+    // launcher closure, with no non-null assertion needed.
+    const toolUseContext = args.toolUseContext
+
+    const chainDepth = toolUseContext?.queryTracking?.depth
+    if (typeof chainDepth === 'number' && Number.isFinite(chainDepth)) {
+      runtime.chainDepth = chainDepth
+    }
+
+    if (toolUseContext) {
+      // The permission callback is read from an extra property on the
+      // context that the ToolUseContext type does not declare, hence the
+      // cast.
+      const hookChainsCanUseTool = (
+        toolUseContext as ToolUseContext & {
+          hookChainsCanUseTool?: CanUseToolFn
+        }
+      ).hookChainsCanUseTool
+
+      runtime.onSpawnFallbackAgent = async request => {
+        // Fail explicitly when no permission callback is available.
+        if (!hookChainsCanUseTool) {
+          return {
+            launched: false,
+            reason:
+              'Fallback action requires canUseTool in this hook runtime context',
+          }
+        }
+
+        return launchFallbackAgentFromHookChains(
+          request,
+          toolUseContext,
+          hookChainsCanUseTool,
+        )
+      }
+    }
+
+    await dispatchHookChainsForEvent({
+      event: {
+        eventName: args.eventName,
+        outcome: args.outcome,
+        payload: args.payload,
+      },
+      runtime,
+    })
+  } catch (error) {
+    // Hook-chain problems must not break the hook pipeline that called us.
+    logForDebugging(
+      `[hook-chains] Dispatch failed for ${args.eventName}: ${errorMessage(error)}`,
+    )
+  }
+}
+
 /**
 * SessionEnd hooks run during shutdown/clear and need a much tighter bound
 * than TOOL_HOOK_EXECUTION_TIMEOUT_MS. This value is used by callers as both
@@ -3502,9 +3643,11 @@ export async function* executePostToolUseFailureHooks<ToolInput>(
 ): AsyncGenerator<AggregatedHookResult> {
  const appState = toolUseContext.getAppState()
  const sessionId = toolUseContext.agentId ?? getSessionId()
-  if (!hasHookForEvent('PostToolUseFailure', appState, sessionId)) {
-    return
-  }
+  const hasPostToolFailureHooks = hasHookForEvent(
+    'PostToolUseFailure',
+    appState,
+    sessionId,
+  )

  const hookInput: PostToolUseFailureHookInput = {
    ...createBaseHookInput(permissionMode, undefined, toolUseContext),
@@ -3516,12 +3659,33 @@ export async function* executePostToolUseFailureHooks<ToolInput>(
    is_interrupt: isInterrupt,
  }

-  yield* executeHooks({
-    hookInput,
-    toolUseID,
-    matchQuery: toolName,
+  let blockingHookCount = 0
+
+  if (hasPostToolFailureHooks) {
+    for await (const result of executeHooks({
+      hookInput,
+      toolUseID,
+      matchQuery: toolName,
+      signal,
+      timeoutMs,
+      toolUseContext,
+    })) {
+      if (result.blockingError) {
+        blockingHookCount++
+      }
+      yield result
+    }
+  }
+
+  await dispatchHookChainFromHookRuntime({
+    eventName: 'PostToolUseFailure',
+    outcome: 'failed',
+    payload: {
+      ...hookInput,
+      hook_blocking_error_count: blockingHookCount,
+      hook_execution_skipped: !hasPostToolFailureHooks,
+    },
    signal,
-    timeoutMs,
    toolUseContext,
  })
 }
@@ -3807,12 +3971,36 @@ export async function* executeTaskCompletedHooks(
    team_name: teamName,
  }

-  yield* executeHooks({
+  let blockingHookCount = 0
+  let preventedContinuation = false
+
+  for await (const result of executeHooks({
    hookInput,
    toolUseID: randomUUID(),
    signal,
    timeoutMs,
    toolUseContext,
+  })) {
+    if (result.blockingError) {
+      blockingHookCount++
+    }
+    if (result.preventContinuation) {
+      preventedContinuation = true
+    }
+    yield result
+  }
+
+  await dispatchHookChainFromHookRuntime({
+    eventName: 'TaskCompleted',
+    outcome:
+      blockingHookCount > 0 || preventedContinuation ? 'failed' : 'success',
+    payload: {
+      ...hookInput,
+      hook_blocking_error_count: blockingHookCount,
+      hook_prevented_continuation: preventedContinuation,
+    },
+    signal,
+    toolUseContext,
  })
 }