From 15e5d19f497429c266529265460dc5ecd4dfc800 Mon Sep 17 00:00:00 2001 From: gnanam1990 Date: Tue, 14 Apr 2026 18:57:46 +0530 Subject: [PATCH] feat(repo-map): extract clean repo map branch --- docs/repo-map.md | 67 +++++ package.json | 6 + src/commands.ts | 2 + src/commands/repomap/index.ts | 17 ++ src/commands/repomap/repomap.test.ts | 56 ++++ src/commands/repomap/repomap.ts | 93 ++++++ src/context.repoMap.test.ts | 64 ++++ src/context.ts | 34 +++ .../repoMap/__fixtures__/mini-repo/fileA.ts | 29 ++ .../repoMap/__fixtures__/mini-repo/fileB.ts | 23 ++ .../repoMap/__fixtures__/mini-repo/fileC.ts | 22 ++ .../repoMap/__fixtures__/mini-repo/fileD.ts | 9 + .../repoMap/__fixtures__/mini-repo/fileE.ts | 25 ++ src/context/repoMap/cache.ts | 139 +++++++++ src/context/repoMap/gitFiles.ts | 109 +++++++ src/context/repoMap/graph.ts | 88 ++++++ src/context/repoMap/index.ts | 144 +++++++++ src/context/repoMap/pagerank.ts | 57 ++++ src/context/repoMap/parser.ts | 166 +++++++++++ .../repoMap/queries/javascript-tags.scm | 92 ++++++ src/context/repoMap/queries/python-tags.scm | 16 + .../repoMap/queries/typescript-tags.scm | 45 +++ src/context/repoMap/renderer.ts | 72 +++++ src/context/repoMap/repoMap.test.ts | 275 ++++++++++++++++++ src/context/repoMap/symbolExtractor.ts | 108 +++++++ src/context/repoMap/tokenize.ts | 15 + src/context/repoMap/types.ts | 65 +++++ src/tools.ts | 2 + src/tools/RepoMapTool/RepoMapTool.test.ts | 167 +++++++++++ src/tools/RepoMapTool/RepoMapTool.ts | 176 +++++++++++ src/tools/RepoMapTool/UI.tsx | 96 ++++++ src/tools/RepoMapTool/prompt.ts | 31 ++ 32 files changed, 2310 insertions(+) create mode 100644 docs/repo-map.md create mode 100644 src/commands/repomap/index.ts create mode 100644 src/commands/repomap/repomap.test.ts create mode 100644 src/commands/repomap/repomap.ts create mode 100644 src/context.repoMap.test.ts create mode 100644 src/context/repoMap/__fixtures__/mini-repo/fileA.ts create mode 100644 src/context/repoMap/__fixtures__/mini-repo/fileB.ts 
create mode 100644 src/context/repoMap/__fixtures__/mini-repo/fileC.ts create mode 100644 src/context/repoMap/__fixtures__/mini-repo/fileD.ts create mode 100644 src/context/repoMap/__fixtures__/mini-repo/fileE.ts create mode 100644 src/context/repoMap/cache.ts create mode 100644 src/context/repoMap/gitFiles.ts create mode 100644 src/context/repoMap/graph.ts create mode 100644 src/context/repoMap/index.ts create mode 100644 src/context/repoMap/pagerank.ts create mode 100644 src/context/repoMap/parser.ts create mode 100644 src/context/repoMap/queries/javascript-tags.scm create mode 100644 src/context/repoMap/queries/python-tags.scm create mode 100644 src/context/repoMap/queries/typescript-tags.scm create mode 100644 src/context/repoMap/renderer.ts create mode 100644 src/context/repoMap/repoMap.test.ts create mode 100644 src/context/repoMap/symbolExtractor.ts create mode 100644 src/context/repoMap/tokenize.ts create mode 100644 src/context/repoMap/types.ts create mode 100644 src/tools/RepoMapTool/RepoMapTool.test.ts create mode 100644 src/tools/RepoMapTool/RepoMapTool.ts create mode 100644 src/tools/RepoMapTool/UI.tsx create mode 100644 src/tools/RepoMapTool/prompt.ts diff --git a/docs/repo-map.md b/docs/repo-map.md new file mode 100644 index 00000000..f6393e63 --- /dev/null +++ b/docs/repo-map.md @@ -0,0 +1,67 @@ +# Codebase Intelligence — Repo Map + +The repo map feature gives the AI model structural awareness of your codebase at the start of each session. Instead of the model needing to explore the repository with `Grep`, `Glob`, and `Read` calls, it starts with a ranked summary of the most important files and their key signatures. + +## How it works + +1. **File enumeration** — Lists all tracked files via `git ls-files` (falls back to a manual directory walk when not in a git repo) +2. **Symbol extraction** — Parses each supported source file with tree-sitter to extract function, class, type, and interface definitions, plus cross-file references +3. 
**Reference graph** — Builds a directed graph where an edge from file A to file B means A references a symbol defined in B. Edges are weighted by reference count multiplied by the IDF (inverse document frequency) of the symbol name — common names like `get`, `set`, `value` contribute less +4. **PageRank** — Ranks files by structural importance using PageRank. Files imported by many others rank highest +5. **Rendering** — Walks ranked files top-down, emitting file paths and definition signatures, stopping when the token budget is reached + +Results are cached to disk (`~/.openclaude/repomap-cache/`) keyed by file path, mtime, and size. Only changed files are re-parsed on subsequent runs. + +## Supported languages + +- TypeScript (`.ts`, `.tsx`) +- JavaScript (`.js`, `.jsx`, `.mjs`, `.cjs`) +- Python (`.py`) + +Additional language grammars will be added in future releases. + +## Enabling auto-injection + +The repo map is gated behind the `REPO_MAP` feature flag, **off by default**. To enable auto-injection into the session context: + +Set the environment variable before launching: + +```bash +REPO_MAP=1 openclaude +``` + +Or add it to your shell profile for persistent use. + +When enabled, the map is built once per session and prepended to the system context alongside git status and CLAUDE.md content. The default budget is 1024 tokens. + +Auto-injection is skipped in: +- Bare mode (`--bare`) +- Remote sessions (`CLAUDE_CODE_REMOTE`) + +## The /repomap slash command + +The `/repomap` command is always available regardless of the feature flag. It lets you inspect and tune the map interactively. 
+ ``` +/repomap # Show the map with default settings (2048 tokens) +/repomap --tokens 4096 # Increase the token budget for a larger map +/repomap --focus src/tools/ # Boost specific paths in the ranking +/repomap --focus src/context.ts # Can use multiple --focus flags +/repomap --stats # Show cache statistics +/repomap --invalidate # Clear cache and rebuild from scratch +``` + +## The RepoMap tool + +The model can also call the `RepoMap` tool on demand during a session. This is useful when: +- The model needs structural context mid-conversation +- The user asks about specific areas (the model can pass `focus_files` or `focus_symbols`) +- A larger token budget is needed than the auto-injected default + +## Known limitations + +- **Signatures only** — The map shows function/class/type declarations, not implementations. The model still needs `Read` to see function bodies. +- **Cold build time** — First build on large repos (2000+ files) can take 20-30 seconds due to WASM-based parsing. Subsequent builds use the disk cache and complete in under 100ms. +- **Language coverage** — Only TypeScript, JavaScript, and Python are supported. Files in other languages are skipped. +- **TypeScript references** — The TypeScript tree-sitter query captures type annotations and `new` expressions as references, but not plain function calls. This means the ranking slightly favors type-heavy hub files. +- **Git dependency** — File enumeration uses `git ls-files` by default. Non-git repos fall back to a directory walk with hardcoded exclusions. 
diff --git a/package.json b/package.json index 91c79837..752afada 100644 --- a/package.json +++ b/package.json @@ -95,8 +95,12 @@ "fuse.js": "7.1.0", "get-east-asian-width": "1.5.0", "google-auth-library": "9.15.1", + "graphology": "^0.26.0", + "graphology-operators": "^1.6.0", + "graphology-pagerank": "^1.1.0", "https-proxy-agent": "7.0.6", "ignore": "7.0.5", + "js-tiktoken": "^1.0.16", "indent-string": "5.0.0", "jsonc-parser": "3.3.1", "lodash-es": "4.18.1", @@ -117,11 +121,13 @@ "strip-ansi": "7.2.0", "supports-hyperlinks": "3.2.0", "tree-kill": "1.2.2", + "tree-sitter-wasms": "^0.1.12", "turndown": "7.2.2", "type-fest": "4.41.0", "undici": "7.24.6", "usehooks-ts": "3.1.1", "vscode-languageserver-protocol": "3.17.5", + "web-tree-sitter": "^0.25.0", "wrap-ansi": "9.0.2", "ws": "8.20.0", "xss": "1.0.15", diff --git a/src/commands.ts b/src/commands.ts index 6fb8c600..266d3cc2 100644 --- a/src/commands.ts +++ b/src/commands.ts @@ -22,6 +22,7 @@ import ctx_viz from './commands/ctx_viz/index.js' import doctor from './commands/doctor/index.js' import onboardGithub from './commands/onboard-github/index.js' import memory from './commands/memory/index.js' +import repomap from './commands/repomap/index.js' import help from './commands/help/index.js' import ide from './commands/ide/index.js' import init from './commands/init.js' @@ -307,6 +308,7 @@ const COMMANDS = memoize((): Command[] => [ releaseNotes, reloadPlugins, rename, + repomap, resume, session, skills, diff --git a/src/commands/repomap/index.ts b/src/commands/repomap/index.ts new file mode 100644 index 00000000..da09fa9c --- /dev/null +++ b/src/commands/repomap/index.ts @@ -0,0 +1,17 @@ +/** + * /repomap command - minimal metadata only. + * Implementation is lazy-loaded from repomap.ts to reduce startup time. 
+ */ +import type { Command } from '../../commands.js' + +const repomap = { + type: 'local', + name: 'repomap', + description: + 'Show or configure the repository structural map (codebase intelligence)', + isHidden: false, + supportsNonInteractive: true, + load: () => import('./repomap.js'), +} satisfies Command + +export default repomap diff --git a/src/commands/repomap/repomap.test.ts b/src/commands/repomap/repomap.test.ts new file mode 100644 index 00000000..e2be380c --- /dev/null +++ b/src/commands/repomap/repomap.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, test } from 'bun:test' +import { parseArgs } from './repomap.js' + +describe('/repomap argument parsing', () => { + test('defaults to 2048 tokens with no flags', () => { + const result = parseArgs('') + expect(result.tokens).toBe(2048) + expect(result.focus).toEqual([]) + expect(result.invalidate).toBe(false) + expect(result.stats).toBe(false) + }) + + test('parses --tokens flag', () => { + const result = parseArgs('--tokens 4096') + expect(result.tokens).toBe(4096) + }) + + test('rejects --tokens below 256', () => { + const result = parseArgs('--tokens 100') + expect(result.tokens).toBe(2048) // falls back to default + }) + + test('rejects --tokens above 16384', () => { + const result = parseArgs('--tokens 20000') + expect(result.tokens).toBe(2048) // falls back to default + }) + + test('parses --focus flag', () => { + const result = parseArgs('--focus src/tools/') + expect(result.focus).toEqual(['src/tools/']) + }) + + test('parses multiple --focus flags', () => { + const result = parseArgs('--focus src/tools/ --focus src/context.ts') + expect(result.focus).toEqual(['src/tools/', 'src/context.ts']) + }) + + test('parses --invalidate flag', () => { + const result = parseArgs('--invalidate') + expect(result.invalidate).toBe(true) + expect(result.stats).toBe(false) + }) + + test('parses --stats flag', () => { + const result = parseArgs('--stats') + expect(result.stats).toBe(true) + 
expect(result.invalidate).toBe(false) + }) + + test('parses combined flags', () => { + const result = parseArgs('--tokens 2048 --focus src/tools/ --invalidate') + expect(result.tokens).toBe(2048) + expect(result.focus).toEqual(['src/tools/']) + expect(result.invalidate).toBe(true) + }) +}) diff --git a/src/commands/repomap/repomap.ts b/src/commands/repomap/repomap.ts new file mode 100644 index 00000000..d446eb60 --- /dev/null +++ b/src/commands/repomap/repomap.ts @@ -0,0 +1,93 @@ +import type { LocalCommandCall } from '../../types/command.js' +import { getCwd } from '../../utils/cwd.js' + +/** Parse CLI-style arguments from the command string. */ +export function parseArgs(args: string): { + tokens: number + focus: string[] + invalidate: boolean + stats: boolean +} { + const parts = args.trim().split(/\s+/).filter(Boolean) + let tokens = 2048 + const focus: string[] = [] + let invalidate = false + let stats = false + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]! + if (part === '--tokens' && i + 1 < parts.length) { + const n = parseInt(parts[i + 1]!, 10) + if (!isNaN(n) && n >= 256 && n <= 16384) { + tokens = n + } + i++ + } else if (part === '--focus' && i + 1 < parts.length) { + focus.push(parts[i + 1]!) + i++ + } else if (part === '--invalidate') { + invalidate = true + } else if (part === '--stats') { + stats = true + } + } + + return { tokens, focus, invalidate, stats } +} + +export const call: LocalCommandCall = async (args) => { + const root = getCwd() + const { tokens, focus, invalidate, stats } = parseArgs(args ?? '') + + // Lazy import to avoid loading tree-sitter at startup + const { + buildRepoMap, + invalidateCache, + getCacheStats, + } = await import('../../context/repoMap/index.js') + + if (stats) { + const cacheStats = getCacheStats(root) + const lines = [ + `Repository map cache stats:`, + ` Cache directory: ${cacheStats.cacheDir}`, + ` Cache file: ${cacheStats.cacheFile ?? 
'(none)'}`, + ` Cached entries: ${cacheStats.entryCount}`, + ` Cache exists: ${cacheStats.exists}`, + ] + return { type: 'text', value: lines.join('\n') } + } + + if (invalidate) { + invalidateCache(root) + const result = await buildRepoMap({ + root, + maxTokens: tokens, + focusFiles: focus.length > 0 ? focus : undefined, + }) + return { + type: 'text', + value: [ + `Cache invalidated and rebuilt.`, + `Files: ${result.fileCount} ranked (${result.totalFileCount} total) | Tokens: ${result.tokenCount} | Time: ${result.buildTimeMs}ms | Cache hit: ${result.cacheHit}`, + '', + result.map, + ].join('\n'), + } + } + + const result = await buildRepoMap({ + root, + maxTokens: tokens, + focusFiles: focus.length > 0 ? focus : undefined, + }) + + return { + type: 'text', + value: [ + `Repository map: ${result.fileCount} files ranked (${result.totalFileCount} total) | Tokens: ${result.tokenCount} | Time: ${result.buildTimeMs}ms | Cache hit: ${result.cacheHit}`, + '', + result.map, + ].join('\n'), + } +} diff --git a/src/context.repoMap.test.ts b/src/context.repoMap.test.ts new file mode 100644 index 00000000..0b88e5f8 --- /dev/null +++ b/src/context.repoMap.test.ts @@ -0,0 +1,64 @@ +import { afterEach, describe, expect, test } from 'bun:test' + +afterEach(() => { + delete process.env.REPO_MAP +}) + +describe('getRepoMapContext', () => { + test('returns null when REPO_MAP env flag is off (default)', async () => { + const { getRepoMapContext } = await import('./context.js') + const result = await getRepoMapContext() + expect(result).toBeNull() + }) + + test('buildRepoMap produces valid output for context injection', async () => { + process.env.REPO_MAP = '1' + const { mkdtempSync, writeFileSync, rmSync } = await import('fs') + const { tmpdir } = await import('os') + const { join } = await import('path') + const { buildRepoMap } = await import('./context/repoMap/index.js') + + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-ctx-')) + try { + writeFileSync( + join(tempDir, 
'main.ts'), + 'export function main(): void { console.log("hello") }\n', + ) + writeFileSync( + join(tempDir, 'utils.ts'), + 'import { main } from "./main"\nexport function helper(): void { main() }\n', + ) + + const result = await buildRepoMap({ + root: tempDir, + maxTokens: 1024, + }) + + // Valid map that could be injected + expect(result.map.length).toBeGreaterThan(0) + expect(result.tokenCount).toBeGreaterThan(0) + expect(result.tokenCount).toBeLessThanOrEqual(1024) + expect(typeof result.cacheHit).toBe('boolean') + } finally { + rmSync(tempDir, { recursive: true, force: true }) + const { invalidateCache } = await import('./context/repoMap/index.js') + invalidateCache(tempDir) + } + }) + + test('getSystemContext does not include repoMap key when flag is off', async () => { + const { getSystemContext } = await import('./context.js') + const result = await getSystemContext() + expect('repoMap' in result).toBe(false) + }) + + test('getSystemContext includes repoMap key when REPO_MAP env flag is on', async () => { + process.env.REPO_MAP = '1' + const { getSystemContext, getRepoMapContext } = await import('./context.js') + getRepoMapContext.cache.clear?.() + getSystemContext.cache.clear?.() + const result = await getSystemContext() + expect(typeof result.repoMap).toBe('string') + expect(result.repoMap!.length).toBeGreaterThan(0) + }) +}) diff --git a/src/context.ts b/src/context.ts index 290118bc..b15a7843 100644 --- a/src/context.ts +++ b/src/context.ts @@ -31,6 +31,7 @@ export function setSystemPromptInjection(value: string | null): void { // Clear context caches immediately when injection changes getUserContext.cache.clear?.() getSystemContext.cache.clear?.() + getRepoMapContext.cache.clear?.() } export const getGitStatus = memoize(async (): Promise => { @@ -110,6 +111,35 @@ export const getGitStatus = memoize(async (): Promise => { } }) +export const getRepoMapContext = memoize( + async (): Promise => { + const runtimeEnabled = isEnvTruthy(process.env.REPO_MAP) 
+ if (!runtimeEnabled) return null + if (isBareMode()) return null + if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) return null + + try { + const startTime = Date.now() + logForDiagnosticsNoPII('info', 'repo_map_started') + const { buildRepoMap } = await import('./context/repoMap/index.js') + const result = await buildRepoMap({ maxTokens: 1024 }) + logForDiagnosticsNoPII('info', 'repo_map_completed', { + duration_ms: Date.now() - startTime, + token_count: result.tokenCount, + file_count: result.fileCount, + cache_hit: result.cacheHit, + }) + if (!result.map || result.map.length === 0) return null + return `This is a structural map of the repository, ranked by importance. Use it to understand the codebase architecture.\n\n${result.map}` + } catch (err) { + logForDiagnosticsNoPII('warn', 'repo_map_failed', { + error: String(err), + }) + return null + } + }, +) + /** * This context is prepended to each conversation, and cached for the duration of the conversation. */ @@ -127,6 +157,8 @@ export const getSystemContext = memoize( ? null : await getGitStatus() + const repoMap = await getRepoMapContext() + // Include system prompt injection if set (for cache breaking, internal-only) const injection = feature('BREAK_CACHE_COMMAND') ? getSystemPromptInjection() @@ -135,11 +167,13 @@ export const getSystemContext = memoize( logForDiagnosticsNoPII('info', 'system_context_completed', { duration_ms: Date.now() - startTime, has_git_status: gitStatus !== null, + has_repo_map: repoMap !== null, has_injection: injection !== null, }) return { ...(gitStatus && { gitStatus }), + ...(repoMap && { repoMap }), ...(feature('BREAK_CACHE_COMMAND') && injection ? 
{ cacheBreaker: `[CACHE_BREAKER: ${injection}]`, diff --git a/src/context/repoMap/__fixtures__/mini-repo/fileA.ts b/src/context/repoMap/__fixtures__/mini-repo/fileA.ts new file mode 100644 index 00000000..f0ceb962 --- /dev/null +++ b/src/context/repoMap/__fixtures__/mini-repo/fileA.ts @@ -0,0 +1,29 @@ +// fileA — imports from fileB and fileC + +import { CacheLayer, buildCache } from './fileB' +import { createStore, type StoreConfig } from './fileC' + +export class AppController { + private cache: CacheLayer + private config: StoreConfig + + constructor(config: StoreConfig) { + this.cache = buildCache() + this.config = config + } + + initialize(): void { + const store = createStore() + this.cache.cacheSet('primary', store) + } + + getFromCache(key: string): unknown { + return this.cache.cacheGet(key) + } +} + +export function startApp(config: StoreConfig): AppController { + const app = new AppController(config) + app.initialize() + return app +} diff --git a/src/context/repoMap/__fixtures__/mini-repo/fileB.ts b/src/context/repoMap/__fixtures__/mini-repo/fileB.ts new file mode 100644 index 00000000..a063c01f --- /dev/null +++ b/src/context/repoMap/__fixtures__/mini-repo/fileB.ts @@ -0,0 +1,23 @@ +// fileB — imports from fileC + +import { DataStore, createStore } from './fileC' + +export class CacheLayer { + private store: DataStore + + constructor() { + this.store = createStore() + } + + cacheGet(key: string): unknown | undefined { + return this.store.lookup(key) + } + + cacheSet(key: string, value: unknown): void { + this.store.add(key, value) + } +} + +export function buildCache(): CacheLayer { + return new CacheLayer() +} diff --git a/src/context/repoMap/__fixtures__/mini-repo/fileC.ts b/src/context/repoMap/__fixtures__/mini-repo/fileC.ts new file mode 100644 index 00000000..8433452f --- /dev/null +++ b/src/context/repoMap/__fixtures__/mini-repo/fileC.ts @@ -0,0 +1,22 @@ +// fileC — the most imported module (imported by fileA and fileB) + +export class DataStore { 
+ private items: Map = new Map() + + add(key: string, value: unknown): void { + this.items.set(key, value) + } + + lookup(key: string): unknown | undefined { + return this.items.get(key) + } +} + +export function createStore(): DataStore { + return new DataStore() +} + +export interface StoreConfig { + maxSize: number + ttl: number +} diff --git a/src/context/repoMap/__fixtures__/mini-repo/fileD.ts b/src/context/repoMap/__fixtures__/mini-repo/fileD.ts new file mode 100644 index 00000000..4f94c5df --- /dev/null +++ b/src/context/repoMap/__fixtures__/mini-repo/fileD.ts @@ -0,0 +1,9 @@ +// fileD — imports from fileA + +import { AppController, startApp } from './fileA' + +export function runApp(): void { + const controller: AppController = startApp({ maxSize: 100, ttl: 3600 }) + const result = controller.getFromCache('test') + console.log(result) +} diff --git a/src/context/repoMap/__fixtures__/mini-repo/fileE.ts b/src/context/repoMap/__fixtures__/mini-repo/fileE.ts new file mode 100644 index 00000000..487083c0 --- /dev/null +++ b/src/context/repoMap/__fixtures__/mini-repo/fileE.ts @@ -0,0 +1,25 @@ +// fileE — isolated, no imports from other fixture files + +export interface Logger { + log(message: string): void + warn(message: string): void + error(message: string): void +} + +export class ConsoleLogger implements Logger { + log(message: string): void { + console.log(`[LOG] ${message}`) + } + + warn(message: string): void { + console.warn(`[WARN] ${message}`) + } + + error(message: string): void { + console.error(`[ERROR] ${message}`) + } +} + +export function createLogger(): Logger { + return new ConsoleLogger() +} diff --git a/src/context/repoMap/cache.ts b/src/context/repoMap/cache.ts new file mode 100644 index 00000000..72f71a39 --- /dev/null +++ b/src/context/repoMap/cache.ts @@ -0,0 +1,139 @@ +import { createHash } from 'crypto' +import { + existsSync, + mkdirSync, + readFileSync, + statSync, + writeFileSync, +} from 'fs' +import { homedir } from 'os' +import { 
join } from 'path' +import type { CacheData, CacheEntry, CacheStats, Tag } from './types.js' + +const CACHE_VERSION = 1 +const CACHE_DIR = join(homedir(), '.openclaude', 'repomap-cache') + +function getCacheFilePath(root: string): string { + const hash = createHash('sha1').update(root).digest('hex') + return join(CACHE_DIR, `${hash}.json`) +} + +function ensureCacheDir(): void { + if (!existsSync(CACHE_DIR)) { + mkdirSync(CACHE_DIR, { recursive: true }) + } +} + +/** Load cache from disk. Returns empty cache if not found or invalid. */ +export function loadCache(root: string): CacheData { + const path = getCacheFilePath(root) + try { + const raw = readFileSync(path, 'utf-8') + const data = JSON.parse(raw) as CacheData + if (data.version !== CACHE_VERSION) { + return { version: CACHE_VERSION, entries: {} } + } + return data + } catch { + return { version: CACHE_VERSION, entries: {} } + } +} + +/** Save cache to disk. */ +export function saveCache(root: string, cache: CacheData): void { + ensureCacheDir() + const path = getCacheFilePath(root) + writeFileSync(path, JSON.stringify(cache), 'utf-8') +} + +/** + * Check if a file's cached entry is still valid based on mtime and size. + * Returns the cached tags if valid, null otherwise. + */ +export function getCachedTags( + cache: CacheData, + filePath: string, + root: string, +): Tag[] | null { + const entry = cache.entries[filePath] + if (!entry) return null + + try { + const absolutePath = join(root, filePath) + const stat = statSync(absolutePath) + if (stat.mtimeMs === entry.mtimeMs && stat.size === entry.size) { + return entry.tags + } + } catch { + // File may have been deleted + } + return null +} + +/** Update the cache entry for a file. 
*/ +export function setCachedTags( + cache: CacheData, + filePath: string, + root: string, + tags: Tag[], +): void { + try { + const absolutePath = join(root, filePath) + const stat = statSync(absolutePath) + cache.entries[filePath] = { + tags, + mtimeMs: stat.mtimeMs, + size: stat.size, + } + } catch { + // If we can't stat, don't cache + } +} + +/** + * Compute a hash of the inputs that affect the rendered map. + * Used to cache the final rendered output. + */ +export function computeMapHash( + files: string[], + maxTokens: number, + focusFiles: string[], +): string { + const sorted = [...files].sort() + const input = JSON.stringify({ files: sorted, maxTokens, focusFiles: [...focusFiles].sort() }) + return createHash('sha1').update(input).digest('hex') +} + +/** Get cache statistics. */ +export function getCacheStats(root: string): CacheStats { + const cacheFile = getCacheFilePath(root) + const exists = existsSync(cacheFile) + let entryCount = 0 + + if (exists) { + try { + const data = JSON.parse(readFileSync(cacheFile, 'utf-8')) as CacheData + entryCount = Object.keys(data.entries).length + } catch { + // corrupted cache + } + } + + return { + cacheDir: CACHE_DIR, + cacheFile: exists ? cacheFile : null, + entryCount, + exists, + } +} + +/** Delete the cache for a repo root. 
*/ +export function invalidateCache(root: string): void { + const path = getCacheFilePath(root) + try { + const { unlinkSync } = require('fs') + unlinkSync(path) + } catch { + // File may not exist + } +} diff --git a/src/context/repoMap/gitFiles.ts b/src/context/repoMap/gitFiles.ts new file mode 100644 index 00000000..ede9ff77 --- /dev/null +++ b/src/context/repoMap/gitFiles.ts @@ -0,0 +1,109 @@ +import { execFile } from 'child_process' +import { readdirSync } from 'fs' +import { join, relative } from 'path' +import type { SupportedLanguage } from './types.js' + +const SUPPORTED_EXTENSIONS: Record = { + '.ts': 'typescript', + '.tsx': 'typescript', + '.js': 'javascript', + '.jsx': 'javascript', + '.mjs': 'javascript', + '.cjs': 'javascript', + '.py': 'python', +} + +const EXCLUDED_DIRS = new Set([ + 'node_modules', + 'dist', + '.git', + '.hg', + '.svn', + 'build', + 'out', + 'coverage', + '__pycache__', + '.next', + '.nuxt', + 'vendor', + '.worktrees', +]) + +const EXCLUDED_FILES = new Set([ + 'bun.lock', + 'bun.lockb', + 'package-lock.json', + 'yarn.lock', + 'pnpm-lock.yaml', +]) + +export function getLanguageForFile(filePath: string): SupportedLanguage | null { + const ext = filePath.substring(filePath.lastIndexOf('.')) + return SUPPORTED_EXTENSIONS[ext] ?? null +} + +export function isSupportedFile(filePath: string): boolean { + return getLanguageForFile(filePath) !== null +} + +/** List files using git ls-files. Returns relative paths. */ +function gitLsFiles(root: string): Promise { + return new Promise((resolve, reject) => { + execFile( + 'git', + ['ls-files', '--cached', '--others', '--exclude-standard'], + { cwd: root, maxBuffer: 10 * 1024 * 1024 }, + (error, stdout) => { + if (error) { + reject(error) + return + } + const files = stdout + .split('\n') + .map(f => f.trim()) + .filter(f => f.length > 0) + resolve(files) + }, + ) + }) +} + +/** Walk directory tree manually as fallback when git is unavailable. 
*/ +function walkDirectory(root: string, currentDir: string = root): string[] { + const results: string[] = [] + let entries: ReturnType + try { + entries = readdirSync(currentDir, { withFileTypes: true }) + } catch { + return results + } + + for (const entry of entries) { + const name = entry.name + if (entry.isDirectory()) { + if (!EXCLUDED_DIRS.has(name) && !name.startsWith('.')) { + results.push(...walkDirectory(root, join(currentDir, name))) + } + } else if (entry.isFile()) { + if (!EXCLUDED_FILES.has(name)) { + results.push(relative(root, join(currentDir, name))) + } + } + } + return results +} + +/** + * Enumerate all supported source files in the repo. + * Tries git ls-files first, falls back to manual walk. + */ +export async function getRepoFiles(root: string): Promise { + let files: string[] + try { + files = await gitLsFiles(root) + } catch { + files = walkDirectory(root) + } + + return files.filter(isSupportedFile) +} diff --git a/src/context/repoMap/graph.ts b/src/context/repoMap/graph.ts new file mode 100644 index 00000000..26bba76c --- /dev/null +++ b/src/context/repoMap/graph.ts @@ -0,0 +1,88 @@ +import Graph from 'graphology' +import type { FileTags } from './types.js' + +// Common identifiers that should contribute less weight (high IDF penalty). +const COMMON_NAMES = new Set([ + 'map', 'get', 'set', 'value', 'key', 'data', 'result', 'error', + 'name', 'type', 'id', 'index', 'item', 'items', 'list', 'options', + 'config', 'args', 'params', 'props', 'state', 'event', 'callback', + 'handler', 'fn', 'func', 'self', 'this', 'ctx', 'context', 'req', + 'res', 'next', 'err', 'msg', 'obj', 'arr', 'str', 'num', 'val', + 'init', 'start', 'stop', 'run', 'main', 'test', 'setup', 'teardown', + 'constructor', 'toString', 'valueOf', 'length', 'size', 'count', + 'push', 'pop', 'shift', 'filter', 'reduce', 'forEach', 'find', + 'log', 'warn', 'info', 'debug', 'trace', +]) + +/** + * Build a directed graph from file tags. + * + * Nodes are file paths. 
An edge from A to B means file A references + * a symbol defined in file B. Edge weight = refCount * idf(symbolName). + */ +export function buildGraph(allFileTags: FileTags[]): Graph { + const graph = new Graph({ multi: false, type: 'directed' }) + + // Build a map from symbol name → files that define it + const defIndex = new Map>() + for (const ft of allFileTags) { + for (const tag of ft.tags) { + if (tag.kind === 'def') { + let files = defIndex.get(tag.name) + if (!files) { + files = new Set() + defIndex.set(tag.name, files) + } + files.add(ft.path) + } + } + } + + // Compute IDF: log(totalFiles / filesDefiningSymbol) + // Common names get an extra penalty + const totalFiles = allFileTags.length + function idf(symbolName: string): number { + const defFiles = defIndex.get(symbolName) + const docFreq = defFiles ? defFiles.size : 1 + const rawIdf = Math.log(totalFiles / docFreq) + return COMMON_NAMES.has(symbolName) ? rawIdf * 0.1 : rawIdf + } + + // Add all files as nodes + for (const ft of allFileTags) { + if (!graph.hasNode(ft.path)) { + graph.addNode(ft.path) + } + } + + // Build edges: for each ref in a file, find where it's defined + for (const ft of allFileTags) { + // Count refs per target file + const edgeWeights = new Map() + + for (const tag of ft.tags) { + if (tag.kind !== 'ref') continue + + const defFiles = defIndex.get(tag.name) + if (!defFiles) continue + + const weight = idf(tag.name) + for (const defFile of defFiles) { + if (defFile === ft.path) continue // skip self-references + const current = edgeWeights.get(defFile) ?? 
0 + edgeWeights.set(defFile, current + weight) + } + } + + for (const [target, weight] of edgeWeights) { + if (graph.hasEdge(ft.path, target)) { + graph.setEdgeAttribute(ft.path, target, 'weight', + graph.getEdgeAttribute(ft.path, target, 'weight') + weight) + } else { + graph.addEdge(ft.path, target, { weight }) + } + } + } + + return graph +} diff --git a/src/context/repoMap/index.ts b/src/context/repoMap/index.ts new file mode 100644 index 00000000..77ace9cd --- /dev/null +++ b/src/context/repoMap/index.ts @@ -0,0 +1,144 @@ +import { + computeMapHash, + getCachedTags, + getCacheStats as getCacheStatsImpl, + invalidateCache as invalidateCacheImpl, + loadCache, + saveCache, + setCachedTags, +} from './cache.js' +import { getRepoFiles } from './gitFiles.js' +import { buildGraph } from './graph.js' +import { rankFiles } from './pagerank.js' +import { initParser } from './parser.js' +import { renderMap } from './renderer.js' +import { extractTags } from './symbolExtractor.js' +import type { FileTags, RepoMapOptions, RepoMapResult, CacheStats } from './types.js' + +const DEFAULT_MAX_TOKENS = 2048 + +/** + * Build a structural summary of a code repository. + * + * Walks the repo, extracts symbols via tree-sitter, builds an IDF-weighted + * reference graph, ranks files with PageRank, and renders a token-budgeted + * structural summary. + */ +export async function buildRepoMap(options: RepoMapOptions = {}): Promise { + const startTime = Date.now() + const root = options.root ?? process.cwd() + const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS + const focusFiles = options.focusFiles ?? [] + + // Initialize tree-sitter + await initParser() + + // Get files + const files = options.files ?? 
await getRepoFiles(root) + const totalFileCount = files.length + + // Check if we have a cached rendered map + const mapHash = computeMapHash(files, maxTokens, focusFiles) + const cache = loadCache(root) + + // Check if rendered map is cached (stored as a special entry) + const renderedCacheKey = `__rendered__${mapHash}` + const renderedEntry = cache.entries[renderedCacheKey] + if (renderedEntry && renderedEntry.tags.length === 1) { + const cachedResult = renderedEntry.tags[0]! + // The cached "tag" stores the rendered map in the signature field + // and metadata in name/line fields + try { + const meta = JSON.parse(cachedResult.name) + return { + map: cachedResult.signature, + cacheHit: true, + buildTimeMs: Date.now() - startTime, + fileCount: meta.fileCount ?? 0, + totalFileCount, + tokenCount: meta.tokenCount ?? 0, + } + } catch { + // Invalid cached data, continue with full build + } + } + + // Extract tags for all files (using per-file cache). + // Separate cached hits from files needing extraction. 
+ const allFileTags: FileTags[] = [] + const uncachedFiles: string[] = [] + + for (const file of files) { + const cachedTags = getCachedTags(cache, file, root) + if (cachedTags) { + allFileTags.push({ path: file, tags: cachedTags }) + } else { + uncachedFiles.push(file) + } + } + + // Process uncached files in parallel batches + const BATCH_SIZE = 50 + for (let i = 0; i < uncachedFiles.length; i += BATCH_SIZE) { + const batch = uncachedFiles.slice(i, i + BATCH_SIZE) + const results = await Promise.all( + batch.map(file => extractTags(file, root).catch(() => null)) + ) + for (let j = 0; j < results.length; j++) { + const fileTags = results[j] + if (fileTags) { + allFileTags.push(fileTags) + setCachedTags(cache, fileTags.path, root, fileTags.tags) + } + } + } + + // Build graph and rank + const graph = buildGraph(allFileTags) + const ranked = rankFiles(graph, focusFiles) + + // Build a lookup map + const fileTagsMap = new Map() + for (const ft of allFileTags) { + fileTagsMap.set(ft.path, ft) + } + + // Render + const { map, tokenCount, fileCount } = renderMap(ranked, fileTagsMap, maxTokens) + + // Cache the rendered result + cache.entries[renderedCacheKey] = { + tags: [{ + kind: 'def', + name: JSON.stringify({ fileCount, tokenCount }), + line: 0, + signature: map, + }], + mtimeMs: Date.now(), + size: 0, + } + + saveCache(root, cache) + + return { + map, + cacheHit: false, + buildTimeMs: Date.now() - startTime, + fileCount, + totalFileCount, + tokenCount, + } +} + +/** Invalidate the disk cache for a given repo root. */ +export function invalidateCache(root?: string): void { + invalidateCacheImpl(root ?? process.cwd()) +} + +/** Get cache statistics for a given repo root. */ +export function getCacheStats(root?: string): CacheStats { + return getCacheStatsImpl(root ?? 
process.cwd()) +} + +// Re-export types for convenience +export type { RepoMapOptions, RepoMapResult, CacheStats } from './types.js' diff --git a/src/context/repoMap/pagerank.ts b/src/context/repoMap/pagerank.ts new file mode 100644 index 00000000..9fd48289 --- /dev/null +++ b/src/context/repoMap/pagerank.ts @@ -0,0 +1,57 @@ +import type Graph from 'graphology' +import pagerank from 'graphology-pagerank' + +export interface RankedFile { + path: string + score: number +} + +/** + * Run PageRank on the file reference graph. + * + * focusFiles get a 100x boost in the personalization vector so they + * and their neighbors rank higher. + * + * Returns files sorted by score descending. + */ +export function rankFiles( + graph: Graph, + focusFiles: string[] = [], +): RankedFile[] { + if (graph.order === 0) return [] + + const hasPersonalization = focusFiles.length > 0 + + // graphology-pagerank accepts getEdgeWeight option + const scores: Record = pagerank(graph, { + alpha: 0.85, + maxIterations: 100, + tolerance: 1e-6, + getEdgeWeight: 'weight', + }) + + // Apply focus boost post-hoc if focus files are specified + if (hasPersonalization) { + for (const file of focusFiles) { + if (scores[file] !== undefined) { + scores[file] *= 100 + } + } + + // Also boost direct neighbors of focus files + for (const file of focusFiles) { + if (!graph.hasNode(file)) continue + graph.forEachNeighbor(file, (neighbor) => { + if (scores[neighbor] !== undefined) { + scores[neighbor] *= 10 + } + }) + } + } + + const ranked: RankedFile[] = Object.entries(scores) + .map(([path, score]) => ({ path, score })) + .sort((a, b) => b.score - a.score) + + return ranked +} diff --git a/src/context/repoMap/parser.ts b/src/context/repoMap/parser.ts new file mode 100644 index 00000000..c686f988 --- /dev/null +++ b/src/context/repoMap/parser.ts @@ -0,0 +1,166 @@ +import { existsSync, readFileSync } from 'fs' +import { join, resolve } from 'path' +import { fileURLToPath } from 'url' +import type { 
SupportedLanguage } from './types.js' + +// Resolve project root in both source and bundled modes. +// In source (bun test/dev): import.meta.url is src/context/repoMap/parser.ts → go up 4 levels +// In bundle (node dist/cli.mjs): import.meta.url is dist/cli.mjs → go up 2 levels +const __filename = fileURLToPath(import.meta.url) +const __projectRoot = join( + __filename, + process.env.NODE_ENV === 'test' ? '../../../../' : '../../', +) + +// web-tree-sitter types +type TreeSitterParser = { + parse(input: string): { rootNode: unknown } + setLanguage(lang: unknown): void + delete(): void +} + +type TreeSitterLanguage = { + query(source: string): unknown +} + +// The actual module exports { Parser, Language } as named exports +let ParserClass: (new () => TreeSitterParser) & { + init(opts?: { locateFile?: (file: string) => string }): Promise +} | null = null +let LanguageLoader: { + load(path: string | Uint8Array): Promise +} | null = null + +let initialized = false +const languageCache = new Map() +const queryCache = new Map() + +/** Resolve the path to the tree-sitter WASM file. */ +function getTreeSitterWasmPath(): string { + // Try require.resolve first (works in source mode with node_modules) + try { + const webTsDir = resolve( + require.resolve('web-tree-sitter/package.json'), + '..', + ) + return join(webTsDir, 'tree-sitter.wasm') + } catch { + // Fallback: relative to project root + return join(__projectRoot, 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm') + } +} + +/** Resolve the path to a language WASM grammar file. */ +function getLanguageWasmPath(language: SupportedLanguage): string { + const wasmName = language === 'typescript' ? 'tree-sitter-typescript' : + language === 'javascript' ? 
'tree-sitter-javascript' : + `tree-sitter-${language}` + + try { + const wasmDir = resolve( + require.resolve('tree-sitter-wasms/package.json'), + '..', + 'out', + ) + return join(wasmDir, `${wasmName}.wasm`) + } catch { + return join(__projectRoot, 'node_modules', 'tree-sitter-wasms', 'out', `${wasmName}.wasm`) + } +} + +/** Resolve the path to a tag query .scm file for the given language. */ +function getQueryPath(language: SupportedLanguage): string { + // Try source location first (works in both source and when queries are alongside the bundle) + const sourcePath = join(__projectRoot, 'src', 'context', 'repoMap', 'queries', `${language}-tags.scm`) + if (existsSync(sourcePath)) { + return sourcePath + } + // Fallback: relative to this file (source mode) + return join(fileURLToPath(import.meta.url), '..', 'queries', `${language}-tags.scm`) +} + +/** Initialize the tree-sitter WASM module. */ +export async function initParser(): Promise { + if (initialized) return + + try { + const mod = await import('web-tree-sitter') + ParserClass = mod.Parser as typeof ParserClass + LanguageLoader = mod.Language as typeof LanguageLoader + + const wasmPath = getTreeSitterWasmPath() + await ParserClass!.init({ + locateFile: () => wasmPath, + }) + initialized = true + } catch (err) { + // eslint-disable-next-line no-console + console.error('[repoMap] Failed to initialize tree-sitter:', err) + throw err + } +} + +/** Load a language grammar. Cached after first load. */ +export async function loadLanguage(language: SupportedLanguage): Promise { + if (languageCache.has(language)) { + return languageCache.get(language)! 
+ } + + if (!initialized) { + await initParser() + } + + try { + const wasmPath = getLanguageWasmPath(language) + const lang = await LanguageLoader!.load(wasmPath) + languageCache.set(language, lang) + return lang + } catch (err) { + // eslint-disable-next-line no-console + console.error(`[repoMap] Failed to load ${language} grammar:`, err) + return null + } +} + +/** Load the tag query for a language. Cached after first load. */ +export function loadQuery(language: SupportedLanguage): string | null { + if (queryCache.has(language)) { + return queryCache.get(language)! + } + + try { + const queryPath = getQueryPath(language) + const content = readFileSync(queryPath, 'utf-8') + queryCache.set(language, content) + return content + } catch { + return null + } +} + +/** Create a new parser instance with the given language set. */ +export async function createParser(language: SupportedLanguage): Promise { + if (!initialized) { + await initParser() + } + + const lang = await loadLanguage(language) + if (!lang) return null + + try { + const parser = new ParserClass!() + parser.setLanguage(lang) + return parser + } catch { + return null + } +} + +/** Clear all caches (useful for testing). */ +export function clearParserCaches(): void { + languageCache.clear() + queryCache.clear() + initialized = false + ParserClass = null + LanguageLoader = null +} diff --git a/src/context/repoMap/queries/javascript-tags.scm b/src/context/repoMap/queries/javascript-tags.scm new file mode 100644 index 00000000..904c68ac --- /dev/null +++ b/src/context/repoMap/queries/javascript-tags.scm @@ -0,0 +1,92 @@ +; Source: https://github.com/Aider-AI/aider/blob/main/aider/queries/tree-sitter-languages/javascript-tags.scm +; License: MIT (Apache-2.0 dual) — see https://github.com/Aider-AI/aider/blob/main/LICENSE +; Copied for use in openclaude's repo-map feature. + +( + (comment)* @doc + . + (method_definition + name: (property_identifier) @name.definition.method) @definition.method + (#not-eq? 
@name.definition.method "constructor") + (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") + (#select-adjacent! @doc @definition.method) +) + +( + (comment)* @doc + . + [ + (class + name: (_) @name.definition.class) + (class_declaration + name: (_) @name.definition.class) + ] @definition.class + (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") + (#select-adjacent! @doc @definition.class) +) + +( + (comment)* @doc + . + [ + (function + name: (identifier) @name.definition.function) + (function_declaration + name: (identifier) @name.definition.function) + (generator_function + name: (identifier) @name.definition.function) + (generator_function_declaration + name: (identifier) @name.definition.function) + ] @definition.function + (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") + (#select-adjacent! @doc @definition.function) +) + +( + (comment)* @doc + . + (lexical_declaration + (variable_declarator + name: (identifier) @name.definition.function + value: [(arrow_function) (function)]) @definition.function) + (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") + (#select-adjacent! @doc @definition.function) +) + +( + (comment)* @doc + . + (variable_declaration + (variable_declarator + name: (identifier) @name.definition.function + value: [(arrow_function) (function)]) @definition.function) + (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") + (#select-adjacent! @doc @definition.function) +) + +(assignment_expression + left: [ + (identifier) @name.definition.function + (member_expression + property: (property_identifier) @name.definition.function) + ] + right: [(arrow_function) (function)] +) @definition.function + +(pair + key: (property_identifier) @name.definition.function + value: [(arrow_function) (function)]) @definition.function + +( + (call_expression + function: (identifier) @name.reference.call) @reference.call + (#not-match? 
@name.reference.call "^(require)$") +) + +(call_expression + function: (member_expression + property: (property_identifier) @name.reference.call) + arguments: (_) @reference.call) + +(new_expression + constructor: (_) @name.reference.class) @reference.class diff --git a/src/context/repoMap/queries/python-tags.scm b/src/context/repoMap/queries/python-tags.scm new file mode 100644 index 00000000..30959d63 --- /dev/null +++ b/src/context/repoMap/queries/python-tags.scm @@ -0,0 +1,16 @@ +; Source: https://github.com/Aider-AI/aider/blob/main/aider/queries/tree-sitter-languages/python-tags.scm +; License: MIT (Apache-2.0 dual) — see https://github.com/Aider-AI/aider/blob/main/LICENSE +; Copied for use in openclaude's repo-map feature. + +(class_definition + name: (identifier) @name.definition.class) @definition.class + +(function_definition + name: (identifier) @name.definition.function) @definition.function + +(call + function: [ + (identifier) @name.reference.call + (attribute + attribute: (identifier) @name.reference.call) + ]) @reference.call diff --git a/src/context/repoMap/queries/typescript-tags.scm b/src/context/repoMap/queries/typescript-tags.scm new file mode 100644 index 00000000..45c54a2c --- /dev/null +++ b/src/context/repoMap/queries/typescript-tags.scm @@ -0,0 +1,45 @@ +; Source: https://github.com/Aider-AI/aider/blob/main/aider/queries/tree-sitter-languages/typescript-tags.scm +; License: MIT (Apache-2.0 dual) — see https://github.com/Aider-AI/aider/blob/main/LICENSE +; Copied for use in openclaude's repo-map feature. 
+ +(function_signature + name: (identifier) @name.definition.function) @definition.function + +(method_signature + name: (property_identifier) @name.definition.method) @definition.method + +(abstract_method_signature + name: (property_identifier) @name.definition.method) @definition.method + +(abstract_class_declaration + name: (type_identifier) @name.definition.class) @definition.class + +(module + name: (identifier) @name.definition.module) @definition.module + +(interface_declaration + name: (type_identifier) @name.definition.interface) @definition.interface + +(type_annotation + (type_identifier) @name.reference.type) @reference.type + +(new_expression + constructor: (identifier) @name.reference.class) @reference.class + +(function_declaration + name: (identifier) @name.definition.function) @definition.function + +(method_definition + name: (property_identifier) @name.definition.method) @definition.method + +(class_declaration + name: (type_identifier) @name.definition.class) @definition.class + +(interface_declaration + name: (type_identifier) @name.definition.class) @definition.class + +(type_alias_declaration + name: (type_identifier) @name.definition.type) @definition.type + +(enum_declaration + name: (identifier) @name.definition.enum) @definition.enum diff --git a/src/context/repoMap/renderer.ts b/src/context/repoMap/renderer.ts new file mode 100644 index 00000000..632d7a29 --- /dev/null +++ b/src/context/repoMap/renderer.ts @@ -0,0 +1,72 @@ +import type { FileTags, Tag } from './types.js' +import type { RankedFile } from './pagerank.js' +import { countTokens } from './tokenize.js' + +/** + * Render a token-budgeted repo map from ranked files and their tags. + * + * Format per file: + * path/to/file.ts: + * ⋮ + * signature line for def 1 + * ⋮ + * signature line for def 2 + * ⋮ + * + * Files that don't fit within the budget are dropped entirely. 
+ */ +export function renderMap( + rankedFiles: RankedFile[], + fileTagsMap: Map, + maxTokens: number, +): { map: string; tokenCount: number; fileCount: number } { + const sections: string[] = [] + let currentTokens = 0 + let fileCount = 0 + + for (const { path } of rankedFiles) { + const ft = fileTagsMap.get(path) + if (!ft) continue + + // Only include definitions in the rendered output + const defs = ft.tags + .filter(t => t.kind === 'def') + .sort((a, b) => a.line - b.line) + + if (defs.length === 0) continue + + const section = renderFileSection(path, defs) + const sectionTokens = countTokens(section) + + // Would this section bust the budget? + if (currentTokens + sectionTokens > maxTokens) { + // Don't include partial files — drop entirely + break + } + + sections.push(section) + currentTokens += sectionTokens + fileCount++ + } + + const map = sections.join('\n') + return { map, tokenCount: currentTokens, fileCount } +} + +function renderFileSection(path: string, defs: Tag[]): string { + const lines: string[] = [`${path}:`] + let lastLine = 0 + + for (const def of defs) { + // Add elision marker if there's a gap + if (def.line > lastLine + 1) { + lines.push('⋮') + } + lines.push(` ${def.signature}`) + lastLine = def.line + } + + // Trailing elision marker + lines.push('⋮') + return lines.join('\n') +} diff --git a/src/context/repoMap/repoMap.test.ts b/src/context/repoMap/repoMap.test.ts new file mode 100644 index 00000000..1d8af0ae --- /dev/null +++ b/src/context/repoMap/repoMap.test.ts @@ -0,0 +1,275 @@ +import { afterEach, beforeAll, describe, expect, test } from 'bun:test' +import { cpSync, mkdtempSync, rmSync, utimesSync, writeFileSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { invalidateCache, buildRepoMap } from './index.js' +import { extractTags } from './symbolExtractor.js' +import { buildGraph } from './graph.js' +import { initParser } from './parser.js' +import { countTokens } from './tokenize.js' + +const 
FIXTURE_ROOT = join(import.meta.dir, '__fixtures__', 'mini-repo') +const FIXTURE_FILES = ['fileA.ts', 'fileB.ts', 'fileC.ts', 'fileD.ts', 'fileE.ts'] + +beforeAll(async () => { + await initParser() +}) + +// Clean up cache between tests to avoid cross-test interference +afterEach(() => { + invalidateCache(FIXTURE_ROOT) +}) + +describe('symbol extraction', () => { + test('extracts function and class defs from a TypeScript file', async () => { + const result = await extractTags('fileC.ts', FIXTURE_ROOT) + expect(result).not.toBeNull() + + const defs = result!.tags.filter(t => t.kind === 'def') + const defNames = defs.map(t => t.name) + + expect(defNames).toContain('DataStore') + expect(defNames).toContain('createStore') + expect(defNames).toContain('StoreConfig') + + // All defs should have kind='def' + for (const d of defs) { + expect(d.kind).toBe('def') + } + }) + + test('extracts references to imported symbols', async () => { + const result = await extractTags('fileA.ts', FIXTURE_ROOT) + expect(result).not.toBeNull() + + const refs = result!.tags.filter(t => t.kind === 'ref') + const refNames = refs.map(t => t.name) + + // fileA imports CacheLayer from fileB and StoreConfig from fileC + expect(refNames).toContain('CacheLayer') + expect(refNames).toContain('StoreConfig') + }) +}) + +describe('graph', () => { + test('builds edges between files that reference each other\'s symbols', async () => { + const allTags = [] + for (const f of FIXTURE_FILES) { + const tags = await extractTags(f, FIXTURE_ROOT) + if (tags) allTags.push(tags) + } + + const graph = buildGraph(allTags) + + // fileA imports from fileB (references CacheLayer defined in fileB) + expect(graph.hasEdge('fileA.ts', 'fileB.ts')).toBe(true) + + // fileA imports from fileC (references StoreConfig, DataStore defined in fileC) + expect(graph.hasEdge('fileA.ts', 'fileC.ts')).toBe(true) + + // fileB imports from fileC (references DataStore defined in fileC) + expect(graph.hasEdge('fileB.ts', 
'fileC.ts')).toBe(true) + + // fileD imports from fileA + expect(graph.hasEdge('fileD.ts', 'fileA.ts')).toBe(true) + + // fileE is isolated — no edges to/from it + expect(graph.degree('fileE.ts')).toBe(0) + }) +}) + +describe('pagerank', () => { + test('ranks the most-imported file highest', async () => { + const result = await buildRepoMap({ + root: FIXTURE_ROOT, + maxTokens: 2048, + files: FIXTURE_FILES, + }) + + // The map starts with the highest-ranked file + const firstFile = result.map.split('\n')[0] + expect(firstFile).toBe('fileC.ts:') + + // fileE should be ranked lowest (or near last) + const lines = result.map.split('\n') + const filePositions = FIXTURE_FILES.map(f => { + const idx = lines.findIndex(l => l === `${f}:`) + return { file: f, position: idx } + }).filter(x => x.position >= 0) + .sort((a, b) => a.position - b.position) + + // fileC should be first + expect(filePositions[0]!.file).toBe('fileC.ts') + + // fileE should be last (or among the last) + const lastFile = filePositions[filePositions.length - 1]!.file + expect(['fileD.ts', 'fileE.ts']).toContain(lastFile) + }) +}) + +describe('renderer', () => { + test('respects the token budget within 5%', async () => { + const maxTokens = 500 + const result = await buildRepoMap({ + root: FIXTURE_ROOT, + maxTokens, + files: FIXTURE_FILES, + }) + + const actualTokens = countTokens(result.map) + expect(actualTokens).toBeLessThanOrEqual(maxTokens * 1.05) + expect(result.tokenCount).toBeLessThanOrEqual(maxTokens * 1.05) + }) + + test('drops files that don\'t fit rather than listing their names', async () => { + // Very tight budget — should only fit 1-2 files + const result = await buildRepoMap({ + root: FIXTURE_ROOT, + maxTokens: 100, + files: FIXTURE_FILES, + }) + + // Count how many files appear as headers in the output + const fileHeaders = result.map.split('\n').filter(l => l.endsWith(':') && !l.startsWith(' ')) + + // Every file header in the output should have its signatures listed + for (const 
header of fileHeaders) { + // The file must have at least one signature line after it + const headerIdx = result.map.indexOf(header) + const afterHeader = result.map.slice(headerIdx + header.length) + // Should have content (signatures), not just the filename + expect(afterHeader.trim().length).toBeGreaterThan(0) + } + + // Should have fewer files than total + expect(fileHeaders.length).toBeLessThan(FIXTURE_FILES.length) + }) +}) + +describe('cache', () => { + test('second build of unchanged fixture uses the cache', async () => { + // First build (cold) + const result1 = await buildRepoMap({ + root: FIXTURE_ROOT, + maxTokens: 2048, + files: FIXTURE_FILES, + }) + expect(result1.cacheHit).toBe(false) + + // Second build (warm) + const result2 = await buildRepoMap({ + root: FIXTURE_ROOT, + maxTokens: 2048, + files: FIXTURE_FILES, + }) + expect(result2.cacheHit).toBe(true) + expect(result2.buildTimeMs).toBeLessThan(result1.buildTimeMs) + + // Output should be identical + expect(result2.map).toBe(result1.map) + }) + + test('modifying a file invalidates only that file', async () => { + // Create a temp copy of the fixture + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-test-')) + try { + for (const f of FIXTURE_FILES) { + cpSync(join(FIXTURE_ROOT, f), join(tempDir, f)) + } + + // First build + const result1 = await buildRepoMap({ + root: tempDir, + maxTokens: 2048, + files: FIXTURE_FILES, + }) + expect(result1.cacheHit).toBe(false) + + // Touch one file to change its mtime + const targetFile = join(tempDir, 'fileE.ts') + const now = new Date() + utimesSync(targetFile, now, now) + + // Second build — rendered cache should be invalidated because file list hash + // includes the files and the rendered map hash changes with different mtimes + // for the per-file cache check + invalidateCache(tempDir) + const result2 = await buildRepoMap({ + root: tempDir, + maxTokens: 2048, + files: FIXTURE_FILES, + }) + // The per-file cache for fileE should miss (mtime changed), + // 
but other files should still hit the per-file cache + expect(result2.cacheHit).toBe(false) + + // Output should still be valid + expect(result2.map.length).toBeGreaterThan(0) + expect(result2.fileCount).toBe(result1.fileCount) + } finally { + rmSync(tempDir, { recursive: true, force: true }) + invalidateCache(tempDir) + } + }) +}) + +describe('gitFiles', () => { + test('falls back gracefully when not in a git repo', async () => { + // Create a temp directory with source files but NO .git + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-nogit-')) + try { + writeFileSync( + join(tempDir, 'hello.ts'), + 'export function hello(): string { return "world" }\n', + ) + writeFileSync( + join(tempDir, 'utils.ts'), + 'export function add(a: number, b: number): number { return a + b }\n', + ) + + const result = await buildRepoMap({ + root: tempDir, + maxTokens: 1024, + }) + + // Should succeed without throwing + expect(result.map.length).toBeGreaterThan(0) + expect(result.totalFileCount).toBeGreaterThan(0) + } finally { + rmSync(tempDir, { recursive: true, force: true }) + invalidateCache(tempDir) + } + }) +}) + +describe('error handling', () => { + test('no crash on malformed source file', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-malformed-')) + try { + // Valid file + writeFileSync( + join(tempDir, 'good.ts'), + 'export function good(): number { return 1 }\n', + ) + // Malformed file — severe syntax errors + writeFileSync( + join(tempDir, 'bad.ts'), + '}{}{}{export classclass [[[ function ,,, @@@ ###\n', + ) + + const result = await buildRepoMap({ + root: tempDir, + maxTokens: 1024, + files: ['good.ts', 'bad.ts'], + }) + + // Should complete successfully + expect(result.map.length).toBeGreaterThan(0) + // The good file should be in the output + expect(result.map).toContain('good.ts') + } finally { + rmSync(tempDir, { recursive: true, force: true }) + invalidateCache(tempDir) + } + }) +}) diff --git a/src/context/repoMap/symbolExtractor.ts 
b/src/context/repoMap/symbolExtractor.ts new file mode 100644 index 00000000..d978e6d6 --- /dev/null +++ b/src/context/repoMap/symbolExtractor.ts @@ -0,0 +1,108 @@ +import { readFileSync } from 'fs' +import { join } from 'path' +import { getLanguageForFile } from './gitFiles.js' +import { createParser, loadLanguage, loadQuery } from './parser.js' +import type { FileTags, Tag } from './types.js' + +/** + * Extract definition and reference tags from a single source file. + * Returns null if the file can't be parsed (unsupported language, parse error, etc). + */ +export async function extractTags( + filePath: string, + root: string, +): Promise { + const language = getLanguageForFile(filePath) + if (!language) return null + + const absolutePath = join(root, filePath) + let source: string + try { + source = readFileSync(absolutePath, 'utf-8') + } catch { + return null + } + + const lines = source.split('\n') + + const parser = await createParser(language) + if (!parser) return null + + const querySource = loadQuery(language) + if (!querySource) { + parser.delete() + return null + } + + try { + const tree = parser.parse(source) as { + rootNode: unknown + } + + const lang = await loadLanguage(language) + if (!lang) { + parser.delete() + return null + } + + // Use the non-deprecated Query constructor + const { Query } = await import('web-tree-sitter') + const query = new Query(lang, querySource) as { + matches(rootNode: unknown): Array<{ + pattern: number + captures: Array<{ + name: string + node: { + text: string + startPosition: { row: number; column: number } + endPosition: { row: number; column: number } + } + }> + }> + } + + const matches = query.matches(tree.rootNode) + const tags: Tag[] = [] + const seen = new Set() // dedup by kind+name+line + + for (const match of matches) { + let name: string | null = null + let kind: 'def' | 'ref' | null = null + let subKind: string | undefined + let lineRow = 0 + + for (const capture of match.captures) { + const captureName = 
capture.name + + // Name captures: name.definition.X or name.reference.X + if (captureName.startsWith('name.definition.')) { + name = capture.node.text + kind = 'def' + subKind = captureName.slice('name.definition.'.length) + lineRow = capture.node.startPosition.row + } else if (captureName.startsWith('name.reference.')) { + name = capture.node.text + kind = 'ref' + subKind = captureName.slice('name.reference.'.length) + lineRow = capture.node.startPosition.row + } + } + + if (name && kind) { + const key = `${kind}:${name}:${lineRow}` + if (!seen.has(key)) { + seen.add(key) + const line = lineRow + 1 // convert 0-based to 1-based + const signature = lines[lineRow]?.trimEnd() ?? '' + tags.push({ kind, name, line, signature, subKind }) + } + } + } + + parser.delete() + return { path: filePath, tags } + } catch { + parser.delete() + return null + } +} diff --git a/src/context/repoMap/tokenize.ts b/src/context/repoMap/tokenize.ts new file mode 100644 index 00000000..274a6efb --- /dev/null +++ b/src/context/repoMap/tokenize.ts @@ -0,0 +1,15 @@ +import { getEncoding, type Tiktoken } from 'js-tiktoken' + +let encoder: Tiktoken | null = null + +function getEncoder() { + if (!encoder) { + encoder = getEncoding('cl100k_base') + } + return encoder +} + +/** Count the number of tokens in a string using cl100k_base encoding. */ +export function countTokens(text: string): number { + return getEncoder().encode(text).length +} diff --git a/src/context/repoMap/types.ts b/src/context/repoMap/types.ts new file mode 100644 index 00000000..d9072ed5 --- /dev/null +++ b/src/context/repoMap/types.ts @@ -0,0 +1,65 @@ +export interface Tag { + /** 'def' for definitions, 'ref' for references */ + kind: 'def' | 'ref' + /** Symbol name (e.g. function name, class name) */ + name: string + /** 1-based line number in the source file */ + line: number + /** The full line of source code at this position (used as signature for defs) */ + signature: string + /** Sub-kind from the query (e.g. 
'function', 'class', 'method', 'type') */ + subKind?: string +} + +export interface FileTags { + /** Relative path from the repo root */ + path: string + /** All tags extracted from this file */ + tags: Tag[] +} + +export interface RepoMapOptions { + /** Root directory of the repo (defaults to cwd) */ + root?: string + /** Maximum token budget for the rendered map */ + maxTokens?: number + /** Files to boost in PageRank (relative paths) */ + focusFiles?: string[] + /** Override the list of files to process (relative paths) */ + files?: string[] +} + +export interface RepoMapResult { + /** The rendered repo map string */ + map: string + /** Whether the result came from cache */ + cacheHit: boolean + /** Time in milliseconds to build the map */ + buildTimeMs: number + /** Number of files included in the rendered map */ + fileCount: number + /** Total number of files processed */ + totalFileCount: number + /** Actual token count of the rendered map */ + tokenCount: number +} + +export interface CacheEntry { + tags: Tag[] + mtimeMs: number + size: number +} + +export interface CacheData { + version: number + entries: Record +} + +export interface CacheStats { + cacheDir: string + cacheFile: string | null + entryCount: number + exists: boolean +} + +export type SupportedLanguage = 'typescript' | 'javascript' | 'python' diff --git a/src/tools.ts b/src/tools.ts index d48ebc42..d0583f6b 100644 --- a/src/tools.ts +++ b/src/tools.ts @@ -48,6 +48,7 @@ import { TodoWriteTool } from './tools/TodoWriteTool/TodoWriteTool.js' import { ExitPlanModeV2Tool } from './tools/ExitPlanModeTool/ExitPlanModeV2Tool.js' import { TestingPermissionTool } from './tools/testing/TestingPermissionTool.js' import { GrepTool } from './tools/GrepTool/GrepTool.js' +import { RepoMapTool } from './tools/RepoMapTool/RepoMapTool.js' // Lazy require to break circular dependency: tools.ts -> TeamCreateTool/TeamDeleteTool -> ... 
-> tools.ts /* eslint-disable @typescript-eslint/no-require-imports */ const getTeamCreateTool = () => @@ -188,6 +189,7 @@ export function getAllBaseTools(): Tools { // trick as ripgrep). When available, find/grep in Claude's shell are aliased // to these fast tools, so the dedicated Glob/Grep tools are unnecessary. ...(hasEmbeddedSearchTools() ? [] : [GlobTool, GrepTool]), + RepoMapTool, ExitPlanModeV2Tool, FileReadTool, FileEditTool, diff --git a/src/tools/RepoMapTool/RepoMapTool.test.ts b/src/tools/RepoMapTool/RepoMapTool.test.ts new file mode 100644 index 00000000..1658f36b --- /dev/null +++ b/src/tools/RepoMapTool/RepoMapTool.test.ts @@ -0,0 +1,167 @@ +import { beforeAll, describe, expect, test } from 'bun:test' +import { cpSync, mkdtempSync, rmSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { initParser } from '../../context/repoMap/parser.js' +import { invalidateCache } from '../../context/repoMap/index.js' +import { RepoMapTool } from './RepoMapTool.js' +import { getToolUseSummary } from './UI.js' + +const FIXTURE_ROOT = join( + import.meta.dir, + '..', + '..', + 'context', + 'repoMap', + '__fixtures__', + 'mini-repo', +) +const FIXTURE_FILES = [ + 'fileA.ts', + 'fileB.ts', + 'fileC.ts', + 'fileD.ts', + 'fileE.ts', +] + +beforeAll(async () => { + await initParser() +}) + + +describe('RepoMapTool schema', () => { + test('validates a minimal input {}', () => { + const schema = RepoMapTool.inputSchema + const result = schema.safeParse({}) + expect(result.success).toBe(true) + }) + + test('rejects max_tokens below 256', () => { + const schema = RepoMapTool.inputSchema + const result = schema.safeParse({ max_tokens: 100 }) + expect(result.success).toBe(false) + }) + + test('rejects max_tokens above 16384', () => { + const schema = RepoMapTool.inputSchema + const result = schema.safeParse({ max_tokens: 20000 }) + expect(result.success).toBe(false) + }) + + test('accepts focus_files as string[]', () => { + const schema = 
RepoMapTool.inputSchema + const result = schema.safeParse({ + focus_files: ['src/tools/', 'src/context.ts'], + }) + expect(result.success).toBe(true) + }) +}) + +describe('RepoMapTool call', () => { + test('returns a rendered map for a directory', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-tool-')) + try { + for (const f of FIXTURE_FILES) { + cpSync(join(FIXTURE_ROOT, f), join(tempDir, f)) + } + + // We need to call buildRepoMap directly since getCwd patching is complex + const { buildRepoMap } = await import( + '../../context/repoMap/index.js' + ) + const result = await buildRepoMap({ + root: tempDir, + maxTokens: 1024, + }) + + expect(result.map.length).toBeGreaterThan(0) + expect(result.fileCount).toBeGreaterThan(0) + expect(result.totalFileCount).toBe(5) + expect(result.tokenCount).toBeGreaterThan(0) + expect(result.tokenCount).toBeLessThanOrEqual(1024) + } finally { + rmSync(tempDir, { recursive: true, force: true }) + invalidateCache(tempDir) + } + }) + + test('respects max_tokens parameter', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-tool-')) + try { + for (const f of FIXTURE_FILES) { + cpSync(join(FIXTURE_ROOT, f), join(tempDir, f)) + } + + const { buildRepoMap } = await import( + '../../context/repoMap/index.js' + ) + + const small = await buildRepoMap({ root: tempDir, maxTokens: 256 }) + const large = await buildRepoMap({ root: tempDir, maxTokens: 4096 }) + + expect(small.tokenCount).toBeLessThanOrEqual(256) + // Large budget should include more or equal content + expect(large.map.length).toBeGreaterThanOrEqual(small.map.length) + } finally { + rmSync(tempDir, { recursive: true, force: true }) + invalidateCache(tempDir) + } + }) + + test('focus_files boosts specified files in the ranking', async () => { + const tempDir = mkdtempSync(join(tmpdir(), 'repomap-tool-')) + try { + for (const f of FIXTURE_FILES) { + cpSync(join(FIXTURE_ROOT, f), join(tempDir, f)) + } + + const { buildRepoMap } = await import( + 
'../../context/repoMap/index.js' + ) + + // Without focus, fileE is ranked last (isolated) + const noFocus = await buildRepoMap({ root: tempDir, maxTokens: 2048 }) + const lines = noFocus.map.split('\n') + const fileEPos = lines.findIndex(l => l === 'fileE.ts:') + + // With focus on fileE + invalidateCache(tempDir) + const withFocus = await buildRepoMap({ + root: tempDir, + maxTokens: 2048, + focusFiles: ['fileE.ts'], + }) + const focusLines = withFocus.map.split('\n') + const fileEFocusPos = focusLines.findIndex(l => l === 'fileE.ts:') + + // fileE should rank higher (earlier position) with focus + expect(fileEFocusPos).toBeLessThan(fileEPos) + } finally { + rmSync(tempDir, { recursive: true, force: true }) + invalidateCache(tempDir) + } + }) +}) + +describe('RepoMapTool properties', () => { + test('is marked read-only and concurrency-safe', () => { + expect(RepoMapTool.isReadOnly({})).toBe(true) + expect(RepoMapTool.isConcurrencySafe({})).toBe(true) + }) +}) + +describe('RepoMapTool UI', () => { + test('getToolUseSummary returns descriptive string including focus', () => { + expect(getToolUseSummary(undefined)).toBe('Repository map') + expect(getToolUseSummary({})).toBe('Repository map') + expect(getToolUseSummary({ focus_files: ['src/tools/'] })).toContain( + 'focus:', + ) + expect(getToolUseSummary({ focus_files: ['src/tools/'] })).toContain( + 'src/tools/', + ) + expect( + getToolUseSummary({ focus_symbols: ['buildTool'] }), + ).toContain('buildTool') + }) +}) diff --git a/src/tools/RepoMapTool/RepoMapTool.ts b/src/tools/RepoMapTool/RepoMapTool.ts new file mode 100644 index 00000000..e9c96bde --- /dev/null +++ b/src/tools/RepoMapTool/RepoMapTool.ts @@ -0,0 +1,176 @@ +import { z } from 'zod/v4' +import { buildTool, type ToolDef } from '../../Tool.js' +import { getCwd } from '../../utils/cwd.js' +import { lazySchema } from '../../utils/lazySchema.js' +import { checkReadPermissionForTool } from '../../utils/permissions/filesystem.js' +import type { 
PermissionDecision } from '../../utils/permissions/PermissionResult.js' +import { buildRepoMap } from '../../context/repoMap/index.js' +import { REPO_MAP_TOOL_NAME, getDescription } from './prompt.js' +import { + getToolUseSummary, + renderToolResultMessage, + renderToolUseErrorMessage, + renderToolUseMessage, +} from './UI.js' + +const inputSchema = lazySchema(() => + z.strictObject({ + max_tokens: z + .number() + .int() + .min(256) + .max(16384) + .optional() + .describe( + 'Maximum token budget for the rendered map. Higher values include more files. Default: 1024.', + ), + focus_files: z + .array(z.string()) + .optional() + .describe( + 'Relative file or directory paths to boost in the ranking (e.g. ["src/tools/", "src/context.ts"]).', + ), + focus_symbols: z + .array(z.string()) + .optional() + .describe( + 'Symbol names to boost — files defining these symbols rank higher (e.g. ["buildTool", "ToolUseContext"]).', + ), + }), +) +type InputSchema = ReturnType + +const outputSchema = lazySchema(() => + z.object({ + rendered: z.string(), + token_count: z.number(), + file_count: z.number(), + total_file_count: z.number(), + cache_hit: z.boolean(), + build_time_ms: z.number(), + }), +) +type OutputSchema = ReturnType + +type Output = z.infer + +export const RepoMapTool = buildTool({ + name: REPO_MAP_TOOL_NAME, + searchHint: 'structural map of repository files and symbols', + maxResultSizeChars: 50_000, + async description() { + return getDescription() + }, + userFacingName() { + return 'Repository map' + }, + getToolUseSummary, + getActivityDescription(input) { + if (input?.focus_files?.length) { + return `Building repository map (focus: ${input.focus_files.join(', ')})` + } + return 'Building repository map' + }, + get inputSchema(): InputSchema { + return inputSchema() + }, + get outputSchema(): OutputSchema { + return outputSchema() + }, + isConcurrencySafe() { + return true + }, + isReadOnly() { + return true + }, + isSearchOrReadCommand() { + return { isSearch: 
false, isRead: true } + }, + toAutoClassifierInput(input) { + const parts: string[] = ['repomap'] + if (input.focus_files?.length) parts.push(`focus: ${input.focus_files.join(',')}`) + return parts.join(' ') + }, + async checkPermissions(input, context): Promise { + const appState = context.getAppState() + return checkReadPermissionForTool( + RepoMapTool, + input, + appState.toolPermissionContext, + ) + }, + async prompt() { + return getDescription() + }, + renderToolUseMessage, + renderToolUseErrorMessage, + renderToolResultMessage, + extractSearchText({ rendered }) { + return rendered + }, + mapToolResultToToolResultBlockParam(output, toolUseID) { + const summary = [ + `Repository map: ${output.file_count} files ranked (${output.total_file_count} total), ${output.token_count} tokens`, + output.cache_hit ? '(cached)' : `(built in ${output.build_time_ms}ms)`, + ].join(' ') + + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: `${summary}\n\n${output.rendered}`, + } + }, + async call( + { max_tokens = 1024, focus_files, focus_symbols }, + { abortController }, + ) { + const root = getCwd() + + // Resolve focus_symbols to file paths by searching the tag cache + let resolvedFocusFiles = focus_files ?? 
[]
+    if (focus_symbols?.length) {
+      // Lazily import the repo-map helpers here to avoid a circular dependency at module load time
+      const { getRepoFiles } = await import('../../context/repoMap/gitFiles.js')
+      const { extractTags } = await import('../../context/repoMap/symbolExtractor.js')
+      const { initParser } = await import('../../context/repoMap/parser.js')
+
+      await initParser()
+      const files = await getRepoFiles(root)
+      const symbolFiles: string[] = []
+      const symbolSet = new Set(focus_symbols)
+
+      // Scan each repo file and collect those that define one of the requested symbols
+      for (const file of files) {
+        if (abortController.signal.aborted) break
+        const tags = await extractTags(file, root)
+        if (tags) {
+          const hasMatch = tags.tags.some(
+            t => t.kind === 'def' && symbolSet.has(t.name),
+          )
+          if (hasMatch) {
+            symbolFiles.push(file)
+          }
+        }
+      }
+
+      resolvedFocusFiles = [...resolvedFocusFiles, ...symbolFiles]
+    }
+
+    const result = await buildRepoMap({
+      root,
+      maxTokens: max_tokens,
+      focusFiles: resolvedFocusFiles.length > 0 ? 
resolvedFocusFiles : undefined, + }) + + const output: Output = { + rendered: result.map, + token_count: result.tokenCount, + file_count: result.fileCount, + total_file_count: result.totalFileCount, + cache_hit: result.cacheHit, + build_time_ms: result.buildTimeMs, + } + + return { data: output } + }, +} satisfies ToolDef) diff --git a/src/tools/RepoMapTool/UI.tsx b/src/tools/RepoMapTool/UI.tsx new file mode 100644 index 00000000..4fbf67b7 --- /dev/null +++ b/src/tools/RepoMapTool/UI.tsx @@ -0,0 +1,96 @@ +import type { ToolResultBlockParam } from '@anthropic-ai/sdk/resources/index.mjs' +import React from 'react' +import { FallbackToolUseErrorMessage } from '../../components/FallbackToolUseErrorMessage.js' +import { MessageResponse } from '../../components/MessageResponse.js' +import { TOOL_SUMMARY_MAX_LENGTH } from '../../constants/toolLimits.js' +import { Text } from '../../ink.js' +import type { ToolProgressData } from '../../Tool.js' +import type { ProgressMessage } from '../../types/message.js' +import { truncate } from '../../utils/format.js' + +type Output = { + rendered: string + token_count: number + file_count: number + total_file_count: number + cache_hit: boolean + build_time_ms: number +} + +export function getToolUseSummary( + input: + | Partial<{ + max_tokens?: number + focus_files?: string[] + focus_symbols?: string[] + }> + | undefined, +): string | null { + if (!input) return 'Repository map' + const parts: string[] = [] + if (input.focus_files?.length) { + parts.push(input.focus_files.join(', ')) + } + if (input.focus_symbols?.length) { + parts.push(input.focus_symbols.join(', ')) + } + if (parts.length > 0) { + return truncate(`Repository map (focus: ${parts.join('; ')})`, TOOL_SUMMARY_MAX_LENGTH) + } + return 'Repository map' +} + +export function renderToolUseMessage( + input: Partial<{ + max_tokens?: number + focus_files?: string[] + focus_symbols?: string[] + }>, +): React.ReactNode { + const parts: string[] = [] + if (input.max_tokens) { + 
parts.push(`max_tokens: ${input.max_tokens}`) + } + if (input.focus_files?.length) { + parts.push(`focus: ${input.focus_files.join(', ')}`) + } + if (input.focus_symbols?.length) { + parts.push(`symbols: ${input.focus_symbols.join(', ')}`) + } + return parts.length > 0 ? parts.join(', ') : null +} + +export function renderToolResultMessage( + output: Output, + _progressMessages: ProgressMessage[], + { verbose }: { verbose: boolean }, +): React.ReactNode { + const summary = `${output.file_count} files ranked, ${output.token_count} tokens${output.cache_hit ? ' (cached)' : `, ${output.build_time_ms}ms`}` + + if (verbose) { + return ( + + + Built repository map: {summary} + {'\n'} + ({output.total_file_count} total files considered) + + + ) + } + + return ( + + + Built repository map: {summary} + + + ) +} + +export function renderToolUseErrorMessage( + result: ToolResultBlockParam['content'], + { verbose }: { verbose: boolean }, +): React.ReactNode { + return +} diff --git a/src/tools/RepoMapTool/prompt.ts b/src/tools/RepoMapTool/prompt.ts new file mode 100644 index 00000000..b7be48e8 --- /dev/null +++ b/src/tools/RepoMapTool/prompt.ts @@ -0,0 +1,31 @@ +export const REPO_MAP_TOOL_NAME = 'RepoMap' + +export function getDescription(): string { + return `Build a structural map of the repository showing ranked files and their key signatures (functions, classes, types, interfaces). 
+ +## When to use +- At the start of a session on an unfamiliar repository to understand the codebase architecture +- Before cross-file refactors to identify which files are structurally connected +- When searching for where a concept or feature lives across the codebase +- When the user asks "how is this repo organized" or "what are the important files" + +## When NOT to use +- To read the contents of a specific file — use Read instead +- To search for exact text or patterns — use Grep instead +- To find files by name or glob pattern — use Glob instead +- When you already know which files to examine + +## How it works +The tool parses every supported source file (TypeScript, JavaScript, Python) using tree-sitter, extracts symbol definitions and references, builds a cross-file reference graph weighted by symbol importance (IDF), and ranks files using PageRank. The output is a token-budgeted summary showing the highest-ranked files with their key signatures (function/class/type declarations). + +## Parameters +- **max_tokens**: Controls how many files fit in the output. Use 1024 for a quick overview, 4096+ for comprehensive maps. Default: 1024. +- **focus_files**: Pass relative paths (e.g. \`["src/tools/"]\`) to boost specific files and their neighbors in the ranking. Use when the user mentions specific directories or files. +- **focus_symbols**: Pass symbol names (e.g. \`["buildTool", "ToolUseContext"]\`) to boost files that define those symbols. Use when the user asks about specific functions or types. + +## Important notes +- The map shows **signatures only**, not function bodies. Use Read to see implementations. +- Results are **auto-cached** on disk — repeat calls with the same parameters return instantly. +- Files are ranked by structural importance: files imported by many others rank highest. +` +}