feat: add Codebase Intelligence — repo map with PageRank-ranked structural summaries

Add a new module that builds a structural map of the repository by parsing
source files with tree-sitter, building a cross-file reference graph
weighted by IDF, ranking files with PageRank, and rendering a
token-budgeted summary of the most important files and their signatures.

Stage 1 — Core module (src/context/repoMap/):
  Symbol extraction via web-tree-sitter WASM, IDF-weighted reference graph
  via graphology, PageRank ranking, token-budgeted rendering via js-tiktoken
  cl100k_base, disk cache with mtime invalidation. Supports TypeScript,
  JavaScript, and Python. 10 tests.

Stage 2 — RepoMap tool (src/tools/RepoMapTool/):
  buildTool wrapper registered in src/tools.ts. Read-only, concurrency-safe.
  Supports focus_files, focus_symbols, and max_tokens parameters. 9 tests.

Stage 3 — Integration:
  Auto-injection into session context behind REPO_MAP feature flag (off by
  default). /repomap slash command with --tokens, --focus, --stats, and
  --invalidate flags. User-facing docs in docs/repo-map.md. 13 tests.

With the flag off, the system context is byte-identical to previous behavior.

Dependencies: web-tree-sitter, tree-sitter-wasms, graphology,
graphology-pagerank, graphology-operators, js-tiktoken

Tests: 32 new, 621 total passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
gnanam1990
2026-04-09 17:26:34 +05:30
parent 6ea3eb6483
commit 81896618a1
35 changed files with 2384 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
/**
* /repomap command - minimal metadata only.
* Implementation is lazy-loaded from repomap.ts to reduce startup time.
*/
import type { Command } from '../../commands.js'
const repomap = {
type: 'local',
name: 'repomap',
description:
'Show or configure the repository structural map (codebase intelligence)',
isHidden: false,
supportsNonInteractive: true,
load: () => import('./repomap.js'),
} satisfies Command
export default repomap

View File

@@ -0,0 +1,56 @@
import { describe, expect, test } from 'bun:test'
import { parseArgs } from './repomap.js'
describe('/repomap argument parsing', () => {
test('defaults to 1024 tokens with no flags', () => {
const result = parseArgs('')
expect(result.tokens).toBe(2048)
expect(result.focus).toEqual([])
expect(result.invalidate).toBe(false)
expect(result.stats).toBe(false)
})
test('parses --tokens flag', () => {
const result = parseArgs('--tokens 4096')
expect(result.tokens).toBe(4096)
})
test('rejects --tokens below 256', () => {
const result = parseArgs('--tokens 100')
expect(result.tokens).toBe(2048) // falls back to default
})
test('rejects --tokens above 16384', () => {
const result = parseArgs('--tokens 20000')
expect(result.tokens).toBe(2048) // falls back to default
})
test('parses --focus flag', () => {
const result = parseArgs('--focus src/tools/')
expect(result.focus).toEqual(['src/tools/'])
})
test('parses multiple --focus flags', () => {
const result = parseArgs('--focus src/tools/ --focus src/context.ts')
expect(result.focus).toEqual(['src/tools/', 'src/context.ts'])
})
test('parses --invalidate flag', () => {
const result = parseArgs('--invalidate')
expect(result.invalidate).toBe(true)
expect(result.stats).toBe(false)
})
test('parses --stats flag', () => {
const result = parseArgs('--stats')
expect(result.stats).toBe(true)
expect(result.invalidate).toBe(false)
})
test('parses combined flags', () => {
const result = parseArgs('--tokens 2048 --focus src/tools/ --invalidate')
expect(result.tokens).toBe(2048)
expect(result.focus).toEqual(['src/tools/'])
expect(result.invalidate).toBe(true)
})
})

View File

@@ -0,0 +1,93 @@
import type { LocalCommandCall } from '../../types/command.js'
import { getCwd } from '../../utils/cwd.js'
/** Parse CLI-style arguments from the command string. */
export function parseArgs(args: string): {
tokens: number
focus: string[]
invalidate: boolean
stats: boolean
} {
const parts = args.trim().split(/\s+/).filter(Boolean)
let tokens = 2048
const focus: string[] = []
let invalidate = false
let stats = false
for (let i = 0; i < parts.length; i++) {
const part = parts[i]!
if (part === '--tokens' && i + 1 < parts.length) {
const n = parseInt(parts[i + 1]!, 10)
if (!isNaN(n) && n >= 256 && n <= 16384) {
tokens = n
}
i++
} else if (part === '--focus' && i + 1 < parts.length) {
focus.push(parts[i + 1]!)
i++
} else if (part === '--invalidate') {
invalidate = true
} else if (part === '--stats') {
stats = true
}
}
return { tokens, focus, invalidate, stats }
}
export const call: LocalCommandCall = async (args) => {
const root = getCwd()
const { tokens, focus, invalidate, stats } = parseArgs(args ?? '')
// Lazy import to avoid loading tree-sitter at startup
const {
buildRepoMap,
invalidateCache,
getCacheStats,
} = await import('../../context/repoMap/index.js')
if (stats) {
const cacheStats = getCacheStats(root)
const lines = [
`Repository map cache stats:`,
` Cache directory: ${cacheStats.cacheDir}`,
` Cache file: ${cacheStats.cacheFile ?? '(none)'}`,
` Cached entries: ${cacheStats.entryCount}`,
` Cache exists: ${cacheStats.exists}`,
]
return { type: 'text', value: lines.join('\n') }
}
if (invalidate) {
invalidateCache(root)
const result = await buildRepoMap({
root,
maxTokens: tokens,
focusFiles: focus.length > 0 ? focus : undefined,
})
return {
type: 'text',
value: [
`Cache invalidated and rebuilt.`,
`Files: ${result.fileCount} ranked (${result.totalFileCount} total) | Tokens: ${result.tokenCount} | Time: ${result.buildTimeMs}ms | Cache hit: ${result.cacheHit}`,
'',
result.map,
].join('\n'),
}
}
const result = await buildRepoMap({
root,
maxTokens: tokens,
focusFiles: focus.length > 0 ? focus : undefined,
})
return {
type: 'text',
value: [
`Repository map: ${result.fileCount} files ranked (${result.totalFileCount} total) | Tokens: ${result.tokenCount} | Time: ${result.buildTimeMs}ms | Cache hit: ${result.cacheHit}`,
'',
result.map,
].join('\n'),
}
}