feat: add Codebase Intelligence — repo map with PageRank-ranked structural summaries
Add a new module that builds a structural map of the repository by parsing source files with tree-sitter, building a cross-file reference graph weighted by IDF, ranking files with PageRank, and rendering a token-budgeted summary of the most important files and their signatures.

Stage 1 — Core module (src/context/repoMap/): Symbol extraction via web-tree-sitter WASM, IDF-weighted reference graph via graphology, PageRank ranking, token-budgeted rendering via js-tiktoken cl100k_base, disk cache with mtime invalidation. Supports TypeScript, JavaScript, and Python. 10 tests.

Stage 2 — RepoMap tool (src/tools/RepoMapTool/): buildTool wrapper registered in src/tools.ts. Read-only, concurrency-safe. Supports focus_files, focus_symbols, and max_tokens parameters. 9 tests.

Stage 3 — Integration: Auto-injection into session context behind REPO_MAP feature flag (off by default). /repomap slash command with --tokens, --focus, --stats, and --invalidate flags. User-facing docs in docs/repo-map.md. 13 tests.

With the flag off, the system context is byte-identical to previous behavior.

Dependencies: web-tree-sitter, tree-sitter-wasms, graphology, graphology-pagerank, graphology-operators, js-tiktoken.

Tests: 32 new, 621 total passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
109
src/context/repoMap/gitFiles.ts
Normal file
109
src/context/repoMap/gitFiles.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
import { execFile } from 'child_process'
|
||||
import { readdirSync, statSync } from 'fs'
|
||||
import { join, relative } from 'path'
|
||||
import type { SupportedLanguage } from './types.js'
|
||||
|
||||
/**
 * Lookup table from file extension (leading dot included, lower-case) to the
 * SupportedLanguage value assigned to files with that extension.
 */
const SUPPORTED_EXTENSIONS: Record<string, SupportedLanguage> = {
  // TypeScript sources, with and without JSX.
  '.ts': 'typescript',
  '.tsx': 'typescript',

  // JavaScript sources, including the explicit ESM / CommonJS extensions.
  '.js': 'javascript',
  '.jsx': 'javascript',
  '.mjs': 'javascript',
  '.cjs': 'javascript',

  // Python sources.
  '.py': 'python',
}
|
||||
|
||||
/**
 * Directory names that the manual directory walk in this file refuses to
 * descend into. Matching is by exact directory name, not by path.
 */
const EXCLUDED_DIRS = new Set<string>([
  // Installed / vendored dependencies.
  'node_modules',
  'dist',
  // Version-control metadata.
  '.git',
  '.hg',
  '.svn',
  // Build and tooling output.
  'build',
  'out',
  'coverage',
  '__pycache__',
  '.next',
  '.nuxt',
  'vendor',
  '.worktrees',
])
|
||||
|
||||
/**
 * Exact file names (package-manager lock files) that the manual directory
 * walk in this file leaves out of its results.
 */
const EXCLUDED_FILES = new Set<string>([
  // Bun
  'bun.lock',
  'bun.lockb',
  // npm
  'package-lock.json',
  // Yarn
  'yarn.lock',
  // pnpm
  'pnpm-lock.yaml',
])
|
||||
|
||||
/**
 * Resolve the language of a file from its extension.
 *
 * The extension is the text from the last `.` in `filePath` to the end of the
 * string; it is matched case-sensitively against SUPPORTED_EXTENSIONS.
 *
 * @param filePath - File path or bare file name.
 * @returns The language mapped to the extension, or `null` when the path has
 *   no `.` or the extension is not in the table.
 */
export function getLanguageForFile(filePath: string): SupportedLanguage | null {
  const dotIndex = filePath.lastIndexOf('.')
  // No dot means no extension. Without this guard, substring(-1) returns the
  // whole path, and names such as "constructor" or "toString" would resolve
  // to members inherited from Object.prototype instead of to null.
  if (dotIndex === -1) {
    return null
  }

  const ext = filePath.substring(dotIndex)
  // Only accept keys defined on the table itself, never inherited ones.
  if (!Object.prototype.hasOwnProperty.call(SUPPORTED_EXTENSIONS, ext)) {
    return null
  }
  return SUPPORTED_EXTENSIONS[ext] ?? null
}
|
||||
|
||||
/**
 * Tell whether a file belongs to one of the supported languages.
 *
 * @param filePath - File path or bare file name.
 * @returns `true` when getLanguageForFile yields a language for the path,
 *   `false` when it yields `null`.
 */
export function isSupportedFile(filePath: string): boolean {
  const language = getLanguageForFile(filePath)
  return language !== null
}
|
||||
|
||||
/**
 * List repository files by running `git ls-files` in `root`.
 *
 * Both tracked (`--cached`) and untracked-but-not-ignored (`--others
 * --exclude-standard`) files are requested. Output is requested with `-z`, so
 * records are NUL-terminated and paths are emitted verbatim; without it git
 * C-quotes paths containing quotes, backslashes, control characters or (by
 * default) non-ASCII bytes, which would make those files unrecognizable to
 * the extension lookup.
 *
 * @param root - Directory used as the working directory for git.
 * @returns Paths as printed by git (relative to `root`), without duplicates.
 *   The promise rejects with the `execFile` error when git cannot be run,
 *   exits unsuccessfully, or produces more than 10 MiB of output.
 */
function gitLsFiles(root: string): Promise<string[]> {
  return new Promise((resolve, reject) => {
    execFile(
      'git',
      ['ls-files', '-z', '--cached', '--others', '--exclude-standard'],
      { cwd: root, maxBuffer: 10 * 1024 * 1024 },
      (error, stdout) => {
        if (error) {
          reject(error)
          return
        }
        // Names are taken as-is (no trimming): whitespace at either end of a
        // record is part of the file name when -z is used.
        const paths = stdout.split('\0').filter(path => path.length > 0)
        // An unmerged path is listed once per index stage; keep one copy.
        resolve([...new Set(paths)])
      },
    )
  })
}
|
||||
|
||||
/**
 * Walk the directory tree manually; used as the fallback when git is
 * unavailable.
 *
 * Directories whose name is in EXCLUDED_DIRS or starts with `.` are not
 * entered, files whose name is in EXCLUDED_FILES are omitted, and entries
 * that are neither a regular file nor a directory are ignored. A directory
 * that cannot be read contributes no entries instead of aborting the walk.
 *
 * NOTE(review): paths are built with `path.relative`, so they use the
 * platform separator, while `git ls-files` prints `/`-separated paths —
 * confirm callers tolerate both on Windows.
 *
 * @param root - Directory that returned paths are made relative to.
 * @param currentDir - Directory to start walking from; defaults to `root`.
 * @returns Relative file paths in depth-first, directory-listing order.
 */
function walkDirectory(root: string, currentDir: string = root): string[] {
  const results: string[] = []

  // Unreadable directories (missing, permission denied, ...) list as empty.
  const listEntries = (dir: string) => {
    try {
      return readdirSync(dir, { withFileTypes: true })
    } catch {
      return []
    }
  }

  // Every level appends to the one shared array. Spreading a subtree's result
  // into push() passes each path as a call argument and throws RangeError
  // once a subtree holds more paths than the engine's argument limit.
  const visit = (dir: string): void => {
    for (const entry of listEntries(dir)) {
      const name = entry.name
      if (entry.isDirectory()) {
        if (!EXCLUDED_DIRS.has(name) && !name.startsWith('.')) {
          visit(join(dir, name))
        }
      } else if (entry.isFile()) {
        if (!EXCLUDED_FILES.has(name)) {
          results.push(relative(root, join(dir, name)))
        }
      }
    }
  }

  visit(currentDir)
  return results
}
|
||||
|
||||
/**
 * Enumerate every supported source file under `root`.
 *
 * The listing comes from gitLsFiles; if that rejects for any reason, the
 * manual walkDirectory traversal is used instead. Either way, only paths
 * accepted by isSupportedFile are returned.
 *
 * @param root - Repository root directory.
 * @returns Paths of supported source files, in the order the listing
 *   produced them.
 */
export async function getRepoFiles(root: string): Promise<string[]> {
  // Prefer git's view of the repository; fall back to walking the disk.
  const listCandidates = async (): Promise<string[]> => {
    try {
      return await gitLsFiles(root)
    } catch {
      return walkDirectory(root)
    }
  }

  const supported: string[] = []
  for (const candidate of await listCandidates()) {
    if (isSupportedFile(candidate)) {
      supported.push(candidate)
    }
  }
  return supported
}
|
||||
Reference in New Issue
Block a user