feat: add Codebase Intelligence — repo map with PageRank-ranked structural summaries

Add a new module that builds a structural map of the repository by parsing
source files with tree-sitter, building a cross-file reference graph
weighted by IDF, ranking files with PageRank, and rendering a
token-budgeted summary of the most important files and their signatures.

Stage 1 — Core module (src/context/repoMap/):
  Symbol extraction via web-tree-sitter WASM, IDF-weighted reference graph
  via graphology, PageRank ranking, token-budgeted rendering via js-tiktoken
  cl100k_base, disk cache with mtime invalidation. Supports TypeScript,
  JavaScript, and Python. 10 tests.

Stage 2 — RepoMap tool (src/tools/RepoMapTool/):
  buildTool wrapper registered in src/tools.ts. Read-only, concurrency-safe.
  Supports focus_files, focus_symbols, and max_tokens parameters. 9 tests.

Stage 3 — Integration:
  Auto-injection into session context behind REPO_MAP feature flag (off by
  default). /repomap slash command with --tokens, --focus, --stats, and
  --invalidate flags. User-facing docs in docs/repo-map.md. 13 tests.

With the flag off, the system context is byte-identical to previous behavior.

Dependencies: web-tree-sitter, tree-sitter-wasms, graphology,
graphology-pagerank, graphology-operators, js-tiktoken

Tests: 32 new, 621 total passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
gnanam1990
2026-04-09 17:26:34 +05:30
parent 6ea3eb6483
commit 81896618a1
35 changed files with 2384 additions and 0 deletions

View File

@@ -0,0 +1,108 @@
import { readFileSync } from 'fs'
import { join } from 'path'
import { getLanguageForFile } from './gitFiles.js'
import { createParser, loadLanguage, loadQuery } from './parser.js'
import type { FileTags, Tag } from './types.js'
/**
* Extract definition and reference tags from a single source file.
* Returns null if the file can't be parsed (unsupported language, parse error, etc).
*/
export async function extractTags(
filePath: string,
root: string,
): Promise<FileTags | null> {
const language = getLanguageForFile(filePath)
if (!language) return null
const absolutePath = join(root, filePath)
let source: string
try {
source = readFileSync(absolutePath, 'utf-8')
} catch {
return null
}
const lines = source.split('\n')
const parser = await createParser(language)
if (!parser) return null
const querySource = loadQuery(language)
if (!querySource) {
parser.delete()
return null
}
try {
const tree = parser.parse(source) as {
rootNode: unknown
}
const lang = await loadLanguage(language)
if (!lang) {
parser.delete()
return null
}
// Use the non-deprecated Query constructor
const { Query } = await import('web-tree-sitter')
const query = new Query(lang, querySource) as {
matches(rootNode: unknown): Array<{
pattern: number
captures: Array<{
name: string
node: {
text: string
startPosition: { row: number; column: number }
endPosition: { row: number; column: number }
}
}>
}>
}
const matches = query.matches(tree.rootNode)
const tags: Tag[] = []
const seen = new Set<string>() // dedup by kind+name+line
for (const match of matches) {
let name: string | null = null
let kind: 'def' | 'ref' | null = null
let subKind: string | undefined
let lineRow = 0
for (const capture of match.captures) {
const captureName = capture.name
// Name captures: name.definition.X or name.reference.X
if (captureName.startsWith('name.definition.')) {
name = capture.node.text
kind = 'def'
subKind = captureName.slice('name.definition.'.length)
lineRow = capture.node.startPosition.row
} else if (captureName.startsWith('name.reference.')) {
name = capture.node.text
kind = 'ref'
subKind = captureName.slice('name.reference.'.length)
lineRow = capture.node.startPosition.row
}
}
if (name && kind) {
const key = `${kind}:${name}:${lineRow}`
if (!seen.has(key)) {
seen.add(key)
const line = lineRow + 1 // convert 0-based to 1-based
const signature = lines[lineRow]?.trimEnd() ?? ''
tags.push({ kind, name, line, signature, subKind })
}
}
}
parser.delete()
return { path: filePath, tags }
} catch {
parser.delete()
return null
}
}