/**
* Definition extraction utilities with brace-matching support
* Extracts complete function/class definitions from source code
*/
import type { Language } from "../types.js";
/** Lexical state carried from character to character while scanning a definition. */
interface ParserState {
/** True while inside a double-quoted string or, for "js", a backtick template string. */
inString: boolean;
/** True while inside a single-quoted literal (opened and closed by `'`). */
inChar: boolean;
/** True from `//` onward; extractFromStart clears it after each line. */
inSingleLineComment: boolean;
/** True between an opening slash-star and the matching star-slash. */
inMultiLineComment: boolean;
/** Character that terminates the current string; "" when not inside one. */
stringDelimiter: string;
/** True when the previous character was a backslash inside a string/char literal. */
escaped: boolean;
/** Running count of unquoted, uncommented `{` minus `}` seen so far. */
braceDepth: number;
/** Toggled between 0 and 1 by the backtick handling in updateParserState. */
templateDepth: number;
}
/** Result returned by extractDefinition. */
interface ExtractionResult {
/** Extracted source text, lines joined with "\n". */
definition: string;
/** 1-based line number where the definition starts. */
startLine: number;
/** 1-based line number of the last extracted line. */
endLine: number;
/** Text of the definition that precedes its first "{", when computed. */
signature?: string;
}
/**
 * Extract the complete definition around a matched line.
 *
 * The start is found by scanning backward from `startLine` for a definition
 * keyword; the end is found by brace matching from that start.
 *
 * @param content - Full source text; it is split on "\n".
 * @param startLine - 1-based line number of the match.
 * @param language - Forwarded to the helpers that pick keywords and track strings.
 * @param maxLines - Upper bound on the number of lines extracted (default 500).
 * @returns The definition text, its 1-based start/end lines and its signature,
 *   or `null` when `startLine` is not a valid line number of `content` or
 *   nothing could be extracted.
 */
export function extractDefinition(
  content: string,
  startLine: number,
  language: Language,
  maxLines: number = 500
): ExtractionResult | null {
  const lines = content.split("\n");

  // A line number outside 1..lines.length (or a fractional one) would index
  // past the array in the helpers and throw; report "nothing found" instead.
  if (
    !Number.isInteger(startLine) ||
    startLine < 1 ||
    startLine > lines.length
  ) {
    return null;
  }

  // Helpers work with 0-based indices.
  const defStart = findDefinitionStart(lines, startLine - 1, language);

  const result = extractFromStart(lines, defStart, language, maxLines);
  if (!result) {
    return null;
  }

  const signature = extractSignature(result.definition, language);

  // Convert back to 1-based line numbers for callers.
  return {
    definition: result.definition,
    startLine: defStart + 1,
    endLine: result.endLine + 1,
    signature,
  };
}
/**
 * Walk upward from the matched line to locate where its definition begins.
 *
 * The match line and up to 50 lines above it are examined. The first line
 * that contains one of the language's definition keywords and does not look
 * like a comment is returned. A line ending in "}" or "};" is taken as the end
 * of an earlier block: the search stops and the match line itself is used.
 *
 * @param lines - Source split into lines.
 * @param matchLine - 0-based index of the matched line.
 * @param language - Selects the keyword list.
 * @returns 0-based index of the presumed first line of the definition.
 */
function findDefinitionStart(
  lines: string[],
  matchLine: number,
  language: Language
): number {
  const keywords = getDefinitionKeywords(language);
  const lowestIndex = Math.max(0, matchLine - 50);

  let cursor = matchLine;
  while (cursor >= lowestIndex) {
    const raw = lines[cursor];
    const trimmed = raw.trim();

    const mentionsKeyword = keywords.some((keyword) =>
      trimmed.includes(keyword)
    );
    if (mentionsKeyword && !isInComment(raw, language)) {
      return cursor;
    }

    // Reached the tail of a previous block: nothing above belongs to us.
    const closesEarlierBlock = trimmed.endsWith("}") || trimmed.endsWith("};");
    if (closesEarlierBlock) {
      break;
    }

    cursor -= 1;
  }

  return matchLine;
}
/**
 * Collect lines from `startLine` until the brace that opened the definition
 * is closed again.
 *
 * Braces inside strings, character literals and comments are ignored. When
 * the closing brace is followed by ";" (on the same line, or at the start of
 * the next line) that semicolon is treated as part of the definition. If no
 * balanced closing brace is found within `maxLines` lines or before the end
 * of the file, everything collected so far is returned.
 *
 * @param lines - Source split into lines.
 * @param startLine - 0-based index of the first line to extract.
 * @param language - Forwarded to updateParserState.
 * @param maxLines - Maximum number of lines to scan.
 * @returns The joined text and the 0-based index of its last line, or `null`
 *   when no line was collected.
 */
function extractFromStart(
  lines: string[],
  startLine: number,
  language: Language,
  maxLines: number
): { definition: string; endLine: number } | null {
  const state: ParserState = {
    inString: false,
    inChar: false,
    inSingleLineComment: false,
    inMultiLineComment: false,
    stringDelimiter: "",
    escaped: false,
    braceDepth: 0,
    templateDepth: 0,
  };
  const collected: string[] = [];
  let sawOpeningBrace = false;
  const limit = Math.min(lines.length, startLine + maxLines);

  for (let row = startLine; row < limit; row++) {
    const text = lines[row];
    collected.push(text);

    for (let col = 0; col < text.length; col++) {
      const ch = text[col];
      const following = col < text.length - 1 ? text[col + 1] : "";
      const preceding = col > 0 ? text[col - 1] : "";
      updateParserState(state, ch, following, preceding, language);

      // Only braces in real code count.
      const insideLiteralOrComment =
        state.inString ||
        state.inChar ||
        state.inSingleLineComment ||
        state.inMultiLineComment;
      if (insideLiteralOrComment) {
        continue;
      }

      if (ch === "{") {
        state.braceDepth += 1;
        sawOpeningBrace = true;
        continue;
      }
      if (ch !== "}") {
        continue;
      }

      state.braceDepth -= 1;
      if (!sawOpeningBrace || state.braceDepth !== 0) {
        continue;
      }

      // Balanced: this is the end of the definition. Pick up a trailing
      // semicolon (class/struct style) if it sits on the following line.
      const semicolonOnSameLine =
        col < text.length - 1 &&
        text
          .substring(col + 1)
          .trim()
          .startsWith(";");
      let lastLine = row;
      if (
        !semicolonOnSameLine &&
        row < lines.length - 1 &&
        lines[row + 1].trim().startsWith(";")
      ) {
        collected.push(lines[row + 1]);
        lastLine = row + 1;
      }
      return {
        definition: collected.join("\n"),
        endLine: lastLine,
      };
    }

    // A "//" comment never extends past its own line.
    state.inSingleLineComment = false;
  }

  // Ran out of lines before the braces balanced: hand back the partial text.
  if (collected.length === 0) {
    return null;
  }
  return {
    definition: collected.join("\n"),
    endLine: startLine + collected.length - 1,
  };
}
/**
 * Advance the lexical state by one character.
 *
 * Tracks whether the scanner is inside a double-quoted string, a
 * single-quoted literal, a `//` comment, a slash-star comment or (for "js"
 * only) a backtick template string, so the caller can ignore braces there.
 * Brace depth itself is maintained by the caller.
 *
 * @param state - Mutated in place.
 * @param char - Character being consumed.
 * @param next - Character after `char` on the same line, or "" at end of line.
 * @param prev - Character before `char`; accepted for the caller's convenience
 *   but not consulted here.
 * @param language - Backticks are only treated as string delimiters for "js".
 */
function updateParserState(
  state: ParserState,
  char: string,
  next: string,
  prev: string,
  language: Language
): void {
  // The character after a backslash is consumed without interpretation.
  if (state.escaped) {
    state.escaped = false;
    return;
  }
  if ((state.inString || state.inChar) && char === "\\") {
    state.escaped = true;
    return;
  }

  // Inside a block comment only its terminator matters.
  if (state.inMultiLineComment) {
    if (char === "*" && next === "/") {
      state.inMultiLineComment = false;
    }
    return;
  }

  // Inside a line comment nothing matters; the caller resets the flag per line.
  if (state.inSingleLineComment) {
    return;
  }

  // Comment openers are only recognised outside string/char literals.
  if (!state.inString && !state.inChar) {
    if (char === "/" && next === "/") {
      state.inSingleLineComment = true;
      return;
    }
    if (char === "/" && next === "*") {
      state.inMultiLineComment = true;
      return;
    }
  }

  // Inside a string (including a template string): wait for its delimiter.
  if (state.inString) {
    if (char === state.stringDelimiter) {
      state.inString = false;
      state.stringDelimiter = "";
      // Template strings close through this path too, so the template marker
      // must be cleared here; otherwise the next backtick is mistaken for a
      // closer and every second template literal is scanned as code.
      state.templateDepth = 0;
    }
    return;
  }

  // Inside a single-quoted literal: wait for the closing quote.
  if (state.inChar) {
    if (char === "'") {
      state.inChar = false;
    }
    return;
  }

  // Opening quote of a string or single-quoted literal.
  if (char === '"' || char === "'") {
    if (char === '"') {
      state.inString = true;
      state.stringDelimiter = char;
    } else {
      state.inChar = true;
    }
    return;
  }

  // Opening backtick of a template string. Reaching this point means we are
  // not inside any string, so a backtick can only be an opener.
  if (language === "js" && char === "`") {
    state.inString = true;
    state.stringDelimiter = "`";
    state.templateDepth = 1;
  }
}
/**
 * Return the part of a definition that precedes its first "{".
 *
 * Lines above the one containing the brace are kept as they are; the brace
 * line is cut at the brace and trimmed. When the text contains no "{" at all
 * the whole definition is returned. The result is trimmed.
 *
 * @param definition - Extracted definition text.
 * @param _language - Unused.
 */
function extractSignature(definition: string, _language: Language): string {
  const rows = definition.split("\n");
  const braceRow = rows.findIndex((row) => row.includes("{"));

  if (braceRow === -1) {
    return rows.join("\n").trim();
  }

  const braceLine = rows[braceRow];
  const head = braceLine.substring(0, braceLine.indexOf("{")).trim();
  const kept = rows.slice(0, braceRow);
  kept.push(head);
  return kept.join("\n").trim();
}
/**
 * Heuristically decide whether a whole line is a comment line.
 *
 * The line counts as a comment when, after trimming, it begins with "//",
 * a slash-star opener, or "*" (the usual continuation of a block comment).
 * Trailing comments after code are not detected.
 *
 * @param line - Raw source line.
 * @param _language - Unused.
 */
function isInComment(line: string, _language: Language): boolean {
  const text = line.trim();
  const commentPrefixes = ["//", "/*", "*"];
  return commentPrefixes.some((prefix) => text.startsWith(prefix));
}
/**
 * List the substrings whose presence marks a line as a likely definition
 * start for the given language.
 *
 * "c" and "cpp" share one list; any language without a dedicated list falls
 * back to `["function ", "class "]`. A new array is returned on every call.
 */
function getDefinitionKeywords(language: Language): string[] {
  const isCFamily = language === "cpp" || language === "c";
  if (isCFamily) {
    return [
      "class ",
      "struct ",
      "enum ",
      "union ",
      "namespace ",
      "void ",
      "int ",
      "bool ",
      "char ",
      "auto ",
      "static ",
      "virtual ",
      "inline ",
      "template",
    ];
  }

  if (language === "rust") {
    return [
      "fn ",
      "struct ",
      "enum ",
      "trait ",
      "impl ",
      "pub fn",
      "async fn",
    ];
  }

  if (language === "js") {
    return [
      "function ",
      "class ",
      "const ",
      "let ",
      "var ",
      "async function",
      "export function",
      "export class",
    ];
  }

  if (language === "webidl") {
    return ["interface ", "dictionary ", "enum ", "callback "];
  }

  return ["function ", "class "];
}
/**
 * Detect the language of a file from its extension.
 *
 * Only the final path component is considered, the comparison is
 * case-insensitive, and a name without any "." has no extension (so a file
 * literally named "c" or "js" is not mistaken for source code).
 *
 * @param filePath - File name or path, using "/" or "\" separators.
 * @returns The detected language, or `null` for unknown or missing extensions.
 */
export function detectLanguage(filePath: string): Language | null {
  const lastSeparator = Math.max(
    filePath.lastIndexOf("/"),
    filePath.lastIndexOf("\\")
  );
  const baseName = filePath.slice(lastSeparator + 1);

  const dot = baseName.lastIndexOf(".");
  if (dot === -1) {
    // No extension at all; do not treat the whole name as one.
    return null;
  }

  const ext = baseName.slice(dot + 1).toLowerCase();
  switch (ext) {
    case "cpp":
    case "cc":
    case "cxx":
    case "h":
    case "hpp":
    case "hxx":
      return "cpp";
    case "c":
      return "c";
    case "js":
    case "jsx":
    case "mjs":
      return "js";
    case "rs":
      return "rust";
    case "webidl":
      return "webidl";
    default:
      return null;
  }
}