/**
* analyze_codebase tool implementation
*
* Performs comprehensive codebase analysis including:
* - File structure and organization
* - Language distribution
* - Code metrics (lines of code and file size; complexity is not computed)
* - Module/package structure
* - Entry points identification
*/
import { glob } from "glob";
import { promises as fs } from "fs";
import path from "path";
import ignoreModule from "ignore";
// Instance type returned by the `ignore` package's default factory; this is the
// matcher object that loadGitignore builds from a project's .gitignore file.
type Ignore = ReturnType<(typeof ignoreModule)["default"]>;
/**
* Arguments accepted by analyzeCodebase.
*/
interface AnalyzeCodebaseArgs {
/** Root of the project to analyze; used as the working directory for glob matching. */
path: string;
/** Language names (as produced by detectLanguage) to keep; empty or omitted keeps every file. */
languages?: string[];
/** Maximum directory depth at which entry-point candidates are reported (defaults to 3). */
depth?: number;
/** Glob patterns selecting the files to analyze (defaults to ["**\/*"] without the backslash). */
include_patterns?: string[];
/** Glob patterns to exclude; when omitted, common dependency, build and cache directories are excluded. */
exclude_patterns?: string[];
/**
* Amount of work to do (defaults to "standard"). "quick" skips line counting,
* the directory listing and package.json loading; "deep" additionally returns
* the per-file list.
*/
analysis_type?: "quick" | "standard" | "deep";
}
/**
* Per-file record collected by analyzeCodebase.
*/
interface FileInfo {
/** Path of the file relative to the project root, as returned by glob. */
relativePath: string;
/** Language name from detectLanguage, or "unknown" for unmapped extensions. */
language: string;
/** File size as reported by fs.stat (bytes). */
size: number;
/** Line count from countLines; 0 when analysis_type is "quick" or the file could not be read. */
lines: number;
}
/**
* Shape of the report that analyzeCodebase serializes to JSON.
*/
interface AnalysisResult {
/** The project path exactly as it was passed in. */
projectPath: string;
/** Number of files that were analyzed (after filtering). */
totalFiles: number;
/** Sum of the line counts of all analyzed files. */
totalLines: number;
/** Sum of the sizes of all analyzed files. */
totalSize: number;
/** Per-language totals; `percentage` is the language's share of totalLines (0-100). */
languages: Record<string, { files: number; lines: number; percentage: number }>;
structure: {
/** Project-relative directory paths found by getDirectories (empty for "quick" analysis). */
directories: string[];
/** Largest number of path segments among `directories` (0 when there are none). */
depth: number;
};
/** Per-file details; only populated when analysisType is "deep". */
files: FileInfo[];
/** Project-relative paths of files that isEntryPoint flagged within the requested depth. */
entryPoints: string[];
/** Selected package.json fields; absent when the file could not be loaded or analysis is "quick". */
packageInfo?: {
name?: string;
version?: string;
dependencies?: Record<string, string>;
};
/** The analysis_type that was used for this run. */
analysisType: string;
/** ISO-8601 time at which the result was built. */
timestamp: string;
}
// Language detection table: maps a file extension (lowercase, leading dot
// included, in the form returned by path.extname) to the language name used in
// the analysis report. detectLanguage reports extensions that are missing from
// this table as "unknown".
const LANGUAGE_MAP: Record<string, string> = {
".ts": "typescript",
".tsx": "typescript",
".js": "javascript",
".jsx": "javascript",
".mjs": "javascript",
".cjs": "javascript",
".py": "python",
".java": "java",
".go": "go",
".rs": "rust",
".cpp": "cpp",
".cc": "cpp",
".cxx": "cpp",
".c": "c",
// Header files are attributed to C even though C++ projects also use ".h".
".h": "c",
".cs": "csharp",
".rb": "ruby",
".php": "php",
".swift": "swift",
".kt": "kotlin",
".scala": "scala",
".sh": "shell",
".bash": "shell",
".md": "markdown",
".json": "json",
".yaml": "yaml",
".yml": "yaml",
".toml": "toml",
".xml": "xml",
".html": "html",
".css": "css",
".scss": "scss",
".sass": "sass",
".sql": "sql",
};
// Common entry point file names, without extension. isEntryPoint compares these
// against a file's lowercased, extension-less basename, so every entry here
// must be lowercase.
const ENTRY_POINT_PATTERNS = [
"index",
"main",
"app",
"server",
"__init__",
"cli",
"bin",
];
/**
 * Build a gitignore matcher from the `.gitignore` file at the project root.
 *
 * This is best-effort: if the file is missing, unreadable, or the matcher
 * cannot be constructed, the function resolves to `null` instead of throwing.
 *
 * @param projectPath Directory expected to contain the `.gitignore` file.
 * @returns A matcher loaded with the file's rules, or `null` on any failure.
 */
async function loadGitignore(projectPath: string): Promise<Ignore | null> {
  let matcher: Ignore | null = null;

  try {
    const rules = await fs.readFile(path.join(projectPath, ".gitignore"), "utf-8");
    const built = ignoreModule.default();
    built.add(rules);
    matcher = built;
  } catch {
    // Any failure leaves `matcher` as null; callers treat that as "no rules".
  }

  return matcher;
}
/**
 * Detect the language of a file from its extension.
 *
 * The extension is lowercased before the lookup so that names such as
 * `README.MD` or `Main.PY` resolve the same way as their lowercase forms.
 *
 * @param filePath Absolute or relative path of the file; only the extension is inspected.
 * @returns The language name from LANGUAGE_MAP, or "unknown" when the file has
 *          no extension or the extension is not in the table.
 */
function detectLanguage(filePath: string): string {
  const ext = path.extname(filePath).toLowerCase();
  return LANGUAGE_MAP[ext] ?? "unknown";
}
/**
 * Check whether a file is likely an entry point, judging by its name only.
 *
 * The extension-less basename must either equal one of ENTRY_POINT_PATTERNS or
 * contain one as a whole word. Words are delimited by non-alphanumeric
 * characters and by camelCase boundaries, so `index.test`, `main-window` and
 * `mainApp` qualify. Plain substring hits such as `client` (contains "cli"),
 * `domain` (contains "main") or `combine` (contains "bin") do not.
 *
 * @param filePath Absolute or relative path of the file; only the basename is inspected.
 * @returns `true` when the name matches an entry-point pattern.
 */
function isEntryPoint(filePath: string): boolean {
  const stem = path.basename(filePath, path.extname(filePath));

  // Exact comparison first: patterns like "__init__" contain separator
  // characters and would not survive the word split below.
  if (ENTRY_POINT_PATTERNS.includes(stem.toLowerCase())) {
    return true;
  }

  const words = stem
    .replace(/([a-z0-9])([A-Z])/g, "$1 $2") // break camelCase before lowercasing
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);

  return ENTRY_POINT_PATTERNS.some((pattern) => words.includes(pattern));
}
/**
 * Count the lines of a text file.
 *
 * A trailing newline terminates the last line rather than starting a new one,
 * so "a\nb\n" and "a\nb" both count as 2 lines and an empty file counts as 0.
 * The file is decoded as UTF-8.
 *
 * @param filePath Path of the file to read.
 * @returns The number of lines, or 0 when the file cannot be read.
 */
async function countLines(filePath: string): Promise<number> {
  let content: string;
  try {
    content = await fs.readFile(filePath, "utf-8");
  } catch {
    // Best-effort metric: unreadable files contribute no lines.
    return 0;
  }

  if (content === "") {
    return 0;
  }

  const segments = content.split("\n").length;
  // split() yields one extra empty segment after a final newline; drop it.
  return content.endsWith("\n") ? segments - 1 : segments;
}
/**
 * Load selected fields from the project's package.json.
 *
 * The parsed JSON is validated before use: only a string `name`, a string
 * `version`, and the string-valued entries of an object `dependencies` are
 * copied into the result. Fields that are absent or of the wrong type are
 * left out.
 *
 * @param projectPath Directory expected to contain package.json.
 * @returns The extracted fields, or `undefined` when the file is missing,
 *          unreadable, not valid JSON, or its root value is not an object.
 */
async function loadPackageInfo(
  projectPath: string
): Promise<
  { name?: string; version?: string; dependencies?: Record<string, string> } | undefined
> {
  let manifest: unknown;
  try {
    const raw = await fs.readFile(path.join(projectPath, "package.json"), "utf-8");
    manifest = JSON.parse(raw);
  } catch {
    // Best-effort: a missing or malformed package.json simply yields no package info.
    return undefined;
  }

  if (typeof manifest !== "object" || manifest === null || Array.isArray(manifest)) {
    return undefined;
  }

  const { name, version, dependencies } = manifest as Record<string, unknown>;
  const info: { name?: string; version?: string; dependencies?: Record<string, string> } = {};

  if (typeof name === "string") {
    info.name = name;
  }
  if (typeof version === "string") {
    info.version = version;
  }
  if (typeof dependencies === "object" && dependencies !== null && !Array.isArray(dependencies)) {
    info.dependencies = Object.fromEntries(
      Object.entries(dependencies).filter(
        (entry): entry is [string, string] => typeof entry[1] === "string"
      )
    );
  }

  return info;
}
/**
 * Recursively list the directories of a project.
 *
 * Dependency, build, VCS and virtual-environment directories are skipped by
 * name, and so is any directory matched by the supplied gitignore matcher.
 * Skipped directories are not descended into.
 *
 * @param projectPath Root directory to walk.
 * @param ignore Gitignore matcher, or `null` to apply only the built-in skip list.
 * @returns Directory paths relative to `projectPath`, using the platform
 *          separator, in traversal order (parents before their children).
 * @throws Propagates any error raised by `fs.readdir` while walking.
 */
async function getDirectories(projectPath: string, ignore: Ignore | null): Promise<string[]> {
  // Kept in line with the default exclude patterns used for file matching.
  const skippedNames = new Set([
    "node_modules",
    ".git",
    "dist",
    "build",
    ".next",
    "__pycache__",
    "venv",
    ".venv",
  ]);
  const found: string[] = [];

  async function walk(dir: string): Promise<void> {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (!entry.isDirectory() || skippedNames.has(entry.name)) {
        continue;
      }

      const fullPath = path.join(dir, entry.name);
      const relativePath = path.relative(projectPath, fullPath);

      if (ignore) {
        // The matcher does no filesystem access: it expects forward slashes,
        // and only treats a path as a directory (so that rules like
        // "coverage/" apply) when the path ends with "/".
        const gitignorePath = `${relativePath.split(path.sep).join("/")}/`;
        if (ignore.ignores(gitignorePath)) {
          continue;
        }
      }

      found.push(relativePath);
      await walk(fullPath);
    }
  }

  await walk(projectPath);
  return found;
}
/**
 * Convert a path that uses the platform separator into forward-slash form,
 * which is what gitignore-style matching and depth counting below operate on.
 */
function toPosixPath(nativePath: string): string {
  return path.sep === "/" ? nativePath : nativePath.split(path.sep).join("/");
}

/**
 * Report whether a project-relative file is excluded by the loaded .gitignore rules.
 * Paths the matcher refuses to evaluate (for example ones that escape the
 * project root) are treated as not ignored.
 */
function isGitignored(
  matcher: { ignores(pathname: string): boolean } | null,
  posixRelativePath: string
): boolean {
  if (matcher === null) {
    return false;
  }
  try {
    return matcher.ignores(posixRelativePath);
  } catch {
    return false;
  }
}

/**
 * Number of directories between the project root and a file: 0 for a file in
 * the root, 1 for `src/index.ts`, and so on.
 */
function directoryNesting(posixRelativePath: string): number {
  const dir = path.posix.dirname(posixRelativePath);
  return dir === "." ? 0 : dir.split("/").length;
}

/**
 * Analyze a codebase and report its size, language distribution, directory
 * structure, likely entry points and package metadata.
 *
 * Files are selected with `include_patterns` / `exclude_patterns`, then
 * filtered by the project's .gitignore (when one can be loaded) and by the
 * optional `languages` list (compared case-insensitively). Files that cannot
 * be stat'ed, such as dangling symlinks or files removed mid-run, are skipped.
 *
 * `analysis_type` controls the amount of work: "quick" skips line counting,
 * the directory listing and package.json; "deep" additionally includes the
 * per-file list in the result.
 *
 * @param args Analysis options; see AnalyzeCodebaseArgs.
 * @returns A single text content item whose `text` is the pretty-printed JSON
 *          of the AnalysisResult.
 * @throws Error when `path` does not exist or is not a directory.
 */
export async function analyzeCodebase(
  args: AnalyzeCodebaseArgs
): Promise<{ content: Array<{ type: string; text: string }> }> {
  const {
    path: projectPath,
    languages = [],
    depth = 3,
    include_patterns = ["**/*"],
    exclude_patterns = [
      "**/node_modules/**",
      "**/.git/**",
      "**/dist/**",
      "**/build/**",
      "**/.next/**",
      "**/__pycache__/**",
      "**/*.pyc",
      "**/venv/**",
      "**/.venv/**",
    ],
    analysis_type = "standard",
  } = args;

  // Validate that the path exists and can be used as a glob working directory.
  const rootStats = await fs.stat(projectPath).catch(() => null);
  if (rootStats === null) {
    throw new Error(`Path does not exist: ${projectPath}`);
  }
  if (!rootStats.isDirectory()) {
    throw new Error(`Path is not a directory: ${projectPath}`);
  }

  // Load .gitignore (null when the project has none).
  const ignore = await loadGitignore(projectPath);

  // Find all candidate files.
  const matchedFiles = await glob(include_patterns, {
    cwd: projectPath,
    ignore: exclude_patterns,
    nodir: true,
    dot: false,
  });

  // Detected language names are lowercase, so normalize the requested ones.
  const requestedLanguages = new Set(languages.map((name) => name.toLowerCase()));
  const shouldCountLines = analysis_type !== "quick";

  const fileInfos: FileInfo[] = [];
  const languageStats: Record<string, { files: number; lines: number }> = {};
  const entryPoints: string[] = [];
  let totalLines = 0;
  let totalSize = 0;

  for (const file of matchedFiles) {
    const posixFile = path.posix.normalize(toPosixPath(file));

    // Apply .gitignore to files as well, so the file totals agree with the
    // directory listing, which already honours it.
    if (isGitignored(ignore, posixFile)) {
      continue;
    }

    const language = detectLanguage(file);
    if (requestedLanguages.size > 0 && !requestedLanguages.has(language)) {
      continue;
    }

    const fullPath = path.join(projectPath, file);
    // One unreadable entry must not abort the whole analysis.
    const stats = await fs.stat(fullPath).catch(() => null);
    if (stats === null) {
      continue;
    }
    const lines = shouldCountLines ? await countLines(fullPath) : 0;

    fileInfos.push({
      relativePath: file,
      language,
      size: stats.size,
      lines,
    });

    // Update language statistics.
    if (!languageStats[language]) {
      languageStats[language] = { files: 0, lines: 0 };
    }
    languageStats[language].files++;
    languageStats[language].lines += lines;

    // Update totals.
    totalLines += lines;
    totalSize += stats.size;

    // Only report entry points that sit within the requested directory depth.
    if (isEntryPoint(file) && directoryNesting(posixFile) <= depth) {
      entryPoints.push(file);
    }
  }

  // Calculate each language's share of the total line count.
  const languagesResult: Record<string, { files: number; lines: number; percentage: number }> =
    {};
  for (const [lang, stats] of Object.entries(languageStats)) {
    languagesResult[lang] = {
      ...stats,
      percentage: totalLines > 0 ? (stats.lines / totalLines) * 100 : 0,
    };
  }

  // Directory structure and package info are skipped for "quick" analysis.
  const directories = analysis_type !== "quick" ? await getDirectories(projectPath, ignore) : [];
  const packageInfo = analysis_type !== "quick" ? await loadPackageInfo(projectPath) : undefined;

  // reduce() rather than Math.max(...spread): spreading a very large directory
  // list can exceed the engine's argument limit.
  const maxDirectoryDepth = directories.reduce(
    (deepest, dir) => Math.max(deepest, dir.split(path.sep).length),
    0
  );

  const result: AnalysisResult = {
    projectPath,
    totalFiles: fileInfos.length,
    totalLines,
    totalSize,
    languages: languagesResult,
    structure: {
      directories,
      depth: maxDirectoryDepth,
    },
    files: analysis_type === "deep" ? fileInfos : [],
    entryPoints,
    packageInfo,
    analysisType: analysis_type,
    timestamp: new Date().toISOString(),
  };

  return {
    content: [
      {
        type: "text",
        text: JSON.stringify(result, null, 2),
      },
    ],
  };
}