/**
* Language Detection Utilities
* Determines file types and languages based on file path and extension
*/
import path from 'path';
// Map of file extensions to languages
const extensionToLanguage: Record<string, string> = {
// JavaScript/TypeScript
'.js': 'javascript',
'.mjs': 'javascript',
'.cjs': 'javascript',
'.jsx': 'javascriptreact',
'.ts': 'typescript',
'.tsx': 'typescriptreact',
'.mts': 'typescript',
'.cts': 'typescript',
// Web
'.html': 'html',
'.htm': 'html',
'.css': 'css',
'.scss': 'scss',
'.sass': 'sass',
'.less': 'less',
// Data/Config
'.json': 'json',
'.jsonc': 'jsonc',
'.yaml': 'yaml',
'.yml': 'yaml',
'.toml': 'toml',
'.xml': 'xml',
// Markdown
'.md': 'markdown',
'.mdx': 'mdx',
// Other
'.graphql': 'graphql',
'.gql': 'graphql',
'.sql': 'sql',
'.sh': 'shellscript',
'.bash': 'shellscript',
'.zsh': 'shellscript',
'.ps1': 'powershell',
'.py': 'python',
'.rb': 'ruby',
'.java': 'java',
'.go': 'go',
'.rs': 'rust',
'.c': 'c',
'.cpp': 'cpp',
'.h': 'c',
'.hpp': 'cpp',
};
// Binary file extensions to skip
const binaryExtensions = new Set([
'.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.webp', '.bmp',
'.woff', '.woff2', '.ttf', '.eot', '.otf',
'.mp3', '.mp4', '.wav', '.avi', '.mov', '.webm',
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
'.zip', '.tar', '.gz', '.rar', '.7z',
'.exe', '.dll', '.so', '.dylib',
'.lock', '.map',
]);
// Code file extensions
const codeExtensions = new Set([
'.js', '.mjs', '.cjs', '.jsx',
'.ts', '.tsx', '.mts', '.cts',
'.html', '.htm',
'.css', '.scss', '.sass', '.less',
'.json', '.jsonc',
'.yaml', '.yml',
'.md', '.mdx',
'.graphql', '.gql',
'.py', '.rb', '.java', '.go', '.rs', '.c', '.cpp', '.h', '.hpp',
]);
/**
* Detect language from file path
*/
export function detectLanguage(filePath: string): string {
const ext = path.extname(filePath).toLowerCase();
return extensionToLanguage[ext] || 'plaintext';
}
/**
* Check if a file is a code file
*/
export function isCodeFile(filePath: string): boolean {
const ext = path.extname(filePath).toLowerCase();
return codeExtensions.has(ext);
}
/**
* Check if a file is binary
*/
export function isBinaryFile(filePath: string): boolean {
const ext = path.extname(filePath).toLowerCase();
return binaryExtensions.has(ext);
}
/**
* Check if file is a test file
*/
export function isTestFile(filePath: string): boolean {
const lowerPath = filePath.toLowerCase();
return (
lowerPath.includes('.spec.') ||
lowerPath.includes('.test.') ||
lowerPath.includes('__tests__') ||
lowerPath.includes('/test/') ||
lowerPath.includes('/tests/')
);
}
/**
* Check if file is a style guide or documentation
*/
export function isDocumentationFile(filePath: string): boolean {
const lowerPath = filePath.toLowerCase();
const fileName = path.basename(lowerPath);
return (
fileName === 'readme.md' ||
fileName === 'contributing.md' ||
fileName === 'changelog.md' ||
fileName === 'license.md' ||
fileName === 'style_guide.md' ||
fileName === 'style-guide.md' ||
fileName === 'architecture.md' ||
lowerPath.includes('/docs/')
);
}
/**
* Get all supported extensions
*/
export function getSupportedExtensions(): string[] {
return Array.from(codeExtensions);
}