// Copyright 2025 Chris Bunting
// Brief: Language detection service for Static Analysis MCP Server
// Scope: Automatically detects programming languages based on file extensions and content
import { Language } from '@mcp-code-analysis/shared-types';
import { readFileSync, existsSync } from 'fs';
import { extname } from 'path';
/**
 * Detects the programming language of a source file.
 *
 * Detection is attempted in this order:
 *   1. the file extension (case-insensitive),
 *   2. a shebang on the first line of the content,
 *   3. content heuristics (distinctive markers of Python, Java, C/C++, Go,
 *      Rust and TypeScript, in that order),
 *   4. a JavaScript fallback when nothing else matches.
 *
 * The content heuristics are best-effort: they look for a handful of
 * language-specific constructs and are matched case-insensitively.
 */
export class LanguageDetector {
  // Python: `import os|sys|numpy|pandas...` at the start of a line, unless it
  // is followed by `from`, `=` or `, {`/`, *` (which would make it a JS/TS
  // import), or `from <module> import`.
  private static readonly PYTHON_PATTERN =
    /^[ \t]*import[ \t]+(?:os|sys|numpy|pandas)(?![\w$.]*[ \t]*(?:from\b|=|,[ \t]*[{*]))|^[ \t]*from[ \t]+(?:os|sys|numpy|pandas)(?:\.[\w.]*)?[ \t]+import\b/im;

  // Java: a public/private class declaration or an import from `java.*`.
  private static readonly JAVA_PATTERN =
    /\b(?:public|private)[ \t]+class\b|\bimport[ \t]+java\./i;

  // C or C++: an #include directive, or an `int main` / `void main` entry point.
  private static readonly C_FAMILY_PATTERN =
    /^[ \t]*#[ \t]*include[ \t]*[<"]|\b(?:int|void)[ \t]+main\b/im;

  // Markers that turn a C-family match into C++.
  private static readonly CPP_PATTERN =
    /#[ \t]*include[ \t]*<iostream>|\busing[ \t]+namespace\b|\bstd::|\bclass[ \t]/i;

  // Go: `package main`, a `func` declaration or a grouped `import (` at the
  // start of a line.
  private static readonly GO_PATTERN =
    /^[ \t]*package[ \t]+main\b|^[ \t]*func[ \t]+[(A-Za-z_]|^[ \t]*import[ \t]+\(/im;

  // Rust: a named `fn` declaration, `let mut`, or `use std::` at line start.
  private static readonly RUST_PATTERN =
    /\bfn[ \t]+[A-Za-z_]\w*[ \t]*[<(]|\blet[ \t]+mut\b|^[ \t]*(?:pub[ \t]+)?use[ \t]+std::/im;

  // TypeScript: an `interface`, `type` or `enum` declaration at the start of a
  // line (optionally exported/declared). Anchoring to the line start keeps
  // words such as `prototype ` from being mistaken for a `type` declaration.
  private static readonly TYPESCRIPT_PATTERN =
    /^[ \t]*(?:export[ \t]+)?(?:declare[ \t]+)?(?:interface|type|(?:const[ \t]+)?enum)[ \t]+[A-Za-z_$][\w$]*/im;

  /** Lower-cased file extension (including the leading dot) to language. */
  private readonly extensionMap: Map<string, Language> = new Map<string, Language>([
    ['.js', Language.JAVASCRIPT],
    ['.jsx', Language.JAVASCRIPT],
    ['.mjs', Language.JAVASCRIPT],
    ['.ts', Language.TYPESCRIPT],
    ['.tsx', Language.TYPESCRIPT],
    ['.py', Language.PYTHON],
    ['.pyx', Language.PYTHON],
    ['.java', Language.JAVA],
    ['.c', Language.C],
    ['.h', Language.C],
    ['.cpp', Language.CPP],
    ['.cxx', Language.CPP],
    ['.cc', Language.CPP],
    ['.hpp', Language.CPP],
    ['.go', Language.GO],
    ['.rs', Language.RUST],
  ]);

  /**
   * Shebang line (stored with its `#!` prefix) to language. A first line
   * matches an entry when it contains the entry's text after the `#!`.
   */
  private readonly shebangMap: Map<string, Language> = new Map<string, Language>([
    ['#!/usr/bin/env node', Language.JAVASCRIPT],
    ['#!/usr/bin/env python', Language.PYTHON],
    ['#!/usr/bin/env python3', Language.PYTHON],
    ['#!/usr/bin/env java', Language.JAVA],
    ['#!/usr/bin/env go', Language.GO],
    ['#!/usr/bin/env rust', Language.RUST],
  ]);

  /** Language reported when neither extension nor content identifies one. */
  private readonly fallbackLanguage: Language = Language.JAVASCRIPT;

  /**
   * Detects the language of a file.
   *
   * @param filePath Path of the file; only its extension is inspected unless
   *   the file has to be read from disk.
   * @param content Optional file content. When a string is supplied (even an
   *   empty one) it is used for content-based detection and the file system
   *   is not touched.
   * @returns The detected language, or JavaScript when nothing matches.
   */
  detectLanguage(filePath: string, content?: string): Language {
    const extension = extname(filePath).toLowerCase();

    // A known extension is the strongest signal and wins outright.
    const byExtension = this.extensionMap.get(extension);
    if (byExtension !== undefined) {
      return byExtension;
    }

    // Caller-supplied content is authoritative; `''` must not fall through to
    // a disk read, so test the type rather than truthiness.
    if (typeof content === 'string') {
      return this.detectByContent(content);
    }

    // No content given: read the file if it is there.
    if (existsSync(filePath)) {
      try {
        return this.detectByContent(readFileSync(filePath, 'utf-8'));
      } catch {
        // Unreadable (directory, permissions, removed meanwhile): use the fallback.
        return this.fallbackLanguage;
      }
    }

    return this.fallbackLanguage;
  }

  /**
   * Looks up the language for a shebang on the first line of `content`.
   *
   * @returns The mapped language, or `undefined` when the first line is not a
   *   shebang or no registered mapping matches it.
   */
  private detectByShebang(content: string): Language | undefined {
    const lineEnd = content.indexOf('\n');
    const firstLine = (lineEnd === -1 ? content : content.slice(0, lineEnd)).trim();
    if (!firstLine.startsWith('#!')) {
      return undefined;
    }

    for (const [shebang, language] of this.shebangMap) {
      const interpreter = shebang.startsWith('#!') ? shebang.slice(2) : shebang;
      // An empty needle would match every shebang, so skip it.
      if (interpreter !== '' && firstLine.includes(interpreter)) {
        return language;
      }
    }
    return undefined;
  }

  /**
   * Guesses the language from file content.
   *
   * Distinctive markers are checked before generic ones: keywords such as
   * `const`, `let` or `var` also occur in Go, Rust and C, so JavaScript is
   * only the result when no other language's markers are present.
   */
  private detectByContent(content: string): Language {
    const byShebang = this.detectByShebang(content);
    if (byShebang !== undefined) {
      return byShebang;
    }

    if (LanguageDetector.PYTHON_PATTERN.test(content)) {
      return Language.PYTHON;
    }
    if (LanguageDetector.JAVA_PATTERN.test(content)) {
      return Language.JAVA;
    }
    if (LanguageDetector.C_FAMILY_PATTERN.test(content)) {
      return LanguageDetector.CPP_PATTERN.test(content) ? Language.CPP : Language.C;
    }
    if (LanguageDetector.GO_PATTERN.test(content)) {
      return Language.GO;
    }
    if (LanguageDetector.RUST_PATTERN.test(content)) {
      return Language.RUST;
    }
    if (LanguageDetector.TYPESCRIPT_PATTERN.test(content)) {
      return Language.TYPESCRIPT;
    }

    // Anything else, including plain JavaScript, ends up here.
    return this.fallbackLanguage;
  }

  /** Returns the distinct languages that have at least one extension mapping. */
  getSupportedLanguages(): Language[] {
    return [...new Set(this.extensionMap.values())];
  }

  /** Returns every registered extension (lower-case, with leading dot). */
  getSupportedExtensions(): string[] {
    return [...this.extensionMap.keys()];
  }

  /** Reports whether any registered extension maps to `language`. */
  isLanguageSupported(language: Language): boolean {
    for (const supported of this.extensionMap.values()) {
      if (supported === language) {
        return true;
      }
    }
    return false;
  }

  /**
   * Registers or replaces an extension mapping.
   *
   * @param extension Extension with or without the leading dot, in any case
   *   (`'.rb'`, `'rb'` and `'RB'` all register `.rb`). An empty string is
   *   stored as-is and therefore applies to files without an extension.
   * @param language Language to report for that extension.
   */
  addExtensionMapping(extension: string, language: Language): void {
    const lowered = extension.trim().toLowerCase();
    // `extname` always yields a leading dot, so a key without one could never match.
    const key = lowered === '' || lowered.startsWith('.') ? lowered : `.${lowered}`;
    this.extensionMap.set(key, language);
  }

  /**
   * Registers or replaces a shebang mapping.
   *
   * @param shebang Shebang text with or without the `#!` prefix, e.g.
   *   `'#!/usr/bin/env ruby'` or `'/usr/bin/env ruby'`.
   * @param language Language to report when a file's first line contains it.
   */
  addShebangMapping(shebang: string, language: Language): void {
    const trimmed = shebang.trim();
    // Store with the prefix so that lookup can strip it uniformly.
    const key = trimmed.startsWith('#!') ? trimmed : `#!${trimmed}`;
    this.shebangMap.set(key, language);
  }
}