// python-analyzer.ts • 19 kB
/**
* Comprehensive Python Code Analyzer
* Focuses on code quality and security analysis for backend developers
*/
/**
 * A single finding produced by the analyzer for one location in the
 * analyzed Python source.
 */
export interface CodeIssue {
/** Category of the finding. */
type: 'quality' | 'security' | 'style' | 'performance' | 'maintainability';
/** How serious the finding is; results are ordered from `critical` down to `low`. */
severity: 'critical' | 'high' | 'medium' | 'low';
/** 1-based line number in the analyzed source where the finding was detected. */
line: number;
/** Optional column of the finding within the line. */
column?: number;
/** Human-readable description of the problem. */
message: string;
/** Identifier of the rule that produced the finding (e.g. `bare-except`). */
rule: string;
/** Optional advice on how to resolve the finding. */
suggestion?: string;
/** Optional excerpt of the offending source line. */
codeSnippet?: string;
}
/**
 * Aggregate report for one analyzed file: the individual findings plus
 * per-severity counts, a textual summary, recommendations and two scores.
 */
export interface AnalysisResult {
/** Name of the analyzed file as supplied by the caller. */
fileName: string;
/** Number of lines in the analyzed source. */
totalLines: number;
/** Total number of findings in `issues`. */
totalIssues: number;
/** Number of findings with severity `critical`. */
criticalIssues: number;
/** Number of findings with severity `high`. */
highIssues: number;
/** Number of findings with severity `medium`. */
mediumIssues: number;
/** Number of findings with severity `low`. */
lowIssues: number;
/** All findings for the file. */
issues: CodeIssue[];
/** One-line, human-readable verdict for the file. */
summary: string;
/** Follow-up actions suggested for the file. */
recommendations: string[];
/** Overall code quality score; higher is better. */
codeQualityScore: number; // 0-100
/** Security score; higher is better. */
securityScore: number; // 0-100
}
/** A regex rule that is evaluated against a single line of Python source. */
interface LineRule {
  /** Expression that must match somewhere in the line. */
  pattern: RegExp;
  /** Optional second expression that must also match; used to narrow noisy rules. */
  requires?: RegExp;
  /** Rule identifier copied into the reported issue. */
  rule: string;
  /** Severity copied into the reported issue. */
  severity: 'critical' | 'high' | 'medium' | 'low';
  /** Message copied into the reported issue. */
  message: string;
}

/**
 * Heuristic, regex-driven static analyzer for Python source text.
 *
 * The analysis runs in two passes:
 *  1. a per-line pass that applies the single-line rule tables below, and
 *  2. a structural pass that uses indentation to find function and class
 *     bodies (length, docstrings, complexity, duplicated bodies, silent
 *     `except` handlers).
 *
 * Everything here is pattern matching on text, not parsing: findings are
 * hints for a reviewer, and both false positives and misses are possible.
 */
export class PythonAnalyzer {
  /** A line must look like SQL for the generic string-building rules to fire. */
  private static readonly SQL_HINT =
    /\b(?:select\b.+\bfrom\b|insert\s+into\b|update\b.+\bset\b|delete\s+from\b|drop\s+table\b|where\b.+[=<>])/i;

  /** Function bodies longer than this many lines are reported. */
  private static readonly MAX_FUNCTION_LINES = 50;

  /** Class bodies longer than this many lines are reported. */
  private static readonly MAX_CLASS_LINES = 200;

  /** More control-flow keywords than this in one function is reported. */
  private static readonly MAX_CONTROL_STATEMENTS = 10;

  /** Functions need at least this many statements to be compared for duplication. */
  private static readonly MIN_DUPLICATE_LINES = 3;

  private securityPatterns: LineRule[] = [
    // SQL injection. The `%` / `+` operator must follow the string literal so
    // that parameterized calls such as execute("... %s", (value,)) are not reported.
    { pattern: /execute\s*\(\s*['"][^'"]*%[^'"]*['"]\s*%/, rule: 'sql-injection-string-format', severity: 'critical', message: 'Potential SQL injection vulnerability using string formatting' },
    { pattern: /execute\s*\(\s*['"][^'"]*['"]\s*\+/, rule: 'sql-injection-concatenation', severity: 'critical', message: 'Potential SQL injection vulnerability using string concatenation' },
    { pattern: /execute\s*\(\s*f["'][^"']*\{[^}]*\}/, rule: 'sql-injection-f-string', severity: 'high', message: 'Potential SQL injection vulnerability using f-strings' },
    // Generic string building is only suspicious when the line looks like SQL.
    { pattern: /"[^"]*%[^"]*"\s*%/, requires: PythonAnalyzer.SQL_HINT, rule: 'sql-injection-string-format', severity: 'critical', message: 'Potential SQL injection vulnerability using string formatting' },
    { pattern: /"[^"]*"\s*\+\s*str\s*\(/, requires: PythonAnalyzer.SQL_HINT, rule: 'sql-injection-concatenation', severity: 'critical', message: 'Potential SQL injection vulnerability using string concatenation' },
    // Command injection
    { pattern: /os\.system\s*\(/, rule: 'command-injection-os-system', severity: 'critical', message: 'Use of os.system() can lead to command injection' },
    { pattern: /subprocess\.(call|run|Popen|check_call|check_output)\s*\([^)]*shell\s*=\s*True/, rule: 'command-injection-shell', severity: 'high', message: 'Using shell=True in subprocess can lead to command injection' },
    // The lookbehind keeps ast.literal_eval(), model.eval() and
    // create_subprocess_exec() from being reported as the builtins.
    { pattern: /(?<![\w.])eval\s*\(/, rule: 'code-injection-eval', severity: 'critical', message: 'Use of eval() can execute arbitrary code' },
    { pattern: /(?<![\w.])exec\s*\(/, rule: 'code-injection-exec', severity: 'critical', message: 'Use of exec() can execute arbitrary code' },
    // Hardcoded credentials; case-insensitive so PASSWORD, Api_Key, SECRET_KEY, ... all match.
    { pattern: /password\s*=\s*["'][^"']+["']/i, rule: 'hardcoded-password', severity: 'high', message: 'Hardcoded password detected' },
    { pattern: /api_key\s*=\s*["'][^"']+["']/i, rule: 'hardcoded-api-key', severity: 'high', message: 'Hardcoded API key detected' },
    { pattern: /secret\s*=\s*["'][^"']+["']/i, rule: 'hardcoded-secret', severity: 'high', message: 'Hardcoded secret detected' },
    { pattern: /token\s*=\s*["'][^"']+["']/i, rule: 'hardcoded-token', severity: 'high', message: 'Hardcoded token detected' },
    { pattern: /secret_key\s*=\s*["'][^"']+["']/i, rule: 'hardcoded-secret', severity: 'high', message: 'Hardcoded secret key detected' },
    // Unsafe random
    { pattern: /random\.random\(\)/, rule: 'weak-random', severity: 'medium', message: 'Use secrets module for cryptographic randomness' },
    { pattern: /random\.randint\(/, rule: 'weak-random-int', severity: 'medium', message: 'Use secrets.randbelow() for cryptographic randomness' },
    // Insecure HTTP
    { pattern: /requests\.(get|post|put|patch|delete|head)\s*\(\s*["']http:\/\//, rule: 'insecure-http', severity: 'medium', message: 'Using insecure HTTP instead of HTTPS' },
    { pattern: /urllib\.request\.urlopen\s*\(\s*["']http:\/\//, rule: 'insecure-http-urllib', severity: 'medium', message: 'Using insecure HTTP instead of HTTPS' },
    // SSL verification disabled
    { pattern: /verify\s*=\s*False/, rule: 'ssl-verification-disabled', severity: 'high', message: 'SSL verification disabled' },
    { pattern: /ssl\._create_unverified_context/, rule: 'unverified-ssl-context', severity: 'high', message: 'Creating unverified SSL context' },
  ];

  // Only single-line rules live here; anything that needs to look at several
  // lines (length, docstrings, silent except, duplication) is handled in
  // analyzeMultilinePatterns, because these tables are tested line by line.
  private qualityPatterns: LineRule[] = [
    // Naming conventions
    { pattern: /def\s+[A-Z][a-zA-Z]*\s*\(/, rule: 'function-naming-convention', severity: 'low', message: 'Function names should be snake_case' },
    { pattern: /class\s+[a-z][a-zA-Z]*\s*[\(:]/, rule: 'class-naming-convention', severity: 'low', message: 'Class names should be PascalCase' },
    // Exception handling
    { pattern: /except\s*:/, rule: 'bare-except', severity: 'medium', message: 'Bare except clause catches all exceptions' },
    { pattern: /except\s+Exception\s*:/, rule: 'broad-exception', severity: 'low', message: 'Catching broad Exception, be more specific' },
    // Code style
    { pattern: /^\s*#.*TODO/, rule: 'todo-comment', severity: 'low', message: 'TODO comment found' },
    { pattern: /^\s*#.*FIXME/, rule: 'fixme-comment', severity: 'medium', message: 'FIXME comment found' },
    { pattern: /^\s*#.*HACK/, rule: 'hack-comment', severity: 'medium', message: 'HACK comment found' },
    // Import issues
    { pattern: /^from\s+.*\s+import\s+\*/, rule: 'wildcard-import', severity: 'medium', message: 'Wildcard imports can pollute namespace' },
    { pattern: /^import\s+[^#]*,/, rule: 'multiple-imports', severity: 'low', message: 'Multiple imports on single line' },
    // Performance issues
    { pattern: /for\s+\w+\s+in\s+range\s*\(\s*len\s*\([^)]+\)\s*\):/, rule: 'range-len-antipattern', severity: 'low', message: 'Use enumerate() instead of range(len())' },
    { pattern: /\+\s*=\s*\[[^\]]*\]/, rule: 'list-concatenation', severity: 'low', message: 'Use list.extend() instead of += for better performance' },
  ];

  private maintainabilityPatterns: LineRule[] = [
    // Magic numbers
    { pattern: /\b(?<![\.\w])\d{2,}\b(?!\s*[.,:]|\s*\))/, rule: 'magic-number', severity: 'low', message: 'Magic number should be a named constant' },
  ];

  /**
   * Analyze Python source text and return every finding together with
   * per-severity counts, a summary, recommendations and two scores.
   *
   * @param code Python source to analyze; both LF and CRLF line endings are accepted.
   * @param fileName Name echoed back in the result.
   * @returns The report, with issues sorted by severity (most severe first) and then by line.
   */
  public analyzePythonCode(code: string, fileName: string = 'unknown.py'): AnalysisResult {
    const lines = code.split(/\r?\n/);
    const issues: CodeIssue[] = [];

    lines.forEach((line, index) => {
      const lineNumber = index + 1;
      this.scanLine(line, lineNumber, 'security', this.securityPatterns, issues);
      this.scanLine(line, lineNumber, 'quality', this.qualityPatterns, issues);
      this.scanLine(line, lineNumber, 'maintainability', this.maintainabilityPatterns, issues);
    });

    this.analyzeMultilinePatterns(code, issues);

    const countOf = (severity: 'critical' | 'high' | 'medium' | 'low'): number =>
      issues.filter(issue => issue.severity === severity).length;
    const severityOrder = { critical: 4, high: 3, medium: 2, low: 1 };

    return {
      fileName,
      totalLines: lines.length,
      totalIssues: issues.length,
      criticalIssues: countOf('critical'),
      highIssues: countOf('high'),
      mediumIssues: countOf('medium'),
      lowIssues: countOf('low'),
      issues: issues.sort(
        (a, b) => severityOrder[b.severity] - severityOrder[a.severity] || a.line - b.line
      ),
      summary: this.generateSummary(issues, lines.length),
      recommendations: this.generateRecommendations(issues),
      codeQualityScore: this.calculateCodeQualityScore(issues, lines.length),
      securityScore: this.calculateSecurityScore(issues),
    };
  }

  /**
   * Apply one rule table to one line and append a finding per matching rule.
   * A rule identifier is reported at most once per line even when several
   * patterns share it.
   */
  private scanLine(
    line: string,
    lineNumber: number,
    type: 'security' | 'quality' | 'maintainability',
    rules: readonly LineRule[],
    issues: CodeIssue[]
  ): void {
    const reported = new Set<string>();
    for (const rule of rules) {
      if (reported.has(rule.rule) || !rule.pattern.test(line)) continue;
      if (rule.requires !== undefined && !rule.requires.test(line)) continue;
      reported.add(rule.rule);
      issues.push({
        type,
        severity: rule.severity,
        line: lineNumber,
        message: rule.message,
        rule: rule.rule,
        codeSnippet: line.trim(),
        suggestion: this.getSuggestion(rule.rule, line),
      });
    }
  }

  /**
   * Structural pass: locate `def` / `class` headers and `except` clauses and
   * report findings that depend on more than one line. Block bodies are
   * delimited by indentation relative to the header line.
   */
  private analyzeMultilinePatterns(code: string, issues: CodeIssue[]): void {
    const lines = code.split(/\r?\n/);
    // Normalized function body -> name of the first function that had it.
    const seenBodies = new Map<string, string>();

    lines.forEach((line, index) => {
      if (/^\s*except\b/.test(line)) {
        this.checkSilentExcept(lines, index, issues);
      }

      const header = /^(\s*)(?:async\s+)?(def|class)\s+(\w+)\s*[(:]/.exec(line);
      if (header === null) return;

      // Signatures may span several lines; one-line bodies ("def f(): return 1")
      // and unterminated headers are skipped.
      const headerEnd = this.findHeaderEnd(lines, index);
      if (headerEnd === -1) return;

      const indent = (header[1] ?? '').length;
      const name = header[3] ?? '';
      const body = this.collectBody(lines, headerEnd + 1, indent);
      const docstringLines = this.docstringLineCount(body);
      const lineNumber = index + 1;
      const snippet = line.trim();

      if (header[2] === 'def') {
        this.checkFunction(name, body, docstringLines, lineNumber, snippet, seenBodies, issues);
      } else {
        this.checkClass(name, body, docstringLines, lineNumber, snippet, issues);
      }
    });
  }

  /** Report docstring, length, complexity and duplication findings for one function. */
  private checkFunction(
    name: string,
    body: readonly string[],
    docstringLines: number,
    lineNumber: number,
    snippet: string,
    seenBodies: Map<string, string>,
    issues: CodeIssue[]
  ): void {
    // Names starting with an underscore (private helpers, dunder methods) are exempt.
    if (docstringLines === 0 && !name.startsWith('_')) {
      issues.push({
        type: 'maintainability',
        severity: 'low',
        line: lineNumber,
        message: `Function '${name}' is missing a docstring`,
        rule: 'missing-function-docstring',
        codeSnippet: snippet,
        suggestion: 'Add a docstring explaining the function purpose, parameters, and return value',
      });
    }

    if (body.length > PythonAnalyzer.MAX_FUNCTION_LINES) {
      issues.push({
        type: 'quality',
        severity: 'medium',
        line: lineNumber,
        message: `Function '${name}' is too long (${body.length} lines, limit ${PythonAnalyzer.MAX_FUNCTION_LINES})`,
        rule: 'function-too-long',
        codeSnippet: snippet,
        suggestion: this.getSuggestion('function-too-long', snippet),
      });
    }

    // Rough complexity estimate: count control keywords in the code part of
    // the body, ignoring the docstring, comments and string contents.
    const statements = body.slice(docstringLines);
    const codeText = statements.map(text => this.stripCommentsAndStrings(text)).join('\n');
    const controlStatements = (codeText.match(/\b(if|for|while|try|except|with)\b/g) || []).length;
    if (controlStatements > PythonAnalyzer.MAX_CONTROL_STATEMENTS) {
      issues.push({
        type: 'quality',
        severity: 'medium',
        line: lineNumber,
        message: `Function '${name}' has high cyclomatic complexity (${controlStatements} control statements)`,
        rule: 'high-complexity',
        codeSnippet: snippet,
        suggestion: 'Consider breaking this function into smaller, more focused functions',
      });
    }

    // Two functions whose statements are textually identical (ignoring
    // indentation, blank lines, comments and docstrings) are likely copies.
    const normalized = statements
      .map(text => text.trim())
      .filter(text => text !== '' && !text.startsWith('#'));
    if (normalized.length >= PythonAnalyzer.MIN_DUPLICATE_LINES) {
      const key = normalized.join('\n');
      const firstOwner = seenBodies.get(key);
      if (firstOwner === undefined) {
        seenBodies.set(key, name);
      } else {
        issues.push({
          type: 'maintainability',
          severity: 'medium',
          line: lineNumber,
          message: `Potential code duplication detected: '${name}' has the same body as '${firstOwner}'`,
          rule: 'duplicate-code',
          codeSnippet: snippet,
          suggestion: 'Extract the shared logic into a single function and call it from both places',
        });
      }
    }
  }

  /** Report docstring and length findings for one class. */
  private checkClass(
    name: string,
    body: readonly string[],
    docstringLines: number,
    lineNumber: number,
    snippet: string,
    issues: CodeIssue[]
  ): void {
    if (docstringLines === 0 && !name.startsWith('_')) {
      issues.push({
        type: 'maintainability',
        severity: 'low',
        line: lineNumber,
        message: `Class '${name}' is missing a docstring`,
        rule: 'missing-class-docstring',
        codeSnippet: snippet,
        suggestion: 'Add a docstring describing the purpose of the class',
      });
    }

    if (body.length > PythonAnalyzer.MAX_CLASS_LINES) {
      issues.push({
        type: 'quality',
        severity: 'medium',
        line: lineNumber,
        message: `Class '${name}' is too long (${body.length} lines, limit ${PythonAnalyzer.MAX_CLASS_LINES})`,
        rule: 'class-too-long',
        codeSnippet: snippet,
        suggestion: this.getSuggestion('class-too-long', snippet),
      });
    }
  }

  /**
   * Report an `except` clause whose handler is just `pass`, either on the
   * same line or as the next non-blank, non-comment line.
   */
  private checkSilentExcept(lines: readonly string[], index: number, issues: CodeIssue[]): void {
    const clause = /^\s*except\b[^:]*:\s*(.*)$/.exec(this.stripCommentsAndStrings(lines[index] ?? ''));
    if (clause === null) return;

    let handler = (clause[1] ?? '').trim();
    if (handler === '') {
      const next = lines.slice(index + 1).find(text => !this.isBlankOrComment(text));
      handler = next === undefined ? '' : this.stripCommentsAndStrings(next).trim();
    }
    if (handler !== 'pass') return;

    issues.push({
      type: 'quality',
      severity: 'medium',
      line: index + 1,
      message: 'Silently passing exceptions can hide errors',
      rule: 'silent-exception',
      codeSnippet: (lines[index] ?? '').trim(),
      suggestion: this.getSuggestion('silent-exception', lines[index] ?? ''),
    });
  }

  /**
   * Find the line on which a `def` / `class` header that starts at `start`
   * ends, by balancing brackets across lines.
   *
   * @returns Index of the last header line, or -1 when the brackets never
   *          balance or the balanced line does not end with `:`.
   */
  private findHeaderEnd(lines: readonly string[], start: number): number {
    let depth = 0;
    for (let i = start; i < lines.length; i++) {
      const codeText = this.stripCommentsAndStrings(lines[i] ?? '');
      for (const ch of codeText) {
        if ('([{'.includes(ch)) depth++;
        else if (')]}'.includes(ch)) depth--;
      }
      if (depth <= 0) {
        return /:\s*$/.test(codeText) ? i : -1;
      }
    }
    return -1;
  }

  /**
   * Collect the lines of an indented block that starts at `start`. The block
   * continues while lines are indented deeper than `headerIndent`; blank and
   * comment lines in between are kept, trailing ones are dropped.
   */
  private collectBody(lines: readonly string[], start: number, headerIndent: number): string[] {
    const body: string[] = [];
    let end = 0; // number of collected lines up to and including the last indented one
    for (let i = start; i < lines.length; i++) {
      const text = lines[i] ?? '';
      const inside = text.trim() !== '' && this.indentOf(text) > headerIndent;
      if (!inside && !this.isBlankOrComment(text)) break;
      body.push(text);
      if (inside) end = body.length;
    }
    return body.slice(0, end);
  }

  /**
   * Number of leading body lines occupied by a docstring (including blank
   * lines before it), or 0 when the body does not start with a
   * triple-quoted string.
   */
  private docstringLineCount(body: readonly string[]): number {
    const first = body.findIndex(text => text.trim() !== '');
    if (first === -1) return 0;

    const firstLine = (body[first] ?? '').trim();
    const opener = /^[rRuU]?("""|''')/.exec(firstLine);
    if (opener === null) return 0;

    const delimiter = opener[1] ?? '"""';
    if (firstLine.slice(opener[0].length).includes(delimiter)) return first + 1;
    for (let i = first + 1; i < body.length; i++) {
      if ((body[i] ?? '').includes(delimiter)) return i + 1;
    }
    return body.length; // unterminated docstring: treat the whole body as docstring
  }

  /**
   * Return the line with any trailing `#` comment removed and the contents
   * of quoted strings emptied, so that brackets, colons and keywords inside
   * strings or comments are not mistaken for code. Works on one line only;
   * multi-line strings are not tracked.
   */
  private stripCommentsAndStrings(line: string): string {
    let out = '';
    let quote = '';
    for (let i = 0; i < line.length; i++) {
      const ch = line.charAt(i);
      if (quote !== '') {
        if (ch === '\\') {
          i++; // skip the escaped character
        } else if (ch === quote) {
          quote = '';
          out += ch;
        }
      } else if (ch === '"' || ch === "'") {
        quote = ch;
        out += ch;
      } else if (ch === '#') {
        break;
      } else {
        out += ch;
      }
    }
    return out;
  }

  /** Number of leading whitespace characters on the line (0 for blank lines). */
  private indentOf(text: string): number {
    const first = text.search(/\S/);
    return first === -1 ? 0 : first;
  }

  /** True for lines that are empty, whitespace-only, or contain only a comment. */
  private isBlankOrComment(text: string): boolean {
    const trimmed = text.trim();
    return trimmed === '' || trimmed.startsWith('#');
  }

  /**
   * Look up the remediation hint for a rule.
   *
   * @param rule Rule identifier.
   * @param line Offending source line (currently unused; kept for callers).
   * @returns The rule-specific hint, or a generic fallback for unknown rules.
   */
  private getSuggestion(rule: string, line: string): string {
    const suggestions: Record<string, string> = {
      'sql-injection-string-format': 'Use parameterized queries with placeholders: cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))',
      'sql-injection-concatenation': 'Use parameterized queries instead of string concatenation',
      'sql-injection-f-string': 'Use parameterized queries instead of f-strings for SQL',
      'command-injection-os-system': 'Use subprocess.run() with a list of arguments instead of os.system()',
      'command-injection-shell': 'Use subprocess.run() with shell=False and pass arguments as a list',
      'code-injection-eval': 'Avoid eval(). Consider using ast.literal_eval() for safe evaluation of literals',
      'code-injection-exec': 'Avoid exec(). Consider alternative approaches like importing modules dynamically',
      'hardcoded-password': 'Store passwords in environment variables or secure configuration files',
      'hardcoded-api-key': 'Store API keys in environment variables: os.getenv("API_KEY")',
      'hardcoded-secret': 'Store secrets in environment variables or secure vaults',
      'hardcoded-token': 'Store tokens in environment variables or secure configuration',
      'weak-random': 'Use secrets.SystemRandom() for cryptographic purposes',
      'weak-random-int': 'Use secrets.randbelow() for cryptographic random integers',
      'insecure-http': 'Use HTTPS instead of HTTP for secure communication',
      'insecure-http-urllib': 'Use HTTPS instead of HTTP for secure communication',
      'ssl-verification-disabled': 'Enable SSL verification for secure connections',
      'unverified-ssl-context': 'Use default SSL context or properly configure SSL verification',
      'function-too-long': 'Break this function into smaller, more focused functions',
      'class-too-long': 'Consider splitting this class into multiple smaller classes',
      'function-naming-convention': 'Use snake_case for function names: def my_function():',
      'class-naming-convention': 'Use PascalCase for class names: class MyClass:',
      'bare-except': 'Catch specific exceptions: except ValueError: or except (TypeError, ValueError):',
      'broad-exception': 'Catch more specific exceptions instead of broad Exception',
      'silent-exception': 'Handle or at least log the exception instead of passing silently',
      'wildcard-import': 'Import specific items: from module import item1, item2',
      'multiple-imports': 'Put each import on its own line',
      'range-len-antipattern': 'Use enumerate(): for i, item in enumerate(items):',
      'missing-docstring': 'Add a docstring: """Brief description of the function."""',
      'magic-number': 'Define as a named constant: MAX_RETRIES = 3',
    };
    return suggestions[rule] || 'Consider following Python best practices for this pattern';
  }

  /**
   * Quality score in [0, 100]: start at 100, subtract a weighted penalty per
   * issue (critical 20, high 10, medium 5, low 1) and add a small bonus for
   * files shorter than 100 lines. Rounded to one decimal place.
   */
  private calculateCodeQualityScore(issues: CodeIssue[], totalLines: number): number {
    const weights = { critical: 20, high: 10, medium: 5, low: 1 };
    const totalPenalty = issues.reduce((sum, issue) => sum + weights[issue.severity], 0);
    const lineBonus = Math.max(0, (100 - totalLines) * 0.1); // Slight bonus for concise code
    const score = Math.max(0, Math.min(100, 100 - totalPenalty + lineBonus));
    return Math.round(score * 10) / 10; // avoid floating-point noise such as 94.30000000000001
  }

  /**
   * Security score in [0, 100], banded by the worst security severity found:
   * critical -> 0..20, high -> 30..50, medium -> 60..75, otherwise 100.
   * Within a band, each additional issue lowers the score.
   */
  private calculateSecurityScore(issues: CodeIssue[]): number {
    const securityIssues = issues.filter(issue => issue.type === 'security');
    const countOf = (severity: 'critical' | 'high' | 'medium'): number =>
      securityIssues.filter(issue => issue.severity === severity).length;

    const critical = countOf('critical');
    if (critical > 0) return Math.max(0, 30 - critical * 10);
    const high = countOf('high');
    if (high > 0) return Math.max(30, 60 - high * 10);
    const medium = countOf('medium');
    if (medium > 0) return Math.max(60, 80 - medium * 5);
    return 100;
  }

  /**
   * One-line verdict chosen by the most serious condition present:
   * critical issues, then high issues, then any security issue, then
   * "no issues", and finally a generic review message.
   */
  private generateSummary(issues: CodeIssue[], totalLines: number): string {
    const criticalCount = issues.filter(issue => issue.severity === 'critical').length;
    if (criticalCount > 0) {
      return `⚠️ CRITICAL: Code contains ${criticalCount} critical security issues that must be addressed immediately.`;
    }
    const highCount = issues.filter(issue => issue.severity === 'high').length;
    if (highCount > 0) {
      return `🔴 HIGH PRIORITY: Code has ${highCount} high-priority issues requiring attention.`;
    }
    const securityCount = issues.filter(issue => issue.type === 'security').length;
    if (securityCount > 0) {
      return `🔒 SECURITY: Code has ${securityCount} security-related issues to review.`;
    }
    if (issues.length === 0) {
      return `✅ EXCELLENT: No issues detected in this code! Well done!`;
    }
    return `📊 REVIEW: Code has ${issues.length} issues across ${totalLines} lines.`;
  }

  /**
   * Build the list of follow-up actions from the kinds of issues found.
   * Always returns at least one entry.
   */
  private generateRecommendations(issues: CodeIssue[]): string[] {
    const recommendations: string[] = [];
    const criticalCount = issues.filter(issue => issue.severity === 'critical').length;
    const securityCount = issues.filter(issue => issue.type === 'security').length;
    const qualityCount = issues.filter(issue => issue.type === 'quality').length;
    const namingCount = issues.filter(issue => issue.rule.includes('naming-convention')).length;
    const docstringCount = issues.filter(issue => issue.rule.includes('docstring')).length;

    if (criticalCount > 0) {
      recommendations.push(`🚨 IMMEDIATE ACTION: Fix all ${criticalCount} critical security vulnerabilities before deploying to production`);
    }
    if (securityCount > 0) {
      recommendations.push(`🔐 SECURITY REVIEW: Conduct thorough security review focusing on input validation, authentication, and data handling`);
    }
    if (qualityCount > 5) {
      recommendations.push(`🔧 CODE REFACTORING: Consider refactoring to address code quality issues and improve maintainability`);
    }
    if (namingCount > 0) {
      recommendations.push(`📝 NAMING CONVENTIONS: Follow PEP 8 naming conventions for better code readability`);
    }
    if (docstringCount > 0) {
      recommendations.push(`📚 DOCUMENTATION: Add comprehensive docstrings to improve code documentation`);
    }
    if (recommendations.length === 0) {
      recommendations.push('✨ MAINTENANCE: Regular code reviews and automated testing will help maintain code quality');
    }
    return recommendations;
  }
}