import { Octokit } from "@octokit/rest";
import fetch from "node-fetch";
/**
 * Arguments accepted by the `read_repository_file` tool.
 *
 * Only `owner`, `repo` and `path` are mandatory; the remaining fields are
 * optional and fall back to defaults chosen by the tool implementation.
 */
interface ReadFileArgs {
  /** GitHub user or organization that owns the repository. */
  owner: string;

  /** Repository name. */
  repo: string;

  /** Path of the file inside the repository. */
  path: string;

  /** Branch (git ref) to read the file from. */
  branch?: string;

  /** How the file content should be returned. */
  encoding?: 'utf8' | 'base64' | 'raw';

  /** Upper bound, in bytes, on the size of a file that will be read. */
  max_size?: number;
}
/**
 * Tool definition for reading a single file from a GitHub repository.
 *
 * `parameters` is the JSON-schema description of the accepted arguments and
 * `run` performs the read. `run` never rejects: every failure is converted
 * into a result object with `isError: true` and a human-readable message.
 */
export const readRepositoryFile = {
  name: "read_repository_file",
  description: "Read and analyze the contents of a specific file from a GitHub repository. This tool can handle text files, code files, configuration files, and provides syntax highlighting and content analysis.",
  parameters: {
    type: "object",
    properties: {
      owner: {
        type: "string",
        description: "The GitHub username or organization name that owns the repository"
      },
      repo: {
        type: "string",
        description: "The name of the GitHub repository"
      },
      path: {
        type: "string",
        description: "The file path within the repository (e.g., 'src/index.js', 'README.md')"
      },
      branch: {
        type: "string",
        description: "Optional: Branch name to read from (default: main/master branch)",
        default: "main"
      },
      encoding: {
        type: "string",
        enum: ["utf8", "base64", "raw"],
        description: "Optional: File encoding format (default: utf8 for text files)",
        default: "utf8"
      },
      max_size: {
        type: "number",
        description: "Optional: Maximum file size to read in bytes (default: 1MB, max: 5MB)",
        default: 1048576,
        maximum: 5242880
      }
    },
    required: ["owner", "repo", "path"]
  },

  /**
   * Fetches the file described by `args` and returns a Markdown report
   * containing its metadata, a content analysis and the (possibly truncated)
   * content.
   *
   * @param args - Repository coordinates plus optional branch, encoding and
   *   size limit. `branch` falls back to "main", `encoding` to "utf8" and
   *   `max_size` to 1 MiB (capped at 5 MiB).
   * @returns A result with a single text item; `isError` is `true` when the
   *   text describes a failure instead of the file.
   */
  async run(args: ReadFileArgs) {
    try {
      // Parameter validation (done before any network access).
      if (!args.owner || !args.repo || !args.path) {
        throw new Error("Parameters 'owner', 'repo', and 'path' are required");
      }
      if (typeof args.max_size === "number" && args.max_size < 0) {
        // A negative limit can never be satisfied; fail fast with a clear message.
        throw new Error("Parameter 'max_size' must not be negative");
      }

      const octokit = new Octokit({
        auth: process.env.GITHUB_TOKEN
      });
      const branch = args.branch || "main";
      const encoding = args.encoding || "utf8";
      // A missing (or zero) limit falls back to 1 MiB; the hard cap is 5 MiB.
      const maxSize = Math.min(args.max_size || 1048576, 5242880);

      // Get file information
      const fileInfo = await octokit.rest.repos.getContent({
        owner: args.owner,
        repo: args.repo,
        path: args.path,
        ref: branch
      });
      if (Array.isArray(fileInfo.data)) {
        throw new Error(`Path '${args.path}' is a directory, not a file`);
      }
      const file = fileInfo.data;
      if (!('content' in file)) {
        throw new Error(`Unable to read file content for '${args.path}'`);
      }

      // Check file size
      if (file.size > maxSize) {
        throw new Error(`File size (${formatBytes(file.size)}) exceeds maximum allowed size (${formatBytes(maxSize)})`);
      }

      // Get file content
      let content: string;
      let contentAnalysis: string;
      if (file.download_url && encoding === 'raw') {
        // Raw mode: return the downloaded body as text.
        const response = await downloadRepositoryFile(file.download_url);
        content = await response.text();
        contentAnalysis = analyzeFileContent(content, args.path, file.size, 'raw');
      } else {
        let base64Content: string;
        if (file.content === "" && file.size > 0 && file.download_url) {
          // The contents API leaves `content` empty for files above its inline
          // limit (about 1 MB); fetch the bytes from the download URL instead
          // of reporting an empty file.
          const response = await downloadRepositoryFile(file.download_url);
          base64Content = Buffer.from(await response.arrayBuffer()).toString('base64');
        } else {
          // The inline payload is base64 wrapped with newlines.
          base64Content = file.content.replace(/\n/g, '');
        }

        if (encoding === 'base64') {
          content = base64Content;
          contentAnalysis = analyzeFileContent(content, args.path, file.size, 'base64');
        } else {
          // Default to UTF-8 text
          content = decodeBase64AsUtf8(base64Content);
          contentAnalysis = analyzeFileContent(content, args.path, file.size, 'utf8');
        }
      }

      // Generate comprehensive file analysis
      const analysis = generateFileAnalysis(file, content, args, contentAnalysis);
      return {
        content: [{
          type: "text",
          text: analysis
        }],
        isError: false
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
      return {
        content: [{
          type: "text",
          text: `❌ **Error reading repository file**\n\n${errorMessage}\n\n**Common issues:**\n- File not found or path incorrect\n- Repository not found or private\n- File is too large\n- File is binary (try encoding='base64' or encoding='raw')\n- Invalid branch name\n- Rate limit exceeded (consider adding GITHUB_TOKEN)`
        }],
        isError: true
      };
    }
  }
};

/** Downloads `url` and returns the response, throwing when the HTTP status is not OK. */
async function downloadRepositoryFile(url: string) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download file: ${response.statusText}`);
  }
  return response;
}

/**
 * Decodes base64 data as UTF-8 text, throwing when the bytes are not valid UTF-8.
 *
 * `Buffer#toString('utf8')` never throws; it silently substitutes U+FFFD for
 * invalid sequences. Re-encoding the decoded text and comparing it with the
 * original bytes detects that substitution without rejecting files that
 * legitimately contain U+FFFD.
 */
function decodeBase64AsUtf8(base64Content: string): string {
  const bytes = Buffer.from(base64Content, 'base64');
  const text = bytes.toString('utf8');
  if (!Buffer.from(text, 'utf8').equals(bytes)) {
    throw new Error(`Failed to decode file as UTF-8. File might be binary. Try using encoding='base64' or encoding='raw'`);
  }
  return text;
}
/**
 * Builds the "content analysis" section of the report as newline-separated
 * Markdown lines.
 *
 * File type, size and encoding are always reported. Line, character and word
 * counts, code statistics and detected patterns are added only when
 * `encoding` is `'utf8'` and the content is non-empty.
 *
 * @param content - File content as returned to the caller.
 * @param filePath - Repository path of the file; used to derive the extension.
 * @param fileSize - Size passed to `formatBytes` for the size line.
 * @param encoding - Encoding label to display ('utf8', 'base64' or 'raw').
 * @returns The analysis lines joined with `\n`.
 */
function analyzeFileContent(content: string, filePath: string, fileSize: number, encoding: string): string {
  const analysis: string[] = [];

  // Derive the extension from the last path segment only. Splitting the whole
  // path on '.' turns 'docker/Dockerfile' into 'docker/dockerfile' and
  // 'my.dir/LICENSE' into 'dir/license', neither of which can match a type.
  const fileName = filePath.split('/').pop() ?? '';
  const extension = fileName.split('.').pop()?.toLowerCase() || '';
  const fileType = getFileType(extension);

  analysis.push(`**File Type:** ${fileType}`);
  analysis.push(`**Size:** ${formatBytes(fileSize)}`);
  analysis.push(`**Encoding:** ${encoding}`);

  if (encoding === 'utf8' && content) {
    const lines = content.split('\n');
    analysis.push(`**Lines:** ${lines.length}`);

    // Character and word count for text files
    const chars = content.length;
    const words = content.split(/\s+/).filter(word => word.length > 0).length;
    analysis.push(`**Characters:** ${chars.toLocaleString()}`);
    analysis.push(`**Words:** ${words.toLocaleString()}`);

    // Language-specific analysis
    if (isCodeFile(extension)) {
      analysis.push(...analyzeCodeFile(content, extension));
    }

    // Check for common patterns
    const patterns = detectPatterns(content, extension);
    if (patterns.length > 0) {
      analysis.push(`**Detected Patterns:** ${patterns.join(', ')}`);
    }
  }

  return analysis.join('\n');
}
/**
 * Renders the final Markdown report for a file: metadata, the pre-computed
 * content analysis and the content itself inside a fenced code block.
 *
 * Content longer than 10,000 characters is cut to that length and a
 * truncation notice is appended after the code block.
 *
 * @param file - File metadata object; only `sha`, `type` and `download_url`
 *   are read.
 * @param content - File content to display.
 * @param args - Original tool arguments (owner, repo, path, branch).
 * @param contentAnalysis - Analysis text inserted verbatim into the report.
 * @returns The complete Markdown document.
 */
function generateFileAnalysis(file: any, content: string, args: ReadFileArgs, contentAnalysis: string): string {
  // Use the last path segment so directories containing dots, or
  // extensionless files in sub-directories, do not yield a bogus extension.
  const fileName = args.path.split('/').pop() ?? '';
  const extension = fileName.split('.').pop()?.toLowerCase() || '';
  const language = getLanguageFromExtension(extension);

  // Truncate very long content
  const maxDisplayLength = 10000; // 10KB for display
  const truncated = content.length > maxDisplayLength;
  const formattedContent = truncated ? content.substring(0, maxDisplayLength) : content;

  // A fence must be longer than any backtick run inside the block, otherwise
  // content such as a Markdown file with its own ``` fences ends the block early.
  let longestBacktickRun = 0;
  for (const run of formattedContent.match(/`+/g) ?? []) {
    longestBacktickRun = Math.max(longestBacktickRun, run.length);
  }
  const fence = '`'.repeat(Math.max(3, longestBacktickRun + 1));

  // download_url can be absent; avoid rendering a link to "null".
  const downloadLink = file.download_url ? `[View Raw](${file.download_url})` : 'Not available';

  const truncationNote = truncated
    ? `\n⚠️ **Content Truncated** - Showing first ${formatBytes(maxDisplayLength)} of ${formatBytes(content.length)}. Use the download URL above to view the complete file.`
    : '';

  return [
    `# 📄 File Content Analysis`,
    `## 📋 File Information`,
    `- **Repository:** ${args.owner}/${args.repo}`,
    `- **File Path:** \`${args.path}\``,
    `- **Branch:** ${args.branch || 'main'}`,
    `- **SHA:** ${file.sha}`,
    `- **Last Modified:** ${file.type === 'file' ? 'Available via Git history' : 'N/A'}`,
    `- **Download URL:** ${downloadLink}`,
    `## 📊 Content Analysis`,
    contentAnalysis,
    `## 📝 File Content`,
    `${fence}${language}`,
    formattedContent,
    fence,
    truncationNote,
    `---`,
    `*File read completed at ${new Date().toLocaleString()}*`
  ].join('\n');
}
/**
 * Display names keyed by lower-case file extension.
 *
 * A `Map` is used instead of an object literal so that extensions such as
 * `constructor` or `__proto__` cannot resolve to inherited `Object.prototype`
 * members.
 */
const FILE_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map<string, string>([
  // Programming languages
  ['js', 'JavaScript'],
  ['ts', 'TypeScript'],
  ['jsx', 'React JSX'],
  ['tsx', 'React TSX'],
  ['py', 'Python'],
  ['java', 'Java'],
  ['cpp', 'C++'],
  ['c', 'C'],
  ['cs', 'C#'],
  ['php', 'PHP'],
  ['rb', 'Ruby'],
  ['go', 'Go'],
  ['rs', 'Rust'],
  ['swift', 'Swift'],
  ['kt', 'Kotlin'],
  ['scala', 'Scala'],
  ['r', 'R'],
  ['matlab', 'MATLAB'],
  // Web technologies
  ['html', 'HTML'],
  ['css', 'CSS'],
  ['scss', 'SCSS'],
  ['sass', 'Sass'],
  ['less', 'Less'],
  ['vue', 'Vue.js'],
  // Data formats
  ['json', 'JSON'],
  ['xml', 'XML'],
  ['yaml', 'YAML'],
  ['yml', 'YAML'],
  ['toml', 'TOML'],
  ['csv', 'CSV'],
  // Documentation
  ['md', 'Markdown'],
  ['rst', 'reStructuredText'],
  ['txt', 'Plain Text'],
  // Configuration
  ['ini', 'INI Configuration'],
  ['conf', 'Configuration'],
  ['config', 'Configuration'],
  ['env', 'Environment Variables'],
  // Build/Package
  ['dockerfile', 'Docker'],
  ['makefile', 'Makefile'],
  ['gradle', 'Gradle'],
  ['maven', 'Maven POM'],
  // Shell scripts
  ['sh', 'Shell Script'],
  ['bash', 'Bash Script'],
  ['zsh', 'Zsh Script'],
  ['fish', 'Fish Script'],
  ['ps1', 'PowerShell'],
  // Other
  ['sql', 'SQL'],
  ['graphql', 'GraphQL'],
  ['proto', 'Protocol Buffers']
]);

/**
 * Maps a lower-case file extension to a human-readable file type.
 *
 * @param extension - Extension without the leading dot (e.g. `'ts'`).
 * @returns The known display name; otherwise the upper-cased extension
 *   followed by " File", or "Unknown File" when the extension is empty.
 */
function getFileType(extension: string): string {
  const known = FILE_TYPE_BY_EXTENSION.get(extension);
  if (known !== undefined) {
    return known;
  }
  return extension === '' ? 'Unknown File' : `${extension.toUpperCase()} File`;
}
/**
 * Code-fence language identifiers keyed by lower-case file extension.
 *
 * Stored in a `Map` so that extensions matching `Object.prototype` member
 * names (e.g. `constructor`) are treated as unknown rather than resolving to
 * inherited, non-string values.
 */
const FENCE_LANGUAGE_BY_EXTENSION: ReadonlyMap<string, string> = new Map<string, string>([
  ['js', 'javascript'],
  ['ts', 'typescript'],
  ['jsx', 'jsx'],
  ['tsx', 'tsx'],
  ['py', 'python'],
  ['java', 'java'],
  ['cpp', 'cpp'],
  ['c', 'c'],
  ['cs', 'csharp'],
  ['php', 'php'],
  ['rb', 'ruby'],
  ['go', 'go'],
  ['rs', 'rust'],
  ['swift', 'swift'],
  ['kt', 'kotlin'],
  ['scala', 'scala'],
  ['html', 'html'],
  ['css', 'css'],
  ['scss', 'scss'],
  ['json', 'json'],
  ['xml', 'xml'],
  ['yaml', 'yaml'],
  ['yml', 'yaml'],
  ['md', 'markdown'],
  ['sh', 'bash'],
  ['sql', 'sql'],
  ['dockerfile', 'dockerfile']
]);

/**
 * Returns the Markdown code-fence language for a file extension.
 *
 * @param extension - Lower-case extension without the leading dot.
 * @returns The fence language, or `'text'` when the extension is not known.
 */
function getLanguageFromExtension(extension: string): string {
  return FENCE_LANGUAGE_BY_EXTENSION.get(extension) ?? 'text';
}
/**
 * Extensions that receive code-level statistics.
 *
 * `json` is included because `analyzeCodeFile` has a dedicated `'json'` case;
 * without it here that case could never be reached.
 */
const CODE_FILE_EXTENSIONS: ReadonlySet<string> = new Set<string>([
  'js', 'ts', 'jsx', 'tsx', 'py', 'java', 'cpp', 'c', 'cs', 'php', 'rb', 'go', 'rs',
  'swift', 'kt', 'scala', 'html', 'css', 'scss', 'sass', 'less', 'vue', 'json'
]);

/**
 * Tells whether files with the given extension should get code analysis.
 *
 * @param extension - Lower-case extension without the leading dot.
 * @returns `true` when the extension is in the code-file list.
 */
function isCodeFile(extension: string): boolean {
  return CODE_FILE_EXTENSIONS.has(extension);
}
/**
 * Produces code statistics for a source file.
 *
 * Always reports the number of non-blank and blank lines, then hands over to
 * the language-specific analyzer (JavaScript/TypeScript, Python, Java or
 * JSON) when the extension has one.
 *
 * @param content - Full text of the file.
 * @param extension - Lower-case extension without the leading dot.
 * @returns Markdown-formatted statistic lines.
 */
function analyzeCodeFile(content: string, extension: string): string[] {
  // Single pass over the lines, tallying blank ones.
  let totalCount = 0;
  let blankCount = 0;
  for (const line of content.split('\n')) {
    totalCount += 1;
    if (line.trim().length === 0) {
      blankCount += 1;
    }
  }

  const report = [
    `**Code Lines:** ${totalCount - blankCount}`,
    `**Empty Lines:** ${blankCount}`
  ];

  // Language-specific analysis; each analyzer appends to `report`.
  if (extension === 'js' || extension === 'ts' || extension === 'jsx' || extension === 'tsx') {
    analyzeJavaScript(content, report);
  } else if (extension === 'py') {
    analyzePython(content, report);
  } else if (extension === 'java') {
    analyzeJava(content, report);
  } else if (extension === 'json') {
    analyzeJSON(content, report);
  }

  return report;
}
/**
 * Appends regex-based counts of functions, classes and imports found in
 * JavaScript/TypeScript source to `analysis`.
 *
 * A count line is added only when its pattern matches at least once.
 *
 * @param content - Source text to scan.
 * @param analysis - Output list; mutated in place.
 */
function analyzeJavaScript(content: string, analysis: string[]): void {
  const counters: Array<[string, RegExp]> = [
    ['Functions', /function\s+\w+|\w+\s*=>|\w+\s*:\s*function/g],
    ['Classes', /class\s+\w+/g],
    ['Imports', /import\s+.*from|require\s*\(/g]
  ];

  for (const [label, pattern] of counters) {
    const hits = content.match(pattern);
    if (hits !== null) {
      analysis.push(`**${label}:** ${hits.length}`);
    }
  }
}
/**
 * Appends regex-based counts of `def` functions, classes and import
 * statements found in Python source to `analysis`.
 *
 * A count line is added only when its pattern matches at least once.
 *
 * @param content - Source text to scan.
 * @param analysis - Output list; mutated in place.
 */
function analyzePython(content: string, analysis: string[]): void {
  const counters: Array<[string, RegExp]> = [
    ['Functions', /def\s+\w+/g],
    ['Classes', /class\s+\w+/g],
    ['Imports', /import\s+\w+|from\s+\w+\s+import/g]
  ];

  for (const [label, pattern] of counters) {
    const hits = content.match(pattern);
    if (hits !== null) {
      analysis.push(`**${label}:** ${hits.length}`);
    }
  }
}
/**
 * Appends regex-based counts of methods, classes and imports found in Java
 * source to `analysis`.
 *
 * A count line is added only when its pattern matches at least once.
 *
 * @param content - Source text to scan.
 * @param analysis - Output list; mutated in place.
 */
function analyzeJava(content: string, analysis: string[]): void {
  const counters: Array<[string, RegExp]> = [
    ['Methods', /\b(public|private|protected)\s+.*\s+\w+\s*\(/g],
    ['Classes', /\b(public|private)\s+class\s+\w+/g],
    ['Imports', /import\s+[\w.]+/g]
  ];

  for (const [label, pattern] of counters) {
    const hits = content.match(pattern);
    if (hits !== null) {
      analysis.push(`**${label}:** ${hits.length}`);
    }
  }
}
/**
 * Appends a summary of a JSON document's top-level value to `analysis`.
 *
 * - Objects: key count and up to the first ten key names.
 * - Arrays: item count.
 * - `null` and primitives: the kind of value.
 * - Unparseable text: an "Invalid JSON format" status line.
 *
 * Only a genuine parse failure is reported as invalid; valid documents whose
 * top-level value is not an object (for example `null`) are described rather
 * than rejected.
 *
 * @param content - JSON text to inspect.
 * @param analysis - Output list; mutated in place.
 */
function analyzeJSON(content: string, analysis: string[]): void {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    analysis.push(`**JSON Status:** Invalid JSON format`);
    return;
  }

  if (Array.isArray(parsed)) {
    analysis.push(`**Top-level Items:** ${parsed.length}`);
    return;
  }

  if (parsed === null || typeof parsed !== 'object') {
    // Object.keys(null) would throw, and keys of a string are just indices.
    analysis.push(`**Top-level Value:** ${parsed === null ? 'null' : typeof parsed}`);
    return;
  }

  const keys = Object.keys(parsed);
  analysis.push(`**Top-level Keys:** ${keys.length}`);
  if (keys.length > 0) {
    analysis.push(`**Keys:** ${keys.slice(0, 10).join(', ')}${keys.length > 10 ? '...' : ''}`);
  }
}
/**
 * Scans text for a fixed set of marker substrings and returns the labels of
 * the categories that were found, in a fixed order.
 *
 * Matching is a plain case-sensitive substring search; a category is
 * reported when any one of its markers occurs in `content`.
 *
 * @param content - Text to scan.
 * @param extension - File extension; accepted but not used by the checks.
 * @returns Labels of every category with at least one marker present.
 */
function detectPatterns(content: string, extension: string): string[] {
  const rules: Array<[string, string[]]> = [
    ['TODO/FIXME comments', ['TODO', 'FIXME']],
    ['Debug statements', ['console.log', 'print(']],
    ['Async/await', ['async', 'await']],
    ['Promises', ['Promise']],
    ['Module exports', ['export', 'module.exports']],
    ['Unit tests', ['test(', 'describe(']],
    ['API references', ['API', 'endpoint']],
    ['Database operations', ['database', 'DB']]
  ];

  return rules
    .filter(([, markers]) => markers.some(marker => content.includes(marker)))
    .map(([label]) => label);
}
/**
 * Formats a byte count using 1024-based units (Bytes, KB, MB, GB), rounded
 * to at most two decimal places.
 *
 * Values beyond the largest unit stay in GB, values below one byte stay in
 * Bytes, negative values keep their sign and non-finite values are printed
 * as-is in Bytes, so the result always carries a valid unit.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `"1.5 KB"`.
 */
function formatBytes(bytes: number): string {
  if (bytes === 0) return '0 Bytes';
  if (!Number.isFinite(bytes)) return `${bytes} Bytes`;
  if (bytes < 0) return `-${formatBytes(-bytes)}`;

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB'];

  // Repeated division keeps the unit index inside the table and is exact for
  // powers of two, unlike a ratio of logarithms.
  let value = bytes;
  let unitIndex = 0;
  while (value >= k && unitIndex < sizes.length - 1) {
    value /= k;
    unitIndex += 1;
  }

  // parseFloat drops trailing zeros left by toFixed ("1.50" -> 1.5).
  return `${parseFloat(value.toFixed(2))} ${sizes[unitIndex]}`;
}