Skip to main content
Glama

LLM Researcher

by Code-Hex
github-code-search.ts (8.9 kB)
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import { config } from './config.js';
import type {
  SearchResult,
  SearchResultsPage,
  SearchStatistics,
  Searcher,
  SearchOptions,
  GitHubCodeResult,
  GitHubCodeSearchResponse,
  GitHubCodeSearchOptions,
} from './types.js';
import { GitHubSearchException, GitHubAuthException, RatelimitException } from './types.js';

const execAsync = promisify(exec);
// Shell-free variant used for commands that embed user-derived text.
const execFileAsync = promisify(execFile);

/**
 * Searcher implementation backed by the GitHub code search endpoint, invoked
 * through the `gh` CLI (`gh api /search/code`).
 *
 * Requests are throttled client-side and authentication is verified with
 * `gh auth status` before every API call.
 */
export class GitHubCodeSearcher implements Searcher {
  /** Minimum spacing between requests: 10 requests per minute => one every 6 seconds. */
  private static readonly MIN_REQUEST_INTERVAL_MS = 6000;

  /** The GitHub Search API only serves the first 1,000 results of any query. */
  private static readonly MAX_SEARCHABLE_RESULTS = 1000;

  /** Maximum number of non-blank lines rendered per text-match fragment. */
  private static readonly MAX_LINES_PER_FRAGMENT = 5;

  /** Maximum number of fragments rendered per result. */
  private static readonly MAX_FRAGMENTS = 2;

  /** Timestamp (ms since epoch) of the most recent request; 0 before the first one. */
  private lastRequestTime = 0;

  private readonly defaultOptions: GitHubCodeSearchOptions = {
    sort: 'indexed',
    order: 'desc',
    perPage: 30,
  };

  /**
   * @param options Overrides merged on top of the defaults
   *   (`sort: 'indexed'`, `order: 'desc'`, `perPage: 30`).
   */
  constructor(private options: Partial<GitHubCodeSearchOptions> = {}) {
    this.options = { ...this.defaultOptions, ...options };
  }

  /**
   * Waits until at least six seconds have passed since the previous request,
   * then records the current time as the new "last request" timestamp.
   */
  async rateLimit(): Promise<void> {
    const elapsed = Date.now() - this.lastRequestTime;
    const remaining = GitHubCodeSearcher.MIN_REQUEST_INTERVAL_MS - elapsed;

    if (remaining > 0) {
      config.log(`GitHub Code Search rate limiting: waiting ${remaining}ms`);
      await new Promise((resolve) => setTimeout(resolve, remaining));
    }

    this.lastRequestTime = Date.now();
  }

  /**
   * Verifies that the `gh` CLI reports a logged-in session.
   *
   * @throws GitHubAuthException if `gh auth status` fails or exceeds 5 seconds.
   */
  private async checkGitHubAuth(): Promise<void> {
    try {
      await execAsync('gh auth status', { timeout: 5000 });
    } catch {
      throw new GitHubAuthException();
    }
  }

  /**
   * Normalizes whitespace in the user's query and percent-encodes it so it can
   * be placed directly into the `q` URL parameter.
   *
   * Examples:
   *   "addClass  language:js repo:jquery/jquery"
   *     -> "addClass%20language%3Ajs%20repo%3Ajquery%2Fjquery"
   *   "useState in:file extension:tsx"
   *     -> "useState%20in%3Afile%20extension%3Atsx"
   */
  private buildQuery(userInput: string): string {
    const normalized = userInput.trim().split(/\s+/).join(' ');
    return encodeURIComponent(normalized);
  }

  /**
   * Runs one page of a code search through `gh api`.
   *
   * @param query Already percent-encoded query string (see `buildQuery`).
   * @param page 1-based page number.
   * @returns The parsed JSON response printed by `gh api`.
   * @throws GitHubAuthException on missing/invalid credentials (HTTP 401).
   * @throws RatelimitException on an HTTP 403 that mentions a rate limit.
   * @throws GitHubSearchException with 422 for a rejected query, or 500 for
   *   any other failure (including unparsable output).
   */
  private async executeGitHubAPI(query: string, page: number): Promise<GitHubCodeSearchResponse> {
    await this.rateLimit();
    await this.checkGitHubAuth();

    const perPage = this.options.perPage || 30;
    const { sort, order } = this.options;

    // `query` is already encoded by buildQuery; encoding it again would corrupt it.
    const pairs: string[] = [`q=${query}`, `page=${page}`, `per_page=${perPage}`];
    if (sort) pairs.push(`sort=${encodeURIComponent(sort)}`);
    if (order) pairs.push(`order=${encodeURIComponent(order)}`);

    const endpoint = `/search/code?${pairs.join('&')}`;
    // The text-match media type makes the API include matching fragments.
    const acceptHeader = 'Accept: application/vnd.github.text-match+json';

    config.log(`Executing: gh api "${endpoint}" --header "${acceptHeader}"`);

    try {
      // execFile passes arguments directly to `gh`, so no shell ever
      // interprets text derived from the user's query.
      const { stdout, stderr } = await execFileAsync('gh', ['api', endpoint, '--header', acceptHeader], {
        encoding: 'utf8',
        timeout: config.timeout,
        maxBuffer: 1024 * 1024, // 1MB buffer
      });

      if (stderr) {
        config.log('GitHub API stderr:', stderr);
      }

      return JSON.parse(stdout);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);

      // Node's error message starts with the full command line, which embeds
      // the query and page number; a query containing "401" or page 422 would
      // be misclassified. Prefer the CLI's own stderr when it is available.
      const stderr = (error as { stderr?: unknown } | null)?.stderr;
      const detail = typeof stderr === 'string' && stderr.trim() !== '' ? stderr : message;

      if (/\b401\b/.test(detail)) {
        throw new GitHubAuthException();
      }
      if (/\b403\b/.test(detail) && detail.includes('rate limit')) {
        throw new RatelimitException(403);
      }
      if (/\b422\b/.test(detail)) {
        throw new GitHubSearchException(422, 'Invalid search query');
      }

      throw new GitHubSearchException(500, `GitHub API error: ${message}`);
    }
  }

  /**
   * Renders the content text matches of a result as a short snippet.
   *
   * Only matches on the `content` property are used. Each fragment contributes
   * up to five non-blank lines, and at most two fragments are shown, separated
   * by a `---` divider. Line numbers are relative to the fragment, not the file.
   *
   * @returns The snippet, or 'No code matches found' when nothing is usable.
   */
  private formatCodeSnippetFromTextMatches(code: GitHubCodeResult): string {
    const fallback = 'No code matches found';

    if (!code.text_matches || code.text_matches.length === 0) {
      return fallback;
    }

    const snippets: string[] = [];

    for (const textMatch of code.text_matches) {
      if (textMatch.property !== 'content' || !textMatch.fragment) {
        continue;
      }

      const numbered: string[] = [];
      textMatch.fragment.split('\n').forEach((line, index) => {
        const hasRoom = numbered.length < GitHubCodeSearcher.MAX_LINES_PER_FRAGMENT;
        if (line.trim() && hasRoom) {
          numbered.push(`${index + 1}: ${line}`);
        }
      });

      if (numbered.length > 0) {
        snippets.push(numbered.join('\n'));
      }
    }

    if (snippets.length === 0) {
      return fallback;
    }

    return snippets.slice(0, GitHubCodeSearcher.MAX_FRAGMENTS).join('\n\n---\n\n');
  }

  /**
   * Converts a GitHub "blob" page URL into the matching raw-content URL.
   *
   * https://github.com/owner/repo/blob/ref/path/file.ext
   *   -> https://raw.githubusercontent.com/owner/repo/ref/path/file.ext
   */
  private convertToRawUrl(htmlUrl: string): string {
    // Anchor on the owner/repo segments so that only the `/blob/` marker that
    // follows them is removed; an owner or repository literally named "blob"
    // must be left intact.
    const match = /^https:\/\/github\.com\/([^/]+)\/([^/]+)\/blob\/(.+)$/.exec(htmlUrl);
    if (match) {
      const [, owner, repo, rest] = match;
      return `https://raw.githubusercontent.com/${owner}/${repo}/${rest}`;
    }

    // Unrecognized shape: keep the historical best-effort rewrite.
    return htmlUrl.replace('https://github.com/', 'https://raw.githubusercontent.com/').replace('/blob/', '/');
  }

  /**
   * Maps one API item to a `SearchResult`.
   *
   * @param code Item from the code search response.
   * @param index Position assigned to the result (the caller passes 1-based
   *   positions within the page).
   */
  private formatCodeResult(code: GitHubCodeResult, index: number): SearchResult {
    const repo = code.repository;
    const language = repo.language ? ` [${repo.language}]` : '';
    // The repository object embedded in code search results may omit the star
    // count; leave the marker out instead of printing "undefined".
    const stars = typeof repo.stargazers_count === 'number' ? ` ⭐ ${repo.stargazers_count}` : '';
    const description = repo.description || 'No description';

    const codeSnippet = this.formatCodeSnippetFromTextMatches(code);

    return {
      title: `${repo.full_name}/${code.path}`,
      // The raw URL is used so the file content can be fetched for extraction.
      url: this.convertToRawUrl(code.html_url),
      snippet: `${code.name}${language}${stars} - ${description}\n\nMatched Code:\n${codeSnippet}`,
      index,
    };
  }

  /**
   * Searches GitHub code and returns one page of results.
   *
   * @param query Space-separated search terms and qualifiers.
   * @param nextToken Page number as a decimal string; page 1 when omitted.
   * @param options Accepted for interface compatibility; not used here.
   * @throws Error if `nextToken` is not a page number >= 1.
   */
  async search(query: string, nextToken?: string, options?: SearchOptions): Promise<SearchResultsPage> {
    const page = nextToken ? parseInt(nextToken, 10) : 1;

    if (isNaN(page) || page < 1) {
      throw new Error('Invalid page number in nextToken');
    }

    const builtQuery = this.buildQuery(query);
    config.log(`GitHub Code Search: "${query}" -> "${builtQuery}" (page ${page})`);

    const response = await this.executeGitHubAPI(builtQuery, page);

    const results = (response.items ?? []).map((item, index) => this.formatCodeResult(item, index + 1));

    const perPage = this.options.perPage || 30;
    const totalResults = response.total_count ?? 0;
    // Pages beyond the first 1,000 results cannot be fetched, so they must not
    // be advertised through totalPages/hasNextPage.
    const reachableResults = Math.min(totalResults, GitHubCodeSearcher.MAX_SEARCHABLE_RESULTS);
    const totalPages = Math.ceil(reachableResults / perPage);

    const searchPage: SearchResultsPage = {
      results,
      currentPage: page,
      totalPages,
      totalResults,
      hasNextPage: page < totalPages,
      hasPreviousPage: page > 1,
      query,
    };

    config.log(`GitHub Code Search completed: ${results.length} results, page ${page}/${totalPages}`);

    return searchPage;
  }

  /**
   * Fetches the page after `currentPage`.
   *
   * @throws Error if `currentPage` reports no next page.
   */
  async getNextPage(currentPage: SearchResultsPage): Promise<SearchResultsPage> {
    if (!this.hasNextPage(currentPage)) {
      throw new Error('No next page available');
    }

    return this.search(currentPage.query, (currentPage.currentPage + 1).toString(), undefined);
  }

  /**
   * Fetches the page before `currentPage`.
   *
   * @throws Error if `currentPage` reports no previous page.
   */
  async getPreviousPage(currentPage: SearchResultsPage): Promise<SearchResultsPage> {
    if (!this.hasPreviousPage(currentPage)) {
      throw new Error('No previous page available');
    }

    return this.search(currentPage.query, (currentPage.currentPage - 1).toString(), undefined);
  }

  /**
   * Runs the search for `page` and returns only its paging statistics.
   * `resultsPerPage` is the number of results actually returned on that page.
   */
  async getSearchStatistics(query: string, page: number): Promise<SearchStatistics> {
    const nextToken = page > 1 ? page.toString() : undefined;
    const searchPage = await this.search(query, nextToken, undefined);

    return {
      totalResults: searchPage.totalResults,
      currentPage: searchPage.currentPage,
      totalPages: searchPage.totalPages,
      resultsPerPage: searchPage.results.length,
      hasNextPage: searchPage.hasNextPage,
      hasPreviousPage: searchPage.hasPreviousPage,
    };
  }

  /** Whether the given page reports a following page. */
  hasNextPage(currentPage: SearchResultsPage): boolean {
    return currentPage.hasNextPage;
  }

  /** Whether the given page reports a preceding page. */
  hasPreviousPage(currentPage: SearchResultsPage): boolean {
    return currentPage.hasPreviousPage;
  }

  // GitHub Code Search specific methods

  /** Changes the sort field and direction used by subsequent searches. */
  setSort(sort: 'indexed' | 'created' | 'updated', order: 'asc' | 'desc' = 'desc'): void {
    this.options.sort = sort;
    this.options.order = order;
  }

  /**
   * Show examples of query syntax
   */
  static getQueryExamples(): string[] {
    return [
      'addClass language:js repo:jquery/jquery',
      'useState in:file extension:tsx',
      'function language:python user:octocat',
      'console.log extension:js org:facebook',
      'import filename:package.json',
      'class size:>1000 language:java',
      'TODO in:file path:src/',
    ];
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Code-Hex/light-research-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.