// github-code-search.ts
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import { config } from './config.js';
import type {
  SearchResult,
  SearchResultsPage,
  SearchStatistics,
  Searcher,
  SearchOptions,
  GitHubCodeResult,
  GitHubCodeSearchResponse,
  GitHubCodeSearchOptions,
} from './types.js';
import { GitHubSearchException, GitHubAuthException, RatelimitException } from './types.js';
// Promise-returning form of child_process.exec, so shell commands can be awaited.
const execAsync = promisify(exec);
/**
 * Code searcher that queries GitHub's `/search/code` REST endpoint through the
 * `gh` CLI and adapts the response to the generic `Searcher` paging contract.
 *
 * Page tokens (`nextToken`) are 1-based page numbers rendered as decimal strings.
 */
export class GitHubCodeSearcher implements Searcher {
  /** GitHub's search API serves at most the first 1,000 results of any query. */
  private static readonly MAX_ACCESSIBLE_RESULTS = 1000;

  /** Spacing between requests that keeps the rate at 10 requests per minute. */
  private static readonly MIN_REQUEST_INTERVAL_MS = 6000;

  /** Page size used when the configured `perPage` is missing or 0. */
  private static readonly DEFAULT_PER_PAGE = 30;

  /** Promise form of `execFile`: runs `gh` directly, with no shell in between. */
  private static readonly execFileAsync = promisify(execFile);

  /** Epoch milliseconds at which the last request was released by `rateLimit`. */
  private lastRequestTime = 0;

  private readonly defaultOptions: GitHubCodeSearchOptions = {
    sort: 'indexed',
    order: 'desc',
    perPage: GitHubCodeSearcher.DEFAULT_PER_PAGE,
  };

  /**
   * @param options Overrides merged on top of the defaults
   *   (`sort: 'indexed'`, `order: 'desc'`, `perPage: 30`).
   */
  constructor(private options: Partial<GitHubCodeSearchOptions> = {}) {
    this.options = { ...this.defaultOptions, ...options };
  }

  /**
   * Waits until at least six seconds have passed since the previous request,
   * then records the current time as the new "last request" timestamp.
   *
   * The timestamp is written after the wait, so overlapping callers are not
   * serialized against each other.
   */
  async rateLimit(): Promise<void> {
    const elapsed = Date.now() - this.lastRequestTime;
    const remaining = GitHubCodeSearcher.MIN_REQUEST_INTERVAL_MS - elapsed;
    if (remaining > 0) {
      config.log(`GitHub Code Search rate limiting: waiting ${remaining}ms`);
      await new Promise((resolve) => setTimeout(resolve, remaining));
    }
    this.lastRequestTime = Date.now();
  }

  /**
   * Verifies that the `gh` CLI reports a usable login.
   *
   * @throws GitHubAuthException when `gh auth status` fails for any reason
   *   (non-zero exit, missing binary, or the 5 second timeout).
   */
  private async checkGitHubAuth(): Promise<void> {
    try {
      // Constant command with no user input, so the shell-based helper is fine here.
      await execAsync('gh auth status', { timeout: 5000 });
    } catch {
      throw new GitHubAuthException();
    }
  }

  /**
   * Normalizes whitespace in the user's query and percent-encodes it so it can
   * be placed directly after `q=` in the request URL.
   *
   * Examples:
   * "addClass language:js repo:jquery/jquery" -> "addClass%20language%3Ajs%20repo%3Ajquery%2Fjquery"
   * "useState   in:file extension:tsx"        -> "useState%20in%3Afile%20extension%3Atsx"
   *
   * @param userInput Space-separated search terms and qualifiers.
   * @returns The encoded query; an empty string for blank input.
   */
  private buildQuery(userInput: string): string {
    const normalized = userInput.trim().split(/\s+/).join(' ');
    return encodeURIComponent(normalized);
  }

  /**
   * Extracts the HTTP status the `gh` CLI reports on stderr, e.g.
   * `gh: Validation Failed (HTTP 422)` or `gh: HTTP 401`.
   *
   * @returns The status code, or `undefined` when the text carries none.
   */
  private static extractHttpStatus(cliStderr: string): number | undefined {
    const digits = /\bHTTP (\d{3})\b/.exec(cliStderr)?.[1];
    return digits === undefined ? undefined : Number(digits);
  }

  /**
   * Reads the `stderr` string that child_process attaches to a failed command's
   * error object.
   *
   * @returns The captured stderr, or `''` when the value has no string `stderr`.
   */
  private static readStderr(error: unknown): string {
    if (typeof error !== 'object' || error === null) {
      return '';
    }
    const stderr = (error as { stderr?: unknown }).stderr;
    return typeof stderr === 'string' ? stderr : '';
  }

  /**
   * Number of pages that can actually be requested for a result set, given
   * that only the first 1,000 results are served.
   *
   * @returns 0 when `totalResults` is not a positive finite number.
   */
  private static countAccessiblePages(totalResults: number, perPage: number): number {
    if (!Number.isFinite(totalResults) || totalResults <= 0) {
      return 0;
    }
    const reachable = Math.min(totalResults, GitHubCodeSearcher.MAX_ACCESSIBLE_RESULTS);
    return Math.ceil(reachable / perPage);
  }

  /**
   * Translates a failed `gh api` invocation into the module's exception types.
   *
   * The status is taken from the CLI's stderr only. The error *message* also
   * contains the full command line (query text, page number), so searching it
   * for "401"/"422" would misclassify failures of queries containing those digits.
   *
   * @throws GitHubAuthException on HTTP 401.
   * @throws RatelimitException on HTTP 429, or HTTP 403 mentioning a rate limit.
   * @throws GitHubSearchException with 422 for rejected queries, 500 otherwise.
   */
  private static raiseSearchError(error: unknown): never {
    const message = error instanceof Error ? error.message : String(error);
    const stderr = GitHubCodeSearcher.readStderr(error);
    const status = GitHubCodeSearcher.extractHttpStatus(stderr);

    if (status === 401) {
      throw new GitHubAuthException();
    }
    // GitHub signals primary and secondary rate limits with either 403 or 429.
    if (status === 429 || (status === 403 && /rate limit/i.test(stderr))) {
      throw new RatelimitException(status);
    }
    if (status === 422) {
      throw new GitHubSearchException(422, 'Invalid search query');
    }
    throw new GitHubSearchException(500, `GitHub API error: ${message}`);
  }

  /**
   * Runs one code-search request through `gh api`, after rate limiting and an
   * auth check.
   *
   * @param query Already percent-encoded query (see `buildQuery`).
   * @param page 1-based page number.
   * @returns The parsed JSON response body.
   * @throws GitHubAuthException, RatelimitException or GitHubSearchException
   *   (see `raiseSearchError`); JSON parse failures surface as the 500 case.
   */
  private async executeGitHubAPI(query: string, page: number): Promise<GitHubCodeSearchResponse> {
    await this.rateLimit();
    await this.checkGitHubAuth();

    const perPage = this.options.perPage || GitHubCodeSearcher.DEFAULT_PER_PAGE;
    const { sort, order } = this.options;

    // `query` is already encoded; the remaining values are encoded here.
    const params = [`q=${query}`, `page=${page}`, `per_page=${perPage}`];
    if (sort) params.push(`sort=${encodeURIComponent(sort)}`);
    if (order) params.push(`order=${encodeURIComponent(order)}`);

    // The text-match media type makes GitHub include matched fragments.
    const args = [
      'api',
      `/search/code?${params.join('&')}`,
      '--header',
      'Accept: application/vnd.github.text-match+json',
    ];
    config.log(`Executing: gh ${args.join(' ')}`);

    try {
      // Argument vector + execFile: nothing here is interpreted by a shell.
      const { stdout, stderr } = await GitHubCodeSearcher.execFileAsync('gh', args, {
        encoding: 'utf8',
        timeout: config.timeout,
        maxBuffer: 1024 * 1024, // 1MB buffer
      });
      if (stderr) {
        config.log('GitHub API stderr:', stderr);
      }
      return JSON.parse(stdout);
    } catch (error: unknown) {
      return GitHubCodeSearcher.raiseSearchError(error);
    }
  }

  /**
   * Renders up to two `content` text-match fragments, each reduced to its
   * first five non-blank lines, separated by a `---` rule.
   *
   * The number before each line is the line's position inside the fragment,
   * not its line number in the source file.
   *
   * @returns The rendered snippet, or 'No code matches found' when there is
   *   nothing to show.
   */
  private formatCodeSnippetFromTextMatches(code: GitHubCodeResult): string {
    const maxFragments = 2;
    const maxLinesPerFragment = 5;
    const snippets: string[] = [];

    for (const textMatch of code.text_matches ?? []) {
      if (snippets.length >= maxFragments) break;
      if (textMatch.property !== 'content' || !textMatch.fragment) continue;

      const lines: string[] = textMatch.fragment.split('\n');
      const numbered = lines
        .map((line, index) => ({ line, position: index + 1 }))
        .filter(({ line }) => line.trim() !== '')
        .slice(0, maxLinesPerFragment)
        .map(({ line, position }) => `${position}: ${line}`);

      if (numbered.length > 0) {
        snippets.push(numbered.join('\n'));
      }
    }

    return snippets.length > 0 ? snippets.join('\n\n---\n\n') : 'No code matches found';
  }

  /**
   * Rewrites a GitHub blob page URL into its raw-content URL:
   * https://github.com/owner/repo/blob/ref/path/file.ext
   *   -> https://raw.githubusercontent.com/owner/repo/ref/path/file.ext
   *
   * Only the first `/blob/` segment is removed.
   */
  private convertToRawUrl(htmlUrl: string): string {
    return htmlUrl
      .replace('https://github.com/', 'https://raw.githubusercontent.com/')
      .replace('/blob/', '/');
  }

  /**
   * Converts one API item into a `SearchResult`.
   *
   * @param code Item from the search response.
   * @param index 1-based position of the item within its page.
   */
  private formatCodeResult(code: GitHubCodeResult, index: number): SearchResult {
    const repo = code.repository;
    const language = repo.language ? ` [${repo.language}]` : '';
    // The repository object in search results may omit the star count; skip
    // the badge in that case instead of printing "undefined".
    const stars = typeof repo.stargazers_count === 'number' ? ` ⭐ ${repo.stargazers_count}` : '';
    const codeSnippet = this.formatCodeSnippetFromTextMatches(code);

    return {
      title: `${repo.full_name}/${code.path}`,
      // Raw URL so consumers can fetch the file content directly.
      url: this.convertToRawUrl(code.html_url),
      snippet: `${code.name}${language}${stars} - ${repo.description || 'No description'}\n\nMatched Code:\n${codeSnippet}`,
      index,
    };
  }

  /**
   * Searches GitHub code and returns one page of results.
   *
   * @param query Space-separated terms and qualifiers.
   * @param nextToken Page number as a string; omitted or empty means page 1.
   * @param options Accepted for interface compatibility; currently unused.
   * @returns The requested page. `totalResults` is GitHub's reported count,
   *   while `totalPages`/`hasNextPage` only cover the first 1,000 results,
   *   which is all the API will serve.
   * @throws Error when `nextToken` does not parse to a page number >= 1.
   */
  async search(query: string, nextToken?: string, options?: SearchOptions): Promise<SearchResultsPage> {
    const page = nextToken ? parseInt(nextToken, 10) : 1;
    if (isNaN(page) || page < 1) {
      throw new Error('Invalid page number in nextToken');
    }

    const builtQuery = this.buildQuery(query);
    config.log(`GitHub Code Search: "${query}" -> "${builtQuery}" (page ${page})`);

    const response = await this.executeGitHubAPI(builtQuery, page);
    // Tolerate a body without `items`/`total_count` rather than crashing.
    const items: GitHubCodeResult[] = response.items ?? [];
    const results = items.map((item, index) => this.formatCodeResult(item, index + 1));

    const perPage = this.options.perPage || GitHubCodeSearcher.DEFAULT_PER_PAGE;
    const totalResults: number = response.total_count ?? 0;
    const totalPages = GitHubCodeSearcher.countAccessiblePages(totalResults, perPage);

    const searchPage: SearchResultsPage = {
      results,
      currentPage: page,
      totalPages,
      totalResults,
      hasNextPage: page < totalPages,
      hasPreviousPage: page > 1,
      query,
    };
    config.log(`GitHub Code Search completed: ${results.length} results, page ${page}/${totalPages}`);
    return searchPage;
  }

  /**
   * Fetches the page after `currentPage`.
   *
   * @throws Error when `currentPage` reports no next page.
   */
  async getNextPage(currentPage: SearchResultsPage): Promise<SearchResultsPage> {
    if (!this.hasNextPage(currentPage)) {
      throw new Error('No next page available');
    }
    return this.search(currentPage.query, (currentPage.currentPage + 1).toString(), undefined);
  }

  /**
   * Fetches the page before `currentPage`.
   *
   * @throws Error when `currentPage` reports no previous page.
   */
  async getPreviousPage(currentPage: SearchResultsPage): Promise<SearchResultsPage> {
    if (!this.hasPreviousPage(currentPage)) {
      throw new Error('No previous page available');
    }
    return this.search(currentPage.query, (currentPage.currentPage - 1).toString(), undefined);
  }

  /**
   * Runs the search for `page` and summarizes its paging information.
   * `resultsPerPage` is the number of results actually returned on that page.
   */
  async getSearchStatistics(query: string, page: number): Promise<SearchStatistics> {
    const nextToken = page > 1 ? page.toString() : undefined;
    const searchPage = await this.search(query, nextToken, undefined);
    return {
      totalResults: searchPage.totalResults,
      currentPage: searchPage.currentPage,
      totalPages: searchPage.totalPages,
      resultsPerPage: searchPage.results.length,
      hasNextPage: searchPage.hasNextPage,
      hasPreviousPage: searchPage.hasPreviousPage,
    };
  }

  /** Whether a page follows `currentPage`, as recorded on the page itself. */
  hasNextPage(currentPage: SearchResultsPage): boolean {
    return currentPage.hasNextPage;
  }

  /** Whether a page precedes `currentPage`, as recorded on the page itself. */
  hasPreviousPage(currentPage: SearchResultsPage): boolean {
    return currentPage.hasPreviousPage;
  }

  // GitHub Code Search specific methods

  /**
   * Sets the `sort` and `order` parameters sent with subsequent requests.
   * Values are forwarded unchanged.
   * NOTE(review): GitHub's code search documents only `indexed` as a sort
   * field — confirm whether `created`/`updated` are meaningful here.
   */
  setSort(sort: 'indexed' | 'created' | 'updated', order: 'asc' | 'desc' = 'desc'): void {
    this.options.sort = sort;
    this.options.order = order;
  }

  /**
   * Show examples of query syntax
   */
  static getQueryExamples(): string[] {
    return [
      'addClass language:js repo:jquery/jquery',
      'useState in:file extension:tsx',
      'function language:python user:octocat',
      'console.log extension:js org:facebook',
      'import filename:package.json',
      'class size:>1000 language:java',
      'TODO in:file path:src/',
    ];
  }
}