Skip to main content
Glama
codewiki-client.ts (16 kB)
//
// File: codewiki-client.ts
// Brief: Client for interacting with Google CodeWiki to fetch and parse documentation
//
// Copyright (c) 2025 Chris Bunting <cbuntingde@gmail.com>
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
//

import axios from 'axios';
import * as cheerio from 'cheerio';
import { CacheManager, CachedDocumentation } from './cache-manager.js';

/** Identifies a repository and the CodeWiki page that documents it. */
export interface RepositoryInfo {
  owner: string;
  repo: string;
  url: string;
  description?: string | undefined;
  stars?: number | undefined;
  language?: string | undefined;
}

/** One `## `-level section of a parsed documentation page. */
export interface DocumentationSection {
  title: string;
  content: string;
  type: 'overview' | 'architecture' | 'api' | 'guides' | 'other';
  subsections?: DocumentationSection[];
}

/** Documentation split into sections, plus summary flags derived from them. */
export interface ParsedDocumentation {
  repository: RepositoryInfo;
  sections: DocumentationSection[];
  lastUpdated: Date;
  metadata: {
    totalSections: number;
    hasDiagrams: boolean;
    hasApiDocs: boolean;
    hasArchitecture: boolean;
  };
}

/** Root object returned by `cheerio.load`; derived so no cheerio type name is hard-coded. */
type CheerioRoot = ReturnType<typeof cheerio.load>;

/** A single hit returned by `CodeWikiClient.searchDocumentation`. */
type DocumentationSearchHit = { section: string; content: string; relevance: number };

const USER_AGENT = 'CodeWiki-MCP-Server/1.0';
const MAX_SEARCH_RESULTS = 10;
const PREVIEW_LENGTH = 500;

/** Markdown prefix emitted for each heading tag found while scraping. */
const HEADING_MARKERS: ReadonlyMap<string, string> = new Map([
  ['h1', '#'],
  ['h2', '##'],
  ['h3', '###'],
]);

/**
 * Ordered keyword rules used to classify a section title.
 * The first rule with a matching keyword wins, so order is significant.
 */
const SECTION_TYPE_RULES: ReadonlyArray<readonly [DocumentationSection['type'], readonly string[]]> = [
  ['overview', ['overview', 'introduction']],
  ['architecture', ['architecture', 'design']],
  ['api', ['api', 'reference']],
  ['guides', ['guide', 'tutorial', 'getting started']],
];

/**
 * Counts non-overlapping literal occurrences of `needle` in `haystack`.
 *
 * The needle is matched literally (never as a regular expression), so input
 * such as `c++` or `(` cannot throw or change the meaning of the match.
 * An empty needle counts as zero occurrences.
 */
function countOccurrences(haystack: string, needle: string): number {
  if (needle === '') {
    return 0;
  }
  let count = 0;
  let index = haystack.indexOf(needle);
  while (index !== -1) {
    count += 1;
    index = haystack.indexOf(needle, index + needle.length);
  }
  return count;
}

/**
 * Parses a scraped star counter into a number.
 *
 * Abbreviated values with a `k`/`m` suffix (`81.5k`, `1.2M`) are scaled;
 * anything else falls back to the digits found in the text (`1,234` -> 1234).
 *
 * @returns the star count, or `undefined` when the text contains no digits.
 */
function parseStarCount(text: string): number | undefined {
  const abbreviated = /(\d+(?:\.\d+)?)\s*([km])\b/i.exec(text);
  if (abbreviated) {
    const [, value = '', unit = ''] = abbreviated;
    const scale = unit.toLowerCase() === 'k' ? 1000 : 1000000;
    return Math.round(Number.parseFloat(value) * scale);
  }
  const digits = text.replace(/\D/g, '');
  return digits === '' ? undefined : Number.parseInt(digits, 10);
}

/**
 * Scores a section against an already lower-cased, non-empty search needle.
 *
 * @returns `undefined` when neither title nor content contains the needle;
 *          otherwise 2 for a title hit plus one per occurrence in the content.
 */
function scoreSection(section: DocumentationSection, needle: string): number | undefined {
  const haystack = `${section.title} ${section.content}`.toLowerCase();
  if (!haystack.includes(needle)) {
    return undefined;
  }
  const titleScore = section.title.toLowerCase().includes(needle) ? 2 : 0;
  return titleScore + countOccurrences(section.content.toLowerCase(), needle);
}

/** Truncates section content to a short preview, marking truncation with `...`. */
function preview(content: string): string {
  return content.length > PREVIEW_LENGTH ? `${content.substring(0, PREVIEW_LENGTH)}...` : content;
}

/**
 * Fetches documentation pages from Google CodeWiki, converts the scraped HTML
 * to a markdown-like string, caches it through the supplied `CacheManager`
 * and exposes simple search helpers on top of the parsed result.
 */
export class CodeWikiClient {
  private readonly baseUrl = 'https://codewiki.google';

  /**
   * @param cacheManager cache consulted by `getRepositoryDocs` before fetching
   *                     and updated after every successful fetch.
   */
  constructor(private readonly cacheManager: CacheManager) {}

  /**
   * Resolves a free-form query to candidate repositories.
   *
   * A query containing `/` is first interpreted as `owner/repo`. If that
   * yields nothing, the featured repositories are filtered by a
   * case-insensitive substring match on repo name, owner or description.
   *
   * @param query `owner/repo` or a free-text search term.
   * @returns the matching repositories together with the original query.
   */
  async searchRepository(query: string): Promise<{ repositories: RepositoryInfo[]; query: string }> {
    const repositories: RepositoryInfo[] = [];

    if (query.includes('/')) {
      // Trim before the emptiness check so whitespace-only parts are rejected.
      const [owner, repo] = query.split('/').map(part => part.trim());
      if (owner && repo) {
        const info = await this.getRepositoryInfo(owner, repo);
        if (info) {
          repositories.push(info);
        }
      }
    }

    if (repositories.length === 0) {
      const needle = query.toLowerCase();
      const featured = await this.getFeaturedRepositories();
      repositories.push(
        ...featured.filter(
          candidate =>
            candidate.repo.toLowerCase().includes(needle) ||
            candidate.owner.toLowerCase().includes(needle) ||
            (candidate.description?.toLowerCase().includes(needle) ?? false),
        ),
      );
    }

    return { repositories, query };
  }

  /**
   * Returns the parsed documentation for a repository.
   *
   * @param owner repository owner.
   * @param repo repository name.
   * @param forceRefresh when `true`, skips the cache lookup and always fetches.
   * @throws Error when the page cannot be fetched (see `fetchDocumentation`).
   */
  async getRepositoryDocs(owner: string, repo: string, forceRefresh = false): Promise<ParsedDocumentation> {
    if (!forceRefresh) {
      const cached = await this.cacheManager.get(owner, repo);
      if (cached) {
        return this.parseCachedDocumentation(cached);
      }
    }

    const documentation = await this.fetchDocumentation(owner, repo);
    await this.cacheManager.set(owner, repo, documentation);
    return this.parseDocumentation(documentation);
  }

  /**
   * Case-insensitive literal text search over a repository's documentation.
   *
   * Each matching section or subsection scores 2 for a title hit plus one per
   * occurrence in its content. An empty query yields no results.
   *
   * @param owner repository owner.
   * @param repo repository name.
   * @param query text to look for; matched literally, not as a pattern.
   * @returns at most the ten highest-scoring hits, best first.
   * @throws Error when the documentation cannot be loaded.
   */
  async searchDocumentation(owner: string, repo: string, query: string): Promise<{
    repository: RepositoryInfo;
    results: Array<{ section: string; content: string; relevance: number }>;
    query: string;
  }> {
    const docs = await this.getRepositoryDocs(owner, repo);
    const needle = query.toLowerCase();
    const results: DocumentationSearchHit[] = [];

    // An empty needle is contained in every string; treat it as "no search".
    if (needle !== '') {
      for (const section of docs.sections) {
        const sectionScore = scoreSection(section, needle);
        if (sectionScore !== undefined) {
          results.push({
            section: section.title,
            content: preview(section.content),
            relevance: sectionScore,
          });
        }

        for (const subsection of section.subsections ?? []) {
          const subsectionScore = scoreSection(subsection, needle);
          if (subsectionScore !== undefined) {
            results.push({
              section: `${section.title} > ${subsection.title}`,
              content: preview(subsection.content),
              relevance: subsectionScore,
            });
          }
        }
      }
    }

    results.sort((a, b) => b.relevance - a.relevance);

    return {
      repository: docs.repository,
      results: results.slice(0, MAX_SEARCH_RESULTS),
      query,
    };
  }

  /** Builds the CodeWiki page URL for a repository, escaping both path segments. */
  private repositoryUrl(owner: string, repo: string): string {
    return `${this.baseUrl}/github.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`;
  }

  /**
   * Scrapes the CodeWiki homepage for featured repositories.
   *
   * Falls back to a hard-coded list when the request fails or when no
   * repository card can be recognised in the page.
   */
  private async getFeaturedRepositories(): Promise<RepositoryInfo[]> {
    try {
      const response = await axios.get(this.baseUrl, {
        timeout: 10000,
        headers: { 'User-Agent': USER_AGENT },
      });

      const $ = cheerio.load(response.data);
      const repositories: RepositoryInfo[] = [];
      // The selector can match both a card and a nested [data-repo] child.
      const seen = new Set<string>();

      for (const element of $('.repo-card, .repository-card, [data-repo]').toArray()) {
        const $el = $(element);
        const owner = $el.attr('data-owner') || $el.find('[data-owner]').attr('data-owner');
        const repo = $el.attr('data-repo') || $el.find('[data-repo]').attr('data-repo');
        if (!owner || !repo) {
          continue;
        }

        const key = `${owner}/${repo}`;
        if (seen.has(key)) {
          continue;
        }
        seen.add(key);

        const description = $el.find('.description, .repo-description').text().trim();
        const starsText = $el.find('.stars, .stargazers').text().trim();
        const language = $el.find('.language, .repo-language').text().trim();

        repositories.push({
          owner,
          repo,
          url: this.repositoryUrl(owner, repo),
          description: description || undefined,
          stars: parseStarCount(starsText),
          language: language || undefined,
        });
      }

      return repositories.length > 0 ? repositories : this.getFallbackRepositories();
    } catch (error) {
      // Best effort: the featured list is optional, so degrade to static data.
      console.warn('Failed to fetch featured repositories:', error);
      return this.getFallbackRepositories();
    }
  }

  /** Static list used when the homepage cannot be scraped. */
  private getFallbackRepositories(): RepositoryInfo[] {
    return [
      {
        owner: 'google-gemini',
        repo: 'gemini-cli',
        url: this.repositoryUrl('google-gemini', 'gemini-cli'),
        description: 'An open-source AI agent that brings the power of Gemini directly into your terminal',
        stars: 81500,
        language: 'go',
      },
      {
        owner: 'golang',
        repo: 'go',
        url: this.repositoryUrl('golang', 'go'),
        description: 'The Go programming language',
        stars: 130700,
        language: 'go',
      },
      {
        owner: 'flutter',
        repo: 'flutter',
        url: this.repositoryUrl('flutter', 'flutter'),
        description: 'Flutter makes it easy and fast to build beautiful apps for mobile and beyond',
        stars: 173400,
        language: 'dart',
      },
      {
        owner: 'kubernetes',
        repo: 'kubernetes',
        url: this.repositoryUrl('kubernetes', 'kubernetes'),
        description: 'Production-Grade Container Scheduling and Management',
        stars: 118400,
        language: 'go',
      },
      {
        owner: 'facebook',
        repo: 'react',
        url: this.repositoryUrl('facebook', 'react'),
        description: 'The library for web and native user interfaces',
        stars: 240300,
        language: 'javascript',
      },
    ];
  }

  /**
   * Builds the basic record for an `owner/repo` pair.
   *
   * No network request is made, so the existence of the page is NOT verified
   * here; a missing page only surfaces when the documentation is fetched.
   * The `null` return is kept in the signature for a future existence check.
   */
  private async getRepositoryInfo(owner: string, repo: string): Promise<RepositoryInfo | null> {
    return { owner, repo, url: this.repositoryUrl(owner, repo) };
  }

  /**
   * Downloads a repository's CodeWiki page and converts it to the cacheable form.
   *
   * @throws Error with a "not found" message on HTTP 404, and a generic
   *         "Failed to fetch" message for every other failure.
   */
  private async fetchDocumentation(owner: string, repo: string): Promise<CachedDocumentation> {
    const url = this.repositoryUrl(owner, repo);

    try {
      const response = await axios.get(url, {
        timeout: 15000,
        headers: { 'User-Agent': USER_AGENT },
      });

      const $ = cheerio.load(response.data);

      return {
        owner,
        repo,
        content: this.extractDocumentationContent($, owner, repo),
        lastUpdated: new Date(),
        metadata: this.extractMetadata($, owner, repo),
      };
    } catch (error) {
      if (axios.isAxiosError(error) && error.response?.status === 404) {
        throw new Error(`CodeWiki documentation not found for ${owner}/${repo}. The repository may not have a CodeWiki yet.`);
      }
      throw new Error(`Failed to fetch documentation for ${owner}/${repo}: ${String(error)}`);
    }
  }

  /**
   * Converts the scraped page into a markdown-like string.
   *
   * Headings become `#`/`##`/`###` lines; section containers contribute their
   * title and paragraph text. If nothing structured is found, the method
   * falls back to the main content container, then the body text, then
   * generated placeholder documentation.
   */
  private extractDocumentationContent($: CheerioRoot, owner: string, repo: string): string {
    const header = `# ${owner}/${repo}`;
    let content = `${header}\n\n`;

    for (const element of $('.section, .documentation-section, .content-section, h1, h2, h3').toArray()) {
      const $el = $(element);
      const marker = HEADING_MARKERS.get(element.tagName.toLowerCase());

      if (marker !== undefined) {
        content += `\n${marker} ${$el.text().trim()}\n\n`;
        continue;
      }

      // Section container: take the first recognisable title and its paragraph text.
      const title = $el.find('h1, h2, h3, .title, .section-title').first().text().trim();
      const text = $el.find('p, .content, .text').text().trim();
      if (title) {
        content += `\n## ${title}\n\n`;
      }
      if (text) {
        content += `${text}\n\n`;
      }
    }

    // Compare against the bare header: trim() removes the trailing blank lines,
    // so comparing with the "\n\n"-terminated form could never match.
    if (content.trim() === header.trim()) {
      const mainContent = $('.main-content, .content, .documentation, .wiki-content').first();
      if (mainContent.length > 0) {
        content += mainContent.text().trim();
      } else {
        // Collapse horizontal whitespace first, then blank-line runs, so that
        // paragraph breaks survive the clean-up.
        const bodyText = $('body')
          .text()
          .replace(/[^\S\n]+/g, ' ')
          .replace(/\n\s*\n/g, '\n\n')
          .trim();

        if (bodyText.length > 100) {
          content += bodyText;
        } else {
          // NOTE(review): this substitutes generated placeholder text for a page
          // that yielded no content, and the caller caches it - confirm this is
          // still the desired last resort. The mock already carries its own
          // header, so it replaces the content instead of being appended.
          content = this.generateMockDocumentation(owner, repo);
        }
      }
    }

    return content;
  }

  /**
   * Collects summary metadata from the scraped page: the unique heading titles
   * in document order, the text of the first commit-info element (if any) and
   * the length of the body text.
   */
  private extractMetadata($: CheerioRoot, _owner: string, _repo: string): CachedDocumentation['metadata'] {
    // A Set keeps insertion order and removes duplicates in a single pass.
    const titles = new Set<string>();
    for (const element of $('h1, h2, h3, .section-title, .heading').toArray()) {
      const title = $(element).text().trim();
      if (title) {
        titles.add(title);
      }
    }

    // text() on an empty selection is '', which maps to undefined below.
    const lastCommit = $('.last-commit, .commit-info, [data-commit]').first().text().trim();

    return {
      size: $('body').text().length,
      sections: titles.size > 0 ? [...titles] : ['Overview', 'Architecture', 'API Reference', 'Getting Started'],
      lastCommit: lastCommit || undefined,
    };
  }

  /** Placeholder documentation used when a page yields no extractable content. */
  private generateMockDocumentation(owner: string, repo: string): string {
    return `# ${owner}/${repo}

## Overview

This is a ${repo} repository owned by ${owner}. This documentation was generated by Google CodeWiki.

## Architecture

The codebase follows a modular architecture with the following key components:

- **Core Module**: Main functionality and business logic
- **API Layer**: RESTful endpoints and data access
- **Utilities**: Helper functions and shared components

## API Reference

The repository provides several key APIs:

### Core APIs

- \`GET /api/status\`: Check service status
- \`POST /api/process\`: Process data
- \`GET /api/data/:id\`: Retrieve specific data

### Authentication

Most endpoints require authentication via API keys or OAuth tokens.

## Getting Started

To get started with ${repo}:

1. Clone the repository
2. Install dependencies
3. Configure environment variables
4. Run the application

## Contributing

Please read the contributing guidelines before submitting pull requests.

## License

This project is licensed under the MIT License.
`;
  }

  /**
   * Splits the markdown-like content into sections.
   *
   * Every line starting with `## ` opens a new section. Non-blank lines that
   * follow are appended to it; lines before the first `## ` are ignored.
   */
  private parseDocumentation(docs: CachedDocumentation): ParsedDocumentation {
    const sections: DocumentationSection[] = [];
    let current: DocumentationSection | null = null;

    for (const line of docs.content.split('\n')) {
      const trimmed = line.trim();

      if (trimmed.startsWith('## ')) {
        if (current) {
          sections.push(current);
        }
        const title = trimmed.substring(3);
        current = { title, content: '', type: this.determineSectionType(title) };
      } else if (current && trimmed) {
        current.content += `${line}\n`;
      }
    }

    if (current) {
      sections.push(current);
    }

    return {
      repository: {
        owner: docs.owner,
        repo: docs.repo,
        url: this.repositoryUrl(docs.owner, docs.repo),
      },
      sections,
      lastUpdated: docs.lastUpdated,
      metadata: {
        totalSections: sections.length,
        hasDiagrams: /diagram/i.test(docs.content),
        hasApiDocs: sections.some(section => section.type === 'api'),
        hasArchitecture: sections.some(section => section.type === 'architecture'),
      },
    };
  }

  /** Parses a cache entry; cached and fresh documents share the same format. */
  private parseCachedDocumentation(cached: CachedDocumentation): ParsedDocumentation {
    return this.parseDocumentation(cached);
  }

  /**
   * Classifies a section by keywords in its title (case-insensitive substring
   * match, first matching rule wins); returns `'other'` when nothing matches.
   */
  private determineSectionType(title: string): DocumentationSection['type'] {
    const titleLower = title.toLowerCase();
    const rule = SECTION_TYPE_RULES.find(([, keywords]) => keywords.some(keyword => titleLower.includes(keyword)));
    return rule ? rule[0] : 'other';
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cbuntingde/codewiki-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.