Skip to main content
Glama
markdown-document.fetcher.ts3.85 kB
import {DocumentMetadata, MarkdownDocument} from "./types.js"; export class MarkdownDocumentFetcher { async fetch(url: string): Promise<MarkdownDocument> { try { const response = await fetch(url, { headers: { "user-agent": "AIApp BaaS MCP Server", }, }); if (!response.ok) { throw new Error(`Failed to fetch document: ${response.statusText}`); } const content = await response.text(); const metadata = this.extractMetadata(content, url); return { content, metadata, url, }; } catch (error) { console.error(`Error fetching document from ${url}:`, error); throw error; } } private extractMetadata(content: string, url: string): DocumentMetadata { const lines = content.split('\n'); let title = ''; let description = ''; const keywords: string[] = []; // Extract title from first h1 heading for (const line of lines.slice(0, 20)) { if (line.startsWith('# ')) { title = line.substring(2).trim(); break; } } // Extract description from first paragraph or second heading for (const line of lines.slice(0, 30)) { if (line.trim() && !line.startsWith('#') && !line.startsWith('```') && line.length > 20) { description = line.trim(); break; } } // Extract keywords from content const keywordSources = [ title, description, this.extractCodeBlocks(content), this.extractHeadings(content) ].filter(Boolean); for (const source of keywordSources) { const extractedKeywords = this.extractKeywordsFromText(source); keywords.push(...extractedKeywords); } // Add URL-based keywords const urlKeywords = this.extractKeywordsFromUrl(url); keywords.push(...urlKeywords); // Remove duplicates and filter const uniqueKeywords = Array.from(new Set(keywords)) .filter(keyword => keyword.length > 2 && keyword.length < 50); return { title: title || 'Untitled', description: description || '', keywords: uniqueKeywords, }; } private extractCodeBlocks(content: string): string { const codeBlockRegex = /```[\s\S]*?```/g; const codeBlocks = content.match(codeBlockRegex) || []; return codeBlocks.join(' '); } private 
extractHeadings(content: string): string { const lines = content.split('\n'); const headings = lines .filter(line => line.match(/^#{1,6}\s/)) .map(line => line.replace(/^#{1,6}\s/, '').trim()); return headings.join(' '); } private extractKeywordsFromText(text: string): string[] { // Remove code blocks and special characters const cleanText = text .replace(/```[\s\S]*?```/g, '') .replace(/`([^`]+)`/g, '$1') .replace(/[^\w\s가-힣]/g, ' '); // Split and filter keywords const words = cleanText .split(/\s+/) .map(word => word.trim().toLowerCase()) .filter(word => word.length > 2); // Add common technical terms const technicalTerms = [ 'api', 'jwt', 'token', 'cookie', 'auth', 'login', 'signup', 'react', 'nextjs', 'javascript', 'typescript', 'cors', 'http', 'https', 'json', 'fetch', 'axios', 'express', 'security', 'encryption', 'validation', 'error', 'response' ]; const foundTerms = technicalTerms.filter(term => cleanText.toLowerCase().includes(term) ); return [...words, ...foundTerms]; } private extractKeywordsFromUrl(url: string): string[] { try { const urlObj = new URL(url); const pathParts = urlObj.pathname .split('/') .filter(part => part.length > 0) .map(part => part.replace(/[-_]/g, ' ').toLowerCase()); return pathParts; } catch (error) { return []; } } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mbaas-inc/BaaS-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.