fetchMarkdown.ts (9.71 kB)
import type { PresetConfig } from '$lib/presets'
import { env } from '$env/dynamic/private'
import tarStream from 'tar-stream'
import { Readable } from 'stream'
import { createGunzip } from 'zlib'
import { minimatch } from 'minimatch'
import { getPresetContent } from './presetCache'
import { CacheDbService } from '$lib/server/cacheDb'
import { log, logAlways, logErrorAlways } from '$lib/log'
import { cleanTarballPath } from '$lib/utils/pathUtils'

let cacheService: CacheDbService | null = null

function getCacheService(): CacheDbService {
  if (!cacheService) {
    cacheService = new CacheDbService()
  }
  return cacheService
}

function sortFilesWithinGroup(files: string[]): string[] {
  return files.sort((a, b) => {
    const aPath = a.split('\n')[0].replace('## ', '')
    const bPath = b.split('\n')[0].replace('## ', '')

    // Check if one path is a parent of the other
    if (bPath.startsWith(aPath.replace('/index.md', '/'))) return -1
    if (aPath.startsWith(bPath.replace('/index.md', '/'))) return 1

    return aPath.localeCompare(bPath)
  })
}

export async function fetchRepositoryTarball(owner: string, repo: string): Promise<Buffer> {
  const cacheKey = `${owner}/${repo}`
  const cache = getCacheService()

  const cachedBuffer = await cache.get(cacheKey)
  if (cachedBuffer) {
    logAlways(`Using cached tarball for ${cacheKey} from database`)
    return cachedBuffer
  }

  const url = `https://api.github.com/repos/${owner}/${repo}/tarball`
  logAlways(`Fetching tarball from: ${url}`)

  const response = await fetch(url, {
    headers: {
      Authorization: `Bearer ${env.GITHUB_TOKEN}`,
      Accept: 'application/vnd.github.v3.raw'
    }
  })

  if (!response.ok) {
    throw new Error(`Failed to fetch tarball: ${response.statusText}`)
  }

  if (!response.body) {
    throw new Error('Response body is null')
  }

  const chunks: Uint8Array[] = []
  const reader = response.body.getReader()

  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    chunks.push(value)
  }

  const buffer = Buffer.concat(chunks)

  // Cache the buffer in database with 60 minutes TTL
  await cache.set(cacheKey, buffer, 60)

  return buffer
}

export async function processMarkdownFromTarball(
  tarballBuffer: Buffer,
  presetConfig: PresetConfig,
  includePathInfo: boolean
): Promise<string[] | { path: string; content: string }[]> {
  const { glob, ignore = [], minimize = undefined } = presetConfig

  // Create a Map to store files for each glob pattern while maintaining order
  const globResults = new Map<string, unknown[]>()
  const filePathsByPattern = new Map<string, string[]>()
  glob.forEach((pattern) => {
    globResults.set(pattern, [])
    filePathsByPattern.set(pattern, [])
  })

  const extractStream = tarStream.extract()

  let processedFiles = 0
  let matchedFiles = 0

  extractStream.on('entry', (header, stream, next) => {
    processedFiles++
    let matched = false

    for (const pattern of glob) {
      if (shouldIncludeFile(header.name, pattern, ignore)) {
        matched = true
        matchedFiles++

        if (header.type === 'file') {
          let content = ''
          stream.on('data', (chunk) => (content += chunk.toString()))
          stream.on('end', () => {
            // Use the unified path utility to clean tarball paths
            const cleanPath = cleanTarballPath(header.name)
            const processedContent = minimizeContent(content, minimize)

            if (includePathInfo) {
              const files = globResults.get(pattern) || []
              files.push({ path: cleanPath, content: processedContent })
              globResults.set(pattern, files)
            } else {
              const contentWithHeader = `## ${cleanPath}\n\n${processedContent}`
              const files = globResults.get(pattern) || []
              files.push(contentWithHeader)
              globResults.set(pattern, files)
            }

            const paths = filePathsByPattern.get(pattern) || []
            paths.push(cleanPath)
            filePathsByPattern.set(pattern, paths)

            next()
          })
          return
        }
      }
    }

    // Drain everything else: entries that matched no pattern, and matched
    // entries that are not regular files (e.g. directories). Without this,
    // a matched directory entry would never call next() and extraction would stall.
    stream.resume()
    next()
  })

  const tarballStream = Readable.from(tarballBuffer)
  const gunzipStream = createGunzip()

  tarballStream.pipe(gunzipStream).pipe(extractStream)

  await new Promise<void>((resolve, reject) => {
    extractStream.on('finish', resolve)
    extractStream.on('error', reject)
  })

  logAlways(`Total files processed: ${processedFiles}`)
  logAlways(`Files matching glob: ${matchedFiles}`)

  log('\nFinal file order:')
  glob.forEach((pattern, index) => {
    const paths = filePathsByPattern.get(pattern) || []
    const sortedPaths = includePathInfo
      ? paths
      : sortFilesWithinGroup(paths.map((p) => `## ${p}`)).map((p) => p.replace('## ', ''))
    if (sortedPaths.length > 0) {
      log(`\nGlob pattern ${index + 1}: ${pattern}`)
      sortedPaths.forEach((path, i) => {
        log(` ${i + 1}. ${path}`)
      })
    }
  })

  // Combine results in the order of glob patterns
  const orderedResults: unknown[] = []
  for (const pattern of glob) {
    const filesForPattern = globResults.get(pattern) || []
    if (includePathInfo) {
      orderedResults.push(...filesForPattern)
    } else {
      orderedResults.push(...sortFilesWithinGroup(filesForPattern as string[]))
    }
  }

  return orderedResults as string[] | { path: string; content: string }[]
}

function shouldIncludeFile(filename: string, glob: string, ignore: string[] = []): boolean {
  const shouldIgnore = ignore.some((pattern) => minimatch(filename, pattern))
  if (shouldIgnore) {
    logAlways(`❌ Ignored by pattern: ${filename}`)
    return false
  }
  return minimatch(filename, glob)
}

export async function clearRepositoryCache(): Promise<void> {
  const cache = getCacheService()
  await cache.clear()
  logAlways('Repository cache cleared')
}

export async function getRepositoryCacheStatus(): Promise<{
  size: number
  repositories: string[]
  totalSizeBytes: number
}> {
  const cache = getCacheService()
  const status = await cache.getStatus()
  return {
    size: status.count,
    repositories: status.keys,
    totalSizeBytes: status.totalSizeBytes
  }
}

export interface MinimizeOptions {
  normalizeWhitespace?: boolean
  removeLegacy?: boolean
  removePlaygroundLinks?: boolean
  removePrettierIgnore?: boolean
  removeNoteBlocks?: boolean
  removeDetailsBlocks?: boolean
  removeHtmlComments?: boolean
  removeDiffMarkers?: boolean
}

const defaultOptions: MinimizeOptions = {
  normalizeWhitespace: false,
  removeLegacy: false,
  removePlaygroundLinks: false,
  removePrettierIgnore: true,
  removeNoteBlocks: true,
  removeDetailsBlocks: true,
  removeHtmlComments: false,
  removeDiffMarkers: true
}

function removeQuoteBlocks(content: string, blockType: string): string {
  const lines = content.split('\n')
  const result: string[] = []
  let i = 0
  while (i < lines.length) {
    // If we find a block header (with or without additional text), skip it
    // and all subsequent blockquote lines
    if (lines[i].trim().startsWith(`> [!${blockType}]`)) {
      i++
      while (i < lines.length && (lines[i].startsWith('>') || lines[i].trim() === '')) {
        i++
      }
      continue
    }
    result.push(lines[i])
    i++
  }
  return result.join('\n')
}

function removeDiffMarkersFromContent(content: string): string {
  let inCodeBlock = false
  const lines = content.split('\n')
  const processedLines = lines.map((line) => {
    // Track if we're entering or leaving a code block
    if (line.trim().startsWith('```')) {
      inCodeBlock = !inCodeBlock
      return line
    }

    if (inCodeBlock) {
      // Handle lines that end with --- or +++ with possible whitespace after
      // eslint-disable-next-line no-useless-escape
      line = line.replace(/(\+{3}|\-{3})[\s]*$/g, '')

      // Handle triple markers at start while preserving indentation
      // This captures the whitespace before the marker and adds it back
      // eslint-disable-next-line no-useless-escape
      line = line.replace(/^(\s*)(\+{3}|\-{3})\s*/g, '$1')

      // Handle single + or - markers at start while preserving indentation
      // eslint-disable-next-line no-useless-escape
      line = line.replace(/^(\s*)[\+\-](\s)/g, '$1')

      // Handle multi-line diff blocks where --- or +++ might be in the middle of line
      // eslint-disable-next-line no-useless-escape
      line = line.replace(/[\s]*(\+{3}|\-{3})[\s]*/g, '')
    }

    return line
  })
  return processedLines.join('\n')
}

export function minimizeContent(content: string, options?: Partial<MinimizeOptions>): string {
  const settings: MinimizeOptions = options ? { ...defaultOptions, ...options } : defaultOptions

  let minimized = content

  minimized = minimized.replace(/NOTE: do not edit this file, it is generated in.*$/gm, '')

  if (settings.removeDiffMarkers) {
    minimized = removeDiffMarkersFromContent(minimized)
  }

  if (settings.removeLegacy) {
    minimized = removeQuoteBlocks(minimized, 'LEGACY')
  }

  if (settings.removeNoteBlocks) {
    minimized = removeQuoteBlocks(minimized, 'NOTE')
  }

  if (settings.removeDetailsBlocks) {
    minimized = removeQuoteBlocks(minimized, 'DETAILS')
  }

  if (settings.removePlaygroundLinks) {
    // Replace playground URLs with /[link] but keep the original link text
    minimized = minimized.replace(/\[([^\]]+)\]\(\/playground[^)]+\)/g, '[$1](/REMOVED)')
  }

  if (settings.removePrettierIgnore) {
    minimized = minimized
      .split('\n')
      .filter((line) => line.trim() !== '<!-- prettier-ignore -->')
      .join('\n')
  }

  if (settings.removeHtmlComments) {
    // Replace all HTML comments (including multi-line) with empty string
    minimized = minimized.replace(/<!--[\s\S]*?-->/g, '')
  }

  if (settings.normalizeWhitespace) {
    minimized = minimized.replace(/\s+/g, ' ')
  }

  minimized = minimized.trim()

  return minimized
}
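
For context, here is a minimal usage sketch of the two exported entry points. It assumes server-side code in the same SvelteKit project with GITHUB_TOKEN configured; the preset literal is hypothetical, since PresetConfig may declare more fields than the glob, ignore, and minimize ones destructured above.

// Hypothetical usage sketch — the preset below is an assumption, not a real preset.
import { fetchRepositoryTarball, processMarkdownFromTarball } from './fetchMarkdown'
import type { PresetConfig } from '$lib/presets'

const preset = {
  glob: ['**/docs/**/*.md'],
  ignore: ['**/node_modules/**'],
  minimize: { removeHtmlComments: true }
} as PresetConfig // cast because PresetConfig may require additional fields

// Download (or reuse the cached) tarball, then extract matching markdown files
const tarball = await fetchRepositoryTarball('khromov', 'llmctx')
const sections = (await processMarkdownFromTarball(tarball, preset, false)) as string[]
console.log(sections.join('\n\n')) // one "## path" section per matched file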

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/khromov/llmctx'
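
The same request in TypeScript, as a sketch; the response is printed as-is, since its exact JSON shape is defined by the API rather than anything shown here.

// Sketch of the same GET request using fetch
const res = await fetch('https://glama.ai/api/mcp/v1/servers/khromov/llmctx')
if (!res.ok) throw new Error(`Request failed: ${res.status}`)
console.log(await res.json())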

If you have feedback or need assistance with the MCP directory API, please join our Discord server.