contentSync.ts
import { fetchRepositoryTarball, processMarkdownFromTarball, minimizeContent } from '$lib/fetchMarkdown'
import { ContentDbService } from '$lib/server/contentDb'
import type { CreateContentInput } from '$lib/types/db'
import { presets, DEFAULT_REPOSITORY } from '$lib/presets'
import { logAlways, logErrorAlways, log } from '$lib/log'

function sortFilesWithinGroup(
	files: Array<{ path: string; content: string }>
): Array<{ path: string; content: string }> {
	return files.sort((a, b) => {
		const aPath = a.path
		const bPath = b.path

		// Check if one path is a parent of the other
		if (bPath.startsWith(aPath.replace('/index.md', '/'))) return -1
		if (aPath.startsWith(bPath.replace('/index.md', '/'))) return 1

		return aPath.localeCompare(bPath)
	})
}

export class ContentSyncService {
	static readonly MAX_CONTENT_AGE_MS = 24 * 60 * 60 * 1000

	static async syncRepository(
		options: { returnStats?: boolean } = {}
	): Promise<{
		success: boolean
		stats: {
			total_files: number
			total_size_bytes: number
			last_updated: Date
		}
		sync_details: {
			upserted_files: number
			deleted_files: number
			unchanged_files: number
		}
		timestamp: string
	}> {
		const { returnStats = true } = options
		const { owner, repo: repoName } = DEFAULT_REPOSITORY

		logAlways(`Starting sync for repository: ${owner}/${repoName}`)

		let upsertedFiles = 0
		let deletedFiles = 0
		let unchangedFiles = 0

		try {
			logAlways(`Step 1: Syncing repository ${owner}/${repoName}`)

			const tarballBuffer = await fetchRepositoryTarball(owner, repoName)

			const filesWithPaths = (await processMarkdownFromTarball(
				tarballBuffer,
				{
					glob: ['**/*.md', '**/*.mdx'],
					ignore: [],
					title: `Sync ${owner}/${repoName}`,
					distilled: false
				},
				true
			)) as Array<{
				path: string
				content: string
			}>

			logAlways(`Found ${filesWithPaths.length} markdown files in ${owner}/${repoName}`)

			const existingFiles = await ContentDbService.getAllContent()
			const existingPaths = new Set(existingFiles.map((file) => file.path))
			const foundPaths = new Set(filesWithPaths.map((file) => file.path))

			const contentInputs: CreateContentInput[] = []

			for (const file of filesWithPaths) {
				const filename = ContentDbService.extractFilename(file.path)
				const sizeBytes = new TextEncoder().encode(file.content).length
				const metadata = ContentDbService.extractFrontmatter(file.content)

				const hasChanged = await ContentDbService.hasContentChanged(file.path, file.content)

				if (hasChanged) {
					contentInputs.push({
						path: file.path,
						filename,
						content: file.content,
						size_bytes: sizeBytes,
						metadata
					})
				} else {
					unchangedFiles++
				}
			}

			if (contentInputs.length > 0) {
				logAlways(`Upserting ${contentInputs.length} changed files`)
				await ContentDbService.batchUpsertContent(contentInputs)
				upsertedFiles = contentInputs.length
			} else {
				logAlways(`No file content changes detected`)
			}

			// Handle deletions - find files in DB that are no longer in the repository
			const deletedPaths = Array.from(existingPaths).filter((path) => !foundPaths.has(path))

			if (deletedPaths.length > 0) {
				logAlways(`Deleting ${deletedPaths.length} files that no longer exist`)
				for (const deletedPath of deletedPaths) {
					logAlways(` Deleting: ${deletedPath}`)
					await ContentDbService.deleteContent(deletedPath)
				}
				deletedFiles = deletedPaths.length
			} else {
				logAlways(`No deleted files detected`)
			}

			let stats
			if (returnStats) {
				logAlways(`Step 2: Collecting final statistics`)
				stats = await ContentSyncService.getContentStats()
			} else {
				logAlways(`Step 2: Skipping stats collection (returnStats = false)`)
				// Return minimal stats structure
				stats = {
					total_files: 0,
					total_size_bytes: 0,
					last_updated: new Date()
				}
			}

			logAlways(
				`Sync completed successfully: ${upsertedFiles} upserted, ${deletedFiles} deleted, ${unchangedFiles} unchanged`
			)

			return {
				success: true,
				stats,
				sync_details: {
					upserted_files: upsertedFiles,
					deleted_files: deletedFiles,
					unchanged_files: unchangedFiles
				},
				timestamp: new Date().toISOString()
			}
		} catch (error) {
			logErrorAlways(`Failed to sync repository ${owner}/${repoName}:`, error)
			throw error
		}
	}

	static async isRepositoryContentStale(): Promise<boolean> {
		try {
			const stats = await ContentDbService.getContentStats()

			if (stats.total_files === 0) {
				return true // No content, consider stale
			}

			const lastUpdated = new Date(stats.last_updated)
			const contentAge = Date.now() - lastUpdated.getTime()
			const isStale = contentAge > ContentSyncService.MAX_CONTENT_AGE_MS

			if (isStale) {
				logAlways(
					`Repository content is stale (age: ${Math.floor(contentAge / 1000 / 60)} minutes)`
				)
			}

			return isStale
		} catch (error) {
			logErrorAlways(`Error checking repository staleness:`, error)
			return true // On error, assume stale
		}
	}

	static async getPresetContentFromDb(
		presetKey: string
	): Promise<Array<{ path: string; content: string }> | null> {
		const preset = presets[presetKey]
		if (!preset) {
			return null
		}

		try {
			const allContent = await ContentDbService.getAllContent()

			if (allContent.length === 0) {
				return null
			}

			log(`Checking ${allContent.length} files against glob patterns for preset ${presetKey}`)
			log(`Glob patterns: ${JSON.stringify(preset.glob)}`)
			log(`Ignore patterns: ${JSON.stringify(preset.ignore || [])}`)

			const { minimatch } = await import('minimatch')

			const orderedResults: Array<{ path: string; content: string }> = []

			// Process one glob pattern at a time
			for (const pattern of preset.glob) {
				log(`\nProcessing glob pattern: ${pattern}`)

				const matchingFiles: Array<{ path: string; content: string }> = []

				for (const dbContent of allContent) {
					const shouldIgnore = preset.ignore?.some((ignorePattern) => {
						const matches = minimatch(dbContent.path, ignorePattern)
						if (matches) {
							log(` File ${dbContent.path} ignored by pattern: ${ignorePattern}`)
						}
						return matches
					})

					if (shouldIgnore) continue

					if (minimatch(dbContent.path, pattern)) {
						log(` File ${dbContent.path} matched`)

						let processedContent = dbContent.content
						if (preset.minimize && Object.keys(preset.minimize).length > 0) {
							processedContent = minimizeContent(dbContent.content, preset.minimize)
						}

						matchingFiles.push({
							path: dbContent.path,
							content: processedContent
						})
					}
				}

				const sortedFiles = sortFilesWithinGroup(matchingFiles)

				log(` Found ${sortedFiles.length} files for pattern: ${pattern}`)
				sortedFiles.forEach((file, i) => {
					log(` ${i + 1}. ${file.path}`)
				})

				orderedResults.push(...sortedFiles)
			}

			logAlways(
				`Found ${orderedResults.length} files matching preset ${presetKey} from database in natural glob order`
			)

			log('\nFinal file order:')
			orderedResults.forEach((file, i) => {
				log(` ${i + 1}. ${file.path}`)
			})

			return orderedResults
		} catch (error) {
			logErrorAlways(`Failed to get preset content from database for ${presetKey}:`, error)
			return null
		}
	}

	static async getContentStats() {
		return ContentDbService.getContentStats()
	}
}
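
The comparator in sortFilesWithinGroup puts a section's index.md ahead of the pages nested under it, and falls back to plain alphabetical order otherwise. A small illustration, using hypothetical paths (the function is module-private, so this only runs inside contentSync.ts itself):

// Hypothetical input, shown only to illustrate the parent-first ordering.
const example = [
	{ path: 'docs/guide/setup.md', content: '' },
	{ path: 'docs/guide/index.md', content: '' },
	{ path: 'docs/api.md', content: '' }
]

sortFilesWithinGroup(example).map((f) => f.path)
// => ['docs/api.md', 'docs/guide/index.md', 'docs/guide/setup.md']
// 'docs/guide/index.md' sorts before 'docs/guide/setup.md' because
// replacing '/index.md' with '/' makes it a prefix of its siblings' paths.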
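A minimal sketch of how this service might be wired into a server route. It assumes a SvelteKit project (consistent with the $lib imports above) and that this file lives at src/lib/server/contentSync.ts; the endpoint path, the preset route parameter, and the error payload are illustrative, not taken from this repository:

// Hypothetical SvelteKit endpoint, e.g. src/routes/api/[preset]/+server.ts
import { json } from '@sveltejs/kit'
import { ContentSyncService } from '$lib/server/contentSync'

export async function GET({ params }: { params: { preset: string } }) {
	// Re-sync only when the cached content is older than MAX_CONTENT_AGE_MS (24 h).
	if (await ContentSyncService.isRepositoryContentStale()) {
		// Skip stats collection; only the refreshed content matters here.
		await ContentSyncService.syncRepository({ returnStats: false })
	}

	// Returns files in natural glob order, or null for an unknown preset
	// or an empty database.
	const files = await ContentSyncService.getPresetContentFromDb(params.preset)
	if (!files) {
		return json({ error: 'Preset not found or no content synced' }, { status: 404 })
	}

	return json({ files })
}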
