Skip to main content
Glama
indexer.ts (15.3 kB)
import path from "node:path";

import fse from "fs-extra";
import { glob } from "glob";
import lunr from "lunr";
import { simpleGit } from "simple-git";

import { ensureRepoReady, getRepoPath } from "./repo.js";
import type {
  ControlMapping,
  FrmrDocumentRecord,
  FrmrDocumentType,
  IndexState,
  KsiItem,
  MarkdownDoc,
  VersionInfo,
} from "./types.js";
import {
  createError,
  detectIdKey,
  envBoolean,
  extractControlLikeStrings,
  guessFrmrTypeFromFilename,
  normalizePath,
  sha256,
  unique,
} from "./util.js";

/** Glob patterns excluded from both the JSON and the markdown scans. */
const IGNORE_GLOBS = [
  "**/node_modules/**",
  "**/.git/**",
  "**/.hg/**",
  "**/dist/**",
  "**/build/**",
  "**/.cache/**",
];

/**
 * Location of the persisted index.
 *
 * `USERPROFILE` is consulted when `HOME` is unset (the usual situation on
 * Windows) so the cache does not silently land in the current directory.
 */
const INDEX_CACHE_FILE = path.join(
  process.env.HOME ?? process.env.USERPROFILE ?? ".",
  ".cache",
  "fedramp-docs",
  "index-v1.json",
);

/** Document type codes that `normalizeDocType` passes through unchanged. */
const KNOWN_DOC_TYPES = ["KSI", "MAS", "VDR", "SCN", "FRD", "ADS"];

/** Year (20xx), month, and optional day, each optionally separated by `-`, `_`, `.` or a space. */
const VERSION_PATTERN =
  /(20\d{2})[-_. ]?(0[1-9]|1[0-2])(?:[-_. ]?(0[1-9]|[12]\d|3[01]))?/;

/** Two capital letters, a dash and one to three digits at the start of the string. */
const CONTROL_BASE_PATTERN = /^([A-Z]{2}-\d{1,3})/;

/** ATX-style markdown heading: one to six `#`, whitespace, then the title. */
const HEADING_PATTERN = /^(#{1,6})\s+(.*)$/;

/** Opening or closing marker of a fenced code block (backticks or tildes). */
const FENCE_PATTERN = /^\s{0,3}(`{3,}|~{3,})/;

/** JSON shape written to and read from `INDEX_CACHE_FILE`. */
interface PersistedIndex {
  repoHead?: string | null;
  indexedAt: number;
  repoPath: string;
  frmrDocuments: FrmrDocumentRecord[];
  ksiItems: KsiItem[];
  controlMappings: ControlMapping[];
  markdownDocs: Array<MarkdownDoc & { indexContent: string }>;
  errors: string[];
}

/** In-memory index state; `null` until `buildIndex` has succeeded once. */
let indexState: IndexState | null = null;

/** Full-text index over markdown content; `null` until built or loaded. */
let markdownIndex: lunr.Index | null = null;

/** Narrows a value to a non-null object so its properties can be read. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Returns the first candidate that is a non-empty string, if any. */
function firstNonEmptyString(...candidates: unknown[]): string | undefined {
  return candidates.find(
    (candidate): candidate is string =>
      typeof candidate === "string" && candidate.length > 0,
  );
}

/** Produces a printable message from whatever a `catch` clause received. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Whether the on-disk cache is enabled (`FEDRAMP_DOCS_INDEX_PERSIST`, default `true`). */
function getPersistEnabled(): boolean {
  return envBoolean("FEDRAMP_DOCS_INDEX_PERSIST", true);
}

/**
 * Attempts to restore the index from `INDEX_CACHE_FILE`.
 *
 * On success the module-level `markdownIndex` is rebuilt from the cached
 * `indexContent` of every markdown document as a side effect.
 *
 * @param repoHead Current HEAD of the repository, or `null` when unknown.
 * @returns The restored state, or `null` when persistence is disabled, the
 *   cache file is missing, the cache belongs to a different HEAD or
 *   repository path, or reading/parsing fails (a warning is logged).
 */
async function loadPersistedIndex(
  repoHead: string | null,
): Promise<IndexState | null> {
  if (!getPersistEnabled()) {
    return null;
  }

  try {
    if (!(await fse.pathExists(INDEX_CACHE_FILE))) {
      return null;
    }

    const raw = await fse.readFile(INDEX_CACHE_FILE, "utf8");
    const data = JSON.parse(raw) as PersistedIndex;

    // Only reject on HEAD when both sides know their HEAD.
    if (repoHead && data.repoHead && data.repoHead !== repoHead) {
      return null;
    }
    // The cache file location is global, so a cache written for another
    // checkout would otherwise hand back a stale `repoPath`.
    if (data.repoPath !== getRepoPath()) {
      return null;
    }

    const docsMap = new Map<string, MarkdownDoc>();
    const builder = new lunr.Builder();
    builder.ref("path");
    builder.field("content");

    for (const doc of data.markdownDocs) {
      docsMap.set(doc.path, {
        path: doc.path,
        content: doc.content,
        sha256: doc.sha256,
        headings: doc.headings,
        lines: doc.lines,
      });
      builder.add({ path: doc.path, content: doc.indexContent });
    }

    markdownIndex = builder.build();

    return {
      repoPath: data.repoPath,
      indexedAt: data.indexedAt,
      frmrDocuments: data.frmrDocuments,
      ksiItems: data.ksiItems,
      controlMappings: data.controlMappings,
      markdownDocs: docsMap,
      errors: data.errors,
    };
  } catch (error) {
    console.warn(`Failed to load persisted index: ${errorMessage(error)}`);
    return null;
  }
}

/**
 * Writes the index to `INDEX_CACHE_FILE` as JSON; a no-op when persistence
 * is disabled.
 *
 * @param state The freshly built index state.
 * @param repoHead HEAD the state was built from, stored for later validation.
 * @param indexContentMap Per-path text that was fed to the search index;
 *   the document's raw content is used for paths missing from the map.
 */
async function persistIndex(
  state: IndexState,
  repoHead: string | null,
  indexContentMap: Map<string, string>,
): Promise<void> {
  if (!getPersistEnabled()) {
    return;
  }

  const payload: PersistedIndex = {
    repoHead,
    indexedAt: state.indexedAt,
    repoPath: state.repoPath,
    frmrDocuments: state.frmrDocuments,
    ksiItems: state.ksiItems,
    controlMappings: state.controlMappings,
    markdownDocs: [...state.markdownDocs.values()].map((doc) => ({
      ...doc,
      indexContent: indexContentMap.get(doc.path) ?? doc.content,
    })),
    errors: state.errors,
  };

  await fse.ensureDir(path.dirname(INDEX_CACHE_FILE));
  await fse.writeFile(INDEX_CACHE_FILE, JSON.stringify(payload), "utf8");
}

/**
 * Turns a file name into a display title: drops a leading `FRMR.<letters>.`
 * prefix and the `.json` suffix, replaces runs of `-`/`_` with a space and
 * upper-cases the first character of every word.
 */
function deriveTitleFromFilename(filename: string): string {
  return filename
    .replace(/^FRMR\.[A-Z]+\./i, "")
    .replace(/\.json$/i, "")
    .replace(/[-_]+/g, " ")
    .replace(/\b\w/g, (c) => c.toUpperCase());
}

/**
 * Finds the first date-like token (`20YY` + month, optional day) in `input`.
 *
 * @returns The token as `YYYY-MM` or `YYYY-MM-DD`, or `undefined` when none
 *   is present. The result is assembled from the captured parts so compact
 *   inputs such as `20240115` are normalised the same way as `2024.01.15`.
 */
function extractVersionFromString(input: string): string | undefined {
  const match = VERSION_PATTERN.exec(input);
  if (!match) {
    return undefined;
  }
  const [, year, month, day] = match;
  return [year, month, day]
    .filter((part): part is string => Boolean(part))
    .join("-");
}

/**
 * Returns the first string value found under `published`, `published_at`,
 * `date` or `released` (checked in that order), or `undefined`.
 */
function extractPublishedDate(
  metadata: Record<string, unknown>,
): string | undefined {
  const candidateKeys = ["published", "published_at", "date", "released"];
  for (const key of candidateKeys) {
    const value = metadata[key];
    if (typeof value === "string") {
      return value;
    }
  }
  return undefined;
}

/** Upper-cases `typeGuess` and maps anything outside the known codes to `"unknown"`. */
function normalizeDocType(typeGuess: string): FrmrDocumentType {
  const upper = typeGuess.toUpperCase();
  if (KNOWN_DOC_TYPES.includes(upper)) {
    return upper as FrmrDocumentType;
  }
  return "unknown";
}

/**
 * Coerces a loosely typed value into a list of strings.
 *
 * @returns The non-empty string members of an array, a one-element array
 *   for a non-empty string, and `undefined` for everything else (including
 *   arrays that contain no usable strings).
 */
function coerceStringArray(value: unknown): string[] | undefined {
  if (!value) {
    return undefined;
  }
  if (Array.isArray(value)) {
    const items = value.filter(
      (item): item is string => typeof item === "string" && Boolean(item),
    );
    return items.length ? items : undefined;
  }
  if (typeof value === "string") {
    return [value];
  }
  return undefined;
}

/**
 * Converts the raw items of a document into `KsiItem` records.
 *
 * Items that are not objects, or that have no non-empty string under
 * `doc.idKey` (default `"id"`), `id` or `uid`, are skipped.
 *
 * @param doc The document the items belong to; supplies `idKey` and `path`.
 * @param items Raw, unvalidated item values from the parsed JSON.
 */
function buildKsiItems(doc: FrmrDocumentRecord, items: unknown[]): KsiItem[] {
  const results: KsiItem[] = [];
  const idKey = doc.idKey ?? "id";

  for (const item of items) {
    if (!isRecord(item)) {
      continue;
    }

    const id = firstNonEmptyString(item[idKey], item.id, item.uid);
    if (!id) {
      continue;
    }

    // `category` wins; otherwise fall back to the first string in `categories`.
    const category =
      typeof item.category === "string"
        ? item.category
        : Array.isArray(item.categories)
          ? item.categories.find(
              (value): value is string => typeof value === "string",
            )
          : undefined;

    let sourceRef: string | undefined;
    if (typeof item.source_ref === "string") {
      sourceRef = item.source_ref;
    } else if (Array.isArray(item.source_ref)) {
      sourceRef = item.source_ref
        .filter((value) => typeof value === "string")
        .join(", ");
    } else if (typeof item.source === "string") {
      sourceRef = item.source;
    }

    // Entries that are not objects (e.g. `null`) carry no fields to read and
    // would otherwise throw on property access, so they are dropped.
    const references = Array.isArray(item.references)
      ? item.references.filter(isRecord).map((ref) => ({
          type:
            typeof ref.type === "string"
              ? ref.type
              : typeof ref.kind === "string"
                ? ref.kind
                : undefined,
          id: typeof ref.id === "string" ? ref.id : undefined,
          text:
            typeof ref.text === "string"
              ? ref.text
              : typeof ref.description === "string"
                ? ref.description
                : undefined,
        }))
      : undefined;

    const ksiItem: KsiItem = {
      id,
      title: typeof item.title === "string" ? item.title : undefined,
      description:
        typeof item.description === "string" ? item.description : undefined,
      category,
      status: typeof item.status === "string" ? item.status : undefined,
      sourceRef,
      requirements: coerceStringArray(item.requirements),
      controlMapping: unique(extractControlLikeStrings(item)),
      evidenceExamples: coerceStringArray(item.evidence_examples),
      references,
      docPath: doc.path,
    };
    results.push(ksiItem);
  }

  return results;
}

/**
 * Splits a control string into its base id and parenthesised enhancements.
 *
 * @returns `null` when `control` does not start with the base pattern
 *   (two capitals, dash, 1-3 digits); otherwise the base id and the
 *   de-duplicated `(…)` groups, parentheses included.
 */
function parseControlId(
  control: string,
): { control: string; enhancements: string[] } | null {
  const base = CONTROL_BASE_PATTERN.exec(control)?.[1];
  if (!base) {
    return null;
  }
  const enhancements = unique(control.match(/\(\w+\)/g) ?? []);
  return { control: base, enhancements };
}

/**
 * Builds one `ControlMapping` per parseable control string found in each item.
 *
 * @param type Document type recorded as the mapping's `source`.
 * @param pathRef Document path recorded on every mapping.
 * @param idKey Preferred id property; `id` and `uid` are tried afterwards.
 * @param items Raw items; non-objects and items without an id are skipped.
 */
function collectControlMappings(
  type: FrmrDocumentType,
  pathRef: string,
  idKey: string | null,
  items: unknown[],
): ControlMapping[] {
  const mappings: ControlMapping[] = [];

  for (const item of items) {
    if (!isRecord(item)) {
      continue;
    }

    const sourceId = firstNonEmptyString(
      idKey ? item[idKey] : undefined,
      item.id,
      item.uid,
    );
    if (!sourceId) {
      continue;
    }

    for (const candidate of unique(extractControlLikeStrings(item))) {
      const parsed = parseControlId(candidate);
      if (!parsed) {
        continue;
      }
      mappings.push({
        source: type,
        sourceId,
        control: parsed.control,
        controlEnhancements: parsed.enhancements,
        path: pathRef,
      });
    }
  }

  return mappings;
}

/** Replaces every triple-backtick fenced span with a single space. */
function stripCodeBlocks(markdown: string): string {
  return markdown.replace(/```[\s\S]*?```/g, " ");
}

/**
 * Collects ATX headings with their depth and 1-based line number.
 *
 * Lines inside fenced code blocks are ignored so that, for instance, shell
 * comments in a snippet are not reported as headings. A fence is closed by a
 * marker of the same character that is at least as long as the opener.
 */
function extractHeadings(
  lines: string[],
): Array<{ depth: number; title: string; line: number }> {
  const headings: Array<{ depth: number; title: string; line: number }> = [];
  let openFence: string | null = null;

  for (const [index, line] of lines.entries()) {
    const fence = FENCE_PATTERN.exec(line)?.[1];
    if (fence) {
      if (openFence === null) {
        openFence = fence;
      } else if (
        fence[0] === openFence[0] &&
        fence.length >= openFence.length
      ) {
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) {
      continue;
    }

    const match = HEADING_PATTERN.exec(line);
    if (match) {
      const [, hashes = "", text = ""] = match;
      headings.push({
        depth: hashes.length,
        title: text.trim(),
        line: index + 1,
      });
    }
  }

  return headings;
}

/** Result of a full repository scan. */
interface BuildResult {
  state: IndexState;
  indexContentMap: Map<string, string>;
}

/**
 * Walks the repository, indexing every `*.json` and `*.md` file that is not
 * excluded by `IGNORE_GLOBS`.
 *
 * Read and parse failures are recorded in `state.errors` rather than thrown.
 * Sets the module-level `markdownIndex` as a side effect.
 */
async function scanRepository(): Promise<BuildResult> {
  const repoPath = getRepoPath();
  const jsonPaths = await glob("**/*.json", {
    cwd: repoPath,
    ignore: IGNORE_GLOBS,
  });
  const markdownPaths = await glob("**/*.md", {
    cwd: repoPath,
    ignore: IGNORE_GLOBS,
  });

  const frmrDocuments: FrmrDocumentRecord[] = [];
  const ksiItems: KsiItem[] = [];
  const controlMappings: ControlMapping[] = [];
  const markdownDocs = new Map<string, MarkdownDoc>();
  const errors: string[] = [];

  for (const relativePath of jsonPaths) {
    const normalizedPath = normalizePath(relativePath);
    const absolutePath = path.join(repoPath, relativePath);
    const baseName = path.basename(relativePath);

    let content: string;
    try {
      content = await fse.readFile(absolutePath, "utf8");
    } catch (error) {
      errors.push(
        `Failed to read JSON file ${normalizedPath}: ${errorMessage(error)}`,
      );
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(content);
    } catch (error) {
      errors.push(
        `Failed to parse JSON file ${normalizedPath}: ${errorMessage(error)}`,
      );
      continue;
    }

    // Primitive or null top-level values carry nothing to index.
    if (!isRecord(parsed)) {
      continue;
    }

    const docType = normalizeDocType(guessFrmrTypeFromFilename(baseName));
    const metadata = isRecord(parsed.metadata) ? parsed.metadata : {};

    const title =
      firstNonEmptyString(metadata.title, parsed.title) ??
      deriveTitleFromFilename(baseName);
    const version =
      firstNonEmptyString(metadata.version) ??
      extractVersionFromString(baseName);
    const published = extractPublishedDate(metadata);

    // The first of these properties that holds an array supplies the items.
    const items: unknown[] =
      [parsed.items, parsed.controls, parsed.entries, parsed.records].find(
        (value): value is unknown[] => Array.isArray(value),
      ) ?? [];
    const idKey = detectIdKey(items);

    const docRecord: FrmrDocumentRecord = {
      type: docType,
      title,
      version,
      published,
      path: normalizedPath,
      idHint: docType !== "unknown" ? docType : undefined,
      itemCount: items.length,
      raw: parsed,
      rawText: content,
      topLevelKeys: Object.keys(parsed),
      idKey,
    };
    frmrDocuments.push(docRecord);

    if (docType === "KSI" && items.length) {
      ksiItems.push(...buildKsiItems(docRecord, items));
    }
    if (items.length) {
      controlMappings.push(
        ...collectControlMappings(docType, normalizedPath, idKey, items),
      );
    }
  }

  const builder = new lunr.Builder();
  builder.ref("path");
  builder.field("content");
  const indexContentMap = new Map<string, string>();

  for (const relativePath of markdownPaths) {
    const normalizedPath = normalizePath(relativePath);
    const absolutePath = path.join(repoPath, relativePath);

    let content: string;
    try {
      content = await fse.readFile(absolutePath, "utf8");
    } catch (error) {
      errors.push(
        `Failed to read markdown file ${normalizedPath}: ${errorMessage(error)}`,
      );
      continue;
    }

    const lines = content.split(/\r?\n/);
    const doc: MarkdownDoc = {
      path: normalizedPath,
      content,
      sha256: sha256(content),
      headings: extractHeadings(lines),
      lines,
    };
    markdownDocs.set(normalizedPath, doc);

    // Search runs over the text with fenced code removed; the stored
    // document keeps the original content.
    const indexContent = stripCodeBlocks(content);
    indexContentMap.set(normalizedPath, indexContent);
    builder.add({ path: normalizedPath, content: indexContent });
  }

  markdownIndex = builder.build();

  const state: IndexState = {
    repoPath,
    indexedAt: Date.now(),
    frmrDocuments,
    ksiItems,
    controlMappings,
    markdownDocs,
    errors,
  };

  return { state, indexContentMap };
}

/**
 * Makes the index available, building it if necessary.
 *
 * Order of preference when `force` is false: the in-memory state, then the
 * persisted cache, then a full repository scan. With `force` a scan is
 * always performed.
 *
 * @param force Skip the in-memory and persisted caches and rescan.
 * @returns The current index state.
 */
export async function buildIndex(force = false): Promise<IndexState> {
  await ensureRepoReady();
  if (indexState && !force) {
    return indexState;
  }

  const repoPath = getRepoPath();
  const git = simpleGit(repoPath);
  // An unreadable HEAD is treated as "unknown" rather than as a failure.
  const repoHead = await git.revparse(["HEAD"]).catch(() => null);

  if (!force) {
    const cached = await loadPersistedIndex(repoHead);
    if (cached) {
      indexState = cached;
      return indexState;
    }
  }

  const { state, indexContentMap } = await scanRepository();
  indexState = state;

  // The cache is an optimisation: failing to write it must not fail a
  // build whose in-memory result is already usable.
  try {
    await persistIndex(state, repoHead, indexContentMap);
  } catch (error) {
    console.warn(`Failed to persist index: ${errorMessage(error)}`);
  }

  return state;
}

/**
 * Returns the current index state.
 *
 * @throws An `INDEX_NOT_READY` error (via `createError`) when `buildIndex`
 *   has not populated the state yet.
 */
export function getIndexState(): IndexState {
  if (!indexState) {
    throw createError({
      code: "INDEX_NOT_READY",
      message: "Index not ready. Call buildIndex() first.",
    });
  }
  return indexState;
}

/**
 * Returns the markdown full-text index.
 *
 * @throws An `INDEX_NOT_READY` error (via `createError`) when no index has
 *   been built or loaded yet.
 */
export function getMarkdownIndex(): lunr.Index {
  if (!markdownIndex) {
    throw createError({
      code: "INDEX_NOT_READY",
      message: "Markdown index not ready. Call buildIndex() first.",
    });
  }
  return markdownIndex;
}

/** Looks up an indexed JSON document by its exact path. */
export function resolveFrmrDocument(
  pathRef: string,
): FrmrDocumentRecord | undefined {
  return getIndexState().frmrDocuments.find((doc) => doc.path === pathRef);
}

/** All indexed JSON documents. */
export function getFrmrDocuments(): FrmrDocumentRecord[] {
  return getIndexState().frmrDocuments;
}

/** All KSI items collected from documents typed `KSI`. */
export function getKsiItems(): KsiItem[] {
  return getIndexState().ksiItems;
}

/** All control mappings collected from indexed documents. */
export function getControlMappings(): ControlMapping[] {
  return getIndexState().controlMappings;
}

/** Looks up an indexed markdown document by its exact path. */
export function getMarkdownDoc(pathRef: string): MarkdownDoc | undefined {
  return getIndexState().markdownDocs.get(pathRef);
}

/** Type, version, published date and path of every indexed JSON document. */
export function listVersions(): VersionInfo[] {
  return getIndexState().frmrDocuments.map((doc) => ({
    type: doc.type,
    version: doc.version,
    published: doc.published,
    path: doc.path,
  }));
}

/** Read/parse errors recorded while the index was built. */
export function getIndexErrors(): string[] {
  return getIndexState().errors;
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ethanolivertroy/fedramp-docs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.