import { readFile } from 'fs/promises';
import { dirname } from 'path';
import { NodeHtmlMarkdown } from 'node-html-markdown';
import { bundleMDX } from 'mdx-bundler';
import { getMDXComponent } from 'mdx-bundler/client/index.js';
import { createElement, Fragment } from 'react';
import type { ComponentType, ReactNode } from 'react';
import { renderToString } from 'react-dom/server';
import { parse } from 'yaml';
import { SearchMatch } from '../types/index.js';
// Matches the opening of a JSX tag whose name starts with an uppercase letter,
// optionally followed by one member access (`<Tabs` or `<Tabs.Item`).
// Capture group 1 is the tag name; closing tags (`</Tabs>`) are not matched.
const COMPONENT_TAG_REGEX = /<(?!\/)([A-Z][A-Za-z0-9]*(?:\.[A-Za-z0-9]+)?)\b/g;
// Matches `import <clause> from '<module>'` statements that begin a line.
// Capture group 1 is the import clause. The clause may span several lines
// (for example a named-import list written one binding per line), but it may
// not contain quotes or semicolons, so a side-effect import such as
// `import './styles.css';` is never merged into the statement that follows it.
const IMPORT_STATEMENT_REGEX = /^\s*import\s+([^'";]+?)\s+from\s+['"][^'"]+['"]/gm;
// Matches `const` / `let` / `var` / `function` / `class` declarations (optionally
// exported) whose identifier starts with an uppercase letter.
// Capture group 1 is the declared identifier.
const DECLARATION_REGEX = /^\s*(?:export\s+)?(?:const|let|var|function|class)\s+([A-Z][A-Za-z0-9]*)\b/gm;
// Matches a triple-backtick fenced code block, including both fences.
const CODE_FENCE_REGEX = /```[\s\S]*?```/g;
// Single HTML-to-Markdown translator instance, created at module load and
// reused by convertMdxToMarkdown for every conversion.
const htmlToMarkdown = new NodeHtmlMarkdown();
/**
 * Recursive lookup table of fallback components.
 *
 * Each key maps either to a component that accepts optional `children`, or to
 * a nested map of the same shape (used for dotted names, whose parts are
 * assigned level by level).
 */
type FallbackComponentMap = {
[key: string]: FallbackComponentMap | ComponentType<{ children?: ReactNode }>;
};
/**
 * Props accepted by the component returned from `getMDXComponent`.
 *
 * `components` optionally carries the fallback component map; the index
 * signature allows any additional props of unknown type.
 */
type MdxComponentProps = {
components?: FallbackComponentMap;
[key: string]: unknown;
};
/**
 * Render an MDX file and translate the result to Markdown.
 *
 * The source is bundled with mdx-bundler (resolving relative to the file's
 * directory, with `react` and `react-dom` kept external), rendered to an HTML
 * string with React, and that HTML is converted to Markdown. Component names
 * the document uses without importing or declaring them are supplied through
 * the `components` prop as fallbacks.
 *
 * @param filePath - Absolute path to the MDX file
 * @returns The Markdown content
 */
export async function convertMdxToMarkdown(filePath: string): Promise<string> {
  const source = await readFile(filePath, 'utf-8');
  const missingNames = getUndefinedComponentNames(source);

  const bundled = await bundleMDX({
    source,
    cwd: dirname(filePath),
    esbuildOptions: (baseOptions) => {
      return {
        ...baseOptions,
        platform: 'node',
        external: ['react', 'react-dom']
      };
    }
  });

  const MdxComponent = getMDXComponent(bundled.code) as ComponentType<MdxComponentProps>;

  // Only pass props when there is at least one fallback to provide.
  let props: MdxComponentProps | undefined;
  if (missingNames.length > 0) {
    props = { components: buildFallbackComponentMap(missingNames) };
  }

  const renderedHtml = renderToString(createElement(MdxComponent, props));
  return htmlToMarkdown.translate(renderedHtml);
}
/**
 * Search for content within an MDX file
 *
 * The file is searched line by line with a case-sensitive substring match.
 * Both LF and CRLF line endings are recognised, so returned lines never carry
 * a trailing carriage return. An empty query yields no matches.
 *
 * @param filePath - Absolute path to the MDX file
 * @param query - Search query string
 * @param contextLines - Number of lines of context around matches (default: 2)
 * @returns Array of search matches with context
 */
export async function searchInMdxFile(
  filePath: string,
  query: string,
  contextLines: number = 2
): Promise<SearchMatch[]> {
  // Read first so an unreadable file still rejects, whatever the query is.
  const content = await readFile(filePath, 'utf-8');

  // The empty string is a substring of every line; reporting every line as a
  // match is never useful, so treat it as "nothing to search for".
  if (query === '') {
    return [];
  }

  // Accept Windows (CRLF) as well as Unix (LF) line endings.
  const lines = content.split(/\r?\n/);
  const matches: SearchMatch[] = [];

  lines.forEach((line, index) => {
    if (!line.includes(query)) {
      return;
    }
    // Clamp the context window to the bounds of the file.
    const beforeStart = Math.max(0, index - contextLines);
    const afterEnd = Math.min(lines.length - 1, index + contextLines);
    matches.push({
      lineNumber: index + 1, // Line numbers start at 1
      content: line,
      context: {
        before: lines.slice(beforeStart, index),
        after: lines.slice(index + 1, afterEnd + 1)
      }
    });
  });

  return matches;
}
/**
 * Extract frontmatter from an MDX file
 * Frontmatter is the YAML content between --- markers at the start of the file
 *
 * Each `---` marker must stand on its own line (trailing spaces or tabs are
 * allowed), so a `---` that merely appears inside a YAML value does not end
 * the block. Leading whitespace before the opening marker is ignored.
 *
 * @param filePath - Absolute path to the MDX file
 * @returns Parsed frontmatter object, or null when the file has no
 * frontmatter, the block is empty, the YAML fails to parse, or the YAML does
 * not describe a mapping (for example a scalar or a list)
 */
export async function extractFrontmatter(
  filePath: string
): Promise<Record<string, unknown> | null> {
  const content = await readFile(filePath, 'utf-8');

  // Opening marker line, lazily captured body, then the first closing marker
  // that occupies a whole line (followed by a line break or end of input).
  const frontmatterBlock = /^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/.exec(
    content.trimStart()
  );
  const frontmatterString = frontmatterBlock?.[1];
  if (!frontmatterString || !frontmatterString.trim()) {
    // No delimited block, or a block containing only whitespace.
    return null;
  }

  try {
    const frontmatter: unknown = parse(frontmatterString);
    // Arrays are objects too, but they are not key/value frontmatter.
    if (
      typeof frontmatter === 'object' &&
      frontmatter !== null &&
      !Array.isArray(frontmatter)
    ) {
      return frontmatter as Record<string, unknown>;
    }
    return null;
  } catch {
    // Malformed YAML is reported as "no frontmatter" rather than as an error.
    return null;
  }
}
/**
 * List the component tag names that the MDX source uses but neither imports
 * nor declares locally.
 *
 * Fenced code blocks are removed before scanning. A dotted name such as
 * `Tabs.Item` is judged by its root identifier (`Tabs`) but returned in full.
 * Names are returned once each, in order of first appearance.
 */
function getUndefinedComponentNames(source: string): string[] {
  const scannable = stripCodeBlocks(source);

  const knownNames = new Set<string>();
  for (const imported of extractImportedComponentNames(scannable)) {
    knownNames.add(imported);
  }
  for (const declared of extractLocallyDeclaredComponents(scannable)) {
    knownNames.add(declared);
  }

  const referencedNames = new Set<string>();
  for (const tag of scannable.matchAll(COMPONENT_TAG_REGEX)) {
    referencedNames.add(tag[1]);
  }

  const missing: string[] = [];
  for (const name of referencedNames) {
    const [root] = name.split('.');
    if (!knownNames.has(root)) {
      missing.push(name);
    }
  }
  return missing;
}
/**
 * Collect the local binding names introduced by the import statements found
 * in `content`.
 *
 * Handles default imports, namespace imports (`* as NS`), named imports, and
 * `as` aliases (the alias is the name recorded). A leading `type ` keyword on
 * the clause or on an individual specifier is dropped.
 */
function extractImportedComponentNames(content: string): Set<string> {
  const bindings = new Set<string>();
  const dropTypeKeyword = (text: string): string =>
    text.startsWith('type ') ? text.slice(5).trim() : text;

  for (const statement of content.matchAll(IMPORT_STATEMENT_REGEX)) {
    const rawClause = statement[1]?.trim();
    if (!rawClause) {
      continue;
    }

    for (const piece of splitImportClause(dropTypeKeyword(rawClause))) {
      const segment = piece.trim();
      if (segment === '') {
        continue;
      }

      if (segment.startsWith('{') && segment.endsWith('}')) {
        // Named import list: record each specifier, preferring its alias.
        const inner = segment.slice(1, -1).trim();
        const specifiers = inner === '' ? [] : inner.split(',');
        for (const rawSpecifier of specifiers) {
          const specifier = rawSpecifier.trim();
          if (specifier === '') {
            continue;
          }
          const binding = dropTypeKeyword(specifier);
          const alias = binding.match(/(.+?)\s+as\s+([A-Za-z0-9_$]+)/);
          bindings.add(alias ? alias[2] : binding);
        }
      } else if (segment.startsWith('*')) {
        // Namespace import: only `* as Name` contributes a binding.
        const namespace = segment.match(/^\*\s+as\s+([A-Za-z0-9_$]+)/);
        if (namespace) {
          bindings.add(namespace[1]);
        }
      } else {
        // Anything else is taken as a default import binding.
        bindings.add(segment);
      }
    }
  }

  return bindings;
}
/**
 * Split an import clause on its top-level commas.
 *
 * Commas inside `{ ... }` do not split, so `Foo, { Bar, Baz }` yields
 * `['Foo', '{ Bar, Baz }']`. Segments are trimmed and empty ones are dropped.
 */
function splitImportClause(clause: string): string[] {
  const pieces: string[] = [];
  let buffer = '';
  let braceDepth = 0;

  // Emit the text gathered so far (if any) and start a new segment.
  const flush = (): void => {
    const trimmed = buffer.trim();
    if (trimmed !== '') {
      pieces.push(trimmed);
    }
    buffer = '';
  };

  for (const char of clause) {
    if (char === ',' && braceDepth === 0) {
      flush();
      continue;
    }
    if (char === '{') {
      braceDepth += 1;
    } else if (char === '}' && braceDepth > 0) {
      // An unmatched closing brace never pushes the depth below zero.
      braceDepth -= 1;
    }
    buffer += char;
  }
  flush();

  return pieces;
}
/**
 * Collect the identifiers captured by DECLARATION_REGEX in `content`, i.e.
 * the names the document declares itself.
 */
function extractLocallyDeclaredComponents(content: string): Set<string> {
  const declared = new Set<string>();
  for (const declaration of content.matchAll(DECLARATION_REGEX)) {
    const identifier = declaration[1];
    if (identifier) {
      declared.add(identifier);
    }
  }
  return declared;
}
/**
 * Return `content` with every span matched by CODE_FENCE_REGEX removed.
 */
function stripCodeBlocks(content: string): string {
  const withoutFences = content.replace(CODE_FENCE_REGEX, () => '');
  return withoutFences;
}
/**
 * Build a fallback component map containing an entry for every given name.
 *
 * Each name is split on `.` and handed to assignFallbackComponent, which
 * places it in the map.
 */
function buildFallbackComponentMap(componentNames: string[]): FallbackComponentMap {
  const fallbackMap: FallbackComponentMap = {};
  componentNames.forEach((dottedName) => {
    assignFallbackComponent(fallbackMap, dottedName.split('.'));
  });
  return fallbackMap;
}
/**
 * Register a fallback component in `target` at the path given by `parts`
 * (a component name already split on `.`).
 *
 * A name may be used both as a component and as a namespace, e.g. `<Tabs>`
 * together with `<Tabs.Item>`. Neither registration may discard the other, so
 * members are attached to the component function itself:
 * - registering a member under an existing component adds a property to it;
 * - registering a component over an existing plain namespace creates the
 *   component and copies the namespace's members onto it.
 *
 * An existing component at the final path segment is left untouched.
 *
 * @param target - Map (or component used as a namespace) to register into
 * @param parts - Remaining path segments; an empty array is a no-op
 */
function assignFallbackComponent(target: FallbackComponentMap, parts: string[]): void {
  const [current, ...rest] = parts;
  if (current === undefined) {
    return;
  }

  const existing = target[current];

  if (rest.length === 0) {
    if (typeof existing === 'function') {
      return;
    }
    const component = createFallbackComponent();
    if (typeof existing === 'object' && existing !== null) {
      // Keep members that were registered while this name was only a namespace.
      Object.assign(component, existing);
    }
    target[current] = component;
    return;
  }

  // Both plain objects and component functions can carry nested members.
  const canHoldMembers =
    typeof existing === 'function' || (typeof existing === 'object' && existing !== null);
  if (!canHoldMembers) {
    target[current] = {};
  }
  assignFallbackComponent(target[current] as FallbackComponentMap, rest);
}

/**
 * Create a fallback component that renders only its children, wrapped in a
 * React fragment (or nothing when it has no children).
 */
function createFallbackComponent() {
  return function FallbackComponent(props: { children?: ReactNode }) {
    return createElement(Fragment, null, props?.children ?? null);
  };
}