Skip to main content
Glama
parser.ts (9.26 kB)
/**
 * Gutenberg Block Parser
 *
 * Parses WordPress Gutenberg block content into structured format.
 * Handles nested blocks (innerBlocks), block attributes, and reusable blocks.
 *
 * @package WP_Navigator_Pro
 * @since 1.1.0
 */

import type { BlockSnapshot } from '../snapshots/types.js';

/**
 * Block delimiter markers in WordPress content
 */
const BLOCK_COMMENT_START = '<!-- wp:';
const BLOCK_COMMENT_END = '-->';
const BLOCK_COMMENT_CLOSE = '<!-- /wp:';

/**
 * Common prefix of both the opening and the closing delimiter. Used to jump
 * straight to the next position where a delimiter could possibly begin.
 */
const COMMENT_OPEN = '<!--';

/**
 * Namespace prefix that is implied when a block name has no namespace.
 */
const CORE_NAMESPACE = 'core/';

/**
 * Parse state for tracking position in content
 */
interface ParseState {
  content: string;
  position: number;
}

/**
 * Result from parsing a single block
 */
interface BlockParseResult {
  block: BlockSnapshot;
  endPosition: number;
}

/**
 * Check if position is at a block start marker
 */
function isBlockStart(state: ParseState): boolean {
  return state.content.startsWith(BLOCK_COMMENT_START, state.position);
}

/**
 * Check if position is at a block close marker
 *
 * Only the `<!-- /wp:<blockName>` prefix is compared; the trailing ` -->` is
 * not verified. Not referenced by the other functions visible in this module.
 */
function isBlockClose(state: ParseState, blockName: string): boolean {
  return state.content.startsWith(`${BLOCK_COMMENT_CLOSE}${blockName}`, state.position);
}

/**
 * Remove a leading `core/` namespace from a block name.
 *
 * Only a true prefix is stripped, so a name such as `my-core/thing` is
 * returned unchanged.
 */
function stripCoreNamespace(blockName: string): string {
  return blockName.startsWith(CORE_NAMESPACE)
    ? blockName.slice(CORE_NAMESPACE.length)
    : blockName;
}

/**
 * Decode the attribute JSON of a block comment.
 *
 * @param attrsJson - Text following the block name (may be empty)
 * @returns The decoded object, or `{}` when the text is empty, is not valid
 *   JSON, or decodes to anything other than a plain object (null, array,
 *   number, string, boolean)
 */
function parseAttributes(attrsJson: string): Record<string, unknown> {
  if (!attrsJson) {
    return {};
  }

  try {
    const parsed: unknown = JSON.parse(attrsJson);
    // Downstream code calls Object.keys(attrs) and reads attrs.ref, so only a
    // real object is acceptable here.
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    // Invalid JSON, keep empty attrs
  }

  return {};
}

/**
 * Parse block name and attributes from opening comment
 *
 * Handles formats:
 * - <!-- wp:paragraph -->
 * - <!-- wp:heading {"level":2} -->
 * - <!-- wp:separator /-->
 * - <!-- wp:acf/testimonial {"id":"123"} -->
 *
 * The block name is separated from the attributes by the first whitespace
 * character (space, tab or newline).
 *
 * @returns The parsed opening, or `null` when `state.position` is not at a
 *   block start marker or the comment is never terminated by `-->`
 */
function parseBlockOpening(state: ParseState): {
  blockName: string;
  attrs: Record<string, unknown>;
  isSelfClosing: boolean;
  endPosition: number;
} | null {
  if (!isBlockStart(state)) {
    return null;
  }

  // Find the end of the block comment
  const commentEnd = state.content.indexOf(BLOCK_COMMENT_END, state.position);
  if (commentEnd === -1) {
    return null;
  }

  const commentContent = state.content
    .substring(state.position + BLOCK_COMMENT_START.length, commentEnd)
    .trim();

  // Check for self-closing marker (ends with /)
  const isSelfClosing = commentContent.endsWith('/');
  const cleanContent = isSelfClosing ? commentContent.slice(0, -1).trim() : commentContent;

  // Block name can be "paragraph" or "namespace/block-name"; everything after
  // the first whitespace character is the attribute JSON.
  const separatorIndex = cleanContent.search(/\s/);
  let blockName = separatorIndex === -1 ? cleanContent : cleanContent.substring(0, separatorIndex);
  const attrsJson = separatorIndex === -1 ? '' : cleanContent.substring(separatorIndex + 1).trim();

  // Normalize block name (add core/ prefix if no namespace)
  if (!blockName.includes('/')) {
    blockName = `${CORE_NAMESPACE}${blockName}`;
  }

  return {
    blockName,
    attrs: parseAttributes(attrsJson),
    isSelfClosing,
    endPosition: commentEnd + BLOCK_COMMENT_END.length,
  };
}

/**
 * Parse a single block and its inner content recursively
 *
 * Nested blocks (including nested blocks of the same type) are consumed by
 * the recursive call, so the first closing marker seen at this level belongs
 * to this block.
 *
 * @returns The block plus the position just past it, or `null` when
 *   `state.position` does not hold a parseable opening comment. When the
 *   closing marker is missing, everything up to the end of the content is
 *   treated as the block's inner content.
 */
function parseBlock(state: ParseState): BlockParseResult | null {
  const opening = parseBlockOpening(state);
  if (!opening) {
    return null;
  }

  const { blockName, attrs, isSelfClosing, endPosition } = opening;

  // For self-closing blocks, return immediately
  if (isSelfClosing) {
    return {
      block: {
        blockName,
        attrs,
        innerBlocks: [],
        innerHTML: '',
        innerContent: [],
      },
      endPosition,
    };
  }

  const { content } = state;

  // A core block may be closed with either its short or its namespaced name.
  const closeMarker = `${BLOCK_COMMENT_CLOSE}${stripCoreNamespace(blockName)} -->`;
  const coreCloseMarker = `${BLOCK_COMMENT_CLOSE}${blockName} -->`;

  // Parse inner content - can contain HTML and nested blocks
  const innerBlocks: BlockSnapshot[] = [];
  const innerContent: string[] = [];
  let innerHTML = '';
  let htmlBuffer = '';
  let currentPosition = endPosition;

  // Move the pending HTML run into innerContent / innerHTML.
  const flushHtml = (): void => {
    if (htmlBuffer) {
      innerContent.push(htmlBuffer);
      innerHTML += htmlBuffer;
      htmlBuffer = '';
    }
  };

  while (currentPosition < content.length) {
    // Check if we've reached the closing marker
    let matchedLength = -1;
    if (content.startsWith(closeMarker, currentPosition)) {
      matchedLength = closeMarker.length;
    } else if (content.startsWith(coreCloseMarker, currentPosition)) {
      matchedLength = coreCloseMarker.length;
    }

    if (matchedLength !== -1) {
      flushHtml();
      return {
        block: {
          blockName,
          attrs,
          innerBlocks,
          innerHTML,
          innerContent,
        },
        endPosition: currentPosition + matchedLength,
      };
    }

    // Check if there's a nested block
    if (content.startsWith(BLOCK_COMMENT_START, currentPosition)) {
      // Flush HTML buffer before nested block
      flushHtml();

      const nestedResult = parseBlock({ content, position: currentPosition });
      if (nestedResult) {
        innerBlocks.push(nestedResult.block);
        // Marker for inner block position
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        innerContent.push(null as any);
        currentPosition = nestedResult.endPosition;
        continue;
      }
    }

    // Plain HTML: every delimiter starts with "<!--", so copy everything up
    // to the next such sequence in one step instead of one character at a time.
    const nextComment = content.indexOf(COMMENT_OPEN, currentPosition + 1);
    const chunkEnd = nextComment === -1 ? content.length : nextComment;
    htmlBuffer += content.slice(currentPosition, chunkEnd);
    currentPosition = chunkEnd;
  }

  // If we reach here, there was no closing marker (malformed content)
  // Return what we have
  flushHtml();

  return {
    block: {
      blockName,
      attrs,
      innerBlocks,
      innerHTML,
      innerContent,
    },
    endPosition: currentPosition,
  };
}

/**
 * Parse all Gutenberg blocks from WordPress content
 *
 * Text that sits outside any block at the top level is skipped.
 *
 * @param content - Raw WordPress content with Gutenberg block comments
 * @returns Array of parsed blocks with preserved hierarchy
 */
export function parseGutenbergBlocks(content: string): BlockSnapshot[] {
  if (!content || typeof content !== 'string') {
    return [];
  }

  const blocks: BlockSnapshot[] = [];
  let position = content.indexOf(BLOCK_COMMENT_START);

  while (position !== -1) {
    const result = parseBlock({ content, position });

    let resumeAt: number;
    if (result) {
      blocks.push(result.block);
      resumeAt = result.endPosition;
    } else {
      // Couldn't parse, skip past the marker
      resumeAt = position + 1;
    }

    position = content.indexOf(BLOCK_COMMENT_START, resumeAt);
  }

  return blocks;
}

/**
 * Serialize blocks back to WordPress block format
 *
 * @param blocks - Array of blocks to serialize
 * @returns WordPress block content string
 */
export function serializeBlocks(blocks: BlockSnapshot[]): string {
  return blocks.map(serializeBlock).join('\n\n');
}

/**
 * Build the text that goes between a block's opening and closing comments.
 *
 * When the block has inner blocks and its `innerContent` holds exactly one
 * `null` placeholder per inner block, the HTML chunks and the serialized
 * inner blocks are interleaved in `innerContent` order so wrapper markup is
 * kept. Otherwise inner blocks are joined with newlines (and `innerHTML` is
 * used as-is when there are no inner blocks).
 */
function serializeInnerContent(block: BlockSnapshot): string {
  const { innerBlocks, innerHTML, innerContent } = block;

  if (innerBlocks.length === 0) {
    return innerHTML;
  }

  const chunks: unknown[] = Array.isArray(innerContent) ? innerContent : [];
  const placeholderCount = chunks.filter((chunk) => typeof chunk !== 'string').length;

  // Without surrounding HTML, or when the placeholders cannot be matched to
  // the inner blocks, fall back to the plain newline-joined form.
  if (!innerHTML || placeholderCount !== innerBlocks.length) {
    return innerBlocks.map(serializeBlock).join('\n');
  }

  let nextInnerBlock = 0;
  let output = '';
  for (const chunk of chunks) {
    if (typeof chunk === 'string') {
      output += chunk;
    } else {
      output += serializeBlock(innerBlocks[nextInnerBlock]);
      nextInnerBlock++;
    }
  }
  return output;
}

/**
 * Serialize a single block to WordPress format
 *
 * A block with neither `innerHTML` nor inner blocks is written in the
 * self-closing form (`<!-- wp:name /-->`).
 */
function serializeBlock(block: BlockSnapshot): string {
  const { blockName, attrs, innerBlocks, innerHTML } = block;

  // Remove core/ prefix for serialization
  const shortName = stripCoreNamespace(blockName);

  // Build attributes JSON if present
  const attrsJson =
    attrs != null && Object.keys(attrs).length > 0 ? ` ${JSON.stringify(attrs)}` : '';

  // Self-closing block (no content, no inner blocks)
  if (!innerHTML && innerBlocks.length === 0) {
    return `<!-- wp:${shortName}${attrsJson} /-->`;
  }

  // Block with content
  const openTag = `<!-- wp:${shortName}${attrsJson} -->`;
  const closeTag = `<!-- /wp:${shortName} -->`;

  return `${openTag}\n${serializeInnerContent(block)}\n${closeTag}`;
}

/**
 * Check if a block is a reusable block (block reference)
 */
export function isReusableBlock(block: BlockSnapshot): boolean {
  return block.blockName === 'core/block' && typeof block.attrs.ref === 'number';
}

/**
 * Get the reference ID of a reusable block
 *
 * @returns The numeric `ref` attribute, or `null` when the block is not a
 *   reusable block
 */
export function getReusableBlockRef(block: BlockSnapshot): number | null {
  if (isReusableBlock(block)) {
    return block.attrs.ref as number;
  }
  return null;
}

/**
 * Flatten blocks to a single-level array (removes nesting)
 *
 * Blocks are listed in document order: each block is followed by its
 * descendants. The returned array holds the same block objects, not copies.
 */
export function flattenBlocks(blocks: BlockSnapshot[]): BlockSnapshot[] {
  const flat: BlockSnapshot[] = [];

  const visit = (list: BlockSnapshot[]): void => {
    for (const block of list) {
      flat.push(block);
      if (block.innerBlocks.length > 0) {
        visit(block.innerBlocks);
      }
    }
  };

  visit(blocks);
  return flat;
}

/**
 * Count total blocks including nested
 */
export function countBlocks(blocks: BlockSnapshot[]): number {
  return flattenBlocks(blocks).length;
}

/**
 * Get all unique block types used
 *
 * @returns Distinct block names (top-level and nested), sorted
 */
export function getBlockTypes(blocks: BlockSnapshot[]): string[] {
  const types = new Set<string>();
  for (const block of flattenBlocks(blocks)) {
    types.add(block.blockName);
  }
  return Array.from(types).sort();
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/littlebearapps/wp-navigator-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.