Skip to main content
Glama

extract_revisions

Extract tracked changes and comments from DOCX files as structured JSON with before/after text per paragraph and revision details. Supports pagination for large documents without modifying the original file.

Instructions

Extract tracked changes as structured JSON with before/after text per paragraph, revision details, and comments. Supports pagination via offset and limit. Read-only - does not modify the document.

Input Schema

Table / JSON Schema

| Name      | Required | Description                    | Default |
|-----------|----------|--------------------------------|---------|
| file_path | Yes      | Path to the DOCX file.         | —       |
| offset    | No       | 0-based offset for pagination. | 0       |
| limit     | No       | Max entries per page (1-500).  | 50      |

Implementation Reference

  • The tool handler for "extract_revisions", which resolves the session, performs input validation, utilizes the core library to extract revisions, and handles pagination.
    /**
     * Tool handler for "extract_revisions": resolves the session, validates
     * pagination parameters, extracts tracked changes (cached per session so
     * repeated pages do not recompute), and returns one page of results.
     * Read-only — the original document is never modified.
     *
     * @param manager - Session manager used to resolve the session, the
     *   extraction cache, and path normalization.
     * @param params - Tool input: file_path plus optional offset/limit.
     * @returns A ToolResponse carrying the paginated changes, or an error
     *   payload (INVALID_LIMIT / INVALID_OFFSET / EXTRACTION_ERROR).
     */
    export async function extractRevisions_tool(
      manager: SessionManager,
      params: {
        file_path?: string;
        offset?: number;
        limit?: number;
      },
    ): Promise<ToolResponse> {
      const resolved = await resolveSessionForTool(manager, params, { toolName: 'extract_revisions' });
      if (!resolved.ok) return resolved.response;
      const { session, metadata } = resolved;

      // Validate limit. Number.isInteger also rejects NaN and fractional
      // values, which would otherwise slip past the plain range checks
      // (NaN < 1 and NaN > 500 are both false) and silently produce an
      // empty or misaligned page from slice().
      const limit = params.limit ?? 50;
      if (!Number.isInteger(limit) || limit < 1 || limit > 500) {
        return err('INVALID_LIMIT', `limit must be between 1 and 500, got ${limit}`, 'Provide a limit in the range 1–500.');
      }

      // Validate offset (same NaN/fractional concern as limit).
      const offset = params.offset ?? 0;
      if (!Number.isInteger(offset) || offset < 0) {
        return err('INVALID_OFFSET', `offset must be >= 0, got ${offset}`, 'Provide a non-negative offset.');
      }

      try {
        // Reuse the cached full extraction when available; pagination is
        // applied afterwards, so the cache serves every page.
        const cached = manager.getExtractionCache(session);
        let allChanges;

        if (cached) {
          allChanges = cached.changes;
        } else {
          // Compute the extraction from DOM clones of the session document.
          const docClone = session.doc.getDocumentXmlClone();
          const comments = await session.doc.getComments();
          const result = extractRevisions(docClone, comments);
          allChanges = result.changes;
          // Cache the full result so subsequent pages are cheap.
          manager.setExtractionCache(session, allChanges);
        }

        // Apply pagination over the full cached list.
        const totalChanges = allChanges.length;
        const page = allChanges.slice(offset, offset + limit);
        const hasMore = offset + limit < totalChanges;

        return ok(mergeSessionResolutionMetadata({
          changes: page,
          total_changes: totalChanges,
          has_more: hasMore,
          edit_revision: session.editRevision,
          file_path: manager.normalizePath(session.originalPath),
        }, metadata));
      } catch (e: unknown) {
        return err('EXTRACTION_ERROR', errorMessage(e));
      }
    }
  • The core logic for extracting revisions by comparing cloned documents with changes accepted vs rejected.
    /**
     * Core extraction: compares the tracked-changes DOM against two clones —
     * one with all changes accepted, one with all rejected — and reports,
     * per changed paragraph, the before/after text, revision entries, and
     * anchored comments. Optionally paginates via opts.offset/opts.limit.
     *
     * @param doc - Parsed document.xml DOM (not modified; clones are used).
     * @param comments - All document comments, matched by anchoredParagraphId.
     * @param opts - Optional pagination; defaults cover the entire result set.
     * @returns One page of changed paragraphs plus total count and has_more.
     */
    export function extractRevisions(
      doc: Document,
      comments: Comment[],
      opts?: { offset?: number; limit?: number },
    ): ExtractRevisionsResult {
      const body = doc.getElementsByTagNameNS(W_NS, 'body').item(0);
      if (!body) {
        return { changes: [], total_changes: 0, has_more: false };
      }

      // Materialize the two counterfactual documents. Paragraph bookmarks
      // let us correlate the same paragraph across all three DOMs.
      const withAccepted = doc.cloneNode(true) as Document;
      const withRejected = doc.cloneNode(true) as Document;
      acceptChanges(withAccepted);
      rejectChanges(withRejected);

      // Group comments by the paragraph they are anchored to.
      const anchored = new Map<string, Comment[]>();
      for (const comment of comments) {
        const key = comment.anchoredParagraphId;
        if (!key) continue;
        const bucket = anchored.get(key);
        if (bucket) {
          bucket.push(comment);
        } else {
          anchored.set(key, [comment]);
        }
      }

      // Walk every paragraph of the original tracked DOM, collecting only
      // those that carry revisions.
      const results: ParagraphRevision[] = [];
      for (const para of Array.from(body.getElementsByTagNameNS(W_NS, 'p'))) {
        if (!paragraphHasRevisions(para)) continue;

        const bookmarkId = getParagraphBookmarkId(para);
        if (!bookmarkId) continue; // bookmarks are injected during session resolution

        // Wholly-inserted/deleted paragraphs must not be resolved via
        // bookmarks in the corresponding clone: rejectChanges() relocates
        // bookmarks of removed paragraphs onto neighbours, which would
        // yield the wrong before_text.
        const wholeInsert = paragraphIsEntirelyInserted(para);
        const wholeDelete = paragraphIsEntirelyDeleted(para);

        // before_text comes from the all-rejected clone (empty if the
        // paragraph did not exist before the edits).
        let beforeText = '';
        if (!wholeInsert) {
          const match = findParagraphByBookmarkId(withRejected, bookmarkId);
          beforeText = match ? getParagraphText(match) : '';
        }

        // after_text comes from the all-accepted clone (empty if the
        // paragraph is removed once edits are accepted).
        let afterText = '';
        if (!wholeDelete) {
          const match = findParagraphByBookmarkId(withAccepted, bookmarkId);
          afterText = match ? getParagraphText(match) : '';
        }

        const revisions = collectRevisionEntries(para);

        // Drop structurally-empty paragraphs that carry only paragraph-level
        // markers (e.g. empty inserted paragraphs with pPr/rPr/ins only).
        if (revisions.length === 0 && beforeText === '' && afterText === '') continue;

        results.push({
          para_id: bookmarkId,
          before_text: beforeText,
          after_text: afterText,
          revisions,
          comments: (anchored.get(bookmarkId) ?? []).map(commentToRevisionComment),
        });
      }

      // Paginate; with no opts the whole result set is a single page.
      const totalChanges = results.length;
      const start = opts?.offset ?? 0;
      const count = opts?.limit ?? totalChanges;
      return {
        changes: results.slice(start, start + count),
        total_changes: totalChanges,
        has_more: start + count < totalChanges,
      };
    }
  • Type definition for the result returned by extractRevisions.
    /**
     * Result returned by extractRevisions.
     */
    export type ExtractRevisionsResult = {
      /** One page of changed paragraphs (after offset/limit are applied). */
      changes: ParagraphRevision[];
      /** Total number of changed paragraphs before pagination. */
      total_changes: number;
      /** True when more entries exist beyond this page. */
      has_more: boolean;
    };

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/UseJunior/safe-docx'

If you have feedback or need assistance with the MCP directory API, please join our Discord server