get_comments
Extract all comments from DOCX files including text, authors, dates, IDs, and threaded replies to analyze feedback and track document revisions.
Instructions
Get all comments from the document with IDs, authors, dates, text, and anchored paragraph IDs. Includes threaded replies. Read-only.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Path to the DOCX file. | |
Implementation Reference
- MCP tool handler for 'get_comments', which resolves the session and calls session.doc.getComments().
/**
 * MCP tool handler for `get_comments`.
 *
 * Resolves the session for the request via `resolveSessionForTool`, reads the
 * comments from the session's document, and returns them (mapped through
 * `mapComment`) together with the normalized path of the session's original file.
 *
 * @param manager - Session manager used to resolve the session and normalize the path.
 * @param params - Tool parameters; `file_path` is forwarded to session resolution.
 * @returns The resolution failure response when the session cannot be resolved,
 *   an `ok` response with `comments` and `file_path` on success, or an
 *   `err('COMMENT_ERROR', …)` response when anything inside the read step throws.
 */
export async function getComments(
  manager: SessionManager,
  params: { file_path?: string },
): Promise<ToolResponse> {
  const resolution = await resolveSessionForTool(manager, params, { toolName: 'get_comments' });
  if (!resolution.ok) {
    return resolution.response;
  }

  const { session, metadata } = resolution;
  try {
    const rawComments = await session.doc.getComments();
    const comments = rawComments.map((entry) => mapComment(entry));
    // Path normalization stays inside the try so a failure here is also
    // reported as COMMENT_ERROR rather than escaping as an exception.
    const file_path = manager.normalizePath(session.originalPath);
    return ok(mergeSessionResolutionMetadata({ comments, file_path }, metadata));
  } catch (e: unknown) {
    return err('COMMENT_ERROR', errorMessage(e));
  }
}
- Core implementation that parses the OOXML comment structure and threads replies.
/**
 * Reads an attribute by namespace + local name, falling back to its qualified
 * (prefixed) name. An empty string is treated as "absent" because some DOM
 * implementations return '' instead of null for a missing attribute, which
 * would otherwise defeat the fallback.
 */
function readCommentAttr(
  el: Element,
  ns: string,
  localName: string,
  qualifiedName: string,
): string | null {
  return el.getAttributeNS(ns, localName) || el.getAttribute(qualifiedName) || null;
}

/**
 * Extracts all comments from a DOCX package and returns them as a thread tree.
 *
 * Steps:
 *  1. Parse `word/comments.xml` and build one `Comment` per `<w:comment>`.
 *  2. Scan `documentXml` for `<w:commentRangeStart>` elements and record, for
 *     each referenced comment, the bookmark id of the enclosing `<w:p>`.
 *  3. Parse `word/commentsExtended.xml` (if present) and attach replies to
 *     their parents using the `paraId` / `paraIdParent` links.
 *
 * @param zip - DOCX archive accessor; parts are read with `readTextOrNull`.
 * @param documentXml - Parsed main document, used only to resolve anchors.
 * @returns Root-level comments; replies are nested under `replies`. Returns an
 *   empty array when the package has no comments part or no comment elements.
 */
export async function getComments(zip: DocxZip, documentXml: Document): Promise<Comment[]> {
  const commentsText = await zip.readTextOrNull('word/comments.xml');
  if (!commentsText) return [];

  const commentsDoc = parseXml(commentsText);
  const commentEls = commentsDoc.getElementsByTagNameNS(OOXML.W_NS, W.comment);
  if (commentEls.length === 0) return [];

  // Lookup tables: paragraph id -> owning comment, and comment id -> comment.
  const byParaId = new Map<string, Comment>();
  const byId = new Map<number, Comment>();

  for (let i = 0; i < commentEls.length; i++) {
    const el = commentEls.item(i) as Element;

    const idStr = readCommentAttr(el, OOXML.W_NS, 'id', 'w:id');
    const id = idStr ? parseInt(idStr, 10) : -1;
    // NaN compares false against `< 0`, so it must be rejected explicitly.
    if (Number.isNaN(id) || id < 0) continue;

    const author = readCommentAttr(el, OOXML.W_NS, 'author', 'w:author') ?? '';
    const date = readCommentAttr(el, OOXML.W_NS, 'date', 'w:date') ?? '';
    const initials = readCommentAttr(el, OOXML.W_NS, 'initials', 'w:initials') ?? '';

    // Extract text from <w:t> elements, skipping annotationRef runs
    const text = extractCommentText(el);

    const comment: Comment = {
      id,
      author,
      date,
      initials,
      text,
      paragraphId: null,
      anchoredParagraphId: null,
      replies: [],
    };

    // `paragraphId` keeps reporting the first paragraph's paraId, but every
    // paragraph of the comment is indexed so that a commentsExtended.xml link
    // resolves no matter which paragraph of a multi-paragraph comment it names.
    const paras = el.getElementsByTagNameNS(OOXML.W_NS, W.p);
    for (let j = 0; j < paras.length; j++) {
      const p = paras.item(j) as Element;
      const paraId = readCommentAttr(p, OOXML.W14_NS, 'paraId', 'w14:paraId');
      if (j === 0) comment.paragraphId = paraId;
      if (paraId) byParaId.set(paraId, comment);
    }

    byId.set(id, comment);
  }

  // Resolve anchoredParagraphId by scanning documentXml for commentRangeStart elements
  const rangeStarts = documentXml.getElementsByTagNameNS(OOXML.W_NS, W.commentRangeStart);
  for (let i = 0; i < rangeStarts.length; i++) {
    const rs = rangeStarts.item(i) as Element;
    const cidStr = readCommentAttr(rs, OOXML.W_NS, 'id', 'w:id');
    if (!cidStr) continue;

    const comment = byId.get(parseInt(cidStr, 10));
    if (!comment) continue;

    // Walk up through element ancestors to find the enclosing <w:p>
    let parent = rs.parentNode;
    while (parent && parent.nodeType === 1) {
      const pel = parent as Element;
      if (pel.localName === W.p && pel.namespaceURI === OOXML.W_NS) {
        comment.anchoredParagraphId = getParagraphBookmarkId(pel);
        break;
      }
      parent = parent.parentNode;
    }
  }

  // Build the thread tree from commentsExtended.xml in a single pass. A comment
  // is recorded as a reply only once it has actually been attached to a parent,
  // so a reply whose parent cannot be resolved remains a root instead of being
  // dropped from the result.
  const attachedReplies = new Set<Comment>();
  const extText = await zip.readTextOrNull('word/commentsExtended.xml');
  if (extText) {
    const exEls = parseXml(extText).getElementsByTagNameNS(OOXML.W15_NS, 'commentEx');
    for (let i = 0; i < exEls.length; i++) {
      const ex = exEls.item(i) as Element;
      const childParaId = readCommentAttr(ex, OOXML.W15_NS, 'paraId', 'w15:paraId');
      const parentParaId = readCommentAttr(ex, OOXML.W15_NS, 'paraIdParent', 'w15:paraIdParent');
      if (!childParaId || !parentParaId) continue;

      const child = byParaId.get(childParaId);
      const parentComment = byParaId.get(parentParaId);
      if (!child || !parentComment) continue;
      // A self-link would make the comment its own reply (a cycle that cannot
      // be serialized); a repeated entry would attach the same reply twice.
      if (child === parentComment || attachedReplies.has(child)) continue;

      parentComment.replies.push(child);
      attachedReplies.add(child);
    }
  }

  // Root-level comments are those never attached as someone's reply.
  const roots: Comment[] = [];
  for (const comment of byId.values()) {
    if (!attachedReplies.has(comment)) roots.push(comment);
  }
  return roots;
}