docx-openFile

Open a .docx file from disk into memory for editing and management within the DOCX MCP Server, returning a file ID for subsequent operations.

Instructions

Open a .docx file from disk into memory and return id.

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`id`	No
`path`	Yes

Implementation Reference

src/index.ts:210-216 (handler)

Main handler for 'docx-openFile' tool: validates args, parses DOCX file to JSON using helper, generates ID if needed, registers in DocRegistry, returns ID.

case "docx-openFile": {
  // Validate the raw tool arguments against the declared input schema.
  const input = parseArgs<{ id?: string; path: string }>(args, tools["docx-openFile"].inputSchema);
  // Load the file at the requested path and convert it to the JSON document model.
  const parsed = await parseDocxFileToJson(input.path);
  // Use the caller-supplied id when one was given; otherwise mint a fresh one.
  const docId = input.id ?? nanoid();
  registry.open(docId, parsed);
  return ok({ id: docId });
}

src/index.ts:77-80 (schema)

Input schema definition for 'docx-openFile' tool, requiring 'path' and optional 'id'.

"docx-openFile": {
  description: "Open a .docx file from disk into memory and return id.",
  inputSchema: { type: "object", required: ["path"], properties: { id: { type: "string" }, path: { type: "string" } } }
},

src/index.ts:101-103 (registration)

Registers the listTools endpoint which exposes 'docx-openFile' via the tools object.

// Answer ListTools requests by advertising every entry of the `tools` map
// (which includes 'docx-openFile') with its name, description and input schema.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const advertised = Object.entries(tools).map(([name, t]) => {
    return { name, description: t.description, inputSchema: t.inputSchema as any };
  });
  return { tools: advertised };
});

src/parser.ts:18-115 (helper)

Core helper: Parses DOCX ZIP buffer to structured JSON (metadata + content blocks like paragraphs, tables). Called indirectly by handler.

/** `w:val` values that explicitly switch an OOXML on/off run property (w:b, w:i) off. */
const DOCX_TOGGLE_OFF_VALUES: readonly string[] = ["false", "0", "off"];

/** `w:val` values of w:u that mean "no underline". */
const DOCX_UNDERLINE_OFF_VALUES: readonly string[] = ["none"];

/**
 * Decides whether a run property parsed by xml2js is switched on.
 *
 * The mere presence of the element is not enough: `<w:b w:val="0"/>` is an
 * explicit "not bold" that overrides an inherited style, and
 * `<w:u w:val="none"/>` is an explicit "not underlined".
 *
 * @param prop      The xml2js value for the property (normally a one-element array), or undefined.
 * @param offValues `w:val` strings that mean the property is disabled.
 * @returns `true` when the property is present and not disabled, otherwise `undefined`
 *          (so the key carries no value in the emitted run object).
 */
function docxRunFlag(prop: any, offValues: readonly string[]): true | undefined {
  if (!prop) return undefined;
  const first = Array.isArray(prop) ? prop[0] : prop;
  const val = first?.["$"]?.["w:val"];
  if (typeof val === "string" && offValues.includes(val)) return undefined;
  return true;
}

/**
 * Parses an in-memory DOCX archive into the DocxJSON structure.
 *
 * Three parts of the ZIP are read; each is optional and simply skipped when absent:
 * - `docProps/core.xml`  -> title, subject, creator, description, keywords,
 *                           lastModifiedBy, category, createdAt, modifiedAt
 * - `docProps/app.xml`   -> company, manager
 * - `word/document.xml`  -> top-level paragraphs, headings and tables
 *
 * Known limitations of this basic extraction:
 * - Body children are visited grouped by tag name (all entries of one tag, then
 *   all entries of the next), because the parsed body exposes one array per tag.
 *   The relative order of paragraphs and tables in the document is therefore
 *   NOT preserved in `content`.
 * - Only `w:r` elements that are direct children of a paragraph are read, and
 *   runs whose text is empty are dropped.
 * - Table cells yield plain text runs only (no bold/italics/underline).
 *
 * Errors raised by `JSZip.loadAsync` or `parseStringPromise` are not caught
 * here and propagate to the caller as a rejected promise.
 *
 * @param buf Raw bytes of the .docx file.
 * @returns The extracted metadata and content blocks.
 */
export async function parseDocxBufferToJson(buf: Uint8Array): Promise<DocxJSON> {
  const zip = await JSZip.loadAsync(buf as any);

  // Parse core properties
  const coreXml = await zip.file("docProps/core.xml")?.async("string");
  const appXml = await zip.file("docProps/app.xml")?.async("string");
  const meta: DocxJSON["meta"] = {};
  if (coreXml) {
    const core = await parseStringPromise(coreXml);
    const c = core["cp:coreProperties"] || {};
    meta.title = textOf(c["dc:title"]?.[0]);
    meta.subject = textOf(c["dc:subject"]?.[0]);
    meta.creator = textOf(c["dc:creator"]?.[0]);
    meta.description = textOf(c["dc:description"]?.[0]);
    meta.keywords = textOf(c["cp:keywords"]?.[0]);
    meta.lastModifiedBy = textOf(c["cp:lastModifiedBy"]?.[0]);
    meta.category = textOf(c["cp:category"]?.[0]);
    const created = textOf(c["dcterms:created"]?.[0]);
    const modified = textOf(c["dcterms:modified"]?.[0]);
    // Timestamps are only set when present, unlike the fields above.
    if (created) meta.createdAt = created;
    if (modified) meta.modifiedAt = modified;
  }
  if (appXml) {
    // company/manager sometimes in app.xml (not always)
    const app = await parseStringPromise(appXml);
    const a = app.Properties || {};
    meta.company = textOf(a.Company?.[0]);
    meta.manager = textOf(a.Manager?.[0]);
  }

  // Parse document.xml to extract paragraphs/tables at a basic level
  const docXml = await zip.file("word/document.xml")?.async("string");
  const content: any[] = [];
  if (docXml) {
    const doc = await parseStringPromise(docXml);
    const body = doc["w:document"]?.["w:body"]?.[0];
    // The parsed body holds one array per tag name, so the sequence below is
    // grouped by tag rather than following the original document order.
    const seq = [] as any[];
    for (const key of Object.keys(body || {})) {
      const arr = (body as any)[key];
      if (Array.isArray(arr)) {
        for (const item of arr) seq.push({ tag: key, node: item });
      }
    }
    for (const item of seq) {
      if (item.tag === "w:p") {
        const p = item.node;
        const pPr = p["w:pPr"]?.[0];
        // A paragraph style id containing "Heading1".."Heading6" marks a heading.
        let headingLevel: number | undefined;
        const styleVal = pPr?.["w:pStyle"]?.[0]?.["$"]?.["w:val"];
        if (typeof styleVal === "string") {
          const m = /Heading([1-6])/.exec(styleVal);
          if (m) headingLevel = parseInt(m[1], 10);
        }
        const runs = [] as any[];
        for (const r of p["w:r"] || []) {
          const t = textOf(r["w:t"]?.[0]);
          if (t) {
            const rPr = r["w:rPr"]?.[0] || {};
            runs.push({
              type: "text",
              text: t,
              // Honour explicit "off" values instead of treating presence as "on".
              bold: docxRunFlag(rPr["w:b"], DOCX_TOGGLE_OFF_VALUES),
              italics: docxRunFlag(rPr["w:i"], DOCX_TOGGLE_OFF_VALUES),
              underline: docxRunFlag(rPr["w:u"], DOCX_UNDERLINE_OFF_VALUES),
            });
          }
        }
        content.push(headingLevel ? { type: "heading", level: headingLevel, children: runs } : { type: "paragraph", children: runs });
      } else if (item.tag === "w:tbl") {
        const tbl = item.node;
        const rows = [] as any[];
        for (const tr of tbl["w:tr"] || []) {
          const cells = [] as any[];
          for (const tc of tr["w:tc"] || []) {
            const paras = [] as any[];
            for (const p of tc["w:p"] || []) {
              // Cell paragraphs keep text only; run formatting is not extracted here.
              const runs = [] as any[];
              for (const r of p["w:r"] || []) {
                const t = textOf(r["w:t"]?.[0]);
                if (t) runs.push({ type: "text", text: t });
              }
              paras.push({ type: "paragraph", children: runs });
            }
            cells.push({ children: paras });
          }
          rows.push({ cells });
        }
        content.push({ type: "table", rows });
      }
      // Any other body child (e.g. section properties) is ignored.
    }
  }

  const json: DocxJSON = { meta, content };
  return json;
}

src/docx-utils.ts:114-117 (helper)

DocRegistry.open: Registers the parsed JSON as a managed document by calling create.

/**
 * Registers an already-parsed document under the given id.
 *
 * "Open" here means taking JSON the caller has already loaded (for example
 * from disk) and handing it to `create`; no file access happens in this method.
 *
 * @param id   Identifier to register the document under.
 * @param json Parsed document content.
 * @returns Whatever `create` returns for the newly registered document.
 */
open(id: DocId, json: DocxJSON): ManagedDoc {
  const managed = this.create(id, json);
  return managed;
}

DOCX MCP Server

docx-openFile

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API