Skip to main content
Glama

Google Drive MCP Server

by ducla5
google-docs-processor.ts (12.9 kB)
/**
 * Google Docs content processor
 * Handles Google Docs API integration and content conversion to markdown
 */

import { google, docs_v1 } from 'googleapis';
import { OAuth2Client } from 'google-auth-library';
import {
  ProcessedContent,
  ContentMetadata,
  DocumentStructure,
  ContentChunk,
  Heading,
  ImageInfo,
  TableInfo
} from '../types/content.js';

/** Chunk size (in characters) used when the caller does not supply a positive one. */
const DEFAULT_CHUNK_SIZE = 4000;

/**
 * Common English function words used by the language heuristic.
 * Hoisted so the pattern is compiled once instead of once per word per call.
 */
const ENGLISH_WORD_PATTERN = /\b(?:the|and|or|but|in|on|at|to|for|of|with|by)\b/g;

/**
 * Options accepted by {@link GoogleDocsProcessor.processGoogleDoc}.
 */
export interface GoogleDocsProcessorOptions {
  /** Target maximum chunk length in characters; non-positive or missing values fall back to 4000. */
  chunkSize?: number;
  /** Declared for callers; currently not consulted by the processor. */
  preserveFormatting?: boolean;
  /** Inline objects are emitted as image placeholders unless this is explicitly `false`. */
  includeImages?: boolean;
  /** Tables are converted unless this is explicitly `false`. */
  includeTables?: boolean;
}

/**
 * Fetches a Google Docs document through the Docs API and converts it to
 * markdown, together with metadata, a section structure and content chunks.
 */
export class GoogleDocsProcessor {
  private readonly docsApi: docs_v1.Docs;

  /**
   * @param auth OAuth2 client handed to the Docs v1 API client.
   */
  constructor(auth: OAuth2Client) {
    this.docsApi = google.docs({ version: 'v1', auth });
  }

  /**
   * Process Google Docs file and convert to markdown
   *
   * @param fileId Document id passed to `documents.get`.
   * @param options Conversion and chunking options.
   * @returns Markdown plus derived metadata, structure and chunks.
   * @throws Error prefixed with "Failed to process Google Doc:" when the fetch
   *   or the conversion fails.
   */
  async processGoogleDoc(
    fileId: string,
    options: GoogleDocsProcessorOptions = {}
  ): Promise<ProcessedContent> {
    try {
      // Get document content from Google Docs API
      const response = await this.docsApi.documents.get({ documentId: fileId });
      const document = response.data;

      if (!document || !document.body) {
        throw new Error('Failed to retrieve document content');
      }

      // Convert document to markdown
      const conversion = this.convertToMarkdown(document, options);

      // A zero, negative or NaN size would degrade to one chunk per paragraph,
      // so anything that is not a positive number uses the default.
      const chunkSize =
        options.chunkSize && options.chunkSize > 0 ? options.chunkSize : DEFAULT_CHUNK_SIZE;
      const chunks = this.createChunks(conversion.markdown, chunkSize);

      const metadata: ContentMetadata = {
        wordCount: this.countWords(conversion.markdown),
        language: this.detectLanguage(conversion.markdown),
        headings: conversion.headings,
        images: conversion.images,
        tables: conversion.tables,
        lastProcessed: new Date()
      };

      const structure = this.analyzeStructure(conversion.markdown, conversion.headings);

      return {
        markdown: conversion.markdown,
        metadata,
        structure,
        chunks
      };
    } catch (error) {
      throw new Error(
        `Failed to process Google Doc: ${error instanceof Error ? error.message : 'Unknown error'}`
      );
    }
  }

  /**
   * Convert Google Docs document to markdown
   *
   * The document title (when present) is emitted as a level-1 heading, then
   * every structural element of the body is converted in order. `position`
   * values recorded for headings, images and tables are character offsets
   * into the markdown produced so far.
   */
  private convertToMarkdown(
    document: docs_v1.Schema$Document,
    options: GoogleDocsProcessorOptions
  ): {
    markdown: string;
    headings: Heading[];
    images: ImageInfo[];
    tables: TableInfo[];
  } {
    const headings: Heading[] = [];
    const images: ImageInfo[] = [];
    const tables: TableInfo[] = [];
    let markdown = '';
    let position = 0;

    if (!document.body?.content) {
      return { markdown: '', headings, images, tables };
    }

    // Process document title
    if (document.title) {
      markdown += `# ${document.title}\n\n`;
      headings.push({ level: 1, text: document.title, position: 0 });
      position = markdown.length;
    }

    // Process body content
    let previousWasListItem = false;
    for (const element of document.body.content) {
      const paragraph = element.paragraph;
      // Mirrors processParagraph: a bulleted paragraph styled as a heading is
      // rendered as a heading, not as a list item.
      const isListItem =
        Boolean(paragraph?.bullet) && this.getHeadingLevel(paragraph?.paragraphStyle) === 0;

      // List items end with a single newline. Without a blank line, the next
      // non-list block would be parsed as a continuation of the last item.
      const needsSeparator =
        previousWasListItem && !isListItem && !markdown.endsWith('\n\n');

      const result = this.processElement(
        element,
        options,
        needsSeparator ? position + 1 : position
      );

      if (!result.content) {
        // Element produced nothing (e.g. a skipped table); keep the list open.
        continue;
      }

      if (needsSeparator) {
        markdown += '\n';
      }
      markdown += result.content;
      headings.push(...result.headings);
      images.push(...result.images);
      tables.push(...result.tables);
      position = markdown.length;
      previousWasListItem = isListItem;
    }

    return { markdown, headings, images, tables };
  }

  /**
   * Process individual document element
   *
   * Handles paragraphs, tables (unless `includeTables` is `false`) and section
   * breaks; any other element kind yields empty content.
   */
  private processElement(
    element: docs_v1.Schema$StructuralElement,
    options: GoogleDocsProcessorOptions,
    position: number
  ): {
    content: string;
    headings: Heading[];
    images: ImageInfo[];
    tables: TableInfo[];
  } {
    const headings: Heading[] = [];
    const images: ImageInfo[] = [];
    const tables: TableInfo[] = [];
    let content = '';

    // Process paragraph
    if (element.paragraph) {
      const paragraphResult = this.processParagraph(element.paragraph, options, position);
      content += paragraphResult.content;
      headings.push(...paragraphResult.headings);
      images.push(...paragraphResult.images);
    }

    // Process table
    if (element.table && options.includeTables !== false) {
      const tableResult = this.processTable(element.table, position);
      content += tableResult.content;
      tables.push(...tableResult.tables);
    }

    // Process section break
    if (element.sectionBreak) {
      content += '\n---\n\n';
    }

    return { content, headings, images, tables };
  }

  /**
   * Process paragraph element
   *
   * Renders the paragraph as a heading, a list item or plain text. Paragraphs
   * with no visible text become a single newline.
   */
  private processParagraph(
    paragraph: docs_v1.Schema$Paragraph,
    options: GoogleDocsProcessorOptions,
    position: number
  ): {
    content: string;
    headings: Heading[];
    images: ImageInfo[];
  } {
    const headings: Heading[] = [];
    const images: ImageInfo[] = [];

    if (!paragraph.elements) {
      return { content: '\n', headings, images };
    }

    const headingLevel = this.getHeadingLevel(paragraph.paragraphStyle);
    let paragraphText = '';

    // Process paragraph elements
    for (const element of paragraph.elements) {
      if (element.textRun) {
        paragraphText += this.processTextRun(element.textRun);
      } else if (element.inlineObjectElement && options.includeImages !== false) {
        const imageResult = this.processInlineObject(element.inlineObjectElement, position);
        paragraphText += imageResult.content;
        images.push(...imageResult.images);
      }
    }

    const text = paragraphText.trim();
    let content: string;

    if (!text) {
      content = '\n';
    } else if (headingLevel > 0) {
      content = '#'.repeat(headingLevel) + ' ' + text + '\n\n';
      headings.push({ level: headingLevel, text, position });
    } else if (paragraph.bullet) {
      const listLevel = (paragraph.bullet.nestingLevel || 0) + 1;
      // Two spaces per level: a nested item must be indented at least to the
      // content column of its parent ("- " is two characters wide).
      const indent = '  '.repeat(Math.max(0, listLevel - 1));
      const marker = this.getListMarker(paragraph.bullet, listLevel);
      content = `${indent}${marker} ${text}\n`;
    } else {
      content = text + '\n\n';
    }

    return { content, headings, images };
  }

  /**
   * Process text run with formatting
   *
   * Bold, italic, underline, strikethrough and link styles are applied, in
   * that order, to the non-whitespace core of the run. Leading and trailing
   * whitespace (including the newline that ends a paragraph's last run) stays
   * outside the markers, because markers adjacent to whitespace are not
   * recognised as emphasis by markdown parsers.
   */
  private processTextRun(textRun: docs_v1.Schema$TextRun): string {
    const raw = textRun.content || '';
    const style = textRun.textStyle;
    const core = raw.trim();

    // Nothing to decorate: no style, or a whitespace-only run.
    if (!style || !core) {
      return raw;
    }

    // `core` starts with a non-whitespace character, so its first occurrence
    // in `raw` is exactly where the leading whitespace ends.
    const coreStart = raw.indexOf(core);
    const leading = raw.slice(0, coreStart);
    const trailing = raw.slice(coreStart + core.length);

    let text = core;
    if (style.bold) {
      text = `**${text}**`;
    }
    if (style.italic) {
      text = `*${text}*`;
    }
    if (style.underline) {
      text = `<u>${text}</u>`;
    }
    if (style.strikethrough) {
      text = `~~${text}~~`;
    }
    if (style.link?.url) {
      text = `[${text}](${style.link.url})`;
    }

    return `${leading}${text}${trailing}`;
  }

  /**
   * Process inline object (images, etc.)
   *
   * The inline object itself is not inspected yet: a fixed placeholder image
   * is emitted and recorded at `position`.
   */
  private processInlineObject(
    _inlineObject: docs_v1.Schema$InlineObjectElement,
    position: number
  ): {
    content: string;
    images: ImageInfo[];
  } {
    // For now, just add a placeholder for images
    // In a full implementation, you'd extract image data
    const imageInfo: ImageInfo = {
      alt: 'Image',
      position: position
    };

    return {
      content: '![Image](image-placeholder)\n\n',
      images: [imageInfo]
    };
  }

  /**
   * Process table element
   *
   * Emits a pipe table whose first row is treated as the header. The column
   * count is taken from the first row.
   */
  private processTable(
    table: docs_v1.Schema$Table,
    position: number
  ): {
    content: string;
    tables: TableInfo[];
  } {
    const tables: TableInfo[] = [];

    if (!table.tableRows) {
      return { content: '', tables };
    }

    const rows = table.tableRows.length;
    const columns = table.tableRows[0]?.tableCells?.length || 0;

    // Create markdown table
    let content = '\n';

    table.tableRows.forEach((row, rowIndex) => {
      if (!row.tableCells) return;

      const cells = row.tableCells.map(cell => ` ${this.extractCellContent(cell)} |`);
      content += '|' + cells.join('') + '\n';

      // Add header separator after first row
      if (rowIndex === 0) {
        content += '|' + ' --- |'.repeat(columns) + '\n';
      }
    });

    content += '\n';
    tables.push({ rows, columns, position });

    return { content, tables };
  }

  /**
   * Extract content from table cell
   *
   * Concatenates the raw text of every text run in the cell, flattens
   * newlines to spaces and escapes `|` so the text cannot terminate the
   * markdown table cell early.
   */
  private extractCellContent(cell: docs_v1.Schema$TableCell): string {
    if (!cell.content) return '';

    let content = '';
    for (const element of cell.content) {
      for (const paragraphElement of element.paragraph?.elements ?? []) {
        if (paragraphElement.textRun) {
          content += paragraphElement.textRun.content || '';
        }
      }
    }

    return content.trim().replace(/\n/g, ' ').replace(/\|/g, '\\|');
  }

  /**
   * Get heading level from paragraph style
   *
   * @returns 1-6 for `HEADING_1` … `HEADING_6`, otherwise 0.
   */
  private getHeadingLevel(style?: docs_v1.Schema$ParagraphStyle): number {
    const match = /^HEADING_([1-6])$/.exec(style?.namedStyleType ?? '');
    return match ? Number(match[1]) : 0;
  }

  /**
   * Get list marker for bullet points
   */
  private getListMarker(_bullet: docs_v1.Schema$Bullet, _level: number): string {
    // Simple implementation - could be enhanced to handle different bullet styles
    return '-';
  }

  /**
   * Create content chunks
   *
   * Paragraphs (separated by a blank line) are packed greedily into chunks of
   * at most `chunkSize` characters. A single paragraph longer than
   * `chunkSize` is kept whole as its own chunk. Each chunk's `position`
   * holds the offsets, within `content`, of the start of its first paragraph
   * and the end of its last paragraph.
   */
  private createChunks(content: string, chunkSize: number): ContentChunk[] {
    const chunks: ContentChunk[] = [];
    let current = '';
    let chunkStart = 0;
    let chunkEnd = 0;

    const pushChunk = (): void => {
      chunks.push({
        id: `chunk_${chunks.length}`,
        content: current.trim(),
        metadata: {
          wordCount: this.countWords(current),
          position: { start: chunkStart, end: chunkEnd }
        }
      });
    };

    // Offsets are tracked while splitting so they stay exact even when blank
    // paragraphs are skipped or the same paragraph text occurs more than once.
    let offset = 0;
    for (const paragraph of content.split('\n\n')) {
      const paragraphStart = offset;
      offset += paragraph.length + 2;

      if (!paragraph.trim()) continue;

      const candidate = current ? `${current}\n\n${paragraph}` : paragraph;

      if (current && candidate.length > chunkSize) {
        pushChunk();
        current = paragraph;
        chunkStart = paragraphStart;
      } else {
        if (!current) {
          chunkStart = paragraphStart;
        }
        current = candidate;
      }
      chunkEnd = paragraphStart + paragraph.length;
    }

    // Add final chunk
    if (current.trim()) {
      pushChunk();
    }

    return chunks;
  }

  /**
   * Analyze document structure
   *
   * Each heading opens a section that runs until the next heading (or the end
   * of the content). The table of contents mirrors the heading list.
   */
  private analyzeStructure(content: string, headings: Heading[]): DocumentStructure {
    const sections = headings.map((heading, index) => {
      const nextHeading = headings[index + 1];
      const endPosition = nextHeading ? nextHeading.position : content.length;

      return {
        title: heading.text,
        level: heading.level,
        startPosition: heading.position,
        endPosition,
        content: content.slice(heading.position, endPosition).trim()
      };
    });

    const toc = headings.map(heading => ({
      title: heading.text,
      level: heading.level,
      position: heading.position
    }));

    return {
      sections,
      toc,
      pageBreaks: [] // Google Docs doesn't have traditional page breaks
    };
  }

  /**
   * Count words in text
   *
   * Words are whitespace-separated tokens; empty or blank text counts as 0.
   */
  private countWords(text: string): number {
    return text
      .trim()
      .split(/\s+/)
      .filter(word => word.length > 0).length;
  }

  /**
   * Simple language detection
   *
   * Counts common English function words in the first 1000 characters.
   *
   * @returns `'en'` when more than five are found, otherwise `'unknown'`.
   */
  private detectLanguage(text: string): string {
    // Basic English detection - could be enhanced
    const sample = text.slice(0, 1000).toLowerCase();
    const englishCount = (sample.match(ENGLISH_WORD_PATTERN) || []).length;

    return englishCount > 5 ? 'en' : 'unknown';
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ducla5/gdriver-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.