#!/usr/bin/env node
/**
* Google Drive MCP Server - Content Processing Examples
*
* This file demonstrates various ways to process and extract content
* from different file types using the MCP server tools.
*/
import { GoogleDriveMCPClient } from '../mcp-clients/generic-client.js';
/**
 * Walks through seven content-processing patterns against Google Drive files:
 * full content retrieval, character-range and page-range chunking, in-file
 * search, per-file-type sampling, structure extraction, and a naive
 * summarization pass. Everything is logged to the console; each example
 * catches its own errors so one failure does not stop the tour.
 *
 * @returns {Promise<void>}
 */
async function contentProcessingExamples() {
  // NOTE(review): placeholder server path and OAuth credentials — substitute
  // real values before running.
  const client = new GoogleDriveMCPClient('/path/to/google-drive-mcp-server/dist/index.js', {
    GOOGLE_CLIENT_ID: 'your-client-id.googleusercontent.com',
    GOOGLE_CLIENT_SECRET: 'your-client-secret'
  });
  try {
    await client.connect();
    console.log('=== Content Processing Examples ===\n');
    // First, find some files to work with (PDFs, Google Docs, .docx).
    const searchResults = await client.searchFiles('*', {
      fileType: [
        'application/pdf',
        'application/vnd.google-apps.document',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
      ],
      limit: 5
    });
    if (!searchResults.files || searchResults.files.length === 0) {
      console.log('No suitable files found for content processing examples');
      return;
    }
    // All single-file examples below operate on the first search hit.
    const testFile = searchResults.files[0];
    console.log(`Using test file: ${testFile.name} (${testFile.mimeType})\n`);
    // Example 1: Get complete file content
    console.log('1. Getting complete file content');
    try {
      // Second argument presumably toggles content extraction — TODO confirm
      // against GoogleDriveMCPClient.getFile.
      const fullContent = await client.getFile(testFile.id, true);
      console.log(`File size: ${formatFileSize(fullContent.size)}`);
      console.log(`Content length: ${fullContent.content?.length || 0} characters`);
      console.log(`Content preview: ${fullContent.content?.substring(0, 200) || 'No content'}...`);
    } catch (error) {
      console.log(`Error getting full content: ${error.message}`);
    }
    console.log();
    // Example 2: Get content by character range
    console.log('2. Getting content by character range (first 1000 characters)');
    try {
      const charChunk = await client.getContentChunk(testFile.id, {
        startChar: 0,
        endChar: 1000
      });
      console.log(`Chunk length: ${charChunk.content?.length || 0} characters`);
      console.log(`Content: ${charChunk.content?.substring(0, 300) || 'No content'}...`);
    } catch (error) {
      console.log(`Error getting character chunk: ${error.message}`);
    }
    console.log();
    // Example 3: Get content by page range (for PDFs and documents)
    console.log('3. Getting content by page range (pages 1-2)');
    try {
      const pageChunk = await client.getContentChunk(testFile.id, {
        startPage: 1,
        endPage: 2
      });
      console.log(`Page chunk length: ${pageChunk.content?.length || 0} characters`);
      console.log(`Content preview: ${pageChunk.content?.substring(0, 300) || 'No content'}...`);
    } catch (error) {
      console.log(`Error getting page chunk: ${error.message}`);
    }
    console.log();
    // Example 4: Search for specific content within the file.
    // Tries each term in order and stops at the first one that produces matches.
    console.log('4. Searching for specific content within the file');
    const searchTerms = ['summary', 'introduction', 'conclusion', 'overview', 'abstract'];
    for (const term of searchTerms) {
      try {
        const searchResult = await client.getContentChunk(testFile.id, {
          searchQuery: term,
          contextLines: 2
        });
        if (searchResult.matches && searchResult.matches.length > 0) {
          console.log(`Found "${term}": ${searchResult.matches.length} matches`);
          console.log(`First match: ${searchResult.matches[0].content?.substring(0, 200) || 'No content'}...`);
          break;
        }
      } catch (error) {
        console.log(`Error searching for "${term}": ${error.message}`);
      }
    }
    console.log();
    // Example 5: Process different file types (up to three of the search hits).
    console.log('5. Processing different file types');
    for (const file of searchResults.files.slice(0, 3)) {
      console.log(`\nProcessing: ${file.name}`);
      console.log(`Type: ${file.mimeType}`);
      try {
        const metadata = await client.getFileMetadata(file.id);
        console.log(`Size: ${formatFileSize(metadata.size)}`);
        console.log(`Pages: ${metadata.pageCount || 'Unknown'}`);
        // Get a small sample of content
        const sample = await client.getContentChunk(file.id, {
          startChar: 0,
          endChar: 500
        });
        console.log(`Content sample: ${sample.content?.substring(0, 150) || 'No content'}...`);
        // Analyze content structure: rough line/word counts of the sample.
        if (sample.content) {
          const lines = sample.content.split('\n').filter(line => line.trim());
          const words = sample.content.split(/\s+/).filter(word => word.trim());
          console.log(`Structure: ${lines.length} lines, ~${words.length} words in sample`);
        }
      } catch (error) {
        console.log(`Error processing ${file.name}: ${error.message}`);
      }
    }
    console.log();
    // Example 6: Extract structured content (headers, lists, etc.) using
    // markdown-style regexes over the full extracted text.
    console.log('6. Extracting structured content');
    try {
      const structuredContent = await client.getFile(testFile.id, true);
      if (structuredContent.content) {
        const content = structuredContent.content;
        // Find headers (markdown style: one or more leading '#')
        const headers = content.match(/^#+\s+.+$/gm) || [];
        console.log(`Found ${headers.length} headers:`);
        headers.slice(0, 5).forEach((header, index) => {
          console.log(` ${index + 1}. ${header.trim()}`);
        });
        // Find bulleted lists ('-', '*', or '+')
        const listItems = content.match(/^[\s]*[-*+]\s+.+$/gm) || [];
        console.log(`Found ${listItems.length} list items`);
        // Find numbered lists
        const numberedItems = content.match(/^[\s]*\d+\.\s+.+$/gm) || [];
        console.log(`Found ${numberedItems.length} numbered list items`);
        // Find tables (basic markdown table detection: any '|...|' run)
        const tables = content.match(/\|.+\|/g) || [];
        console.log(`Found ${tables.length} potential table rows`);
      }
    } catch (error) {
      console.log(`Error extracting structured content: ${error.message}`);
    }
    console.log();
    // Example 7: Content summarization approach
    console.log('7. Content summarization approach');
    try {
      // Get the first few chunks of content (up to 3 x 2000 chars); stop early
      // when a chunk comes back empty (end of document).
      const chunks = [];
      const chunkSize = 2000;
      for (let i = 0; i < 3; i++) {
        const chunk = await client.getContentChunk(testFile.id, {
          startChar: i * chunkSize,
          endChar: (i + 1) * chunkSize
        });
        if (chunk.content && chunk.content.trim()) {
          chunks.push(chunk.content);
        } else {
          break;
        }
      }
      console.log(`Extracted ${chunks.length} chunks for summarization`);
      // Simple content analysis over the concatenated chunks.
      const allContent = chunks.join(' ');
      const sentences = allContent.split(/[.!?]+/).filter(s => s.trim().length > 10);
      const words = allContent.split(/\s+/).filter(w => w.trim());
      console.log(`Content analysis:`);
      console.log(` - ${sentences.length} sentences`);
      console.log(` - ${words.length} words`);
      // NOTE(review): if no sentences were found this prints NaN — acceptable
      // for a demo, but worth guarding in real code.
      console.log(` - Average sentence length: ${Math.round(words.length / sentences.length)} words`);
      // Extract key sentences (the first sentence of each chunk).
      const keySentences = chunks.map(chunk => {
        const chunkSentences = chunk.split(/[.!?]+/).filter(s => s.trim().length > 10);
        return chunkSentences.length > 0 ? chunkSentences[0].trim() : '';
      }).filter(s => s);
      console.log(`Key sentences:`);
      keySentences.forEach((sentence, index) => {
        console.log(` ${index + 1}. ${sentence.substring(0, 100)}...`);
      });
    } catch (error) {
      console.log(`Error in content summarization: ${error.message}`);
    }
  } catch (error) {
    console.error('Content processing examples error:', error.message);
  } finally {
    // Always tear down the MCP connection, even after errors.
    await client.disconnect();
  }
}
/**
 * Processes up to ten Drive files (PDFs and Google Docs) in sequence:
 * fetches metadata, skips files over 50 MB, samples the first 1000 characters,
 * computes simple word/line counts, then prints the top five files by word
 * count. Files are handled one at a time with a small delay between requests
 * to stay under API rate limits.
 *
 * @returns {Promise<void>}
 */
async function batchContentProcessing() {
  // NOTE(review): placeholder server path and OAuth credentials — substitute
  // real values before running.
  const client = new GoogleDriveMCPClient('/path/to/google-drive-mcp-server/dist/index.js', {
    GOOGLE_CLIENT_ID: 'your-client-id.googleusercontent.com',
    GOOGLE_CLIENT_SECRET: 'your-client-secret'
  });
  try {
    await client.connect();
    console.log('\n=== Batch Content Processing ===\n');
    // Find multiple files for batch processing
    const searchResults = await client.searchFiles('*', {
      fileType: [
        'application/pdf',
        'application/vnd.google-apps.document'
      ],
      limit: 10
    });
    if (!searchResults.files || searchResults.files.length === 0) {
      console.log('No files found for batch processing');
      return;
    }
    console.log(`Processing ${searchResults.files.length} files in batch...\n`);
    const results = [];
    // Deliberately sequential (not Promise.all) so the inter-request delay
    // below actually throttles API traffic.
    for (const [index, file] of searchResults.files.entries()) {
      console.log(`Processing ${index + 1}/${searchResults.files.length}: ${file.name}`);
      try {
        // Get metadata
        const metadata = await client.getFileMetadata(file.id);
        // Skip very large files
        if (metadata.size > 50 * 1024 * 1024) { // 50MB
          console.log(` Skipping (too large: ${formatFileSize(metadata.size)})`);
          continue;
        }
        // Get content summary (first 1000 characters)
        const contentChunk = await client.getContentChunk(file.id, {
          startChar: 0,
          endChar: 1000
        });
        // Basic content analysis: word/line counts of the sampled text only.
        const content = contentChunk.content || '';
        const wordCount = content.split(/\s+/).filter(w => w.trim()).length;
        const lineCount = content.split('\n').filter(l => l.trim()).length;
        results.push({
          name: file.name,
          id: file.id,
          type: file.mimeType,
          size: metadata.size,
          wordCount: wordCount,
          lineCount: lineCount,
          preview: content.substring(0, 200) + (content.length > 200 ? '...' : ''),
          modifiedTime: file.modifiedTime
        });
        console.log(` ✓ Processed (${wordCount} words, ${formatFileSize(metadata.size)})`);
        // Add small delay to avoid rate limiting
        await new Promise(resolve => setTimeout(resolve, 100));
      } catch (error) {
        // Per-file failures are reported but do not abort the batch.
        console.log(` ✗ Error: ${error.message}`);
      }
    }
    // Display batch results
    console.log(`\n=== Batch Processing Results ===`);
    console.log(`Successfully processed: ${results.length} files`);
    // Sort by word count, descending (mutates `results` in place).
    results.sort((a, b) => b.wordCount - a.wordCount);
    console.log(`\nTop files by content length:`);
    results.slice(0, 5).forEach((result, index) => {
      console.log(`${index + 1}. ${result.name}`);
      console.log(` Words: ${result.wordCount}, Size: ${formatFileSize(result.size)}`);
      console.log(` Preview: ${result.preview}`);
      console.log();
    });
  } catch (error) {
    console.error('Batch content processing error:', error.message);
  } finally {
    // Always tear down the MCP connection, even after errors.
    await client.disconnect();
  }
}
/**
 * Formats a byte count as a human-readable size string.
 *
 * @param {number|null|undefined} bytes - Size in bytes; null/undefined yields 'Unknown'.
 * @returns {string} e.g. '0 Bytes', '1.5 KB', '3.2 MB'; 'Unknown' when the size is missing.
 */
function formatFileSize(bytes) {
  // `== null` rather than `!bytes`: a legitimate 0-byte file must not be
  // reported as 'Unknown' (and Math.log(0) is -Infinity anyway).
  if (bytes == null) return 'Unknown';
  if (bytes === 0) return '0 Bytes';
  const sizes = ['Bytes', 'KB', 'MB', 'GB'];
  // Clamp to the largest known unit so TB-scale inputs don't index past
  // the end of `sizes` (which previously printed 'undefined').
  const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), sizes.length - 1);
  return Math.round(bytes / Math.pow(1024, i) * 100) / 100 + ' ' + sizes[i];
}
// Run both example suites, but only when this file is executed directly
// (not when it is imported as a module).
if (import.meta.url === `file://${process.argv[1]}`) {
  const runAllExamples = async () => {
    await contentProcessingExamples();
    await batchContentProcessing();
  };
  runAllExamples().catch(console.error);
}