#!/usr/bin/env python3
"""
Test script for Word and Excel document processing functionality
"""
import asyncio
import sys
from pathlib import Path
# Add project to path so the `nanonets_mcp` imports used by the tests below
# can be resolved (presumably without installing the package first).
# NOTE(review): this is an absolute, machine-specific location; on any host
# where the project does not live at this exact path the entry is a no-op and
# the imports only succeed if the package is otherwise importable.
sys.path.append('/home/arne/src/nanonets_mcp')
def _parse_supported_formats(formats_result):
    """Pull the ``supported_formats`` value out of a tool-call result.

    The result is unwrapped defensively because its exact shape is not fixed
    by this file: it may be a dict, a content object exposing ``.text``
    (expected to hold JSON), or a list/tuple wrapping one of those.
    NOTE(review): which of these shapes ``mcp.call_tool`` returns depends on
    the installed MCP SDK version -- confirm against the SDK in use.

    Returns:
        The ``supported_formats`` entry of the decoded payload (``[]`` when
        the key is missing), ``["Unable to parse"]`` when the text payload is
        not valid JSON, or ``["Unknown"]`` when no dict could be obtained.
    """
    import json

    payload = formats_result
    # Descend through (possibly nested) non-empty list/tuple wrappers to the
    # first element; an empty container is left as-is and reported "Unknown".
    while isinstance(payload, (list, tuple)) and payload:
        payload = payload[0]

    if not isinstance(payload, dict):
        if hasattr(payload, 'text'):
            # Content object wrapping a JSON document.
            try:
                payload = json.loads(payload.text)
            except (ValueError, TypeError):
                # ValueError covers json.JSONDecodeError; TypeError covers a
                # non-string ``text`` attribute.
                payload = {"supported_formats": ["Unable to parse"]}
        elif hasattr(payload, '__dict__'):
            payload = payload.__dict__

    if isinstance(payload, dict):
        return payload.get('supported_formats', [])
    return ["Unknown"]


async def test_document_support():
    """Check that the server exposes office-document processing.

    Prints the Word/Excel support flags, the registered tool names, whether
    the two office tools are among them, and the formats reported by the
    ``get_supported_formats`` tool. Tool availability is only reported, not
    enforced.

    Returns:
        ``True`` when at least one of the support flags is set and all server
        calls completed; ``False`` when neither flag is set or when any
        exception (including a failed import of the server module) occurred.
    """
    try:
        from nanonets_mcp.server import mcp, DOCX_SUPPORT, EXCEL_SUPPORT

        print("🧪 Testing Office Document Processing")
        print("=" * 50)

        # Test 1: Check support availability
        print(f"Word Document Support: {DOCX_SUPPORT}")
        print(f"Excel Document Support: {EXCEL_SUPPORT}")

        if not (DOCX_SUPPORT or EXCEL_SUPPORT):
            print("❌ No office document dependencies installed")
            print("Install with: pip install python-docx openpyxl pandas")
            return False

        # Test 2: List available tools
        tools = await mcp.list_tools()
        tool_names = [tool.name for tool in tools]
        print(f"Available tools: {tool_names}")

        word_tool_available = 'process_word_to_markdown' in tool_names
        excel_tool_available = 'process_excel_to_markdown' in tool_names
        print(f"Word processing tool available: {word_tool_available}")
        print(f"Excel processing tool available: {excel_tool_available}")

        # Test 3: Check supported formats
        formats_result = await mcp.call_tool('get_supported_formats', {})
        supported_formats = _parse_supported_formats(formats_result)
        print(f"Supported formats: {supported_formats}")

        print("\n✅ Office document processing functionality is configured!")
        return True
    except Exception as e:
        # Top-level boundary of this check: report the failure with a
        # traceback and signal it through the return value.
        print(f"❌ Error testing document functionality: {e}")
        import traceback
        traceback.print_exc()
        return False
async def create_sample_documents():
    """Create sample Word and Excel documents for testing.

    Writes ``tests/sample_docs/sample.docx`` (two headings, two paragraphs and
    a 3x3 table) when Word support is enabled, and
    ``tests/sample_docs/sample.xlsx`` (sheets "Employee Data" and
    "Financial Data") when Excel support is enabled. Paths are relative to the
    current working directory; the output directory is created if missing.

    Failures are reported on stdout and never raised; the function always
    returns ``None``.
    """
    print("\n๐ Creating sample documents for testing...")

    # Resolve the flags here instead of relying on module globals that only
    # exist after main() has run, so the function also works on its own.
    try:
        from nanonets_mcp.server import DOCX_SUPPORT, EXCEL_SUPPORT
    except ImportError:
        print("❌ Cannot import server module")
        return

    if not (DOCX_SUPPORT or EXCEL_SUPPORT):
        return

    docx_path = 'tests/sample_docs/sample.docx'
    xlsx_path = 'tests/sample_docs/sample.xlsx'

    # Both save() calls fail when the target directory is absent (e.g. on a
    # fresh checkout), so make sure it exists first.
    try:
        Path(docx_path).parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"❌ Error creating sample document directory: {e}")
        return

    # Create sample Word document
    if DOCX_SUPPORT:
        try:
            from docx import Document

            doc = Document()
            doc.add_heading('Sample Word Document', 0)
            doc.add_paragraph('This is a sample paragraph with some text.')
            doc.add_heading('Section 1', level=1)
            doc.add_paragraph('This is content under section 1.')

            # Add a table: header row followed by two data rows
            table_data = [
                ('Name', 'Age', 'City'),
                ('John Doe', '30', 'New York'),
                ('Jane Smith', '25', 'Los Angeles'),
            ]
            table = doc.add_table(rows=len(table_data), cols=3)
            for table_row, values in zip(table.rows, table_data):
                for cell, value in zip(table_row.cells, values):
                    cell.text = value

            doc.save(docx_path)
            print("✅ Created sample Word document: tests/sample_docs/sample.docx")
        except Exception as e:
            print(f"❌ Error creating Word document: {e}")

    # Create sample Excel document
    if EXCEL_SUPPORT:
        try:
            import openpyxl

            wb = openpyxl.Workbook()

            # First sheet (the workbook's default sheet, renamed)
            ws1 = wb.active
            ws1.title = "Employee Data"
            for record in (
                ('Name', 'Department', 'Salary'),
                ('Alice Johnson', 'Engineering', 75000),
                ('Bob Wilson', 'Marketing', 65000),
            ):
                # append() fills the next free row starting at column A, so
                # on an empty sheet these land in A1:C3.
                ws1.append(record)

            # Second sheet
            ws2 = wb.create_sheet("Financial Data")
            for record in (
                ('Quarter', 'Revenue', 'Expenses'),
                ('Q1', 150000, 120000),
                ('Q2', 180000, 140000),
            ):
                ws2.append(record)

            wb.save(xlsx_path)
            print("✅ Created sample Excel document: tests/sample_docs/sample.xlsx")
        except Exception as e:
            print(f"❌ Error creating Excel document: {e}")
def _result_text(result):
    """Return a printable string for a tool-call result.

    Non-empty list/tuple wrappers (possibly nested) are unwrapped to their
    first element; that element's ``text`` attribute is returned when it is a
    string, otherwise ``str()`` of the element. Anything else -- including a
    plain string, a dict or an empty container -- is stringified as a whole.
    NOTE(review): the exact shape returned by ``mcp.call_tool`` depends on the
    installed MCP SDK version -- confirm against the SDK in use.
    """
    item = result
    while isinstance(item, (list, tuple)) and item:
        item = item[0]
    text = getattr(item, 'text', None)
    return text if isinstance(text, str) else str(item)


def _print_preview(heading, text, limit):
    """Print *heading*, then at most *limit* characters of *text* between rules."""
    print(heading)
    print("=" * 50)
    print(text[:limit] + "..." if len(text) > limit else text)
    print("=" * 50)


async def test_with_sample_documents():
    """Run the Word and Excel conversion tools on the sample files.

    For each of ``tests/sample_docs/sample.docx`` and
    ``tests/sample_docs/sample.xlsx`` that exists and whose support flag is
    enabled, the matching server tool is called with the file path and a
    preview of the returned text is printed (300 characters for Word, 500 for
    Excel).

    Returns:
        ``True`` when no exception occurred -- also when a file was skipped
        because it is missing or unsupported -- and ``False`` otherwise
        (including a failed import of the server module).
    """
    print("\n๐ค Testing document processing with sample files...")

    try:
        # Import the flags locally so the function does not depend on module
        # globals that only exist after main() has run.
        from nanonets_mcp.server import mcp, DOCX_SUPPORT, EXCEL_SUPPORT

        # Test Word document
        word_file = Path('tests/sample_docs/sample.docx')
        if word_file.exists() and DOCX_SUPPORT:
            print("\n๐ Testing Word document processing...")
            result = await mcp.call_tool('process_word_to_markdown', {
                'docx_data': str(word_file)
            })
            _print_preview(
                "๐ Word Document Result (first 300 chars):",
                _result_text(result),
                300,
            )

        # Test Excel document
        excel_file = Path('tests/sample_docs/sample.xlsx')
        if excel_file.exists() and EXCEL_SUPPORT:
            print("\n๐ Testing Excel document processing...")
            result = await mcp.call_tool('process_excel_to_markdown', {
                'excel_data': str(excel_file)
            })
            _print_preview(
                "๐ Excel Document Result (first 500 chars):",
                _result_text(result),
                500,
            )

        return True
    except Exception as e:
        # Top-level boundary of this check: report with traceback, return False.
        print(f"❌ Error testing with sample documents: {e}")
        import traceback
        traceback.print_exc()
        return False
async def main():
    """Run the office-document test suite and print a summary.

    Steps: import the support flags from the server module, run the basic
    support check, and -- only if that passed -- create the sample documents
    and run the sample-document test. A sample test that was not run is
    reported as skipped rather than as a pass.

    Returns:
        ``True`` when both the basic check and the sample-document test
        passed, ``False`` otherwise (including when the server module cannot
        be imported).
    """
    print("🧪 Nanonets MCP Server Office Document Test Suite")
    print("=" * 60)

    # Import support flags globally: the other functions in this file read
    # DOCX_SUPPORT / EXCEL_SUPPORT as module-level names.
    global DOCX_SUPPORT, EXCEL_SUPPORT
    try:
        from nanonets_mcp.server import DOCX_SUPPORT, EXCEL_SUPPORT
    except ImportError:
        print("❌ Cannot import server module")
        return False

    # Test 1: Basic document support
    basic_ok = await test_document_support()

    # Tests 2 and 3 only make sense when the basic check passed.
    sample_ok = None  # None means "not run"
    if basic_ok:
        # Test 2: Create sample documents
        await create_sample_documents()
        # Test 3: Process sample documents
        sample_ok = await test_with_sample_documents()

    if sample_ok is None:
        sample_status = '⚠️ SKIPPED'
    elif sample_ok:
        sample_status = '✅ PASS'
    else:
        sample_status = '❌ FAIL'

    print("\n๐ Test Results:")
    print("=" * 50)
    print(f"Document Support: {'✅ PASS' if basic_ok else '❌ FAIL'}")
    print(f"Sample Document Test: {sample_status}")

    all_passed = bool(basic_ok and sample_ok)
    if all_passed:
        print("\n๐ ALL TESTS PASSED! Office document processing is ready!")
    else:
        print("\n⚠️ Some tests failed. Check the logs above.")
    return all_passed


if __name__ == "__main__":
    # Propagate the outcome to the shell so callers/CI can detect failures.
    sys.exit(0 if asyncio.run(main()) else 1)