read_file
Read CSV or Excel files with chunking support for large datasets. Specify sheet names, row offsets, and limits to extract data efficiently.
Instructions
Read an entire CSV or Excel file with optional chunking for large files
Input Schema
Table (JSON Schema)
| Name | Required | Description | Default |
|---|---|---|---|
| filePath | Yes | Path to the CSV or Excel file | |
| sheet | No | Sheet name for Excel files (optional, defaults to first sheet) | |
| offset | No | Starting row index for chunked reading (0-based, optional) | |
| limit | No | Maximum number of rows to return (optional, enables chunking) | |
Implementation Reference
- src/handlers/data-operations.ts:5-98 (handler)The primary handler implementation for the 'read_file' MCP tool. Handles file reading, chunking with offset/limit, metadata generation, warnings, and large file suggestions.async readFile(args: ToolArgs): Promise<ToolResponse> { try { if (!args.filePath) { return { content: [ { type: 'text', text: JSON.stringify({ success: false, error: 'Missing required parameter: filePath', }, null, 2), }, ], }; } const { filePath, sheet, offset, limit } = args; const result = await readFileContentWithWarnings(filePath, sheet); // Store original data info for metadata const totalRows = result.data.length; const totalColumns = result.data[0]?.length || 0; const headers = totalRows > 0 ? result.data[0] : []; // Apply chunking if offset or limit are specified let chunkedData = result.data; let chunkMetadata = null; if (offset !== undefined || limit !== undefined) { const requestedOffset = offset || 0; const requestedLimit = limit || (totalRows - requestedOffset); // Validate chunk boundaries const { validOffset, validLimit } = validateChunkBoundaries(result.data, requestedOffset, requestedLimit); const endRow = validOffset + validLimit; chunkedData = result.data.slice(validOffset, endRow); chunkMetadata = { offset: validOffset, limit: validLimit, totalRows, returnedRows: chunkedData.length, hasMore: endRow < totalRows, nextOffset: endRow < totalRows ? endRow : null, note: validOffset > 0 && totalRows > 0 ? "This chunk doesn't include headers. Consider including row 0 for headers." 
: undefined, }; } const response: any = { success: true, data: chunkedData, rowCount: chunkedData.length, columnCount: totalColumns, headers: headers, }; // Add chunk metadata if chunking was used if (chunkMetadata) { response.chunkInfo = chunkMetadata; } // Include warnings if they exist if (result.warnings) { response.warnings = result.warnings; } // Add suggestion for large files if (!chunkMetadata && totalRows > 10000) { response.suggestion = `Large file detected (${totalRows} rows). Consider using offset/limit parameters for chunked reading to avoid token limits.`; } return { content: [ { type: 'text', text: JSON.stringify(response, null, 2), }, ], }; } catch (error) { return { content: [ { type: 'text', text: JSON.stringify({ success: false, error: error instanceof Error ? error.message : 'Unknown error occurred', }, null, 2), }, ], }; } }
- src/index.ts:1199-1200 (registration)MCP server registration mapping the tool name 'read_file' to the DataOperationsHandler.readFile method in the CallToolRequestSchema handler.case 'read_file': return await this.dataOpsHandler.readFile(toolArgs);
- src/index.ts:72-96 (schema)Tool schema definition including name, description, and input schema validation for the 'read_file' tool, provided in ListToolsRequestSchema response.name: 'read_file', description: 'Read an entire CSV or Excel file with optional chunking for large files', inputSchema: { type: 'object', properties: { filePath: { type: 'string', description: 'Path to the CSV or Excel file', }, sheet: { type: 'string', description: 'Sheet name for Excel files (optional, defaults to first sheet)', }, offset: { type: 'number', description: 'Starting row index for chunked reading (0-based, optional)', }, limit: { type: 'number', description: 'Maximum number of rows to return (optional, enables chunking)', }, }, required: ['filePath'], }, },
- src/utils/file-utils.ts:32-112 (helper)Core helper function that performs the actual file reading for CSV and Excel files, generates warnings for data quality issues, and returns parsed data. Called by the read_file handler.export async function readFileContentWithWarnings(filePath: string, sheet?: string): Promise<FileReadResult> { const ext = path.extname(filePath).toLowerCase(); const absolutePath = path.resolve(filePath); const warnings: string[] = []; try { await fs.access(absolutePath); } catch { throw new Error(`File not found: ${filePath}`); } if (ext === '.csv') { const content = await fs.readFile(absolutePath, 'utf-8'); // Check for binary content that might cause issues if (content.includes('\u0000') || content.includes('\uFFFD')) { throw new Error('File appears to contain binary data and cannot be read as CSV'); } try { const parsed = csv.parse(content, { skip_empty_lines: true, relax_quotes: true, relax_column_count: true, }); // Additional validation - ensure we have some valid data if (parsed.length === 0) { throw new Error('empty file: No valid CSV data found in file'); } // Check for malformed CSV issues and add warnings if (parsed.length > 1) { const expectedColumns = parsed[0].length; let inconsistentRows = 0; for (let i = 1; i < parsed.length; i++) { if (parsed[i].length !== expectedColumns) { inconsistentRows++; } } if (inconsistentRows > 0) { warnings.push(`CSV file has ${inconsistentRows} rows with inconsistent column count`); } } // Check for potential data quality issues const emptyRows = parsed.filter((row: any[]) => row.every((cell: any) => cell === '')).length; if (emptyRows > 0) { warnings.push(`Found ${emptyRows} completely empty rows`); } return { data: parsed, warnings: warnings.length > 0 ? warnings : undefined }; } catch (parseError) { throw new Error(`Failed to parse CSV file: ${parseError instanceof Error ? 
parseError.message : 'Unknown parsing error'}`); } } else if (ext === '.xlsx' || ext === '.xls') { const workbook = new ExcelJS.Workbook(); await workbook.xlsx.readFile(absolutePath); const sheetName = sheet || workbook.worksheets[0]?.name; const worksheet = workbook.getWorksheet(sheetName); if (!worksheet) { throw new Error(`Sheet "${sheetName}" not found in workbook`); } const data: any[][] = []; worksheet.eachRow((row, rowNumber) => { const rowData: any[] = []; row.eachCell((cell, colNumber) => { rowData[colNumber - 1] = cell.value || ''; }); data.push(rowData); }); return { data, warnings: warnings.length > 0 ? warnings : undefined }; } else { throw new Error('Unsupported file format. Please use .csv, .xlsx, or .xls files.'); } }