read_file
Read CSV or Excel files with chunking support for large datasets. Specify sheet names, row offsets, and limits to extract data efficiently.
Instructions
Read an entire CSV or Excel file with optional chunking for large files
Input Schema
Table (JSON Schema)
| Name | Required | Description | Default |
|---|---|---|---|
| filePath | Yes | Path to the CSV or Excel file | |
| sheet | No | Sheet name for Excel files (optional, defaults to first sheet) | |
| offset | No | Starting row index for chunked reading (0-based, optional) | |
| limit | No | Maximum number of rows to return (optional, enables chunking) | |
Implementation Reference
- src/handlers/data-operations.ts:5-98 (handler)The primary handler implementation for the 'read_file' MCP tool. Handles file reading, chunking with offset/limit, metadata generation, warnings, and large file suggestions.async readFile(args: ToolArgs): Promise<ToolResponse> { try { if (!args.filePath) { return { content: [ { type: 'text', text: JSON.stringify({ success: false, error: 'Missing required parameter: filePath', }, null, 2), }, ], }; } const { filePath, sheet, offset, limit } = args; const result = await readFileContentWithWarnings(filePath, sheet); // Store original data info for metadata const totalRows = result.data.length; const totalColumns = result.data[0]?.length || 0; const headers = totalRows > 0 ? result.data[0] : []; // Apply chunking if offset or limit are specified let chunkedData = result.data; let chunkMetadata = null; if (offset !== undefined || limit !== undefined) { const requestedOffset = offset || 0; const requestedLimit = limit || (totalRows - requestedOffset); // Validate chunk boundaries const { validOffset, validLimit } = validateChunkBoundaries(result.data, requestedOffset, requestedLimit); const endRow = validOffset + validLimit; chunkedData = result.data.slice(validOffset, endRow); chunkMetadata = { offset: validOffset, limit: validLimit, totalRows, returnedRows: chunkedData.length, hasMore: endRow < totalRows, nextOffset: endRow < totalRows ? endRow : null, note: validOffset > 0 && totalRows > 0 ? "This chunk doesn't include headers. Consider including row 0 for headers." 
: undefined, }; } const response: any = { success: true, data: chunkedData, rowCount: chunkedData.length, columnCount: totalColumns, headers: headers, }; // Add chunk metadata if chunking was used if (chunkMetadata) { response.chunkInfo = chunkMetadata; } // Include warnings if they exist if (result.warnings) { response.warnings = result.warnings; } // Add suggestion for large files if (!chunkMetadata && totalRows > 10000) { response.suggestion = `Large file detected (${totalRows} rows). Consider using offset/limit parameters for chunked reading to avoid token limits.`; } return { content: [ { type: 'text', text: JSON.stringify(response, null, 2), }, ], }; } catch (error) { return { content: [ { type: 'text', text: JSON.stringify({ success: false, error: error instanceof Error ? error.message : 'Unknown error occurred', }, null, 2), }, ], }; } }
- src/index.ts:1199-1200 (registration)MCP server registration mapping the tool name 'read_file' to the DataOperationsHandler.readFile method in the CallToolRequestSchema handler.case 'read_file': return await this.dataOpsHandler.readFile(toolArgs);
- src/index.ts:72-96 (schema)Tool schema definition including name, description, and input schema validation for the 'read_file' tool, provided in ListToolsRequestSchema response.name: 'read_file', description: 'Read an entire CSV or Excel file with optional chunking for large files', inputSchema: { type: 'object', properties: { filePath: { type: 'string', description: 'Path to the CSV or Excel file', }, sheet: { type: 'string', description: 'Sheet name for Excel files (optional, defaults to first sheet)', }, offset: { type: 'number', description: 'Starting row index for chunked reading (0-based, optional)', }, limit: { type: 'number', description: 'Maximum number of rows to return (optional, enables chunking)', }, }, required: ['filePath'], }, },
- src/utils/file-utils.ts:32-112 (helper)Core helper function that performs the actual file reading for CSV and Excel files, generates warnings for data quality issues, and returns parsed data. Called by the read_file handler.export async function readFileContentWithWarnings(filePath: string, sheet?: string): Promise<FileReadResult> { const ext = path.extname(filePath).toLowerCase(); const absolutePath = path.resolve(filePath); const warnings: string[] = []; try { await fs.access(absolutePath); } catch { throw new Error(`File not found: ${filePath}`); } if (ext === '.csv') { const content = await fs.readFile(absolutePath, 'utf-8'); // Check for binary content that might cause issues if (content.includes('\u0000') || content.includes('\uFFFD')) { throw new Error('File appears to contain binary data and cannot be read as CSV'); } try { const parsed = csv.parse(content, { skip_empty_lines: true, relax_quotes: true, relax_column_count: true, }); // Additional validation - ensure we have some valid data if (parsed.length === 0) { throw new Error('empty file: No valid CSV data found in file'); } // Check for malformed CSV issues and add warnings if (parsed.length > 1) { const expectedColumns = parsed[0].length; let inconsistentRows = 0; for (let i = 1; i < parsed.length; i++) { if (parsed[i].length !== expectedColumns) { inconsistentRows++; } } if (inconsistentRows > 0) { warnings.push(`CSV file has ${inconsistentRows} rows with inconsistent column count`); } } // Check for potential data quality issues const emptyRows = parsed.filter((row: any[]) => row.every((cell: any) => cell === '')).length; if (emptyRows > 0) { warnings.push(`Found ${emptyRows} completely empty rows`); } return { data: parsed, warnings: warnings.length > 0 ? warnings : undefined }; } catch (parseError) { throw new Error(`Failed to parse CSV file: ${parseError instanceof Error ? 
parseError.message : 'Unknown parsing error'}`); } } else if (ext === '.xlsx' || ext === '.xls') { const workbook = new ExcelJS.Workbook(); await workbook.xlsx.readFile(absolutePath); const sheetName = sheet || workbook.worksheets[0]?.name; const worksheet = workbook.getWorksheet(sheetName); if (!worksheet) { throw new Error(`Sheet "${sheetName}" not found in workbook`); } const data: any[][] = []; worksheet.eachRow((row, rowNumber) => { const rowData: any[] = []; row.eachCell((cell, colNumber) => { rowData[colNumber - 1] = cell.value || ''; }); data.push(rowData); }); return { data, warnings: warnings.length > 0 ? warnings : undefined }; } else { throw new Error('Unsupported file format. Please use .csv, .xlsx, or .xls files.'); } }