ynab-mcpb

Overview Schema Related Servers Score Discussions

ynab-mcpb
src
tools
reconciliation

csvParser.ts•21 KiB

import { randomUUID } from "node:crypto"; import * as chrono from "chrono-node"; import Papa from "papaparse"; import type { BankTransaction } from "../../types/reconciliation.js"; export interface CSVParseResult { transactions: BankTransaction[]; errors: ParseError[]; warnings: ParseWarning[]; meta: { detectedDelimiter: string; detectedColumns: string[]; totalRows: number; validRows: number; skippedRows: number; }; } export interface ParseError { row: number; field: string; message: string; rawValue: string; } export interface ParseWarning { row: number; message: string; } export interface BankPreset { name: string; dateColumn: string | string[]; amountColumn?: string | string[]; debitColumn?: string; creditColumn?: string; descriptionColumn: string | string[]; amountMultiplier?: number; /** Expected date format hint: 'YMD', 'MDY', 'DMY' */ dateFormat?: "YMD" | "MDY" | "DMY"; /** Whether the CSV has a header row */ header?: boolean; } // Presets for Canadian banks export const BANK_PRESETS: Record<string, BankPreset> = { td: { name: "TD Canada Trust", // Real TD credit card exports are typically // headerless with columns: // [Date, Description, Debit, Credit, Balance] // but some tools/scrapers produce a // headered variant: Date,Description,Amount. // // We default to headerless here and rely on // auto-detection + flexible column candidates // so both forms are supported. header: false, dateColumn: ["0", "Date"], amountColumn: ["Amount"], debitColumn: "2", creditColumn: "3", descriptionColumn: ["1", "Description"], dateFormat: "MDY", // TD typically uses MM/DD/YYYY }, rbc: { name: "RBC Royal Bank", dateColumn: ["Transaction Date", "Date"], debitColumn: "Debit", creditColumn: "Credit", descriptionColumn: ["Description 1", "Description", "Transaction"], dateFormat: "YMD", // RBC typically uses YYYY-MM-DD }, scotiabank: { name: "Scotiabank", dateColumn: ["Date", "Transaction Date"], amountColumn: ["Amount"], descriptionColumn: ["Description", "Transaction Details"], dateFormat: "DMY", // Scotiabank often uses DD/MM/YYYY }, wealthsimple: { name: "Wealthsimple", dateColumn: ["Date"], amountColumn: ["Amount"], descriptionColumn: ["Description", "Payee"], amountMultiplier: 1, dateFormat: "YMD", }, tangerine: { name: "Tangerine", dateColumn: ["Date", "Transaction date"], amountColumn: ["Amount"], descriptionColumn: ["Name", "Transaction name", "Memo"], dateFormat: "MDY", }, }; /** * Safe delimiters allowed for CSV parsing. * Restricted to common, safe characters to prevent injection attacks. */ export const SAFE_DELIMITERS = [",", ";", "\t", "|", " "] as const; export type SafeDelimiter = (typeof SAFE_DELIMITERS)[number]; /** * Validates that a delimiter is safe for CSV parsing. * @throws {Error} if delimiter is not in the safe list */ function validateDelimiter( delimiter: string, ): asserts delimiter is SafeDelimiter { if (!SAFE_DELIMITERS.includes(delimiter as SafeDelimiter)) { throw new Error( `Unsafe delimiter "${delimiter}". Allowed delimiters: ${SAFE_DELIMITERS.join(", ")}`, ); } } export interface ParseCSVOptions { /** Bank preset key (e.g., 'td', 'rbc') */ preset?: string; /** Multiply all amounts by -1 */ invertAmounts?: boolean; /** * Explicit CSV delimiter override (defaults to PapaParse auto-detection). * Must be one of: comma (,), semicolon (;), tab (\t), pipe (|), or space ( ) */ delimiter?: string; /** Manual column overrides */ columns?: { date?: string; amount?: string; debit?: string; credit?: string; description?: string; }; /** Date format hint */ dateFormat?: "YMD" | "MDY" | "DMY"; /** * Whether the CSV has a header row. * If false, columns must be specified by index (e.g., "0", "1"). * Defaults to true. */ header?: boolean; /** Maximum number of rows to process (default: 10000) */ maxRows?: number; /** Maximum file size in bytes (default: 10MB) */ maxBytes?: number; } /** * Attempt to auto-detect the bank format from raw content. * Strategies: * 1. Parse first 5 lines. * 2. Check for header matches (existing logic). * 3. Check for headerless patterns (TD specific: date, desc, debit, credit, balance). */ function autoDetectFormat( content: string, ): { preset?: string; header?: boolean } | undefined { const preview = Papa.parse(content, { preview: 5, header: false, // Parse as array first to inspect structure skipEmptyLines: true, }); if (preview.errors.length > 0 || preview.data.length === 0) return undefined; const rows = preview.data as string[][]; const firstRow = rows[0]; if (!firstRow) return undefined; // 1. Check for known headers (RBC, etc.) const headerMatch = detectPreset(firstRow); if (headerMatch) { // Find key in BANK_PRESETS const key = Object.keys(BANK_PRESETS).find( (k) => BANK_PRESETS[k] === headerMatch, ); if (key) return { preset: key, header: true }; } // 2. Check for TD Headerless Pattern // Typical TD row: [Date, Description, Debit, Credit, Balance] // Date: MM/DD/YYYY (e.g., 11/21/2025) // Debit/Credit: Numbers or empty // Balance: Number if (checkTDPattern(rows)) { return { preset: "td", header: false }; } return undefined; } function checkTDPattern(rows: string[][]): boolean { // Needs at least one valid row // Headerless TD exports typically have at least // Date, Description, Debit, Credit columns. Require // 4+ columns to avoid misclassifying generic // Date/Description/Amount formats as TD. const validRows = rows.filter((r) => r.length >= 4); if (validRows.length === 0) return false; // Check first few rows for MM/DD/YYYY date in column 0 // AND numeric values in columns 2, 3, 4 (if present) let matchCount = 0; for (const row of validRows) { // Col 0: Date MM/DD/YYYY if (!/^\d{1,2}\/\d{1,2}\/\d{4}$/.test(row[0] || "")) continue; // Col 2 (Debit) or Col 3 (Credit) must be numeric-ish if present const isDebitNumeric = !row[2] || /^-?[\d,.]+$/.test(row[2]); const isCreditNumeric = !row[3] || /^-?[\d,.]+$/.test(row[3]); if (isDebitNumeric && isCreditNumeric) { matchCount++; } } // If majority of preview rows match, it's likely TD return matchCount > validRows.length / 2; } /** * Parse a bank CSV file into BankTransaction objects. * * IMPORTANT: Amounts are converted to MILLIUNITS (integers) at this boundary. * This is the ONLY place where float-to-milliunit conversion happens. */ export function parseCSV( content: string, options: ParseCSVOptions = {}, ): CSVParseResult { const errors: ParseError[] = []; const warnings: ParseWarning[] = []; // Security: Validate delimiter if provided if (options.delimiter) { validateDelimiter(options.delimiter); } // Security: Check file size limit const MAX_BYTES = options.maxBytes ?? 10 * 1024 * 1024; // 10MB default if (content.length > MAX_BYTES) { throw new Error( `File size exceeds limit of ${Math.round(MAX_BYTES / 1024 / 1024)}MB`, ); } // Auto-detect format when preset or header are not fully specified let detectedPreset: string | undefined = options.preset; let detectedHeader: boolean | undefined = options.header; if (!detectedPreset || detectedHeader === undefined) { const autoResult = autoDetectFormat(content); if (autoResult) { if (!detectedPreset) { detectedPreset = autoResult.preset; } if (detectedHeader === undefined && autoResult.header !== undefined) { detectedHeader = autoResult.header; } } } // Determine header setting: Explicit > Detected > Preset > Default (true) let hasHeader = true; if (detectedHeader !== undefined) { hasHeader = detectedHeader; } else if (detectedPreset) { const preset = BANK_PRESETS[detectedPreset]; if (preset && preset.header !== undefined) { hasHeader = preset.header; } } const maxRows = options.maxRows ?? 10000; // Parse with PapaParse // Security: Use preview to limit rows parsed into memory (prevents memory exhaustion) const parsed = Papa.parse(content, { header: hasHeader, preview: maxRows + (hasHeader ? 1 : 0), // +1 for header row if present dynamicTyping: false, // We'll handle type conversion ourselves skipEmptyLines: true, transformHeader: (h) => h.trim(), ...(options.delimiter ? { delimiter: options.delimiter } : {}), }); if (parsed.errors.length > 0) { for (const err of parsed.errors) { errors.push({ row: err.row ?? 0, field: "csv", message: err.message, rawValue: "", }); } } const rows = parsed.data as (Record<string, string> | string[])[]; let columns: string[] = []; if (hasHeader) { columns = parsed.meta.fields ?? []; } else { // If no header, rows are arrays. Create dummy columns based on max length const maxLen = rows.reduce( (max, row) => Math.max(max, Array.isArray(row) ? row.length : 0), 0, ); columns = Array.from({ length: maxLen }, (_, i) => String(i)); } const preset = detectedPreset ? BANK_PRESETS[detectedPreset] : hasHeader ? detectPreset(columns) : undefined; // Determine column names (Priority: Options > Preset > Defaults) const dateCandidates = options.columns?.date ? [options.columns.date] : (preset?.dateColumn ?? ["Date", "Transaction Date", "Posted Date"]); const descCandidates = options.columns?.description ? [options.columns.description] : (preset?.descriptionColumn ?? [ "Description", "Payee", "Merchant", "Name", ]); const dateCol = findColumn(columns, dateCandidates, !hasHeader); const descCol = findColumn(columns, descCandidates, !hasHeader); let amountCol: string | null = null; let debitCol: string | null = null; let creditCol: string | null = null; if (options.columns?.debit && options.columns?.credit) { debitCol = findColumn(columns, [options.columns.debit], !hasHeader); creditCol = findColumn(columns, [options.columns.credit], !hasHeader); } else if ( preset?.debitColumn && preset?.creditColumn && !options.columns?.amount && // If a preset also defines an amount column, prefer that when headers // are present. This lets TD support both headerless (debit/credit) // and headered (Amount) variants while RBC still uses debit/credit // with headers. (hasHeader ? !preset?.amountColumn : true) ) { debitCol = findColumn(columns, [preset.debitColumn], !hasHeader); creditCol = findColumn(columns, [preset.creditColumn], !hasHeader); } else { const amountCandidates = options.columns?.amount ? [options.columns.amount] : (preset?.amountColumn ?? ["Amount", "CAD$", "Value"]); amountCol = findColumn(columns, amountCandidates, !hasHeader); } if (!dateCol) { errors.push({ row: 0, field: "date", message: `Could not identify date column from: ${columns.join(", ")}. Try using preset option (td, rbc, scotiabank, etc.) or specify columns manually with columns.date`, rawValue: columns.join(", "), }); } if (!amountCol && (!debitCol || !creditCol)) { if (!debitCol && !creditCol) { errors.push({ row: 0, field: "amount", message: `Could not identify amount column from: ${columns.join(", ")}. Try using preset option or specify columns manually with columns.amount (or columns.debit/credit for split columns)`, rawValue: columns.join(", "), }); } else if (!debitCol || !creditCol) { errors.push({ row: 0, field: "amount", message: `Could not identify debit/credit columns pair from: ${columns.join(", ")}. Found ${debitCol ? "debit" : "credit"} but missing ${debitCol ? "credit" : "debit"}. Specify both with columns.debit and columns.credit`, rawValue: columns.join(", "), }); } } const transactions: BankTransaction[] = []; const dateFormat = options.dateFormat ?? preset?.dateFormat; // Papa.parse preview already limited rows, but keep defensive check for (let i = 0; i < Math.min(rows.length, maxRows); i++) { const row = rows[i]; if (!row) continue; // Helper to get value const getValue = (colName: string | null): string => { if (!colName) return ""; if (Array.isArray(row)) { const idx = Number.parseInt(colName, 10); return String(row[idx] ?? ""); } return String(row[colName as keyof typeof row] ?? ""); }; const rowNum = i + (hasHeader ? 2 : 1); // 1-indexed. Header consumes line 1. const rowWarnings: string[] = []; // Parse date const rawDate = getValue(dateCol)?.trim() ?? ""; const parsedDate = parseDate(rawDate, dateFormat); if (!parsedDate) { errors.push({ row: rowNum, field: "date", message: `Could not parse date: "${rawDate}"`, rawValue: rawDate, }); continue; } // Use LOCAL date components (now derived from UTC date object) const dateStr = formatLocalDate(parsedDate); // Parse amount let amountMilliunits: number; let rawAmount: string; if (amountCol) { rawAmount = getValue(amountCol)?.trim() ?? ""; const parsedAmount = parseAmount(rawAmount); if (!parsedAmount.valid) { errors.push({ row: rowNum, field: "amount", message: parsedAmount.reason ?? `Invalid amount: "${rawAmount}"`, rawValue: rawAmount, }); continue; } amountMilliunits = parsedAmount.valueMilliunits; } else if (debitCol && creditCol) { const debit = getValue(debitCol)?.trim() ?? ""; const credit = getValue(creditCol)?.trim() ?? ""; rawAmount = debit || credit; const parsedDebit = parseAmount(debit); const parsedCredit = parseAmount(credit); if (!parsedDebit.valid && debit) { errors.push({ row: rowNum, field: "amount", message: parsedDebit.reason ?? `Invalid debit amount: "${debit}"`, rawValue: debit, }); continue; } if (!parsedCredit.valid && credit) { errors.push({ row: rowNum, field: "amount", message: parsedCredit.reason ?? `Invalid credit amount: "${credit}"`, rawValue: credit, }); continue; } const debitMilliunits = parsedDebit.valid ? parsedDebit.valueMilliunits : 0; const creditMilliunits = parsedCredit.valid ? parsedCredit.valueMilliunits : 0; // Warn if both debit and credit have values (ambiguous) if (Math.abs(debitMilliunits) > 0 && Math.abs(creditMilliunits) > 0) { const warning = `Both Debit (${debit}) and Credit (${credit}) have values - using Debit`; rowWarnings.push(warning); warnings.push({ row: rowNum, message: warning }); } if (Math.abs(debitMilliunits) > 0) { amountMilliunits = -Math.abs(debitMilliunits); // Debits are outflows (negative) } else if (Math.abs(creditMilliunits) > 0) { amountMilliunits = Math.abs(creditMilliunits); // Credits are inflows (positive) } else { errors.push({ row: rowNum, field: "amount", message: "Missing debit/credit amount", rawValue: `${debit}|${credit}`, }); continue; } // Warn if debit column contains negative value (unusual) if (debitMilliunits < 0) { const warning = `Debit column contains negative value (${debit}) - treating as positive debit`; rowWarnings.push(warning); warnings.push({ row: rowNum, message: warning }); } } else { continue; } // Apply amount inversion if needed const multiplier = options.invertAmounts ? -1 : (preset?.amountMultiplier ?? 1); amountMilliunits *= multiplier; // Parse description & Sanitize let rawDesc = getValue(descCol)?.trim() ?? ""; // Security: Remove potentially malicious/confusing Unicode characters: // - ASCII control chars (0x00-0x1F, 0x7F) // - C1 control chars (0x80-0x9F) // - Bidirectional text overrides (U+202A-202E, U+2066-2069) // - Zero-width characters (U+200B-200D, U+FEFF) // - Unicode line/paragraph separators (U+2028-2029) rawDesc = rawDesc // biome-ignore lint/suspicious/noControlCharactersInRegex: strip ASCII/C1 control chars .replace(/[\u0000-\u001F\u007F-\u009F]/g, "") // ASCII + C1 control chars .replace(/[\u202A-\u202E\u2066-\u2069]/g, "") // Bidirectional overrides .replace(/\u200B|\u200C|\u200D|\uFEFF/g, "") // Zero-width chars .replace(/[\u2028-\u2029]/g, "") // Line/paragraph separators .substring(0, 500); transactions.push({ id: randomUUID(), date: dateStr, amount: amountMilliunits, payee: rawDesc || "Unknown", sourceRow: rowNum, raw: { date: rawDate, amount: rawAmount, description: rawDesc, }, ...(rowWarnings.length > 0 && { warnings: rowWarnings }), }); } return { transactions, errors, warnings, meta: { detectedDelimiter: parsed.meta.delimiter || ",", detectedColumns: columns, totalRows: rows.length, validRows: transactions.length, skippedRows: rows.length - transactions.length, }, }; } function parseDate( raw: string, formatHint?: "YMD" | "MDY" | "DMY", ): Date | null { if (!raw) return null; // 1. Try ISO format first (unambiguous) const isoMatch = raw.match(/^(\d{4})-(\d{2})-(\d{2})/); if (isoMatch) { const [, year, month, day] = isoMatch; if (!year || !month || !day) return null; return new Date( Date.UTC( Number.parseInt(year, 10), Number.parseInt(month, 10) - 1, Number.parseInt(day, 10), ), ); } // 2. Try explicit format hint for ambiguous numeric dates // Pattern: X/X/X or X-X-X where X can be 1-4 digits const numericMatch = raw.match(/^(\d{1,4})[/-](\d{1,2})[/-](\d{1,4})$/); if (numericMatch && formatHint) { const [, a, b, c] = numericMatch; if (!a || !b || !c) return null; let year: number; let month: number; let day: number; switch (formatHint) { case "YMD": // YYYY/MM/DD or YY/MM/DD year = Number.parseInt(a, 10); month = Number.parseInt(b, 10); day = Number.parseInt(c, 10); break; case "MDY": // US format: MM/DD/YYYY or MM/DD/YY month = Number.parseInt(a, 10); day = Number.parseInt(b, 10); year = Number.parseInt(c, 10); break; case "DMY": // European/UK format: DD/MM/YYYY or DD/MM/YY day = Number.parseInt(a, 10); month = Number.parseInt(b, 10); year = Number.parseInt(c, 10); break; } // Handle 2-digit years if (year < 100) year += 2000; // 25 -> 2025 if (month >= 1 && month <= 12 && day >= 1 && day <= 31) { return new Date(Date.UTC(year, month - 1, day)); } } // 3. Fallback to chrono-node (handles natural language, many formats) // Timezone strategy: chrono-node returns local time, but we extract only date components // and reconstruct as UTC to ensure consistent date handling across all parsing paths. // This prevents "off-by-one-day" errors from timezone conversions during date comparison. const parsed = chrono.parseDate(raw); if (parsed) { return new Date( Date.UTC(parsed.getFullYear(), parsed.getMonth(), parsed.getDate()), ); } return null; } function formatLocalDate(date: Date): string { const year = date.getUTCFullYear(); const month = String(date.getUTCMonth() + 1).padStart(2, "0"); const day = String(date.getUTCDate()).padStart(2, "0"); return `${year}-${month}-${day}`; } function findColumn( available: string[], candidates: string | string[], exactIndex = false, ): string | null { const candidateList = Array.isArray(candidates) ? candidates : [candidates]; for (const candidate of candidateList) { if (exactIndex) { // If exact index required (no header), check if candidate matches an index if (available.includes(candidate)) return candidate; } else { const lower = candidate.toLowerCase(); const found = available.find((col) => col.toLowerCase() === lower); if (found) return found; } } if (!exactIndex) { // Try partial match for (const candidate of candidateList) { const lower = candidate.toLowerCase(); const found = available.find((col) => col.toLowerCase().includes(lower)); if (found) return found; } } return null; } function detectPreset(columns: string[]): BankPreset | undefined { const colSet = new Set(columns.map((c) => c.toLowerCase())); if (colSet.has("description 1") || colSet.has("account type")) { return BANK_PRESETS["rbc"]; } if (columns.some((c) => c.toLowerCase().includes("cad$"))) { return BANK_PRESETS["td"]; } // Generic headered TD-style exports: Date, Description, Amount if (colSet.has("date") && colSet.has("description") && colSet.has("amount")) { return BANK_PRESETS["td"]; } return undefined; } // Currency helpers remain the same const CURRENCY_SYMBOLS = /[$€£¥]/g; const CURRENCY_CODES = /\b(CAD|USD|EUR|GBP)\b/gi; function parseAmount(str: string): { valid: boolean; valueMilliunits: number; reason?: string; } { if (!str || !str.trim()) { return { valid: false, valueMilliunits: 0, reason: "Missing amount value" }; } let cleaned = str .replace(CURRENCY_SYMBOLS, "") .replace(CURRENCY_CODES, "") .trim(); // Handle parentheses as negative: (123.45) → -123.45 if (cleaned.startsWith("(") && cleaned.endsWith(")")) { cleaned = `-${cleaned.slice(1, -1)}`; } // Detect European format: 1.234,56 → 1234.56 if (/^-?\d{1,3}(\.\d{3})+,\d{2}$/.test(cleaned)) { cleaned = cleaned.replace(/\./g, "").replace(",", "."); } // Handle thousands separator: 1,234.56 or 1,234 → 1234.56 or 1234 cleaned = cleaned.replace(/,(\d{3})/g, "$1"); const dollars = Number.parseFloat(cleaned); if (!Number.isFinite(dollars)) { return { valid: false, valueMilliunits: 0, reason: `Invalid amount: "${str}"`, }; } // Convert to milliunits: $1.00 → 1000 return { valid: true, valueMilliunits: Math.round(dollars * 1000) }; }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dizzlkheinz/ynab-mcpb'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

csvParser.ts•21 KiB