import { readFileSync } from "node:fs";
import { parse } from "csv-parse/sync";
export interface CsvData {
headers: string[];
rows: Record<string, string>[];
rowCount: number;
}
export function loadCsv(filePath: string): CsvData {
const content = readFileSync(filePath, "utf-8");
const rows = parse(content, {
columns: true,
skip_empty_lines: true,
trim: true,
}) as Record<string, string>[];
const headers = rows.length > 0 ? Object.keys(rows[0]) : [];
return { headers, rows, rowCount: rows.length };
}
export function describeCsv(data: CsvData): string {
const lines = [
`Rows: ${data.rowCount}`,
`Columns (${data.headers.length}): ${data.headers.join(", ")}`,
"",
"Column types (sampled from first 100 rows):",
];
const sample = data.rows.slice(0, 100);
for (const header of data.headers) {
const values = sample.map((r) => r[header]).filter(Boolean);
const numericCount = values.filter((v) => !isNaN(Number(v))).length;
const uniqueCount = new Set(values).size;
const nullCount = sample.length - values.length;
const type = numericCount > values.length * 0.8 ? "numeric" : "string";
lines.push(
` ${header}: ${type} (${uniqueCount} unique, ${nullCount} null)`,
);
}
return lines.join("\n");
}
export function filterRows(
data: CsvData,
column: string,
operator: string,
value: string,
): Record<string, string>[] {
return data.rows.filter((row) => {
const cell = row[column];
if (cell === undefined) return false;
const numCell = Number(cell);
const numValue = Number(value);
const isNumeric = !isNaN(numCell) && !isNaN(numValue);
switch (operator) {
case "eq":
return cell === value;
case "neq":
return cell !== value;
case "gt":
return isNumeric && numCell > numValue;
case "gte":
return isNumeric && numCell >= numValue;
case "lt":
return isNumeric && numCell < numValue;
case "lte":
return isNumeric && numCell <= numValue;
case "contains":
return cell.toLowerCase().includes(value.toLowerCase());
case "starts_with":
return cell.toLowerCase().startsWith(value.toLowerCase());
default:
return false;
}
});
}
export function aggregateColumn(
data: CsvData,
column: string,
operation: string,
): string {
const values = data.rows
.map((r) => Number(r[column]))
.filter((v) => !isNaN(v));
if (values.length === 0) return "No numeric values found";
switch (operation) {
case "sum":
return String(values.reduce((a, b) => a + b, 0));
case "avg":
return String(values.reduce((a, b) => a + b, 0) / values.length);
case "min":
return String(Math.min(...values));
case "max":
return String(Math.max(...values));
case "count":
return String(values.length);
case "median": {
const sorted = [...values].sort((a, b) => a - b);
const mid = Math.floor(sorted.length / 2);
return sorted.length % 2
? String(sorted[mid])
: String((sorted[mid - 1] + sorted[mid]) / 2);
}
default:
return "Unknown operation";
}
}
export function groupBy(
data: CsvData,
groupColumn: string,
aggColumn: string,
operation: string,
): Record<string, string> {
const groups: Record<string, number[]> = {};
for (const row of data.rows) {
const key = row[groupColumn] ?? "(empty)";
const val = Number(row[aggColumn]);
if (!groups[key]) groups[key] = [];
if (!isNaN(val)) groups[key].push(val);
}
const result: Record<string, string> = {};
for (const [key, values] of Object.entries(groups)) {
switch (operation) {
case "sum":
result[key] = String(values.reduce((a, b) => a + b, 0));
break;
case "avg":
result[key] = String(values.reduce((a, b) => a + b, 0) / values.length);
break;
case "count":
result[key] = String(values.length);
break;
case "min":
result[key] = String(Math.min(...values));
break;
case "max":
result[key] = String(Math.max(...values));
break;
default:
result[key] = "unknown operation";
}
}
return result;
}
export function sortRows(
rows: Record<string, string>[],
column: string,
direction: "asc" | "desc" = "asc",
): Record<string, string>[] {
return [...rows].sort((a, b) => {
const aVal = a[column] ?? "";
const bVal = b[column] ?? "";
const aNum = Number(aVal);
const bNum = Number(bVal);
let cmp: number;
if (!isNaN(aNum) && !isNaN(bNum)) {
cmp = aNum - bNum;
} else {
cmp = aVal.localeCompare(bVal);
}
return direction === "desc" ? -cmp : cmp;
});
}