Skip to main content
Glama
generator.js (7.97 kB)
/**
 * Schema Generator Module
 * Port of DataFlood C# SchemaGenerator.cs
 * Generates DataFlood schemas from folders containing JSON/CSV files
 */

import { SchemaInferrer } from './inferrer.js';
import { DataFloodModel } from '../models/DataFloodModel.js';
import { readFileSync, readdirSync, statSync } from 'fs';
import { join, extname, basename } from 'path';
import logger from '../../utils/logger.js';

const log = logger.child('SchemaGenerator');

// Value written to the `$schema` property of generated schemas.
const DATAFLOOD_SCHEMA_URI = 'https://smallminds.co/DataFlood/schema#';

// Plain decimal numbers only (optional sign, optional fraction); no exponents.
const NUMERIC_PATTERN = /^-?\d+(\.\d+)?$/;

const BYTE_ORDER_MARK = '\uFEFF';

/**
 * Remove a leading byte-order mark, which JSON.parse rejects.
 * @param {string} text - Decoded file content
 * @returns {string} - Content without a leading BOM
 */
function stripBom(text) {
  return text.startsWith(BYTE_ORDER_MARK) ? text.slice(1) : text;
}

/**
 * Convert a raw CSV cell into a number, a boolean, or leave it as a string.
 * @param {string} value - Trimmed cell text
 * @returns {number|boolean|string} - Typed value
 */
function coerceCsvValue(value) {
  if (NUMERIC_PATTERN.test(value)) {
    return parseFloat(value);
  }

  const lowered = value.toLowerCase();
  if (lowered === 'true') {
    return true;
  }
  if (lowered === 'false') {
    return false;
  }

  return value;
}

export class SchemaGenerator {
  constructor() {
    this.processedJsonFileCount = 0;
    this.processedCsvFileCount = 0;
    this.schemaInferrer = new SchemaInferrer({
      inferStringModels: true,
      inferHistograms: true,
      detectRelationships: true,
      detectFormats: true,
      detectPatterns: true,
    });
  }

  /**
   * Generate schema from a folder containing JSON/CSV files
   * Matches C# GenerateSchema method
   *
   * Files that cannot be read or parsed are logged and skipped. The processed
   * file counters are reset at the start of every call.
   *
   * @param {string} folderPath - Path to folder containing data files
   * @returns {DataFloodModel} - Generated schema; a bare object schema when the
   *   folder contains no JSON or CSV files at all
   * @throws {Error} If files were found but none yielded a document
   */
  generateSchema(folderPath) {
    // Reset counters
    this.processedJsonFileCount = 0;
    this.processedCsvFileCount = 0;

    // Find all JSON and CSV files recursively
    const jsonFiles = this.findFiles(folderPath, '.json');
    const csvFiles = this.findFiles(folderPath, '.csv');

    if (jsonFiles.length === 0 && csvFiles.length === 0) {
      log.warn('No JSON or CSV files found in the specified directory.');
      return new DataFloodModel({
        $schema: DATAFLOOD_SCHEMA_URI,
        type: 'object',
      });
    }

    const allDocuments = [];

    // Process JSON files
    for (const file of jsonFiles) {
      try {
        const content = stripBom(readFileSync(file, 'utf8'));

        // Skip markdown files mistakenly saved as .json
        if (content.trim().startsWith('#')) {
          log.debug(`Skipping markdown file: ${basename(file)}`);
          continue;
        }

        const jsonData = JSON.parse(content);

        // A top-level array contributes one document per element. Elements are
        // pushed one at a time: spreading a very large array into push() can
        // exceed the engine's argument limit and throw a RangeError.
        const documents = Array.isArray(jsonData) ? jsonData : [jsonData];
        for (const document of documents) {
          allDocuments.push(document);
        }

        this.processedJsonFileCount++;
        log.debug(`Processed JSON file: ${basename(file)}`);
      } catch (error) {
        log.warn(`Failed to parse JSON file ${basename(file)}: ${error.message}`);
      }
    }

    // Process CSV files
    for (const file of csvFiles) {
      try {
        const csvObjects = this.parseCsvToJson(file);
        for (const row of csvObjects) {
          allDocuments.push(row);
        }

        this.processedCsvFileCount++;
        log.debug(`Processed CSV file: ${basename(file)}`);
      } catch (error) {
        log.warn(`Error processing CSV file ${basename(file)}: ${error.message}`);
      }
    }

    if (allDocuments.length === 0) {
      throw new Error('No valid JSON or CSV documents found.');
    }

    log.info(
      `Processed ${this.processedJsonFileCount} JSON files and ${this.processedCsvFileCount} CSV files`
    );
    log.info(`Total documents: ${allDocuments.length}`);

    // Infer schema from all documents
    const schema = this.schemaInferrer.inferSchema(allDocuments);

    // Ensure schema has DataFlood $schema property
    if (!schema.$schema) {
      schema.$schema = DATAFLOOD_SCHEMA_URI;
    }

    return schema;
  }

  /**
   * Generate schema from multiple folders
   *
   * Folders whose schema generation throws are logged and skipped. After the
   * call, the processed file counters hold the totals across all folders
   * (generateSchema resets them per folder, so they are accumulated here).
   *
   * @param {string[]} folderPaths - Array of folder paths
   * @returns {DataFloodModel} - Merged schema
   * @throws {Error} If no folder produced a schema
   */
  generateSchemaFromMultipleFolders(folderPaths) {
    const schemas = [];
    let totalJsonFiles = 0;
    let totalCsvFiles = 0;

    for (const folderPath of folderPaths) {
      try {
        schemas.push(this.generateSchema(folderPath));
      } catch (error) {
        log.warn(`Failed to generate schema from ${folderPath}: ${error.message}`);
      } finally {
        // Count files from this folder even when it ultimately failed.
        totalJsonFiles += this.processedJsonFileCount;
        totalCsvFiles += this.processedCsvFileCount;
      }
    }

    this.processedJsonFileCount = totalJsonFiles;
    this.processedCsvFileCount = totalCsvFiles;

    if (schemas.length === 0) {
      throw new Error('No schemas could be generated from any folder.');
    }

    // Merge all schemas
    return this.mergeSchemas(schemas);
  }

  /**
   * Recursively find files with a specific extension
   *
   * Entries whose name starts with '.' are skipped. An unreadable directory or
   * entry is logged and skipped without abandoning its siblings.
   *
   * @param {string} dir - Directory to search
   * @param {string} ext - Lowercase file extension including the dot (e.g., '.json')
   * @param {string[]} [fileList=[]] - Accumulator used by the recursion
   * @returns {string[]} - Array of file paths
   */
  findFiles(dir, ext, fileList = []) {
    let entries;
    try {
      entries = readdirSync(dir);
    } catch (error) {
      log.warn(`Error reading directory ${dir}: ${error.message}`);
      return fileList;
    }

    for (const entry of entries) {
      // Skip hidden files and directories
      if (entry.startsWith('.')) {
        continue;
      }

      const entryPath = join(dir, entry);

      let stats;
      try {
        stats = statSync(entryPath);
      } catch (error) {
        // e.g. a dangling symlink: skip this entry but keep scanning the rest.
        log.warn(`Error reading ${entryPath}: ${error.message}`);
        continue;
      }

      if (stats.isDirectory()) {
        this.findFiles(entryPath, ext, fileList);
      } else if (extname(entry).toLowerCase() === ext) {
        fileList.push(entryPath);
      }
    }

    return fileList;
  }

  /**
   * Parse CSV file to JSON objects
   * Simplified CSV parser - production would use a library
   *
   * The first non-blank line supplies the property names. Records are split on
   * line breaks, so quoted fields containing newlines are not supported. Rows
   * whose field count differs from the header are skipped.
   *
   * @param {string} filePath - Path to CSV file
   * @returns {Object[]} - Array of JSON objects; empty when there is no data row
   */
  parseCsvToJson(filePath) {
    const content = stripBom(readFileSync(filePath, 'utf8'));
    const lines = content.split(/\r?\n/).filter((line) => line.trim());

    if (lines.length < 2) {
      return [];
    }

    // Parse headers
    const headers = this.parseCsvLine(lines[0]);
    const objects = [];
    let skippedRowCount = 0;

    // Parse data rows
    for (const line of lines.slice(1)) {
      const values = this.parseCsvLine(line);

      if (values.length !== headers.length) {
        skippedRowCount++;
        continue;
      }

      const obj = {};
      headers.forEach((header, index) => {
        obj[header] = coerceCsvValue(values[index]);
      });
      objects.push(obj);
    }

    if (skippedRowCount > 0) {
      log.debug(
        `Skipped ${skippedRowCount} row(s) with an unexpected column count in ${basename(filePath)}`
      );
    }

    return objects;
  }

  /**
   * Parse a single CSV line handling quoted values
   *
   * Commas inside double quotes do not split fields, and a doubled quote ("")
   * inside a quoted field yields one literal quote. Every field is trimmed.
   *
   * @param {string} line - CSV line
   * @returns {string[]} - Array of values; empty for an empty line
   */
  parseCsvLine(line) {
    const values = [];
    let current = '';
    let inQuotes = false;

    for (let i = 0; i < line.length; i++) {
      const char = line[i];

      if (char === '"') {
        if (inQuotes && line[i + 1] === '"') {
          // Escaped quote
          current += '"';
          i++;
        } else {
          // Toggle quote state
          inQuotes = !inQuotes;
        }
      } else if (char === ',' && !inQuotes) {
        values.push(current.trim());
        current = '';
      } else {
        current += char;
      }
    }

    // Every non-empty line ends with one more field, even when that field is
    // empty (trailing comma) or an empty quoted string such as `a,""`.
    if (line.length > 0) {
      values.push(current.trim());
    }

    return values;
  }

  /**
   * Merge multiple schemas into one
   *
   * Schemas are folded left to right through the inferrer's mergeSchemas.
   *
   * @param {DataFloodModel[]} schemas - Array of schemas to merge
   * @returns {DataFloodModel} - Merged schema; the sole element when only one is
   *   given, undefined for an empty array
   */
  mergeSchemas(schemas) {
    const [first, ...remaining] = schemas;

    return remaining.reduce(
      (merged, schema) => this.schemaInferrer.mergeSchemas(merged, schema),
      first
    );
  }

  /**
   * Get processing statistics
   * @returns {Object} - Counts of processed JSON files, CSV files, and their sum
   */
  getStatistics() {
    const { processedJsonFileCount, processedCsvFileCount } = this;

    return {
      processedJsonFileCount,
      processedCsvFileCount,
      totalFilesProcessed: processedJsonFileCount + processedCsvFileCount,
    };
  }
}

// Export the class as the default export
export default SchemaGenerator;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/smallmindsco/MongTap'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.