validate_data_consistency
Cross-validate data integrity across related Excel/CSV files by checking referential integrity, data completeness, and value ranges to ensure consistency.
Instructions
Cross-validate data integrity across related files
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| primaryFile | Yes | Path to the primary data file to validate | |
| referenceFiles | Yes | Array of reference file paths for validation | |
| validationRules | No | Specific validation rules to apply (optional, defaults to all) | |
| keyColumns | No | Specific columns to validate for referential integrity (optional) | |
| autoDetectRelationships | No | Automatically detect column relationships (default: true) | |
| tolerance | No | Tolerance for numeric validations (default: 0.01) | |
| sheet | No | Sheet name for Excel files (optional) | |
| reportFormat | No | Format of validation report (default: detailed) | |
Implementation Reference
- Core handler function implementing data consistency validation using referential integrity, data completeness, and value range rules. Orchestrates context building, indexing, rule execution, reporting, and recommendations.

    /**
     * Validates a primary data file against one or more reference files.
     *
     * Builds a validation context for the given files, builds indexes for it,
     * runs the selected rules, and assembles the collected issues, a summary,
     * recommendations and a rendered report into a single result.
     *
     * The method does not reject: any error thrown while validating is turned
     * into a `success: false` result carrying one critical `validation_engine`
     * issue located at the primary file.
     *
     * NOTE(review): `options.autoDetectRelationships` and `options.tolerance`
     * are accepted but never read by this method — confirm whether the rules
     * pick them up elsewhere or whether they should be wired in here.
     *
     * @param primaryFilePath - Path of the file being validated.
     * @param referenceFilePaths - Paths of the files the primary file is checked against.
     * @param options - Optional settings:
     *   - `validationRules`: names of the rules to run; falls back to
     *     `this.config.rules`, then to the three built-in rule names.
     *   - `keyColumns`: columns handed to the `referential_integrity` rule; when
     *     given, that rule's auto-detection is switched off for this run only.
     *   - `sheet`: forwarded to the context builder.
     * @returns The validation result; `success` is `true` when no issue has
     *   severity `'critical'`.
     */
    async validateDataConsistency(
      primaryFilePath: string,
      referenceFilePaths: string[],
      options: {
        validationRules?: string[];
        keyColumns?: string[];
        sheet?: string;
        autoDetectRelationships?: boolean;
        tolerance?: number;
      } = {}
    ): Promise<ValidationResult> {
      const startTime = Date.now();

      try {
        // Build validation context
        const context = await this.contextBuilder.buildContext(
          primaryFilePath,
          referenceFilePaths,
          options.sheet
        );

        // Build performance indexes
        const indexes = await this.indexer.buildIndexes(context);
        context.indexes = indexes;

        // Determine which rules to run
        const rulesToRun = options.validationRules || this.config.rules || [
          'referential_integrity',
          'data_completeness',
          'value_ranges'
        ];

        // Apply the per-call keyColumns override. The rule instance lives in
        // `this.rules` and is shared between calls, so the previous values are
        // captured here and put back once the rules have run; otherwise a later
        // call without keyColumns would silently inherit this call's settings.
        let restoreRuleConfig: (() => void) | undefined;
        if (options.keyColumns) {
          const refIntegrityRule = this.rules.get('referential_integrity') as ReferentialIntegrityRule;
          if (refIntegrityRule) {
            // `config` is not part of the rule's visible type, hence the cast.
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const ruleConfig = (refIntegrityRule as any).config;
            const previousKeyColumns = ruleConfig.keyColumns;
            const previousAutoDetect = ruleConfig.autoDetect;
            restoreRuleConfig = () => {
              ruleConfig.keyColumns = previousKeyColumns;
              ruleConfig.autoDetect = previousAutoDetect;
            };

            ruleConfig.keyColumns = options.keyColumns;
            ruleConfig.autoDetect = false;
          }
        }

        // Run validation rules
        let allIssues: ValidationIssue[];
        try {
          const validationPromises: Promise<ValidationIssue[]>[] = [];
          for (const ruleName of rulesToRun) {
            const rule = this.rules.get(ruleName);
            // Rule names that are not registered are skipped.
            if (rule) {
              validationPromises.push(rule.validate(context, indexes));
            }
          }

          // Execute validations (potentially in parallel)
          const results = await Promise.all(validationPromises);

          // Merge with concat rather than push(...issues): only the handful of
          // per-rule arrays is spread, so a rule reporting a very large number
          // of issues cannot exceed the engine's argument-count limit.
          const noIssues: ValidationIssue[] = [];
          allIssues = noIssues.concat(...results);
        } finally {
          if (restoreRuleConfig) {
            restoreRuleConfig();
          }
        }

        // Calculate summary
        const summary = this.calculateSummary(context, allIssues, startTime);

        // Generate recommendations
        const recommendations = this.generateRecommendations(allIssues, context);

        // Create final result
        const result: ValidationResult = {
          success: !allIssues.some(issue => issue.severity === 'critical'),
          summary,
          issues: allIssues,
          recommendations
        };

        // A report is always attached: the detailed one when configured,
        // the summary one for every other reportFormat value.
        if (this.config.reportFormat === 'detailed') {
          result.detailedReport = this.reporter.generateDetailedReport(result);
        } else {
          result.detailedReport = this.reporter.generateSummaryReport(result);
        }

        return result;
      } catch (error) {
        // Handle validation errors gracefully: report the failure as a single
        // critical issue instead of rejecting.
        const summary: ValidationSummary = {
          totalFiles: referenceFilePaths.length + 1,
          totalRows: 0,
          totalIssues: 1,
          criticalIssues: 1,
          warningIssues: 0,
          infoIssues: 0,
          filesWithIssues: [primaryFilePath],
          validationTimeMs: Date.now() - startTime
        };

        return {
          success: false,
          summary,
          issues: [{
            rule: 'validation_engine',
            severity: 'critical',
            message: `Validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
            location: { file: primaryFilePath, row: 1, column: 'N/A' },
            suggestion: 'Check file paths and formats. Ensure all files are accessible and valid.',
            affectedRows: [],
            metadata: { error: error instanceof Error ? error.message : String(error) }
          }],
          recommendations: [
            'Verify all file paths are correct and files exist',
            'Check file formats are supported (.csv, .xlsx, .xls)',
            'Ensure files are not corrupted or locked'
          ]
        };
      }
    }