/**
* Incremental Training Module
* Updates existing models cumulatively as new data arrives
* Ports DataFlood's MergeSchemas approach
*/
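//
// Typical round trip (a sketch; the model path and `newDocuments`
// are hypothetical placeholders):
//
//   const trainer = new IncrementalTrainer();
//   const model = await trainer.loadModel('./models/users.json');
//   const updated = trainer.train(newDocuments, model);
//   await trainer.saveModel(updated, './models/users.json');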
import inferrer from '../schema/inferrer.js';
class IncrementalTrainer {
constructor() {
this.inferrer = inferrer;
}
/**
* Update an existing model with new data
* @param {Object} existingModel - The current model
* @param {Array} newData - New documents to learn from
* @returns {Object} Updated model combining existing and new knowledge
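* @example
* // Fold a new batch into a previously trained model (`model` and
* // `batch` are hypothetical placeholders):
* const updated = trainer.updateModel(model, batch);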
*/
updateModel(existingModel, newData) {
if (!newData || newData.length === 0) {
return existingModel;
}
// Infer schema from new data
const newModel = this.inferrer.inferSchema(newData);
// Merge the models
return this.mergeSchemas(existingModel, newModel);
}
/**
* Merge two schemas together, combining their properties and statistics
* Port of DataFlood's MergeSchemas method
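* @example
* // Constraints widen to cover both sources (other merged fields
* // omitted):
* mergeSchemas({ type: 'string', minLength: 3, maxLength: 5 },
*              { type: 'string', minLength: 1, maxLength: 8 })
* // => { type: 'string', minLength: 1, maxLength: 8, ... }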
*/
mergeSchemas(existing, newSchema) {
if (!existing) return newSchema;
if (!newSchema) return existing;
const merged = {
$schema: existing.$schema || newSchema.$schema,
type: this.mergeTypes(existing.type, newSchema.type)
};
// Merge string constraints and models
if (existing.type === 'string' || newSchema.type === 'string') {
// Widen length bounds to cover both schemas, skipping undefined
// sides (a maxLength of 0 is legitimate for empty strings, so no
// zero sentinel is used)
const minLengths = [existing.minLength, newSchema.minLength].filter((v) => v !== undefined);
if (minLengths.length > 0) merged.minLength = Math.min(...minLengths);
const maxLengths = [existing.maxLength, newSchema.maxLength].filter((v) => v !== undefined);
if (maxLengths.length > 0) merged.maxLength = Math.max(...maxLengths);
// Merge string models when at least one schema carries one
const stringModel = this.mergeStringModels(existing.stringModel, newSchema.stringModel);
if (stringModel !== undefined) merged.stringModel = stringModel;
// Keep the existing format/pattern when both schemas define one
if (existing.format || newSchema.format) {
merged.format = existing.format || newSchema.format;
}
if (existing.pattern || newSchema.pattern) {
merged.pattern = existing.pattern || newSchema.pattern;
}
}
// Merge number/integer constraints
if (['number', 'integer'].includes(existing.type) ||
['number', 'integer'].includes(newSchema.type)) {
if (existing.minimum !== undefined && newSchema.minimum !== undefined) {
merged.minimum = Math.min(existing.minimum, newSchema.minimum);
} else {
merged.minimum = existing.minimum ?? newSchema.minimum;
}
if (existing.maximum !== undefined && newSchema.maximum !== undefined) {
merged.maximum = Math.max(existing.maximum, newSchema.maximum);
} else {
merged.maximum = existing.maximum ?? newSchema.maximum;
}
// Merge histograms when at least one schema carries one
const histogram = this.mergeHistograms(existing.histogram, newSchema.histogram);
if (histogram !== undefined) merged.histogram = histogram;
// Prefer the existing multipleOf; a stricter merge would take the
// greatest common divisor of the two values
if (existing.multipleOf || newSchema.multipleOf) {
merged.multipleOf = existing.multipleOf || newSchema.multipleOf;
}
}
// Merge array constraints
if (existing.type === 'array' || newSchema.type === 'array') {
// Widen item-count bounds the same way, skipping undefined sides
// (a maxItems of 0 is legitimate for always-empty arrays)
const minItems = [existing.minItems, newSchema.minItems].filter((v) => v !== undefined);
if (minItems.length > 0) merged.minItems = Math.min(...minItems);
const maxItems = [existing.maxItems, newSchema.maxItems].filter((v) => v !== undefined);
if (maxItems.length > 0) merged.maxItems = Math.max(...maxItems);
// Merge array item schemas
if (existing.items || newSchema.items) {
if (existing.items && newSchema.items) {
merged.items = this.mergeSchemas(existing.items, newSchema.items);
} else {
merged.items = existing.items || newSchema.items;
}
}
}
// Merge object properties
if (existing.type === 'object' || newSchema.type === 'object') {
merged.properties = this.mergeProperties(
existing.properties,
newSchema.properties
);
merged.required = this.mergeRequired(
existing.required,
newSchema.required,
merged.properties
);
}
// Merge enum values
if (existing.enum || newSchema.enum) {
merged.enum = this.mergeEnumValues(existing.enum, newSchema.enum);
}
// Merge anyOf unions
if (existing.anyOf || newSchema.anyOf) {
merged.anyOf = this.mergeAnyOf(existing.anyOf, newSchema.anyOf);
}
return merged;
}
/**
* Merge type information
*/
mergeTypes(type1, type2) {
if (!type1) return type2;
if (!type2) return type1;
if (type1 === type2) return type1;
// On conflict, prefer the newer type; a fuller implementation
// would emit an anyOf union instead
return type2;
}
/**
* Merge object properties
*/
mergeProperties(props1, props2) {
if (!props1) return props2;
if (!props2) return props1;
const merged = { ...props1 };
for (const [key, value] of Object.entries(props2)) {
if (merged[key]) {
merged[key] = this.mergeSchemas(merged[key], value);
} else {
merged[key] = value;
}
}
return merged;
}
/**
* Merge required field lists
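* @example
* // A field stays required only if both schemas require it and it
* // exists in the merged properties:
* mergeRequired(['id', 'name'], ['id'], { id: {}, name: {} })
* // => ['id']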
*/
mergeRequired(req1, req2, properties) {
if (!req1 && !req2) return undefined;
const required = new Set();
// Only mark as required if required in both schemas
if (req1 && req2) {
for (const field of req1) {
if (req2.includes(field) && properties && properties[field]) {
required.add(field);
}
}
}
return required.size > 0 ? Array.from(required) : undefined;
}
/**
* Merge string models with proper statistics combination
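* @example
* // Lengths widen, frequencies sum, averages are blended (extra
* // statistical fields omitted here):
* mergeStringModels({ minLength: 2, maxLength: 4, averageLength: 3 },
*                   { minLength: 3, maxLength: 6, averageLength: 4 })
* // => { minLength: 2, maxLength: 6, averageLength: 3.5, ... }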
*/
mergeStringModels(model1, model2) {
if (!model1) return model2;
if (!model2) return model1;
const merged = {
minLength: Math.min(model1.minLength, model2.minLength),
maxLength: Math.max(model1.maxLength, model2.maxLength),
// Unweighted average: the string models do not track sample counts
averageLength: (model1.averageLength + model2.averageLength) / 2
};
// Merge character frequency
merged.characterFrequency = this.mergeFrequencies(
model1.characterFrequency,
model2.characterFrequency
);
// Merge unique characters
const allChars = new Set([
...(model1.uniqueCharacters || ''),
...(model2.uniqueCharacters || '')
]);
merged.uniqueCharacters = Array.from(allChars).join('');
// Merge value frequency
merged.valueFrequency = this.mergeFrequencies(
model1.valueFrequency,
model2.valueFrequency
);
// Merge patterns
if (model1.patterns || model2.patterns) {
const patternMap = {};
const addPatterns = (patterns) => {
if (patterns && Array.isArray(patterns)) {
for (const p of patterns) {
// Entries may be plain strings or { pattern, count } objects
const key = p.pattern || p;
patternMap[key] = (patternMap[key] || 0) + (p.count || 1);
}
} else if (patterns && typeof patterns === 'object') {
// Handle object format
for (const [pattern, count] of Object.entries(patterns)) {
patternMap[pattern] = (patternMap[pattern] || 0) + count;
}
}
};
addPatterns(model1.patterns);
addPatterns(model2.patterns);
merged.patterns = Object.entries(patternMap)
.map(([pattern, count]) => ({ pattern, count }))
.sort((a, b) => b.count - a.count)
.slice(0, 10); // Keep top 10 patterns
}
// Merge n-grams (bigram and trigram tables, keyed by n)
if (model1.nGrams || model2.nGrams) {
merged.nGrams = {};
for (const n of [2, 3]) {
const grams1 = model1.nGrams?.[n] || {};
const grams2 = model2.nGrams?.[n] || {};
merged.nGrams[n] = this.mergeFrequencies(grams1, grams2);
}
}
// Complexity takes the max; entropy is averaged (approximate, since
// true combined entropy depends on the merged distribution)
merged.complexity = Math.max(
model1.complexity || 0,
model2.complexity || 0
);
merged.entropyScore = (
(model1.entropyScore || 0) + (model2.entropyScore || 0)
) / 2;
// Keep sample values from both
if (model1.sampleValues || model2.sampleValues) {
const allSamples = [
...(model1.sampleValues || []),
...(model2.sampleValues || [])
];
// Keep unique samples, up to 20
merged.sampleValues = [...new Set(allSamples)].slice(0, 20);
}
return merged;
}
/**
* Merge histograms with proper bin combination
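* @example
* // Overlapping bins collapse into one with summed counts:
* // bins [{min: 0, max: 5, count: 10}] and [{min: 3, max: 8, count: 4}]
* // merge into [{min: 0, max: 8, count: 14, ...}]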
*/
mergeHistograms(hist1, hist2) {
if (!hist1) return hist2;
if (!hist2) return hist1;
const merged = {
minValue: Math.min(hist1.minValue, hist2.minValue),
maxValue: Math.max(hist1.maxValue, hist2.maxValue),
totalCount: (hist1.totalCount || 0) + (hist2.totalCount || 0)
};
// Merge bins - simplified approach
// In production, would need to recompute bins based on combined data
const allBins = [...(hist1.bins || []), ...(hist2.bins || [])];
// Sort and merge overlapping bins
allBins.sort((a, b) => a.min - b.min);
const mergedBins = [];
let currentBin = null;
for (const bin of allBins) {
if (!currentBin) {
currentBin = { ...bin };
} else if (bin.min <= currentBin.max) {
// Bins overlap, merge them
currentBin.max = Math.max(currentBin.max, bin.max);
currentBin.count = (currentBin.count || 0) + (bin.count || 0);
currentBin.frequency = (currentBin.frequency || 0) + (bin.frequency || 0);
} else {
// No overlap, save current and start new
mergedBins.push(currentBin);
currentBin = { ...bin };
}
}
if (currentBin) {
mergedBins.push(currentBin);
}
merged.bins = mergedBins.slice(0, 20); // Limit to 20 bins
// Weight the means by observation counts, falling back to a simple
// average when counts are unavailable
const n1 = hist1.totalCount || 0;
const n2 = hist2.totalCount || 0;
merged.mean = (n1 + n2) > 0
? ((hist1.mean || 0) * n1 + (hist2.mean || 0) * n2) / (n1 + n2)
: ((hist1.mean || 0) + (hist2.mean || 0)) / 2;
// Max of the two deviations is a rough stand-in; exact pooling
// would combine variances using counts and means
merged.standardDeviation = Math.max(
hist1.standardDeviation || 0,
hist2.standardDeviation || 0
);
merged.complexity = Math.max(
hist1.complexity || 0,
hist2.complexity || 0
);
merged.entropy = (
(hist1.entropy || 0) + (hist2.entropy || 0)
) / 2;
return merged;
}
/**
* Merge frequency maps
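* @example
* // Counts for shared keys add up:
* mergeFrequencies({ a: 2, b: 1 }, { b: 3, c: 1 })
* // => { a: 2, b: 4, c: 1 }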
*/
mergeFrequencies(freq1, freq2) {
if (!freq1) return freq2;
if (!freq2) return freq1;
const merged = { ...freq1 };
for (const [key, value] of Object.entries(freq2)) {
merged[key] = (merged[key] || 0) + value;
}
return merged;
}
/**
* Merge enum values
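* @example
* mergeEnumValues(['red', 'blue'], ['blue', 'green'])
* // => ['red', 'blue', 'green']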
*/
mergeEnumValues(enum1, enum2) {
if (!enum1) return enum2;
if (!enum2) return enum1;
const allValues = new Set([...enum1, ...enum2]);
return Array.from(allValues);
}
/**
* Merge anyOf unions
*/
mergeAnyOf(anyOf1, anyOf2) {
if (!anyOf1) return anyOf2;
if (!anyOf2) return anyOf1;
// Concatenate branches as-is (duplicates are kept); a fuller
// implementation would de-duplicate and merge similar schemas
return [...anyOf1, ...anyOf2];
}
/**
* Train a model from scratch or update existing
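* @example
* // First batch trains from scratch; later batches fold in
* // (`batch1` and `batch2` are hypothetical):
* let model = trainer.train(batch1);
* model = trainer.train(batch2, model);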
*/
train(data, existingModel = null) {
if (!existingModel) {
// Train from scratch
return this.inferrer.inferSchema(data);
} else {
// Incremental update
return this.updateModel(existingModel, data);
}
}
/**
* Save model to file
*/
async saveModel(model, filepath) {
// fs/promises is imported lazily so the module stays loadable in
// environments without Node's fs
const { writeFile } = await import('fs/promises');
const json = JSON.stringify(model, null, 2);
await writeFile(filepath, json, 'utf8');
}
/**
* Load model from file
*/
async loadModel(filepath) {
const { readFile } = await import('fs/promises');
const json = await readFile(filepath, 'utf8');
return JSON.parse(json);
}
}
export { IncrementalTrainer };