Skip to main content
Glama
waldzellai

Exa Websets MCP Server

by waldzellai
iterativeIntelligence.ts18.6 kB
/** * Iterative Intelligence - Self-improving research system with webset registry */ export async function iterativeIntelligence( researchTopic: string, iterations: number = 3, registryPath: string = "./webset-registry" ): Promise<string> { if (!researchTopic) { return `Please provide a research topic to explore iteratively.`; } return `# 🧠 Iterative Intelligence Workflow: ${researchTopic} ## 🎯 Self-Improving Research with Webset Registry ## 🔄 Iterations: ${iterations} ## 📁 Registry Path: ${registryPath} This workflow creates a self-improving research system that maintains a searchable registry of all websets for fast retrieval and iterative analysis. ## Phase 1: Initialize Webset Registry First, let's set up or connect to your webset registry: \`\`\`javascript // Registry structure for fast webset retrieval class WebsetRegistry { constructor(registryPath) { this.registryPath = registryPath; this.indexPath = \`\${registryPath}/index.json\`; this.tagsPath = \`\${registryPath}/tags.json\`; this.relationshipsPath = \`\${registryPath}/relationships.json\`; this.metadataPath = \`\${registryPath}/metadata/\`; } // Initialize or load existing registry async initialize() { // Main index: websetId -> metadata mapping this.index = await this.loadOrCreate(this.indexPath, { websets: {}, lastUpdated: null, version: "1.0" }); // Tag index: tag -> [websetIds] mapping this.tags = await this.loadOrCreate(this.tagsPath, { byTag: {}, byWebset: {} }); // Relationship graph: tracks how websets relate this.relationships = await this.loadOrCreate(this.relationshipsPath, { iterations: {}, // Parent -> children mapping crossReferences: {}, // Websets that share entities evolution: {} // How queries evolved }); } // Register a new webset with rich metadata async registerWebset(websetId, metadata) { const enrichedMetadata = { websetId, ...metadata, registeredAt: new Date().toISOString(), searchVector: this.generateSearchVector(metadata), fingerprint: this.generateFingerprint(metadata) }; // Update main index this.index.websets[websetId] = enrichedMetadata; // Update tag indices for (const tag of metadata.tags) { if (!this.tags.byTag[tag]) this.tags.byTag[tag] = []; this.tags.byTag[tag].push(websetId); } this.tags.byWebset[websetId] = metadata.tags; // Save detailed metadata await this.saveMetadata(websetId, enrichedMetadata); await this.persist(); return enrichedMetadata; } // Fast retrieval methods async findWebsets(criteria) { const results = []; // Search by tags if (criteria.tags) { for (const tag of criteria.tags) { const websetIds = this.tags.byTag[tag] || []; results.push(...websetIds); } } // Search by query similarity if (criteria.query) { const queryVector = this.generateSearchVector({ query: criteria.query }); const similarities = Object.entries(this.index.websets) .map(([id, meta]) => ({ id, similarity: this.cosineSimilarity(queryVector, meta.searchVector) })) .filter(item => item.similarity > 0.7) .sort((a, b) => b.similarity - a.similarity); results.push(...similarities.map(s => s.id)); } // Search by iteration if (criteria.iteration) { const iterationWebsets = this.relationships.iterations[criteria.iteration] || []; results.push(...iterationWebsets); } // Remove duplicates and return with metadata const uniqueIds = [...new Set(results)]; return uniqueIds.map(id => this.index.websets[id]); } // Generate search vector for similarity matching generateSearchVector(metadata) { // Simple TF-IDF style vector for demo const text = \`\${metadata.query} \${metadata.description} \${(metadata.tags || []).join(' ')}\`; const words = text.toLowerCase().split(/\\s+/); const vector = {}; words.forEach(word => { vector[word] = (vector[word] || 0) + 1; }); return vector; } // Track relationships between websets async addRelationship(type, fromWebset, toWebset, metadata) { if (!this.relationships[type]) { this.relationships[type] = {}; } if (!this.relationships[type][fromWebset]) { this.relationships[type][fromWebset] = []; } this.relationships[type][fromWebset].push({ target: toWebset, metadata, createdAt: new Date().toISOString() }); await this.persist(); } } // Initialize registry const registry = new WebsetRegistry("${registryPath}"); await registry.initialize(); \`\`\` ## Phase 2: Iterative Research Process Now let's implement the iterative intelligence system: \`\`\`javascript // Iteration controller async function runIterativeResearch(topic, iterations) { const research = { topic: "${researchTopic}", iterations: [], knowledgeGraph: { entities: {}, relationships: [], concepts: {} }, registry: registry }; for (let i = 0; i < ${iterations}; i++) { console.log(\`\\n🔄 Starting Iteration \${i + 1}/\${iterations}\`); const iteration = await runIteration(research, i); research.iterations.push(iteration); // Update knowledge graph await updateKnowledgeGraph(research.knowledgeGraph, iteration); // Register all websets from this iteration for (const webset of iteration.websets) { await registry.registerWebset(webset.id, { query: webset.query, iteration: i + 1, parentTopic: topic, tags: generateTags(webset), description: webset.description, status: webset.status, metrics: { itemCount: webset.itemCount, relevanceScore: webset.relevanceScore } }); } } return research; } // Single iteration logic async function runIteration(research, iterationIndex) { const iteration = { number: iterationIndex + 1, timestamp: new Date().toISOString(), queries: [], websets: [], discoveries: [], gaps: [], nextQuestions: [] }; // Generate queries for this iteration if (iterationIndex === 0) { // First iteration: broad exploration iteration.queries = generateInitialQueries("${researchTopic}"); } else { // Subsequent iterations: based on gaps and discoveries const previousIteration = research.iterations[iterationIndex - 1]; iteration.queries = generateRefinedQueries( previousIteration.gaps, previousIteration.nextQuestions, research.knowledgeGraph ); } // Create websets for each query for (const query of iteration.queries) { const webset = await websets_search(query.text, query.limit || 200); iteration.websets.push({ id: webset.websetId, query: query.text, rationale: query.rationale, status: "processing", createdAt: new Date().toISOString() }); // Track parent-child relationships if (query.parentWebsetId) { await registry.addRelationship( "iterations", query.parentWebsetId, webset.websetId, { iterationNumber: iterationIndex + 1 } ); } } // Wait for websets to complete (or use webhooks) await waitForWebsetCompletion(iteration.websets); // Analyze results for (const webset of iteration.websets) { const analysis = await analyzeWebsetForInsights(webset.id); iteration.discoveries.push(...analysis.discoveries); iteration.gaps.push(...analysis.gaps); // Update webset status in registry await registry.updateWebset(webset.id, { status: "analyzed", discoveries: analysis.discoveries.length, gaps: analysis.gaps.length }); } // Generate next questions based on discoveries and gaps iteration.nextQuestions = generateNextQuestions( iteration.discoveries, iteration.gaps, research.knowledgeGraph ); return iteration; } // Query generation functions function generateInitialQueries(topic) { return [ { text: \`\${topic} overview comprehensive\`, rationale: "Broad exploration of the topic", limit: 300 }, { text: \`\${topic} latest developments 2024\`, rationale: "Current state and trends", limit: 200 }, { text: \`\${topic} key players companies organizations\`, rationale: "Identify main actors in the space", limit: 200 }, { text: \`\${topic} challenges problems issues\`, rationale: "Understand pain points and obstacles", limit: 150 } ]; } function generateRefinedQueries(gaps, questions, knowledgeGraph) { const refinedQueries = []; // Address specific gaps gaps.forEach(gap => { refinedQueries.push({ text: \`\${gap.topic} \${gap.missingInfo}\`, rationale: \`Fill knowledge gap: \${gap.description}\`, parentWebsetId: gap.sourceWebsetId, limit: 100 }); }); // Explore emerging questions questions.slice(0, 3).forEach(question => { refinedQueries.push({ text: question.query, rationale: \`Explore: \${question.rationale}\`, limit: 150 }); }); // Cross-reference entities const topEntities = getTopEntities(knowledgeGraph, 3); topEntities.forEach(entity => { refinedQueries.push({ text: \`"\${entity.name}" \${topic} relationship connection\`, rationale: \`Deep dive on key entity: \${entity.name}\`, limit: 100 }); }); return refinedQueries; } \`\`\` ## Phase 3: Fast Retrieval Interface Create an interface for quick webset retrieval: \`\`\`javascript // Retrieval assistant class WebsetRetrieval { constructor(registry) { this.registry = registry; } // Find websets by natural language query async findByQuery(naturalQuery) { console.log(\`🔍 Searching for: "\${naturalQuery}"\`); // Extract search criteria from natural language const criteria = this.parseSearchCriteria(naturalQuery); // Search registry const results = await this.registry.findWebsets(criteria); // Rank results const ranked = this.rankResults(results, naturalQuery); return ranked; } // Parse natural language into search criteria parseSearchCriteria(query) { const criteria = {}; // Extract tags (words after #) const tagMatches = query.match(/#\\w+/g); if (tagMatches) { criteria.tags = tagMatches.map(t => t.substring(1)); } // Extract iteration references const iterationMatch = query.match(/iteration\\s+(\\d+)/i); if (iterationMatch) { criteria.iteration = parseInt(iterationMatch[1]); } // Extract date ranges const dateMatch = query.match(/(?:from|since|after)\\s+(\\d{4}-\\d{2}-\\d{2})/i); if (dateMatch) { criteria.afterDate = dateMatch[1]; } // Use full query for similarity search criteria.query = query.replace(/#\\w+/g, '').trim(); return criteria; } // Get related websets async getRelated(websetId, relationshipType = "all") { const relationships = await this.registry.getRelationships(websetId); if (relationshipType === "all") { return relationships; } return relationships[relationshipType] || []; } // Get webset lineage (iteration history) async getLineage(websetId) { const lineage = { ancestors: [], descendants: [] }; // Find ancestors (previous iterations) let currentId = websetId; while (currentId) { const parent = await this.registry.getParent(currentId); if (parent) { lineage.ancestors.push(parent); currentId = parent.id; } else { break; } } // Find descendants (subsequent iterations) const children = await this.registry.getChildren(websetId); lineage.descendants = children; return lineage; } } // Usage examples const retrieval = new WebsetRetrieval(registry); // Find by natural query const results1 = await retrieval.findByQuery( "AI startups #funding #series-b iteration 2" ); // Get related websets const related = await retrieval.getRelated("ws_abc123", "crossReferences"); // Get iteration lineage const lineage = await retrieval.getLineage("ws_def456"); \`\`\` ## Phase 4: Batch Processing Setup Configure batch processing for regular updates: \`\`\`javascript // Batch processor for scheduled runs class IterativeIntelligenceBatch { constructor(config) { this.config = { schedule: config.schedule || "0 0 * * *", // Daily at midnight maxConcurrent: config.maxConcurrent || 3, registryPath: config.registryPath || "./webset-registry", topics: config.topics || [] }; this.registry = new WebsetRegistry(this.config.registryPath); } // Run batch process async runBatch() { console.log(\`🚀 Starting batch process at \${new Date().toISOString()}\`); const batchReport = { startTime: new Date().toISOString(), topics: [], newWebsets: 0, discoveries: 0, errors: [] }; // Process each topic for (const topicConfig of this.config.topics) { try { const result = await this.processTopic(topicConfig); batchReport.topics.push(result); batchReport.newWebsets += result.websetsCreated; batchReport.discoveries += result.discoveriesFound; } catch (error) { batchReport.errors.push({ topic: topicConfig.name, error: error.message }); } } batchReport.endTime = new Date().toISOString(); // Save batch report await this.saveBatchReport(batchReport); // Clean up old websets if needed await this.cleanupOldWebsets(); return batchReport; } // Process a single topic async processTopic(topicConfig) { const { name, iterations, filters } = topicConfig; // Check if we should run based on last run time const lastRun = await this.registry.getLastRun(name); if (lastRun && !this.shouldRunTopic(lastRun, topicConfig)) { return { skipped: true, reason: "Too soon since last run" }; } // Run iterative research const research = await runIterativeResearch(name, iterations); // Apply any post-processing filters if (filters) { await this.applyFilters(research, filters); } // Generate summary report const summary = { topic: name, iterations: research.iterations.length, websetsCreated: research.iterations.reduce( (sum, iter) => sum + iter.websets.length, 0 ), discoveriesFound: research.iterations.reduce( (sum, iter) => sum + iter.discoveries.length, 0 ), topEntities: this.extractTopEntities(research.knowledgeGraph), nextActions: this.generateNextActions(research) }; return summary; } // Cleanup old websets based on retention policy async cleanupOldWebsets() { const retentionDays = this.config.retentionDays || 30; const cutoffDate = new Date(); cutoffDate.setDate(cutoffDate.getDate() - retentionDays); const oldWebsets = await this.registry.findWebsets({ beforeDate: cutoffDate.toISOString() }); for (const webset of oldWebsets) { if (webset.metadata.keepForever) continue; // Archive before deletion await this.archiveWebset(webset); // Remove from active registry await this.registry.removeWebset(webset.id); } console.log(\`Cleaned up \${oldWebsets.length} old websets\`); } } // Configure batch processing const batchConfig = { schedule: "0 2 * * *", // 2 AM daily topics: [ { name: "AI startup ecosystem", iterations: 3, frequency: "daily" }, { name: "Quantum computing breakthroughs", iterations: 2, frequency: "weekly" } ], registryPath: "${registryPath}" }; const batchProcessor = new IterativeIntelligenceBatch(batchConfig); \`\`\` ## Phase 5: Registry Query Interface Simple interface for common retrieval patterns: \`\`\`javascript // Quick retrieval patterns const quickFind = { // Get today's websets today: async () => { const today = new Date().toISOString().split('T')[0]; return await registry.findWebsets({ afterDate: today }); }, // Get high-value websets highValue: async () => { const all = await registry.getAllWebsets(); return all.filter(w => w.metrics.itemCount > 100 && w.metrics.relevanceScore > 0.8 ); }, // Get by topic and iteration byTopicIteration: async (topic, iteration) => { return await registry.findWebsets({ tags: [topic.toLowerCase().replace(/\\s+/g, '-')], iteration: iteration }); }, // Get evolution chain evolutionChain: async (startWebsetId) => { const chain = [startWebsetId]; let current = startWebsetId; while (true) { const children = await registry.getChildren(current); if (children.length === 0) break; // Follow the main evolution path current = children[0].id; chain.push(current); } return chain; } }; \`\`\` ## 💡 Usage Examples ### Manual Run: \`\`\`javascript // Run iterative research manually const research = await runIterativeResearch("${researchTopic}", ${iterations}); // Find specific websets later const websets = await retrieval.findByQuery("${researchTopic} #deep-dive iteration 2"); \`\`\` ### Scheduled Batch: \`\`\`javascript // Set up cron job (in your scheduler) cron.schedule('0 2 * * *', async () => { await batchProcessor.runBatch(); }); \`\`\` ### Quick Retrieval: \`\`\`javascript // Find today's research const todaysWebsets = await quickFind.today(); // Get evolution of a topic const evolution = await quickFind.evolutionChain("ws_initial_123"); \`\`\` ## 🎯 Benefits 1. **Fast Retrieval**: Registry indexes enable instant webset discovery 2. **Relationship Tracking**: Understand how websets connect and evolve 3. **Batch Processing**: Automated research updates on schedule 4. **Knowledge Building**: Each iteration builds on previous discoveries 5. **Smart Querying**: Natural language search with multiple criteria Ready to start building your intelligent research system!`; }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/waldzellai/exa-mcp-server-websets'

If you have feedback or need assistance with the MCP directory API, please join our Discord server