/**
* Iterative Intelligence - Self-improving research system with webset registry
*/
export async function iterativeIntelligence(
researchTopic: string,
iterations: number = 3,
registryPath: string = "./webset-registry"
): Promise<string> {
if (!researchTopic) {
return `Please provide a research topic to explore iteratively.`;
}
return `# 🧠 Iterative Intelligence Workflow: ${researchTopic}
## 🎯 Self-Improving Research with Webset Registry
## 🔄 Iterations: ${iterations}
## 📁 Registry Path: ${registryPath}
This workflow creates a self-improving research system that maintains a searchable registry of all websets for fast retrieval and iterative analysis.
## Phase 1: Initialize Webset Registry
First, let's set up or connect to your webset registry:
\`\`\`javascript
// Registry structure for fast webset retrieval
class WebsetRegistry {
constructor(registryPath) {
this.registryPath = registryPath;
this.indexPath = \`\${registryPath}/index.json\`;
this.tagsPath = \`\${registryPath}/tags.json\`;
this.relationshipsPath = \`\${registryPath}/relationships.json\`;
this.metadataPath = \`\${registryPath}/metadata/\`;
}
// Initialize or load existing registry
async initialize() {
// Main index: websetId -> metadata mapping
this.index = await this.loadOrCreate(this.indexPath, {
websets: {},
lastUpdated: null,
version: "1.0"
});
// Tag index: tag -> [websetIds] mapping
this.tags = await this.loadOrCreate(this.tagsPath, {
byTag: {},
byWebset: {}
});
// Relationship graph: tracks how websets relate
this.relationships = await this.loadOrCreate(this.relationshipsPath, {
iterations: {}, // Parent -> children mapping
crossReferences: {}, // Websets that share entities
evolution: {} // How queries evolved
});
}
// Register a new webset with rich metadata
async registerWebset(websetId, metadata) {
const enrichedMetadata = {
websetId,
...metadata,
registeredAt: new Date().toISOString(),
searchVector: this.generateSearchVector(metadata),
fingerprint: this.generateFingerprint(metadata)
};
// Update main index
this.index.websets[websetId] = enrichedMetadata;
// Update tag indices
for (const tag of metadata.tags) {
if (!this.tags.byTag[tag]) this.tags.byTag[tag] = [];
this.tags.byTag[tag].push(websetId);
}
this.tags.byWebset[websetId] = metadata.tags;
// Save detailed metadata
await this.saveMetadata(websetId, enrichedMetadata);
await this.persist();
return enrichedMetadata;
}
// Fast retrieval methods
async findWebsets(criteria) {
const results = [];
// Search by tags
if (criteria.tags) {
for (const tag of criteria.tags) {
const websetIds = this.tags.byTag[tag] || [];
results.push(...websetIds);
}
}
// Search by query similarity
if (criteria.query) {
const queryVector = this.generateSearchVector({ query: criteria.query });
const similarities = Object.entries(this.index.websets)
.map(([id, meta]) => ({
id,
similarity: this.cosineSimilarity(queryVector, meta.searchVector)
}))
.filter(item => item.similarity > 0.7)
.sort((a, b) => b.similarity - a.similarity);
results.push(...similarities.map(s => s.id));
}
// Search by iteration
if (criteria.iteration) {
const iterationWebsets = this.relationships.iterations[criteria.iteration] || [];
results.push(...iterationWebsets);
}
// Remove duplicates and return with metadata
const uniqueIds = [...new Set(results)];
return uniqueIds.map(id => this.index.websets[id]);
}
// Generate search vector for similarity matching
generateSearchVector(metadata) {
// Simple TF-IDF style vector for demo
const text = \`\${metadata.query} \${metadata.description} \${(metadata.tags || []).join(' ')}\`;
const words = text.toLowerCase().split(/\\s+/);
const vector = {};
words.forEach(word => {
vector[word] = (vector[word] || 0) + 1;
});
return vector;
}
// Track relationships between websets
async addRelationship(type, fromWebset, toWebset, metadata) {
if (!this.relationships[type]) {
this.relationships[type] = {};
}
if (!this.relationships[type][fromWebset]) {
this.relationships[type][fromWebset] = [];
}
this.relationships[type][fromWebset].push({
target: toWebset,
metadata,
createdAt: new Date().toISOString()
});
await this.persist();
}
}
// Initialize registry
const registry = new WebsetRegistry("${registryPath}");
await registry.initialize();
\`\`\`
## Phase 2: Iterative Research Process
Now let's implement the iterative intelligence system:
\`\`\`javascript
// Iteration controller
async function runIterativeResearch(topic, iterations) {
const research = {
topic: "${researchTopic}",
iterations: [],
knowledgeGraph: {
entities: {},
relationships: [],
concepts: {}
},
registry: registry
};
for (let i = 0; i < ${iterations}; i++) {
console.log(\`\\n🔄 Starting Iteration \${i + 1}/\${iterations}\`);
const iteration = await runIteration(research, i);
research.iterations.push(iteration);
// Update knowledge graph
await updateKnowledgeGraph(research.knowledgeGraph, iteration);
// Register all websets from this iteration
for (const webset of iteration.websets) {
await registry.registerWebset(webset.id, {
query: webset.query,
iteration: i + 1,
parentTopic: topic,
tags: generateTags(webset),
description: webset.description,
status: webset.status,
metrics: {
itemCount: webset.itemCount,
relevanceScore: webset.relevanceScore
}
});
}
}
return research;
}
// Single iteration logic
async function runIteration(research, iterationIndex) {
const iteration = {
number: iterationIndex + 1,
timestamp: new Date().toISOString(),
queries: [],
websets: [],
discoveries: [],
gaps: [],
nextQuestions: []
};
// Generate queries for this iteration
if (iterationIndex === 0) {
// First iteration: broad exploration
iteration.queries = generateInitialQueries("${researchTopic}");
} else {
// Subsequent iterations: based on gaps and discoveries
const previousIteration = research.iterations[iterationIndex - 1];
iteration.queries = generateRefinedQueries(
previousIteration.gaps,
previousIteration.nextQuestions,
research.knowledgeGraph
);
}
// Create websets for each query
for (const query of iteration.queries) {
const webset = await websets_search(query.text, query.limit || 200);
iteration.websets.push({
id: webset.websetId,
query: query.text,
rationale: query.rationale,
status: "processing",
createdAt: new Date().toISOString()
});
// Track parent-child relationships
if (query.parentWebsetId) {
await registry.addRelationship(
"iterations",
query.parentWebsetId,
webset.websetId,
{ iterationNumber: iterationIndex + 1 }
);
}
}
// Wait for websets to complete (or use webhooks)
await waitForWebsetCompletion(iteration.websets);
// Analyze results
for (const webset of iteration.websets) {
const analysis = await analyzeWebsetForInsights(webset.id);
iteration.discoveries.push(...analysis.discoveries);
iteration.gaps.push(...analysis.gaps);
// Update webset status in registry
await registry.updateWebset(webset.id, {
status: "analyzed",
discoveries: analysis.discoveries.length,
gaps: analysis.gaps.length
});
}
// Generate next questions based on discoveries and gaps
iteration.nextQuestions = generateNextQuestions(
iteration.discoveries,
iteration.gaps,
research.knowledgeGraph
);
return iteration;
}
// Query generation functions
function generateInitialQueries(topic) {
return [
{
text: \`\${topic} overview comprehensive\`,
rationale: "Broad exploration of the topic",
limit: 300
},
{
text: \`\${topic} latest developments 2024\`,
rationale: "Current state and trends",
limit: 200
},
{
text: \`\${topic} key players companies organizations\`,
rationale: "Identify main actors in the space",
limit: 200
},
{
text: \`\${topic} challenges problems issues\`,
rationale: "Understand pain points and obstacles",
limit: 150
}
];
}
function generateRefinedQueries(gaps, questions, knowledgeGraph) {
const refinedQueries = [];
// Address specific gaps
gaps.forEach(gap => {
refinedQueries.push({
text: \`\${gap.topic} \${gap.missingInfo}\`,
rationale: \`Fill knowledge gap: \${gap.description}\`,
parentWebsetId: gap.sourceWebsetId,
limit: 100
});
});
// Explore emerging questions
questions.slice(0, 3).forEach(question => {
refinedQueries.push({
text: question.query,
rationale: \`Explore: \${question.rationale}\`,
limit: 150
});
});
// Cross-reference entities
const topEntities = getTopEntities(knowledgeGraph, 3);
topEntities.forEach(entity => {
refinedQueries.push({
text: \`"\${entity.name}" \${topic} relationship connection\`,
rationale: \`Deep dive on key entity: \${entity.name}\`,
limit: 100
});
});
return refinedQueries;
}
\`\`\`
## Phase 3: Fast Retrieval Interface
Create an interface for quick webset retrieval:
\`\`\`javascript
// Retrieval assistant
class WebsetRetrieval {
constructor(registry) {
this.registry = registry;
}
// Find websets by natural language query
async findByQuery(naturalQuery) {
console.log(\`🔍 Searching for: "\${naturalQuery}"\`);
// Extract search criteria from natural language
const criteria = this.parseSearchCriteria(naturalQuery);
// Search registry
const results = await this.registry.findWebsets(criteria);
// Rank results
const ranked = this.rankResults(results, naturalQuery);
return ranked;
}
// Parse natural language into search criteria
parseSearchCriteria(query) {
const criteria = {};
// Extract tags (words after #)
const tagMatches = query.match(/#\\w+/g);
if (tagMatches) {
criteria.tags = tagMatches.map(t => t.substring(1));
}
// Extract iteration references
const iterationMatch = query.match(/iteration\\s+(\\d+)/i);
if (iterationMatch) {
criteria.iteration = parseInt(iterationMatch[1]);
}
// Extract date ranges
const dateMatch = query.match(/(?:from|since|after)\\s+(\\d{4}-\\d{2}-\\d{2})/i);
if (dateMatch) {
criteria.afterDate = dateMatch[1];
}
// Use full query for similarity search
criteria.query = query.replace(/#\\w+/g, '').trim();
return criteria;
}
// Get related websets
async getRelated(websetId, relationshipType = "all") {
const relationships = await this.registry.getRelationships(websetId);
if (relationshipType === "all") {
return relationships;
}
return relationships[relationshipType] || [];
}
// Get webset lineage (iteration history)
async getLineage(websetId) {
const lineage = {
ancestors: [],
descendants: []
};
// Find ancestors (previous iterations)
let currentId = websetId;
while (currentId) {
const parent = await this.registry.getParent(currentId);
if (parent) {
lineage.ancestors.push(parent);
currentId = parent.id;
} else {
break;
}
}
// Find descendants (subsequent iterations)
const children = await this.registry.getChildren(websetId);
lineage.descendants = children;
return lineage;
}
}
// Usage examples
const retrieval = new WebsetRetrieval(registry);
// Find by natural query
const results1 = await retrieval.findByQuery(
"AI startups #funding #series-b iteration 2"
);
// Get related websets
const related = await retrieval.getRelated("ws_abc123", "crossReferences");
// Get iteration lineage
const lineage = await retrieval.getLineage("ws_def456");
\`\`\`
## Phase 4: Batch Processing Setup
Configure batch processing for regular updates:
\`\`\`javascript
// Batch processor for scheduled runs
class IterativeIntelligenceBatch {
constructor(config) {
this.config = {
schedule: config.schedule || "0 0 * * *", // Daily at midnight
maxConcurrent: config.maxConcurrent || 3,
registryPath: config.registryPath || "./webset-registry",
topics: config.topics || []
};
this.registry = new WebsetRegistry(this.config.registryPath);
}
// Run batch process
async runBatch() {
console.log(\`🚀 Starting batch process at \${new Date().toISOString()}\`);
const batchReport = {
startTime: new Date().toISOString(),
topics: [],
newWebsets: 0,
discoveries: 0,
errors: []
};
// Process each topic
for (const topicConfig of this.config.topics) {
try {
const result = await this.processTopic(topicConfig);
batchReport.topics.push(result);
batchReport.newWebsets += result.websetsCreated;
batchReport.discoveries += result.discoveriesFound;
} catch (error) {
batchReport.errors.push({
topic: topicConfig.name,
error: error.message
});
}
}
batchReport.endTime = new Date().toISOString();
// Save batch report
await this.saveBatchReport(batchReport);
// Clean up old websets if needed
await this.cleanupOldWebsets();
return batchReport;
}
// Process a single topic
async processTopic(topicConfig) {
const { name, iterations, filters } = topicConfig;
// Check if we should run based on last run time
const lastRun = await this.registry.getLastRun(name);
if (lastRun && !this.shouldRunTopic(lastRun, topicConfig)) {
return { skipped: true, reason: "Too soon since last run" };
}
// Run iterative research
const research = await runIterativeResearch(name, iterations);
// Apply any post-processing filters
if (filters) {
await this.applyFilters(research, filters);
}
// Generate summary report
const summary = {
topic: name,
iterations: research.iterations.length,
websetsCreated: research.iterations.reduce(
(sum, iter) => sum + iter.websets.length, 0
),
discoveriesFound: research.iterations.reduce(
(sum, iter) => sum + iter.discoveries.length, 0
),
topEntities: this.extractTopEntities(research.knowledgeGraph),
nextActions: this.generateNextActions(research)
};
return summary;
}
// Cleanup old websets based on retention policy
async cleanupOldWebsets() {
const retentionDays = this.config.retentionDays || 30;
const cutoffDate = new Date();
cutoffDate.setDate(cutoffDate.getDate() - retentionDays);
const oldWebsets = await this.registry.findWebsets({
beforeDate: cutoffDate.toISOString()
});
for (const webset of oldWebsets) {
if (webset.metadata.keepForever) continue;
// Archive before deletion
await this.archiveWebset(webset);
// Remove from active registry
await this.registry.removeWebset(webset.id);
}
console.log(\`Cleaned up \${oldWebsets.length} old websets\`);
}
}
// Configure batch processing
const batchConfig = {
schedule: "0 2 * * *", // 2 AM daily
topics: [
{
name: "AI startup ecosystem",
iterations: 3,
frequency: "daily"
},
{
name: "Quantum computing breakthroughs",
iterations: 2,
frequency: "weekly"
}
],
registryPath: "${registryPath}"
};
const batchProcessor = new IterativeIntelligenceBatch(batchConfig);
\`\`\`
## Phase 5: Registry Query Interface
Simple interface for common retrieval patterns:
\`\`\`javascript
// Quick retrieval patterns
const quickFind = {
// Get today's websets
today: async () => {
const today = new Date().toISOString().split('T')[0];
return await registry.findWebsets({ afterDate: today });
},
// Get high-value websets
highValue: async () => {
const all = await registry.getAllWebsets();
return all.filter(w =>
w.metrics.itemCount > 100 &&
w.metrics.relevanceScore > 0.8
);
},
// Get by topic and iteration
byTopicIteration: async (topic, iteration) => {
return await registry.findWebsets({
tags: [topic.toLowerCase().replace(/\\s+/g, '-')],
iteration: iteration
});
},
// Get evolution chain
evolutionChain: async (startWebsetId) => {
const chain = [startWebsetId];
let current = startWebsetId;
while (true) {
const children = await registry.getChildren(current);
if (children.length === 0) break;
// Follow the main evolution path
current = children[0].id;
chain.push(current);
}
return chain;
}
};
\`\`\`
## 💡 Usage Examples
### Manual Run:
\`\`\`javascript
// Run iterative research manually
const research = await runIterativeResearch("${researchTopic}", ${iterations});
// Find specific websets later
const websets = await retrieval.findByQuery("${researchTopic} #deep-dive iteration 2");
\`\`\`
### Scheduled Batch:
\`\`\`javascript
// Set up cron job (in your scheduler)
cron.schedule('0 2 * * *', async () => {
await batchProcessor.runBatch();
});
\`\`\`
### Quick Retrieval:
\`\`\`javascript
// Find today's research
const todaysWebsets = await quickFind.today();
// Get evolution of a topic
const evolution = await quickFind.evolutionChain("ws_initial_123");
\`\`\`
## 🎯 Benefits
1. **Fast Retrieval**: Registry indexes enable instant webset discovery
2. **Relationship Tracking**: Understand how websets connect and evolve
3. **Batch Processing**: Automated research updates on schedule
4. **Knowledge Building**: Each iteration builds on previous discoveries
5. **Smart Querying**: Natural language search with multiple criteria
Ready to start building your intelligent research system!`;
}