multi-source-synthesizer.service.tsā¢24.5 kB
import {
EnhancedWebpageContent,
EnhancedSynthesisParams,
EnhancedSynthesisResult
} from '../types/enhanced-types.js';
import { ContentExtractor } from './content-extractor.service.js';
import { EnhancedContentExtractor } from './enhanced-content-extractor.service.js';
import { ResearchEnhancer } from './research-enhancer.service.js';
/**
* MultiSourceSynthesizer
*
* Provides enhanced synthesis capabilities across multiple sources:
* 1. Detection of contradictions between sources
* 2. Source credibility assessment and weighting
* 3. Relationship mapping between content across sources
* 4. Advanced comparative analysis
*/
export class MultiSourceSynthesizer {
private researchEnhancer: ResearchEnhancer;
private contentExtractor: EnhancedContentExtractor;
constructor(
researchEnhancer: ResearchEnhancer,
contentExtractor: EnhancedContentExtractor
) {
this.researchEnhancer = researchEnhancer;
this.contentExtractor = contentExtractor;
}
/**
* Synthesize content from multiple sources with enhanced capabilities
*/
public async synthesize(params: EnhancedSynthesisParams): Promise<EnhancedSynthesisResult> {
const {
sources,
focus,
structure,
compareBy,
detectContradictions,
includeSourceCredibility,
visualizeRelationships
} = params;
// First, validate the sources
if (!sources || sources.length === 0) {
throw new Error('No sources provided for synthesis');
}
// Create basic document structure
let documentContent = `# Multi-Source Synthesis${focus ? `: ${focus}` : ''}\n\n`;
// Add introduction
documentContent += `## Introduction\n\n`;
documentContent += `This synthesis combines information from ${sources.length} sources`;
if (focus) {
documentContent += ` with a focus on "${focus}"`;
}
documentContent += `.\n\n`;
// Add methodology section if credibility assessment is included
if (includeSourceCredibility) {
documentContent += this.generateCredibilitySection(sources);
}
// Generate main content based on structure
switch (structure.toLowerCase()) {
case 'chronological':
documentContent += this.organizeChronologically(sources, focus);
break;
case 'compare_contrast':
documentContent += this.organizeCompareContrast(sources, focus, compareBy);
break;
case 'question_answer':
documentContent += this.organizeQuestionAnswer(sources, focus);
break;
case 'thematic':
default:
documentContent += this.organizeThematically(sources, focus);
break;
}
// If requested, analyze for contradictions
const contradictions = detectContradictions ?
this.detectContradictions(sources, focus) :
[];
// If contradictions were found, add a section
if (contradictions.length > 0) {
documentContent += `\n## Contradictions and Disagreements\n\n`;
contradictions.forEach(contradiction => {
documentContent += `### ${contradiction.topic}\n\n`;
documentContent += `The following sources present conflicting information on this topic:\n\n`;
contradiction.statements.forEach((statement, index) => {
const source = contradiction.sources[index] || 'Unknown source';
documentContent += `- **${source}**: ${statement}\n`;
});
documentContent += `\n`;
});
}
// Detect relationships between sources if requested
const relationships = visualizeRelationships ?
this.mapInformationRelationships(sources, focus) :
[];
// If relationships were found, add a section
if (relationships.length > 0) {
documentContent += `\n## Information Relationships\n\n`;
relationships.forEach(relationship => {
documentContent += `### ${relationship.type}\n\n`;
documentContent += `${relationship.description}\n\n`;
documentContent += `Sources: ${relationship.sources.join(', ')}\n\n`;
});
}
// Add sources section
documentContent += `\n## Sources\n\n`;
sources.forEach((source, index) => {
const credibilityInfo = source.sourceCredibility ?
` (Credibility: ${Math.round(source.sourceCredibility.score * 100)}%)` :
'';
documentContent += `${index + 1}. [${source.title}](${source.url})${credibilityInfo}\n`;
});
// Prepare the source assessment if requested
const sourcesAssessment = includeSourceCredibility ?
sources.map(source => ({
url: source.url,
credibilityScore: source.sourceCredibility?.score || 0.5,
bias: this.detectBias(source)
})) :
[];
// Create the final result
const result: EnhancedSynthesisResult = {
document: documentContent,
contradictions,
relationships,
sourcesAssessment
};
return result;
}
/**
* Generate a section on source credibility
*/
private generateCredibilitySection(sources: EnhancedWebpageContent[]): string {
let section = `## Source Credibility Assessment\n\n`;
section += `This synthesis considers the credibility of each source when weighing information. `;
section += `Credibility is assessed based on factors such as domain reputation, author information, `;
section += `citations, and content quality.\n\n`;
section += `| Source | Credibility | Key Factors |\n`;
section += `| ------ | ----------- | ----------- |\n`;
sources.forEach(source => {
const credibility = source.sourceCredibility || { score: 0.5, factors: ['Unknown'] };
const credPercent = Math.round(credibility.score * 100);
const factors = credibility.factors.join(', ');
section += `| [${source.title}](${source.url}) | ${credPercent}% | ${factors} |\n`;
});
section += `\n`;
return section;
}
/**
* Organize content thematically
*/
private organizeThematically(sources: EnhancedWebpageContent[], focus?: string): string {
let content = `## Thematic Analysis\n\n`;
if (focus) {
content += `The following themes emerged in relation to "${focus}":\n\n`;
} else {
content += `The following key themes emerged from the analyzed sources:\n\n`;
}
// Identify common themes across sources
const themes = this.identifyCommonThemes(sources, focus);
// Generate content for each theme
themes.forEach(theme => {
content += `### ${theme.name}\n\n`;
content += `${theme.description}\n\n`;
// Extract actual content snippets for this theme
const themeContent = this.extractThemeContent(sources, theme.name, theme.sourcesIndices);
if (themeContent.length > 0) {
content += `**Key Points:**\n\n`;
themeContent.forEach(point => {
content += `- ${point}\n`;
});
content += `\n`;
}
// List sources that contributed to this theme
if (theme.sourcesIndices.length > 0) {
content += `**Sources:**\n\n`;
theme.sourcesIndices.forEach(index => {
if (sources[index]) {
content += `- [${sources[index].title}](${sources[index].url})\n`;
}
});
content += `\n`;
}
});
return content;
}
/**
* Organize content chronologically
*/
private organizeChronologically(sources: EnhancedWebpageContent[], focus?: string): string {
let content = `## Chronological Analysis\n\n`;
if (focus) {
content += `The chronological development of "${focus}" is outlined below:\n\n`;
} else {
content += `The chronological development of this topic is outlined below:\n\n`;
}
// Define time periods (this would ideally be extracted from content)
const periods = [
{
name: "Early Development",
description: "The initial stages of development and early concepts.",
sourcesIndices: [] as number[]
},
{
name: "Foundational Period",
description: "Key foundational developments that shaped the field.",
sourcesIndices: [] as number[]
},
{
name: "Recent Developments",
description: "Current state and recent advancements.",
sourcesIndices: [] as number[]
},
{
name: "Future Directions",
description: "Anticipated future developments and trends.",
sourcesIndices: [] as number[]
}
];
// In a real implementation, we would analyze content to determine the
// chronology and assign sources to appropriate time periods
// For now, distribute sources across periods as a placeholder
sources.forEach((source, index) => {
const periodIndex = index % periods.length;
periods[periodIndex].sourcesIndices.push(index);
});
// Generate content for each period
periods.forEach(period => {
content += `### ${period.name}\n\n`;
content += `${period.description}\n\n`;
// List sources for this period
if (period.sourcesIndices.length > 0) {
content += `**Sources:**\n\n`;
period.sourcesIndices.forEach(index => {
if (sources[index]) {
content += `- [${sources[index].title}](${sources[index].url})\n`;
}
});
content += `\n`;
}
});
return content;
}
/**
* Organize content as compare/contrast
*/
private organizeCompareContrast(
sources: EnhancedWebpageContent[],
focus?: string,
compareBy?: string[]
): string {
let content = `## Compare and Contrast Analysis\n\n`;
if (focus) {
content += `This analysis compares and contrasts different perspectives on "${focus}":\n\n`;
} else {
content += `This analysis compares and contrasts different perspectives found in the sources:\n\n`;
}
// Determine aspects to compare (use provided aspects or generate defaults)
const aspects = compareBy && compareBy.length > 0 ?
compareBy :
['Definitions', 'Methodologies', 'Results', 'Implications'];
// First, identify areas of consensus and disagreement
content += `### Areas of Consensus\n\n`;
content += `The following points represent areas where sources generally agree:\n\n`;
// Placeholder for consensus items
content += `- Common definition and understanding of core concepts\n`;
content += `- Agreement on fundamental principles\n`;
content += `- Consistent recognition of key challenges\n\n`;
content += `### Areas of Disagreement\n\n`;
content += `The following points represent areas where sources present different or conflicting views:\n\n`;
// Placeholder for disagreement items
content += `- Varying perspectives on optimal approaches\n`;
content += `- Different interpretations of results and implications\n`;
content += `- Contrasting predictions about future developments\n\n`;
// Detailed comparison by aspect
content += `### Detailed Comparison by Aspect\n\n`;
aspects.forEach(aspect => {
content += `#### ${aspect}\n\n`;
content += `Comparison of ${aspect} across sources:\n\n`;
// Create a comparison table
content += `| Source | Perspective |\n`;
content += `| ------ | ----------- |\n`;
sources.forEach(source => {
// In a real implementation, we would extract relevant content about this aspect
content += `| [${source.title}](${source.url}) | Perspective on ${aspect} |\n`;
});
content += `\n`;
});
return content;
}
/**
* Organize content in question/answer format
*/
private organizeQuestionAnswer(sources: EnhancedWebpageContent[], focus?: string): string {
let content = `## Question and Answer Analysis\n\n`;
if (focus) {
content += `The following questions and answers address key aspects of "${focus}":\n\n`;
} else {
content += `The following questions and answers address key aspects from the analyzed sources:\n\n`;
}
// Generate questions (ideally these would be derived from content analysis)
const questions = [
"What is the definition of this topic?",
"What is the historical background?",
"What are the main approaches or methodologies?",
"What are the key findings or insights?",
"What are the practical applications?",
"What are the limitations or challenges?",
"What future developments are anticipated?"
];
// Generate answers for each question
questions.forEach(question => {
content += `### ${question}\n\n`;
// In a real implementation, we would analyze sources to generate a comprehensive answer
content += `Based on the analyzed sources, the following synthesis addresses this question:\n\n`;
// Placeholder for synthesized answer
content += `This would contain a synthesized answer based on information extracted from multiple sources.\n\n`;
// List sources that contributed to this answer
content += `**Sources that address this question:**\n\n`;
// For demonstration, include all sources
sources.forEach(source => {
content += `- [${source.title}](${source.url})\n`;
});
content += `\n`;
});
return content;
}
/**
* Detect contradictions between sources
*/
private detectContradictions(
sources: EnhancedWebpageContent[],
focus?: string
): Array<{ topic: string; sources: string[]; statements: string[] }> {
const contradictions: Array<{ topic: string; sources: string[]; statements: string[] }> = [];
// Look for contradictory statements about specific topics
const topicsToCheck = focus ? [focus] : this.extractKeyTopics(sources);
topicsToCheck.forEach(topic => {
const relevantStatements = this.extractStatementsAboutTopic(sources, topic);
if (relevantStatements.length >= 2) {
// Simple contradiction detection based on opposing keywords
const contradictoryPairs = this.findContradictoryStatements(relevantStatements);
contradictoryPairs.forEach(pair => {
contradictions.push({
topic: topic,
sources: [pair.source1, pair.source2],
statements: [pair.statement1, pair.statement2]
});
});
}
});
return contradictions;
}
/**
* Map relationships between information across sources
*/
private mapInformationRelationships(
sources: EnhancedWebpageContent[],
focus?: string
): Array<{ type: string; sources: string[]; description: string }> {
// This is a placeholder implementation
// In a real implementation, we would analyze content to identify relationships
const relationships: Array<{ type: string; sources: string[]; description: string }> = [];
// For demonstration purposes, add sample relationships
if (sources.length >= 2) {
relationships.push({
type: 'Complementary Information',
sources: [sources[0].title, sources[1].title],
description: 'These sources provide complementary information that enhances understanding when combined.'
});
}
if (sources.length >= 3) {
relationships.push({
type: 'Progression of Ideas',
sources: sources.slice(0, 3).map(s => s.title),
description: 'These sources show a progression of ideas from fundamental concepts to advanced applications.'
});
}
return relationships;
}
/**
* Identify common themes across sources
*/
private identifyCommonThemes(
sources: EnhancedWebpageContent[],
focus?: string
): Array<{ name: string; description: string; sourcesIndices: number[] }> {
// This is a placeholder implementation
// In a real implementation, we would use NLP to identify common themes
// Example themes
const themes = [
{
name: "Definitions and Core Concepts",
description: "This theme covers the fundamental definitions and core concepts of the topic.",
sourcesIndices: [] as number[]
},
{
name: "Methodological Approaches",
description: "This theme examines various methodological approaches and techniques.",
sourcesIndices: [] as number[]
},
{
name: "Practical Applications",
description: "This theme explores practical applications and real-world uses.",
sourcesIndices: [] as number[]
},
{
name: "Challenges and Limitations",
description: "This theme addresses challenges, limitations, and potential drawbacks.",
sourcesIndices: [] as number[]
},
{
name: "Future Directions",
description: "This theme looks at emerging trends and future directions.",
sourcesIndices: [] as number[]
}
];
// Distribute sources across themes as a placeholder
sources.forEach((source, index) => {
// Add each source to 1-3 themes based on index
themes[index % themes.length].sourcesIndices.push(index);
themes[(index + 2) % themes.length].sourcesIndices.push(index);
if (index % 2 === 0) {
themes[(index + 4) % themes.length].sourcesIndices.push(index);
}
});
return themes;
}
/**
* Detect potential bias in a source
*/
private detectBias(source: EnhancedWebpageContent): string | undefined {
// This is a placeholder implementation
// In a real implementation, we would analyze content for bias indicators
if (!source.content) {
return undefined;
}
// Check for obvious commercial bias
if (source.url.includes('buy') ||
source.url.includes('shop') ||
source.url.includes('product')) {
return 'Commercial';
}
// Check domain for potential political bias
const domain = new URL(source.url).hostname;
if (domain.includes('political') ||
domain.includes('party') ||
domain.includes('news')) {
return 'Potential political';
}
return undefined;
}
/**
* Extract content snippets for a specific theme
*/
private extractThemeContent(sources: EnhancedWebpageContent[], themeName: string, sourceIndices: number[]): string[] {
const content: string[] = [];
const themeKeywords = this.getThemeKeywords(themeName);
sourceIndices.forEach(index => {
const source = sources[index];
if (!source || !source.content) return;
const sentences = source.content.split(/[.!?]+/).map(s => s.trim());
sentences.forEach(sentence => {
if (sentence.length > 30 && sentence.length < 200) {
const sentenceLower = sentence.toLowerCase();
if (themeKeywords.some(keyword => sentenceLower.includes(keyword))) {
content.push(sentence);
}
}
});
});
return [...new Set(content)].slice(0, 5);
}
/**
* Get keywords associated with a theme
*/
private getThemeKeywords(themeName: string): string[] {
const themeKeywords: { [key: string]: string[] } = {
'definitions and core concepts': ['definition', 'define', 'concept', 'meaning', 'refers to'],
'methodological approaches': ['method', 'approach', 'technique', 'procedure', 'methodology'],
'practical applications': ['application', 'use', 'apply', 'implement', 'utilize'],
'challenges and limitations': ['challenge', 'limitation', 'problem', 'difficulty', 'constraint'],
'future directions': ['future', 'trend', 'emerging', 'development', 'innovation'],
'historical context': ['history', 'historical', 'development', 'evolution', 'background']
};
return themeKeywords[themeName.toLowerCase()] || [];
}
/**
* Extract key topics from sources
*/
private extractKeyTopics(sources: EnhancedWebpageContent[]): string[] {
const topics = new Set<string>();
sources.forEach(source => {
if (!source.content) return;
// Extract topics from titles
if (source.title) {
const titleWords = source.title.toLowerCase().split(/\s+/)
.filter(word => word.length > 3)
.slice(0, 3);
titleWords.forEach(word => topics.add(word));
}
// Extract frequent nouns from content
const words = source.content.toLowerCase().match(/\b[a-z]{4,}\b/g) || [];
const wordCount: { [key: string]: number } = {};
words.forEach(word => {
wordCount[word] = (wordCount[word] || 0) + 1;
});
// Get the most frequent words as potential topics
const frequentWords = Object.entries(wordCount)
.sort(([,a], [,b]) => b - a)
.slice(0, 5)
.map(([word]) => word);
frequentWords.forEach(word => topics.add(word));
});
return Array.from(topics).slice(0, 10);
}
/**
* Extract statements about a specific topic
*/
private extractStatementsAboutTopic(sources: EnhancedWebpageContent[], topic: string): Array<{
source: string;
statement: string;
}> {
const statements: Array<{ source: string; statement: string }> = [];
const topicLower = topic.toLowerCase();
sources.forEach(source => {
if (!source.content) return;
const sentences = source.content.split(/[.!?]+/).map(s => s.trim());
sentences.forEach(sentence => {
if (sentence.length > 20 && sentence.length < 300) {
const sentenceLower = sentence.toLowerCase();
if (sentenceLower.includes(topicLower)) {
statements.push({
source: source.title,
statement: sentence
});
}
}
});
});
return statements;
}
/**
* Find contradictory statements
*/
private findContradictoryStatements(statements: Array<{ source: string; statement: string }>): Array<{
source1: string;
source2: string;
statement1: string;
statement2: string;
}> {
const contradictions: Array<{
source1: string;
source2: string;
statement1: string;
statement2: string;
}> = [];
// Define opposing word pairs that might indicate contradictions
const opposingPairs = [
['good', 'bad'], ['effective', 'ineffective'], ['successful', 'failed'],
['increase', 'decrease'], ['positive', 'negative'], ['better', 'worse'],
['advantage', 'disadvantage'], ['benefit', 'harm'], ['improve', 'worsen'],
['high', 'low'], ['fast', 'slow'], ['easy', 'difficult']
];
for (let i = 0; i < statements.length; i++) {
for (let j = i + 1; j < statements.length; j++) {
const statement1Lower = statements[i].statement.toLowerCase();
const statement2Lower = statements[j].statement.toLowerCase();
// Check if statements contain opposing terms
const hasContradiction = opposingPairs.some(([word1, word2]) =>
(statement1Lower.includes(word1) && statement2Lower.includes(word2)) ||
(statement1Lower.includes(word2) && statement2Lower.includes(word1))
);
if (hasContradiction) {
contradictions.push({
source1: statements[i].source,
source2: statements[j].source,
statement1: statements[i].statement,
statement2: statements[j].statement
});
}
}
}
return contradictions.slice(0, 3); // Limit to top 3 contradictions
}
}