/**
* Reasoning-Enhanced RAG System
*
* This module enhances the existing RAG system by adding reasoning capabilities
* using Xenova transformers during the document retrieval process.
* It can perform summarization, question-answering, and analysis on retrieved chunks.
*/
import { Document } from 'langchain/document';
import { queryVectorStore } from './simple-pdf-indexer.js';
import log from '../../logger.js';
/**
* Reasoning task types supported by the system
*/
export type ReasoningTask =
| 'summarization'
| 'question-answering'
| 'analysis'
| 'classification';
/**
* Configuration for reasoning models
*/
interface ReasoningConfig {
task: ReasoningTask;
modelName: string;
maxLength?: number;
minLength?: number;
}
/**
* Result from reasoning process
*/
interface ReasoningResult {
originalChunk: string;
reasoningOutput: string;
confidence?: number;
metadata: Record<string, any>;
}
/**
* Enhanced RAG response with reasoning
*/
export interface EnhancedRAGResponse {
query: string;
retrievedChunks: Document[];
reasoningResults: ReasoningResult[];
summary: string;
answer: string;
sources: string[];
}
/**
* Reasoning-enhanced RAG processor
*/
export class ReasoningRAG {
private transformers: any = null;
private models: Map<string, any> = new Map();
private isInitialized: boolean = false;
constructor() {
this.initializeTransformers();
}
/**
* Initialize the transformers library dynamically
*/
private async initializeTransformers(): Promise<void> {
if (this.transformers) {
return;
}
try {
// Use eval to avoid CommonJS/ESM conflict during compilation
const importTransformers = new Function(
'return import("@xenova/transformers")'
);
this.transformers = await importTransformers();
this.isInitialized = true;
log.info('Xenova transformers initialized for reasoning');
} catch (error) {
log.error('Error importing @xenova/transformers:', error);
throw new Error(
`Failed to import @xenova/transformers: ${error instanceof Error ? error.message : String(error)}`
);
}
}
/**
* Get or create a model pipeline for a specific task
*/
private async getModel(config: ReasoningConfig): Promise<any> {
const modelKey = `${config.task}-${config.modelName}`;
if (this.models.has(modelKey)) {
return this.models.get(modelKey);
}
await this.initializeTransformers();
log.info(`Loading model for ${config.task}: ${config.modelName}`);
try {
const model = await this.transformers.pipeline(
config.task,
config.modelName
);
this.models.set(modelKey, model);
log.info(`Successfully loaded model: ${config.modelName}`);
return model;
} catch (error) {
log.error(`Error loading model ${config.modelName}:`, error);
throw new Error(
`Failed to load model: ${error instanceof Error ? error.message : String(error)}`
);
}
}
/**
* Perform reasoning on a text chunk
*/
private async performReasoning(
text: string,
config: ReasoningConfig,
query?: string
): Promise<ReasoningResult> {
const model = await this.getModel(config);
try {
let reasoningOutput: string;
let confidence: number | undefined;
switch (config.task) {
case 'summarization':
const summaryResult = await model(text, {
max_length: config.maxLength || 150,
min_length: config.minLength || 30,
do_sample: false,
});
reasoningOutput = summaryResult[0].summary_text;
confidence = summaryResult[0].score;
break;
case 'question-answering':
if (!query) {
throw new Error('Query is required for question-answering task');
}
const qaResult = await model({
question: query,
context: text,
});
reasoningOutput = qaResult.answer;
confidence = qaResult.score;
break;
case 'analysis':
// For analysis, we can use a text generation model
const analysisResult = await model(
`Analyze the following text and provide key insights:\n\n${text}\n\nAnalysis:`,
{
max_length: config.maxLength || 200,
temperature: 0.7,
do_sample: true,
}
);
reasoningOutput =
analysisResult[0].generated_text.split('Analysis:')[1]?.trim() ||
'No analysis generated';
break;
case 'classification':
// For classification, we can use a classification model
const classResult = await model(text);
reasoningOutput = `Classification: ${classResult[0].label} (confidence: ${classResult[0].score.toFixed(3)})`;
confidence = classResult[0].score;
break;
default:
throw new Error(`Unsupported reasoning task: ${config.task}`);
}
return {
originalChunk: text,
reasoningOutput,
confidence,
metadata: {
task: config.task,
model: config.modelName,
timestamp: new Date().toISOString(),
},
};
} catch (error) {
log.error(`Error performing reasoning with ${config.task}:`, error);
return {
originalChunk: text,
reasoningOutput: `Error during reasoning: ${error instanceof Error ? error.message : String(error)}`,
metadata: {
task: config.task,
model: config.modelName,
error: true,
timestamp: new Date().toISOString(),
},
};
}
}
/**
* Process multiple chunks with reasoning in parallel
*/
private async processChunksWithReasoning(
chunks: Document[],
configs: ReasoningConfig[],
query?: string
): Promise<ReasoningResult[]> {
const results: ReasoningResult[] = [];
// Process chunks in batches to avoid overwhelming the system
const batchSize = 5;
for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize);
const batchPromises = batch.flatMap(chunk =>
configs.map(config =>
this.performReasoning(chunk.pageContent, config, query)
)
);
const batchResults = await Promise.all(batchPromises);
results.push(...batchResults);
// Log progress for large batches
if (chunks.length > batchSize) {
log.info(
`Processed reasoning for ${Math.min(i + batchSize, chunks.length)}/${chunks.length} chunks`
);
}
}
return results;
}
/**
* Generate a comprehensive summary from reasoning results
*/
private async generateComprehensiveSummary(
reasoningResults: ReasoningResult[],
query: string
): Promise<string> {
// Extract all reasoning outputs
const summaries = reasoningResults
.filter(result => result.metadata.task === 'summarization')
.map(result => result.reasoningOutput);
const analyses = reasoningResults
.filter(result => result.metadata.task === 'analysis')
.map(result => result.reasoningOutput);
const qaResults = reasoningResults
.filter(result => result.metadata.task === 'question-answering')
.map(result => result.reasoningOutput);
// Combine all insights
let comprehensiveSummary = `## Query: ${query}\n\n`;
if (summaries.length > 0) {
comprehensiveSummary += `### Key Summaries:\n${summaries.map((s, i) => `${i + 1}. ${s}`).join('\n')}\n\n`;
}
if (analyses.length > 0) {
comprehensiveSummary += `### Analysis Insights:\n${analyses.map((a, i) => `${i + 1}. ${a}`).join('\n')}\n\n`;
}
if (qaResults.length > 0) {
comprehensiveSummary += `### Direct Answers:\n${qaResults.map((qa, i) => `${i + 1}. ${qa}`).join('\n')}\n\n`;
}
return comprehensiveSummary;
}
/**
* Enhanced RAG query with reasoning capabilities
*/
async queryWithReasoning(
query: string,
options: {
topK?: number;
reasoningTasks?: ReasoningTask[];
customConfigs?: ReasoningConfig[];
} = {}
): Promise<EnhancedRAGResponse> {
const {
topK = 50,
reasoningTasks = ['summarization', 'question-answering'],
customConfigs,
} = options;
try {
log.info(`Starting reasoning-enhanced RAG query: "${query}"`);
// Step 1: Retrieve relevant chunks using existing RAG
log.info(`Retrieving top ${topK} relevant chunks...`);
const retrievedChunks = await queryVectorStore(query, topK);
if (!retrievedChunks || retrievedChunks.length === 0) {
return {
query,
retrievedChunks: [],
reasoningResults: [],
summary: 'No relevant information found in the documentation.',
answer: 'No relevant information found to answer your query.',
sources: [],
};
}
log.info(`Retrieved ${retrievedChunks.length} chunks for reasoning`);
// Step 2: Configure reasoning models
const configs: ReasoningConfig[] = customConfigs || [
// Summarization using T5
{
task: 'summarization',
modelName: 'Xenova/t5-small',
maxLength: 150,
minLength: 30,
},
// Question answering using DistilBERT
{
task: 'question-answering',
modelName: 'Xenova/distilbert-base-cased-distilled-squad',
},
];
// Filter configs based on requested tasks
const filteredConfigs = configs.filter(config =>
reasoningTasks.includes(config.task)
);
// Step 3: Perform reasoning on retrieved chunks
log.info(
`Performing reasoning with ${filteredConfigs.length} different models...`
);
const reasoningResults = await this.processChunksWithReasoning(
retrievedChunks,
filteredConfigs,
query
);
// Step 4: Generate comprehensive summary
log.info('Generating comprehensive summary...');
const summary = await this.generateComprehensiveSummary(
reasoningResults,
query
);
// Step 5: Extract best answer from reasoning results
const qaResults = reasoningResults.filter(
result =>
result.metadata.task === 'question-answering' &&
!result.metadata.error
);
const bestAnswer =
qaResults.length > 0
? qaResults.sort(
(a, b) => (b.confidence || 0) - (a.confidence || 0)
)[0].reasoningOutput
: summary;
// Step 6: Extract sources
const sources = retrievedChunks
.map(
(doc: any) =>
doc.metadata?.relativePath ||
doc.metadata?.filename ||
doc.metadata?.source
)
.filter(
(source: any, index: number, arr: any[]) =>
source && arr.indexOf(source) === index
);
log.info(
`Reasoning-enhanced RAG completed. Generated ${reasoningResults.length} reasoning results from ${sources.length} sources`
);
return {
query,
retrievedChunks,
reasoningResults,
summary,
answer: bestAnswer,
sources,
};
} catch (error) {
log.error('Error in reasoning-enhanced RAG:', error);
throw new Error(
`Reasoning-enhanced RAG failed: ${error instanceof Error ? error.message : String(error)}`
);
}
}
/**
* Get available reasoning models and their capabilities
*/
getAvailableModels(): Record<ReasoningTask, string[]> {
return {
summarization: [
'Xenova/t5-small',
'Xenova/t5-base',
'Xenova/bart-large-cnn',
],
'question-answering': [
'Xenova/distilbert-base-cased-distilled-squad',
'Xenova/roberta-base-squad2',
],
analysis: ['Xenova/gpt2', 'Xenova/distilgpt2'],
classification: [
'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
'Xenova/bert-base-uncased',
],
};
}
/**
* Clean up resources
*/
async cleanup(): Promise<void> {
this.models.clear();
log.info('Reasoning RAG resources cleaned up');
}
}
// Export a singleton instance
export const reasoningRAG = new ReasoningRAG();