import { z } from 'zod';
import { createLogger } from '../utils/logger.js';
import { Projection } from '../schemas/projection.js';
import { UseCase } from '../schemas/use-case.js';
// ML model input features

/** Validator shared by every feature that is expressed on a 0–1 scale. */
const unitIntervalFeature = z.number().min(0).max(1);

/**
 * Input feature vector consumed by the ML comparison engine.
 *
 * Fields are grouped into financial figures, project characteristics,
 * industry factors, risk factors and optional historical data.
 */
export const ProjectFeaturesSchema = z.object({
  // Financial features
  totalInvestment: z.number(),
  expectedROI: z.number(),
  paybackPeriodMonths: z.number(),
  netPresentValue: z.number(),

  // Project characteristics
  useCaseCount: z.number(),
  timelineMonths: z.number(),
  // Complexity is scored on a 0–10 scale
  implementationComplexity: z.number().min(0).max(10),

  // Industry factors
  industry: z.string(),
  companySize: z.enum(['small', 'medium', 'large', 'enterprise']),

  // Risk factors (each on a 0–1 scale)
  technicalRisk: unitIntervalFeature,
  organizationalRisk: unitIntervalFeature,
  marketRisk: unitIntervalFeature,

  // Historical data (if available)
  similarProjectSuccessRate: unitIntervalFeature.optional(),
  industryAverageROI: z.number().optional()
});

/** Static type inferred from {@link ProjectFeaturesSchema}. */
export type ProjectFeatures = z.infer<typeof ProjectFeaturesSchema>;
// ML prediction output

/** One identified risk together with its impact level and a suggested mitigation. */
const keyRiskFactorEntrySchema = z.object({
  factor: z.string(),
  impact: z.enum(['low', 'medium', 'high']),
  mitigation: z.string()
});

/** One detected synergy with another project. */
const synergyEntrySchema = z.object({
  withProject: z.string(),
  type: z.string(),
  estimatedValue: z.number()
});

/** Lower and upper bound reported alongside the success probability. */
const confidenceIntervalSchema = z.object({
  lower: z.number(),
  upper: z.number()
});

/**
 * Prediction produced for a single project: success probability (0–1) with
 * its confidence interval, a 0–10 risk score, the predicted ROI and delay,
 * the key risk factors and, optionally, synergies with other projects.
 */
export const MLPredictionSchema = z.object({
  successProbability: z.number().min(0).max(1),
  confidenceInterval: confidenceIntervalSchema,
  riskScore: z.number().min(0).max(10),
  predictedActualROI: z.number(),
  // Delays are never negative
  predictedDelayMonths: z.number().min(0),
  keyRiskFactors: z.array(keyRiskFactorEntrySchema),
  synergies: z.array(synergyEntrySchema).optional()
});

/** Static type inferred from {@link MLPredictionSchema}. */
export type MLPrediction = z.infer<typeof MLPredictionSchema>;
// Comparison result with ML insights

/** Headline financial figures copied from the project's projection. */
const comparisonBaseMetricsSchema = z.object({
  roi: z.number(),
  paybackPeriod: z.number(),
  npv: z.number(),
  totalInvestment: z.number()
});

/** Position of a project within the compared set, overall and per criterion. */
const comparisonRankingSchema = z.object({
  overall: z.number(),
  byROI: z.number(),
  byRisk: z.number(),
  bySpeed: z.number()
});

/** Recommendation levels, ordered from most to least favourable. */
const comparisonRecommendationSchema = z.enum([
  'strongly_recommend',
  'recommend',
  'consider',
  'reconsider',
  'not_recommended'
]);

/**
 * Per-project outcome of a comparison: base metrics, ML predictions,
 * rankings, a recommendation level and human-readable insights.
 */
export const MLComparisonResultSchema = z.object({
  projectId: z.string(),
  baseMetrics: comparisonBaseMetricsSchema,
  mlPredictions: MLPredictionSchema,
  ranking: comparisonRankingSchema,
  recommendation: comparisonRecommendationSchema,
  insights: z.array(z.string())
});

/** Static type inferred from {@link MLComparisonResultSchema}. */
export type MLComparisonResult = z.infer<typeof MLComparisonResultSchema>;
/** One candidate project, exactly as accepted by `MLComparisonEngine.compareProjects`. */
type ComparisonCandidate = Parameters<MLComparisonEngine['compareProjects']>[0][number];

/** Rank positions (1 = best) of a project within the compared set. */
type ProjectRanking = MLComparisonResult['ranking'];

/** Recommendation level attached to a comparison result. */
type Recommendation = MLComparisonResult['recommendation'];

/** One entry of `MLPrediction.keyRiskFactors`. */
type KeyRiskFactor = MLPrediction['keyRiskFactors'][number];

/** One entry of `MLPrediction.synergies`. */
type Synergy = NonNullable<MLPrediction['synergies']>[number];

/** A candidate project together with its extracted features and predictions. */
interface ScoredProject {
  project: ComparisonCandidate;
  features: ProjectFeatures;
  predictions: MLPrediction;
}

/**
 * Machine Learning engine for advanced project comparison.
 *
 * The success probability is the average of the per-tree estimates of an
 * ensemble of {@link DecisionTree} instances; risk score, ROI adjustment and
 * delay are computed by the heuristic formulas in this class.
 */
export class MLComparisonEngine {
  /** Company-size buckets accepted in `ProjectFeatures.companySize`. */
  private static readonly COMPANY_SIZES: ReadonlyArray<ProjectFeatures['companySize']> = [
    'small',
    'medium',
    'large',
    'enterprise'
  ];

  /** Market risk (0–1) per industry; unknown industries use 0.5. */
  private static readonly MARKET_RISK_BY_INDUSTRY: ReadonlyMap<string, number> = new Map<string, number>([
    ['technology', 0.3],
    ['financial_services', 0.5],
    ['healthcare', 0.6],
    ['retail', 0.4],
    ['manufacturing', 0.3],
    ['education', 0.2],
    ['government', 0.4],
    ['other', 0.5]
  ]);

  /** Base success rate per industry; unknown industries use 0.70. */
  private static readonly SUCCESS_RATE_BY_INDUSTRY: ReadonlyMap<string, number> = new Map<string, number>([
    ['technology', 0.75],
    ['financial_services', 0.70],
    ['healthcare', 0.65],
    ['retail', 0.72],
    ['manufacturing', 0.68],
    ['education', 0.73],
    ['government', 0.60],
    ['other', 0.70]
  ]);

  /** Average ROI per industry; unknown industries use 25. */
  private static readonly AVERAGE_ROI_BY_INDUSTRY: ReadonlyMap<string, number> = new Map<string, number>([
    ['technology', 35],
    ['financial_services', 25],
    ['healthcare', 20],
    ['retail', 30],
    ['manufacturing', 28],
    ['education', 18],
    ['government', 15],
    ['other', 25]
  ]);

  private readonly logger = createLogger({ service: 'MLComparisonEngine' });

  // Simplified Random Forest implementation
  private readonly forests: Array<DecisionTree> = [];
  private readonly treeCount = 100;
  private readonly maxDepth = 10;

  constructor() {
    // Initialize with pre-trained weights (in production, load from file)
    this.initializeForests();
  }

  /**
   * Extract features from project data for ML processing.
   *
   * @param projection - Financial projection; `calculations` and `timeline_months` are read.
   * @param useCases - Use cases of the project; may be empty (complexity and technical risk are then 0).
   * @param industry - Industry key used for the market-risk / historical lookups.
   * @param companySize - Optional size bucket; missing or unrecognized values become `'medium'`.
   * @returns The feature vector for {@link predict}.
   */
  extractFeatures(
    projection: Projection,
    useCases: UseCase[],
    industry: string,
    companySize?: string
  ): ProjectFeatures {
    // Calculate implementation complexity based on use cases
    const complexityScore = this.calculateComplexityScore(useCases);
    // Assess risk factors
    const risks = this.assessRiskFactors(projection, useCases, industry);

    return {
      // Financial features
      totalInvestment: projection.calculations.total_investment,
      expectedROI: projection.calculations.five_year_roi,
      // NOTE(review): a missing payback period is mapped to 0 months, which
      // ranks such a project as the fastest — confirm this is intended.
      paybackPeriodMonths: projection.calculations.payback_period_months || 0,
      netPresentValue: projection.calculations.net_present_value,
      // Project characteristics
      useCaseCount: useCases.length,
      timelineMonths: projection.timeline_months,
      implementationComplexity: complexityScore,
      // Industry factors
      industry,
      companySize: this.normalizeCompanySize(companySize),
      // Risk factors
      technicalRisk: risks.technical,
      organizationalRisk: risks.organizational,
      marketRisk: risks.market,
      // Historical data (would come from database in production)
      similarProjectSuccessRate: this.getHistoricalSuccessRate(industry, complexityScore),
      industryAverageROI: this.getIndustryAverageROI(industry)
    };
  }

  /**
   * Generate ML predictions for a project.
   *
   * @param features - Feature vector, typically produced by {@link extractFeatures}.
   * @returns Prediction without `synergies` (those are added by {@link compareProjects}).
   */
  async predict(features: ProjectFeatures): Promise<MLPrediction> {
    this.logger.debug('Generating ML predictions', { industry: features.industry });

    // Ensemble prediction: one success estimate per tree
    const successProbabilities = this.forests.map(tree => tree.predict(features).success);
    const avgSuccess = this.mean(successProbabilities);
    const stdSuccess = this.standardDeviation(successProbabilities);

    return {
      successProbability: avgSuccess,
      // Mean ± 2 standard deviations across trees, clipped to [0, 1]
      confidenceInterval: {
        lower: Math.max(0, avgSuccess - 2 * stdSuccess),
        upper: Math.min(1, avgSuccess + 2 * stdSuccess)
      },
      riskScore: this.calculateRiskScore(features),
      predictedActualROI: features.expectedROI * this.predictROIAdjustment(features),
      predictedDelayMonths: this.predictDelays(features),
      keyRiskFactors: this.identifyKeyRisks(features)
    };
  }

  /**
   * Compare multiple projects with ML insights.
   *
   * @param projects - Candidate projects to score and rank against each other.
   * @returns One result per input project, in input order.
   */
  async compareProjects(
    projects: Array<{
      id: string;
      projection: Projection;
      useCases: UseCase[];
      industry: string;
      companySize?: string;
    }>
  ): Promise<MLComparisonResult[]> {
    this.logger.info('Comparing projects with ML', { count: projects.length });

    // Extract features and generate predictions for all projects
    const scoredProjects: ScoredProject[] = await Promise.all(
      projects.map(async project => {
        const features = this.extractFeatures(
          project.projection,
          project.useCases,
          project.industry,
          project.companySize
        );
        const predictions = await this.predict(features);
        // Detect synergies between projects
        const synergies = this.detectSynergies(project, projects);
        return { project, features, predictions: { ...predictions, synergies } };
      })
    );

    // Rank projects by different criteria (rankings are aligned by index)
    const rankings = this.rankProjects(scoredProjects);

    // Generate comparison results
    return scoredProjects.map((scored, index) => {
      const ranking = rankings[index];
      if (ranking === undefined) {
        // rankProjects returns one entry per input, so this is a programming error.
        throw new Error(`Missing ranking for project ${scored.project.id}`);
      }
      const calculations = scored.project.projection.calculations;
      return {
        projectId: scored.project.id,
        baseMetrics: {
          roi: calculations.five_year_roi,
          paybackPeriod: calculations.payback_period_months || 0,
          npv: calculations.net_present_value,
          totalInvestment: calculations.total_investment
        },
        mlPredictions: scored.predictions,
        ranking,
        recommendation: this.generateRecommendation(scored.predictions, ranking),
        insights: this.generateInsights(scored)
      };
    });
  }

  /**
   * Map a free-form company size onto the allowed buckets.
   * Missing or unrecognized values fall back to `'medium'`.
   */
  private normalizeCompanySize(companySize?: string): ProjectFeatures['companySize'] {
    const match = MLComparisonEngine.COMPANY_SIZES.find(size => size === companySize);
    if (match === undefined && companySize) {
      this.logger.debug('Unrecognized company size, defaulting to medium', { companySize });
    }
    return match ?? 'medium';
  }

  /**
   * Calculate complexity score based on use cases.
   *
   * Averages, per use case, the complexity score (5 when absent) plus 0.5 per
   * dependency and 0.3 per risk factor, capped at 10. Returns 0 when there
   * are no use cases (instead of dividing by zero).
   */
  private calculateComplexityScore(useCases: UseCase[]): number {
    if (useCases.length === 0) {
      return 0;
    }
    let score = 0;
    for (const useCase of useCases) {
      const implementation = useCase.implementation;
      // Base complexity from implementation details
      score += implementation?.complexity_score || 5;
      // Additional factors
      score += (implementation?.dependencies?.length ?? 0) * 0.5;
      score += (implementation?.risk_factors?.length ?? 0) * 0.3;
    }
    // Normalize to 0-10 scale
    return Math.min(10, score / useCases.length);
  }

  /**
   * Assess risk factors. Every value is clamped to the 0–1 range.
   */
  private assessRiskFactors(
    projection: Projection,
    useCases: UseCase[],
    industry: string
  ): { technical: number; organizational: number; market: number } {
    // Technical risk based on average complexity (0 when there are no use cases)
    const avgComplexity = this.mean(
      useCases.map(uc => uc.implementation?.complexity_score || 5)
    );
    const technical = this.clamp(avgComplexity / 10, 0, 1);

    // Organizational risk based on timeline: saturates at 24 months
    const organizational = this.clamp(projection.timeline_months / 24, 0, 1);

    // Market risk based on industry
    const market = MLComparisonEngine.MARKET_RISK_BY_INDUSTRY.get(industry) ?? 0.5;

    return { technical, organizational, market };
  }

  /**
   * Calculate overall risk score: weighted sum of the three risks
   * (technical 0.4, organizational 0.3, market 0.3) scaled to 0–10.
   */
  private calculateRiskScore(features: ProjectFeatures): number {
    const weights = {
      technical: 0.4,
      organizational: 0.3,
      market: 0.3
    };
    const weightedRisk =
      features.technicalRisk * weights.technical +
      features.organizationalRisk * weights.organizational +
      features.marketRisk * weights.market;
    // Scale to 0-10
    return weightedRisk * 10;
  }

  /**
   * Predict the factor by which the expected ROI is multiplied.
   */
  private predictROIAdjustment(features: ProjectFeatures): number {
    // Simplified model: adjust based on risk and complexity
    const riskFactor = 1 - (features.technicalRisk * 0.2 + features.organizationalRisk * 0.1);
    const complexityFactor = 1 - (features.implementationComplexity / 10) * 0.15;
    // `??` rather than `||`: a historical success rate of exactly 0 is a valid value
    const industryFactor = features.similarProjectSuccessRate ?? 0.75;
    return riskFactor * complexityFactor * industryFactor;
  }

  /**
   * Predict project delays in whole months (never negative).
   */
  private predictDelays(features: ProjectFeatures): number {
    // log10 is undefined for non-positive amounts; treat them as "no delay",
    // which is also what the formula yields as the investment approaches zero.
    if (!(features.totalInvestment > 0)) {
      return 0;
    }
    // Base delay on complexity and investment scale
    const complexityDelay = features.implementationComplexity * 0.5;
    const scaleDelay = Math.log10(features.totalInvestment / 100000) * 2;
    return Math.max(0, Math.round(complexityDelay + scaleDelay));
  }

  /**
   * Identify key risk factors by applying fixed thresholds to the features.
   */
  private identifyKeyRisks(features: ProjectFeatures): KeyRiskFactor[] {
    const risks: KeyRiskFactor[] = [];
    if (features.technicalRisk > 0.7) {
      risks.push({
        factor: 'High Technical Complexity',
        impact: 'high',
        mitigation: 'Consider phased implementation and additional technical expertise'
      });
    }
    if (features.paybackPeriodMonths > 24) {
      risks.push({
        factor: 'Extended Payback Period',
        impact: 'medium',
        mitigation: 'Focus on quick wins and interim value delivery'
      });
    }
    if (features.useCaseCount > 5) {
      risks.push({
        factor: 'Multiple Use Cases',
        impact: 'medium',
        mitigation: 'Prioritize use cases and implement in waves'
      });
    }
    if (features.organizationalRisk > 0.6) {
      risks.push({
        factor: 'Organizational Change Management',
        impact: 'high',
        mitigation: 'Invest in change management and stakeholder alignment'
      });
    }
    return risks;
  }

  /**
   * Detect synergies between `project` and every other project in `allProjects`.
   *
   * A shared industry is valued at 5% and shared use-case categories at 10%
   * of this project's total investment. Each shared category is listed once.
   */
  private detectSynergies(
    project: ComparisonCandidate,
    allProjects: readonly ComparisonCandidate[]
  ): Synergy[] {
    const synergies: Synergy[] = [];
    const investment = project.projection.calculations.total_investment;
    // A Set keeps first-seen order and removes duplicate categories
    const ownCategories = new Set(project.useCases.map(uc => uc.category));

    for (const other of allProjects) {
      if (other.id === project.id) continue;

      // Same industry synergy
      if (other.industry === project.industry) {
        synergies.push({
          withProject: other.id,
          type: 'Shared industry knowledge',
          estimatedValue: investment * 0.05
        });
      }

      // Overlapping use cases
      const otherCategories = new Set(other.useCases.map(uc => uc.category));
      const commonCategories = [...ownCategories].filter(category => otherCategories.has(category));
      if (commonCategories.length > 0) {
        synergies.push({
          withProject: other.id,
          type: `Shared capabilities: ${commonCategories.join(', ')}`,
          estimatedValue: investment * 0.1
        });
      }
    }
    return synergies;
  }

  /**
   * Rank projects by multiple criteria.
   *
   * Rank 1 is best: highest predicted ROI, lowest risk score, shortest
   * payback-plus-delay. The overall rank orders projects by the weighted sum
   * of those ranks (ROI 0.4, risk 0.3, speed 0.3). Ties keep input order.
   *
   * @returns One ranking per input project, aligned by index.
   */
  private rankProjects(scoredProjects: readonly ScoredProject[]): ProjectRanking[] {
    interface RankEntry {
      scored: ScoredProject;
      ranking: ProjectRanking;
      weightedScore: number;
    }

    const entries: RankEntry[] = scoredProjects.map(scored => {
      const ranking: ProjectRanking = { overall: 0, byROI: 0, byRisk: 0, bySpeed: 0 };
      return { scored, ranking, weightedScore: 0 };
    });

    // Sort a copy (Array.prototype.sort is stable) and hand each entry its 1-based position.
    const assignRanks = (
      compare: (a: RankEntry, b: RankEntry) => number,
      apply: (entry: RankEntry, rank: number) => void
    ): void => {
      [...entries].sort(compare).forEach((entry, position) => apply(entry, position + 1));
    };

    const timeToValue = (entry: RankEntry): number =>
      entry.scored.features.paybackPeriodMonths + entry.scored.predictions.predictedDelayMonths;

    assignRanks(
      (a, b) => b.scored.predictions.predictedActualROI - a.scored.predictions.predictedActualROI,
      (entry, rank) => { entry.ranking.byROI = rank; }
    );
    assignRanks(
      (a, b) => a.scored.predictions.riskScore - b.scored.predictions.riskScore,
      (entry, rank) => { entry.ranking.byRisk = rank; }
    );
    assignRanks(
      (a, b) => timeToValue(a) - timeToValue(b),
      (entry, rank) => { entry.ranking.bySpeed = rank; }
    );

    // Calculate overall ranking (weighted)
    const weights = { roi: 0.4, risk: 0.3, speed: 0.3 };
    for (const entry of entries) {
      entry.weightedScore =
        entry.ranking.byROI * weights.roi +
        entry.ranking.byRisk * weights.risk +
        entry.ranking.bySpeed * weights.speed;
    }
    assignRanks(
      (a, b) => a.weightedScore - b.weightedScore,
      (entry, rank) => { entry.ranking.overall = rank; }
    );

    return entries.map(entry => entry.ranking);
  }

  /**
   * Generate recommendation based on predictions and rankings.
   */
  private generateRecommendation(
    predictions: MLPrediction,
    ranking: ProjectRanking
  ): Recommendation {
    const { successProbability, riskScore } = predictions;
    if (successProbability > 0.8 && ranking.overall === 1) {
      return 'strongly_recommend';
    }
    if (successProbability > 0.7 && ranking.overall <= 2) {
      return 'recommend';
    }
    if (successProbability > 0.5 && riskScore < 6) {
      return 'consider';
    }
    if (successProbability > 0.3) {
      return 'reconsider';
    }
    return 'not_recommended';
  }

  /**
   * Generate human-readable insights.
   */
  private generateInsights(projectPrediction: ScoredProject): string[] {
    const insights: string[] = [];
    const { predictions, features } = projectPrediction;

    // Success probability insight
    if (predictions.successProbability > 0.8) {
      insights.push('High success probability indicates strong project viability');
    } else if (predictions.successProbability < 0.5) {
      insights.push('Success probability below 50% suggests significant risks');
    }

    // ROI adjustment insight: report deviations larger than 20% of the
    // expected ROI's magnitude (abs keeps the threshold meaningful when the
    // expected ROI is negative).
    const roiDiff = Math.abs(predictions.predictedActualROI - features.expectedROI);
    if (roiDiff > Math.abs(features.expectedROI) * 0.2) {
      insights.push(
        `ML model predicts ${predictions.predictedActualROI > features.expectedROI ? 'higher' : 'lower'} ` +
        `actual ROI than projected (${roiDiff.toFixed(1)}% difference)`
      );
    }

    // Delay insight
    if (predictions.predictedDelayMonths > 3) {
      insights.push(
        `Expect approximately ${predictions.predictedDelayMonths} months delay based on complexity`
      );
    }

    // Synergy insights
    if (predictions.synergies && predictions.synergies.length > 0) {
      const totalSynergyValue = predictions.synergies
        .reduce((sum, synergy) => sum + synergy.estimatedValue, 0);
      insights.push(
        `Potential synergies with other projects could add $${totalSynergyValue.toLocaleString()} in value`
      );
    }

    // Risk insights
    if (predictions.keyRiskFactors.some(risk => risk.impact === 'high')) {
      insights.push('High-impact risks require immediate mitigation planning');
    }
    return insights;
  }

  /**
   * Initialize Random Forest (simplified implementation).
   */
  private initializeForests(): void {
    // In production, load pre-trained models
    // For now, create trees with random weights
    for (let i = 0; i < this.treeCount; i++) {
      this.forests.push(new DecisionTree(this.maxDepth));
    }
  }

  // Statistical utilities

  /** Arithmetic mean; 0 for an empty list (avoids 0/0 = NaN). */
  private mean(values: number[]): number {
    if (values.length === 0) {
      return 0;
    }
    return values.reduce((a, b) => a + b, 0) / values.length;
  }

  /** Population standard deviation; 0 for an empty list. */
  private standardDeviation(values: number[]): number {
    const avg = this.mean(values);
    const squaredDiffs = values.map(v => Math.pow(v - avg, 2));
    return Math.sqrt(this.mean(squaredDiffs));
  }

  /** Restrict `value` to the inclusive range [`min`, `max`]. */
  private clamp(value: number, min: number, max: number): number {
    return Math.min(max, Math.max(min, value));
  }

  // Historical data (in production, query from database)

  /** Industry base success rate, reduced linearly by complexity (complexity 10 halves it). */
  private getHistoricalSuccessRate(industry: string, complexity: number): number {
    const base = MLComparisonEngine.SUCCESS_RATE_BY_INDUSTRY.get(industry) ?? 0.70;
    // Adjust for complexity
    return base * (1 - complexity / 20);
  }

  /** Average ROI for the industry. */
  private getIndustryAverageROI(industry: string): number {
    return MLComparisonEngine.AVERAGE_ROI_BY_INDUSTRY.get(industry) ?? 25;
  }
}
/**
 * Simplified Decision Tree for Random Forest.
 *
 * Despite the name this is currently a randomly weighted linear scorer passed
 * through a sigmoid. `maxDepth` is stored but not used by the scoring, and a
 * weight is drawn for `useCaseCount` although `predict` does not read it.
 */
class DecisionTree {
  private readonly weights: Map<string, number> = new Map();

  /**
   * @param maxDepth - Intended maximum tree depth (not used by the simplified model).
   */
  constructor(private readonly maxDepth: number) {
    // Initialize with random weights in [-1, 1) (in production, use trained weights)
    const weightedFeatures = [
      'totalInvestment', 'expectedROI', 'paybackPeriodMonths',
      'useCaseCount', 'implementationComplexity', 'technicalRisk'
    ];
    for (const feature of weightedFeatures) {
      this.weights.set(feature, Math.random() * 2 - 1);
    }
  }

  /**
   * Score a feature vector.
   *
   * @param features - Project features; only ROI, payback period, complexity,
   *   technical risk and total investment are read.
   * @returns `success`, a finite value in [0, 1].
   */
  predict(features: ProjectFeatures): { success: number } {
    const weightOf = (feature: string): number => this.weights.get(feature) ?? 0;

    // log10 is -Infinity at 0 and NaN below 0, which would force the sigmoid
    // to exactly 0/1 or NaN; a non-positive investment contributes nothing.
    const investmentTerm =
      features.totalInvestment > 0 ? Math.log10(features.totalInvestment) / 10 : 0;

    // Simplified linear model (in production, use proper tree structure)
    let score = 0.5; // Base score fed into the sigmoid
    score += (features.expectedROI / 100) * weightOf('expectedROI');
    score -= (features.paybackPeriodMonths / 60) * weightOf('paybackPeriodMonths');
    score -= (features.implementationComplexity / 10) * weightOf('implementationComplexity');
    score -= features.technicalRisk * weightOf('technicalRisk');
    score += investmentTerm * weightOf('totalInvestment');

    // Sigmoid to bound between 0 and 1
    return {
      success: 1 / (1 + Math.exp(-score))
    };
  }
}