/**
* Enrich Context Tool
*
* Enriches codebase analysis with knowledge graph data from YAGO and Schema.org.
* Provides semantic annotations, entity linking, and structured metadata.
*/
import { getYAGOResolver, YAGOEntity } from '../knowledge-graph/yago-resolver.js';
import { getSchemaMapper } from '../knowledge-graph/schema-mapper.js';
import { getDatabase } from '../knowledge-graph/database.js';
import { EntityType } from '../knowledge-graph/schema.js';
/**
 * Options accepted by `enrichContext`.
 */
export interface EnrichContextArgs {
  /**
   * Path of the codebase being enriched. Echoed back on the result and
   * recorded as `source_file` on entities inserted into the database.
   */
  path: string;

  /**
   * Output of a prior codebase analysis. When omitted, `enrichContext`
   * returns empty enrichment sections. The fields read from it in this file
   * are `frameworks`, `languages`, `package.dependencies`,
   * `package.devDependencies`, `build_tools` and `patterns`.
   */
  analysis_result?: any;

  /**
   * How much of the analysis is turned into entities (default `'standard'`):
   * `'minimal'` uses frameworks and languages, `'standard'` adds
   * dependencies, `'comprehensive'` adds dev dependencies, build tools and
   * patterns.
   */
  enrichment_level?: 'minimal' | 'standard' | 'comprehensive';

  /** Run the YAGO entity-resolution step (default `true`). */
  include_yago?: boolean;

  /** Run the Schema.org annotation / JSON-LD step (default `true`). */
  include_schema?: boolean;

  /** Upper bound on how many extracted entities are resolved (default `10`). */
  max_entities?: number;
}
/**
 * Result produced by `enrichContext` and rendered by `formatEnrichedContext`.
 * Every section except `path` is optional and is only set when the
 * corresponding enrichment step ran.
 */
export interface EnrichedContext {
  /** The `path` that was passed in. */
  path: string;

  /** Schema.org annotation: an `@type` plus the extracted properties. */
  schema_annotation?: any;

  /** Resolved YAGO candidates, keyed by the entity name that was looked up. */
  yago_entities?: Record<string, YAGOEntity[]>;

  /** JSON-LD document generated by the schema mapper. */
  json_ld?: any;

  /** Aggregate confidence figures over all resolved YAGO candidates. */
  confidence_stats?: {
    /** Number of candidates across all entity names. */
    total_entities: number;
    /** Candidates with confidence of at least 0.9. */
    auto_linked: number;
    /** Candidates below the 0.9 threshold. */
    needs_review: number;
    /** Mean candidate confidence, rounded to two decimals. */
    avg_confidence: number;
  };

  /** Bookkeeping about the enrichment run itself. */
  enrichment_metadata?: {
    /** Enrichment level that was applied. */
    level: string;
    /** ISO-8601 timestamp taken when enrichment started. */
    timestamp: string;
    /** Entities that already had a stored YAGO mapping. */
    cache_hits: number;
    /** Entities that had no stored YAGO mapping when looked up. */
    yago_queries: number;
  };
}
/**
 * Enrich codebase context with knowledge graph data.
 *
 * When `analysis_result` is missing, returns immediately with empty
 * `yago_entities`, `schema_annotation` and `json_ld`; the database, resolver
 * and mapper are not touched on that path.
 *
 * Otherwise entities are extracted from the analysis and:
 * - YAGO (`include_yago`): at most `max_entities` entities are resolved.
 *   Entities that resolve but are not yet in the database are inserted, and
 *   when the best candidate has confidence >= 0.9 and the entity has no
 *   stored mapping, that candidate is persisted as its YAGO mapping. A failure
 *   for one entity is logged and does not abort the remaining ones.
 * - Schema.org (`include_schema`): the detected type and extracted properties
 *   become `schema_annotation` and are rendered into `json_ld`.
 *
 * @param args - Enrichment options (path, analysis result, level, toggles, limit).
 * @returns The enriched context; sections whose step was disabled stay unset.
 */
export async function enrichContext(args: EnrichContextArgs): Promise<EnrichedContext> {
  const {
    path,
    analysis_result,
    enrichment_level = 'standard',
    include_yago = true,
    include_schema = true,
    max_entities = 10,
  } = args;

  // Candidates at or above this confidence count as auto-linked and are persisted.
  const autoLinkConfidence = 0.9;

  // Held in a local so the counters can be updated without non-null assertions.
  const metadata = {
    level: enrichment_level,
    timestamp: new Date().toISOString(),
    cache_hits: 0,
    yago_queries: 0,
  };
  const result: EnrichedContext = { path, enrichment_metadata: metadata };

  // If no analysis provided, we can only do minimal enrichment
  if (!analysis_result) {
    return {
      ...result,
      yago_entities: {},
      schema_annotation: {},
      json_ld: {},
    };
  }

  const db = await getDatabase();
  const yagoResolver = getYAGOResolver();
  const schemaMapper = getSchemaMapper();

  // Extract entities from analysis
  const entities = extractEntitiesFromAnalysis(analysis_result, enrichment_level);

  // YAGO enrichment
  if (include_yago) {
    const yagoEntities: Record<string, YAGOEntity[]> = {};

    // A negative limit would make `slice` drop entities from the end instead
    // of processing none, so clamp it to zero.
    const entityLimit = Math.max(0, max_entities);

    for (const [name, type] of entities.slice(0, entityLimit)) {
      try {
        // An entity with a stored mapping counts as a cache hit; everything
        // else is counted as a YAGO query.
        const existing = db.findEntityByName(name);
        const storedMapping = existing?.id ? db.findYAGOMappingByEntityId(existing.id) : undefined;
        if (storedMapping) {
          metadata.cache_hits++;
        } else {
          metadata.yago_queries++;
        }

        // Resolve entity (uses cache automatically). The second argument is
        // presumably the maximum number of candidates — confirm in the resolver.
        const resolved = await yagoResolver.resolveEntity(name, 3);
        if (resolved.length === 0) {
          continue;
        }
        yagoEntities[name] = resolved;

        // Store in database if not exists
        let entityId = existing?.id;
        if (!existing) {
          entityId = await db.insertEntity({
            name,
            type: type as EntityType,
            source_file: path,
            metadata: { detected_from: 'analysis' },
          });
        }

        // Persist a high-confidence link whenever the entity has no stored
        // mapping yet. This includes entities that already existed in the
        // database: previously they were skipped and so could never turn
        // into cache hits on later runs.
        const best = resolved[0];
        if (!storedMapping && entityId != null && best.confidence >= autoLinkConfidence) {
          await db.upsertYAGOMapping({
            entity_id: entityId,
            yago_uri: best.uri,
            yago_type: best.type,
            confidence: best.confidence,
            facts: { facts: best.facts },
          });
        }
      } catch (error) {
        // Best effort: one failing entity must not abort the whole enrichment.
        console.error(`Failed to resolve YAGO entity for ${name}:`, error);
      }
    }

    result.yago_entities = yagoEntities;

    // Calculate confidence stats (omitted entirely when nothing resolved)
    const allEntities = Object.values(yagoEntities).flat();
    if (allEntities.length > 0) {
      const autoLinked = allEntities.filter((e) => e.confidence >= autoLinkConfidence).length;
      const avgConfidence =
        allEntities.reduce((sum, e) => sum + e.confidence, 0) / allEntities.length;
      result.confidence_stats = {
        total_entities: allEntities.length,
        auto_linked: autoLinked,
        needs_review: allEntities.length - autoLinked,
        // Round to two decimals for display.
        avg_confidence: Math.round(avgConfidence * 100) / 100,
      };
    }
  }

  // Schema.org enrichment
  if (include_schema) {
    const schemaType = schemaMapper.detectCodebaseType(analysis_result);
    const properties = schemaMapper.extractProperties(analysis_result);
    result.schema_annotation = {
      '@type': schemaType,
      ...properties,
    };

    // Generate JSON-LD
    result.json_ld = schemaMapper.generateJSONLD({
      entity_id: 0, // Temporary, will be assigned when stored
      schema_type: schemaType,
      properties,
      context_url: 'https://schema.org',
    });
  }

  return result;
}
/**
 * Extract entities from codebase analysis based on enrichment level.
 *
 * Sources, in output order:
 * - always: `analysis.frameworks` (array) and the keys of `analysis.languages`
 * - `'standard'` and `'comprehensive'`: the first 10 keys of
 *   `analysis.package.dependencies`
 * - `'comprehensive'` only: the first 10 keys of
 *   `analysis.package.devDependencies`, then `analysis.build_tools` and
 *   `analysis.patterns` (arrays)
 *
 * Each name is emitted at most once, with the type of its first occurrence,
 * so a framework that is also a dependency does not take two slots of the
 * caller's entity budget. Names that are not non-empty strings are skipped.
 *
 * @param analysis - Untyped analysis result; absent or malformed fields are ignored.
 * @param level - Enrichment level controlling which sources are used.
 * @returns `[name, entityType]` pairs, de-duplicated by name.
 */
function extractEntitiesFromAnalysis(
  analysis: any,
  level: 'minimal' | 'standard' | 'comprehensive'
): Array<[string, string]> {
  const entities: Array<[string, string]> = [];
  if (analysis == null) {
    return entities;
  }

  const seen = new Set<string>();

  // Append a name unless it is unusable or was already emitted.
  const add = (name: unknown, type: string): void => {
    if (typeof name !== 'string' || name.length === 0 || seen.has(name)) {
      return;
    }
    seen.add(name);
    entities.push([name, type]);
  };

  // Append every element of an array-valued field; anything else is ignored.
  const addAll = (values: unknown, type: string): void => {
    if (Array.isArray(values)) {
      for (const value of values) {
        add(value, type);
      }
    }
  };

  // Keys of a plain object map; arrays and primitives would yield index keys,
  // so they are treated as "no entries".
  const mapKeys = (value: unknown): string[] =>
    value !== null && typeof value === 'object' && !Array.isArray(value)
      ? Object.keys(value)
      : [];

  // Extract frameworks (always included)
  addAll(analysis.frameworks, EntityType.FRAMEWORK);

  // Extract languages (always included)
  for (const lang of mapKeys(analysis.languages)) {
    add(lang, EntityType.LANGUAGE);
  }

  // Standard level: add dependencies (capped at the first 10)
  if (level !== 'minimal') {
    for (const dep of mapKeys(analysis.package?.dependencies).slice(0, 10)) {
      add(dep, EntityType.LIBRARY);
    }
  }

  // Comprehensive level: add dev dependencies and tools
  if (level === 'comprehensive') {
    for (const dep of mapKeys(analysis.package?.devDependencies).slice(0, 10)) {
      add(dep, EntityType.LIBRARY);
    }

    // Add build tools
    addAll(analysis.build_tools, EntityType.TOOL);

    // Add detected patterns
    addAll(analysis.patterns, EntityType.PATTERN);
  }

  return entities;
}
/**
 * Format enriched context for display.
 *
 * Renders a Markdown document with a title line followed by one section per
 * populated field, in this order: Schema.org annotation, YAGO entities,
 * confidence statistics, enrichment metadata, JSON-LD. A section is omitted
 * when its field is falsy; the YAGO section is also omitted when the map has
 * no keys.
 *
 * @param context - The enriched context to render.
 * @returns The Markdown text, lines joined with `\n`.
 */
export function formatEnrichedContext(context: EnrichedContext): string {
  const {
    path,
    schema_annotation: schemaAnnotation,
    yago_entities: yagoEntities,
    confidence_stats: stats,
    enrichment_metadata: meta,
    json_ld: jsonLd,
  } = context;

  // Pretty-printed JSON wrapped in a fenced code block.
  const jsonFence = (value: unknown): string[] => [
    '```json',
    JSON.stringify(value, null, 2),
    '```',
  ];

  const out: string[] = [`# Enriched Context: ${path}`, ''];

  // Schema.org annotation
  if (schemaAnnotation) {
    out.push('## Schema.org Annotation', ...jsonFence(schemaAnnotation), '');
  }

  // YAGO entities: one sub-heading per looked-up name, one bullet per candidate
  const yagoGroups = yagoEntities ? Object.entries(yagoEntities) : [];
  if (yagoGroups.length > 0) {
    out.push('## YAGO Knowledge Graph Entities', '');
    for (const [lookupName, candidates] of yagoGroups) {
      out.push(`### ${lookupName}`);
      for (const candidate of candidates) {
        out.push(
          `- **${candidate.label}** (confidence: ${candidate.confidence.toFixed(2)})`,
          ` - Type: ${candidate.type}`,
          ` - URI: ${candidate.uri}`
        );
        if (candidate.description) {
          out.push(` - Description: ${candidate.description}`);
        }
        const factCount = candidate.facts.length;
        if (factCount > 0) {
          out.push(` - Facts: ${factCount} relationships`);
        }
      }
      out.push('');
    }
  }

  // Confidence stats
  if (stats) {
    out.push(
      '## Confidence Statistics',
      `- Total entities: ${stats.total_entities}`,
      `- Auto-linked (≥0.9): ${stats.auto_linked}`,
      `- Needs review (<0.9): ${stats.needs_review}`,
      `- Average confidence: ${stats.avg_confidence}`,
      ''
    );
  }

  // Enrichment metadata
  if (meta) {
    out.push(
      '## Enrichment Metadata',
      `- Level: ${meta.level}`,
      `- Timestamp: ${meta.timestamp}`,
      `- Cache hits: ${meta.cache_hits}`,
      `- YAGO queries: ${meta.yago_queries}`,
      ''
    );
  }

  // JSON-LD (last section, so no trailing blank line)
  if (jsonLd) {
    out.push('## JSON-LD Structured Data', ...jsonFence(jsonLd));
  }

  return out.join('\n');
}