/**
* YAGO 4.5 Resolver Service
*
* Resolves entities to YAGO knowledge graph URIs and retrieves facts.
* Based on Ludwig neurosymbolic system patterns with confidence scoring.
*/
import SparqlClient from 'sparql-http-client';
import { createHash } from 'crypto';
import { getDatabase } from './database.js';
import type { YAGOMapping } from './schema.js';
/** Default SPARQL endpoint; used when the YAGOResolver constructor is called without an explicit URL. */
const YAGO_SPARQL_ENDPOINT = 'https://yago-knowledge.org/sparql/query';
/** Maximum age, in milliseconds, for both in-memory cache entries and stored YAGO mappings. */
const CACHE_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days (Ludwig pattern)
/** Minimum confidence the first query result must reach before its mapping is written to the database. */
const CONFIDENCE_THRESHOLD = 0.9; // Auto-link threshold (Ludwig pattern)
/**
* A candidate YAGO entity returned by the resolver, together with the facts
* fetched for it and a score describing how well it matches the search term.
*/
export interface YAGOEntity {
/** IRI identifying the entity in the YAGO graph. */
uri: string;
/** The entity's rdfs:label; an empty string when the entity was rebuilt from a stored mapping. */
label: string;
/** rdf:type IRI (queries only select types under http://schema.org/); empty when a stored mapping has no type. */
type: string;
/** schema:description of the entity, when the graph provides one. */
description?: string;
/** Triples that have this entity as their subject. */
facts: YAGOFact[];
/** Match score from comparing the search term with the label; 1.0 is an exact case-insensitive match. */
confidence: number;
}
/**
* One fact about an entity: the predicate/object half of a triple whose
* subject is the entity the fact is attached to.
*/
export interface YAGOFact {
/** IRI of the predicate. */
predicate: string;
/** String value of the object term as returned by the endpoint. */
object: string;
/** rdfs:label of the object, when it has one. */
objectLabel?: string;
}
/** One SPARQL result row as consumed here: variable name -> term carrying a string `value`. */
type SparqlBinding = Record<string, { value: string } | undefined>;

/**
 * YAGO Resolver for entity linking and fact retrieval.
 *
 * Lookups go through three layers, in order: an in-memory cache, the mapping
 * stored in the project database, and finally the SPARQL endpoint. Results
 * from the endpoint are scored against the search term; when the first result
 * reaches CONFIDENCE_THRESHOLD and the name is a known entity, the mapping is
 * persisted so later lookups can skip the network.
 */
export class YAGOResolver {
  private readonly sparqlClient: SparqlClient;
  private readonly inMemoryCache: Map<string, { result: YAGOEntity[]; timestamp: number }>;

  /**
   * @param endpointUrl SPARQL query endpoint; defaults to the public YAGO endpoint.
   */
  constructor(endpointUrl: string = YAGO_SPARQL_ENDPOINT) {
    this.sparqlClient = new SparqlClient({ endpointUrl });
    this.inMemoryCache = new Map();
  }

  /**
   * Resolve an entity name to candidate YAGO entities.
   *
   * @param name Search term; matched case-insensitively against labels and descriptions.
   * @param maxResults Maximum number of result rows to request (default 5).
   * @returns Matching entities, or an empty array when the name is blank, the
   *   limit is not a positive finite number, nothing matches, or the endpoint
   *   query fails (the failure is logged, not thrown).
   */
  async resolveEntity(name: string, maxResults: number = 5): Promise<YAGOEntity[]> {
    // A blank term would match every label in the graph; an invalid limit cannot
    // be expressed in SPARQL. Neither is worth a round trip.
    const limit = YAGOResolver.toLimit(maxResults);
    if (limit === null || name.trim() === '') return [];

    // The result set depends on the limit, so the limit is part of the cache key.
    const cacheKey = `${limit}:${name}`;
    const cached = this.getMemoryCache(cacheKey);
    if (cached) return cached;

    // Check database cache
    const db = await getDatabase();
    const entity = db.findEntityByName(name);
    if (entity?.id) {
      const yagoMapping = db.findYAGOMappingByEntityId(entity.id);
      if (yagoMapping && this.isCacheFresh(yagoMapping.cached_at)) {
        const result = [this.mappingToEntity(yagoMapping)];
        this.setMemoryCache(cacheKey, result);
        return result;
      }
    }

    // Query YAGO SPARQL endpoint
    let results: YAGOEntity[];
    try {
      results = await this.queryYAGO(name, limit);
    } catch (error) {
      console.error('YAGO query failed:', error);
      return [];
    }

    // Persist the first result when it is confident enough. A storage failure
    // must not throw away results that were fetched successfully.
    const topResult = results[0];
    if (topResult && entity?.id && topResult.confidence >= CONFIDENCE_THRESHOLD) {
      try {
        await db.upsertYAGOMapping({
          entity_id: entity.id,
          yago_uri: topResult.uri,
          yago_type: topResult.type,
          confidence: topResult.confidence,
          facts: { facts: topResult.facts },
        });
      } catch (error) {
        console.error('Failed to persist YAGO mapping:', error);
      }
    }

    this.setMemoryCache(cacheKey, results);
    return results;
  }

  /**
   * Query the SPARQL endpoint for entities whose label or description contains
   * the search term, then attach facts and a confidence score to each row.
   *
   * Rows come back ordered by label length. Rows lacking an entity, label or
   * type binding are skipped.
   */
  private async queryYAGO(name: string, maxResults: number): Promise<YAGOEntity[]> {
    const limit = YAGOResolver.toLimit(maxResults);
    if (limit === null) return [];

    // The term is caller-supplied text placed inside a SPARQL string literal.
    const needle = YAGOResolver.escapeSparqlString(name.trim().toLowerCase());
    const query = `
      PREFIX schema: <http://schema.org/>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      SELECT DISTINCT ?entity ?label ?type ?description
      WHERE {
        ?entity rdfs:label ?label .
        ?entity rdf:type ?type .
        OPTIONAL { ?entity schema:description ?description }
        FILTER (
          CONTAINS(LCASE(STR(?label)), "${needle}") ||
          CONTAINS(LCASE(STR(?description)), "${needle}")
        )
        FILTER (
          STRSTARTS(STR(?type), "http://schema.org/")
        )
        FILTER (lang(?label) = 'en' || lang(?label) = '')
      }
      ORDER BY STRLEN(STR(?label))
      LIMIT ${limit}
    `;
    const stream = await this.sparqlClient.query.select(query);
    const rows = await this.parseStream(stream);

    // An entity with several types yields several rows; fetch its facts once.
    const factsByUri = new Map<string, Promise<YAGOFact[]>>();
    const pending = rows.map(async (row): Promise<YAGOEntity | null> => {
      const uri = row.entity?.value;
      const label = row.label?.value;
      const type = row.type?.value;
      if (uri === undefined || label === undefined || type === undefined) return null;

      let pendingFacts = factsByUri.get(uri);
      if (!pendingFacts) {
        pendingFacts = this.getFacts(uri);
        factsByUri.set(uri, pendingFacts);
      }
      const facts = await pendingFacts;
      return {
        uri,
        label,
        type,
        description: row.description?.value,
        facts: [...facts], // own copy so entities never share one array
        confidence: this.calculateConfidence(name, label),
      };
    });

    const resolved = await Promise.all(pending);
    return resolved.filter((entity): entity is YAGOEntity => entity !== null);
  }

  /**
   * Fetch up to 20 schema.org facts whose subject is the given entity.
   *
   * @returns The facts, or an empty array when the IRI cannot be embedded
   *   safely in a query or the request fails (the failure is logged).
   */
  private async getFacts(entityUri: string): Promise<YAGOFact[]> {
    if (!YAGOResolver.isSafeIri(entityUri)) {
      console.error('Failed to get facts:', new Error(`Unsafe entity IRI: ${entityUri}`));
      return [];
    }
    const query = `
      PREFIX schema: <http://schema.org/>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      SELECT ?predicate ?object ?objectLabel
      WHERE {
        <${entityUri}> ?predicate ?object .
        OPTIONAL { ?object rdfs:label ?objectLabel }
        FILTER (
          STRSTARTS(STR(?predicate), "http://schema.org/") &&
          !sameTerm(?predicate, rdfs:label)
        )
        FILTER (lang(?objectLabel) = 'en' || lang(?objectLabel) = '' || !BOUND(?objectLabel))
      }
      LIMIT 20
    `;
    try {
      const stream = await this.sparqlClient.query.select(query);
      const rows = await this.parseStream(stream);
      const facts: YAGOFact[] = [];
      for (const row of rows) {
        const predicate = row.predicate?.value;
        const object = row.object?.value;
        if (predicate === undefined || object === undefined) continue;
        facts.push({ predicate, object, objectLabel: row.objectLabel?.value });
      }
      return facts;
    } catch (error) {
      console.error('Failed to get facts:', error);
      return [];
    }
  }

  /**
   * Calculate confidence score for entity matching (Ludwig pattern).
   *
   * Both strings are lower-cased and trimmed first. Exact match scores 1.0,
   * a label starting with the query 0.95, a label containing it 0.85;
   * anything else gets the bigram Dice similarity, floored at 0.5.
   */
  private calculateConfidence(query: string, label: string): number {
    const queryLower = query.toLowerCase().trim();
    const labelLower = label.toLowerCase().trim();
    if (queryLower === labelLower) return 1.0;
    if (labelLower.startsWith(queryLower)) return 0.95;
    if (labelLower.includes(queryLower)) return 0.85;
    // Fuzzy fallback: Dice coefficient over character bigrams.
    return Math.max(0.5, this.stringSimilarity(queryLower, labelLower));
  }

  /**
   * Dice coefficient over character bigrams, always within [0, 1].
   *
   * Bigrams are matched as a multiset, so a repeated bigram only counts as
   * often as it occurs in both strings. Returns 0 when neither string is long
   * enough to contain a bigram.
   */
  private stringSimilarity(str1: string, str2: string): number {
    const bigrams1 = this.getBigrams(str1);
    const bigrams2 = this.getBigrams(str2);
    const total = bigrams1.length + bigrams2.length;
    if (total === 0) return 0; // avoids 0 / 0

    const available = new Map<string, number>();
    for (const bigram of bigrams2) {
      available.set(bigram, (available.get(bigram) ?? 0) + 1);
    }
    let shared = 0;
    for (const bigram of bigrams1) {
      const left = available.get(bigram) ?? 0;
      if (left > 0) {
        shared += 1;
        available.set(bigram, left - 1);
      }
    }
    return (2 * shared) / total;
  }

  /**
   * Split a string into its overlapping two-character substrings, in order.
   */
  private getBigrams(str: string): string[] {
    const bigrams: string[] = [];
    for (let i = 0; i < str.length - 1; i++) {
      bigrams.push(str.substring(i, i + 2));
    }
    return bigrams;
  }

  /**
   * Collect every row emitted by a SPARQL result stream.
   *
   * Resolves with the rows once the stream emits `end`; rejects with the
   * stream's error if it emits `error`.
   */
  // The stream is left as `any`: its concrete type belongs to the SPARQL client
  // library and only the event-emitter `on` method is used here.
  private parseStream(stream: any): Promise<SparqlBinding[]> {
    return new Promise((resolve, reject) => {
      const rows: SparqlBinding[] = [];
      stream.on('data', (row: SparqlBinding) => {
        rows.push(row);
      });
      stream.on('end', () => {
        resolve(rows);
      });
      stream.on('error', (error: unknown) => {
        reject(error);
      });
    });
  }

  /**
   * Check whether a stored mapping is younger than CACHE_TTL_MS.
   *
   * Accepts string or numeric timestamps as well as Date objects, in case the
   * storage layer hands the value back unparsed; unparseable values are stale.
   */
  private isCacheFresh(cachedAt?: Date | string | number | null): boolean {
    if (cachedAt === undefined || cachedAt === null) return false;
    const cachedTime =
      cachedAt instanceof Date ? cachedAt.getTime() : new Date(cachedAt).getTime();
    if (Number.isNaN(cachedTime)) return false;
    return Date.now() - cachedTime < CACHE_TTL_MS;
  }

  /**
   * Read an entry from the in-memory cache, evicting it when it has reached
   * CACHE_TTL_MS (the same boundary isCacheFresh uses).
   */
  private getMemoryCache(key: string): YAGOEntity[] | null {
    const cached = this.inMemoryCache.get(key);
    if (!cached) return null;
    if (Date.now() - cached.timestamp >= CACHE_TTL_MS) {
      this.inMemoryCache.delete(key);
      return null;
    }
    return cached.result;
  }

  /**
   * Store a result in the in-memory cache, stamped with the current time.
   */
  private setMemoryCache(key: string, result: YAGOEntity[]): void {
    this.inMemoryCache.set(key, {
      result,
      timestamp: Date.now(),
    });
  }

  /**
   * Rebuild an entity from a stored mapping.
   *
   * The label is not stored with the mapping, so it is left empty. Facts are
   * read from the `facts` property of the stored payload and ignored unless
   * that property is an array.
   */
  private mappingToEntity(mapping: YAGOMapping): YAGOEntity {
    const payload: unknown = mapping.facts;
    const stored =
      typeof payload === 'object' && payload !== null
        ? (payload as { facts?: unknown }).facts
        : undefined;
    return {
      uri: mapping.yago_uri,
      label: '', // Would need to query for label
      type: mapping.yago_type || '',
      facts: Array.isArray(stored) ? (stored as YAGOFact[]) : [],
      confidence: mapping.confidence,
    };
  }

  /**
   * Hash string for cache key (hex-encoded SHA-256).
   */
  static hashQuery(query: string): string {
    return createHash('sha256').update(query).digest('hex');
  }

  /**
   * Normalise a requested result count to a positive integer, or null when it
   * cannot be used as a SPARQL LIMIT (NaN, infinite, or below 1).
   */
  private static toLimit(maxResults: number): number | null {
    const limit = Math.floor(maxResults);
    return Number.isFinite(limit) && limit >= 1 ? limit : null;
  }

  /**
   * Escape text for use inside a double-quoted SPARQL string literal: the
   * backslash, the double quote, and line breaks are the characters such a
   * literal cannot contain unescaped.
   */
  private static escapeSparqlString(value: string): string {
    return value.replace(/[\\"\n\r]/g, (ch) => {
      switch (ch) {
        case '\\':
          return '\\\\';
        case '"':
          return '\\"';
        case '\n':
          return '\\n';
        default:
          return '\\r';
      }
    });
  }

  /**
   * True when the value is non-empty and contains none of the characters that
   * are forbidden between `<` and `>` in a SPARQL IRI reference.
   */
  private static isSafeIri(value: string): boolean {
    return /^[^\u0000-\u0020<>"{}|^`\\]+$/.test(value);
  }
}
/**
 * Module-wide resolver shared by all callers; remains null until the first
 * call to getYAGOResolver.
 */
let resolverInstance: YAGOResolver | null = null;

/**
 * Return the shared YAGOResolver, constructing it with default settings on
 * first use and handing back that same object on every later call.
 */
export function getYAGOResolver(): YAGOResolver {
  return resolverInstance ?? (resolverInstance = new YAGOResolver());
}