Elasticsearch Knowledge Graph for MCP

by j3k0
  • legacy
import { Entity, Relation, KnowledgeGraph } from './types.js'; /** * Represents a search item for an entity with its searchable text */ export interface EntitySearchItem { entity: Entity; searchText: string; } /** * Represents an entity with its search score */ export interface ScoredEntity { entity: Entity; score: number; } /** * Represents a parsed query with its components */ export interface ParsedQuery { freeText: string; type: string | null; name: string | null; include: string[]; exclude: string[]; or: string[][]; } /** * Represents a knowledge graph with scored entities */ export interface ScoredKnowledgeGraph { // entities: Entity[]; scoredEntities: ScoredEntity[]; } /** * Parses a search query string into structured components for advanced searching. * * @param query The raw query string to parse * @returns An object containing the parsed query components: * - freeText: Any text not matched by special operators, used for fuzzy matching * - type: Entity type filter (from type:value) * - name: Entity name filter (from name:value) * - include: Terms that must be included (from +term) * - exclude: Terms that must not be included (from -term) * - or: Groups of alternative terms (from term1|term2|term3) */ export function parseQuery(query: string): ParsedQuery { const result: ParsedQuery = { freeText: '', type: null, name: null, include: [], exclude: [], or: [] }; // Early return for empty query if (!query || query.trim() === '') { return result; } // Regular expression to match special query operators const typeRegex = /type:([^\s]+)/gi; const nameRegex = /name:([^\s]+)/gi; const includeRegex = /\+([^\s]+)/g; const excludeRegex = /-([^\s]+)/g; const orRegex = /(\w+(?:\|\w+)+)/g; // Matches words separated by pipe symbols // Extract type filter const typeMatch = typeRegex.exec(query); if (typeMatch) { result.type = typeMatch[1]; query = query.replace(typeMatch[0], ''); } // Extract name filter const nameMatch = nameRegex.exec(query); if (nameMatch) { result.name = nameMatch[1]; query = query.replace(nameMatch[0], ''); } // Extract include terms - collect all matches first let includeMatches = []; let includeMatch; while ((includeMatch = includeRegex.exec(query)) !== null) { includeMatches.push(includeMatch); } // Process matches in reverse order to avoid index issues when replacing for (let i = includeMatches.length - 1; i >= 0; i--) { const match = includeMatches[i]; result.include.push(match[1]); query = query.slice(0, match.index) + query.slice(match.index + match[0].length); } // Extract exclude terms - collect all matches first let excludeMatches = []; let excludeMatch; while ((excludeMatch = excludeRegex.exec(query)) !== null) { excludeMatches.push(excludeMatch); } // Process matches in reverse order for (let i = excludeMatches.length - 1; i >= 0; i--) { const match = excludeMatches[i]; result.exclude.push(match[1]); query = query.slice(0, match.index) + query.slice(match.index + match[0].length); } // Extract OR groups - collect all matches first let orMatches = []; let orMatch; while ((orMatch = orRegex.exec(query)) !== null) { orMatches.push(orMatch); } // Process matches in reverse order for (let i = orMatches.length - 1; i >= 0; i--) { const match = orMatches[i]; const orTerms = match[0].split('|'); result.or.push(orTerms); query = query.slice(0, match.index) + query.slice(match.index + match[0].length); } // Remaining text is the free text search result.freeText = query.trim(); return result; } /** * Creates entity search items ready for filtering * * @param entities The list of entities to prepare for search * @returns An array of entity search items with searchable text */ export function createEntitySearchItems(entities: Entity[]): EntitySearchItem[] { return entities.map(entity => ({ entity, // Combine entity name, type, and observations for search searchText: [ entity.name, entity.entityType, ...entity.observations ].join(' ').toLowerCase() })); } /** * Filters entities based on a parsed query * * @param entitySearchItems The entity search items to filter * @param parsedQuery The parsed query to apply * @returns Filtered entity search items that match the query */ export function filterEntitiesByQuery( entitySearchItems: EntitySearchItem[], parsedQuery: ParsedQuery ): EntitySearchItem[] { return entitySearchItems.filter(item => { const entity = item.entity; const searchText = item.searchText; // Apply special filters first if (parsedQuery.type && !entity.entityType.toLowerCase().includes(parsedQuery.type.toLowerCase())) { return false; } if (parsedQuery.name && !entity.name.toLowerCase().includes(parsedQuery.name.toLowerCase())) { return false; } // Check for positive includes (AND logic) for (const term of parsedQuery.include) { if (!searchText.includes(term.toLowerCase())) { return false; } } // Check for excluded terms (NOT logic) for (const term of parsedQuery.exclude) { if (searchText.includes(term.toLowerCase())) { return false; } } // Check for OR term groups (any term in the group must match) for (const orGroup of parsedQuery.or) { let orMatched = false; for (const term of orGroup) { if (searchText.includes(term.toLowerCase())) { orMatched = true; break; } } // If none of the terms in the OR group matched, filter out this entity if (!orMatched) { return false; } } // If there's a free text search, apply fuzzy search if (parsedQuery.freeText) { // Basic fuzzy match using character sequence matching let lastIndex = -1; const queryLower = parsedQuery.freeText.toLowerCase(); for (const char of queryLower) { const index = searchText.indexOf(char, lastIndex + 1); if (index === -1) { return false; } lastIndex = index; } } return true; }); } /** * Scores and sorts entities by relevance to the query * * @param entitySearchItems The filtered entity search items to score * @param parsedQuery The parsed query used for scoring * @returns Object containing sorted entities and their scores */ export function scoreAndSortEntities( entitySearchItems: EntitySearchItem[], parsedQuery: ParsedQuery ): ScoredEntity[] { // Score entities based on relevance const scoredEntities = entitySearchItems.map(item => { let score = 1.0; // Exact match on name gives highest score if (parsedQuery.freeText && item.entity.name === parsedQuery.freeText) { score = 3.0; } else if (parsedQuery.freeText && item.entity.name.toLowerCase() === parsedQuery.freeText.toLowerCase()) { score = 2.0; } // Partial match on name gives medium score else if (parsedQuery.freeText && item.entity.name.toLowerCase().includes(parsedQuery.freeText.toLowerCase())) { score = 1.5; } // Add scores for matching include terms parsedQuery.include.forEach(term => { if (item.searchText.includes(term.toLowerCase())) { score += 0.5; } }); // Add scores for matching exact terms parsedQuery.include.forEach(term => { // regex match with separators on both sides if (item.searchText.match(new RegExp(`\\b${term}\\b`))) { score += 1.0; } }); // Add scores for matching OR terms parsedQuery.or.forEach(orGroup => { for (const term of orGroup) { if (item.searchText.includes(term.toLowerCase())) { score += 0.3; break; // Only score once per OR group } } }); // Add scores for type or name matches if specified if (parsedQuery.type && item.entity.entityType.toLowerCase().includes(parsedQuery.type.toLowerCase())) { score += 0.5; } if (parsedQuery.name && item.entity.name.toLowerCase().includes(parsedQuery.name.toLowerCase())) { score += 0.7; } // Calculate fuzzy match score if there's free text if (parsedQuery.freeText) { const queryLower = parsedQuery.freeText.toLowerCase(); // Calculate similarity ratio (simple implementation) let matchingChars = 0; let lastIndex = -1; for (const char of queryLower) { const index = item.searchText.indexOf(char, lastIndex + 1); if (index !== -1) { matchingChars++; lastIndex = index; } } // Add fuzzy score (0-1 range based on match quality) const fuzzyScore = queryLower.length > 0 ? matchingChars / queryLower.length : 0; score += fuzzyScore; } return { entity: item.entity, score }; }); // Sort by score in descending order return scoredEntities.sort((a, b) => b.score - a.score); } /** * Creates a filtered knowledge graph from a list of scored entities * * @param scoredEntities The scored entities to include in the graph * @returns A knowledge graph with only relevant entities and relations, plus scores */ export function createFilteredGraph( scoredEntities: ScoredEntity[], ): ScoredKnowledgeGraph { // Create a Set of filtered entity names for quick lookup // const filteredEntityNames = new Set(scoredEntities.map(se => se.entity.name)); // // Filter relations to include those where either from or to entity is in the filtered set // const filteredRelations = allRelations.filter(r => // filteredEntityNames.has(r.from) || filteredEntityNames.has(r.to) // ); return { // entities: scoredEntities.map(se => se.entity), // relations: filteredRelations, scoredEntities: scoredEntities }; } /** * Executes a search query on a knowledge graph * * @param query The raw query string * @param graph The knowledge graph to search * @returns A filtered knowledge graph containing only matching entities and their relations, with scores */ export function searchGraph(query: string, graph: KnowledgeGraph): ScoredKnowledgeGraph { // Early return for empty query if (!query || query.trim() === '') { // Return all entities with a default score of 1.0 const scoredEntities = graph.entities.map(entity => ({ entity, score: 1.0 })); return { // entities: graph.entities, // relations: graph.relations, scoredEntities }; } query = query.replace(/ OR /g, '|'); // Parse the query const parsedQuery = parseQuery(query); // Create entity search items const entitySearchItems = createEntitySearchItems(graph.entities); // Filter entities based on parsed query const matchingEntities = filterEntitiesByQuery(entitySearchItems, parsedQuery); // Score and sort by relevance const scoredEntities = scoreAndSortEntities(matchingEntities, parsedQuery); // Create and return the filtered graph return createFilteredGraph(scoredEntities); }