Skip to main content
Glama

Open Search MCP

by flyanima
MIT License
2
  • Apple
  • Linux
intelligent-ranker.ts (12 kB)
/**
 * Intelligent Search Result Ranking System
 *
 * Ranks search results with a multi-dimensional scoring algorithm:
 * - Relevance scoring
 * - Authority scoring
 * - Freshness scoring
 * - Quality scoring
 * - Diversity optimization
 */

import { SearchResult, EnhancedSearchResult } from '../types';
import { Logger } from '../utils/logger.js';

/** Tunable parameters of {@link IntelligentRanker}. */
export interface RankingConfig {
  /** Weight of each scoring dimension in the final score. */
  weights: {
    relevance: number; // relevance weight (0-1)
    authority: number; // authority weight (0-1)
    freshness: number; // freshness weight (0-1)
    quality: number; // quality weight (0-1)
    // Diversity weight (0-1).
    // NOTE(review): not read by the final-score formula in this file.
    diversity: number;
  };

  /** Maximum number of results returned by `rankResults`. */
  maxResults: number;
  /** Diversity threshold. NOTE(review): not read anywhere in this file. */
  diversityThreshold: number;
  /** Results whose final score is below this value are dropped. */
  qualityThreshold: number;

  /** Give an extra authority bonus to academic sources. */
  boostAcademic: boolean;
  /** Give a freshness bonus to content updated within the last 30 days. */
  boostRecent: boolean;
  /** Drop results whose content duplicates an already accepted result. */
  penalizeDuplicates: boolean;
}

/** Per-result breakdown of the individual scores that make up the ranking. */
export interface RankingMetrics {
  relevanceScore: number;
  authorityScore: number;
  freshnessScore: number;
  qualityScore: number;
  diversityScore: number;
  finalScore: number;
  /** Human-readable labels describing why a result scored well. */
  rankingFactors: string[];
}

/** Optional hints accepted by `rankResults`; currently not used for scoring. */
export interface RankingContext {
  topic?: string;
  domain?: string;
  timeRange?: string;
}

/** An enhanced result together with its score breakdown. */
type ScoredResult = EnhancedSearchResult & { metrics: RankingMetrics };

const MS_PER_DAY = 1000 * 60 * 60 * 24;
const MAX_RESULTS_PER_DOMAIN = 3;
const SUMMARY_LENGTH = 200;
const WORDS_PER_MINUTE = 200;

/** Configuration used for every option the caller does not override. */
const DEFAULT_CONFIG: RankingConfig = {
  weights: {
    relevance: 0.35,
    authority: 0.25,
    freshness: 0.15,
    quality: 0.15,
    diversity: 0.10
  },
  maxResults: 50,
  diversityThreshold: 0.7,
  qualityThreshold: 0.6,
  boostAcademic: true,
  boostRecent: true,
  penalizeDuplicates: true
};

/** Parses `url`, returning `undefined` instead of throwing when it is malformed. */
function tryParseUrl(url: string): URL | undefined {
  try {
    return new URL(url);
  } catch {
    return undefined;
  }
}

/**
 * Returns true when `hostname` equals one of `domains` or is a subdomain of
 * one (e.g. "en.wikipedia.org" matches "wikipedia.org"). Matching is done on
 * whole labels, so "notnature.com" does not match "nature.com".
 */
function hostnameMatches(hostname: string, domains: ReadonlySet<string>): boolean {
  let candidate = hostname.toLowerCase();
  for (;;) {
    if (domains.has(candidate)) return true;
    const dot = candidate.indexOf('.');
    if (dot === -1) return false;
    candidate = candidate.slice(dot + 1);
  }
}

/** Counts non-overlapping literal occurrences of `needle` in `haystack`. */
function countOccurrences(haystack: string, needle: string): number {
  if (needle.length === 0) return 0;
  let count = 0;
  let from = haystack.indexOf(needle);
  while (from !== -1) {
    count++;
    from = haystack.indexOf(needle, from + needle.length);
  }
  return count;
}

/** Counts whitespace-separated words; empty or blank text has zero words. */
function countWords(text: string): number {
  return text.split(/\s+/).filter(word => word.length > 0).length;
}

/**
 * Scores, sorts, de-duplicates and trims search results.
 *
 * Each result receives relevance, authority, freshness and quality scores in
 * the range 0-1 which are combined into a weighted final score.
 */
export class IntelligentRanker {
  private readonly logger: Logger;
  private readonly config: RankingConfig;

  // Domains treated as authoritative sources.
  private readonly authorityDomains = new Set([
    'arxiv.org',
    'pubmed.ncbi.nlm.nih.gov',
    'scholar.google.com',
    'ieee.org',
    'acm.org',
    'nature.com',
    'science.org',
    'github.com',
    'stackoverflow.com',
    'wikipedia.org',
    'reuters.com',
    'bbc.com',
    'techcrunch.com',
    'wired.com'
  ]);

  // Domains treated as academic sources.
  private readonly academicDomains = new Set([
    'arxiv.org',
    'pubmed.ncbi.nlm.nih.gov',
    'scholar.google.com',
    'ieee.org',
    'acm.org',
    'nature.com',
    'science.org',
    'springer.com',
    'elsevier.com',
    'wiley.com'
  ]);

  /**
   * @param config Overrides for the default configuration. A partially
   *   specified `weights` object is merged with the default weights rather
   *   than replacing them.
   */
  constructor(config?: Partial<RankingConfig>) {
    this.logger = new Logger('IntelligentRanker');
    this.config = {
      ...DEFAULT_CONFIG,
      ...config,
      weights: { ...DEFAULT_CONFIG.weights, ...config?.weights }
    };
  }

  /**
   * Ranks search results for a query.
   *
   * @param results Raw search results; the array is not modified.
   * @param query The user's search query.
   * @param context Optional hints, forwarded to the scoring steps.
   * @returns Results sorted by descending final score, de-duplicated,
   *   filtered by `qualityThreshold` and capped at `maxResults`.
   */
  async rankResults(
    results: SearchResult[],
    query: string,
    context?: RankingContext
  ): Promise<EnhancedSearchResult[]> {
    this.logger.info(`Starting intelligent ranking for ${results.length} results`);

    // 1. Pre-process and enhance the results.
    const enhancedResults = await this.enhanceResults(results, query, context);

    // 2. Compute the multi-dimensional scores.
    const scoredResults = await this.calculateScores(enhancedResults, query, context);

    // 3. Sort by final score.
    const rankedResults = this.applyRanking(scoredResults);

    // 4. Enforce domain diversity and drop duplicates.
    const diversifiedResults = this.optimizeDiversity(rankedResults);

    // 5. Apply the quality threshold and the result limit.
    const finalResults = this.finalizeResults(diversifiedResults);

    this.logger.info(`Ranking completed: ${finalResults.length} results returned`);
    return finalResults;
  }

  /**
   * Adds derived fields (summary, key points, entities, word count, reading
   * time) to each result. Score fields are initialised to 0 and filled in by
   * `calculateScores`.
   */
  private async enhanceResults(
    results: SearchResult[],
    query: string,
    context?: RankingContext
  ): Promise<EnhancedSearchResult[]> {
    return results.map(result => {
      const content = result.content || '';
      const wordCount = countWords(content);

      // Only add an ellipsis when the content was actually truncated.
      let summary = result.title;
      if (content) {
        summary =
          content.length > SUMMARY_LENGTH
            ? content.substring(0, SUMMARY_LENGTH) + '...'
            : content;
      }

      return {
        ...result,
        relevanceScore: 0,
        qualityScore: 0,
        summary,
        keyPoints: this.extractKeyPoints(content),
        extractedEntities: this.extractEntities(content, query),
        credibilityScore: 0,
        wordCount,
        // Reading time is at least one minute, even for empty content.
        readingTime: Math.max(1, Math.ceil(wordCount / WORDS_PER_MINUTE))
      };
    });
  }

  /**
   * Computes the score breakdown for every result and copies the relevance,
   * quality and authority scores onto the result itself.
   */
  private async calculateScores(
    results: EnhancedSearchResult[],
    query: string,
    context?: RankingContext
  ): Promise<ScoredResult[]> {
    const queryTerms = this.tokenizeQuery(query);

    return results.map(result => {
      const metrics: RankingMetrics = {
        relevanceScore: this.calculateRelevanceScore(result, queryTerms),
        authorityScore: this.calculateAuthorityScore(result),
        freshnessScore: this.calculateFreshnessScore(result),
        qualityScore: this.calculateQualityScore(result),
        diversityScore: 0, // never updated by the code in this file
        finalScore: 0,
        rankingFactors: []
      };

      // Combine the individual scores.
      metrics.finalScore = this.calculateFinalScore(metrics);

      // Record which factors contributed to the ranking.
      metrics.rankingFactors = this.identifyRankingFactors(metrics, result);

      return {
        ...result,
        relevanceScore: metrics.relevanceScore,
        qualityScore: metrics.qualityScore,
        credibilityScore: metrics.authorityScore,
        metrics
      };
    });
  }

  /**
   * Scores how well the title and content match the query terms (0-1).
   *
   * Terms are matched literally (no regular-expression interpretation), so
   * queries such as "c++" or "a.b" are safe. Returns 0 when there are no
   * query terms instead of dividing by zero.
   */
  private calculateRelevanceScore(result: EnhancedSearchResult, queryTerms: string[]): number {
    if (queryTerms.length === 0) return 0;

    const title = result.title.toLowerCase();
    // Missing content contributes nothing rather than the text "undefined".
    const haystack = `${title} ${(result.content || '').toLowerCase()}`;

    // Simplified term-frequency score; terms found in the title count double.
    let score = 0;
    let titleMatches = 0;
    for (const term of queryTerms) {
      const inTitle = title.includes(term);
      if (inTitle) titleMatches++;
      const termFreq = countOccurrences(haystack, term);
      score += (termFreq * (inTitle ? 2 : 1)) / queryTerms.length;
    }

    // Extra weight for the fraction of terms present in the title.
    score += (titleMatches / queryTerms.length) * 0.5;

    return Math.min(score / 10, 1); // normalise to 0-1
  }

  /**
   * Scores the trustworthiness of the result's source (0-1).
   *
   * Starts from a base of 0.5 and adds bonuses for authoritative domains,
   * academic domains (when `boostAcademic` is set) and HTTPS. Subdomains of
   * a listed domain count as that domain. An unparseable URL keeps the base
   * score.
   */
  private calculateAuthorityScore(result: EnhancedSearchResult): number {
    let score = 0.5; // base score

    const parsed = tryParseUrl(result.url);
    if (!parsed) return score;

    if (hostnameMatches(parsed.hostname, this.authorityDomains)) {
      score += 0.3;
    }

    if (this.config.boostAcademic && hostnameMatches(parsed.hostname, this.academicDomains)) {
      score += 0.2;
    }

    if (parsed.protocol === 'https:') {
      score += 0.1;
    }

    return Math.min(score, 1);
  }

  /**
   * Scores how recent the result is (0-1).
   *
   * Returns the neutral value 0.5 when `lastUpdated` is missing or cannot be
   * parsed as a date. Dates in the future are treated as "now".
   */
  private calculateFreshnessScore(result: EnhancedSearchResult): number {
    if (!result.lastUpdated) return 0.5;

    const updatedAt = new Date(result.lastUpdated).getTime();
    if (Number.isNaN(updatedAt)) return 0.5;

    const daysDiff = Math.max(0, (Date.now() - updatedAt) / MS_PER_DAY);

    // Exponential decay: the score falls to 1/e after one year.
    let score = Math.exp(-daysDiff / 365);

    // Bonus for content updated within the last 30 days.
    if (this.config.boostRecent && daysDiff < 30) {
      score += 0.2;
    }

    return Math.min(score, 1);
  }

  /**
   * Scores superficial content quality (0-1) from content length, title
   * length and the presence of structure (line breaks or bullet characters).
   */
  private calculateQualityScore(result: EnhancedSearchResult): number {
    let score = 0.5; // base score

    const content = result.content || '';

    // Longer content scores higher.
    if (content.length > 500) score += 0.2;
    if (content.length > 1000) score += 0.1;

    // Reasonable title length.
    const titleLength = result.title.length;
    if (titleLength > 10 && titleLength < 100) score += 0.1;

    // Structured content.
    if (content.includes('\n') || content.includes('•')) {
      score += 0.1;
    }

    return Math.min(score, 1);
  }

  /**
   * Combines the individual scores into the weighted final score.
   *
   * NOTE(review): `weights.diversity` and `metrics.diversityScore` are not
   * part of this sum, so with the default weights the maximum is 0.9.
   */
  private calculateFinalScore(metrics: RankingMetrics): number {
    const { weights } = this.config;

    return (
      metrics.relevanceScore * weights.relevance +
      metrics.authorityScore * weights.authority +
      metrics.freshnessScore * weights.freshness +
      metrics.qualityScore * weights.quality
    );
  }

  /** Returns a new array sorted by descending final score; the input is not mutated. */
  private applyRanking(results: ScoredResult[]): ScoredResult[] {
    return [...results].sort((a, b) => b.metrics.finalScore - a.metrics.finalScore);
  }

  /**
   * Limits each hostname to `MAX_RESULTS_PER_DOMAIN` results and, when
   * `penalizeDuplicates` is set, drops results whose content duplicates an
   * earlier accepted result. Input order is preserved.
   *
   * Results with an unparseable URL are always kept. Results without content
   * are never considered duplicates of each other.
   */
  private optimizeDiversity(results: ScoredResult[]): ScoredResult[] {
    const diversified: ScoredResult[] = [];
    const domainCounts = new Map<string, number>();
    const seenContent = new Set<string>();

    for (const result of results) {
      const parsed = tryParseUrl(result.url);
      if (!parsed) {
        // URL could not be parsed: keep the result without diversity checks.
        diversified.push(result);
        continue;
      }

      const domain = parsed.hostname;
      const domainCount = domainCounts.get(domain) ?? 0;
      if (domainCount >= MAX_RESULTS_PER_DOMAIN) continue;

      const content = result.content || '';
      // Pair the hash with the length to make accidental collisions less likely.
      const contentKey = content ? `${content.length}:${this.hashContent(content)}` : undefined;

      if (
        this.config.penalizeDuplicates &&
        contentKey !== undefined &&
        seenContent.has(contentKey)
      ) {
        continue;
      }

      domainCounts.set(domain, domainCount + 1);
      if (contentKey !== undefined) seenContent.add(contentKey);
      diversified.push(result);
    }

    return diversified;
  }

  /**
   * Applies the quality threshold and result limit, and strips the internal
   * `metrics` property from the returned results.
   */
  private finalizeResults(results: ScoredResult[]): EnhancedSearchResult[] {
    return results
      .filter(result => result.metrics.finalScore >= this.config.qualityThreshold)
      .slice(0, this.config.maxResults)
      .map(({ metrics, ...result }) => result);
  }

  // Helper methods

  /**
   * Splits the query into lower-case terms, dropping terms of one or two
   * characters. If that would leave nothing (e.g. the query "AI"), all
   * non-empty terms are kept so that short queries can still be scored.
   */
  private tokenizeQuery(query: string): string[] {
    const terms = query
      .toLowerCase()
      .split(/\s+/)
      .filter(term => term.length > 0);
    const significant = terms.filter(term => term.length > 2);
    return significant.length > 0 ? significant : terms;
  }

  /** Returns up to three sentences of more than 20 characters as key points. */
  private extractKeyPoints(content: string): string[] {
    return content
      .split(/[.!?]/)
      .map(sentence => sentence.trim())
      .filter(sentence => sentence.length > 20)
      .slice(0, 3);
  }

  /**
   * Naive entity extraction: returns up to ten distinct whitespace-separated
   * tokens that start with an upper-case letter followed by a lower-case one.
   * The `query` parameter is currently unused.
   */
  private extractEntities(content: string, query: string): string[] {
    const entities = new Set<string>();

    for (const word of content.split(/\s+/)) {
      if (/^[A-Z][a-z]+/.test(word)) {
        entities.add(word);
      }
    }

    return Array.from(entities).slice(0, 10);
  }

  /**
   * Lists human-readable reasons for a result's ranking: every score above
   * 0.8 plus academic / trusted domain membership.
   */
  private identifyRankingFactors(metrics: RankingMetrics, result: EnhancedSearchResult): string[] {
    const factors: string[] = [];

    if (metrics.relevanceScore > 0.8) factors.push('High Relevance');
    if (metrics.authorityScore > 0.8) factors.push('Authoritative Source');
    if (metrics.freshnessScore > 0.8) factors.push('Recent Content');
    if (metrics.qualityScore > 0.8) factors.push('High Quality');

    // An unparseable URL simply contributes no domain-based factors.
    const parsed = tryParseUrl(result.url);
    if (parsed) {
      if (hostnameMatches(parsed.hostname, this.academicDomains)) factors.push('Academic Source');
      if (hostnameMatches(parsed.hostname, this.authorityDomains)) factors.push('Trusted Domain');
    }

    return factors;
  }

  /**
   * Simple non-cryptographic 32-bit string hash (hash * 31 + char code),
   * returned as a decimal string.
   */
  private hashContent(content: string): string {
    let hash = 0;
    for (let i = 0; i < content.length; i++) {
      hash = (hash << 5) - hash + content.charCodeAt(i);
      hash |= 0; // truncate to a 32-bit integer
    }
    return hash.toString();
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/flyanima/open-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.