Skip to main content
Glama

Open Search MCP

by flyanima
MIT License
2
  • Apple
  • Linux
arxiv-search-router.ts (11.7 kB)
/**
 * arXiv search router - recognises the intent behind an academic search query
 * and routes it to the appropriate arXiv API endpoint.
 */

/** Routing decision returned by `ArxivSearchRouter.routeSearch`. */
export interface ArxivSearchRoute {
  /** Tool that should handle the request. */
  tool: 'arxiv_search' | 'arxiv_paper_details' | 'arxiv_author_search';
  /** Endpoint within that tool. */
  endpoint: 'search' | 'paper_details' | 'author_search' | 'category_search';
  /** Parameters extracted from the query text (shape depends on `endpoint`). */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- kept as `any` so existing callers keep compiling
  params: Record<string, any>;
  /** Detected intent: 'paper_details' | 'author_search' | 'category_search' | 'general_search'. */
  intent: string;
}

export class ArxivSearchRouter {
  /**
   * arXiv identifier, optionally prefixed with `arxiv:`.
   * New style: `YYMM.NNNN[N][vN]`; old style: `subject-class/YYMMnnn`.
   */
  private static readonly ARXIV_ID_PATTERN =
    /(?:arxiv:)?(\d{4}\.\d{4,5}(?:v\d+)?|[a-z-]+\/\d{7})/i;

  /**
   * Phrases that mark an author search. Everything is matched on word
   * boundaries so that e.g. "nearby" does not count as "by". `author:` is
   * listed first so the colon form wins over the bare word, and the
   * multi-word phrases precede the bare "by" so they are consumed whole.
   */
  private static readonly AUTHOR_MARKER =
    /\b(?:author:|(?:publications by|written by|papers by|works by|researchers?|authors?|by(?=\s))\b)/i;

  /** Filler words removed from a general search before it is sent as the query. */
  private static readonly STOP_WORDS =
    /\b(?:look for|regarding|research|articles|studies|papers|search|about|arxiv|paper|find|on)\b/gi;

  /** Common abbreviations expanded by `optimizeQuery`. */
  private static readonly TERM_MAPPING: Readonly<Record<string, string>> = {
    'ai': 'artificial intelligence',
    'ml': 'machine learning',
    'dl': 'deep learning',
    'nlp': 'natural language processing',
    'cv': 'computer vision'
  };

  // arXiv subject-category mapping (natural-language keyword -> arXiv category code)
  private readonly CATEGORY_MAPPING: Record<string, string> = {
    // Computer science
    'computer science': 'cs',
    'cs': 'cs',
    'artificial intelligence': 'cs.AI',
    'machine learning': 'cs.LG',
    'computer vision': 'cs.CV',
    'natural language processing': 'cs.CL',
    'robotics': 'cs.RO',
    'cryptography': 'cs.CR',
    // Mathematics
    'mathematics': 'math',
    'math': 'math',
    'statistics': 'stat',
    'probability': 'math.PR',
    'optimization': 'math.OC',
    // Physics
    'physics': 'physics',
    'quantum physics': 'quant-ph',
    'quantum computing': 'quant-ph',
    'astrophysics': 'astro-ph',
    'condensed matter': 'cond-mat',
    // Biology and finance
    'biology': 'q-bio',
    'quantitative biology': 'q-bio',
    'finance': 'q-fin',
    'quantitative finance': 'q-fin'
  };

  /**
   * Whole-word matchers for every mapping keyword, ordered longest keyword
   * first so that the most specific phrase wins ("quantum physics" before
   * "physics") and short keys such as "cs" cannot match inside other words.
   */
  private readonly categoryMatchers: ReadonlyArray<{ pattern: RegExp; category: string }> =
    Object.entries(this.CATEGORY_MAPPING)
      .sort(([a], [b]) => b.length - a.length)
      .map(([keyword, category]) => ({
        pattern: new RegExp(`\\b${ArxivSearchRouter.escapeRegExp(keyword)}\\b`, 'i'),
        category
      }));

  /**
   * Matches a literal category code written in the query (e.g. "cs.AI",
   * "math.PR"): one of the archives known to the mapping followed by a
   * two-letter subject class.
   */
  private readonly categoryCodePattern: RegExp = new RegExp(
    '\\b(?:' +
      Array.from(
        new Set(Object.values(this.CATEGORY_MAPPING).map(code => code.replace(/\..*$/, '')))
      )
        .map(archive => ArxivSearchRouter.escapeRegExp(archive))
        .join('|') +
      ')\\.[A-Za-z]{2}\\b',
    'i'
  );

  /** Escapes regex metacharacters so `text` can be embedded in a pattern literally. */
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Routes a user query to the matching arXiv tool.
   *
   * Checks are applied in priority order: paper ID, author, category, and
   * finally a general keyword search. Intent detection runs on the
   * lower-cased query; parameters are extracted from the original text so
   * that names and category codes keep their casing.
   *
   * @param query Raw user query.
   * @returns The selected tool/endpoint plus the extracted parameters.
   */
  routeSearch(query: string): ArxivSearchRoute {
    const normalizedQuery = query.toLowerCase().trim();

    // arXiv ID lookup
    if (this.isPaperIdQuery(normalizedQuery)) {
      return {
        tool: 'arxiv_paper_details',
        endpoint: 'paper_details',
        params: this.extractPaperIdParams(query),
        intent: 'paper_details'
      };
    }

    // Author lookup
    if (this.isAuthorQuery(normalizedQuery)) {
      return {
        tool: 'arxiv_author_search',
        endpoint: 'author_search',
        params: this.extractAuthorParams(query),
        intent: 'author_search'
      };
    }

    // Category lookup
    if (this.isCategoryQuery(normalizedQuery)) {
      return {
        tool: 'arxiv_search',
        endpoint: 'category_search',
        params: this.extractCategoryParams(query),
        intent: 'category_search'
      };
    }

    // Default: general search
    return {
      tool: 'arxiv_search',
      endpoint: 'search',
      params: this.extractSearchParams(query),
      intent: 'general_search'
    };
  }

  /** True when the (lower-cased) query contains an arXiv ID or asks for paper details / an abstract. */
  private isPaperIdQuery(query: string): boolean {
    return (
      ArxivSearchRouter.ARXIV_ID_PATTERN.test(query) ||
      query.includes('paper details') ||
      query.includes('abstract')
    );
  }

  /** True when the query contains one of the author marker phrases as whole words. */
  private isAuthorQuery(query: string): boolean {
    return ArxivSearchRouter.AUTHOR_MARKER.test(query);
  }

  /**
   * True when the query names a category: an explicit `cat:` / `category:`
   * prefix, a literal category code, or a known category keyword.
   */
  private isCategoryQuery(query: string): boolean {
    if (/\b(?:cat|category):/i.test(query)) {
      return true;
    }
    if (this.categoryCodePattern.test(query)) {
      return true;
    }
    return this.categoryMatchers.some(({ pattern }) => pattern.test(query));
  }

  /**
   * Builds the parameters for a paper-details lookup.
   * Falls back to the query with the hint words removed when no ID is present.
   */
  private extractPaperIdParams(query: string): Record<string, any> {
    const idMatch = ArxivSearchRouter.ARXIV_ID_PATTERN.exec(query);
    if (idMatch) {
      return { id: idMatch[1] };
    }
    // No ID found: use whatever is left of the query as a best-effort identifier.
    return { id: query.replace(/paper details|abstract|details/gi, '').trim() };
  }

  /** Builds the parameters for an author search (author, limit, year, exactMatch). */
  private extractAuthorParams(query: string): Record<string, any> {
    const params: Record<string, any> = {};

    const author = this.extractAuthor(query);
    if (author) params.author = author;

    const limit = this.extractLimit(query);
    if (limit) params.limit = limit;

    const year = this.extractYear(query);
    if (year) params.year = year;

    // A quoted name or the word "exact" requests exact matching.
    if (query.includes('"') || /\bexact(?:ly)?\b/i.test(query)) {
      params.exactMatch = true;
    }

    return params;
  }

  /** Builds the parameters for a category search (category, limit, year, subcategories). */
  private extractCategoryParams(query: string): Record<string, any> {
    const params: Record<string, any> = {};

    const category = this.extractCategory(query);
    if (category) params.category = category;

    const limit = this.extractLimit(query);
    if (limit) params.limit = limit;

    const year = this.extractYear(query);
    if (year) params.year = year;

    // "all" / "subcategories" asks for the sub-categories to be included.
    if (/\b(?:all|subcategories)\b/i.test(query)) {
      params.subcategories = true;
    }

    return params;
  }

  /** Builds the parameters for a general search; `limit` defaults to 10. */
  private extractSearchParams(query: string): Record<string, any> {
    const params: Record<string, any> = { limit: 10 };

    const keywords = this.extractKeywords(query);
    if (keywords) params.query = keywords;

    const limit = this.extractLimit(query);
    if (limit) params.limit = limit;

    const year = this.extractYear(query);
    if (year) params.year = year;

    const sortBy = this.extractSortBy(query);
    if (sortBy) params.sortBy = sortBy;

    return params;
  }

  /**
   * Extracts the author name by removing marker phrases, quotes, and the
   * year / count tokens that are reported through separate parameters.
   *
   * @returns The remaining text, or `undefined` when nothing is left.
   */
  private extractAuthor(query: string): string | undefined {
    // Fresh global regex per call so no `lastIndex` state is shared.
    const markers = new RegExp(ArxivSearchRouter.AUTHOR_MARKER.source, 'gi');
    const author = query
      .replace(markers, ' ')
      .replace(/['"]/g, '')
      .replace(/\b20\d{2}\s*[-–]\s*20\d{2}\b/g, ' ') // year range
      .replace(/(^|\s)\d+(?=\s|$)/g, '$1') // standalone numbers (year, limit)
      .replace(/\s+/g, ' ')
      .trim();
    return author || undefined;
  }

  /**
   * Resolves the category named in the query.
   * Priority: explicit `cat:` / `category:` value, then a literal category
   * code, then the longest matching mapping keyword.
   */
  private extractCategory(query: string): string | undefined {
    const explicit = /\b(?:cat|category):\s*([a-z-]+(?:\.[A-Z]{2})?)/i.exec(query);
    if (explicit) {
      return explicit[1];
    }

    const code = this.categoryCodePattern.exec(query);
    if (code) {
      return code[0];
    }

    const matched = this.categoryMatchers.find(({ pattern }) => pattern.test(query));
    return matched ? matched.category : undefined;
  }

  /** Strips search filler words and collapses whitespace; `undefined` when nothing remains. */
  private extractKeywords(query: string): string | undefined {
    const keywords = query
      .replace(ArxivSearchRouter.STOP_WORDS, '')
      .replace(/\s+/g, ' ')
      .trim();
    return keywords || undefined;
  }

  /**
   * Extracts a year filter.
   *
   * @returns `"YYYY-YYYY"` for a range, `"YYYY"` for a single year (20xx only), else `undefined`.
   */
  private extractYear(query: string): string | undefined {
    // The range must be tested first: a single-year pattern also matches the
    // first half of a range and would otherwise hide it.
    const range = /\b(20\d{2})\s*[-–]\s*(20\d{2})\b/.exec(query);
    if (range) {
      return `${range[1]}-${range[2]}`;
    }

    const single = /\b(20\d{2})\b/.exec(query);
    return single ? single[1] : undefined;
  }

  /**
   * Extracts the requested number of results.
   *
   * Uses the first standalone integer in 1..100, skipping numbers that are out
   * of range (such as years) or that are part of a word or decimal. Otherwise
   * falls back to quantity words: many/all -> 50, few/some -> 5.
   */
  private extractLimit(query: string): number | undefined {
    const standaloneInt = /(^|[^\w.])(\d+)(?!\w|\.\d)/g;
    let match: RegExpExecArray | null;
    while ((match = standaloneInt.exec(query)) !== null) {
      const value = Number(match[2]);
      if (value > 0 && value <= 100) {
        return value;
      }
    }

    if (/\b(?:many|all)\b/i.test(query)) {
      return 50;
    }
    if (/\b(?:few|some)\b/i.test(query)) {
      return 5;
    }

    return undefined;
  }

  /** Maps sort-related words in the query to a sort key. */
  private extractSortBy(query: string): string | undefined {
    if (/\b(?:latest|recent(?:ly)?|new(?:est)?)\b/i.test(query)) {
      return 'submittedDate';
    }
    if (/\b(?:updated|modified)\b/i.test(query)) {
      return 'lastUpdatedDate';
    }
    if (/\b(?:relevant|relevance)\b/i.test(query)) {
      return 'relevance';
    }
    return undefined;
  }

  /**
   * Returns example queries matching the detected query type.
   *
   * @param query Raw user query (normalised here, like in `routeSearch`).
   */
  getSuggestions(query: string): string[] {
    const normalizedQuery = query.toLowerCase().trim();

    if (this.isPaperIdQuery(normalizedQuery)) {
      return ['arXiv:2301.12345', 'paper details 2301.12345', 'abstract 2301.12345'];
    }
    if (this.isAuthorQuery(normalizedQuery)) {
      return ['papers by Hinton', 'author:LeCun', 'researcher Smith'];
    }
    if (this.isCategoryQuery(normalizedQuery)) {
      return ['cs.AI papers', 'machine learning', 'quantum physics'];
    }
    return [
      'machine learning',
      'quantum computing',
      'deep learning 2024',
      'neural networks',
      'artificial intelligence'
    ];
  }

  /**
   * Validates a query: it must be non-blank and at most 500 characters long.
   *
   * @returns `{ valid: true }`, or `{ valid: false, message }` describing the problem.
   */
  validateQuery(query: string): { valid: boolean; message?: string } {
    if (!query || query.trim().length === 0) {
      return { valid: false, message: 'Query cannot be empty' };
    }

    if (query.length > 500) {
      return { valid: false, message: 'Query too long (max 500 characters)' };
    }

    return { valid: true };
  }

  /** Collapses whitespace and expands common abbreviations (ai, ml, dl, nlp, cv) to full terms. */
  optimizeQuery(query: string): string {
    let optimized = query.trim().replace(/\s+/g, ' ');

    for (const [abbrev, full] of Object.entries(ArxivSearchRouter.TERM_MAPPING)) {
      optimized = optimized.replace(new RegExp(`\\b${abbrev}\\b`, 'gi'), full);
    }

    return optimized;
  }

  /** Classifies a query by word count: <= 2 simple, <= 5 medium, otherwise complex. */
  analyzeQueryComplexity(query: string): 'simple' | 'medium' | 'complex' {
    const wordCount = query.trim().split(/\s+/).length;

    if (wordCount <= 2) {
      return 'simple';
    }
    if (wordCount <= 5) {
      return 'medium';
    }
    return 'complex';
  }

  /**
   * Bundles the route, complexity, optimised query, suggestions and the
   * derived strategy flags for a query.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- return type kept for caller compatibility
  generateSearchStrategy(query: string): any {
    const route = this.routeSearch(query);
    const complexity = this.analyzeQueryComplexity(query);

    return {
      route,
      complexity,
      optimizedQuery: this.optimizeQuery(query),
      suggestions: this.getSuggestions(query),
      strategy: {
        useCache: complexity === 'simple',
        expandResults: complexity === 'complex',
        enableFiltering: route.intent === 'general_search'
      }
    };
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/flyanima/open-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.