Skip to main content
Glama

OpenEdu MCP Server

arxiv_tools.py • 33.5 kB
""" arXiv tools for OpenEdu MCP Server. This module provides MCP tool implementations for arXiv API integration, including educational filtering, academic content analysis, and research paper search functionality. """ import logging import re from typing import Dict, Any, List, Optional, Union from datetime import datetime, date, timedelta import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) from tools.base_tool import BaseTool from api.arxiv import ArxivClient from models.research_paper import ResearchPaper from models.base import GradeLevel, EducationalMetadata from config import Config from services.cache_service import CacheService from services.rate_limiting_service import RateLimitingService from services.usage_service import UsageService from exceptions import ToolError, ValidationError, APIError, CacheError logger = logging.getLogger(__name__) class ArxivTool(BaseTool): """Tool for arXiv API integration with educational features.""" def __init__( self, config: Config, cache_service: CacheService, rate_limiting_service: RateLimitingService, usage_service: UsageService ): """ Initialize arXiv tool. 
Args: config: Application configuration cache_service: Cache service instance rate_limiting_service: Rate limiting service instance usage_service: Usage tracking service instance """ super().__init__(config, cache_service, rate_limiting_service, usage_service) self.client = ArxivClient(config) # Educational filtering configuration self.min_educational_relevance = config.education.content_filters.min_educational_relevance self.enable_age_appropriate = config.education.content_filters.enable_age_appropriate self.enable_curriculum_alignment = config.education.content_filters.enable_curriculum_alignment # Academic level thresholds self.academic_level_thresholds = { 'High School': 0.3, 'Undergraduate': 0.5, 'Graduate': 0.7, 'Research': 0.9 } # Subject mappings for educational alignment self.subject_mappings = { 'Mathematics': ['math', 'stat'], 'Science': ['physics', 'q-bio', 'chem-ph', 'astro-ph'], 'Technology': ['cs', 'eess'], 'Engineering': ['cs', 'eess', 'physics'], 'Statistics': ['stat', 'math.ST'], 'Biology': ['q-bio'], 'Physics': ['physics', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'quant-ph'], 'Computer Science': ['cs'] } @property def api_name(self) -> str: """Name of the API this tool uses for rate limiting.""" return "arxiv" async def search_academic_papers( self, query: str, subject: Optional[str] = None, academic_level: Optional[str] = None, max_results: int = 10, include_educational_analysis: bool = True, user_session: Optional[str] = None ) -> List[Dict[str, Any]]: """ Search for academic papers with educational filtering. 
Args: query: Search query for papers subject: Educational subject filter (optional) academic_level: Target academic level (High School, Undergraduate, Graduate, Research) max_results: Maximum number of results (1-50) include_educational_analysis: Whether to include educational metadata user_session: User session identifier Returns: List of academic papers with educational metadata Raises: ValidationError: If parameters are invalid ToolError: If search fails """ async def _search(): # Validate parameters validated = await self.validate_common_parameters( query=query, limit=max_results, subject=subject ) if academic_level and academic_level not in self.academic_level_thresholds: raise ValidationError(f"Invalid academic level: {academic_level}") # Map educational subject to arXiv categories arxiv_category = None if subject: arxiv_category = self._map_subject_to_arxiv_category(subject) # Search papers using arXiv API raw_papers = await self.client.search_papers( query=validated['query'], category=arxiv_category, max_results=validated['limit'] ) # Convert to ResearchPaper models with educational enrichment papers = [] for paper_data in raw_papers: try: paper = ResearchPaper.from_arxiv(paper_data) # Enrich with educational metadata if requested if include_educational_analysis: paper = await self._enrich_educational_metadata( paper, subject=subject, academic_level=academic_level ) papers.append(paper) except Exception as e: logger.warning(f"Failed to process paper data: {e}") continue # Apply educational filtering filtered_papers = self._apply_educational_filters( papers, academic_level=academic_level, subject=subject ) # Sort by educational relevance sorted_papers = self.sort_by_educational_relevance(filtered_papers) # Convert to dictionaries for response return [paper.to_dict() for paper in sorted_papers] return await self.execute_with_monitoring( "search_academic_papers", _search, user_session=user_session ) async def get_paper_summary( self, paper_id: str, 
include_educational_analysis: bool = True, user_session: Optional[str] = None ) -> Dict[str, Any]: """ Get paper summary with educational analysis. Args: paper_id: arXiv paper ID include_educational_analysis: Whether to include educational metadata user_session: User session identifier Returns: Paper summary with educational metadata Raises: ValidationError: If paper_id is invalid ToolError: If retrieval fails """ async def _get_summary(): # Get paper data from arXiv paper_data = await self.client.get_paper_abstract(paper_id) if not paper_data: raise ToolError(f"Paper not found: {paper_id}", self.tool_name) # Convert to ResearchPaper model paper = ResearchPaper.from_arxiv(paper_data) # Enrich with educational metadata if requested if include_educational_analysis: paper = await self._enrich_educational_metadata(paper) return paper.to_dict() return await self.execute_with_monitoring( "get_paper_summary", _get_summary, user_session=user_session ) async def get_recent_research( self, subject: str, days: int = 7, academic_level: Optional[str] = None, max_results: int = 10, user_session: Optional[str] = None ) -> List[Dict[str, Any]]: """ Get recent research papers by educational subject. 
Args: subject: Educational subject days: Number of days back to search (1-30) academic_level: Target academic level (optional) max_results: Maximum number of results user_session: User session identifier Returns: List of recent papers in the subject area Raises: ValidationError: If parameters are invalid ToolError: If search fails """ async def _get_recent(): # Validate parameters if days < 1 or days > 30: raise ValidationError("days must be between 1 and 30") validated = await self.validate_common_parameters( subject=subject, limit=max_results ) # Map educational subject to arXiv category arxiv_category = self._map_subject_to_arxiv_category(validated['subject']) if not arxiv_category: raise ValidationError(f"Subject not supported for arXiv search: {subject}") # Get recent papers raw_papers = await self.client.get_recent_papers( category=arxiv_category, days=days, max_results=validated['limit'] ) # Convert and enrich papers papers = [] for paper_data in raw_papers: try: paper = ResearchPaper.from_arxiv(paper_data) paper = await self._enrich_educational_metadata( paper, subject=subject, academic_level=academic_level ) papers.append(paper) except Exception as e: logger.warning(f"Failed to process paper data: {e}") continue # Apply educational filtering filtered_papers = self._apply_educational_filters( papers, academic_level=academic_level, subject=subject ) # Sort by publication date (most recent first) sorted_papers = sorted( filtered_papers, key=lambda p: p.publication_date, reverse=True ) return [paper.to_dict() for paper in sorted_papers] return await self.execute_with_monitoring( "get_recent_research", _get_recent, user_session=user_session ) async def get_research_by_level( self, academic_level: str, subject: Optional[str] = None, max_results: int = 10, user_session: Optional[str] = None ) -> List[Dict[str, Any]]: """ Get research papers appropriate for specific academic levels. 
Args: academic_level: Target academic level (High School, Undergraduate, Graduate, Research) subject: Subject area filter (optional) max_results: Maximum number of results user_session: User session identifier Returns: List of papers appropriate for the academic level Raises: ValidationError: If parameters are invalid ToolError: If search fails """ async def _get_by_level(): # Validate academic level if academic_level not in self.academic_level_thresholds: raise ValidationError(f"Invalid academic level: {academic_level}") validated = await self.validate_common_parameters( limit=max_results, subject=subject ) # Build search query based on academic level level_keywords = { 'High School': ['introductory', 'basic', 'elementary', 'tutorial'], 'Undergraduate': ['undergraduate', 'college', 'introductory course', 'textbook'], 'Graduate': ['graduate', 'advanced', 'research methods'], 'Research': ['novel', 'cutting-edge', 'state-of-the-art', 'breakthrough'] } keywords = level_keywords.get(academic_level, []) search_query = " OR ".join(keywords) if keywords else "education" # Add subject filter if specified arxiv_category = None if subject: arxiv_category = self._map_subject_to_arxiv_category(subject) if not arxiv_category: # Use subject as additional search term search_query = f"{search_query} {subject}" # Search papers raw_papers = await self.client.search_papers( query=search_query, category=arxiv_category, max_results=validated['limit'] ) # Convert and enrich papers papers = [] for paper_data in raw_papers: try: paper = ResearchPaper.from_arxiv(paper_data) paper = await self._enrich_educational_metadata( paper, subject=subject, academic_level=academic_level ) papers.append(paper) except Exception as e: logger.warning(f"Failed to process paper data: {e}") continue # Filter by academic level appropriateness level_filtered = [ paper for paper in papers if self._is_appropriate_for_level(paper, academic_level) ] # Apply additional educational filtering filtered_papers = 
self._apply_educational_filters( level_filtered, academic_level=academic_level, subject=subject ) # Sort by educational relevance sorted_papers = self.sort_by_educational_relevance(filtered_papers) return [paper.to_dict() for paper in sorted_papers] return await self.execute_with_monitoring( "get_research_by_level", _get_by_level, user_session=user_session ) async def analyze_research_trends( self, subject: str, days: int = 30, user_session: Optional[str] = None ) -> Dict[str, Any]: """ Analyze research trends for educational insights. Args: subject: Educational subject to analyze days: Number of days to analyze (7-90) user_session: User session identifier Returns: Research trend analysis with educational insights Raises: ValidationError: If parameters are invalid ToolError: If analysis fails """ async def _analyze_trends(): # Validate parameters if days < 7 or days > 90: raise ValidationError("days must be between 7 and 90") validated = await self.validate_common_parameters(subject=subject) # Map subject to arXiv category arxiv_category = self._map_subject_to_arxiv_category(validated['subject']) if not arxiv_category: raise ValidationError(f"Subject not supported for trend analysis: {subject}") # Get recent papers for trend analysis raw_papers = await self.client.get_recent_papers( category=arxiv_category, days=days, max_results=50 # Get more papers for better analysis ) # Convert to ResearchPaper models papers = [] for paper_data in raw_papers: try: paper = ResearchPaper.from_arxiv(paper_data) papers.append(paper) except Exception as e: logger.warning(f"Failed to process paper data: {e}") continue # Analyze trends trends = self._analyze_paper_trends(papers, subject) return trends return await self.execute_with_monitoring( "analyze_research_trends", _analyze_trends, user_session=user_session ) async def _enrich_educational_metadata( self, paper: ResearchPaper, subject: Optional[str] = None, academic_level: Optional[str] = None ) -> ResearchPaper: """ Enrich paper 
with educational metadata. Args: paper: ResearchPaper instance to enrich subject: Target subject for relevance scoring academic_level: Target academic level for relevance scoring Returns: Enriched paper instance """ # Calculate educational relevance score relevance_score = self._calculate_educational_relevance(paper, subject, academic_level) paper.educational_metadata.educational_relevance_score = relevance_score # Determine academic level appropriateness paper_level = self.client.analyze_educational_level({ 'title': paper.title, 'summary': paper.abstract }) # Map to grade levels grade_levels = self._map_academic_level_to_grades(paper_level) paper.educational_metadata.grade_levels = grade_levels # Enhance subject classification enhanced_subjects = self._enhance_subject_classification(paper.subjects, subject) paper.educational_metadata.educational_subjects = enhanced_subjects # Calculate complexity and difficulty complexity_score = self.client.calculate_complexity_score({ 'title': paper.title, 'summary': paper.abstract }) if complexity_score < 0.3: paper.educational_metadata.difficulty_level = "Introductory" elif complexity_score < 0.6: paper.educational_metadata.difficulty_level = "Intermediate" else: paper.educational_metadata.difficulty_level = "Advanced" # Analyze curriculum alignment curriculum_alignment = self._analyze_curriculum_alignment(paper, subject) paper.educational_metadata.curriculum_alignment = curriculum_alignment # Extract educational applications educational_applications = self._extract_educational_applications(paper) paper.educational_applications = educational_applications return paper def _calculate_educational_relevance( self, paper: ResearchPaper, target_subject: Optional[str] = None, target_level: Optional[str] = None ) -> float: """Calculate educational relevance score for a paper.""" score = 0.0 # Base score for educational content indicators content_text = f"{paper.title} {paper.abstract}".lower() # Educational keywords scoring 
educational_keywords = [ 'education', 'learning', 'teaching', 'pedagogy', 'curriculum', 'instruction', 'classroom', 'student', 'educational', 'academic', 'methodology', 'framework', 'analysis', 'study', 'research' ] keyword_matches = sum(1 for keyword in educational_keywords if keyword in content_text) score += min(keyword_matches * 0.05, 0.3) # Max 0.3 for keywords # Subject relevance if target_subject: subject_terms = self._get_subject_search_terms(target_subject) subject_match = any( term.lower() in content_text for term in subject_terms + [target_subject] ) if subject_match: score += 0.4 # Academic level appropriateness if target_level: level_keywords = { 'High School': ['introductory', 'basic', 'elementary', 'tutorial'], 'Undergraduate': ['undergraduate', 'college', 'introductory'], 'Graduate': ['graduate', 'advanced', 'research'], 'Research': ['novel', 'cutting-edge', 'state-of-the-art'] } level_terms = level_keywords.get(target_level, []) level_match = any(term in content_text for term in level_terms) if level_match: score += 0.2 # arXiv category relevance for STEM education stem_categories = ['math', 'physics', 'cs', 'stat', 'q-bio'] if any(cat in paper.subjects for cat in stem_categories): score += 0.1 return min(score, 1.0) # Cap at 1.0 def _map_subject_to_arxiv_category(self, subject: str) -> Optional[str]: """Map educational subject to arXiv category.""" subject_lower = subject.lower() for edu_subject, categories in self.subject_mappings.items(): if edu_subject.lower() in subject_lower or subject_lower in edu_subject.lower(): return categories[0] # Return primary category # Direct category mapping direct_mappings = { 'math': 'math', 'mathematics': 'math', 'physics': 'physics', 'computer science': 'cs', 'cs': 'cs', 'artificial intelligence': 'cs', 'ai': 'cs', 'machine learning': 'cs', 'biology': 'q-bio', 'statistics': 'stat', 'finance': 'q-fin' } return direct_mappings.get(subject_lower) def _map_academic_level_to_grades(self, academic_level: str) -> 
List[GradeLevel]: """Map academic level to grade levels.""" mapping = { 'High School': [GradeLevel.GRADES_9_12], 'Undergraduate': [GradeLevel.COLLEGE], 'Graduate': [GradeLevel.COLLEGE], 'Research': [GradeLevel.COLLEGE] } return mapping.get(academic_level, [GradeLevel.COLLEGE]) def _enhance_subject_classification( self, arxiv_subjects: List[str], target_subject: Optional[str] = None ) -> List[str]: """Enhance subject classification for educational purposes.""" educational_subjects = [] # Map arXiv categories to educational subjects category_mapping = { 'math': 'Mathematics', 'physics': 'Science', 'cs': 'Technology', 'stat': 'Mathematics', 'q-bio': 'Science', 'q-fin': 'Social Studies' } for subject in arxiv_subjects: subject_prefix = subject.split('.')[0].lower() if subject_prefix in category_mapping: educational_subjects.append(category_mapping[subject_prefix]) # Add target subject if specified if target_subject and target_subject not in educational_subjects: educational_subjects.append(target_subject) return list(set(educational_subjects)) def _analyze_curriculum_alignment( self, paper: ResearchPaper, subject: Optional[str] = None ) -> Dict[str, float]: """Analyze curriculum alignment for educational standards.""" alignment = { 'Common Core': 0.0, 'NGSS': 0.0, 'State Standards': 0.0 } content_text = f"{paper.title} {paper.abstract}".lower() # Common Core alignment (Math and ELA focus) if any(subj in ['Mathematics', 'English Language Arts'] for subj in paper.educational_metadata.educational_subjects): if any(term in content_text for term in ['problem solving', 'critical thinking', 'reasoning']): alignment['Common Core'] = 0.7 # NGSS alignment (Science focus) if 'Science' in paper.educational_metadata.educational_subjects: if any(term in content_text for term in ['inquiry', 'investigation', 'evidence', 'model']): alignment['NGSS'] = 0.8 # General state standards alignment if any(term in content_text for term in ['curriculum', 'standards', 'assessment', 'learning 
objectives']): alignment['State Standards'] = 0.6 return alignment def _extract_educational_applications(self, paper: ResearchPaper) -> List[str]: """Extract potential educational applications from paper content.""" applications = [] content_text = f"{paper.title} {paper.abstract}".lower() # Teaching applications if any(term in content_text for term in ['teaching', 'instruction', 'pedagogy']): applications.append("Teaching methodology") # Learning applications if any(term in content_text for term in ['learning', 'education', 'student']): applications.append("Student learning") # Research applications if any(term in content_text for term in ['research', 'investigation', 'study']): applications.append("Academic research") # Technology applications if any(term in content_text for term in ['technology', 'digital', 'online', 'computer']): applications.append("Educational technology") # Assessment applications if any(term in content_text for term in ['assessment', 'evaluation', 'testing', 'measurement']): applications.append("Educational assessment") return applications def _get_subject_search_terms(self, subject: str) -> List[str]: """Get search terms for educational subject.""" subject_terms = { 'Mathematics': ['math', 'mathematics', 'algebra', 'geometry', 'calculus', 'statistics'], 'Science': ['science', 'physics', 'chemistry', 'biology', 'earth science'], 'Technology': ['technology', 'computer', 'programming', 'software', 'digital'], 'Engineering': ['engineering', 'design', 'systems', 'optimization'], 'Physics': ['physics', 'mechanics', 'thermodynamics', 'quantum', 'relativity'], 'Computer Science': ['computer science', 'algorithm', 'programming', 'software', 'AI'], 'Biology': ['biology', 'genetics', 'ecology', 'evolution', 'molecular'], 'Statistics': ['statistics', 'probability', 'data analysis', 'statistical'] } return subject_terms.get(subject, [subject.lower()]) def _is_appropriate_for_level(self, paper: ResearchPaper, academic_level: str) -> bool: """Check if 
paper is appropriate for academic level.""" complexity_score = self.client.calculate_complexity_score({ 'title': paper.title, 'summary': paper.abstract }) level_thresholds = { 'High School': (0.0, 0.4), 'Undergraduate': (0.2, 0.7), 'Graduate': (0.5, 0.9), 'Research': (0.7, 1.0) } min_threshold, max_threshold = level_thresholds.get(academic_level, (0.0, 1.0)) return min_threshold <= complexity_score <= max_threshold def _apply_educational_filters( self, papers: List[ResearchPaper], academic_level: Optional[str] = None, subject: Optional[str] = None ) -> List[ResearchPaper]: """Apply educational filtering to papers.""" filtered = papers # Filter by minimum educational relevance if self.min_educational_relevance > 0: filtered = [ paper for paper in filtered if paper.educational_metadata.educational_relevance_score >= self.min_educational_relevance ] # Filter by academic level appropriateness if academic_level and self.enable_age_appropriate: filtered = [ paper for paper in filtered if self._is_appropriate_for_level(paper, academic_level) ] # Filter by subject relevance if subject: filtered = [ paper for paper in filtered if subject in paper.educational_metadata.educational_subjects or any(subj.lower() in subject.lower() for subj in paper.educational_metadata.educational_subjects) ] return filtered def _analyze_paper_trends(self, papers: List[ResearchPaper], subject: str) -> Dict[str, Any]: """Analyze trends in research papers for educational insights.""" if not papers: return { 'total_papers': 0, 'subject': subject, 'trends': {}, 'educational_insights': [] } # Analyze publication trends publication_dates = [paper.publication_date for paper in papers] recent_papers = len([d for d in publication_dates if (date.today() - d).days <= 7]) # Analyze subject distribution all_subjects = [] for paper in papers: all_subjects.extend(paper.subjects) subject_counts = {} for subj in all_subjects: subject_counts[subj] = subject_counts.get(subj, 0) + 1 # Top trending subjects 
top_subjects = sorted(subject_counts.items(), key=lambda x: x[1], reverse=True)[:5] # Analyze educational relevance high_relevance_papers = [ paper for paper in papers if paper.educational_metadata.educational_relevance_score > 0.7 ] # Educational insights insights = [] if recent_papers > len(papers) * 0.3: insights.append(f"High activity in {subject} research with {recent_papers} papers in the last week") if len(high_relevance_papers) > len(papers) * 0.5: insights.append(f"Strong educational relevance with {len(high_relevance_papers)} highly relevant papers") if top_subjects: insights.append(f"Most active area: {top_subjects[0][0]} with {top_subjects[0][1]} papers") return { 'total_papers': len(papers), 'subject': subject, 'recent_papers_week': recent_papers, 'high_relevance_papers': len(high_relevance_papers), 'trends': { 'top_subjects': dict(top_subjects), 'average_relevance': sum(p.educational_metadata.educational_relevance_score for p in papers) / len(papers), 'complexity_distribution': self._analyze_complexity_distribution(papers) }, 'educational_insights': insights, 'analysis_date': date.today().isoformat() } def _analyze_complexity_distribution(self, papers: List[ResearchPaper]) -> Dict[str, int]: """Analyze complexity distribution of papers.""" distribution = { 'Introductory': 0, 'Intermediate': 0, 'Advanced': 0 } for paper in papers: difficulty = paper.educational_metadata.difficulty_level if difficulty in distribution: distribution[difficulty] += 1 return distribution async def health_check(self) -> Dict[str, Any]: """ Perform health check on arXiv tool and API. 
Returns: Health check results """ try: # Check API connectivity api_health = await self.client.health_check() # Check cache service try: cache_healthy = await self.cache_service.health_check() except CacheError as e: logger.error(f"Cache health check failed: {e}") cache_healthy = False # Check rate limiting rate_limit_status = await self.rate_limiting_service.get_rate_limit_status(self.api_name) return { 'tool_name': self.tool_name, 'status': 'healthy' if api_health['status'] == 'healthy' and cache_healthy else 'unhealthy', 'api_health': api_health, 'cache_healthy': cache_healthy, 'rate_limit_status': rate_limit_status, 'timestamp': datetime.now().isoformat() } except Exception as e: logger.error(f"arXiv tool health check failed: {e}") return { 'tool_name': self.tool_name, 'status': 'unhealthy', 'error': str(e), 'timestamp': datetime.now().isoformat() }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Cicatriiz/openedu-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.