analyze_paper_trends
Analyze trends in research papers by examining authors, keywords, timeline, or categories to uncover patterns and insights in academic literature.
Instructions
Analyze trends in a collection of papers.
:param papers: List of papers from search_papers results :param analysis_type: Type of analysis ('authors', 'keywords', 'timeline', 'categories')
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| analysis_type | No | Type of analysis ('authors', 'keywords', 'timeline', 'categories') | authors |
| papers | Yes | List of papers from search_papers results | |
Implementation Reference
# arxiv_searcher/arxiv_mcp.py:381-492 (handler)
# Handler function for 'analyze_paper_trends' tool. Analyzes a list of papers
# for trends in authors, timeline, categories, or keywords using statistical
# methods like Counter and TF-IDF vectorization.
@mcp.tool
def analyze_paper_trends(
    papers: List[Dict[str, Any]], analysis_type: str = "authors"
) -> dict:
    """
    Analyze trends in a collection of papers.

    :param papers: List of papers from search_papers results
    :param analysis_type: Type of analysis ('authors', 'keywords', 'timeline', 'categories')
    :return: Dict describing the requested analysis plus 'total_papers_analyzed',
        or a dict with an 'error' key when the input cannot be analyzed
    """
    import re

    # Accept either a bare list of papers or a search_papers-style dict that
    # wraps the list under "results". Checking the container type first avoids
    # a TypeError from indexing a str/list with the "results" key.
    if isinstance(papers, dict) and "results" in papers:
        results = papers["results"]
    elif isinstance(papers, list):
        results = papers
    else:
        return {
            "error": "Invalid papers format. Expected list or dict with 'results' key."
        }

    if not results:
        return {"error": "No papers to analyze"}

    analysis = {}

    if analysis_type == "authors":
        # `or []` also covers papers whose "authors" value is explicitly None.
        author_lists = [paper.get("authors") or [] for paper in results]
        author_counts = Counter(
            author for authors in author_lists for author in authors
        )
        analysis = {
            "type": "authors",
            "total_unique_authors": len(author_counts),
            "most_prolific_authors": author_counts.most_common(10),
            "collaboration_stats": {
                "avg_authors_per_paper": sum(len(a) for a in author_lists)
                / len(results),
                "single_author_papers": sum(1 for a in author_lists if len(a) == 1),
                "multi_author_papers": sum(1 for a in author_lists if len(a) > 1),
            },
        }

    elif analysis_type == "timeline":
        date_counts = Counter()
        for paper in results:
            date = paper.get("published_date", "")
            if date:
                # str() keeps non-string date values from crashing the split;
                # the year is whatever precedes the first "-".
                date_counts[str(date).split("-")[0]] += 1
        analysis = {
            "type": "timeline",
            "papers_by_year": dict(sorted(date_counts.items())),
            "most_active_year": date_counts.most_common(1)[0]
            if date_counts
            else None,
            "total_years_span": len(date_counts),
        }

    elif analysis_type == "categories":
        category_counts = Counter(
            cat for paper in results for cat in (paper.get("categories") or [])
        )
        analysis = {
            "type": "categories",
            "total_categories": len(category_counts),
            "most_common_categories": category_counts.most_common(10),
            "category_distribution": dict(category_counts),
        }

    elif analysis_type == "keywords":
        # Extract keywords from titles and abstracts
        text_content = [
            f"{paper.get('title') or ''} {paper.get('summary') or ''}"
            for paper in results
        ]
        try:
            # Use TF-IDF to find important terms
            vectorizer = TfidfVectorizer(
                max_features=50, stop_words="english", ngram_range=(1, 2), min_df=2
            )
            tfidf_matrix = vectorizer.fit_transform(text_content)
            feature_names = vectorizer.get_feature_names_out()
            scores = tfidf_matrix.sum(axis=0).A1
            keyword_scores = list(zip(feature_names, scores))
            keyword_scores.sort(key=lambda x: x[1], reverse=True)
            analysis = {
                "type": "keywords",
                "top_keywords": keyword_scores[:20],
                "total_unique_terms": len(feature_names),
            }
        except Exception as e:
            # Best-effort fallback (e.g. when min_df=2 leaves no terms for a
            # tiny corpus): raw frequency of words with four or more letters,
            # so the caller still receives some usable signal.
            word_counts = Counter(
                word
                for text in text_content
                for word in re.findall(r"[a-z]{4,}", text.lower())
            )
            analysis = {
                "type": "keywords",
                "error": f"Could not perform keyword analysis: {str(e)}",
                "fallback_word_count": dict(word_counts.most_common(20)),
            }

    else:
        # Report an unsupported analysis type instead of returning a result
        # that silently contains no analysis.
        analysis = {
            "error": f"Unknown analysis_type '{analysis_type}'. "
            "Expected one of: 'authors', 'keywords', 'timeline', 'categories'."
        }

    analysis["total_papers_analyzed"] = len(results)
    return analysis
# Async handler function for 'analyze_paper_trends' tool in the remote MCP
# server version. Performs the same four analyses as the synchronous version.
@mcp.tool
async def analyze_paper_trends(
    papers: List[Dict[str, Any]], analysis_type: str = "authors"
) -> dict:
    """
    Analyze trends in a collection of papers.

    :param papers: List of papers from search_papers results
    :param analysis_type: Type of analysis ('authors', 'keywords', 'timeline', 'categories')
    :return: Dict describing the requested analysis plus 'total_papers_analyzed',
        or a dict with an 'error' key when the input cannot be analyzed
    """
    import re

    # Accept either a bare list of papers or a search_papers-style dict that
    # wraps the list under "results". Checking the container type first avoids
    # a TypeError from indexing a str/list with the "results" key.
    if isinstance(papers, dict) and "results" in papers:
        results = papers["results"]
    elif isinstance(papers, list):
        results = papers
    else:
        return {
            "error": "Invalid papers format. Expected list or dict with 'results' key."
        }

    if not results:
        return {"error": "No papers to analyze"}

    analysis = {}

    if analysis_type == "authors":
        # `or []` also covers papers whose "authors" value is explicitly None.
        author_lists = [paper.get("authors") or [] for paper in results]
        author_counts = Counter(
            author for authors in author_lists for author in authors
        )
        analysis = {
            "type": "authors",
            "total_unique_authors": len(author_counts),
            "most_prolific_authors": author_counts.most_common(10),
            "collaboration_stats": {
                "avg_authors_per_paper": sum(len(a) for a in author_lists)
                / len(results),
                "single_author_papers": sum(1 for a in author_lists if len(a) == 1),
                "multi_author_papers": sum(1 for a in author_lists if len(a) > 1),
            },
        }

    elif analysis_type == "timeline":
        date_counts = Counter()
        for paper in results:
            date = paper.get("published_date", "")
            if date:
                # str() keeps non-string date values from crashing the split;
                # the year is whatever precedes the first "-".
                date_counts[str(date).split("-")[0]] += 1
        analysis = {
            "type": "timeline",
            "papers_by_year": dict(sorted(date_counts.items())),
            "most_active_year": date_counts.most_common(1)[0]
            if date_counts
            else None,
            "total_years_span": len(date_counts),
        }

    elif analysis_type == "categories":
        category_counts = Counter(
            cat for paper in results for cat in (paper.get("categories") or [])
        )
        analysis = {
            "type": "categories",
            "total_categories": len(category_counts),
            "most_common_categories": category_counts.most_common(10),
            "category_distribution": dict(category_counts),
        }

    elif analysis_type == "keywords":
        # Extract keywords from titles and abstracts
        text_content = [
            f"{paper.get('title') or ''} {paper.get('summary') or ''}"
            for paper in results
        ]
        try:
            # Use TF-IDF to find important terms
            vectorizer = TfidfVectorizer(
                max_features=50, stop_words="english", ngram_range=(1, 2), min_df=2
            )
            tfidf_matrix = vectorizer.fit_transform(text_content)
            feature_names = vectorizer.get_feature_names_out()
            scores = tfidf_matrix.sum(axis=0).A1
            keyword_scores = list(zip(feature_names, scores))
            keyword_scores.sort(key=lambda x: x[1], reverse=True)
            analysis = {
                "type": "keywords",
                "top_keywords": keyword_scores[:20],
                "total_unique_terms": len(feature_names),
            }
        except Exception as e:
            # Best-effort fallback (e.g. when min_df=2 leaves no terms for a
            # tiny corpus): raw frequency of words with four or more letters,
            # so the caller still receives some usable signal.
            word_counts = Counter(
                word
                for text in text_content
                for word in re.findall(r"[a-z]{4,}", text.lower())
            )
            analysis = {
                "type": "keywords",
                "error": f"Could not perform keyword analysis: {str(e)}",
                "fallback_word_count": dict(word_counts.most_common(20)),
            }

    else:
        # Report an unsupported analysis type instead of returning a result
        # that silently contains no analysis.
        analysis = {
            "error": f"Unknown analysis_type '{analysis_type}'. "
            "Expected one of: 'authors', 'keywords', 'timeline', 'categories'."
        }

    analysis["total_papers_analyzed"] = len(results)
    return analysis