Skip to main content
Glama
emi-dm

ArxivSearcher MCP Server

by emi-dm

analyze_paper_trends

Analyze trends in academic papers to identify patterns in authors, keywords, timeline, or categories for research insights.

Instructions

Analyze trends in a collection of papers.

:param papers: List of papers from search_papers results
:param analysis_type: Type of analysis ('authors', 'keywords', 'timeline', 'categories')

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| papers | Yes | | |
| analysis_type | No | | authors |

Implementation Reference

  • The primary synchronous handler implementation for the analyze_paper_trends MCP tool. It performs various trend analyses (authors, timeline, categories, keywords) on a list of papers using Counter for counting and TF-IDF for keywords.
    @mcp.tool
    def analyze_paper_trends(
        papers: List[Dict[str, Any]], analysis_type: str = "authors"
    ) -> dict:
        """
        Analyze trends in a collection of papers.

        :param papers: List of papers from search_papers results
        :param analysis_type: Type of analysis ('authors', 'keywords', 'timeline', 'categories')
        """
        # NOTE(review): the docstring above is presumably surfaced to MCP clients
        # as the tool description, so it is kept unchanged; details live here.
        #
        # Accepted input: a list of paper dicts, or a dict wrapping that list
        # under a "results" key. Returns a dict describing the requested
        # analysis plus "total_papers_analyzed", or a dict with an "error" key
        # when the input is unusable or the analysis type is unknown.
        import re  # only needed by the keyword fallback below

        invalid_format = {
            "error": "Invalid papers format. Expected list or dict with 'results' key."
        }
        # Explicit type checks instead of `"results" not in papers`, which did a
        # membership test on lists/strings and could end in a TypeError.
        if isinstance(papers, dict):
            if "results" not in papers:
                return invalid_format
            results = papers["results"]
        elif isinstance(papers, list):
            results = papers
        else:
            return invalid_format

        if not results:
            return {"error": "No papers to analyze"}

        total_papers = len(results)

        if analysis_type == "authors":
            # `or []` also covers an explicit None value, not just a missing key.
            author_lists = [paper.get("authors") or [] for paper in results]
            author_counts = Counter(
                author for authors in author_lists for author in authors
            )
            analysis = {
                "type": "authors",
                "total_unique_authors": len(author_counts),
                "most_prolific_authors": author_counts.most_common(10),
                "collaboration_stats": {
                    "avg_authors_per_paper": sum(len(a) for a in author_lists)
                    / total_papers,
                    "single_author_papers": sum(1 for a in author_lists if len(a) == 1),
                    "multi_author_papers": sum(1 for a in author_lists if len(a) > 1),
                },
            }

        elif analysis_type == "timeline":
            year_counts = Counter()
            for paper in results:
                published = paper.get("published_date") or ""
                if published:
                    # The year is whatever precedes the first "-"; str() keeps
                    # non-string date values from raising on .split().
                    year_counts[str(published).split("-")[0]] += 1

            analysis = {
                "type": "timeline",
                "papers_by_year": dict(sorted(year_counts.items())),
                "most_active_year": year_counts.most_common(1)[0] if year_counts else None,
                "total_years_span": len(year_counts),
            }

        elif analysis_type == "categories":
            category_counts = Counter(
                category
                for paper in results
                for category in (paper.get("categories") or [])
            )
            analysis = {
                "type": "categories",
                "total_categories": len(category_counts),
                "most_common_categories": category_counts.most_common(10),
                "category_distribution": dict(category_counts),
            }

        elif analysis_type == "keywords":
            # One document per paper: title followed by abstract. None values
            # become empty strings so the text "None" never enters the corpus.
            text_content = [
                f"{paper.get('title') or ''} {paper.get('summary') or ''}"
                for paper in results
            ]
            try:
                # Use TF-IDF to find important terms
                vectorizer = TfidfVectorizer(
                    max_features=50, stop_words="english", ngram_range=(1, 2), min_df=2
                )
                tfidf_matrix = vectorizer.fit_transform(text_content)
                feature_names = vectorizer.get_feature_names_out()
                # Sum each term's score over all documents.
                scores = tfidf_matrix.sum(axis=0).A1

                keyword_scores = sorted(
                    zip(feature_names, scores), key=lambda item: item[1], reverse=True
                )
                analysis = {
                    "type": "keywords",
                    "top_keywords": keyword_scores[:20],
                    "total_unique_terms": len(feature_names),
                }
            except Exception as e:
                # Deliberate best-effort: TF-IDF can fail (presumably e.g. when
                # min_df=2 leaves no terms for a very small corpus). Instead of
                # the previous always-empty Counter, report a plain word
                # frequency so the caller still gets something useful.
                common_words = frozenset(
                    (
                        "the", "and", "for", "with", "that", "this", "are", "from",
                        "was", "were", "has", "have", "our", "not", "can", "which",
                        "their", "these", "its", "but", "also", "such", "than",
                        "been", "into",
                    )
                )
                words = re.findall(r"[a-z]{3,}", " ".join(text_content).lower())
                word_counts = Counter(w for w in words if w not in common_words)
                analysis = {
                    "type": "keywords",
                    "error": f"Could not perform keyword analysis: {str(e)}",
                    "fallback_word_count": dict(word_counts.most_common(20)),
                }

        else:
            # Previously an unknown type silently produced a result containing
            # only the paper count; make the mistake visible to the caller.
            analysis = {
                "error": (
                    f"Unknown analysis_type '{analysis_type}'. "
                    "Expected one of: 'authors', 'keywords', 'timeline', 'categories'."
                )
            }

        analysis["total_papers_analyzed"] = total_papers
        return analysis
  • The asynchronous version of the analyze_paper_trends handler for remote deployment, identical logic to the primary handler.
    @mcp.tool
    async def analyze_paper_trends(
        papers: List[Dict[str, Any]], analysis_type: str = "authors"
    ) -> dict:
        """
        Analyze trends in a collection of papers.

        :param papers: List of papers from search_papers results
        :param analysis_type: Type of analysis ('authors', 'keywords', 'timeline', 'categories')
        """
        # NOTE(review): the docstring above is presumably surfaced to MCP clients
        # as the tool description, so it is kept unchanged; details live here.
        #
        # Accepted input: a list of paper dicts, or a dict wrapping that list
        # under a "results" key. Returns a dict describing the requested
        # analysis plus "total_papers_analyzed", or a dict with an "error" key
        # when the input is unusable or the analysis type is unknown.
        # The body never awaits; it is a coroutine only because of `async def`.
        import re  # only needed by the keyword fallback below

        invalid_format = {
            "error": "Invalid papers format. Expected list or dict with 'results' key."
        }
        # Explicit type checks instead of `"results" not in papers`, which did a
        # membership test on lists/strings and could end in a TypeError.
        if isinstance(papers, dict):
            if "results" not in papers:
                return invalid_format
            results = papers["results"]
        elif isinstance(papers, list):
            results = papers
        else:
            return invalid_format

        if not results:
            return {"error": "No papers to analyze"}

        total_papers = len(results)

        if analysis_type == "authors":
            # `or []` also covers an explicit None value, not just a missing key.
            author_lists = [paper.get("authors") or [] for paper in results]
            author_counts = Counter(
                author for authors in author_lists for author in authors
            )
            analysis = {
                "type": "authors",
                "total_unique_authors": len(author_counts),
                "most_prolific_authors": author_counts.most_common(10),
                "collaboration_stats": {
                    "avg_authors_per_paper": sum(len(a) for a in author_lists)
                    / total_papers,
                    "single_author_papers": sum(1 for a in author_lists if len(a) == 1),
                    "multi_author_papers": sum(1 for a in author_lists if len(a) > 1),
                },
            }

        elif analysis_type == "timeline":
            year_counts = Counter()
            for paper in results:
                published = paper.get("published_date") or ""
                if published:
                    # The year is whatever precedes the first "-"; str() keeps
                    # non-string date values from raising on .split().
                    year_counts[str(published).split("-")[0]] += 1

            analysis = {
                "type": "timeline",
                "papers_by_year": dict(sorted(year_counts.items())),
                "most_active_year": year_counts.most_common(1)[0] if year_counts else None,
                "total_years_span": len(year_counts),
            }

        elif analysis_type == "categories":
            category_counts = Counter(
                category
                for paper in results
                for category in (paper.get("categories") or [])
            )
            analysis = {
                "type": "categories",
                "total_categories": len(category_counts),
                "most_common_categories": category_counts.most_common(10),
                "category_distribution": dict(category_counts),
            }

        elif analysis_type == "keywords":
            # One document per paper: title followed by abstract. None values
            # become empty strings so the text "None" never enters the corpus.
            text_content = [
                f"{paper.get('title') or ''} {paper.get('summary') or ''}"
                for paper in results
            ]
            try:
                # Use TF-IDF to find important terms
                vectorizer = TfidfVectorizer(
                    max_features=50, stop_words="english", ngram_range=(1, 2), min_df=2
                )
                tfidf_matrix = vectorizer.fit_transform(text_content)
                feature_names = vectorizer.get_feature_names_out()
                # Sum each term's score over all documents.
                scores = tfidf_matrix.sum(axis=0).A1

                keyword_scores = sorted(
                    zip(feature_names, scores), key=lambda item: item[1], reverse=True
                )
                analysis = {
                    "type": "keywords",
                    "top_keywords": keyword_scores[:20],
                    "total_unique_terms": len(feature_names),
                }
            except Exception as e:
                # Deliberate best-effort: TF-IDF can fail (presumably e.g. when
                # min_df=2 leaves no terms for a very small corpus). Instead of
                # the previous always-empty Counter, report a plain word
                # frequency so the caller still gets something useful.
                common_words = frozenset(
                    (
                        "the", "and", "for", "with", "that", "this", "are", "from",
                        "was", "were", "has", "have", "our", "not", "can", "which",
                        "their", "these", "its", "but", "also", "such", "than",
                        "been", "into",
                    )
                )
                words = re.findall(r"[a-z]{3,}", " ".join(text_content).lower())
                word_counts = Counter(w for w in words if w not in common_words)
                analysis = {
                    "type": "keywords",
                    "error": f"Could not perform keyword analysis: {str(e)}",
                    "fallback_word_count": dict(word_counts.most_common(20)),
                }

        else:
            # Previously an unknown type silently produced a result containing
            # only the paper count; make the mistake visible to the caller.
            analysis = {
                "error": (
                    f"Unknown analysis_type '{analysis_type}'. "
                    "Expected one of: 'authors', 'keywords', 'timeline', 'categories'."
                )
            }

        analysis["total_papers_analyzed"] = total_papers
        return analysis

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/emi-dm/Arxiv-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server