Skip to main content
Glama
martinfrasch

ResearchTwin

discover

Search across researchers for papers, datasets, or repositories using keywords to find relevant academic resources in a federated network.

Instructions

Search across all researchers for papers, datasets, or repositories matching a keyword.

Args: query: Search keyword (e.g. 'fetal', 'machine learning', 'turbulence'). type: Optional filter — 'paper', 'dataset', or 'repo'. Leave empty to search all types.

Returns matching items across all registered researchers, sorted by relevance. This is the primary tool for cross-researcher discovery.

Input Schema

Table | JSON Schema

| Name  | Required | Description | Default |
|-------|----------|-------------|---------|
| query | Yes      |             |         |
| type  | No       |             |         |

Implementation Reference

  • The 'discover' MCP tool handler function. Takes a query string and optional type filter, calls the backend /api/discover endpoint, and formats results for cross-researcher discovery of papers, datasets, and repositories.
    @mcp.tool(annotations=ToolAnnotations(title="Discover Research", read_only_hint=True))
    async def discover(query: str, type: str = "") -> str:
        """Search across all researchers for papers, datasets, or repositories matching a keyword.

        Args:
            query: Search keyword (e.g. 'fetal', 'machine learning', 'turbulence').
            type: Optional filter — 'paper', 'dataset', or 'repo'. Leave empty to search all types.

        Returns matching items across all registered researchers, sorted by relevance.
        This is the primary tool for cross-researcher discovery.
        """
        # Upper bound on listed items; the header still reports the backend's full count.
        max_shown = 20

        params = {"q": query}
        if type:
            # Only send the filter when set, so the backend default (all types) applies.
            params["type"] = type

        data = await _get("/api/discover", params=params)
        results = data.get("results", [])
        if not results:
            return f"No results found for '{query}'" + (f" (type: {type})" if type else "") + "."

        lines = []
        for r in results[:max_shown]:
            rtype = r.get("@type", "Unknown")
            # Papers and datasets carry "title", repos carry "name"; fall back to
            # "Untitled" when both are missing or empty.
            name = r.get("title") or r.get("name") or "Untitled"
            researcher = r.get("researcher", "")

            if rtype == "ScholarlyArticle":
                cites = r.get("citations", 0)
                lines.append(f"- [Paper] **{name}** by {researcher} ({cites} citations)")
            elif rtype in ("Dataset", "SoftwareSourceCode"):
                # /api/discover reports the dataset/repo score under "s_score";
                # "qic_score" is still honoured first in case a payload carries it.
                qic = r.get("qic_score", r.get("s_score", 0))
                label = "Dataset" if rtype == "Dataset" else "Repo"
                lines.append(f"- [{label}] **{name}** by {researcher} (QIC: {qic})")
            else:
                # Keep unrecognised result types visible so the listing matches the
                # count announced in the header instead of silently dropping rows.
                lines.append(f"- [{rtype}] **{name}** by {researcher}")

        total = data.get("total", len(results))
        # Count what is actually listed rather than assuming max_shown rows exist.
        shown = len(lines)
        header = f"**{total} results for '{query}'**"
        if type:
            header += f" (type: {type})"
        if total > shown:
            header += f" (showing top {shown})"

        return header + ":\n" + "\n".join(lines)
  • Helper function _get() that makes HTTP GET requests to the ResearchTwin backend API. Used by the discover tool to fetch data from /api/discover endpoint.
    async def _get(path: str, params: dict | None = None) -> dict:
        """Fetch ``BASE_URL + path`` from the ResearchTwin API and return the parsed JSON.

        Args:
            path: Endpoint path appended verbatim to ``BASE_URL`` (e.g. "/api/discover").
            params: Optional query-string parameters forwarded to the request.

        Returns:
            The response body decoded from JSON.

        Raises:
            Whatever ``raise_for_status()`` raises for an unsuccessful response;
            the error is not handled here.
        """
        # A short-lived client per call; the context manager closes it on exit.
        async with httpx.AsyncClient(timeout=TIMEOUT) as session:
            url = f"{BASE_URL}{path}"
            response = await session.get(url, params=params)
            response.raise_for_status()
            payload = response.json()
        return payload
  • Backend API endpoint /api/discover that implements the actual search logic. Iterates through all researchers, searches their papers/datasets/repositories for keyword matches, and returns sorted results.
    @app.get("/api/discover")
    async def discover(
        q: str = Query(..., min_length=2, max_length=200),
        type: str = Query(default="", pattern="^(dataset|repo|paper|)$"),
    ):
        """Cross-researcher search for agent-driven discovery."""
        q_lower = q.lower()
        results = []
    
        for slug in researchers.list_slugs():
            researcher = researchers.get_researcher(slug)
            try:
                s2_data, gh_data, fs_data = await _fetch_all(researcher)
                qic = compute_researcher_qic(fs_data, gh_data, s2_data)
            except Exception:
                continue
    
            researcher_name = researcher["display_name"]
    
            # Search papers
            if type in ("", "paper"):
                for p in s2_data.get("top_papers", []):
                    title = p.get("title", "")
                    if q_lower in title.lower():
                        results.append({
                            "@type": "ScholarlyArticle",
                            "title": title,
                            "year": p.get("year"),
                            "citations": p.get("citations", 0),
                            "researcher": researcher_name,
                            "researcher_slug": slug,
                        })
    
            # Search datasets
            if type in ("", "dataset"):
                for ds in qic.get("dataset_scores", []):
                    title = ds.get("title", "")
                    if q_lower in title.lower():
                        results.append({
                            "@type": "Dataset",
                            "title": title,
                            "s_score": ds.get("score", 0),
                            "researcher": researcher_name,
                            "researcher_slug": slug,
                        })
    
            # Search repos
            if type in ("", "repo"):
                for repo in qic.get("repo_scores", []):
                    title = repo.get("title", "")
                    if q_lower in title.lower():
                        results.append({
                            "@type": "SoftwareSourceCode",
                            "name": title,
                            "s_score": repo.get("score", 0),
                            "researcher": researcher_name,
                            "researcher_slug": slug,
                        })
    
        # Sort by score, highest first: s_score for datasets/repos, citation count for papers
        results.sort(key=lambda r: r.get("s_score", r.get("citations", 0)), reverse=True)
    
        return {
            "@type": "SearchResultSet",
            "query": q,
            "type_filter": type or "all",
            "total": len(results),
            "results": results[:50],
  • MCP tool registration using @mcp.tool decorator with ToolAnnotations. Registers the discover function as an MCP tool with title 'Discover Research' and read_only_hint=True.
    @mcp.tool(annotations=ToolAnnotations(title="Discover Research", read_only_hint=True))

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/martinfrasch/researchtwin'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.