discover
Search across researchers for papers, datasets, or repositories using keywords to find relevant academic resources in a federated network.
Instructions
Search across all researchers for papers, datasets, or repositories matching a keyword.
Args: query: Search keyword (e.g. 'fetal', 'machine learning', 'turbulence'). type: Optional filter — 'paper', 'dataset', or 'repo'. Leave empty to search all types.
Returns matching items across all registered researchers, sorted by relevance. This is the primary tool for cross-researcher discovery.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
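| query | Yes | Search keyword (e.g. 'fetal', 'machine learning', 'turbulence'). | |
| type | No | Optional filter — 'paper', 'dataset', or 'repo'. Leave empty to search all types. | `""` |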
Implementation Reference
- The 'discover' MCP tool handler function. Takes a query string and optional type filter, calls the backend /api/discover endpoint, and formats results for cross-researcher discovery of papers, datasets, and repositories.
@mcp.tool(annotations=ToolAnnotations(title="Discover Research", read_only_hint=True))
async def discover(query: str, type: str = "") -> str:
    """Search across all researchers for papers, datasets, or repositories matching a keyword.

    Args:
        query: Search keyword (e.g. 'fetal', 'machine learning', 'turbulence').
        type: Optional filter — 'paper', 'dataset', or 'repo'. Leave empty to search all types.

    Returns matching items across all registered researchers, sorted by relevance.
    This is the primary tool for cross-researcher discovery.
    """
    # The docstring above is kept verbatim: it is presumably surfaced to MCP
    # clients as the tool description, so rewording it would change behavior.
    max_shown = 20  # cap the rendered list so the reply stays short

    # Only forward the type filter when one was given.
    params = {"q": query}
    if type:
        params["type"] = type

    data = await _get("/api/discover", params=params)
    results = data.get("results", [])
    if not results:
        return f"No results found for '{query}'" + (f" (type: {type})" if type else "") + "."

    # Schema.org-style "@type" values that carry a score, mapped to display labels.
    scored_labels = {"Dataset": "Dataset", "SoftwareSourceCode": "Repo"}

    lines = []
    for r in results[:max_shown]:
        rtype = r.get("@type", "Unknown")
        # Papers and datasets carry "title", repos carry "name"; an empty
        # value falls through to the placeholder.
        name = r.get("title") or r.get("name") or "Untitled"
        researcher = r.get("researcher", "")

        if rtype == "ScholarlyArticle":
            cites = r.get("citations") or 0  # tolerate an explicit null
            lines.append(f"- [Paper] **{name}** by {researcher} ({cites} citations)")
        elif rtype in scored_labels:
            # The /api/discover endpoint emits the score under "s_score";
            # "qic_score" is still honoured for older payloads.
            score = r.get("s_score", r.get("qic_score", 0))
            lines.append(f"- [{scored_labels[rtype]}] **{name}** by {researcher} (QIC: {score})")
        else:
            # Unknown result kinds are still listed rather than silently
            # dropped, so the line count matches what the header reports.
            lines.append(f"- [{rtype}] **{name}** by {researcher}")

    total = data.get("total", len(results))
    shown = len(lines)

    header = f"**{total} results for '{query}'**"
    if type:
        header += f" (type: {type})"
    if total > shown:
        header += f" (showing top {shown})"
    return header + ":\n" + "\n".join(lines)
# - Helper function _get() that makes HTTP GET requests to the ResearchTwin backend API. Used by the discover tool to fetch data from /api/discover endpoint.
async def _get(path: str, params: dict | None = None) -> dict:
    """Issue a GET request to the ResearchTwin API and return the decoded JSON.

    Args:
        path: Path appended directly to ``BASE_URL`` (e.g. ``/api/discover``).
        params: Optional query-string parameters passed through to the client.

    Returns:
        The response body parsed by ``resp.json()``.

    Raises:
        Propagates whatever ``raise_for_status()`` raises for an error
        response, as well as any transport error from the HTTP client.
    """
    url = f"{BASE_URL}{path}"
    # A short-lived client per call; the context manager closes it on exit.
    async with httpx.AsyncClient(timeout=TIMEOUT) as session:
        response = await session.get(url, params=params)
        response.raise_for_status()
        payload = response.json()
    return payload
# - backend/main.py:833-900 (handler) Backend API endpoint /api/discover that implements the actual search logic. Iterates through all researchers, searches their papers/datasets/repositories for keyword matches, and returns sorted results.
@app.get("/api/discover")
async def discover(
    q: str = Query(..., min_length=2, max_length=200),
    type: str = Query(default="", pattern="^(dataset|repo|paper|)$"),
):
    """Cross-researcher search for agent-driven discovery."""
    # Walks every registered researcher, fetches their source data, and keeps
    # each paper / dataset / repo whose title contains `q` (case-insensitive
    # substring match). `type` narrows the search to one kind; "" means all.
    # Returns a "SearchResultSet" dict: `total` counts every match, while
    # `results` is capped at the 50 highest-scoring entries.
    import logging  # local import: the file's top-level imports are not visible here

    q_lower = q.lower()
    results = []

    for slug in researchers.list_slugs():
        researcher = researchers.get_researcher(slug)
        try:
            s2_data, gh_data, fs_data = await _fetch_all(researcher)
            qic = compute_researcher_qic(fs_data, gh_data, s2_data)
        except Exception:
            # Best effort: one failing researcher must not break the whole
            # search, but the failure is recorded instead of vanishing.
            logging.getLogger(__name__).warning(
                "discover: skipping researcher %r after fetch/scoring failure",
                slug,
                exc_info=True,
            )
            continue

        researcher_name = researcher["display_name"]

        # Search papers
        if type in ("", "paper"):
            for p in s2_data.get("top_papers", []):
                title = p.get("title") or ""  # tolerate an explicit null title
                if q_lower in title.lower():
                    results.append({
                        "@type": "ScholarlyArticle",
                        "title": title,
                        "year": p.get("year"),
                        "citations": p.get("citations", 0),
                        "researcher": researcher_name,
                        "researcher_slug": slug,
                    })

        # Search datasets
        if type in ("", "dataset"):
            for ds in qic.get("dataset_scores", []):
                title = ds.get("title") or ""
                if q_lower in title.lower():
                    results.append({
                        "@type": "Dataset",
                        "title": title,
                        "s_score": ds.get("score", 0),
                        "researcher": researcher_name,
                        "researcher_slug": slug,
                    })

        # Search repos
        if type in ("", "repo"):
            for repo in qic.get("repo_scores", []):
                title = repo.get("title") or ""
                if q_lower in title.lower():
                    results.append({
                        "@type": "SoftwareSourceCode",
                        "name": title,
                        "s_score": repo.get("score", 0),
                        "researcher": researcher_name,
                        "researcher_slug": slug,
                    })

    def _rank(item):
        # Datasets/repos rank by s_score, papers by citation count; a null
        # value counts as 0 so the comparison never sees None.
        return item.get("s_score", item.get("citations", 0)) or 0

    # Sort by score, highest first. Every entry already matched the query in
    # its title, so there is no separate relevance tier.
    # NOTE(review): s_score and citation counts share one ordering here even
    # though they look like different scales — confirm this is intended.
    results.sort(key=_rank, reverse=True)

    return {
        "@type": "SearchResultSet",
        "query": q,
        "type_filter": type or "all",
        "total": len(results),
        "results": results[:50],
    }
# - mcp-server/src/mcp_server_researchtwin/server.py:166-166 (registration) MCP tool registration using @mcp.tool decorator with ToolAnnotations. Registers the discover function as an MCP tool with title 'Discover Research' and read_only_hint=True.
@mcp.tool(annotations=ToolAnnotations(title="Discover Research", read_only_hint=True))