Skip to main content
Glama
gqy20

Europe PMC Literature Search MCP Server

crossref_service.py8.34 kB
""" CrossRef API服务 - 使用统一API调用 """ import logging from typing import Any from .api_utils import get_api_client class CrossRefService: def __init__(self, logger: logging.Logger | None = None): self.logger = logger or logging.getLogger(__name__) self.base_url = "https://api.crossref.org" self.api_client = get_api_client(logger) def search_works(self, query: str, max_results: int = 10) -> dict[str, Any]: """搜索CrossRef学术文献""" try: url = f"{self.base_url}/works" params = { "query": query, "rows": max_results, "select": "title,author,DOI,created,member,short-container-title", } api_result = self.api_client.get(url, params=params) if not api_result.get("success", False): raise Exception(api_result.get("error", "API调用失败")) data = api_result.get("data", {}) return { "success": True, "articles": self._format_articles(data.get("message", {}).get("items", [])), "total_count": data.get("message", {}).get("total-results", 0), "source": "crossref", } except Exception as e: self.logger.error(f"CrossRef搜索失败: {e}") return { "success": False, "articles": [], "total_count": 0, "source": "crossref", "error": str(e), } def get_work_by_doi(self, doi: str) -> dict[str, Any]: """通过DOI获取文献详情""" try: import urllib.parse # 对DOI进行URL编码处理,保留斜杠 encoded_doi = urllib.parse.quote(doi, safe='/') url = f"{self.base_url}/works/{encoded_doi}" api_result = self.api_client.get(url) if not api_result.get("success", False): raise Exception(api_result.get("error", "API调用失败")) data = api_result.get("data", {}) article = data.get("message", {}) return { "success": True, "article": self._format_single_article(article), "source": "crossref", } except Exception as e: self.logger.error(f"CrossRef获取详情失败: {e}") return {"success": False, "article": None, "source": "crossref", "error": str(e)} def get_references(self, doi: str, max_results: int = 20) -> dict[str, Any]: """获取参考文献列表""" try: import urllib.parse # CrossRef API: 参考文献数据包含在主查询结果中 # 对DOI进行URL编码处理,保留斜杠 encoded_doi = urllib.parse.quote(doi, safe='/') url = f"{self.base_url}/works/{encoded_doi}" # 简化API调用,不使用select参数避免400错误 api_result = self.api_client.get(url) if not api_result.get("success", False): raise Exception(api_result.get("error", "API调用失败")) data = api_result.get("data", {}) work_data = data.get("message", {}) references = work_data.get("reference", []) return { "success": True, "references": self._format_references(references[:max_results]), "total_count": len(references), "source": "crossref", } except Exception as e: self.logger.error(f"CrossRef获取参考文献失败: {e}") return { "success": False, "references": [], "total_count": 0, "source": "crossref", "error": str(e), } def _format_articles(self, items: list[dict]) -> list[dict]: """格式化文章列表""" articles = [] for item in items: articles.append(self._format_single_article(item)) return articles def _format_single_article(self, item: dict) -> dict: """格式化单篇文章""" return { "title": self._extract_title(item.get("title") or []), "authors": self._extract_authors(item.get("author") or []), "doi": item.get("DOI"), "journal": ( (item.get("short-container-title") or [""])[0] if item.get("short-container-title") else "" ), "publication_date": (item.get("created") or {}).get("date-time", ""), "source": "crossref", "raw_data": item, # 保留原始数据用于调试 } def _format_references(self, references: list[dict]) -> list[dict]: """格式化参考文献""" formatted_refs = [] for ref in references: if not ref: # 跳过空引用 continue # 处理CrossRef参考文献格式 formatted_ref = { "doi": ref.get("DOI"), "title": ref.get("unstructured", ""), # 非结构化文本通常是标题 "authors": self._extract_ref_authors(ref), "year": self._extract_ref_year(ref), "journal": ref.get("journal-title", ""), "volume": ref.get("volume"), "issue": ref.get("issue"), "page": ref.get("first-page"), "source": "crossref", } # 如果没有结构化文本但有其他信息,尝试构建标题 if not formatted_ref["title"] and ref.get("article-title"): formatted_ref["title"] = ref.get("article-title", "") formatted_refs.append(formatted_ref) return formatted_refs def _extract_title(self, title_list: list) -> str: """提取标题""" return title_list[0] if title_list else "" def _extract_authors(self, author_list: list) -> list[str]: """提取作者""" authors = [] for author in author_list: if not author: # 跳过None值 continue if "given" in author and "family" in author: authors.append(f"{author.get('given', '')} {author.get('family', '')}") elif "name" in author: authors.append(author["name"]) return authors def _extract_ref_authors(self, ref: dict) -> list[str]: """提取参考文献的作者""" authors = [] if "author" in ref: author_list = ref["author"] if isinstance(author_list, list): for author in author_list: if isinstance(author, dict): if "given" in author and "family" in author: authors.append(f"{author.get('given', '')} {author.get('family', '')}") elif "family" in author: authors.append(author.get('family', '')) elif "name" in author: authors.append(author["name"]) return authors def _extract_ref_year(self, ref: dict) -> str: """提取参考文献的年份""" # 尝试从不同字段提取年份 year = "" # 方法1: 直接的year字段 if "year" in ref: year = str(ref["year"]) # 方法2: 从created date-parts提取 elif "created" in ref and "date-parts" in ref["created"]: date_parts = ref["created"]["date-parts"] if date_parts and len(date_parts) > 0: year = str(date_parts[0][0]) # 方法3: 从published date-parts提取 elif "published" in ref and "date-parts" in ref["published"]: date_parts = ref["published"]["date-parts"] if date_parts and len(date_parts) > 0: year = str(date_parts[0][0]) # 方法4: 从published-print date-parts提取 elif "published-print" in ref and "date-parts" in ref["published-print"]: date_parts = ref["published-print"]["date-parts"] if date_parts and len(date_parts) > 0: year = str(date_parts[0][0]) return year

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gqy20/article-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server