Zotero MCP

MIT License

131

Overview · Inspect (New) · Schema · Related Servers · Reviews · Score

zotero-mcp
src
zotero_mcp

"""
Helper for accessing Zotero via Better BibTeX JSON-RPC API.
Provides direct access to Zotero's annotations without requiring PDF extraction.
"""

import json
import requests
import os
import sys
from typing import Dict, Any, List, Optional

class ZoteroBetterBibTexAPI:
    """Class to interact with Zotero's local Better BibTeX JSON-RPC API.

    Requests are sent to ``http://127.0.0.1:<port>/better-bibtex/json-rpc``.
    The best-effort methods (attachments, search, export) report problems on
    stderr and return a fallback value instead of raising; diagnostics are
    kept off stdout so they cannot mix with data a host process may be
    reading from it (e.g. a stdio-based protocol).
    """

    def __init__(self, port="23119", database="Zotero"):
        """
        Initialize the API connection.

        No network traffic happens here; only the URL and headers are built.

        Args:
            port: The port number Zotero is running on (default: 23119 for Zotero, 24119 for Juris-M)
            database: The database type ('Zotero' or 'Juris-M'). Selecting
                'Juris-M' always uses port 24119, overriding ``port``.
        """
        self.port = port
        if database == "Juris-M":
            self.port = "24119"

        self.base_url = f"http://127.0.0.1:{self.port}/better-bibtex/json-rpc"
        self.headers = {
            'Content-Type': 'application/json',
            'User-Agent': 'python/zotero-mcp',
            'Accept': 'application/json',
            'Connection': 'keep-alive',
        }

    def _make_request(self, method: str, params: List[Any]) -> Any:
        """
        Make a JSON-RPC request to the Zotero API.

        Args:
            method: The JSON-RPC method to call
            params: The parameters for the method

        Returns:
            The ``result`` member of the response ({} when it is absent).
            Its shape depends on the method called.

        Raises:
            Exception: On transport/HTTP failures ("Connection error: ...")
                or when the response carries an ``error`` member
                ("API error: ...").
        """
        payload = {
            "jsonrpc": "2.0",
            "method": method,
            "params": params,
            "id": 1,  # a request without an id is a notification and gets no reply
        }

        try:
            response = requests.post(
                self.base_url,
                headers=self.headers,
                data=json.dumps(payload),
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            # Chain the cause so the underlying transport error stays visible.
            raise Exception(
                f"Connection error: {str(e)}. Is Zotero running with Better BibTeX installed?"
            ) from e

        error = data.get("error")
        if error is not None:
            if isinstance(error, dict):
                error_msg = str(error.get('message', 'Unknown error'))
                error_data = error.get('data', '')
                if error_data:
                    error_msg += f": {error_data}"
            else:
                # Tolerate servers that send a bare string/number as the error.
                error_msg = str(error)
            raise Exception(f"API error: {error_msg}")

        return data.get("result", {})

    def is_zotero_running(self) -> bool:
        """Check if Zotero is running and accessible.

        Returns:
            True only when the Better BibTeX probe endpoint answers with the
            text "ready"; False on any failure to reach it.
        """
        try:
            response = requests.get(
                f"http://127.0.0.1:{self.port}/better-bibtex/cayw?probe=true",
                headers=self.headers,
                timeout=5,
            )
        except Exception:
            # Best-effort probe: any ordinary failure means "not running",
            # but KeyboardInterrupt/SystemExit are allowed to propagate.
            return False
        return response.text == "ready"

    @staticmethod
    def _first_exported_item(document: Any) -> Dict[str, Any]:
        """Return the first entry of ``items`` from an export document.

        ``document`` may be a JSON string or an already-parsed mapping.
        Raises ValueError/TypeError/AttributeError/LookupError when the
        document cannot be parsed or holds no items.
        """
        if isinstance(document, str):
            document = json.loads(document)
        return document.get('items', [])[0]

    def get_item_by_citekey(self, citekey: str) -> Dict[str, Any]:
        """
        Get item data by citation key.

        The item is first located with ``item.search``; a full export is then
        attempted, and the search hit is returned if the export cannot be
        obtained or parsed.

        Args:
            citekey: The citation key of the item

        Returns:
            The exported item data, or the basic search result as a fallback

        Raises:
            Exception: If the search fails, returns nothing, or contains no
                entry whose ``citekey`` matches exactly.
        """
        # First, search for the item to get its ID and library ID
        search_results = self._make_request("item.search", [citekey])

        if not search_results:
            raise Exception(f"No items found with citekey: {citekey}")

        item = next(
            (hit for hit in search_results if hit.get('citekey') == citekey),
            None,
        )

        if not item:
            raise Exception(f"No exact match found for citekey: {citekey}")

        library_id = item.get('libraryID')

        # Now export the full item data
        try:
            export_result = self._make_request(
                "item.export",
                [[citekey], "36a3b0b5-bad0-4a04-b79b-441c7cef77db", library_id]
            )

            if not export_result:
                raise Exception(f"Failed to export item data for citekey: {citekey}")

            # The result might be an array or a string depending on the Better BibTeX version
            if isinstance(export_result, list):
                if len(export_result) > 2 and export_result[2]:
                    try:
                        return self._first_exported_item(export_result[2])
                    except (ValueError, TypeError, AttributeError, LookupError):
                        # Try to use the first element if it's a string
                        if isinstance(export_result[0], str):
                            return self._first_exported_item(export_result[0])
            elif isinstance(export_result, str):
                return self._first_exported_item(export_result)
            elif isinstance(export_result, dict) and 'items' in export_result:
                return self._first_exported_item(export_result)

            # Fall back to using the search result
            return item

        except Exception as e:
            print(f"Warning: Could not export full item data: {e}", file=sys.stderr)
            # Return basic item data from search
            return item

    def get_attachments(self, citekey: str, library_id: int) -> List[Dict[str, Any]]:
        """
        Get all attachments for an item.

        Args:
            citekey: The citation key of the item
            library_id: The library ID

        Returns:
            A list of attachment data; an empty list if the request fails
        """
        try:
            return self._make_request("item.attachments", [citekey, library_id])
        except Exception as e:
            print(f"Warning: Could not get attachments: {e}", file=sys.stderr)
            return []

    def get_annotations_from_attachment(self, attachment: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Extract annotations from an attachment.

        Args:
            attachment: The attachment data

        Returns:
            The attachment's ``annotations`` value, or an empty list when it
            is missing or empty
        """
        return attachment.get('annotations') or []

    def search_citekeys(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Search for items in Zotero by a search query and return their citation keys.

        Args:
            query: Search term to find items
            limit: Maximum number of results to return (default: 10)

        Returns:
            A list of dictionaries containing cite keys and basic item
            information; an empty list when nothing matches or the search fails
        """
        try:
            # Use the general item.search method with the query
            search_results = self._make_request("item.search", [query])

            # If no results found, return empty list
            if not search_results:
                return []

            # Drop items without a cite key BEFORE applying the limit, so
            # key-less hits do not use up result slots.
            cite_key_results = [
                {
                    'citekey': item['citekey'],
                    'title': item.get('title', 'No Title'),
                    'creators': item.get('creators', []),
                    'year': item.get('year', 'N/A'),
                    'libraryID': item.get('libraryID'),
                }
                for item in search_results
                if item.get('citekey')
            ]

            return cite_key_results[:limit]

        except Exception as e:
            print(f"Error searching for cite keys: {e}", file=sys.stderr)
            return []

    def export_bibtex(self, item_key: str, library_id: int = 1) -> str:
        """
        Export BibTeX for a specific item using its item key.

        Args:
            item_key: Zotero item key to export
            library_id: Library ID (default: 1 = Personal Library)

        Returns:
            BibTeX formatted string, or "" if the item cannot be resolved or
            exported (the reason is printed to stderr)
        """
        try:
            # Better BibTeX translator ID for BibTeX export
            translator_id = "ca65189f-8815-4afe-8c8b-8c7c15f0edca"  # Better BibTeX

            # Step 1: Get citation key from item key
            full_item_key = f"{library_id}:{item_key}"
            citation_mapping = self._make_request("item.citationkey", [[full_item_key]])

            if not citation_mapping:
                raise Exception(f"No citation key found for item: {item_key}")

            # Step 2: Extract citation key from mapping
            citation_key = citation_mapping.get(full_item_key)

            if not citation_key:
                raise Exception(f"Citation key not found for item: {item_key}")

            # Step 3: Export BibTeX using citation key. The library ID is
            # passed along, as get_item_by_citekey does, so the key is
            # resolved in the library the caller asked for.
            export_result = self._make_request(
                "item.export",
                [[citation_key], translator_id, library_id]
            )

            # An empty result must not be stringified into "{}" or "None".
            if not export_result:
                raise Exception(f"Failed to export BibTeX for item: {item_key}")

            # Handle different response formats
            if isinstance(export_result, str):
                return export_result
            if isinstance(export_result, list):
                # In the array-shaped reply the payload is read from element 2
                # (the same position get_item_by_citekey uses); fall back to
                # the first element otherwise.
                if len(export_result) > 2 and export_result[2] and isinstance(export_result[2], str):
                    return export_result[2]
                first = export_result[0]
                return first if isinstance(first, str) else str(first)
            if isinstance(export_result, dict) and 'bibtex' in export_result:
                return export_result['bibtex']
            return str(export_result)

        except Exception as e:
            print(f"Error exporting BibTeX: {e}", file=sys.stderr)
            return ""


def process_annotation(annotation: Dict[str, Any], attachment: Dict[str, Any], format_type: str = 'markdown') -> Dict[str, Any]:
    """
    Process a raw Zotero annotation into a more usable format.

    Malformed optional data (an unparsable or non-object position, a missing
    or non-string attachment path) falls back to defaults rather than
    discarding the whole annotation.

    Args:
        annotation: The raw annotation data from Zotero
        attachment: The attachment this annotation belongs to
        format_type: Output format (raw or markdown)

    Returns:
        A processed annotation object, with an extra ``markdown`` entry when
        ``format_type`` is 'markdown'. An empty dict is returned (and the
        error printed to stderr) if processing fails unexpectedly.
    """
    try:
        annotation_type = annotation.get('annotationType', 'unknown')
        color = annotation.get('annotationColor', '')

        # Extract text content
        text = annotation.get('annotationText', '')
        comment = annotation.get('annotationComment', '')

        # Handle page information
        page_label = annotation.get('annotationPageLabel', '1')

        # Position may arrive as a JSON-encoded string or as a parsed object.
        position = annotation.get('annotationPosition', {})
        if isinstance(position, str):
            try:
                position = json.loads(position)
            except ValueError:
                position = {}
        if not isinstance(position, dict):
            # Valid JSON that is not an object (number, list, null, ...)
            # carries no usable page/rect information.
            position = {}

        # Defaults used when the position gives no page index / rectangle.
        page = 1
        x, y = 0, 0

        # pageIndex is zero-based; the reported page number is one-based.
        if 'pageIndex' in position:
            page = position['pageIndex'] + 1

        # Use the first two values of the first rectangle as coordinates.
        rects = position.get('rects')
        if rects and len(rects[0]) >= 2:
            x, y = rects[0][0], rects[0][1]

        # A missing or non-string path (e.g. null in the JSON payload) would
        # make os.path.basename raise; treat it as "no path".
        path = attachment.get('path', '')
        if not isinstance(path, str):
            path = ''

        # Create result object
        result = {
            'id': annotation.get('key', ''),
            'type': annotation_type,
            'color': color,
            'annotatedText': text,
            'comment': comment,
            'page': page,
            'pageLabel': page_label,
            'x': x,
            'y': y,
            'date': annotation.get('dateModified', ''),
            'attachment': {
                'key': attachment.get('itemKey', ''),
                'filename': os.path.basename(path),
                'title': attachment.get('title', 'PDF'),
                'path': path,
            }
        }

        # If markdown format is requested, format the output
        if format_type == 'markdown':
            result['markdown'] = format_annotation_markdown(result)

        return result

    except Exception as e:
        # Best-effort: report on stderr (keeping stdout clean for any host
        # process reading it) and signal failure with an empty dict.
        print(f"Error processing annotation: {e}", file=sys.stderr)
        return {}

def format_annotation_markdown(annotation: Dict[str, Any]) -> str:
    """
    Render a processed annotation as a markdown snippet.

    The highlighted text, when present, becomes a block-quote line followed
    by the colour (if any), the capitalized annotation type and the page
    label. The comment, when present, is added as a further piece prefixed
    with a newline, and all pieces are joined with newlines.

    Args:
        annotation: The processed annotation object; the keys
            'annotatedText', 'color', 'type', 'pageLabel' and 'comment'
            are read with direct indexing.

    Returns:
        The markdown string (empty when there is neither text nor comment)
    """
    pieces = []

    quoted = annotation['annotatedText']
    if quoted:
        hex_color = annotation['color']
        color_suffix = f" {hex_color}" if hex_color else ""
        kind = annotation['type'].capitalize()
        pieces.append(
            f"> \"{quoted}\"{color_suffix} {kind} [Page {annotation['pageLabel']}]"
        )

    note = annotation['comment']
    if note:
        pieces.append(f"\n{note}")

    return "\n".join(pieces)

def get_color_category(hex_color: str) -> str:
    """
    Translate a hex colour code into a colour category name.

    The comparison is case-insensitive.

    Args:
        hex_color: The hex color code

    Returns:
        The category name, or an empty string for a colour not in the table
    """
    # Lower-case hex codes for the known annotation colours.
    known_colors = (
        ("#ffd400", "Yellow"),
        ("#ff6666", "Red"),
        ("#5fb236", "Green"),
        ("#2ea8e5", "Blue"),
        ("#a28ae5", "Purple"),
        ("#e56eee", "Magenta"),
        ("#f19837", "Orange"),
        ("#aaaaaa", "Gray"),
    )

    wanted = hex_color.lower()
    for code, label in known_colors:
        if code == wanted:
            return label
    return ""

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/54yyyu/zotero-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.