Zotero MCP

MIT License

131

Overview InspectNew Schema Related Servers Reviews Score

zotero-mcp
src
zotero_mcp

"""
Helper functions for PDF annotation extraction using pdfannots2json.
"""

import json
import os
import platform
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Union, Any

# Release of pdfannots2json that the download URLs below point at.
PDFANNOTS_VERSION = "1.0.15"

# Common prefix of every release asset URL for that version (ends with "/").
PDFANNOTS_BASE_URL = (
    "https://github.com/mgmeyers/pdfannots2json/releases/download/"
    + PDFANNOTS_VERSION
    + "/"
)

# Release archive URL, looked up first by platform key and then by
# architecture key.
# NOTE(review): the keys look like sys.platform / platform.machine() values;
# confirm against the code that performs the lookup.
PDFANNOTS_URLS = {
    "darwin": {
        "x86_64": PDFANNOTS_BASE_URL + "pdfannots2json.Mac.Intel.tar.gz",
        "arm64": PDFANNOTS_BASE_URL + "pdfannots2json.Mac.M1.tar.gz",
    },
    "linux": {
        "x86_64": PDFANNOTS_BASE_URL + "pdfannots2json.Linux.x64.tar.gz",
    },
    "win32": {
        # Both architecture spellings map to the same Windows archive.
        "x86_64": PDFANNOTS_BASE_URL + "pdfannots2json.Windows.x64.zip",
        "AMD64": PDFANNOTS_BASE_URL + "pdfannots2json.Windows.x64.zip",
    },
}

def get_pdfannots_dir() -> str:
    """Return the per-user pdfannots2json directory.

    This is ``~/.pdfannots2json`` with ``~`` expanded to the current user's
    home directory. The directory is not created or checked here.
    """
    install_dir = os.path.expanduser("~/.pdfannots2json")
    return install_dir

def get_pdfannots_executable() -> str:
    """Return the path at which the pdfannots2json binary is expected.

    The file lives directly inside ``get_pdfannots_dir()``. On Windows it is
    named ``pdfannots2json.exe``; on any other system the name is
    ``pdfannots2json-<system>-<machine>``, built from the lowercased
    ``platform.system()`` and the unmodified ``platform.machine()`` values.

    The path is only computed; nothing checks that the file exists.
    """
    install_dir = get_pdfannots_dir()
    os_name = platform.system().lower()
    arch = platform.machine()

    binary_name = (
        "pdfannots2json.exe"
        if os_name == "windows"
        else f"pdfannots2json-{os_name}-{arch}"
    )
    return os.path.join(install_dir, binary_name)

def is_pdfannots_installed() -> bool:
    """Report whether anything exists at the expected executable path.

    Only existence is tested; the entry is not checked for being a regular
    file or for being executable.
    """
    executable_path = get_pdfannots_executable()
    return os.path.exists(executable_path)

def ensure_pdfannots_installed() -> bool:
    """Ensure pdfannots2json is installed, downloading it if necessary.

    Returns:
        True if the executable is already present, or if the downloader
        reports success. False if the downloader reports failure, cannot be
        imported, or raises.
    """
    if is_pdfannots_installed():
        return True

    # The downloader is imported only when an install is actually needed.
    # Installation is best-effort: an import failure or any error raised by
    # the downloader is reported and turned into a False result.
    try:
        from zotero_mcp import pdfannots_downloader
        return bool(pdfannots_downloader.download_and_install())
    except Exception as e:
        # Diagnostics go to stderr: this module is part of an MCP server, and
        # under the stdio transport stdout is reserved for protocol messages.
        print(f"Error installing pdfannots2json: {e}", file=sys.stderr)
        return False

def extract_annotations_from_pdf(
    pdf_path: Union[str, Path], 
    output_dir: Optional[str] = None, 
    image_format: str = "jpg", 
    image_dpi: int = 120, 
    image_quality: int = 90
) -> List[Dict[str, Any]]:
    """
    Extract annotations directly from a PDF file using pdfannots2json

    The pdfannots2json executable is installed on demand, run as a
    subprocess, and its standard output is parsed as JSON. Every failure is
    reported on stderr and results in an empty list rather than an exception.

    Args:
        pdf_path: Path to the PDF file
        output_dir: Directory to save extracted images. It is created if
            missing. If None, a fresh temporary directory is created with
            ``tempfile.mkdtemp()``; this function does not remove it.
        image_format: Format for extracted images (jpg, png)
        image_dpi: DPI for extracted images
        image_quality: Quality for extracted images (1-100)

    Returns:
        List of annotation objects as parsed from the tool's JSON output, or
        an empty list if the tool is not installed, cannot be started, exits
        with a non-zero status, or prints output that is not a JSON list.
    """
    # Diagnostics in this function go to stderr: this module is part of an
    # MCP server, and under the stdio transport stdout is reserved for
    # protocol messages.
    if not ensure_pdfannots_installed():
        print("Error: pdfannots2json is not installed", file=sys.stderr)
        return []

    # Create temporary output directory if none provided
    if output_dir is None:
        output_dir = tempfile.mkdtemp()
    else:
        os.makedirs(output_dir, exist_ok=True)

    # Get the path to the executable
    exe_path = get_pdfannots_executable()

    # Construct command similar to what the plugin uses
    cmd = [
        exe_path,
        str(pdf_path),
        "-o", str(output_dir),  # str() so a Path is accepted as well
        "-n", "annotation",
        "-f", image_format,
        "-d", str(image_dpi),
        "-q", str(image_quality)
    ]

    try:
        # Decode as UTF-8 instead of the locale encoding so non-ASCII
        # annotation text survives on platforms whose default is not UTF-8
        # (assumes the tool emits UTF-8 JSON). errors="replace" keeps a stray
        # byte from raising UnicodeDecodeError.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error extracting annotations: {e}", file=sys.stderr)
        print(f"stderr: {e.stderr}", file=sys.stderr)
        return []
    except OSError as e:
        # The file exists but could not be started (not executable, wrong
        # architecture, removed since the check, ...).
        print(f"Error running pdfannots2json: {e}", file=sys.stderr)
        return []

    try:
        annotations = json.loads(result.stdout)
    except json.JSONDecodeError:
        print("Error parsing JSON output from pdfannots2json", file=sys.stderr)
        return []

    # Valid JSON that is not a list (e.g. null) would break len() below and
    # violate the declared return type.
    if not isinstance(annotations, list):
        print("Error: unexpected JSON output from pdfannots2json", file=sys.stderr)
        return []

    print(f"Extracted {len(annotations)} annotations from PDF", file=sys.stderr)
    return annotations

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/54yyyu/zotero-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.