Skip to main content
Glama
kujenga
by kujenga

zotero_item_fulltext

Retrieve full text content from Zotero library items using item keys to access stored documents and attachments.

Instructions

Get the full text content of a Zotero item, given the item key of a parent item or specific attachment.

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| item_key | Yes | | |

Implementation Reference

  • The main handler function for the 'zotero_item_fulltext' tool. It retrieves the Zotero item by key, finds an attachment, extracts full-text content using the Zotero API, formats metadata with format_item, and returns a combined markdown output.
    @mcp.tool(
        name="zotero_item_fulltext",
        description="Get the full text content of a Zotero item, given the item key of a parent item or specific attachment.",
    )
    def get_item_fulltext(item_key: str) -> str:
        """Return a markdown report with an item's metadata and full text.

        The report is the output of ``format_item`` followed by an
        "Attachment Information" section and, when an attachment was found,
        a "Document Content" section.

        Args:
            item_key: Key of a parent item or of a specific attachment.

        Returns:
            The markdown report, a "No item found" message when the lookup
            yields nothing, or an "Error retrieving item full text" message
            when any step after client creation raises.
        """
        # Client creation is intentionally outside the try block: a
        # configuration error propagates instead of becoming a message.
        client = get_zotero_client()

        try:
            record: Any = client.item(item_key)
            if not record:
                return f"No item found with key: {item_key}"

            # Resolve the attachment first, then render the metadata header.
            target = get_attachment_details(client, record)
            metadata_md = format_item(record)

            # Nothing to extract from: report that and stop here.
            if target is None:
                return (
                    metadata_md
                    + "\n\n## Attachment Information\n[❌ No suitable attachment found for full text extraction. This item may not have any attached files or they may not be in a supported format.]"
                )

            sections = [
                metadata_md,
                f"\n## Attachment Information\n- **Key**: `{target.key}`\n- **Type**: {target.content_type}",
            ]

            payload: Any = client.fulltext_item(target.key)
            if payload and "content" in payload:
                body = payload["content"]
                # Whitespace-separated token count, hence "approximate".
                sections.append(f"\n- **Word Count**: ~{len(body.split())}")
                sections.append(f"\n\n## Document Content\n\n{body}")
            else:
                sections.append(
                    "\n\n## Document Content\n\n[⚠️ Attachment is available but text extraction is not possible. The document may be scanned as images or have other restrictions that prevent text extraction.]"
                )

            return "".join(sections)

        except Exception as exc:
            return f"Error retrieving item full text: {exc}"
  • Supporting utility to find the most suitable attachment (prioritizing largest PDF, then HTML, then others) for full-text extraction, used directly in the handler.
    def get_attachment_details(
        zot: zotero.Zotero,
        item: dict[str, Any],
    ) -> AttachmentDetails | None:
        """Choose the attachment to use for full-text extraction.

        If ``item`` is itself an attachment, its own key and content type are
        returned. Otherwise the item's children are fetched and one attachment
        is chosen, preferring ``application/pdf``, then ``text/html``, then any
        other content type.

        Args:
            zot: Zotero client used to list the item's children.
            item: Item dict; ``item["data"]`` supplies ``itemType``, ``key``
                and ``contentType``.

        Returns:
            Details of the chosen attachment, or ``None`` when the item has no
            attachment children or the lookup fails.
        """
        import logging

        data = item.get("data", {})

        # The item is already an attachment: use it as-is.
        if data.get("itemType") == "attachment":
            return AttachmentDetails(
                key=data.get("key"),
                content_type=data.get("contentType"),
            )

        # Regular item: inspect its child attachments.
        try:
            children: Any = zot.children(data.get("key", ""))

            # Each entry is (key, content_type, md5).
            pdfs = []
            htmls = []
            others = []

            for child in children:
                child_data = child.get("data", {})
                if child_data.get("itemType") != "attachment":
                    continue

                content_type = child_data.get("contentType")
                # md5 is only a deterministic ordering key; it says nothing
                # about file size. It may be None (or absent), so coerce it to
                # a string: comparing None values would raise TypeError and
                # make the whole lookup return None.
                order_key = child_data.get("md5") or ""
                entry = (child_data.get("key"), content_type, order_key)

                if content_type == "application/pdf":
                    pdfs.append(entry)
                elif content_type == "text/html":
                    htmls.append(entry)
                else:
                    others.append(entry)

            # First non-empty group in priority order wins. max() returns the
            # first maximal entry, matching a stable descending sort.
            for candidates in (pdfs, htmls, others):
                if candidates:
                    key, content_type, _ = max(candidates, key=lambda c: c[2])
                    return AttachmentDetails(key=key, content_type=content_type)
        except Exception:
            # Best-effort lookup: callers treat None as "no attachment", but
            # leave a trace so failures are not invisible.
            logging.getLogger(__name__).warning(
                "Could not determine attachment for item %r",
                data.get("key"),
                exc_info=True,
            )

        return None
  • Utility function to format Zotero item metadata into structured markdown, used to generate the header section in the fulltext tool response.
    def format_item(item: dict[str, Any]) -> str:
        """Format a Zotero item's metadata as markdown for LLM consumption.

        Notes get a dedicated layout (key, parent item, modification date,
        tags, note text with a few HTML tags converted). All other item types
        get a title heading followed by key, type, date, creators grouped by
        role, publication details, abstract, tags, identifiers and the child
        count.

        Args:
            item: Item dict with a top-level ``"key"`` and a ``"data"`` dict
                holding the item's fields.

        Returns:
            The formatted metadata, with lines joined by newlines.

        Raises:
            KeyError: If ``item`` lacks ``"data"`` or ``"key"``, or a tag entry
                lacks ``"tag"``.
        """
        data = item["data"]
        item_key = item["key"]
        item_type = data.get("itemType", "unknown")

        # Special handling for notes
        if item_type == "note":
            note_content = data.get("note", "")
            # Simple tag substitution rather than real HTML parsing.
            note_content = (
                note_content.replace("<p>", "").replace("</p>", "\n").replace("<br>", "\n")
            )
            note_content = note_content.replace("<strong>", "**").replace("</strong>", "**")
            note_content = note_content.replace("<em>", "*").replace("</em>", "*")

            formatted = [
                "## 📝 Note",
                f"Item Key: `{item_key}`",
            ]

            if parent_item := data.get("parentItem"):
                formatted.append(f"Parent Item: `{parent_item}`")

            if date := data.get("dateModified"):
                formatted.append(f"Last Modified: {date}")

            if tags := data.get("tags"):
                tag_list = [f"`{tag['tag']}`" for tag in tags]
                formatted.append(f"\n### Tags\n{', '.join(tag_list)}")

            formatted.append(f"\n### Note Content\n{note_content}")

            return "\n".join(formatted)

        # Regular item handling (non-notes)

        # Basic metadata with key for easy reference
        formatted = [
            f"## {data.get('title', 'Untitled')}",
            f"Item Key: `{item_key}`",
            f"Type: {item_type}",
            f"Date: {data.get('date', 'No date')}",
        ]

        # Creators grouped by role, in order of first appearance.
        creators_by_role: dict[str, list[str]] = {}
        for creator in data.get("creators", []):
            role = creator.get("creatorType", "contributor")
            # Join only the name parts that are present, so an empty first or
            # last name does not leave a dangling ", "; fall back to the
            # single-field "name" form.
            name_parts = (creator.get("lastName"), creator.get("firstName"))
            name = ", ".join(part for part in name_parts if part) or creator.get(
                "name", ""
            )
            if name:
                creators_by_role.setdefault(role, []).append(name)

        for role, names in creators_by_role.items():
            role_display = role.capitalize() + ("s" if len(names) > 1 else "")
            formatted.append(f"{role_display}: {'; '.join(names)}")

        # Publication details
        if publication := data.get("publicationTitle"):
            formatted.append(f"Publication: {publication}")

        # Volume, issue and pages are each reported when present; issue and
        # pages must not depend on a volume being set.
        citation_parts = []
        for label, field in (("Volume", "volume"), ("Issue", "issue"), ("Pages", "pages")):
            if value := data.get(field):
                citation_parts.append(f"{label}: {value}")
        if citation_parts:
            formatted.append(", ".join(citation_parts))

        # Abstract with clear section header
        if abstract := data.get("abstractNote"):
            formatted.append(f"\n### Abstract\n{abstract}")

        # Tags with formatting for better visibility
        if tags := data.get("tags"):
            tag_list = [f"`{tag['tag']}`" for tag in tags]
            formatted.append(f"\n### Tags\n{', '.join(tag_list)}")

        # URLs, DOIs, and identifiers grouped together
        identifiers = []
        for label, field in (("URL", "url"), ("DOI", "DOI"), ("ISBN", "ISBN"), ("ISSN", "ISSN")):
            if value := data.get(field):
                identifiers.append(f"{label}: {value}")

        if identifiers:
            formatted.append("\n### Identifiers\n" + "\n".join(identifiers))

        # Child count (notes and attachments) from the item's meta block
        if notes := item.get("meta", {}).get("numChildren", 0):
            formatted.append(
                f"\n### Additional Information\nNumber of notes/attachments: {notes}"
            )

        return "\n".join(formatted)
  • Utility to create and configure the pyzotero Zotero client instance from environment variables, used throughout the tools including fulltext handler.
    def get_zotero_client() -> zotero.Zotero:
        """Build a Zotero client from environment variables.

        Reads ``ZOTERO_LIBRARY_ID``, ``ZOTERO_LIBRARY_TYPE`` (default
        ``"user"``), ``ZOTERO_API_KEY`` and ``ZOTERO_LOCAL``.

        Returns:
            A ``zotero.Zotero`` instance configured from those values.

        Raises:
            ValueError: If local mode is off and the library ID or API key is
                missing or empty.
        """
        library_id = os.getenv("ZOTERO_LIBRARY_ID")
        # An empty API key is treated the same as an unset one.
        api_key = os.getenv("ZOTERO_API_KEY") or None
        use_local = os.getenv("ZOTERO_LOCAL", "").lower() in ("true", "yes", "1")

        if use_local:
            # "0" indicates "current user" for the local API.
            library_id = library_id or "0"
        elif not (library_id and api_key):
            raise ValueError(
                "Missing required environment variables. Please set ZOTERO_LIBRARY_ID and ZOTERO_API_KEY"
            )

        return zotero.Zotero(
            library_id=library_id,
            library_type=os.getenv("ZOTERO_LIBRARY_TYPE", "user"),
            api_key=api_key,
            local=use_local,
        )
  • Pydantic schema/model for attachment details returned by get_attachment_details helper, used in the fulltext handler.
    class AttachmentDetails(BaseModel):
        """Key and content type of the attachment chosen for full-text extraction."""

        # Item key of the attachment (taken from the attachment's "key" field).
        key: str
        # Content type of the attachment (taken from its "contentType" field),
        # e.g. "application/pdf" or "text/html".
        content_type: str

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kujenga/zotero-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.