Skip to main content
Glama

zotero_item_fulltext

Retrieve full text content of a Zotero item or attachment using its item key through the Model Context Protocol, enabling direct access to library resources.

Instructions

Get the full text content of a Zotero item, given the item key of a parent item or specific attachment.

Input Schema

Table | JSON Schema
Name | Required | Description | Default
item_key | Yes | |

Implementation Reference

  • Registration of the 'zotero_item_fulltext' tool using the @mcp.tool decorator with name and description.
    # Decorator that registers the tool's handler with the MCP server object `mcp`
    # under the public tool name and the description shown to clients.
    @mcp.tool( name="zotero_item_fulltext", description="Get the full text content of a Zotero item, given the item key of a parent item or specific attachment.", )
  • Main handler function implementing the tool logic: retrieves Zotero item, finds attachment, extracts full text using pyzotero.fulltext_item, formats with metadata header.
    def get_item_fulltext(item_key: str) -> str:
        """Return a markdown report with an item's metadata and extracted full text.

        The report is built from up to three sections, concatenated in order:
        the metadata header produced by ``format_item``, an "Attachment
        Information" section, and a "Document Content" section.

        Args:
            item_key: Key of a parent item or of a specific attachment.

        Returns:
            The assembled markdown string. When the item cannot be found, or any
            step after the client is created raises, a one-line explanatory
            message is returned instead of raising.
        """
        # Client construction sits outside the try block on purpose: a
        # configuration error propagates to the caller rather than being
        # turned into an "Error retrieving..." string.
        client = get_zotero_client()

        try:
            record: Any = client.item(item_key)
            if not record:
                return f"No item found with key: {item_key}"

            # Resolve the attachment first, then render the metadata header.
            details = get_attachment_details(client, record)
            sections = [format_item(record)]

            # Guard clause: nothing to extract from.
            if details is None:
                sections.append(
                    "\n\n## Attachment Information\n[❌ No suitable attachment found for full text extraction. This item may not have any attached files or they may not be in a supported format.]"
                )
                sections.append("")
                return "".join(sections)

            info = f"\n## Attachment Information\n- **Key**: `{details.key}`\n- **Type**: {details.content_type}"

            payload: Any = client.fulltext_item(details.key)
            if payload and "content" in payload:
                body = payload["content"]
                # Whitespace-delimited token count; approximate by design.
                approx_words = len(body.split())
                info += f"\n- **Word Count**: ~{approx_words}"
                content = f"\n\n## Document Content\n\n{body}"
            else:
                # The attachment exists but no text payload came back for it.
                content = "\n\n## Document Content\n\n[⚠️ Attachment is available but text extraction is not possible. The document may be scanned as images or have other restrictions that prevent text extraction.]"

            sections.append(info)
            sections.append(content)
            return "".join(sections)
        except Exception as e:
            return f"Error retrieving item full text: {e!s}"
  • Helper function to identify and prioritize attachments (PDFs first, then HTML, others) for full text extraction.
    def get_attachment_details(
        zot: zotero.Zotero,
        item: dict[str, Any],
    ) -> AttachmentDetails | None:
        """Choose the attachment to use for full-text extraction.

        If ``item`` is itself an attachment it is returned directly. Otherwise
        the item's children are listed and one attachment is picked, preferring
        ``application/pdf``, then ``text/html``, then any other content type.

        Args:
            zot: Zotero client used to list the item's children.
            item: Item dict; only its ``data`` sub-dict is read.

        Returns:
            Key and content type of the chosen attachment, or ``None`` when the
            item has no attachment children or the child lookup fails.
        """
        data = item.get("data", {})

        # Direct attachment: use it as-is, whatever its content type.
        if data.get("itemType") == "attachment":
            return AttachmentDetails(
                key=data.get("key"),
                content_type=data.get("contentType"),
            )

        # Regular item: inspect child attachments. This stays best-effort (a
        # failed lookup yields None), but the failure is logged, not hidden.
        try:
            children: Any = zot.children(data.get("key", ""))

            pdfs: list[tuple[Any, Any, str]] = []
            htmls: list[tuple[Any, Any, str]] = []
            others: list[tuple[Any, Any, str]] = []

            for child in children:
                child_data = child.get("data", {})
                if child_data.get("itemType") != "attachment":
                    continue

                content_type = child_data.get("contentType")
                # md5 is only a deterministic tie-breaker between candidates of
                # the same type; it says nothing about file size. The key may be
                # present with a null value, so normalise to "" -- comparing
                # None with str (or None with None) would raise TypeError.
                tie_breaker = child_data.get("md5") or ""
                candidate = (child_data.get("key"), content_type, tie_breaker)

                if content_type == "application/pdf":
                    pdfs.append(candidate)
                elif content_type == "text/html":
                    htmls.append(candidate)
                else:
                    others.append(candidate)

            # First non-empty group wins. max() returns the first maximal
            # element, the same winner a stable descending sort puts first.
            for group in (pdfs, htmls, others):
                if group:
                    key, content_type, _ = max(group, key=lambda entry: entry[2])
                    return AttachmentDetails(key=key, content_type=content_type)
        except Exception:
            import logging

            logging.getLogger(__name__).warning(
                "Could not determine attachment for item %r",
                data.get("key"),
                exc_info=True,
            )

        return None
  • Helper function to initialize and return the pyzotero Zotero client from environment variables.
    def get_zotero_client() -> zotero.Zotero:
        """Build a Zotero client from environment variables.

        Reads ``ZOTERO_LIBRARY_ID``, ``ZOTERO_LIBRARY_TYPE`` (default ``"user"``),
        ``ZOTERO_API_KEY`` and ``ZOTERO_LOCAL``. ``ZOTERO_LOCAL`` counts as
        enabled when it is ``true``, ``yes`` or ``1`` (case-insensitive).

        Returns:
            A ``zotero.Zotero`` instance built from those settings.

        Raises:
            ValueError: If local mode is off and either the library id or the
                API key is missing or empty.
        """
        use_local = os.getenv("ZOTERO_LOCAL", "").lower() in ["true", "yes", "1"]
        lib_id = os.getenv("ZOTERO_LIBRARY_ID")
        lib_type = os.getenv("ZOTERO_LIBRARY_TYPE", "user")
        # An empty API key is treated the same as an unset one.
        key = os.getenv("ZOTERO_API_KEY") or None

        # Remote access needs both credentials; local mode needs neither.
        if not use_local and not (lib_id and key):
            raise ValueError(
                "Missing required environment variables. Please set ZOTERO_LIBRARY_ID and ZOTERO_API_KEY"
            )

        if use_local and not lib_id:
            # Indicates "current user" for the local API
            lib_id = "0"

        return zotero.Zotero(
            library_id=lib_id,
            library_type=lib_type,
            api_key=key,
            local=use_local,
        )
  • Helper function to format Zotero item metadata into a readable markdown string, used as header in fulltext output.
    def format_item(item: dict[str, Any]) -> str:
        """Format a Zotero item's metadata as markdown for LLM consumption.

        Notes get a dedicated layout (key, parent, modification date, tags and
        lightly de-HTML-ed content). Every other item type gets a title header
        followed by type, date, creators grouped by role, publication details,
        abstract, tags, identifiers and the child count.

        Args:
            item: Item dict with a top-level ``key`` and a ``data`` sub-dict;
                ``meta.numChildren`` is read when present.

        Returns:
            The formatted sections joined with newlines.

        Raises:
            KeyError: If ``item`` lacks ``data`` or ``key``, or a tag entry
                lacks ``tag``.
        """
        data = item["data"]
        item_key = item["key"]
        item_type = data.get("itemType", "unknown")

        # Special handling for notes
        if item_type == "note":
            note_content = data.get("note", "")

            # Simple tag replacement rather than real HTML parsing: paragraphs
            # and line breaks become newlines, bold/italic become markdown.
            note_content = (
                note_content.replace("<p>", "").replace("</p>", "\n").replace("<br>", "\n")
            )
            note_content = note_content.replace("<strong>", "**").replace("</strong>", "**")
            note_content = note_content.replace("<em>", "*").replace("</em>", "*")

            formatted = [
                "## 📝 Note",
                f"Item Key: `{item_key}`",
            ]

            if parent_item := data.get("parentItem"):
                formatted.append(f"Parent Item: `{parent_item}`")

            if date := data.get("dateModified"):
                formatted.append(f"Last Modified: {date}")

            if tags := data.get("tags"):
                tag_list = [f"`{tag['tag']}`" for tag in tags]
                formatted.append(f"\n### Tags\n{', '.join(tag_list)}")

            formatted.append(f"\n### Note Content\n{note_content}")

            return "\n".join(formatted)

        # Regular item handling (non-notes): basic metadata first, with the
        # key up front for easy reference.
        formatted = [
            f"## {data.get('title', 'Untitled')}",
            f"Item Key: `{item_key}`",
            f"Type: {item_type}",
            f"Date: {data.get('date', 'No date')}",
        ]

        # Creators grouped by role, in order of first appearance.
        creators_by_role: dict[str, list[str]] = {}
        for creator in data.get("creators", []):
            role = creator.get("creatorType", "contributor")
            # Join only the non-empty name parts so a blank first name does not
            # leave a dangling "Last, "; fall back to the single-field name.
            name_parts = [
                part
                for part in (creator.get("lastName"), creator.get("firstName"))
                if part
            ]
            name = ", ".join(name_parts) or (creator.get("name") or "")
            if name:
                creators_by_role.setdefault(role, []).append(name)

        for role, names in creators_by_role.items():
            role_display = role.capitalize() + ("s" if len(names) > 1 else "")
            formatted.append(f"{role_display}: {'; '.join(names)}")

        # Publication details
        if publication := data.get("publicationTitle"):
            formatted.append(f"Publication: {publication}")

        # Volume / issue / pages share one line. Each part is included on its
        # own merits, so pages are kept even when the item has no volume.
        locator_parts = [
            f"{label}: {value}"
            for label, field in (
                ("Volume", "volume"),
                ("Issue", "issue"),
                ("Pages", "pages"),
            )
            if (value := data.get(field))
        ]
        if locator_parts:
            formatted.append(", ".join(locator_parts))

        # Abstract with clear section header
        if abstract := data.get("abstractNote"):
            formatted.append(f"\n### Abstract\n{abstract}")

        # Tags with formatting for better visibility
        if tags := data.get("tags"):
            tag_list = [f"`{tag['tag']}`" for tag in tags]
            formatted.append(f"\n### Tags\n{', '.join(tag_list)}")

        # URLs, DOIs, and identifiers grouped together
        identifiers = [
            f"{label}: {value}"
            for label, field in (
                ("URL", "url"),
                ("DOI", "DOI"),
                ("ISBN", "ISBN"),
                ("ISSN", "ISSN"),
            )
            if (value := data.get(field))
        ]
        if identifiers:
            formatted.append("\n### Identifiers\n" + "\n".join(identifiers))

        # Child count (notes and attachments), when the API reported one.
        if notes := item.get("meta", {}).get("numChildren", 0):
            formatted.append(
                f"\n### Additional Information\nNumber of notes/attachments: {notes}"
            )

        return "\n".join(formatted)

Other Tools

Related Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kujenga/zotero-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.