Skip to main content
Glama
Sharan0402

Expense Tracker MCP Server

by Sharan0402

import_receipt_from_pdf

Extract and parse receipt data from PDF files to automatically categorize expenses and store them in a database for tracking spending patterns.

Instructions

Import and parse a receipt from a PDF file.

This tool:

  1. Extracts text from the PDF

  2. Parses receipt metadata (store, date, totals)

  3. Extracts line items with prices

  4. Categorizes each item using hybrid approach (static rules + LLM)

  5. Stores everything in SQLite database

Args: pdf_path: Absolute path to the PDF receipt file; ctx: FastMCP context for logging and LLM access

Returns: Summary of imported receipt including store, date, item count, and category breakdown

Input Schema

TableJSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| pdf_path | Yes | Absolute path to PDF receipt file | |

Implementation Reference

  • main.py:27-120 (handler)
    Primary tool handler function. Decorated with @mcp.tool for registration. Implements PDF validation, parsing via pdf_parser, item categorization, database storage, and returns structured summary.
    @mcp.tool
    async def import_receipt_from_pdf(
        pdf_path: Annotated[str, "Absolute path to PDF receipt file"],
        ctx: Context,
    ) -> dict:
        """Import and parse a receipt from a PDF file.
    
        This tool:
        1. Extracts text from the PDF
        2. Parses receipt metadata (store, date, totals)
        3. Extracts line items with prices
        4. Categorizes each item using hybrid approach (static rules + LLM)
        5. Stores everything in SQLite database
    
        Args:
            pdf_path: Absolute path to the PDF receipt file
            ctx: FastMCP context for logging and LLM access
    
        Returns:
            Summary of imported receipt including store, date, item count, and category breakdown
        """
        # Every failure leaves this function as a ToolError:
        #   - validation failures raised below pass through unchanged,
        #   - FileNotFoundError / ValueError from the helpers get a specific message,
        #   - anything else is logged via ctx.error and wrapped generically.
        try:
            await ctx.info(f"Starting import of receipt: {pdf_path}")
    
            # Validate path (expand "~" and normalise before touching the filesystem)
            path = Path(pdf_path).expanduser().resolve()
            if not path.exists():
                raise ToolError(f"PDF file not found: {pdf_path}")
    
            if path.suffix.lower() != ".pdf":
                raise ToolError(f"File must be a PDF: {pdf_path}")
    
            # Parse PDF
            await ctx.info("Extracting text from PDF...")
            receipt, raw_items = parse_pdf_receipt(path)
    
            await ctx.info(
                f"Parsed receipt: {receipt.store_name} on {receipt.purchase_date}"
            )
            total_items = len(raw_items)
            await ctx.info(f"Found {total_items} line items")
    
            # Categorize items
            await ctx.info("Categorizing items...")
            categorized_items = []
            item_type_counts = {}
    
            for position, item_dict in enumerate(raw_items, start=1):
                item_name = item_dict["item_name"]
    
                # Categorize using hybrid approach
                item_type = await categorize_item(item_name, ctx)
    
                # Create LineItem object; its validation raises ValueError for
                # non-positive totals/quantities, which is reported below as a
                # parse failure.
                categorized_items.append(
                    LineItem(
                        item_name_raw=item_name,
                        item_type=item_type,
                        quantity=item_dict["quantity"],
                        line_total=item_dict["price"],
                    )
                )
    
                # Track category counts
                item_type_counts[item_type] = item_type_counts.get(item_type, 0) + 1
    
                await ctx.debug(f"  [{position}/{total_items}] {item_name} -> {item_type}")
    
            # Insert into database
            # NOTE(review): insert_receipt commits on its own connection before
            # insert_items runs, so a failure in insert_items leaves a receipt
            # row without items. Making this atomic needs a shared connection.
            await ctx.info("Saving to database...")
            receipt_id = insert_receipt(receipt)
            insert_items(receipt_id, categorized_items)
    
            await ctx.info(f"Successfully imported receipt #{receipt_id}")
    
            # Return summary
            return {
                "status": "success",
                "receipt_id": receipt_id,
                "store_name": receipt.store_name,
                "purchase_date": receipt.purchase_date,
                "total": receipt.total,
                "items_count": len(categorized_items),
                "item_types": item_type_counts,
                "message": f"Successfully imported {len(categorized_items)} items from {receipt.store_name}",
            }
    
        except ToolError:
            # Already a user-facing error raised by the validation above; without
            # this clause the generic handler below would log it as "unexpected"
            # and wrap its message a second time.
            raise
        except FileNotFoundError as e:
            raise ToolError(f"File not found: {e}") from e
        except ValueError as e:
            raise ToolError(f"Failed to parse receipt: {e}") from e
        except Exception as e:
            await ctx.error(f"Unexpected error during import: {e}")
            raise ToolError(f"Failed to import receipt: {e}") from e
  • Dataclass models for Receipt and LineItem providing structured data validation and typing for parsed receipt data used throughout the tool.
    @dataclass
    class Receipt:
        """Header-level data for one parsed receipt.

        Construction fails with ``ValueError`` when ``total`` is zero or
        negative; ``subtotal`` and ``tax`` are optional and not validated.
        """

        store_name: str
        purchase_date: str  # ISO format: YYYY-MM-DD
        total: float
        subtotal: Optional[float] = None
        tax: Optional[float] = None

        def __post_init__(self):
            """Reject a receipt whose total is not strictly positive."""
            non_positive_total = self.total <= 0
            if non_positive_total:
                raise ValueError("Total must be positive")
    
    
    @dataclass
    class LineItem:
        """One purchased item as it appears on a receipt.

        ``unit_price`` is derived from ``line_total / quantity`` when the caller
        does not supply it. Construction fails with ``ValueError`` when
        ``line_total`` or ``quantity`` is zero or negative.
        """

        item_name_raw: str
        item_type: str
        line_total: float
        quantity: float = 1.0
        unit_price: Optional[float] = None

        def __post_init__(self):
            """Fill in a missing unit price, then validate the amounts."""
            price_missing = self.unit_price is None
            # The quantity guard keeps the division safe; an invalid quantity is
            # rejected by the checks that follow.
            if price_missing and self.quantity > 0:
                self.unit_price = self.line_total / self.quantity

            # Line total is checked before quantity, so it wins when both are bad.
            checks = (
                (self.line_total, "Line total must be positive"),
                (self.quantity, "Quantity must be positive"),
            )
            for amount, complaint in checks:
                if amount <= 0:
                    raise ValueError(complaint)
    
    
    @dataclass
    class ItemStats:
        """Aggregate purchase figures for a single item type.

        A plain record with no validation; ``average_days_between`` is optional
        and defaults to ``None``.
        """

        item_type: str
        total_purchases: int
        last_purchase_date: str
        first_purchase_date: str
        total_spent: float
        average_days_between: Optional[float] = None
  • Key helper function that extracts text from PDF using pdfplumber, parses store/date/totals/line items using regex patterns, returns Receipt and raw items list.
    def parse_pdf_receipt(pdf_path: Path) -> tuple[Receipt, list[dict]]:
        """Read a PDF receipt and turn its text into structured data.

        Args:
            pdf_path: Location of the PDF; a plain string is also accepted and
                is converted to a ``Path``.

        Returns:
            A ``(Receipt, items)`` pair where ``items`` is a list of dicts, one
            per line item.

        Raises:
            FileNotFoundError: If PDF doesn't exist
            ValueError: If parsing fails
        """
        # Tolerate callers that pass a string instead of a Path.
        source = Path(pdf_path) if isinstance(pdf_path, str) else pdf_path

        # Text extraction feeds straight into the receipt parser.
        return parse_receipt(extract_text_from_pdf(source))
  • Hybrid item categorization helper: static regex/pattern matching first, LLM fallback via ctx.sample for unknown items.
    async def categorize_item(item_name: str, ctx=None) -> str:
        """Assign an item type to a receipt line, rules first and LLM second.

        Args:
            item_name: Raw item name from receipt
            ctx: Optional FastMCP Context; when given, it is used for the LLM
                fallback if the static rules produce no category.

        Returns:
            The category from the static rules when they yield one, otherwise
            the result of the LLM fallback, otherwise ``"other"``.
        """
        rule_match = deterministic_categorize(item_name)

        if not rule_match:
            # No static rule matched: ask the LLM if we can, else give up.
            if not ctx:
                return "other"
            return await llm_categorize(item_name, ctx)

        return rule_match
  • Database persistence helpers: insert_receipt creates receipt record, insert_items adds categorized line items with foreign key.
    def insert_receipt(receipt: Receipt, db_path: Path = DEFAULT_DB_PATH) -> int:
        """Write one receipt row to the database and return its row ID.

        A connection is obtained from ``get_connection(db_path)``, the insert is
        committed, and the connection is closed whether or not the insert
        succeeds.
        """
        db = get_connection(db_path)

        try:
            # Column order here must match the column list in the statement.
            row = (
                receipt.store_name,
                receipt.purchase_date,
                receipt.subtotal,
                receipt.tax,
                receipt.total,
            )
            result = db.execute(
                """
                INSERT INTO receipts (store_name, purchase_date, subtotal, tax, total)
                VALUES (?, ?, ?, ?, ?)
            """,
                row,
            )
            db.commit()
            new_id = result.lastrowid
            return new_id
        finally:
            db.close()
    
    
    def insert_items(
        receipt_id: int, items: list[LineItem], db_path: Path = DEFAULT_DB_PATH
    ) -> None:
        """Store all line items of a receipt in a single batch.

        Does nothing (and opens no connection) when ``items`` is empty.
        Otherwise every item is inserted with ``receipt_id`` as its parent
        reference, the batch is committed, and the connection is always closed.
        """
        if not items:
            return

        db = get_connection(db_path)

        try:
            # One parameter tuple per item, in the statement's column order.
            rows = [
                (
                    receipt_id,
                    entry.item_name_raw,
                    entry.item_type,
                    entry.quantity,
                    entry.unit_price,
                    entry.line_total,
                )
                for entry in items
            ]
            db.executemany(
                """
                INSERT INTO items (receipt_id, item_name_raw, item_type, quantity, unit_price, line_total)
                VALUES (?, ?, ?, ?, ?, ?)
            """,
                rows,
            )
            db.commit()
        finally:
            db.close()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Sharan0402/expense-tracker-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.