Skip to main content
Glama
Sharan0402

Expense Tracker MCP Server

by Sharan0402

import_receipt_from_pdf

Extract and parse receipt data from PDF files to automatically categorize expenses and store them in a database for tracking spending patterns.

Instructions

Import and parse a receipt from a PDF file.

This tool:

  1. Extracts text from the PDF

  2. Parses receipt metadata (store, date, totals)

  3. Extracts line items with prices

  4. Categorizes each item using hybrid approach (static rules + LLM)

  5. Stores everything in SQLite database

Args: pdf_path: Absolute path to the PDF receipt file; ctx: FastMCP context for logging and LLM access.

Returns: Summary of imported receipt including store, date, item count, and category breakdown

Input Schema

Table | JSON Schema
Name | Required | Description | Default
pdf_path | Yes | Absolute path to PDF receipt file |

Output Schema

Table | JSON Schema
Name | Required | Description | Default

No arguments

Implementation Reference

  • main.py:27-120 (handler)
    Primary tool handler function. Decorated with @mcp.tool for registration. Implements PDF validation, parsing via pdf_parser, item categorization, database storage, and returns structured summary.
    @mcp.tool
    async def import_receipt_from_pdf(
        pdf_path: Annotated[str, "Absolute path to PDF receipt file"],
        ctx: Context,
    ) -> dict:
        """Import and parse a receipt from a PDF file.

        This tool:
        1. Extracts text from the PDF
        2. Parses receipt metadata (store, date, totals)
        3. Extracts line items with prices
        4. Categorizes each item using hybrid approach (static rules + LLM)
        5. Stores everything in SQLite database

        Args:
            pdf_path: Absolute path to the PDF receipt file
            ctx: FastMCP context for logging and LLM access

        Returns:
            Summary of imported receipt including store, date, item count, and category breakdown

        Raises:
            ToolError: If the file is missing, is not a PDF, cannot be parsed,
                or any later step (categorization, storage) fails.
        """
        try:
            await ctx.info(f"Starting import of receipt: {pdf_path}")

            # Validate path
            path = Path(pdf_path).expanduser().resolve()
            if not path.exists():
                raise ToolError(f"PDF file not found: {pdf_path}")

            if path.suffix.lower() != ".pdf":
                raise ToolError(f"File must be a PDF: {pdf_path}")

            # Parse PDF
            await ctx.info("Extracting text from PDF...")
            receipt, raw_items = parse_pdf_receipt(path)

            await ctx.info(
                f"Parsed receipt: {receipt.store_name} on {receipt.purchase_date}"
            )
            total_items = len(raw_items)
            await ctx.info(f"Found {total_items} line items")

            # Categorize items
            await ctx.info("Categorizing items...")
            categorized_items = []
            item_type_counts = {}

            for position, item_dict in enumerate(raw_items, start=1):
                # Categorize using hybrid approach
                item_type = await categorize_item(item_dict["item_name"], ctx)

                # LineItem validates its amounts and raises ValueError on
                # non-positive values; that surfaces below as a parse failure.
                line_item = LineItem(
                    item_name_raw=item_dict["item_name"],
                    item_type=item_type,
                    quantity=item_dict["quantity"],
                    line_total=item_dict["price"],
                )

                categorized_items.append(line_item)

                # Track category counts
                item_type_counts[item_type] = item_type_counts.get(item_type, 0) + 1

                await ctx.debug(
                    f"  [{position}/{total_items}] {item_dict['item_name']} -> {item_type}"
                )

            # Insert into database
            # NOTE(review): insert_receipt commits on its own connection before
            # insert_items runs, so a failure while inserting items leaves a
            # receipt row without items. Consider a single transaction.
            await ctx.info("Saving to database...")
            receipt_id = insert_receipt(receipt)
            insert_items(receipt_id, categorized_items)

            await ctx.info(f"Successfully imported receipt #{receipt_id}")

            # Return summary
            return {
                "status": "success",
                "receipt_id": receipt_id,
                "store_name": receipt.store_name,
                "purchase_date": receipt.purchase_date,
                "total": receipt.total,
                "items_count": len(categorized_items),
                "item_types": item_type_counts,
                "message": f"Successfully imported {len(categorized_items)} items from {receipt.store_name}",
            }

        except ToolError:
            # Validation errors raised above already carry a user-facing
            # message. Without this clause the generic handler below would log
            # them as "unexpected" and wrap them in a second, misleading prefix.
            raise
        except FileNotFoundError as e:
            raise ToolError(f"File not found: {str(e)}") from e
        except ValueError as e:
            raise ToolError(f"Failed to parse receipt: {str(e)}") from e
        except Exception as e:
            # Top-level boundary: report the failure to the client log, then
            # convert it to a ToolError while keeping the original as the cause.
            await ctx.error(f"Unexpected error during import: {e}")
            raise ToolError(f"Failed to import receipt: {str(e)}") from e
  • Dataclass models for Receipt and LineItem providing structured data validation and typing for parsed receipt data used throughout the tool.
    @dataclass
    class Receipt:
        """Header-level data for one parsed receipt.

        Construction fails with ``ValueError`` when ``total`` is zero or
        negative. ``subtotal`` and ``tax`` are optional and default to ``None``.
        """

        store_name: str
        purchase_date: str  # ISO format: YYYY-MM-DD
        total: float
        subtotal: Optional[float] = None
        tax: Optional[float] = None

        def __post_init__(self):
            """Reject a receipt whose total is not above zero."""
            total_not_positive = self.total <= 0
            if total_not_positive:
                raise ValueError("Total must be positive")
    
    
    @dataclass
    class LineItem:
        """One purchased line on a receipt, tagged with its category.

        When ``unit_price`` is left unset it is derived as
        ``line_total / quantity``. Construction fails with ``ValueError`` when
        ``line_total`` or ``quantity`` is zero or negative.
        """

        item_name_raw: str
        item_type: str
        line_total: float
        quantity: float = 1.0
        unit_price: Optional[float] = None

        def __post_init__(self):
            """Check the amounts, then fill in a missing unit price."""
            if self.line_total <= 0:
                raise ValueError("Line total must be positive")
            if self.quantity <= 0:
                raise ValueError("Quantity must be positive")

            # Keep the explicit quantity guard so the division is only
            # attempted for a strictly positive quantity.
            price_missing = self.unit_price is None
            if price_missing and self.quantity > 0:
                self.unit_price = self.line_total / self.quantity
    
    
    @dataclass
    class ItemStats:
        """Statistics for a specific item type.

        Plain data holder with no validation; all fields except
        ``average_days_between`` are required.
        """

        # Category label these statistics describe.
        item_type: str
        # Number of purchases counted for this item type.
        total_purchases: int
        # NOTE(review): presumably ISO YYYY-MM-DD like Receipt.purchase_date — confirm.
        last_purchase_date: str
        first_purchase_date: str
        # Total amount spent on this item type.
        total_spent: float
        # Defaults to None; presumably unset when an interval cannot be
        # computed (e.g. a single purchase) — verify against the producer.
        average_days_between: Optional[float] = None
  • Key helper function that extracts text from PDF using pdfplumber, parses store/date/totals/line items using regex patterns, returns Receipt and raw items list.
    def parse_pdf_receipt(pdf_path: Path) -> tuple[Receipt, list[dict]]:
        """Turn a PDF receipt into a Receipt plus its raw line items.

        The PDF text is obtained from ``extract_text_from_pdf`` and handed to
        ``parse_receipt``; this function only normalizes the path argument and
        chains the two helpers.

        Args:
            pdf_path: Location of the PDF. A plain string is also accepted and
                is converted to a ``Path`` first.

        Returns:
            The ``(Receipt, list of item dicts)`` pair produced by
            ``parse_receipt``.

        Raises:
            FileNotFoundError: If PDF doesn't exist
            ValueError: If parsing fails
        """
        # Callers may pass a string despite the annotation; other values are
        # forwarded untouched.
        source = Path(pdf_path) if isinstance(pdf_path, str) else pdf_path
        return parse_receipt(extract_text_from_pdf(source))
  • Hybrid item categorization helper: static regex/pattern matching first, LLM fallback via ctx.sample for unknown items.
    async def categorize_item(item_name: str, ctx=None) -> str:
        """Pick a category for a receipt item: static rules first, LLM second.

        Args:
            item_name: Raw item name from receipt
            ctx: Optional FastMCP Context; when given, it is passed to
                ``llm_categorize`` for items the static rules do not match.

        Returns:
            The rule-based category when one is found, otherwise the result of
            ``llm_categorize`` if ``ctx`` is available, otherwise ``"other"``.
        """
        rule_match = deterministic_categorize(item_name)
        if rule_match:
            return rule_match

        # No static rule applied: ask the LLM when a context exists, else use
        # the catch-all bucket.
        return await llm_categorize(item_name, ctx) if ctx else "other"
  • Database persistence helpers: insert_receipt creates receipt record, insert_items adds categorized line items with foreign key.
    def insert_receipt(receipt: Receipt, db_path: Path = DEFAULT_DB_PATH) -> int:
        """Write one receipt row and return the ID assigned to it.

        Args:
            receipt: Receipt whose fields are stored in the ``receipts`` table.
            db_path: Database location passed to ``get_connection``.

        Returns:
            The cursor's ``lastrowid`` for the inserted row.
        """
        db = get_connection(db_path)

        try:
            # Column order must match the INSERT statement below.
            row = (
                receipt.store_name,
                receipt.purchase_date,
                receipt.subtotal,
                receipt.tax,
                receipt.total,
            )
            result = db.execute(
                """
                INSERT INTO receipts (store_name, purchase_date, subtotal, tax, total)
                VALUES (?, ?, ?, ?, ?)
            """,
                row,
            )
            db.commit()
            new_id = result.lastrowid
            return new_id
        finally:
            # Always release the connection, whether or not the insert worked.
            db.close()
    
    
    def insert_items(
        receipt_id: int, items: list[LineItem], db_path: Path = DEFAULT_DB_PATH
    ) -> None:
        """Store every line item of a receipt in one batch.

        Args:
            receipt_id: ID of the receipt row the items belong to.
            items: Line items to store; an empty list is a no-op and no
                connection is opened.
            db_path: Database location passed to ``get_connection``.
        """
        if not items:
            return

        db = get_connection(db_path)

        try:
            # One parameter tuple per item, in the INSERT's column order.
            rows = [
                (
                    receipt_id,
                    entry.item_name_raw,
                    entry.item_type,
                    entry.quantity,
                    entry.unit_price,
                    entry.line_total,
                )
                for entry in items
            ]
            db.executemany(
                """
                INSERT INTO items (receipt_id, item_name_raw, item_type, quantity, unit_price, line_total)
                VALUES (?, ?, ?, ?, ?, ?)
            """,
                rows,
            )
            db.commit()
        finally:
            # Always release the connection, whether or not the insert worked.
            db.close()

Tool Definition Quality

Score is being calculated. Check back soon.

Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Sharan0402/expense-tracker-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.