Farnsworth

Overview Schema Related Servers Score Discussions

ocr_parser.py•17.2 KiB

"""
Farnsworth DeepSeek OCR2 Integration

Parses health documents (lab results, prescriptions, nutrition labels)
using DeepSeek OCR2 vision model via DeepInfra or local deployment.
"""

import os
import re
import json
import base64
import logging
from pathlib import Path
from datetime import date
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Union

import httpx

from .models import (
    DocumentType,
    LabResult,
    Prescription,
    NutrientInfo,
    FoodItem,
)

logger = logging.getLogger(__name__)

# Greedy match from the first "{" to the last "}" so that prose or markdown
# fences surrounding the model's JSON answer are ignored.
_JSON_OBJECT_RE = re.compile(r'\{[\s\S]*\}')


def _optional_float(value: Any) -> Optional[float]:
    """Convert *value* to float, mapping None / empty string to None.

    Unlike a truthiness test this keeps a legitimate ``0`` as ``0.0``.
    Raises ValueError / TypeError when the value is not numeric.
    """
    if value is None or value == "":
        return None
    return float(value)


def _float_or(value: Any, default: float) -> float:
    """Convert *value* to float, using *default* for None / empty string.

    Raises ValueError / TypeError when the value is present but not numeric.
    """
    if value is None or value == "":
        return float(default)
    return float(value)


def _int_or(value: Any, default: int) -> int:
    """Convert *value* to int, using *default* for None / empty string.

    Raises ValueError / TypeError when the value is present but not numeric.
    """
    if value is None or value == "":
        return default
    return int(value)


def _mean_confidence(items: List[Dict[str, Any]]) -> float:
    """Average the "confidence" of *items* (0.8 when absent); 0.0 if empty."""
    if not items:
        return 0.0
    return sum(entry.get("confidence", 0.8) for entry in items) / len(items)


@dataclass
class OCRResult:
    """Result from OCR processing."""

    success: bool
    document_type: DocumentType
    raw_text: str = ""
    structured_data: Dict[str, Any] = field(default_factory=dict)
    confidence: float = 0.0
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain-dict view, with the document type as its enum value."""
        return {
            "success": self.success,
            "document_type": self.document_type.value,
            "raw_text": self.raw_text,
            "structured_data": self.structured_data,
            "confidence": self.confidence,
            "error": self.error,
        }


class DeepSeekOCRParser:
    """
    DeepSeek OCR2 document parser for health documents.

    Supports:
    - Lab results (blood work, metabolic panels, etc.)
    - Prescriptions
    - Nutrition labels
    - Medical reports

    Uses DeepSeek OCR2 via DeepInfra API or direct DeepSeek API.
    """

    # DeepInfra API endpoint
    DEEPINFRA_URL = "https://api.deepinfra.com/v1/openai/chat/completions"

    # DeepSeek direct API endpoint
    DEEPSEEK_URL = "https://api.deepseek.com/v1/chat/completions"

    # Model configurations
    # NOTE(review): confirm that both models accept image_url content parts.
    MODELS = {
        "deepinfra": "deepseek-ai/DeepSeek-V2.5",  # Vision-capable model
        "deepseek": "deepseek-chat",
    }

    def __init__(
        self,
        api_key: Optional[str] = None,
        provider: str = "deepinfra",
    ):
        """
        Initialize the OCR parser.

        Args:
            api_key: API key (uses env var if not provided)
            provider: "deepinfra" or "deepseek"; any value other than
                "deepinfra" selects the direct DeepSeek endpoint.
        """
        self.provider = provider

        if provider == "deepinfra":
            self.api_key = api_key or os.getenv("DEEPINFRA_API_KEY")
            self.api_url = self.DEEPINFRA_URL
            self.model = self.MODELS["deepinfra"]
        else:
            self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY")
            self.api_url = self.DEEPSEEK_URL
            self.model = self.MODELS["deepseek"]

        self.client = httpx.AsyncClient(timeout=60.0)

    async def parse_document(
        self,
        image_path: Union[str, Path],
        doc_type: DocumentType,
    ) -> OCRResult:
        """
        Parse a health document image.

        Never raises: every failure is reported through ``OCRResult.error``.

        Args:
            image_path: Path to the image file
            doc_type: Type of document to parse

        Returns:
            OCRResult with structured data
        """
        if not self.api_key:
            logger.error("No API key configured for OCR")
            return OCRResult(
                success=False,
                document_type=doc_type,
                error="No API key configured. Set DEEPINFRA_API_KEY or DEEPSEEK_API_KEY.",
            )

        try:
            # Read and encode the image
            image_data = self._encode_image(image_path)
            if not image_data:
                return OCRResult(
                    success=False,
                    document_type=doc_type,
                    error=f"Could not read image: {image_path}",
                )

            # Build the prompt based on document type
            prompt = self._build_prompt(doc_type)

            # Call the vision API
            response = await self._call_vision_api(image_data, prompt)
            if not response:
                return OCRResult(
                    success=False,
                    document_type=doc_type,
                    error="API call failed",
                )

            # Parse the response based on document type
            return self._parse_response(response, doc_type)

        except Exception as e:
            # Top-level boundary: convert anything unexpected into a failed result.
            logger.error("OCR parsing error: %s", e)
            return OCRResult(
                success=False,
                document_type=doc_type,
                error=str(e),
            )

    def _encode_image(self, image_path: Union[str, Path]) -> Optional[str]:
        """Encode an image file as a base64 ``data:`` URL.

        Returns None (after logging) when the path is not a readable file.
        Unknown extensions are labelled as image/jpeg.
        """
        path = Path(image_path)
        # is_file() also rejects directories, which exists() would let through.
        if not path.is_file():
            logger.error("Image not found: %s", path)
            return None

        # Determine MIME type
        mime_types = {
            ".jpg": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".png": "image/png",
            ".gif": "image/gif",
            ".webp": "image/webp",
        }
        mime_type = mime_types.get(path.suffix.lower(), "image/jpeg")

        try:
            encoded = base64.b64encode(path.read_bytes()).decode("utf-8")
        except Exception as e:
            logger.error("Error encoding image: %s", e)
            return None
        return f"data:{mime_type};base64,{encoded}"

    def _build_prompt(self, doc_type: DocumentType) -> str:
        """Build the extraction prompt based on document type.

        Falls back to the lab-result prompt for document types without
        a dedicated prompt.
        """
        prompts = {
            DocumentType.LAB_RESULT: """Analyze this lab result document and extract all test results.

For each test found, provide:
- test_name: Name of the test
- value: Numeric value
- unit: Unit of measurement
- reference_range_low: Lower bound of normal range (if shown)
- reference_range_high: Upper bound of normal range (if shown)
- status: "normal", "low", "high", or "critical" based on the reference range

Return the data as a JSON array of objects with these fields.
Be precise with numbers. If you cannot read a value clearly, note it with confidence < 0.8.

Example output format:
{
    "results": [
        {
            "test_name": "Glucose",
            "value": 95,
            "unit": "mg/dL",
            "reference_range_low": 70,
            "reference_range_high": 100,
            "status": "normal",
            "confidence": 0.95
        }
    ]
}""",
            DocumentType.PRESCRIPTION: """Analyze this prescription document and extract medication information.

Extract:
- medication_name: Full name of the medication
- dosage: Dosage amount and strength
- frequency: How often to take (e.g., "twice daily", "every 8 hours")
- route: How to take it (oral, topical, etc.)
- prescriber: Doctor's name if visible
- refills_remaining: Number of refills if shown
- instructions: Any special instructions
- warnings: Any warnings mentioned

Return the data as JSON:
{
    "medications": [
        {
            "medication_name": "Lisinopril",
            "dosage": "10mg",
            "frequency": "once daily",
            "route": "oral",
            "prescriber": "Dr. Smith",
            "refills_remaining": 3,
            "instructions": "Take in the morning with water",
            "warnings": ["May cause dizziness"],
            "confidence": 0.9
        }
    ]
}""",
            DocumentType.NUTRITION_LABEL: """Analyze this nutrition label and extract all nutritional information.

Extract per serving:
- serving_size: Serving size amount
- serving_unit: Unit (g, ml, oz, etc.)
- calories: Total calories
- protein_g: Protein in grams
- carbs_g: Total carbohydrates in grams
- fat_g: Total fat in grams
- fiber_g: Dietary fiber in grams
- sugar_g: Sugars in grams
- sodium_mg: Sodium in milligrams
- cholesterol_mg: Cholesterol in milligrams
- saturated_fat_g: Saturated fat in grams
- trans_fat_g: Trans fat in grams
- Any vitamins and minerals shown

Return as JSON:
{
    "product_name": "Product name if visible",
    "serving_size": 100,
    "serving_unit": "g",
    "nutrients": {
        "calories": 200,
        "protein_g": 10,
        "carbs_g": 25,
        "fat_g": 8,
        "fiber_g": 3,
        "sugar_g": 5,
        "sodium_mg": 150,
        ...
    },
    "confidence": 0.95
}""",
            DocumentType.MEDICAL_REPORT: """Analyze this medical report and extract key information.

Extract:
- report_type: Type of report (e.g., "Radiology", "Pathology", "Physical Exam")
- date: Date of the report
- provider: Healthcare provider/facility name
- patient_info: Any visible patient information
- findings: List of key findings
- diagnosis: Any diagnoses mentioned
- recommendations: Any recommendations
- follow_up: Follow-up instructions if any

Return as JSON with these fields. Focus on medical findings and recommendations.
{
    "report_type": "...",
    "date": "YYYY-MM-DD",
    "provider": "...",
    "findings": ["..."],
    "diagnosis": ["..."],
    "recommendations": ["..."],
    "confidence": 0.85
}""",
        }

        return prompts.get(doc_type, prompts[DocumentType.LAB_RESULT])

    async def _call_vision_api(
        self,
        image_data: str,
        prompt: str,
    ) -> Optional[str]:
        """Call the vision API with the image.

        Args:
            image_data: The image as a ``data:`` URL.
            prompt: Extraction instructions sent alongside the image.

        Returns:
            The first choice's message content, or None on a non-200
            status or any error (both are logged).
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # Build the request payload
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt,
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_data,
                            },
                        },
                    ],
                }
            ],
            "max_tokens": 2000,
            "temperature": 0.1,  # Low temperature for accuracy
        }

        try:
            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload,
            )

            if response.status_code != 200:
                logger.error("API error: %s - %s", response.status_code, response.text)
                return None

            data = response.json()
            return data["choices"][0]["message"]["content"]

        except Exception as e:
            logger.error("API call error: %s", e)
            return None

    def _parse_response(
        self,
        response: str,
        doc_type: DocumentType,
    ) -> OCRResult:
        """Parse the API response into structured data.

        Extracts the outermost ``{...}`` span from the model output, decodes
        it as JSON, and dispatches to the processor for *doc_type*. Document
        types without a processor are returned as the decoded dict.
        """
        try:
            # Extract JSON from the response
            json_match = _JSON_OBJECT_RE.search(response)
            if not json_match:
                return OCRResult(
                    success=False,
                    document_type=doc_type,
                    raw_text=response,
                    error="Could not parse JSON from response",
                )

            data = json.loads(json_match.group())

            # Process based on document type
            if doc_type == DocumentType.LAB_RESULT:
                return self._process_lab_results(data, response)
            elif doc_type == DocumentType.PRESCRIPTION:
                return self._process_prescription(data, response)
            elif doc_type == DocumentType.NUTRITION_LABEL:
                return self._process_nutrition_label(data, response)
            else:
                return OCRResult(
                    success=True,
                    document_type=doc_type,
                    raw_text=response,
                    structured_data=data,
                    confidence=data.get("confidence", 0.8),
                )

        except json.JSONDecodeError as e:
            logger.error("JSON parse error: %s", e)
            return OCRResult(
                success=False,
                document_type=doc_type,
                raw_text=response,
                error=f"JSON parse error: {e}",
            )

    def _process_lab_results(
        self,
        data: Dict[str, Any],
        raw_text: str,
    ) -> OCRResult:
        """Process lab results into LabResult objects.

        Items that cannot be converted are logged and skipped; the result is
        successful when at least one item survives.
        """
        results = []

        # "or []" tolerates an explicit JSON null for the list.
        for item in data.get("results") or []:
            try:
                lab_result = LabResult(
                    test_name=item.get("test_name", ""),
                    # NOTE: a missing key defaults to 0, whereas an explicit
                    # null makes float() raise and the item is skipped.
                    value=float(item.get("value", 0)),
                    unit=item.get("unit", ""),
                    # A bound of 0 is valid and must not collapse to None.
                    reference_range_low=_optional_float(item.get("reference_range_low")),
                    reference_range_high=_optional_float(item.get("reference_range_high")),
                    status=item.get("status", "normal"),
                    confidence=_float_or(item.get("confidence"), 0.9),
                )
                results.append(lab_result.to_dict())
            except (ValueError, TypeError, AttributeError) as e:
                # AttributeError covers list entries that are not objects.
                logger.warning("Error parsing lab result: %s", e)
                continue

        return OCRResult(
            success=len(results) > 0,
            document_type=DocumentType.LAB_RESULT,
            raw_text=raw_text,
            structured_data={"results": results},
            confidence=_mean_confidence(results),
        )

    def _process_prescription(
        self,
        data: Dict[str, Any],
        raw_text: str,
    ) -> OCRResult:
        """Process prescription data into Prescription objects.

        Items that cannot be converted are logged and skipped; the result is
        successful when at least one medication survives.
        """
        results = []

        for item in data.get("medications") or []:
            try:
                rx = Prescription(
                    medication_name=item.get("medication_name", ""),
                    dosage=item.get("dosage", ""),
                    frequency=item.get("frequency", ""),
                    route=item.get("route", "oral"),
                    prescriber=item.get("prescriber"),
                    # The prompt marks refills as "if shown", so null is an
                    # expected answer and must not discard the medication.
                    refills_remaining=_int_or(item.get("refills_remaining"), 0),
                    instructions=item.get("instructions", ""),
                    warnings=item.get("warnings") or [],
                    confidence=_float_or(item.get("confidence"), 0.9),
                )
                results.append(rx.to_dict())
            except (ValueError, TypeError, AttributeError) as e:
                logger.warning("Error parsing prescription: %s", e)
                continue

        return OCRResult(
            success=len(results) > 0,
            document_type=DocumentType.PRESCRIPTION,
            raw_text=raw_text,
            structured_data={"medications": results},
            confidence=_mean_confidence(results),
        )

    def _process_nutrition_label(
        self,
        data: Dict[str, Any],
        raw_text: str,
    ) -> OCRResult:
        """Process nutrition label into NutrientInfo.

        Nutrients that are absent or null are recorded as 0; a non-numeric
        value fails the whole label and is reported in ``OCRResult.error``.
        """
        try:
            nutrients = data.get("nutrients") or {}

            def amount(key: str) -> float:
                return _float_or(nutrients.get(key), 0.0)

            nutrient_info = NutrientInfo(
                calories=amount("calories"),
                protein_g=amount("protein_g"),
                carbs_g=amount("carbs_g"),
                fat_g=amount("fat_g"),
                fiber_g=amount("fiber_g"),
                sugar_g=amount("sugar_g"),
                sodium_mg=amount("sodium_mg"),
                cholesterol_mg=amount("cholesterol_mg"),
                saturated_fat_g=amount("saturated_fat_g"),
                trans_fat_g=amount("trans_fat_g"),
            )

            food_item = FoodItem(
                name=data.get("product_name") or "Scanned Product",
                serving_size=_float_or(data.get("serving_size"), 100.0),
                serving_unit=data.get("serving_unit") or "g",
                nutrients=nutrient_info,
            )

            return OCRResult(
                success=True,
                document_type=DocumentType.NUTRITION_LABEL,
                raw_text=raw_text,
                structured_data=food_item.to_dict(),
                confidence=_float_or(data.get("confidence"), 0.9),
            )

        except (ValueError, TypeError, AttributeError) as e:
            logger.error("Error parsing nutrition label: %s", e)
            return OCRResult(
                success=False,
                document_type=DocumentType.NUTRITION_LABEL,
                raw_text=raw_text,
                error=f"Parse error: {e}",
            )

    async def close(self):
        """Close the HTTP client."""
        await self.client.aclose()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()


# Convenience function for one-off parsing
async def parse_health_document(
    image_path: Union[str, Path],
    doc_type: str = "lab_result",
    api_key: Optional[str] = None,
) -> OCRResult:
    """
    Parse a health document.

    Args:
        image_path: Path to the image
        doc_type: Type of document (lab_result, prescription, nutrition_label, medical_report)
        api_key: Optional API key

    Returns:
        OCRResult with parsed data

    Raises:
        ValueError: If doc_type is not a valid DocumentType value.
    """
    doc_type_enum = DocumentType(doc_type)

    async with DeepSeekOCRParser(api_key=api_key) as parser:
        return await parser.parse_document(image_path, doc_type_enum)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/timowhite88/Farnsworth'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

ocr_parser.py•17.2 KiB