"""
Document Structure Analysis Tools for OCR-MCP
Advanced document analysis tools for detecting tables, forms, and layout
elements, and for understanding overall document structure.
"""
import logging
import math
import re
from typing import Any
from ..core.backend_manager import BackendManager
from ..core.config import OCRConfig
# Optional OpenCV import
try:
import cv2
OPENCV_AVAILABLE = True
except ImportError:
cv2 = None
OPENCV_AVAILABLE = False
logger = logging.getLogger(__name__)
async def analyze_document_layout(
image_path: str,
analysis_type: str = "comprehensive",
detect_tables: bool = True,
detect_forms: bool = True,
detect_headers: bool = True,
backend_manager: BackendManager | None = None,
config: OCRConfig | None = None,
) -> dict[str, Any]:
"""
Analyze document layout and structure.
Detects document elements like headers, paragraphs, tables, forms,
images, and other structural components.
Args:
image_path: Path to the document image
analysis_type: Type of analysis ("basic", "comprehensive", "detailed")
detect_tables: Enable table detection
detect_forms: Enable form field detection
        detect_headers: Enable header/footer detection
        backend_manager: Optional OCR backend manager (accepted for API
            consistency; not used by this analysis)
        config: Optional OCR configuration (accepted for API consistency)
Returns:
Document layout analysis with detected elements
"""
logger.info(f"Analyzing document layout: {image_path}")
try:
import cv2
import numpy as np
from PIL import Image
# Load image
image = Image.open(image_path)
if image.mode != "RGB":
image = image.convert("RGB")
cv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
# Basic layout analysis
layout_elements = []
# Detect text blocks and lines
text_regions = _detect_text_regions(cv_image)
# Classify text regions
for region in text_regions:
element_type = _classify_text_region(region, cv_image)
layout_elements.append(
{
"type": element_type,
"bbox": region["bbox"],
"confidence": region["confidence"],
"text_estimate": region.get("text", ""),
}
)
# Table detection
if detect_tables:
tables = _detect_tables(cv_image)
for table in tables:
layout_elements.append(
{
"type": "table",
"bbox": table["bbox"],
"rows": table["rows"],
"cols": table["cols"],
"confidence": table["confidence"],
}
)
# Form detection
if detect_forms:
forms = _detect_form_fields(cv_image, text_regions)
layout_elements.extend(forms)
# Header/Footer detection
if detect_headers:
            headers_footers = _detect_headers_footers(cv_image, text_regions)
            layout_elements.extend(headers_footers)
# Group elements by type
element_summary = _summarize_layout_elements(layout_elements)
return {
"success": True,
"image_path": image_path,
"analysis_type": analysis_type,
"layout_elements": layout_elements,
"element_summary": element_summary,
"document_structure": {
"has_tables": any(e["type"] == "table" for e in layout_elements),
"has_forms": any(
e["type"] in ["form_field", "checkbox", "signature"] for e in layout_elements
),
"has_headers": any(e["type"] == "header" for e in layout_elements),
"has_footers": any(e["type"] == "footer" for e in layout_elements),
"text_blocks": len([e for e in layout_elements if e["type"] == "text_block"]),
"estimated_pages": 1,
},
"image_info": {
"width": image.width,
"height": image.height,
"orientation": _detect_orientation(cv_image),
},
"message": f"Layout analysis complete: {len(layout_elements)} elements detected",
}
except Exception as e:
logger.error(f"Document layout analysis failed: {e}")
return {
"success": False,
"error": f"Layout analysis failed: {str(e)}",
"image_path": image_path,
}
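# Illustrative usage (a sketch, assuming a local "sample_page.png" exists; layout
# analysis relies only on OpenCV/PIL, so no backend_manager is needed here):
#
#   import asyncio
#   layout = asyncio.run(analyze_document_layout("sample_page.png"))
#   if layout["success"]:
#       print(layout["element_summary"])  # e.g. {"text_block": 12, "header": 1}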
async def extract_table_data(
image_path: str,
table_region: list[int] | None = None,
ocr_backend: str = "auto",
backend_manager: BackendManager | None = None,
config: OCRConfig | None = None,
) -> dict[str, Any]:
"""
Extract tabular data from document images.
Detects table structures and extracts data from rows and columns,
returning structured tabular data.
Args:
image_path: Path to the document image
table_region: Optional bounding box [x1,y1,x2,y2] for specific table
        ocr_backend: OCR backend to use for text extraction
        backend_manager: Optional OCR backend manager, forwarded to table
            content extraction
        config: Optional OCR configuration, forwarded to table content extraction
Returns:
Extracted table data with structure and content
"""
logger.info(f"Extracting table data from: {image_path}")
try:
import cv2
import numpy as np
from PIL import Image
        # Load image and normalize to RGB before grayscale conversion
        image = Image.open(image_path)
        if image.mode != "RGB":
            image = image.convert("RGB")
        cv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
# Find tables
tables = _detect_tables(cv_image)
if table_region:
# Focus on specific region
tables = [t for t in tables if _bbox_overlap(t["bbox"], table_region) > 0.5]
extracted_tables = []
for table_info in tables:
table_data = await _extract_table_content(
image_path, table_info, ocr_backend, backend_manager, config
)
extracted_tables.append(table_data)
return {
"success": True,
"image_path": image_path,
"tables_found": len(tables),
"tables_extracted": len(extracted_tables),
"table_data": extracted_tables,
"extraction_method": "structure_analysis + OCR",
"message": f"Extracted {len(extracted_tables)} tables with structured data",
}
except Exception as e:
logger.error(f"Table extraction failed: {e}")
return {
"success": False,
"error": f"Table extraction failed: {str(e)}",
"image_path": image_path,
}
async def detect_form_fields(
image_path: str,
field_types: list[str] | None = None,
backend_manager: BackendManager | None = None,
config: OCRConfig | None = None,
) -> dict[str, Any]:
"""
Detect and classify form fields in documents.
Identifies checkboxes, text fields, radio buttons, signatures,
and other form elements with their positions.
Args:
image_path: Path to the document image
        field_types: Types of fields to detect (default: all types)
        backend_manager: Optional OCR backend manager (accepted for API consistency)
        config: Optional OCR configuration (accepted for API consistency)
Returns:
Detected form fields with classifications and positions
"""
logger.info(f"Detecting form fields in: {image_path}")
if field_types is None:
field_types = [
"checkbox",
"text_field",
"radio_button",
"signature",
"date_field",
]
try:
import cv2
import numpy as np
from PIL import Image
        # Load image and normalize to RGB before grayscale conversion
        image = Image.open(image_path)
        if image.mode != "RGB":
            image = image.convert("RGB")
        cv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
# Get text regions first (form fields are often near text)
text_regions = _detect_text_regions(cv_image)
# Detect form fields
form_fields = []
# Checkbox detection
if "checkbox" in field_types:
checkboxes = _detect_checkboxes(cv_image)
form_fields.extend(
[
{
"type": "checkbox",
"bbox": cb["bbox"],
"confidence": cb["confidence"],
"state": cb.get("state", "unknown"),
}
for cb in checkboxes
]
)
# Text field detection (rectangular areas near text)
if "text_field" in field_types:
text_fields = _detect_text_fields(cv_image, text_regions)
form_fields.extend(
[
{
"type": "text_field",
"bbox": tf["bbox"],
"confidence": tf["confidence"],
"associated_text": tf.get("label", ""),
}
for tf in text_fields
]
)
# Radio button detection
if "radio_button" in field_types:
radio_buttons = _detect_radio_buttons(cv_image)
form_fields.extend(
[
{
"type": "radio_button",
"bbox": rb["bbox"],
"confidence": rb["confidence"],
"group": rb.get("group", "unknown"),
}
for rb in radio_buttons
]
)
# Signature field detection
if "signature" in field_types:
signatures = _detect_signature_fields(cv_image, text_regions)
form_fields.extend(
[
{
"type": "signature",
"bbox": sig["bbox"],
"confidence": sig["confidence"],
}
for sig in signatures
]
)
# Group fields by type
field_summary = {}
for field_type in field_types:
field_summary[field_type] = len([f for f in form_fields if f["type"] == field_type])
return {
"success": True,
"image_path": image_path,
"field_types_requested": field_types,
"form_fields": form_fields,
"field_summary": field_summary,
"total_fields": len(form_fields),
"document_type": "form" if len(form_fields) > 0 else "document",
"message": f"Detected {len(form_fields)} form fields",
}
except Exception as e:
logger.error(f"Form field detection failed: {e}")
return {
"success": False,
"error": f"Form field detection failed: {str(e)}",
"image_path": image_path,
}
async def analyze_document_reading_order(
image_path: str,
ocr_result: dict[str, Any] | None = None,
backend_manager: BackendManager | None = None,
config: OCRConfig | None = None,
) -> dict[str, Any]:
"""
Analyze the logical reading order of document content.
Determines the proper sequence for reading multi-column documents,
forms with complex layouts, and documents with non-linear content flow.
Args:
image_path: Path to the document image
        ocr_result: Optional pre-computed OCR result to avoid re-processing
        backend_manager: OCR backend manager; required when ocr_result is not supplied
        config: Optional OCR configuration
Returns:
Document reading order analysis with content sequencing
"""
logger.info(f"Analyzing reading order for: {image_path}")
try:
from PIL import Image
# Load image
image = Image.open(image_path)
# Get OCR result if not provided
if not ocr_result:
if not backend_manager:
return {
"success": False,
"error": "Backend manager not provided - cannot perform OCR for analysis",
}
ocr_result = await backend_manager.process_with_backend("auto", image_path, mode="text")
if not ocr_result.get("success"):
return {
"success": False,
"error": "OCR processing failed - cannot analyze reading order",
}
# Extract text blocks with positions
raw_results = ocr_result.get("raw_results", [])
# Sort text blocks into reading order
reading_order = _determine_reading_order(raw_results, image.size)
# Group into logical sections
        sections = _group_into_sections(reading_order, image.size[0])
return {
"success": True,
"image_path": image_path,
"reading_order": reading_order,
"sections": sections,
"content_flow": {
"total_blocks": len(reading_order),
"sections_count": len(sections),
"estimated_columns": _estimate_columns(reading_order),
"reading_direction": "left-to-right-top-to-bottom",
},
"message": f"Reading order analysis complete: {len(reading_order)} text blocks sequenced",
}
except Exception as e:
logger.error(f"Reading order analysis failed: {e}")
return {
"success": False,
"error": f"Reading order analysis failed: {str(e)}",
"image_path": image_path,
}
async def classify_document_type(
image_path: str,
ocr_result: dict[str, Any] | None = None,
backend_manager: BackendManager | None = None,
config: OCRConfig | None = None,
) -> dict[str, Any]:
"""
Classify the type of document based on layout and content analysis.
Identifies document types like invoices, receipts, forms, letters,
reports, contracts, etc.
Args:
image_path: Path to the document image
        ocr_result: Optional pre-computed OCR result
        backend_manager: OCR backend manager; required when ocr_result is not supplied
        config: Optional OCR configuration
Returns:
Document type classification with confidence scores
"""
logger.info(f"Classifying document type: {image_path}")
try:
        # No direct image load is needed here; analyze_document_layout takes the path.
# Get OCR result if not provided
if not ocr_result:
if not backend_manager:
return {
"success": False,
"error": "Backend manager not provided - cannot perform OCR for classification",
}
ocr_result = await backend_manager.process_with_backend("auto", image_path, mode="text")
ocr_text = ocr_result.get("text", "") if ocr_result.get("success") else ""
# Analyze layout
layout_analysis = await analyze_document_layout(
image_path, "basic", backend_manager=backend_manager
)
# Classify based on content and layout features
classification = _classify_document_type(ocr_text, layout_analysis)
return {
"success": True,
"image_path": image_path,
"document_type": classification["primary_type"],
"confidence": classification["confidence"],
"alternative_types": classification["alternatives"],
"detected_features": classification["features"],
"classification_reasoning": classification["reasoning"],
"message": f"Document classified as: {classification['primary_type']} ({classification['confidence']}% confidence)",
}
except Exception as e:
logger.error(f"Document classification failed: {e}")
return {
"success": False,
"error": f"Document classification failed: {str(e)}",
"image_path": image_path,
}
async def extract_document_metadata(
image_path: str,
ocr_result: dict[str, Any] | None = None,
extract_dates: bool = True,
extract_names: bool = True,
extract_numbers: bool = True,
backend_manager: BackendManager | None = None,
config: OCRConfig | None = None,
) -> dict[str, Any]:
"""
Extract structured metadata from documents.
Identifies and extracts dates, names, document numbers, amounts,
addresses, and other structured information.
Args:
image_path: Path to the document image
ocr_result: Optional pre-computed OCR result
extract_dates: Extract date information
extract_names: Extract person/company names
        extract_numbers: Extract document numbers and amounts
        backend_manager: OCR backend manager; required when ocr_result is not supplied
        config: Optional OCR configuration
Returns:
Extracted document metadata with confidence scores
"""
logger.info(f"Extracting metadata from: {image_path}")
try:
# Get OCR result if not provided
if not ocr_result:
if not backend_manager:
return {
"success": False,
"error": "Backend manager not provided - cannot perform OCR for metadata extraction",
}
ocr_result = await backend_manager.process_with_backend("auto", image_path, mode="text")
ocr_text = ocr_result.get("text", "") if ocr_result.get("success") else ""
metadata = {
"dates": [],
"names": [],
"numbers": [],
"amounts": [],
"addresses": [],
"document_numbers": [],
}
# Extract dates
if extract_dates:
metadata["dates"] = _extract_dates(ocr_text)
# Extract names (basic pattern matching)
if extract_names:
metadata["names"] = _extract_names(ocr_text)
# Extract numbers and amounts
if extract_numbers:
numbers_data = _extract_numbers_and_amounts(ocr_text)
metadata.update(numbers_data)
# Extract addresses
metadata["addresses"] = _extract_addresses(ocr_text)
# Calculate confidence scores
confidence_scores = {}
for key, items in metadata.items():
if items:
                # Heuristic confidence that grows with the number of items found
confidence_scores[key] = min(95, 60 + len(items) * 5)
else:
confidence_scores[key] = 0
return {
"success": True,
"image_path": image_path,
"metadata": metadata,
"confidence_scores": confidence_scores,
"extraction_summary": {
"total_items_extracted": sum(len(items) for items in metadata.values()),
"categories_with_data": len([k for k, v in metadata.items() if v]),
"highest_confidence": max(confidence_scores.values()) if confidence_scores else 0,
},
"message": f"Extracted {sum(len(items) for items in metadata.values())} metadata items",
}
except Exception as e:
logger.error(f"Metadata extraction failed: {e}")
return {
"success": False,
"error": f"Metadata extraction failed: {str(e)}",
"image_path": image_path,
}
# Helper functions for document analysis
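# Added sketch helper: the contour/morphology detectors below assume dark ink on
# a light background, so they first binarize with Otsu's method and invert to
# make ink the non-zero foreground.
def _binarize(image):
    """Otsu-binarize a grayscale image, inverted so ink is foreground."""
    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return binary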
def _detect_text_regions(image):
    """Detect text regions in a grayscale image via binarization and morphology."""
    # Binarize first so dark text becomes foreground, then close horizontal
    # gaps between characters so whole lines merge into single regions.
    binary = _binarize(image)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
    dilated = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    text_regions = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 50 and h > 10:  # Filter out small regions
            text_regions.append({"bbox": [x, y, x + w, y + h], "area": w * h, "confidence": 0.8})
    # Sort top-to-bottom, then left-to-right
    return sorted(text_regions, key=lambda r: (r["bbox"][1], r["bbox"][0]))
def _classify_text_region(region, image):
"""Classify a text region based on position and characteristics."""
x1, y1, x2, y2 = region["bbox"]
img_h, img_w = image.shape
# Position-based classification
if y1 < img_h * 0.1:
return "header"
elif y2 > img_h * 0.9:
return "footer"
elif x1 < img_w * 0.1 and x2 > img_w * 0.9:
return "title"
else:
return "text_block"
def _detect_tables(image):
    """Detect table structures in a grayscale image via ruling-line morphology."""
    # Heuristic line-based detection; real table detection would use more
    # sophisticated methods (e.g. cell grouping or a learned detector)
    tables = []
    binary = _binarize(image)
    # Extract long horizontal and vertical strokes (table rulings)
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
    horizontal_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel)
    vertical_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel)
    # Combine lines into a single table mask
    table_mask = cv2.add(horizontal_lines, vertical_lines)
    # Find table regions
    contours, _ = cv2.findContours(table_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 100 and h > 50:  # Reasonable table size
            rows, cols = _estimate_grid_shape(
                horizontal_lines, vertical_lines, (x, y, x + w, y + h)
            )
            tables.append(
                {
                    "bbox": [x, y, x + w, y + h],
                    "rows": rows,
                    "cols": cols,
                    "confidence": 0.7,
                }
            )
    return tables
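# Added sketch helpers (heuristic, untuned): estimate a table's grid shape from
# projection profiles of its ruling lines instead of a fixed 3x4 placeholder.
# Assumes the table is fully ruled.
def _count_runs(mask):
    """Count contiguous runs of True values in a 1-D boolean sequence."""
    runs, prev = 0, False
    for val in mask:
        if val and not prev:
            runs += 1
        prev = bool(val)
    return runs


def _estimate_grid_shape(horizontal_lines, vertical_lines, bbox):
    """Estimate (rows, cols) for a table bbox from its ruling-line masks."""
    x1, y1, x2, y2 = bbox
    h_roi = horizontal_lines[y1:y2, x1:x2]
    v_roi = vertical_lines[y1:y2, x1:x2]
    if h_roi.size == 0 or v_roi.size == 0:
        return 1, 1
    # A horizontal ruling lights up most pixels in its row of the ROI;
    # a vertical ruling lights up most pixels in its column.
    row_hits = (h_roi > 0).sum(axis=1) > 0.5 * h_roi.shape[1]
    col_hits = (v_roi > 0).sum(axis=0) > 0.5 * v_roi.shape[0]
    # N ruling lines bound N - 1 cells in that direction
    return max(1, _count_runs(row_hits) - 1), max(1, _count_runs(col_hits) - 1)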
def _detect_form_fields(image, text_regions):
    """Detect form fields (checkbox-like squares) in a grayscale image.

    text_regions is accepted for label association but not yet used.
    """
    form_fields = []
    # Simple checkbox detection - look for small square contours in the binarized image
    binary = _binarize(image)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / float(h)
        # Check for square-ish shapes (checkboxes)
        if 0.8 <= aspect_ratio <= 1.2 and 10 <= w <= 30:
            form_fields.append(
                {"type": "checkbox", "bbox": [x, y, x + w, y + h], "confidence": 0.75}
            )
    return form_fields
def _detect_headers_footers(image, text_regions):
"""Detect header and footer regions."""
elements = []
img_h = image.shape[0]
for region in text_regions:
y1 = region["bbox"][1]
if y1 < img_h * 0.15:
elements.append({"type": "header", "bbox": region["bbox"], "confidence": 0.8})
elif y1 > img_h * 0.85:
elements.append({"type": "footer", "bbox": region["bbox"], "confidence": 0.8})
return elements
def _summarize_layout_elements(elements):
"""Summarize layout elements by type."""
summary = {}
for element in elements:
elem_type = element["type"]
if elem_type not in summary:
summary[elem_type] = 0
summary[elem_type] += 1
return summary
def _detect_orientation(image):
"""Detect document orientation."""
# Simple heuristic - check if image is wider than tall
h, w = image.shape
return "landscape" if w > h else "portrait"
async def _extract_table_content(
    image_path, table_info, ocr_backend, backend_manager=None, config=None
):
    """Extract content from a detected table.

    Placeholder implementation: returns the table geometry without cell text.
    See the _ocr_table_cells sketch below for one way cell OCR could work.
    """
    return {
        "bbox": table_info["bbox"],
        "rows": table_info["rows"],
        "cols": table_info["cols"],
        "headers": [],  # Header detection not yet implemented
        "data": [],  # Cell extraction not yet implemented
        "confidence": table_info["confidence"],
    }
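# Added sketch (not wired in): one way _extract_table_content could OCR cells.
# It assumes evenly spaced rulings (so cell geometry is an even grid over the
# table bbox) and reuses the same backend call the public tools use.
async def _ocr_table_cells(image_path, table_info, ocr_backend, backend_manager):
    """Illustrative cell OCR: crop an even grid from the table bbox and OCR
    each cell through backend_manager.process_with_backend."""
    import os
    import tempfile

    from PIL import Image

    x1, y1, x2, y2 = table_info["bbox"]
    rows, cols = table_info["rows"], table_info["cols"]
    cell_w = (x2 - x1) / cols
    cell_h = (y2 - y1) / rows
    image = Image.open(image_path)
    data = []
    for r in range(rows):
        row_cells = []
        for c in range(cols):
            box = (
                int(x1 + c * cell_w),
                int(y1 + r * cell_h),
                int(x1 + (c + 1) * cell_w),
                int(y1 + (r + 1) * cell_h),
            )
            # The backends take a file path, so write each crop to a temp file
            fd, tmp_path = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            try:
                image.crop(box).save(tmp_path)
                result = await backend_manager.process_with_backend(
                    ocr_backend, tmp_path, mode="text"
                )
            finally:
                os.unlink(tmp_path)
            row_cells.append(result.get("text", "").strip() if result.get("success") else "")
        data.append(row_cells)
    return data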
def _detect_checkboxes(image):
    """Detect checkboxes (small square contours) in a grayscale image."""
    binary = _binarize(image)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    checkboxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / float(h)
        if 0.8 <= aspect_ratio <= 1.2 and 10 <= w <= 30:
            checkboxes.append({"bbox": [x, y, x + w, y + h], "confidence": 0.75})
    return checkboxes
def _detect_text_fields(image, text_regions):
"""Detect text input fields."""
# Look for rectangular areas that might be text fields
# This is a simplified implementation
text_fields = []
for region in text_regions:
x1, y1, x2, y2 = region["bbox"]
width = x2 - x1
height = y2 - y1
# Look for wide, short rectangles (typical text field shape)
if width > height * 3 and height < 50:
text_fields.append({"bbox": [x1, y1, x2, y2], "confidence": 0.7})
return text_fields
def _detect_radio_buttons(image):
    """Detect radio buttons - similar to checkboxes but smaller and rounder."""
    binary = _binarize(image)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    radio_buttons = []
    for contour in contours:
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        # Circularity is ~1.0 for a circle, lower for elongated shapes
        circularity = 4 * math.pi * area / (perimeter * perimeter) if perimeter > 0 else 0
        if 0.7 <= circularity <= 1.2 and 50 <= area <= 500:
            x, y, w, h = cv2.boundingRect(contour)
            radio_buttons.append({"bbox": [x, y, x + w, y + h], "confidence": 0.8})
    return radio_buttons
def _detect_signature_fields(image, text_regions):
"""Detect signature fields."""
signature_fields = []
for region in text_regions:
        # Simplified heuristic: only fires when the region carries OCR'd text
        # containing "signature" (regions from _detect_text_regions carry none)
        if "signature" in region.get("text", "").lower():
            x1, y1, x2, y2 = region["bbox"]
            # Extend the bbox to cover the signing area just below the label text
            signature_fields.append(
                {
                    "bbox": [x1, y2, x2, y2 + 100],
"confidence": 0.8,
}
)
return signature_fields
def _bbox_overlap(bbox1, bbox2):
"""Calculate overlap between two bounding boxes."""
x1 = max(bbox1[0], bbox2[0])
y1 = max(bbox1[1], bbox2[1])
x2 = min(bbox1[2], bbox2[2])
y2 = min(bbox1[3], bbox2[3])
if x2 <= x1 or y2 <= y1:
return 0.0
intersection = (x2 - x1) * (y2 - y1)
area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
return intersection / (area1 + area2 - intersection)
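# Worked example: _bbox_overlap([0, 0, 10, 10], [5, 5, 15, 15]) has a 25px²
# intersection and a 175px² union, so it returns 25 / 175 ≈ 0.143 (IoU).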
def _determine_reading_order(text_blocks, image_size):
"""Determine the logical reading order of text blocks."""
# Sort by top-to-bottom, then left-to-right
sorted_blocks = sorted(
text_blocks,
key=lambda x: (x.get("bbox", [0, 0, 0, 0])[1], x.get("bbox", [0, 0, 0, 0])[0]),
)
# Add reading order indices
for i, block in enumerate(sorted_blocks):
block["reading_order"] = i + 1
return sorted_blocks
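# Added sketch (not wired in): a column-aware alternative for multi-column
# layouts, where plain top-to-bottom sorting interleaves the columns. Bins
# blocks into n_columns equal-width columns by left edge, then reads each
# column top-to-bottom, leftmost column first.
def _determine_reading_order_columns(text_blocks, image_size, n_columns=2):
    """Illustrative column-first reading order for multi-column pages."""
    width = max(1, image_size[0]) if image_size else 1000

    def column_of(block):
        x1 = block.get("bbox", [0, 0, 0, 0])[0]
        return min(n_columns - 1, int(x1 * n_columns / width))

    ordered = sorted(
        text_blocks,
        key=lambda b: (column_of(b), b.get("bbox", [0, 0, 0, 0])[1]),
    )
    for i, block in enumerate(ordered):
        block["reading_order"] = i + 1
    return ordered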
def _group_into_sections(reading_order, img_width=1000):
    """Group text blocks into logical sections.

    img_width should be the page width in pixels; the default is only a
    fallback for callers that cannot supply it.
    """
    sections = []
    current_section = []
for block in reading_order:
bbox = block.get("bbox", [0, 0, 100, 100])
x1 = bbox[0]
# Simple column detection
if x1 < img_width * 0.4:
column = "left"
elif x1 > img_width * 0.6:
column = "right"
else:
column = "center"
block["column"] = column
current_section.append(block)
if current_section:
sections.append(
{
"section_number": 1,
"blocks": current_section,
"estimated_type": "main_content",
}
)
return sections
def _estimate_columns(reading_order):
"""Estimate number of columns in the document."""
if not reading_order:
return 1
    # Simple column estimation: bin x-coordinates into 100px buckets
    x_positions = [block.get("bbox", [0, 0, 0, 0])[0] for block in reading_order]
    unique_x = len({int(x / 100) for x in x_positions})
return max(1, min(unique_x, 3)) # Reasonable range
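# Worked example: blocks at x = 50, 80, and 450 fall into 100px bins {0, 0, 4},
# giving 2 unique bins, so _estimate_columns reports a 2-column layout.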
def _classify_document_type(text, layout_analysis):
"""Classify document type based on content and layout."""
text_lower = text.lower()
# Invoice indicators
if any(keyword in text_lower for keyword in ["invoice", "bill", "amount due", "total:", "$"]):
return {
"primary_type": "invoice",
"confidence": 85,
"alternatives": ["receipt", "bill"],
"features": ["amounts", "dates", "vendor_info"],
"reasoning": "Contains invoice-specific keywords and financial information",
}
# Receipt indicators
elif any(keyword in text_lower for keyword in ["receipt", "paid", "change", "subtotal"]):
return {
"primary_type": "receipt",
"confidence": 80,
"alternatives": ["invoice"],
"features": ["transaction_details", "amounts"],
"reasoning": "Contains receipt-specific transaction language",
}
# Form indicators
elif layout_analysis.get("document_structure", {}).get("has_forms", False):
return {
"primary_type": "form",
"confidence": 75,
"alternatives": ["application", "document"],
"features": ["form_fields", "structured_layout"],
"reasoning": "Contains detectable form fields and structured elements",
}
# Letter/contract indicators
elif any(keyword in text_lower for keyword in ["dear", "sincerely", "agreement", "contract"]):
return {
"primary_type": "letter",
"confidence": 70,
"alternatives": ["contract", "correspondence"],
"features": ["formal_language", "addressing"],
"reasoning": "Contains formal correspondence language",
}
# Default classification
else:
return {
"primary_type": "document",
"confidence": 50,
"alternatives": ["letter", "report"],
"features": ["text_content"],
"reasoning": "General document with text content",
}
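# Illustrative call: _classify_document_type("Amount due: $450", {}) matches the
# invoice keywords ("amount due", "$") and returns primary_type "invoice" at
# 85% confidence.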
def _extract_dates(text):
"""Extract date patterns from text."""
# Common date patterns
date_patterns = [
r"\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b", # MM/DD/YYYY or DD/MM/YYYY
r"\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b", # YYYY/MM/DD
r"\b(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d{1,2},?\s+\d{2,4}\b", # Month DD, YYYY
r"\b\d{1,2}\s+(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d{2,4}\b", # DD Month YYYY
]
dates = []
for pattern in date_patterns:
matches = re.findall(pattern, text, re.IGNORECASE)
dates.extend(matches)
return list(set(dates)) # Remove duplicates
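# Illustrative call: _extract_dates("Due 03/15/2024, i.e. March 15, 2024") picks
# up both forms ("03/15/2024" and "March 15, 2024"); order is not guaranteed
# because duplicates are removed via a set.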
def _extract_names(text):
"""Extract potential names from text."""
# Simple name extraction - capitalized words
words = re.findall(r"\b[A-Z][a-z]+\b", text)
# Filter out common non-names
common_words = {
"The",
"And",
"For",
"Are",
"But",
"Not",
"You",
"All",
"Can",
"Had",
"Her",
"Was",
"One",
"Our",
"Out",
"Day",
"Get",
"Has",
"Him",
"His",
"How",
"Its",
"May",
"New",
"Now",
"Old",
"See",
"Two",
"Way",
"Who",
"Boy",
"Did",
"Let",
"Put",
"Say",
"She",
"Too",
"Use",
}
    names = [word for word in words if word not in common_words and len(word) > 2]
    return list(set(names))[:10]  # Cap at 10 candidates (order is arbitrary after dedup)
def _extract_numbers_and_amounts(text):
"""Extract numbers and monetary amounts."""
# Document numbers (patterns like "INV-123", "PO#456")
doc_numbers = re.findall(r"\b(?:INV|PO|ORD|REF|DOC)[\s#-]*\d+\b", text, re.IGNORECASE)
    # Monetary amounts ($123.45, €99.99, $1,250.00, 123.45 USD)
    amounts = re.findall(
        r"(?:\$|€|£|¥)\s*\d+(?:,\d{3})*(?:\.\d{2})?\b"
        r"|\b\d+(?:\.\d{2})?\s*(?:USD|EUR|GBP|JPY|dollars?|euros?|pounds?)\b",
        text,
        re.IGNORECASE,
    )
# General numbers
numbers = re.findall(r"\b\d{3,}\b", text) # Numbers with 3+ digits
return {
"document_numbers": list(set(doc_numbers)),
"amounts": list(set(amounts)),
"numbers": list(set(numbers))[:20], # Limit large numbers
}
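# Illustrative call: _extract_numbers_and_amounts("Invoice INV-123 total $1,250.00")
# returns document_numbers ["INV-123"] and amounts ["$1,250.00"]; the generic
# numbers bucket also picks up the bare digit runs.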
def _extract_addresses(text):
"""Extract potential addresses from text."""
# Simple address pattern matching
address_indicators = [
"street",
"avenue",
"road",
"drive",
"lane",
"way",
"place",
"court",
]
lines = text.split("\n")
addresses = []
for line in lines:
line_lower = line.lower().strip()
if any(indicator in line_lower for indicator in address_indicators):
if len(line.strip()) > 10: # Reasonable address length
addresses.append(line.strip())
return addresses[:5] # Limit to 5 addresses