"""Web status service for system status and statistics."""
import logging
import time
from datetime import datetime, timezone
from typing import Any, Dict
from ...config import ServerConfig
from ...vector_store import VectorStore
from ..models.web_models import ConfigOverviewResponse, HealthCheckResponse, StatusResponse
logger = logging.getLogger(__name__)
class WebStatusService:
"""Service for system status and configuration information."""
def __init__(
self,
config: ServerConfig,
vector_store: VectorStore,
document_cache: Dict[str, Any],
start_time: float,
):
"""Initialize the web status service.
Args:
config: Server configuration
vector_store: Vector storage service
document_cache: Document metadata cache
start_time: Server start timestamp
"""
self.config = config
self.vector_store = vector_store
self.document_cache = document_cache
self.start_time = start_time
self._version = self._get_version()

    def _get_version(self) -> str:
"""Get application version from package info."""
try:
# Try to read version from package metadata
import importlib.metadata
return importlib.metadata.version("pdfkb-mcp")
except Exception:
# Fallback version
return "0.1.0"

    async def get_status(self) -> StatusResponse:
        """Get comprehensive system status.

        Returns:
            StatusResponse with system status information
        """
try:
# Calculate uptime
uptime = time.time() - self.start_time
# Get vector store statistics
documents_count = await self.vector_store.get_document_count()
chunks_count = await self.vector_store.get_chunk_count()
# Get storage statistics
storage_stats = await self._get_storage_statistics()
# Get processing statistics
processing_stats = await self._get_processing_statistics()
# Build configuration summary
config_summary = {
"embedding_model": self.config.embedding_model,
"pdf_parser": self.config.pdf_parser,
"pdf_chunker": self.config.pdf_chunker,
"chunk_size": self.config.chunk_size,
"chunk_overlap": self.config.chunk_overlap,
"vector_search_k": self.config.vector_search_k,
"reranker_enabled": self.config.enable_reranker,
"reranker_model": self.config.reranker_model if self.config.enable_reranker else None,
"web_enabled": self.config.web_enabled,
"web_port": self.config.web_port,
"web_host": self.config.web_host,
}
# Build system statistics
statistics = {
"uptime_seconds": uptime,
"uptime_hours": uptime / 3600,
"documents_per_hour": documents_count / (uptime / 3600) if uptime > 0 else 0,
"avg_chunks_per_document": chunks_count / documents_count if documents_count > 0 else 0,
**storage_stats,
**processing_stats,
}
return StatusResponse(
status="healthy",
version=self._version,
uptime=uptime,
documents_count=documents_count,
chunks_count=chunks_count,
knowledgebase_path=str(self.config.knowledgebase_path),
cache_dir=str(self.config.cache_dir),
configuration=config_summary,
statistics=statistics,
)
except Exception as e:
logger.error(f"Error getting system status: {e}")
return StatusResponse(
status="error",
version=self._version,
uptime=time.time() - self.start_time,
documents_count=0,
chunks_count=0,
knowledgebase_path=str(self.config.knowledgebase_path),
cache_dir=str(self.config.cache_dir),
configuration={"error": str(e)},
statistics={"error": str(e)},
)

    async def get_config_overview(self) -> ConfigOverviewResponse:
        """Get configuration overview.

        Returns:
            ConfigOverviewResponse with current configuration
        """
try:
return ConfigOverviewResponse(
embedding_model=self.config.embedding_model,
pdf_parser=self.config.pdf_parser,
pdf_chunker=self.config.pdf_chunker,
chunk_size=self.config.chunk_size,
chunk_overlap=self.config.chunk_overlap,
vector_search_k=self.config.vector_search_k,
reranker_enabled=self.config.enable_reranker,
reranker_model=self.config.reranker_model if self.config.enable_reranker else None,
web_enabled=self.config.web_enabled,
web_port=self.config.web_port,
web_host=self.config.web_host,
supported_extensions=self.config.supported_extensions,
)
except Exception as e:
logger.error(f"Error getting configuration overview: {e}")
raise

    async def get_health_check(self) -> HealthCheckResponse:
        """Get basic health check response.

        Returns:
            HealthCheckResponse with health status
        """
try:
# Perform basic health checks
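            # Status tiers: "ok" when all checks pass, "degraded" when the vector store
            # or a required directory is unavailable; unexpected failures return "error".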
health_status = "ok"
# Check if vector store is responsive
try:
await self.vector_store.get_chunk_count()
except Exception as e:
logger.error(f"Vector store health check failed: {e}")
health_status = "degraded"
# Check if required directories exist
if not self.config.knowledgebase_path.exists():
health_status = "degraded"
if not self.config.cache_dir.exists():
health_status = "degraded"
return HealthCheckResponse(
status=health_status,
timestamp=datetime.now(timezone.utc),
version=self._version,
)
except Exception as e:
logger.error(f"Health check failed: {e}")
return HealthCheckResponse(
status="error",
timestamp=datetime.now(timezone.utc),
version=self._version,
)

    async def _get_storage_statistics(self) -> Dict[str, Any]:
        """Get storage-related statistics.

        Returns:
            Dictionary with storage statistics
        """
try:
stats = {}
# Get knowledgebase directory size
kb_path = self.config.knowledgebase_path
if kb_path.exists():
kb_size = sum(f.stat().st_size for f in kb_path.rglob("*") if f.is_file())
stats["knowledgebase_size_bytes"] = kb_size
stats["knowledgebase_size_mb"] = round(kb_size / (1024 * 1024), 2)
# Get cache directory size
cache_path = self.config.cache_dir
if cache_path.exists():
cache_size = sum(f.stat().st_size for f in cache_path.rglob("*") if f.is_file())
stats["cache_size_bytes"] = cache_size
stats["cache_size_mb"] = round(cache_size / (1024 * 1024), 2)
# Get file counts
if kb_path.exists():
pdf_files = list(kb_path.rglob("*.pdf"))
stats["pdf_file_count"] = len(pdf_files)
stats["total_pdf_size_bytes"] = sum(f.stat().st_size for f in pdf_files)
return stats
except Exception as e:
logger.error(f"Error getting storage statistics: {e}")
return {"storage_error": str(e)}

    async def _get_processing_statistics(self) -> Dict[str, Any]:
        """Get processing-related statistics.

        Returns:
            Dictionary with processing statistics
        """
try:
stats = {}
# Count processing cache files
processing_path = self.config.processing_path
if processing_path.exists():
parsing_cache_files = list(processing_path.rglob("parsing_result.json"))
chunking_cache_files = list(processing_path.rglob("chunking_result.json"))
stats["cached_parsing_results"] = len(parsing_cache_files)
stats["cached_chunking_results"] = len(chunking_cache_files)
# Get document type distribution
doc_types = {}
for document in self.document_cache.values():
doc_type = document.metadata.get("document_type", "pdf")
doc_types[doc_type] = doc_types.get(doc_type, 0) + 1
stats["document_types"] = doc_types
# Get processing timestamps
processing_times = []
for document in self.document_cache.values():
if "processing_timestamp" in document.metadata:
try:
timestamp = datetime.fromisoformat(document.metadata["processing_timestamp"])
processing_times.append(timestamp)
except Exception:
continue
if processing_times:
latest_processing = max(processing_times)
stats["latest_processing"] = latest_processing.isoformat()
stats["documents_processed_today"] = sum(
1 for t in processing_times if t.date() == datetime.now(timezone.utc).date()
)
return stats
except Exception as e:
logger.error(f"Error getting processing statistics: {e}")
return {"processing_error": str(e)}

    async def get_system_metrics(self) -> Dict[str, Any]:
        """Get detailed system metrics for monitoring.

        Returns:
            Dictionary with detailed system metrics
        """
try:
import sys
import psutil
# System metrics
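            # Note: cpu_percent(interval=1) samples for a full second and blocks this
            # coroutine (and the event loop) while it measures.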
cpu_percent = psutil.cpu_percent(interval=1)
memory = psutil.virtual_memory()
disk = psutil.disk_usage(str(self.config.knowledgebase_path.parent))
# Process metrics
process = psutil.Process()
process_memory = process.memory_info()
metrics = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"system": {
"cpu_percent": cpu_percent,
"memory_total_gb": round(memory.total / (1024**3), 2),
"memory_available_gb": round(memory.available / (1024**3), 2),
"memory_percent": memory.percent,
"disk_total_gb": round(disk.total / (1024**3), 2),
"disk_free_gb": round(disk.free / (1024**3), 2),
"disk_percent": round((disk.used / disk.total) * 100, 2),
},
"process": {
"memory_rss_mb": round(process_memory.rss / (1024**2), 2),
"memory_vms_mb": round(process_memory.vms / (1024**2), 2),
"cpu_percent": process.cpu_percent(),
"num_threads": process.num_threads(),
"open_files": len(process.open_files()),
},
"python": {
"version": sys.version,
"executable": sys.executable,
},
"application": {
"version": self._version,
"uptime_seconds": time.time() - self.start_time,
"documents_count": len(self.document_cache),
"chunks_count": await self.vector_store.get_chunk_count(),
},
}
return metrics
except ImportError:
logger.warning("psutil not available, returning limited metrics")
return {
"timestamp": datetime.now(timezone.utc).isoformat(),
"application": {
"version": self._version,
"uptime_seconds": time.time() - self.start_time,
"documents_count": len(self.document_cache),
},
"note": "Install psutil for detailed system metrics",
}
except Exception as e:
logger.error(f"Error getting system metrics: {e}")
return {"error": str(e)}