Medical GraphRAG Assistant

MIT License

medical-graphrag-assistant
config

fhir_graphrag_config.yaml•7.97 kB

# BYOT Configuration Schema for FHIR GraphRAG
# This file defines the complete YAML configuration structure for rag-templates
# to operate in BYOT (Bring Your Own Table) mode on FHIR native tables.
#
# NOTE(review): the nesting below follows the dotted paths used in the
# `validation` section and the `storage.vector_dimension` reference in the
# `embeddings` section. Placement of the BYOT flags / security keys under
# `storage.iris`, of `performance_targets` under `monitoring`, and of
# `vector_storage` / `llm` as top-level keys is presumed -- confirm against
# the consumer's schema.

# Database Connection Settings
database:
  iris:
    host: "localhost"        # IRIS database host
    port: 32782              # IRIS database port
    namespace: "DEMO"        # IRIS namespace
    username: "_SYSTEM"      # Database username
    # WARNING: plain-text credential. Do not commit a real password here;
    # supply it via FHIR_GRAPHRAG_DB_PASSWORD (see "Environment-Specific
    # Overrides" near the end of this file).
    password: "ISCDEMO"      # Database password (use environment variable in production)
    connection_timeout: 30   # Connection timeout in seconds
    pool_size: 5             # Connection pool size
    max_overflow: 10         # Maximum overflow connections

# BYOT Storage Configuration
storage:
  iris:
    # Custom table name for BYOT mode
    table_name: "HSFHIR_X0001_R.Rsrc"

    # Column mapping: maps FHIR table columns to the Document model
    column_mapping:
      id_column: "ID"                 # Primary key column
      text_column: "ResourceString"   # Content column (FHIR JSON with hex-encoded notes)
      metadata_columns:               # Additional metadata columns
        - "ResourceType"
        - "ResourceId"
        - "Compartments"
        - "Deleted"

    # BYOT mode flags
    zero_copy: true          # Enable zero-copy mode (no data migration)
    preserve_schema: true    # Read-only access (no schema modifications)

    # Security and validation
    validate_table_name: true   # Validate table name against SQL injection
    allowed_schemas:            # Whitelist of allowed schema names
      - "HSFHIR_X0001_R"

  # CloudConfiguration API vector settings (iris-vector-rag v0.5.4+)
  # These settings are read by SchemaManager via CloudConfiguration
  vector_dimension: 384        # Vector dimensionality for embeddings
  distance_metric: "COSINE"    # Distance metric for similarity search
  index_type: "HNSW"           # Vector index type

# Vector Storage Configuration (existing vectors)
vector_storage:
  table_name: "VectorSearch.FHIRResourceVectors"
  reference_column: "ResourceID"   # FK to FHIR native table
  vector_column: "Vector"          # 384-dimensional vector column
  model_column: "VectorModel"      # Embedding model name
  dimension: 384                   # Vector dimensionality (legacy, kept for compatibility)

# Embedding Configuration
embeddings:
  model: "sentence-transformers/all-MiniLM-L6-v2"
  dimension: 384       # Legacy setting (CloudConfiguration uses storage.vector_dimension)
  batch_size: 32       # Batch size for embedding generation
  normalize: true      # Normalize embeddings to unit length
  device: "cpu"        # Device for embedding generation (cpu/cuda)

# GraphRAG Pipeline Configuration
pipelines:
  graphrag:
    # Entity extraction settings
    entity_extraction_enabled: true

    # Medical entity types to extract
    entity_types:
      - "SYMPTOM"      # Patient symptoms (cough, fever, chest pain)
      - "CONDITION"    # Medical conditions (diabetes, hypertension)
      - "MEDICATION"   # Prescribed medications (aspirin, metformin)
      - "PROCEDURE"    # Medical procedures (blood test, x-ray)
      - "BODY_PART"    # Anatomical locations (chest, lungs, heart)
      - "TEMPORAL"     # Time references (2023-01-15, 3 days ago)

    # Entity relationship types
    relationship_types:
      - "TREATS"           # medication TREATS condition
      - "CAUSES"           # condition CAUSES symptom
      - "LOCATED_IN"       # symptom LOCATED_IN body_part
      - "CO_OCCURS_WITH"   # symptom CO_OCCURS_WITH symptom
      - "PRECEDES"         # event PRECEDES event (temporal)

    # Extraction confidence thresholds
    min_entity_confidence: 0.7         # Minimum confidence to keep entity
    min_relationship_confidence: 0.6   # Minimum confidence to keep relationship

    # Graph traversal settings
    default_top_k: 10        # Default number of results to return
    max_depth: 2             # Maximum graph traversal depth
    max_entities: 50         # Maximum entities to extract per document
    max_relationships: 100   # Maximum relationships to extract per document

    # Multi-modal search: per-source result counts (top-K sizes, not weights)
    vector_k: 30   # Top K from vector search
    text_k: 30     # Top K from text search
    graph_k: 10    # Top K from graph traversal

    # RRF fusion parameters
    # In standard Reciprocal Rank Fusion each result scores 1 / (rrf_k + rank),
    # so a HIGHER rrf_k flattens the gap between ranks and gives top-ranked
    # results relatively LESS weight (lower-ranked results gain influence).
    # NOTE(review): assumes the consumer implements standard RRF -- confirm.
    rrf_k: 60              # RRF rank constant
    fusion_method: "rrf"   # Reciprocal Rank Fusion algorithm

    # Performance settings
    batch_size: 10              # Number of documents to process in batch
    parallel_extraction: true   # Enable parallel entity extraction
    max_workers: 4              # Number of parallel workers

# LLM Configuration (optional - for enhanced entity extraction)
llm:
  provider: "ollama"                   # LLM provider (ollama, openai, etc.)
  model: "gemma3:4b"                   # LLM model for entity extraction
  base_url: "http://localhost:11434"   # Ollama API endpoint
  temperature: 0.0                     # Temperature for entity extraction (deterministic)
  max_tokens: 500                      # Maximum tokens for entity extraction
  timeout: 30                          # LLM request timeout in seconds
  fallback_to_regex: true              # Fallback to regex if LLM unavailable

# Logging Configuration
logging:
  level: "INFO"                    # Log level (DEBUG, INFO, WARNING, ERROR)
  format: "json"                   # Log format (json, text)
  file: "logs/fhir_graphrag.log"   # Log file path
  rotation: "daily"                # Log rotation (daily, size)
  max_bytes: 10485760              # Maximum log file size (10 MB)
  backup_count: 7                  # Number of backup log files

# Monitoring and Observability
monitoring:
  enabled: true
  metrics:
    - "entity_extraction_time"          # Time to extract entities per document
    - "entity_extraction_count"         # Number of entities extracted
    - "relationship_extraction_count"   # Number of relationships extracted
    - "query_latency"                   # Query execution time
    - "graph_traversal_depth"           # Actual graph traversal depth used

  # Performance targets for alerts
  performance_targets:
    entity_extraction_time_ms: 2000         # < 2 seconds per document
    query_latency_ms: 1000                  # < 1 second query response
    knowledge_graph_build_time_ms: 300000   # < 5 minutes for 51 documents

# Feature Flags
features:
  entity_normalization: false   # Entity text normalization (future enhancement)
  temporal_analysis: false      # Temporal relationship analysis (future)
  entity_feedback: false        # Manual entity correction (future)
  query_history: false          # Query performance tracking (future)

# Environment-Specific Overrides
# These can be overridden by environment variables:
#   - FHIR_GRAPHRAG_DB_HOST
#   - FHIR_GRAPHRAG_DB_PORT
#   - FHIR_GRAPHRAG_DB_NAMESPACE
#   - FHIR_GRAPHRAG_DB_USERNAME
#   - FHIR_GRAPHRAG_DB_PASSWORD
#   - FHIR_GRAPHRAG_LLM_MODEL
#   - FHIR_GRAPHRAG_LOG_LEVEL

# Configuration Validation Rules
validation:
  # Dotted paths that must be present in this file
  required_fields:
    - "database.iris.host"
    - "database.iris.port"
    - "database.iris.namespace"
    - "storage.iris.table_name"
    - "storage.iris.column_mapping.id_column"
    - "storage.iris.column_mapping.text_column"

  # Expected scalar type per dotted path
  field_types:
    "database.iris.port": "integer"
    "embeddings.dimension": "integer"
    "pipelines.graphrag.min_entity_confidence": "float"
    "pipelines.graphrag.entity_extraction_enabled": "boolean"

  # Allowed min/max per dotted path
  range_constraints:
    "pipelines.graphrag.min_entity_confidence":
      min: 0.0
      max: 1.0
    "pipelines.graphrag.min_relationship_confidence":
      min: 0.0
      max: 1.0
    "pipelines.graphrag.max_depth":
      min: 1
      max: 5

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/isc-tdyar/medical-graphrag-assistant'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.