export_deberta_onnx.py (3.28 kB)
#!/usr/bin/env python3
"""
Export NVIDIA DeBERTa quality classifier to ONNX format.

This script exports the NVIDIA quality-classifier-deberta model from
HuggingFace to ONNX format for local inference.

Usage:
    python scripts/quality/export_deberta_onnx.py

The exported model will be cached at:
    ~/.cache/mcp_memory/onnx_models/nvidia-quality-classifier-deberta/
"""

import sys
import logging
from pathlib import Path

# Add project to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from mcp_memory_service.quality.onnx_ranker import get_onnx_ranker_model

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def main():
    """Export NVIDIA DeBERTa to ONNX format."""
    print("=" * 80)
    print("NVIDIA DeBERTa Quality Classifier - ONNX Export")
    print("=" * 80)
    print()

    logger.info("Initializing DeBERTa model (this will trigger ONNX export if needed)...")

    try:
        # Create model instance - will automatically export to ONNX if not present
        model = get_onnx_ranker_model(
            model_name='nvidia-quality-classifier-deberta',
            device='cpu'  # Use CPU for export (GPU-specific optimizations happen at runtime)
        )

        if model is None:
            logger.error("Failed to create DeBERTa model")
            logger.error("Check that you have transformers and torch installed:")
            logger.error("  pip install transformers torch onnx onnxruntime")
            return 1

        # Verify model works with a test inference
        logger.info("Testing model with sample input...")
        test_content = "This is a high-quality, comprehensive guide with detailed examples and best practices."
        test_score = model.score_quality("", test_content)  # Empty query for classifier

        logger.info(f"Test inference successful! Score: {test_score:.3f}")

        print()
        print("=" * 80)
        print("✅ SUCCESS!")
        print("=" * 80)
        print()
        print(f"Model cached at: {model.MODEL_PATH}")
        print(f"Model type: {model.model_config['type']}")
        print(f"Model size: {model.model_config['size_mb']}MB")
        print()
        print("The model is now ready for use.")
        print("It will automatically load from cache on subsequent uses.")
        print()
        print("Next steps:")
        print("  1. Update .env: export MCP_QUALITY_LOCAL_MODEL=nvidia-quality-classifier-deberta")
        print("  2. Restart MCP services")
        print("  3. Run bulk evaluation: python scripts/quality/bulk_evaluate_onnx.py")

        return 0

    except Exception as e:
        logger.error(f"Export failed: {e}", exc_info=True)
        print()
        print("=" * 80)
        print("❌ EXPORT FAILED")
        print("=" * 80)
        print()
        print(f"Error: {e}")
        print()
        print("Troubleshooting:")
        print("  1. Ensure dependencies are installed: pip install transformers torch onnx onnxruntime")
        print("  2. Check internet connection (model downloads from HuggingFace)")
        print("  3. Verify disk space (~500MB needed for model)")
        return 1


if __name__ == '__main__':
    sys.exit(main())
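For a quick sanity check outside the project's wrapper, the cached artifacts can also be loaded with onnxruntime directly. The following is a minimal sketch, not part of the repository: it assumes the export writes a standard model.onnx alongside the HuggingFace tokenizer files in the cache directory named in the docstring, and that the classifier head emits one logit per quality class. None of these details is confirmed by the script above, so adjust paths and names to match what the export actually produces.

#!/usr/bin/env python3
"""Sanity-check the exported DeBERTa ONNX model with onnxruntime directly.

Sketch only: bypasses mcp_memory_service's get_onnx_ranker_model wrapper.
"""

from pathlib import Path

import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

# Cache location taken from the export script's docstring; the "model.onnx"
# filename and co-located tokenizer files are assumptions, not confirmed.
CACHE_DIR = Path.home() / ".cache/mcp_memory/onnx_models/nvidia-quality-classifier-deberta"

tokenizer = AutoTokenizer.from_pretrained(CACHE_DIR)
session = ort.InferenceSession(str(CACHE_DIR / "model.onnx"),
                               providers=["CPUExecutionProvider"])

text = "This is a high-quality, comprehensive guide with detailed examples."
encoded = tokenizer(text, return_tensors="np", truncation=True, max_length=512)

# Feed only the inputs the exported graph actually declares, so a graph
# without token_type_ids (or similar) still runs.
feed = {i.name: encoded[i.name] for i in session.get_inputs() if i.name in encoded}
logits = session.run(None, feed)[0]

# Softmax over the class logits to get per-class probabilities.
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
print("class probabilities:", probs[0])

If the graph was exported under a different filename or with preprocessing fused into it, the session path and feed need to change accordingly; the wrapper used in the export script remains the supported entry point for actual scoring.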
