#!/usr/bin/env python
"""
Fix embedding dimension mismatch utility.
This script helps resolve issues where the database schema has a different
embedding dimension than the configured embedding provider.
"""
import asyncio
import logging
import sys
from pathlib import Path
from typing import Any, Dict, Optional
# Configure root logging at import time: INFO level, with timestamp and
# level name prefixed to every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
async def fix_dimension_mismatch(config_path: Optional[str] = None) -> bool:
    """Fix embedding dimension mismatch between database and provider.

    Loads the configuration, asks the embedding provider for the dimension
    of the vectors it produces, and compares that with the dimension the
    database manager reports for the current table. On a mismatch the user
    is asked to confirm before the database schema is altered.

    Args:
        config_path: Path to config file. When omitted, ``get_config()`` is
            called without arguments (documented default:
            ~/.config/zotero-mcp/config.json).

    Returns:
        True if the dimensions already match or the fix was applied.
        False if the configuration could not be loaded, the user declined
        (or no interactive input was available), or any step raised.
    """
    try:
        # Imported inside the try so that an import failure is logged and
        # reported as False like any other error.
        from zotero_mcp.utils import get_config
        from zotero_mcp.embedding_service import create_embedding_provider
        from zotero_mcp.db_schema import create_database_manager

        # Load configuration
        config = get_config(config_path) if config_path else get_config()
        if not config:
            logger.error("Could not load configuration")
            return False

        # Get embedding provider to detect dimension
        embedding_config = config.get("embedding", {})
        embedding_provider = await create_embedding_provider(embedding_config)
        detected_dimension = embedding_provider.get_embedding_dimension()
        logger.info("Detected embedding dimension: %s", detected_dimension)
        logger.info("Provider: %s", embedding_provider.get_provider_name())
        logger.info("Model: %s", embedding_provider.get_model_name())

        # Get database manager
        database_config = config.get("database", {})
        db_manager = create_database_manager(
            database_config, embedding_dimension=detected_dimension
        )

        # Check current table dimension. The stored config dimension is
        # logged for diagnostics only; the decision below is based on the
        # table dimension.
        current_dimension = db_manager.get_current_embedding_dimension()
        stored_dimension = db_manager.get_stored_embedding_dimension()
        logger.info("Current table dimension: %s", current_dimension)
        logger.info("Stored config dimension: %s", stored_dimension)

        if current_dimension == detected_dimension:
            logger.info("✅ No dimension mismatch found!")
            return True

        # Ask for confirmation
        print("\n⚠️ DIMENSION MISMATCH DETECTED:")
        print(f" Database table expects: {current_dimension}D vectors")
        print(f" Your embedding model produces: {detected_dimension}D vectors")
        print("\n🔧 PROPOSED FIX:")
        print(f" - Update database schema to use {detected_dimension}D vectors")
        print(" - Clear existing embeddings (they'll be incompatible)")
        print(" - You'll need to run 'zotero-mcp update-db --force-rebuild' after this")

        # NOTE(review): input() blocks the event loop; that is tolerable
        # here only because main() runs this coroutine on its own.
        try:
            response = input("\nProceed with dimension change? (yes/no): ").strip().lower()
        except EOFError:
            # stdin is closed or not interactive: never apply a destructive
            # schema change without an explicit "yes".
            logger.warning("Operation cancelled: no interactive input available")
            return False

        if response not in ('yes', 'y'):
            logger.info("Operation cancelled by user")
            return False

        # Apply the fix
        logger.info("Applying dimension fix...")
        db_manager.alter_embedding_dimension(detected_dimension)
        logger.info("✅ Dimension mismatch fixed!")
        logger.info("📋 Next steps:")
        logger.info(" 1. Run: zotero-mcp update-db --force-rebuild")
        logger.info(" 2. Wait for embeddings to be regenerated")
        logger.info(" 3. Test semantic search functionality")
        return True
    except Exception as e:
        # Top-level boundary for the CLI: keep the bool contract, but log
        # the traceback so the failing step can be identified.
        logger.exception("Error fixing dimension mismatch: %s", e)
        return False
def main():
    """CLI entry point.

    An optional first command-line argument is used as the config file
    path. Exits the process with status 0 when ``fix_dimension_mismatch``
    returns True, 1 when it returns False, and 130 when the run is
    interrupted with Ctrl+C.
    """
    config_path = sys.argv[1] if len(sys.argv) > 1 else None

    print("🔧 Zotero MCP Embedding Dimension Fix Utility")
    print("=" * 50)

    try:
        success = asyncio.run(fix_dimension_mismatch(config_path))
    except KeyboardInterrupt:
        # Ctrl+C (e.g. at the confirmation prompt) should end the tool
        # cleanly rather than with a traceback; 130 = 128 + SIGINT.
        print("\n❌ Interrupted by user")
        sys.exit(130)

    if success:
        print("\n✅ Fix completed successfully!")
        sys.exit(0)

    print("\n❌ Fix failed!")
    sys.exit(1)


if __name__ == "__main__":
    main()