#!/usr/bin/env python3
"""
Create Full-Text Index for MP Names in Neo4j
This script creates a full-text index on MP nodes to improve search performance.
The index covers name, given_name, and family_name properties.
Usage:
python scripts/create_mp_fulltext_index.py
Environment variables:
NEO4J_URI: Neo4j connection URI (default: bolt://localhost:7687)
NEO4J_USERNAME: Neo4j username (default: neo4j)
NEO4J_PASSWORD: Neo4j password (required)
"""
import os
import sys
from pathlib import Path
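# Add the parent of this script's directory to sys.path so that the
# fedmcp_pipeline package can be imported when the script is run directly.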
sys.path.insert(0, str(Path(__file__).parent.parent))
from fedmcp_pipeline.utils.neo4j_client import Neo4jClient
def create_fulltext_index():
    """Create the 'mp_name_search' full-text index on MP nodes and verify it.

    Connection settings are read from the environment: NEO4J_URI (default
    'bolt://localhost:7687'), NEO4J_USERNAME (default 'neo4j') and
    NEO4J_PASSWORD (required).

    If an index named 'mp_name_search' already exists the function reports it
    and returns without changing anything. Otherwise it creates the index over
    mp.name, mp.given_name and mp.family_name, prints the index state, and
    runs a sample prefix search for 'Pierre' to show the index is usable.

    The Neo4j client is closed on every exit path once it has been opened,
    including the early return, the failure exit and unexpected exceptions.

    Raises:
        SystemExit: With status 1 when NEO4J_PASSWORD is unset or empty, or
            when the CREATE FULLTEXT INDEX statement fails.
    """
    # Get connection details from environment
    uri = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
    username = os.getenv('NEO4J_USERNAME', 'neo4j')
    password = os.getenv('NEO4J_PASSWORD')

    if not password:
        print("Error: NEO4J_PASSWORD environment variable is required")
        sys.exit(1)

    print(f"Connecting to Neo4j at {uri}...")
    client = Neo4jClient(uri=uri, user=username, password=password)

    # Everything after the client is opened runs under try/finally so the
    # connection is released on the early return, on sys.exit() (SystemExit
    # still triggers finally), and on any unexpected query error.
    try:
        # Check if index already exists
        print("Checking for existing indexes...")
        result = client.run_query("""
            SHOW FULLTEXT INDEXES
            YIELD name
            WHERE name = 'mp_name_search'
            RETURN name
        """)

        if result:
            print("Full-text index 'mp_name_search' already exists.")
            print("To recreate, run: DROP INDEX mp_name_search")
            return

        # Create the full-text index
        print("Creating full-text index 'mp_name_search' on MP nodes...")
        try:
            # IF NOT EXISTS keeps the statement safe even if another process
            # created the index between the check above and this call.
            client.run_query("""
                CREATE FULLTEXT INDEX mp_name_search IF NOT EXISTS
                FOR (mp:MP) ON EACH [mp.name, mp.given_name, mp.family_name]
            """)
            print("Full-text index created successfully!")
        except Exception as e:
            # Top-level script boundary: report the failure and exit non-zero.
            print(f"Error creating index: {e}")
            sys.exit(1)

        # Verify the index was created
        print("\nVerifying index...")
        result = client.run_query("""
            SHOW FULLTEXT INDEXES
            YIELD name, state, populationPercent
            WHERE name = 'mp_name_search'
            RETURN name, state, populationPercent
        """)

        if result:
            # NOTE(review): run_query is assumed to return a sequence of
            # mapping-like rows keyed by the RETURN aliases - confirm
            # against Neo4jClient.
            index = result[0]
            print(f"Index '{index['name']}' created - State: {index['state']}, Population: {index['populationPercent']}%")

        # Test the index with a sample query
        print("\nTesting index with sample query for 'Pierre'...")
        test_result = client.run_query("""
            CALL db.index.fulltext.queryNodes('mp_name_search', 'Pierre*')
            YIELD node, score
            RETURN node.name AS name, score
            LIMIT 5
        """)

        if test_result:
            print("Sample results:")
            for row in test_result:
                print(f" - {row['name']} (score: {row['score']:.2f})")
        else:
            print("No results found (index may still be populating)")

        print("\nDone!")
    finally:
        client.close()
# Run the index setup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_fulltext_index()