"""Check for duplicates in the database."""
import asyncio
import aiosqlite
from pathlib import Path
# Calculate DB path relative to this file
_SCRIPT_DIR = Path(__file__).parent.resolve()
_DB_PATH = str(_SCRIPT_DIR.parent / "db" / "statements.db")
async def check_duplicates():
"""Check for duplicate records in the database."""
db_path = _DB_PATH
async with aiosqlite.connect(db_path) as db:
print("Checking for duplicates...\n")
# Check for duplicate holdings (same statement_id + symbol)
print("1. Checking Holdings for duplicates:")
async with db.execute("""
SELECT statement_id, symbol, COUNT(*) as count
FROM holdings
GROUP BY statement_id, symbol
HAVING count > 1
""") as cursor:
duplicates = await cursor.fetchall()
if duplicates:
print(f" ❌ Found {len(duplicates)} duplicate holding groups:")
for row in duplicates[:10]:
print(f" - Statement {row[0]}, Symbol {row[1]}: {row[2]} copies")
if len(duplicates) > 10:
print(f" ... and {len(duplicates) - 10} more")
else:
print(" ✅ No duplicate holdings")
# Check total holdings
async with db.execute("SELECT COUNT(*) FROM holdings") as cursor:
total = (await cursor.fetchone())[0]
print(f" Total holdings: {total}")
# Check for duplicate transactions
print("\n2. Checking Transactions for duplicates:")
async with db.execute("""
SELECT transaction_date, symbol, description, activity_type, net_amount, COUNT(*) as count
FROM transactions
WHERE symbol IS NOT NULL
GROUP BY transaction_date, symbol, description, activity_type, quantity, price, net_amount, currency
HAVING count > 1
""") as cursor:
duplicates = await cursor.fetchall()
if duplicates:
print(f" ⚠️ Found {len(duplicates)} potential duplicate transaction groups:")
for row in duplicates[:10]:
print(f" - {row[0]} {row[3]} {row[1]} - {row[2]}: {row[5]} copies")
if len(duplicates) > 10:
print(f" ... and {len(duplicates) - 10} more")
else:
print(" ✅ No duplicate transactions")
# Check total transactions
async with db.execute("SELECT COUNT(*) FROM transactions") as cursor:
total = (await cursor.fetchone())[0]
print(f" Total transactions: {total}")
# Check for duplicate benchmarks
print("\n3. Checking Benchmarks for duplicates:")
async with db.execute("""
SELECT statement_id, name, COUNT(*) as count
FROM benchmarks
GROUP BY statement_id, name
HAVING count > 1
""") as cursor:
duplicates = await cursor.fetchall()
if duplicates:
print(f" ❌ Found {len(duplicates)} duplicate benchmark groups:")
for row in duplicates[:10]:
print(f" - Statement {row[0]}, Benchmark {row[1]}: {row[2]} copies")
else:
print(" ✅ No duplicate benchmarks")
# Check total benchmarks
async with db.execute("SELECT COUNT(*) FROM benchmarks") as cursor:
total = (await cursor.fetchone())[0]
print(f" Total benchmarks: {total}")
# Check statements
print("\n4. Statement counts:")
async with db.execute("SELECT COUNT(*) FROM statements") as cursor:
total = (await cursor.fetchone())[0]
print(f" Total statements: {total}")
if __name__ == "__main__":
asyncio.run(check_duplicates())