#!/usr/bin/env python3
"""
Quick MongoDB Data Checker
"""
import os
from dotenv import load_dotenv
import pymongo
from datetime import datetime
# Load variables from a .env file (via python-dotenv) into the process
# environment at import time, so the os.getenv() lookups in
# check_mongodb_data() can see MONGO_URI, MONGO_DB_NAME and
# MONGO_COLLECTION_NAME.
load_dotenv()
def check_mongodb_data():
    """Print a short summary of the configured MongoDB collection.

    Connection settings are read from the environment: ``MONGO_URI`` (passed
    to ``pymongo.MongoClient`` as-is, even when unset), ``MONGO_DB_NAME``
    (default ``'blackhole_db'``) and ``MONGO_COLLECTION_NAME`` (default
    ``'agent_outputs'``).

    The report lists the database's collection names, the total document
    count, the distinct ``agent`` values, up to five documents in descending
    ``_id`` order, case-insensitive match counts for agents containing
    ``document``, ``pdf`` and ``ocr``, and the keys of one sample document.

    Returns:
        bool: ``True`` when every query succeeded (even if the collection is
        empty), ``False`` when any step raised. The error is printed rather
        than propagated.
    """
    # NOTE(review): the non-ASCII glyphs at the start of several messages look
    # like mis-decoded emoji; they are reproduced as found -- confirm the
    # intended characters against the file's original encoding.
    try:
        # The context manager closes the client on every exit path, including
        # when one of the queries below raises.
        with pymongo.MongoClient(os.getenv('MONGO_URI')) as client:
            db = client[os.getenv('MONGO_DB_NAME', 'blackhole_db')]
            collection = db[os.getenv('MONGO_COLLECTION_NAME', 'agent_outputs')]

            print("π MongoDB Data Analysis")
            print("=" * 40)

            # Collections
            collections = db.list_collection_names()
            print(f"π Collections: {collections}")

            # Agent outputs count
            total_docs = collection.count_documents({})
            print(f"π Total agent outputs: {total_docs}")

            # Agent types
            agents = list(collection.distinct('agent'))
            print(f"π€ Agent types: {agents}")

            # Recent documents (highest _id first)
            recent = list(collection.find().sort('_id', -1).limit(5))
            print(f"\nπ Recent documents ({len(recent)}):")
            for i, doc in enumerate(recent, 1):
                agent = doc.get('agent', 'unknown')
                # Fall back to 'created_at' when 'timestamp' is absent.
                timestamp = doc.get('timestamp', doc.get('created_at', 'unknown'))
                has_output = 'output' in doc
                print(f" {i}. Agent: {agent}, Has output: {has_output}, Time: {timestamp}")

            # Check for specific agent data. All counts are fetched before
            # anything is printed so a failing query leaves no partial section.
            agent_patterns = (
                (" π Document agents", 'document'),
                (" π PDF agents", 'pdf'),
                (" πΌοΈ OCR agents", 'ocr'),
            )
            agent_counts = [
                (label, collection.count_documents(
                    {'agent': {'$regex': pattern, '$options': 'i'}}))
                for label, pattern in agent_patterns
            ]
            print("\nπ― Specific agent data:")
            for label, count in agent_counts:
                print(f"{label}: {count}")

            # Sample document structure
            sample = collection.find_one()
            if sample:
                print("\nπ Sample document structure:")
                print(f" Keys: {list(sample.keys())}")
                if 'output' in sample:
                    output = sample['output']
                    if isinstance(output, dict):
                        output_keys = list(output.keys())
                    else:
                        output_keys = ['non-dict output']
                    print(f" Output keys: {output_keys}")

        # The client is closed at this point; report the overall outcome.
        if total_docs:
            print("\nβ MongoDB is connected and contains data!")
        else:
            # Do not claim the collection holds data when the count is zero.
            print("\nMongoDB is connected, but the collection is empty.")
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"β Error: {e}")
        return False
if __name__ == "__main__":
    # Propagate the check result as the process exit status (0 = success,
    # 1 = failure) so shells and CI jobs can react to a failed check instead
    # of always seeing a successful exit.
    raise SystemExit(0 if check_mongodb_data() else 1)