import time
import csv
import os
import tempfile
import sys
import shutil
# Add src to path
sys.path.append(os.path.abspath("src"))
from data_handler import DataManager
import data_handler
def create_large_csv(filepath, total_rows=10000, target_index=5000):
    """Write a synthetic drafts CSV used as benchmark input.

    The file gets one header row followed by ``total_rows`` data rows.
    Every data row has status ``"posted"`` except the row whose zero-based
    index equals ``target_index``, which is ``"pending"``.

    Args:
        filepath: Destination path; an existing file is overwritten.
        total_rows: Number of data rows to write (header excluded).
        target_index: Zero-based index of the single pending row. If it is
            outside ``range(total_rows)`` no pending row is written.
    """
    header = [
        "id", "text", "media_path", "model_used", "status",
        "created_at", "scheduled_time", "notes", "is_retweet",
        "original_tweet_id",
    ]

    def build_row(index):
        # Only the target row is left pending; all others are already posted.
        row_status = "pending" if index == target_index else "posted"
        return [
            f"id_{index}",
            f"text_{index}",
            "",
            "manual",
            row_status,
            "2023-01-01T00:00:00",
            "",
            "",
            False,
            "",
        ]

    with open(filepath, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(build_row(index) for index in range(total_rows))
def simulated_get_first_pending_draft(filepath):
    """Return the first CSV row whose ``status`` column is ``"pending"``.

    The file is read lazily through ``csv.DictReader`` and scanning stops
    at the first match, so rows after it are never parsed.

    Args:
        filepath: Path to a CSV file with a header row.

    Returns:
        The matching row as a mapping of column name to value, or ``None``
        when no row is pending.
    """
    with open(filepath, 'r', newline='', encoding='utf-8') as source:
        pending_rows = (
            record
            for record in csv.DictReader(source)
            if record.get("status") == "pending"
        )
        # next() pulls just one item, so the generator stops at the first hit.
        return next(pending_rows, None)
def main():
    """Benchmark ``list_pending_drafts()[0]`` against ``get_first_pending_draft()``.

    Generates a 10,000-row drafts CSV (single pending row at index 5000) in
    a temporary directory, points ``data_handler.DRAFTS_FILE`` at it, times
    one call of each ``DataManager`` method and prints both timings plus the
    resulting speedup. The original ``DRAFTS_FILE`` value is restored and
    the temporary directory removed even if the benchmark fails.
    """
    # Capture the original value before anything that needs cleanup exists,
    # so the finally clause can always restore it (previously it was bound
    # inside the try and could be undefined when cleanup ran).
    original_drafts_file = data_handler.DRAFTS_FILE
    test_dir = tempfile.mkdtemp()
    drafts_file = os.path.join(test_dir, "drafts.csv")

    try:
        print("Generating 10,000 rows CSV (target at 5000)...")
        create_large_csv(drafts_file, total_rows=10000, target_index=5000)

        # Patch DataManager to use our temp file
        # NOTE(review): assumes DataManager reads data_handler.DRAFTS_FILE at
        # construction/call time rather than a copy bound at import - confirm.
        data_handler.DRAFTS_FILE = drafts_file
        dm = DataManager()

        # Benchmark 1: Current Approach (list_pending_drafts()[0])
        print("\nBenchmarking Current Approach (list_pending_drafts)...")
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        drafts = dm.list_pending_drafts()
        if drafts:
            _ = drafts[0]
        current_time = time.perf_counter() - start_time
        print(f"Time: {current_time:.6f} seconds")

        # Benchmark 2: Proposed Approach (streaming)
        print("\nBenchmarking Proposed Approach (streaming)...")
        start_time = time.perf_counter()
        # Using the actual implementation
        _ = dm.get_first_pending_draft()
        proposed_time = time.perf_counter() - start_time
        print(f"Time: {proposed_time:.6f} seconds")

        # Guard the divisor as well: a zero proposed_time would otherwise
        # raise ZeroDivisionError.
        if current_time > 0 and proposed_time > 0:
            speedup = current_time / proposed_time
            print(f"\nSpeedup: {speedup:.2f}x")
    finally:
        data_handler.DRAFTS_FILE = original_drafts_file
        shutil.rmtree(test_dir)
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()