"""Timeline parsing utilities for Zoho CRM screenshots."""
import json
import re
from typing import List, Dict, Any, Optional
from pathlib import Path
from anthropic import Anthropic
class TimelineParser:
    """Parses Zoho timeline screenshots using Claude's vision API.

    The parser sends one or more screenshots to the Anthropic Messages API
    together with a fixed extraction prompt, decodes the JSON object found in
    the reply, and can summarise the decoded timeline as a markdown report.
    """

    # Model and response budget used for the extraction request.
    DEFAULT_MODEL = "claude-sonnet-4-20250514"
    DEFAULT_MAX_TOKENS = 16000

    # File suffix -> media type declared for the uploaded image. Unknown
    # suffixes fall back to PNG, which is what every image used to be sent as.
    _MEDIA_TYPES = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    _FALLBACK_MEDIA_TYPE = "image/png"

    # Prompt sent ahead of the images. This text is part of the request, so
    # its lines are kept flush-left to leave the string's content unchanged.
    _EXTRACTION_PROMPT = """Analyze these Zoho CRM timeline screenshots and extract the event data.
The screenshots show a timeline history from newest (top) to oldest (bottom). I need you to:
1. Read all events from all screenshots
2. Order them chronologically (oldest first)
3. Extract for each event:
- Timestamp
- Event type (Field Update, Function Call, Blueprint Entry/Exit, etc.)
- Fields changed (with before/after values)
- Workflow rules triggered
- Functions called
- Any other relevant details
Return a JSON object with this structure:
{
"lead_timeline": {
"date": "YYYY-MM-DD",
"events": [
{
"timestamp": "HH:MM AM/PM",
"event_type": "...",
"action": "...",
"fields": [...],
"triggered_by": "...",
"workflow_rule": "...",
"function_name": "...",
...
}
]
}
}
Be thorough and capture all details from the timeline."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize parser with Anthropic API key.

        Args:
            api_key: Anthropic API key. When omitted (or empty) the client is
                created without arguments and resolves credentials itself
                (if None, reads from environment).
        """
        self.client = Anthropic(api_key=api_key) if api_key else Anthropic()

    def extract_timeline_from_images(self, image_paths: List[Path]) -> Dict[str, Any]:
        """Extract timeline data from multiple screenshot images.

        Args:
            image_paths: List of paths to screenshot images. They are sent in
                filename order (the filename contains the timestamp).

        Returns:
            Dictionary containing parsed timeline data. When the reply cannot
            be decoded into a JSON object, a dictionary with the keys
            ``"error"`` and ``"raw_response"`` is returned instead.
        """
        # Sort images by filename (which contains timestamp)
        sorted_paths = sorted(image_paths, key=lambda p: p.name)

        # The prompt goes first, followed by one image block per screenshot.
        content: List[Dict[str, Any]] = [
            {"type": "text", "text": self._EXTRACTION_PROMPT}
        ]
        content.extend(self._image_block(path) for path in sorted_paths)

        message = self.client.messages.create(
            model=self.DEFAULT_MODEL,
            max_tokens=self.DEFAULT_MAX_TOKENS,
            messages=[{"role": "user", "content": content}],
        )

        # Join every block that carries text instead of assuming the first
        # block exists and is a text block.
        text_parts = []
        for block in message.content or []:
            text = getattr(block, "text", None)
            if isinstance(text, str):
                text_parts.append(text)

        return self._parse_json_response("".join(text_parts))

    @classmethod
    def _image_block(cls, img_path: Path) -> Dict[str, Any]:
        """Build the base64 image content block for one screenshot file."""
        import base64  # function-scope import: not part of the module imports

        media_type = cls._MEDIA_TYPES.get(
            img_path.suffix.lower(), cls._FALLBACK_MEDIA_TYPE
        )
        image_data = base64.standard_b64encode(img_path.read_bytes()).decode("utf-8")
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": image_data,
            },
        }

    @staticmethod
    def _parse_json_response(response_text: str) -> Dict[str, Any]:
        """Decode the JSON object contained in a model reply.

        A fenced ``json`` code block is preferred; otherwise the span from the
        first ``{`` to the last ``}`` is used; otherwise the whole reply.
        """
        fenced = re.search(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
        if fenced:
            json_str = fenced.group(1)
        else:
            braced = re.search(r'\{.*\}', response_text, re.DOTALL)
            json_str = braced.group(0) if braced else response_text

        try:
            parsed = json.loads(json_str)
        except json.JSONDecodeError as e:
            # Return error with raw response
            return {
                "error": f"Failed to parse JSON: {e}",
                "raw_response": response_text,
            }

        # Callers use dict access on the result, so a valid but non-object
        # JSON value is reported the same way as a decode failure.
        if not isinstance(parsed, dict):
            return {
                "error": (
                    "Failed to parse JSON: expected a JSON object, "
                    f"got {type(parsed).__name__}"
                ),
                "raw_response": response_text,
            }
        return parsed

    @staticmethod
    def _get_events(timeline_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the event dictionaries of a timeline, or [] if malformed."""
        if not isinstance(timeline_data, dict):
            return []
        lead_timeline = timeline_data.get("lead_timeline")
        if not isinstance(lead_timeline, dict):
            return []
        events = lead_timeline.get("events")
        if not isinstance(events, list):
            return []
        return [event for event in events if isinstance(event, dict)]

    @staticmethod
    def _as_names(value: Any) -> List[str]:
        """Normalise a name field (string or list of strings) to a list.

        Empty strings, ``None`` and non-string items are dropped so the
        result can always be stored in a set and sorted.
        """
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, (list, tuple)):
            return []
        return [item for item in value if isinstance(item, str) and item]

    def extract_workflows_and_functions(self, timeline_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract unique workflows and functions from timeline data.

        A function name is counted when its event has ``event_type`` equal to
        ``"Function Call"`` or ``triggered_by`` equal to ``"Function"``.

        Args:
            timeline_data: Parsed timeline data

        Returns:
            Dictionary with the keys ``"workflow_rules"`` and ``"functions"``,
            each holding a ``"count"`` and an alphabetically sorted ``"list"``.
        """
        workflows = set()
        functions = set()

        for event in self._get_events(timeline_data):
            workflows.update(self._as_names(event.get("workflow_rule")))

            is_function_event = (
                event.get("event_type") == "Function Call"
                or event.get("triggered_by") == "Function"
            )
            if is_function_event:
                functions.update(self._as_names(event.get("function_name")))

        return {
            "workflow_rules": {
                "count": len(workflows),
                "list": sorted(workflows),
            },
            "functions": {
                "count": len(functions),
                "list": sorted(functions),
            },
        }

    def generate_report(self, timeline_data: Dict[str, Any]) -> str:
        """Generate a markdown report from timeline data.

        Args:
            timeline_data: Parsed timeline data

        Returns:
            Markdown formatted report with the event total followed by
            numbered lists of workflow rules and functions.
        """
        summary = self.extract_workflows_and_functions(timeline_data)
        events = self._get_events(timeline_data)

        parts = [
            "# Zoho Timeline Analysis Report\n\n",
            f"**Total Events:** {len(events)}\n\n",
        ]

        sections = (
            ("## Workflow Rules Triggered\n\n", summary["workflow_rules"]),
            ("## Functions Called\n\n", summary["functions"]),
        )
        for heading, section in sections:
            parts.append(heading)
            parts.extend(
                f"{i}. {name}\n" for i, name in enumerate(section["list"], 1)
            )
            parts.append(f"\n**Total:** {section['count']}\n\n")

        return "".join(parts)