#!/usr/bin/env python
"""
ocr_cli.py - command line interface for retrieving screen OCR data from rewinddb.
call flow:
1. parse command line arguments using argparse
2. connect to rewinddb database
3. determine time range based on arguments:
- if --relative is provided, calculate relative time range from now
- if --from and --to are provided, use specific time range
4. query screen OCR data for the specified time range
5. optionally filter by application if --app is specified
6. deduplicate OCR data to remove entries with very similar text content
7. format and display results (with conditional app name display)
8. close database connection
the cli supports two main query modes:
- relative time queries (e.g., "1 hour", "5h", "30m", "2d", "1w")
- specific time range queries with --from and --to timestamps
additional features:
- list all applications that have OCR data with --list-apps
- filter OCR data by specific application with --app
- automatic deduplication of similar text content (keeps most recent entries)
- improved display format when filtering by app (hides redundant app names)
examples:
python ocr_cli.py --relative "1 hour"
python ocr_cli.py --relative "5 hours"
python ocr_cli.py --relative "5h"
python ocr_cli.py --relative "30m"
python ocr_cli.py --relative "2d"
python ocr_cli.py --relative "1w"
python ocr_cli.py --from "2023-05-11 13:00:00" --to "2023-05-11 17:00:00"
python ocr_cli.py --list-apps
python ocr_cli.py --relative "1 day" --app "com.apple.Safari"
"""
import argparse
import datetime
from datetime import timezone
import re
import sys
import hashlib
import time
# Removed SequenceMatcher import - now using fast fingerprint approach
# Removed defaultdict import - no longer needed with fingerprint approach
import rewinddb
import rewinddb.utils
def convert_to_local_time(dt):
"""convert a utc datetime to local time.
args:
dt: datetime object in utc
returns:
datetime object in local time
"""
if dt is None:
return None
# if datetime has no timezone info, assume it's utc
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
# convert to local time
return dt.astimezone()
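# illustrative behaviour (the resulting offset depends on the machine's local
# timezone; a hypothetical UTC-7 locale is assumed below):
#   convert_to_local_time(datetime.datetime(2023, 5, 11, 20, 0))  # naive, treated as utc
#   -> 2023-05-11 13:00:00-07:00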
def parse_relative_time(time_str):
"""parse a relative time string into timedelta components.
args:
time_str: string like "1 hour", "5 hours", "30 minutes" or short form "5h", "3m", "10d", "2w"
returns:
dict with keys for days, hours, minutes, seconds
raises:
ValueError: if the time string format is invalid
"""
time_str = time_str.lower().strip()
time_components = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0}
# short form pattern (e.g., "5h", "3m", "10d", "2w")
short_patterns = {
r"^(\d+)w$": lambda x: {"days": int(x) * 7},
r"^(\d+)d$": lambda x: {"days": int(x)},
r"^(\d+)h$": lambda x: {"hours": int(x)},
r"^(\d+)m$": lambda x: {"minutes": int(x)},
r"^(\d+)s$": lambda x: {"seconds": int(x)}
}
# check for short form patterns first
for pattern, handler in short_patterns.items():
match = re.search(pattern, time_str)
if match:
component_values = handler(match.group(1))
for component, value in component_values.items():
time_components[component] = value
return time_components
# long form patterns
patterns = {
r"(\d+)\s*(?:day|days)": "days",
r"(\d+)\s*(?:hour|hours|hr|hrs)": "hours",
r"(\d+)\s*(?:minute|minutes|min|mins)": "minutes",
r"(\d+)\s*(?:second|seconds|sec|secs)": "seconds",
r"(\d+)\s*(?:week|weeks)": "weeks"
}
# try to match each pattern
found_match = False
for pattern, component in patterns.items():
match = re.search(pattern, time_str)
if match:
if component == "weeks":
time_components["days"] += int(match.group(1)) * 7
else:
time_components[component] = int(match.group(1))
found_match = True
if not found_match:
raise ValueError(f"invalid time format: {time_str}. use format like '1 hour', '5h', '30m', '2d', '1w'.")
return time_components
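# illustrative usage, with return values traced from the parsing rules above:
#   parse_relative_time("5h")      -> {"days": 0, "hours": 5, "minutes": 0, "seconds": 0}
#   parse_relative_time("1 hour")  -> {"days": 0, "hours": 1, "minutes": 0, "seconds": 0}
#   parse_relative_time("2w")      -> {"days": 14, "hours": 0, "minutes": 0, "seconds": 0}
#   parse_relative_time("soon")    -> raises ValueError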
def get_ocr_data_relative(db, time_str):
"""get screen OCR data from a relative time period.
args:
db: rewinddb instance
time_str: relative time string (e.g., "1 hour", "5 hours")
returns:
list of OCR data dictionaries
"""
try:
time_components = parse_relative_time(time_str)
return db.get_screen_ocr_text_relative(**time_components)
except ValueError as e:
print(f"error: {e}", file=sys.stderr)
sys.exit(1)
def get_ocr_data_absolute(db, from_time_str, to_time_str):
"""get screen OCR data from a specific time range.
args:
db: rewinddb instance
from_time_str: start time string in format "YYYY-MM-DD HH:MM:SS", "YYYY-MM-DD HH:MM", "YYYY-MM-DD", "HH:MM:SS", or "HH:MM"
to_time_str: end time string in format "YYYY-MM-DD HH:MM:SS", "YYYY-MM-DD HH:MM", "YYYY-MM-DD", "HH:MM:SS", or "HH:MM"
returns:
list of OCR data dictionaries
"""
def normalize_time_string(time_str, is_end_time=False):
"""normalize time string to handle various formats."""
# check if time_str is time-only format (HH:MM or HH:MM:SS)
if len(time_str) <= 8 and ':' in time_str:
today = datetime.datetime.now().strftime("%Y-%m-%d")
# if it's HH:MM format, add :00 for seconds
if time_str.count(':') == 1:
time_str = f"{time_str}:00"
time_str = f"{today} {time_str}"
# check if it's date-only format (YYYY-MM-DD)
elif len(time_str) == 10 and time_str.count('-') == 2:
if is_end_time:
time_str = f"{time_str} 23:59:59"
else:
time_str = f"{time_str} 00:00:00"
# check if it's date with HH:MM format
elif ' ' in time_str and time_str.split(' ')[1].count(':') == 1:
time_str = f"{time_str}:00"
return time_str
try:
# get local timezone for proper conversion
local_tz = datetime.datetime.now().astimezone().tzinfo
# normalize time strings to handle various formats
from_time_str = normalize_time_string(from_time_str, is_end_time=False)
to_time_str = normalize_time_string(to_time_str, is_end_time=True)
# parse as naive datetime first
from_time_naive = datetime.datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S")
to_time_naive = datetime.datetime.strptime(to_time_str, "%Y-%m-%d %H:%M:%S")
# add local timezone info and convert to UTC for database query
from_time = from_time_naive.replace(tzinfo=local_tz).astimezone(timezone.utc)
to_time = to_time_naive.replace(tzinfo=local_tz).astimezone(timezone.utc)
return db.get_screen_ocr_text_absolute(from_time, to_time)
except ValueError as e:
print(f"error: invalid time format. use format 'YYYY-MM-DD HH:MM:SS', 'YYYY-MM-DD HH:MM', 'YYYY-MM-DD', 'HH:MM:SS', or 'HH:MM'.", file=sys.stderr)
sys.exit(1)
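# illustrative normalization performed by the nested helper above ("2025-01-15"
# stands in for whatever today's date is when a time-only value is given):
#   "13:00"             -> "2025-01-15 13:00:00"
#   "13:00:00"          -> "2025-01-15 13:00:00"
#   "2023-05-11"        -> "2023-05-11 00:00:00" as a start time, "2023-05-11 23:59:59" as an end time
#   "2023-05-11 13:00"  -> "2023-05-11 13:00:00"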
def get_applications_with_ocr_data(db, time_str=None, from_time_str=None, to_time_str=None):
"""get list of applications that have OCR data.
args:
db: rewinddb instance
time_str: optional relative time string (e.g., "1 week")
from_time_str: optional start time string
to_time_str: optional end time string
returns:
sorted list of unique application names
"""
# get OCR data for the specified time range
if time_str:
ocr_data = get_ocr_data_relative(db, time_str)
elif from_time_str and to_time_str:
ocr_data = get_ocr_data_absolute(db, from_time_str, to_time_str)
else:
# default to last week if no time range specified
ocr_data = get_ocr_data_relative(db, "1 week")
# extract unique application names
applications = set()
for item in ocr_data:
app = item.get('application')
if app:
applications.add(app)
return sorted(applications)
def filter_ocr_data_by_app(ocr_data, app_name):
"""filter OCR data to only include entries from a specific application.
args:
ocr_data: list of OCR data dictionaries
app_name: application name to filter by
returns:
filtered list of OCR data dictionaries
"""
return [item for item in ocr_data if item.get('application') == app_name]
def normalize_text_for_similarity(text):
"""normalize text for similarity comparison.
args:
text: input text string
returns:
normalized text string suitable for similarity matching
"""
if not text:
return ""
# convert to lowercase, remove extra whitespace, strip
normalized = re.sub(r'\s+', ' ', text.lower().strip())
# remove common punctuation that doesn't affect meaning but keep structure
normalized = re.sub(r'[.,;:!?"\'\-_(){}[\]]+', '', normalized)
return normalized
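# illustrative usage, traced from the regular expressions above:
#   normalize_text_for_similarity("  Hello,   World! ")         -> "hello world"
#   normalize_text_for_similarity("Re-connect to DB (retry).")  -> "reconnect to db retry"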
# Removed calculate_text_similarity - now using fast fingerprint approach
# Removed old fuzzy matching functions - now using fast fingerprint approach
def deduplicate_ocr_data_fuzzy(ocr_data, similarity_threshold=None, debug=False):
"""fast, lossy deduplication using improved text fingerprinting for maximum speed.
uses multi-part text fingerprinting instead of similarity calculations:
- creates fingerprints using first 30 chars + words 3-5 + last 20 chars + length
- uses dictionary lookups for O(1) duplicate detection
- catches variations in the middle of the text (like "CONANT H V S" vs "CONANT H V")
- prioritizes speed over perfect accuracy
- target: process 1000+ entries in under 1 second
keeps the most recent entry when duplicates are found.
args:
ocr_data: list of OCR data dictionaries
similarity_threshold: ignored (kept for compatibility)
debug: whether to show timing and debug information
returns:
tuple of (deduplicated_ocr_data, num_duplicates_removed)
"""
if not ocr_data:
return ocr_data, 0
start_time = time.time()
if debug:
print(f"starting fast fingerprint deduplication with {len(ocr_data)} entries...")
# sort by frame_time to process chronologically (keep most recent)
sorted_data = sorted(ocr_data, key=lambda x: x.get('frame_time', datetime.datetime.min))
# dictionary to track fingerprints: fingerprint -> (index, item)
fingerprint_to_item = {}
deduplicated = []
duplicates_removed = 0
for current_item in sorted_data:
current_text = current_item.get('text', '')
current_app = current_item.get('application', '')
# skip items with no meaningful text
if not current_text.strip():
deduplicated.append(current_item)
continue
# create improved fingerprint using multiple text parts
normalized_text = normalize_text_for_similarity(current_text)
text_len = len(normalized_text)
# create fingerprint components
first_part = normalized_text[:30] if len(normalized_text) > 30 else normalized_text
last_part = normalized_text[-20:] if len(normalized_text) > 20 else ""
# get middle words (words 3-5) to catch variations like "V S" vs "V" vs "U2"
words = normalized_text.split()
middle_words = ""
if len(words) >= 5:
middle_words = " ".join(words[2:5]) # words 3-5 (0-indexed)
elif len(words) >= 3:
middle_words = " ".join(words[2:]) # remaining words after first 2
# combine into fingerprint: app:length:first30:middle_words:last20
fingerprint = f"{current_app}:{text_len}:{first_part}:{middle_words}:{last_part}"
if fingerprint in fingerprint_to_item:
# this is a duplicate - replace the existing item with the more recent one
existing_index, existing_item = fingerprint_to_item[fingerprint]
deduplicated[existing_index] = current_item
fingerprint_to_item[fingerprint] = (existing_index, current_item)
duplicates_removed += 1
else:
# this is a new unique item
new_index = len(deduplicated)
deduplicated.append(current_item)
fingerprint_to_item[fingerprint] = (new_index, current_item)
elapsed = time.time() - start_time
if debug:
print(f"fast fingerprint deduplication completed in {elapsed:.3f} seconds")
print(f"processed {len(ocr_data)} entries, removed {duplicates_removed} duplicates")
print(f"processing rate: {len(ocr_data)/elapsed:.0f} entries/second")
return deduplicated, duplicates_removed
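# illustrative fingerprint for a hypothetical Slack OCR entry (application name
# and text are made up; the layout follows the app:length:first30:middle_words:last20
# construction above):
#   text        = "CONANT H V S + home DMs Activity Later Drafts"
#   normalized  = "conant h v s + home dms activity later drafts"  (45 chars)
#   fingerprint = "Slack:45:conant h v s + home dms activi:v s +:ctivity later drafts"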
def deduplicate_ocr_data_fast(ocr_data, debug=False):
"""legacy fast hash-based deduplication (kept for compatibility).
this is the old exact-match approach. use deduplicate_ocr_data_fuzzy for better results.
args:
ocr_data: list of OCR data dictionaries
debug: whether to show timing information
returns:
tuple of (deduplicated_ocr_data, num_duplicates_removed)
"""
if not ocr_data:
return ocr_data, 0
start_time = time.time() if debug else None
# sort by frame_time to process chronologically
sorted_data = sorted(ocr_data, key=lambda x: x.get('frame_time', datetime.datetime.min))
# use dictionary to track unique text hashes
hash_to_item = {}
deduplicated = []
duplicates_removed = 0
for current_item in sorted_data:
current_text = current_item.get('text', '')
current_app = current_item.get('application', '')
# skip items with no meaningful text
if not current_text.strip():
deduplicated.append(current_item)
continue
# create hash for this text + app combination
normalized_text = normalize_text_for_similarity(current_text)
hash_input = f"{current_app}:{normalized_text}"
text_hash = hashlib.md5(hash_input.encode('utf-8')).hexdigest()
if text_hash in hash_to_item:
# this is a duplicate - replace the existing item with the more recent one
existing_index, existing_item = hash_to_item[text_hash]
deduplicated[existing_index] = current_item
hash_to_item[text_hash] = (existing_index, current_item)
duplicates_removed += 1
else:
# this is a new unique item
new_index = len(deduplicated)
deduplicated.append(current_item)
hash_to_item[text_hash] = (new_index, current_item)
if debug and start_time:
elapsed = time.time() - start_time
print(f"hash-based deduplication completed in {elapsed:.3f} seconds")
return deduplicated, duplicates_removed
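# illustrative hash input for the legacy exact-match path (hypothetical values):
#   app        = "com.apple.Safari"
#   text       = "Welcome back"
#   hash_input = "com.apple.Safari:welcome back"
#   text_hash  = hashlib.md5(hash_input.encode("utf-8")).hexdigest()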
def deduplicate_ocr_data(ocr_data, similarity_threshold=None, debug=False):
"""deduplicate OCR data using fast fingerprint-based matching.
uses fast text fingerprinting instead of similarity calculations for maximum speed.
can catch similar entries like:
- "CONANT H V S + home DMs Activity Later..."
- "CONANT H V + home DMs Activity Later..."
- "CONANT H U2 + home DMs Activity Later..."
keeps the most recent entry when duplicates are found.
args:
ocr_data: list of OCR data dictionaries
similarity_threshold: ignored (kept for compatibility)
debug: whether to show timing and debug information
returns:
tuple of (deduplicated_ocr_data, num_duplicates_removed)
"""
return deduplicate_ocr_data_fuzzy(ocr_data, similarity_threshold, debug=debug)
# Compatibility functions for existing test files
def normalize_text_for_deduplication(text):
"""legacy function name - redirects to normalize_text_for_similarity for compatibility."""
return normalize_text_for_similarity(text)
def create_text_hash(text, app_name=""):
"""legacy function for creating text hash - kept for compatibility with test files."""
normalized_text = normalize_text_for_similarity(text)
hash_input = f"{app_name}:{normalized_text}"
return hashlib.md5(hash_input.encode('utf-8')).hexdigest()
def format_ocr_data_with_text(ocr_data, show_app_name=True):
"""format OCR data into readable text with actual text content.
converts a list of OCR data dictionaries into a formatted string
with timestamps, applications, and extracted text content.
args:
ocr_data: list of OCR data dictionaries
show_app_name: whether to show application name in the output
returns:
formatted string representation of the OCR data
"""
if not ocr_data:
return "no OCR data available."
# group by frame time and application for better readability
frames = {}
for item in ocr_data:
frame_time = item['frame_time']
application = item.get('application', 'Unknown')
window = item.get('window', 'Unknown')
text = item.get('text', '')
# create a key for grouping
time_str = frame_time.strftime('%Y-%m-%d %H:%M:%S')
key = f"{time_str}_{application}_{window}"
if key not in frames:
frames[key] = {
'time': frame_time,
'application': application,
'window': window,
'texts': []
}
if text.strip(): # only add non-empty text
frames[key]['texts'].append(text.strip())
# sort frames by time
sorted_frames = sorted(frames.items(), key=lambda x: x[1]['time'])
# format each frame
result = []
for key, frame in sorted_frames:
time_str = frame['time'].strftime('%Y-%m-%d %H:%M:%S')
if show_app_name:
app_str = f"{frame['application']}"
if frame['window'] and frame['window'] != 'Unknown':
app_str += f" - {frame['window']}"
result.append(f"[{time_str}] {app_str}")
else:
# when not showing app name, still show window if available
if frame['window'] and frame['window'] != 'Unknown':
result.append(f"[{time_str}] {frame['window']}")
else:
result.append(f"[{time_str}]")
# show text content
if frame['texts']:
for text in frame['texts']:
# show full text content without truncation
result.append(f" {text}")
else:
result.append(" (no text content)")
result.append("") # empty line between frames
return "\n".join(result)
def parse_arguments():
"""parse command line arguments.
returns:
parsed argument namespace
"""
parser = argparse.ArgumentParser(
description="retrieve screen OCR data from rewinddb",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
examples:
%(prog)s --relative "1 hour"
%(prog)s --relative "5 hours"
%(prog)s --relative "5h"
%(prog)s --relative "30m"
%(prog)s --relative "2d"
%(prog)s --relative "1w"
%(prog)s --from "2023-05-11 13:00:00" --to "2023-05-11 17:00:00"
%(prog)s --from "2023-05-11" --to "2023-05-12" # uses 00:00:00 and 23:59:59
%(prog)s --from "13:00:00" --to "17:00:00" # uses today's date
%(prog)s --from "13:00" --to "17:00" # uses today's date, HH:MM format
%(prog)s --relative "7 days" --debug
%(prog)s --relative "1 hour" --env-file /path/to/.env
%(prog)s --relative "1 day" --utc # display times in UTC instead of local time
%(prog)s --list-apps # list all applications with OCR data
%(prog)s --relative "1 day" --app "com.apple.Safari" # filter by application
"""
)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("-r", "--relative", metavar="TIME", help="relative time period (e.g., '1 hour', '5h', '3m', '10d', '2w')")
group.add_argument("--from", dest="from_time", metavar="DATETIME",
help="start time in format 'YYYY-MM-DD HH:MM:SS', 'YYYY-MM-DD HH:MM', 'YYYY-MM-DD' (uses 00:00:00), 'HH:MM:SS', or 'HH:MM' (uses today's date)")
group.add_argument("--list-apps", action="store_true", help="list all applications that have OCR data")
parser.add_argument("--to", dest="to_time", metavar="DATETIME",
help="end time in format 'YYYY-MM-DD HH:MM:SS', 'YYYY-MM-DD HH:MM', 'YYYY-MM-DD' (uses 23:59:59), 'HH:MM:SS', or 'HH:MM' (uses today's date)")
parser.add_argument("--app", metavar="APPLICATION", help="filter OCR data by specific application name")
parser.add_argument("--debug", action="store_true", help="enable debug output")
parser.add_argument("--env-file", metavar="FILE", help="path to .env file with database configuration")
parser.add_argument("--utc", action="store_true", help="display times in UTC instead of local time")
args = parser.parse_args()
# validate that --from and --to are provided together
if args.from_time and not args.to_time:
parser.error("--to is required when --from is provided")
if args.to_time and not args.from_time:
parser.error("--to can only be used together with --from")
# validate that --app can only be used with time range queries
if args.app and args.list_apps:
parser.error("--app cannot be used with --list-apps")
return args
def main():
"""main function for the OCR cli tool."""
args = parse_arguments()
try:
# connect to the database using rewinddb library
print("connecting to rewind database...")
with rewinddb.RewindDB(args.env_file) as db:
if args.list_apps:
# list all applications with OCR data
print("retrieving applications with OCR data...")
applications = get_applications_with_ocr_data(db)
if not applications:
print("no applications found with OCR data.")
return
print(f"found {len(applications)} applications with OCR data:")
for app in applications:
print(f" {app}")
return
# get OCR data based on the specified time range
if args.relative:
print(f"retrieving OCR data from the last {args.relative}...")
ocr_data = get_ocr_data_relative(db, args.relative)
else:
print(f"retrieving OCR data from {args.from_time} to {args.to_time}...")
ocr_data = get_ocr_data_absolute(db, args.from_time, args.to_time)
# filter by application if specified
if args.app:
print(f"filtering OCR data for application: {args.app}")
original_count = len(ocr_data)
ocr_data = filter_ocr_data_by_app(ocr_data, args.app)
filtered_count = len(ocr_data)
print(f"filtered from {original_count} to {filtered_count} OCR entries.")
# deduplicate OCR data
if ocr_data:
if args.debug:
print("deduplicating OCR data using fast fingerprint matching...")
else:
print("deduplicating OCR data...")
ocr_data, duplicates_removed = deduplicate_ocr_data(ocr_data, debug=args.debug)
if duplicates_removed > 0:
print(f"removed {duplicates_removed} duplicate entries.")
else:
print("no duplicates found.")
# format and display results
if not ocr_data:
if args.app:
print(f"no OCR data found for application '{args.app}' in the specified time range.")
else:
print("no OCR data found for the specified time range.")
return
print(f"found {len(ocr_data)} OCR entries after deduplication.")
# convert timestamps to local time if not using UTC
if not args.utc:
for item in ocr_data:
if 'frame_time' in item:
item['frame_time'] = convert_to_local_time(item['frame_time'])
# format with conditional app name display
show_app_name = not bool(args.app) # hide app name when filtering by specific app
formatted = format_ocr_data_with_text(ocr_data, show_app_name=show_app_name)
print("\nOCR data:")
print(formatted)
except FileNotFoundError as e:
print(f"error: {e}", file=sys.stderr)
print("check your DB_PATH setting in .env file", file=sys.stderr)
sys.exit(1)
except ConnectionError as e:
print(f"error: {e}", file=sys.stderr)
print("check your DB_PASSWORD setting in .env file", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"unexpected error: {e}", file=sys.stderr)
print(f"error type: {type(e).__name__}")
import traceback
traceback.print_exc()
sys.exit(1)
if __name__ == "__main__":
main()