"""
This type stub file was generated by pyright.
"""
from typing import Dict, Optional
from ..models import CrawlStatus
class TerminalUI:
"""Terminal user interface for CrawlerMonitor using rich library."""
def __init__(self, refresh_rate: float = ..., max_width: int = ...) -> None:
"""
Initialize the terminal UI.
Args:
refresh_rate: How often to refresh the UI (in seconds)
max_width: Maximum width of the UI in characters
"""
...
def start(self, monitor): # -> None:
"""Start the UI thread."""
...
def stop(self): # -> None:
"""Stop the UI thread."""
...
class CrawlerMonitor:
"""
Comprehensive monitoring and visualization system for tracking web crawler operations in real-time.
Provides a terminal-based dashboard that displays task statuses, memory usage, queue statistics,
and performance metrics.
"""
def __init__(self, urls_total: int = ..., refresh_rate: float = ..., enable_ui: bool = ..., max_width: int = ...) -> None:
"""
Initialize the CrawlerMonitor.
Args:
urls_total: Total number of URLs to be crawled
refresh_rate: How often to refresh the UI (in seconds)
enable_ui: Whether to display the terminal UI
max_width: Maximum width of the UI in characters
"""
...
def start(self): # -> None:
"""
Start the monitoring session.
- Initializes the start_time
- Sets is_running to True
- Starts the terminal UI if enabled
"""
...
def stop(self): # -> None:
"""
Stop the monitoring session.
- Records end_time
- Sets is_running to False
- Stops the terminal UI
- Generates final summary statistics
"""
...
def add_task(self, task_id: str, url: str): # -> None:
"""
Register a new task with the monitor.
Args:
task_id: Unique identifier for the task
url: URL being crawled
The task is initialized with:
- status: QUEUED
- url: The URL to crawl
- enqueue_time: Current time
- memory_usage: 0
- peak_memory: 0
- wait_time: 0
- retry_count: 0
"""
...
def update_task(self, task_id: str, status: Optional[CrawlStatus] = ..., start_time: Optional[float] = ..., end_time: Optional[float] = ..., memory_usage: Optional[float] = ..., peak_memory: Optional[float] = ..., error_message: Optional[str] = ..., retry_count: Optional[int] = ..., wait_time: Optional[float] = ...): # -> None:
"""
Update statistics for a specific task.
Args:
task_id: Unique identifier for the task
status: New status (QUEUED, IN_PROGRESS, COMPLETED, FAILED)
start_time: When task execution started
end_time: When task execution ended
memory_usage: Current memory usage in MB
peak_memory: Maximum memory usage in MB
error_message: Error description if failed
retry_count: Number of retry attempts
wait_time: Time spent in queue
Updates task statistics and updates status counts.
If status changes, decrements old status count and
increments new status count.
"""
...
def update_memory_status(self, status: str): # -> None:
"""
Update the current memory status.
Args:
status: Memory status (NORMAL, PRESSURE, CRITICAL, or custom)
Also updates the UI to reflect the new status.
"""
...
def update_queue_statistics(self, total_queued: int, highest_wait_time: float, avg_wait_time: float): # -> None:
"""
Update statistics related to the task queue.
Args:
total_queued: Number of tasks currently in queue
highest_wait_time: Longest wait time of any queued task
avg_wait_time: Average wait time across all queued tasks
"""
...
def get_task_stats(self, task_id: str) -> Dict:
"""
Get statistics for a specific task.
Args:
task_id: Unique identifier for the task
Returns:
Dictionary containing all task statistics
"""
...
def get_all_task_stats(self) -> Dict[str, Dict]:
"""
Get statistics for all tasks.
Returns:
Dictionary mapping task_ids to their statistics
"""
...
def get_memory_status(self) -> str:
"""
Get the current memory status.
Returns:
Current memory status string
"""
...
def get_queue_stats(self) -> Dict:
"""
Get current queue statistics.
Returns:
Dictionary with queue statistics including:
- total_queued: Number of tasks in queue
- highest_wait_time: Longest wait time
- avg_wait_time: Average wait time
"""
...
def get_summary(self) -> Dict:
"""
Get a summary of all crawler statistics.
Returns:
Dictionary containing:
- runtime: Total runtime in seconds
- urls_total: Total URLs to process
- urls_completed: Number of completed URLs
- completion_percentage: Percentage complete
- status_counts: Count of tasks in each status
- memory_status: Current memory status
- peak_memory_percent: Highest memory usage
- peak_memory_time: When peak memory occurred
- avg_task_duration: Average task processing time
- estimated_completion_time: Projected finish time
- requeue_rate: Percentage of tasks requeued
"""
...
def render(self): # -> None:
"""
Render the terminal UI.
This is the main UI rendering loop that:
1. Updates all statistics
2. Formats the display
3. Renders the ASCII interface
4. Handles keyboard input
Note: The actual rendering is handled by the TerminalUI class
which uses the rich library's Live display.
"""
...
if __name__ == "__main__":
monitor = ...