# We provide all the information about MCP servers via our MCP API:
#   curl -X GET 'https://glama.ai/api/mcp/v1/servers/89jobrien/mcp-joecc'
# If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""Tests for training data adapter.
Tests for extracting training data from task management database.
"""
import pytest
from mcp_task_aggregator.adapters.training_adapter import (
TrainingDataAdapter,
TrainingDataConfig,
)
from mcp_task_aggregator.models import TodoSource, TodoStatus
@pytest.fixture
def training_adapter(temp_db):
    """Provide a TrainingDataAdapter backed by the temporary test database."""
    adapter = TrainingDataAdapter(temp_db)
    return adapter
@pytest.fixture
def seeded_training_db(temp_db, todo_repo):
    """Seed the temp database with tasks suitable for training extraction.

    Tasks vary in content length, status, and source so extraction filters
    have something to discriminate on.
    """
    seed_rows = [
        dict(content="Short task", status=TodoStatus.DONE, priority=1),
        dict(
            content="A medium length task description for training",
            status=TodoStatus.TODO,
            priority=2,
        ),
        dict(
            content=(
                "A longer task description that provides more context for the "
                "training pipeline to learn from"
            ),
            status=TodoStatus.IN_PROGRESS,
            priority=3,
        ),
        dict(
            content="External task from Jira system",
            status=TodoStatus.TODO,
            source_system=TodoSource.JIRA,
            source_id="TEST-123",
        ),
    ]
    # Insert in a fixed order so tests relying on row counts stay stable.
    for row in seed_rows:
        todo_repo.create(**row)
    return temp_db
class TestTrainingDataConfig:
    """Unit tests for the TrainingDataConfig settings object."""

    def test_default_values(self):
        """A bare config should expose the documented defaults."""
        cfg = TrainingDataConfig()
        assert (cfg.min_content_length, cfg.max_content_length) == (10, 500)
        assert cfg.include_completed_only is False
        assert cfg.exclude_sources == []

    def test_custom_values(self):
        """Explicit keyword arguments should override every default."""
        cfg = TrainingDataConfig(
            min_content_length=20,
            max_content_length=200,
            include_completed_only=True,
            exclude_sources=["jira", "github"],
        )
        assert (cfg.min_content_length, cfg.max_content_length) == (20, 200)
        assert cfg.include_completed_only is True
        assert cfg.exclude_sources == ["jira", "github"]
class TestTrainingDataAdapter:
    """Construction tests for TrainingDataAdapter."""

    def test_adapter_creation(self, temp_db):
        """Constructing with only a database should supply a default config."""
        created = TrainingDataAdapter(temp_db)
        assert created.db == temp_db
        assert created.config is not None

    def test_adapter_with_config(self, temp_db):
        """A caller-supplied config should be stored on the adapter."""
        custom = TrainingDataConfig(min_content_length=50)
        created = TrainingDataAdapter(temp_db, config=custom)
        assert created.config.min_content_length == 50
class TestExtractTaskInputs:
    """Tests for the extract_task_inputs method.

    Fix: test_task_input_has_context previously guarded all of its
    assertions behind ``if inputs:`` — if extraction silently returned an
    empty list the test passed without checking anything. It now asserts
    the seeded database yields at least one input before inspecting it.
    """

    @staticmethod
    def _make_adapter(db):
        """Build an adapter with a permissive length window for the seed data."""
        return TrainingDataAdapter(
            db,
            config=TrainingDataConfig(min_content_length=5, max_content_length=500),
        )

    def test_empty_database(self, training_adapter):
        """An empty database should yield an empty list, not None or an error."""
        inputs = training_adapter.extract_task_inputs()
        assert inputs == []

    def test_extracts_tasks(self, seeded_training_db):
        """Seeded tasks within the length window should be extracted."""
        adapter = self._make_adapter(seeded_training_db)
        inputs = adapter.extract_task_inputs()
        assert len(inputs) > 0

    def test_respects_limit(self, seeded_training_db):
        """The limit parameter caps the number of returned inputs."""
        adapter = self._make_adapter(seeded_training_db)
        inputs = adapter.extract_task_inputs(limit=2)
        assert len(inputs) <= 2

    def test_task_input_has_context(self, seeded_training_db):
        """Extracted inputs must carry status/priority context metadata."""
        adapter = self._make_adapter(seeded_training_db)
        inputs = adapter.extract_task_inputs(limit=1)
        # The seeded DB contains qualifying tasks, so an empty result is a
        # failure — do not skip the assertions silently.
        assert inputs, "expected at least one training input from seeded DB"
        first = inputs[0]
        assert first.context is not None
        assert "status" in first.context
        assert "priority" in first.context
class TestComputeCompletionSignals:
    """Tests for the compute_completion_signals method.

    Each test feeds a before/after state pair and checks one signal key.
    """

    def test_task_completed(self, training_adapter):
        """Moving into 'done' sets the task_completed flag."""
        signals = training_adapter.compute_completion_signals(
            {"status": "in_progress"}, {"status": "done"}
        )
        assert signals["task_completed"] is True

    def test_task_not_completed(self, training_adapter):
        """An unchanged non-done status leaves task_completed False."""
        signals = training_adapter.compute_completion_signals(
            {"status": "in_progress"}, {"status": "in_progress"}
        )
        assert signals["task_completed"] is False

    def test_status_progression(self, training_adapter):
        """The before/after statuses are recorded in order."""
        signals = training_adapter.compute_completion_signals(
            {"status": "in_progress"}, {"status": "done"}
        )
        assert signals["status_progression"] == ["in_progress", "done"]

    def test_proper_progression_detection(self, training_adapter):
        """A todo -> in_progress move counts as a proper progression."""
        signals = training_adapter.compute_completion_signals(
            {"status": "todo"}, {"status": "in_progress"}
        )
        assert signals["proper_progression"] is True

    def test_priority_change_detection(self, training_adapter):
        """A differing priority value sets priority_changed."""
        signals = training_adapter.compute_completion_signals(
            {"status": "todo", "priority": 1}, {"status": "todo", "priority": 3}
        )
        assert signals["priority_changed"] is True

    def test_no_priority_change(self, training_adapter):
        """An identical priority value leaves priority_changed False."""
        signals = training_adapter.compute_completion_signals(
            {"status": "todo", "priority": 2}, {"status": "done", "priority": 2}
        )
        assert signals["priority_changed"] is False

    def test_due_date_set(self, training_adapter):
        """Going from no due date to a date sets due_date_set."""
        signals = training_adapter.compute_completion_signals(
            {"status": "todo", "due_date": None},
            {"status": "todo", "due_date": "2024-12-31"},
        )
        assert signals["due_date_set"] is True

    def test_error_detection(self, training_adapter):
        """An error present in the after state is counted."""
        signals = training_adapter.compute_completion_signals(
            {"status": "todo"}, {"status": "todo", "error": "Something failed"}
        )
        assert signals["error_count"] == 1
class TestGetTrainingStatistics:
    """Tests for the get_training_statistics method."""

    def test_empty_database_stats(self, training_adapter):
        """An empty database should produce all-zero / empty aggregates."""
        stats = training_adapter.get_training_statistics()
        assert stats["total_todos"] == 0
        assert stats["valid_for_training"] == 0
        assert stats["by_status"] == {} and stats["by_source"] == {}

    def test_seeded_database_stats(self, seeded_training_db):
        """Seeded tasks should be counted and grouped by status."""
        adapter = TrainingDataAdapter(
            seeded_training_db,
            config=TrainingDataConfig(min_content_length=5, max_content_length=500),
        )
        stats = adapter.get_training_statistics()
        assert stats["total_todos"] > 0
        assert stats["by_status"]
class TestCreateTrainingBatch:
    """Tests for the create_training_batch method."""

    def test_creates_batch(self, seeded_training_db):
        """A fresh batch at step 0 should report zeroed counters."""
        adapter = TrainingDataAdapter(
            seeded_training_db,
            config=TrainingDataConfig(min_content_length=5, max_content_length=500),
        )
        batch = adapter.create_training_batch(num_examples=10, step=0)
        assert (batch.step, batch.epoch, batch.epoch_step) == (0, 0, 0)

    def test_batch_step_parameter(self, training_adapter):
        """The step argument should be carried through to the batch."""
        batch = training_adapter.create_training_batch(num_examples=5, step=42)
        assert batch.step == 42