We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/gaiaaiagent/regen-registry-review-mcp'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""Integration tests for LLM extraction with real API calls."""
import pytest
from pathlib import Path
from registry_review_mcp.config.settings import settings
from registry_review_mcp.extractors.llm_extractors import (
DateExtractor,
extract_fields_with_llm,
)
# Module-wide markers: every test in this file is tagged ``expensive`` and is
# skipped unless both an Anthropic API key and the LLM-extraction flag are set.
pytestmark = [
    pytest.mark.expensive,
    pytest.mark.skipif(
        not (settings.anthropic_api_key and settings.llm_extraction_enabled),
        reason=(
            "LLM extraction not configured "
            "(set ANTHROPIC_API_KEY and enable LLM extraction)"
        ),
    ),
]
class TestRealAPIDateExtraction:
    """Test date extraction with real Anthropic API calls.

    Each test passes a small markdown snippet to ``DateExtractor.extract``
    and inspects the ``field_type``, ``value`` and ``confidence`` of the
    returned fields. Results are printed so a failing run shows what the
    extractor actually produced.
    """

    @pytest.mark.asyncio
    async def test_extract_simple_project_start_date(self):
        """Test extracting a simple project start date."""
        markdown = (
            "\n"
            "# Project Information\n"
            "## 1.8. Project Start Date\n"
            "01/01/2022. The project will be aligned with the calendar year, with annual\n"
            "monitoring rounds taking place in the August March bracket when the soil is\n"
            "dormant.\n"
        )
        extractor = DateExtractor()
        results = await extractor.extract(markdown, [], "test_project_plan.pdf")

        print(f"\n=== Extracted {len(results)} dates ===")
        for field in results:
            print(f" {field.field_type}: {field.value} (confidence: {field.confidence})")
            print(f" Reasoning: {field.reasoning}")

        # Verify we got results
        assert len(results) > 0, "Should extract at least one date"

        # Check for project start date
        project_start_dates = [
            field for field in results if field.field_type == "project_start_date"
        ]
        assert len(project_start_dates) > 0, "Should find project start date"

        first_start = project_start_dates[0]
        # Compare against str(value), as the format-flexibility test does, so
        # a non-string value cannot raise TypeError on the `in` check.
        assert "2022-01-01" in str(first_start.value)
        # Verify confidence is reasonable
        assert first_start.confidence >= 0.7

    @pytest.mark.asyncio
    async def test_extract_multiple_date_types(self):
        """Test extracting multiple date types from complex text."""
        markdown = (
            "\n"
            "# Project Timeline\n"
            "Project Start Date: January 1, 2022\n"
            "Baseline assessment was conducted on March 15, 2022.\n"
            "Satellite imagery was acquired on June 20, 2022 for the baseline analysis.\n"
            "Field sampling occurred from August 15-20, 2022.\n"
            "The monitoring report was completed on December 31, 2022.\n"
        )
        extractor = DateExtractor()
        results = await extractor.extract(markdown, [], "test_timeline.pdf")

        print(f"\n=== Extracted {len(results)} dates ===")
        for field in results:
            print(f" {field.field_type}: {field.value} (confidence: {field.confidence})")

        # Should extract multiple dates
        assert len(results) >= 3, "Should extract at least 3 different dates"

        # Check for different date types
        date_types = {field.field_type for field in results}
        assert "project_start_date" in date_types
        assert "baseline_date" in date_types or "sampling_date" in date_types

    @pytest.mark.asyncio
    async def test_date_format_flexibility(self):
        """Test that various date formats are recognized."""
        markdown = (
            "\n"
            "Project Timeline:\n"
            "Project started: January 15, 2022\n"
            "Baseline conducted: 03/20/2022\n"
            "Field work: August 10-15, 2022\n"
            "Report submitted: 2022-12-31\n"
        )
        extractor = DateExtractor()
        results = await extractor.extract(markdown, [], "test_formats.pdf")

        print(f"\n=== Extracted {len(results)} dates from various formats ===")
        for field in results:
            print(f" {field.value} from: '{field.raw_text}' (type: {field.field_type})")

        # Should recognize multiple formats
        assert len(results) >= 3, "Should extract dates in multiple formats"

        # Verify different date formats were parsed
        values = [str(field.value) for field in results]
        assert any("2022-01-15" in value for value in values)
        # NOTE(review): the "03" fallback subsumes the ISO check, so this only
        # verifies that some value mentions "03"; tighten it if the extractor
        # is expected to always normalise to ISO dates.
        assert any("2022-03-20" in value or "03" in value for value in values)

    @pytest.mark.asyncio
    async def test_date_disambiguation(self):
        """Test that dates are correctly classified by context."""
        markdown = (
            "\n"
            "Project Dates:\n"
            "The project started on 01/01/2022.\n"
            "Imagery was acquired on 06/15/2022 for baseline analysis.\n"
            "Soil sampling was conducted on 08/20/2022.\n"
        )
        extractor = DateExtractor()
        results = await extractor.extract(markdown, [], "test_disambiguation.pdf")

        print("\n=== Date Classification ===")
        for field in results:
            print(f" {field.field_type}: {field.value}")
            print(f" Context: {field.raw_text}")

        # Check that dates are classified differently
        date_types = {field.field_type for field in results}
        assert len(date_types) >= 2, "Should classify dates into different types"

        # Verify specific classifications
        imagery_dates = [field for field in results if "imagery" in field.field_type]
        sampling_dates = [field for field in results if "sampling" in field.field_type]

        # At least one should be correctly classified
        assert imagery_dates or sampling_dates
class TestCachingWithRealAPI:
    """Test that caching works with real API calls."""

    @pytest.mark.asyncio
    async def test_caching_prevents_duplicate_api_calls(self):
        """Test that second call uses cache instead of API.

        Runs the same extraction twice with the same document name, then
        checks that both calls return matching results and that the second
        call finishes in under 100 ms.
        """
        import time

        markdown = "Project started on 01/01/2022"
        # Timestamped document name; presumably this keeps the first call
        # from hitting a cache entry left by an earlier run (confirm against
        # the extractor's cache key).
        doc_name = f"cache_test_{int(time.time())}.pdf"
        extractor = DateExtractor()

        # First call - hits API. perf_counter() is monotonic, so the measured
        # duration is immune to wall-clock adjustments during the test.
        start1 = time.perf_counter()
        results1 = await extractor.extract(markdown, [], doc_name)
        duration1 = time.perf_counter() - start1
        print(f"\nFirst call (API): {duration1:.2f}s, extracted {len(results1)} dates")

        # Second call - should use cache
        start2 = time.perf_counter()
        results2 = await extractor.extract(markdown, [], doc_name)
        duration2 = time.perf_counter() - start2
        print(f"Second call (cache): {duration2:.2f}s, extracted {len(results2)} dates")

        # Fail with a clear message instead of an IndexError below when the
        # extractor returns nothing.
        assert results1, "First call should extract at least one date"

        # Verify results are identical
        assert len(results1) == len(results2)
        assert results1[0].value == results2[0].value

        # Second call should be much faster (< 100ms for cache hit)
        assert duration2 < 0.1, f"Cache hit should be fast, got {duration2}s"
class TestEndToEndExtraction:
    """Test end-to-end extraction with evidence data format."""

    @pytest.mark.asyncio
    async def test_extract_from_evidence_structure(self):
        """Test extraction from evidence.json format.

        Builds a single-requirement evidence payload, passes it to
        ``extract_fields_with_llm`` and checks that the ``"dates"`` entry of
        the result is non-empty and that its first field has a confidence of
        at least 0.7.
        """
        # Simulate evidence data structure
        evidence_data = {
            "evidence": [
                {
                    "requirement_id": "REQ-007",
                    "requirement_text": "Project start date",
                    "evidence_snippets": [
                        {
                            "document_name": "Project Plan",
                            "text": "Project Start Date: 01/01/2022. The project will be aligned with the calendar year.",
                            "page": 4,
                        }
                    ],
                }
            ]
        }
        session_id = "test_session"

        results = await extract_fields_with_llm(session_id, evidence_data)
        # Look the dates up once; a missing key is treated as "no dates".
        dates = results.get("dates", [])

        print("\n=== End-to-End Extraction ===")
        print(f"Dates extracted: {len(dates)}")
        for field in dates:
            print(f" {field.field_type}: {field.value} (confidence: {field.confidence})")

        # Should extract dates
        assert len(dates) > 0, "Should extract dates from evidence"

        # Should have high confidence. The assert above guarantees dates is
        # non-empty, so no extra guard is needed before indexing.
        assert dates[0].confidence >= 0.7, "Should have reasonable confidence"