"""Tests for text profile tool."""
import pytest
from mcp_server.schemas import TextProfile
from mcp_server.tools.text_profile import text_profile


def test_text_profile_returns_text_profile():
    """Test that text_profile returns a TextProfile object."""
    text = "This is a test sentence. It has multiple words and punctuation!"
    result = text_profile(text)
    assert isinstance(result, TextProfile)
    assert result.char_count > 0
    # TODO: Uncomment after implementing _tokenize
    # assert result.token_count > 0


def test_text_profile_with_short_text():
    """Test text_profile with a short text passage."""
    text = "Hello world!"
    result = text_profile(text)
    assert isinstance(result, TextProfile)
    assert result.char_count == len(text)
    # TODO: Uncomment after implementing _tokenize
    # assert result.token_count >= 2
    assert 0.0 <= result.type_token_ratio <= 1.0
    # TODO: Uncomment after implementing sentiment analysis
    # assert -1.0 <= result.sentiment <= 1.0
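

# A hedged sketch of the pending sentiment check above, assuming the analyzer
# will eventually score clearly positive wording above neutral; marked xfail
# so the suite stays green until sentiment analysis is implemented.
@pytest.mark.xfail(reason="sentiment analysis not yet implemented", strict=False)
def test_text_profile_sentiment_is_positive_for_positive_text():
    result = text_profile("This is wonderful, excellent, and delightful!")
    assert result.sentiment > 0.0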


def test_text_profile_with_longer_text():
    """Test text_profile with a longer, more complex text."""
    text = """
    Artificial intelligence is transforming many industries. Machine learning
    algorithms can now perform tasks that were once thought to require human
    intelligence. However, these systems also raise important ethical questions
    about bias, privacy, and accountability. Researchers and policymakers are
    working to develop frameworks for responsible AI development.
    """
    result = text_profile(text)
    assert isinstance(result, TextProfile)
    # TODO: Uncomment after implementing _tokenize
    # assert result.token_count > 20
    # TODO: Uncomment after implementing _top_terms
    # assert len(result.keywords) > 0
    # assert len(result.top_ngrams) > 0
    assert isinstance(result.readability_flesch, float)
    assert isinstance(result.sentiment, float)
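

# A hedged sketch of the pending _top_terms checks above, assuming the helper
# will surface frequent content words and n-grams from repetitive text; marked
# xfail so it documents intent without failing before the helper exists.
@pytest.mark.xfail(reason="_top_terms not yet implemented", strict=False)
def test_text_profile_top_terms_nonempty():
    text = "Intelligence matters. Artificial intelligence raises questions about intelligence."
    result = text_profile(text)
    assert len(result.keywords) > 0
    assert len(result.top_ngrams) > 0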


def test_text_profile_with_document_id():
    """Test text_profile with a document ID from the corpus."""
    # The tool should first try to resolve doc_id as a file in the corpus.
    doc_id = "ai_ethics.txt"
    result = text_profile(doc_id)
    assert isinstance(result, TextProfile)
    # If the file exists, the profile covers its full contents; if not, the
    # tool falls back to treating doc_id itself as (short) raw text. Either
    # way a TextProfile comes back, which is all this test pins down.
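

# A hedged follow-up to the fallback described above: an id that matches no
# corpus file should be profiled as the raw text of the id string itself
# (the exact fallback behavior is an assumption based on the comment above).
def test_text_profile_unknown_id_falls_back_to_raw_text():
    doc_id = "no_such_document_in_corpus.txt"
    result = text_profile(doc_id)
    assert isinstance(result, TextProfile)
    assert result.char_count == len(doc_id)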


def test_text_profile_fields_are_reasonable():
    """Test that text profile fields contain reasonable values."""
    text = "The quick brown fox jumps over the lazy dog. " * 5
    result = text_profile(text)
    # Type-token ratio should be between 0 and 1
    assert 0.0 <= result.type_token_ratio <= 1.0
    # Sentiment should be between -1 and 1
    assert -1.0 <= result.sentiment <= 1.0
    # Readability score typically ranges from 0-100
    # (though it can go negative or above 100 in edge cases)
    assert isinstance(result.readability_flesch, float)
    # Should have some keywords/ngrams
    assert isinstance(result.keywords, list)
    assert isinstance(result.top_ngrams, list)
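

# A hedged extension of the field checks above, assuming keywords and
# top_ngrams hold strings once populated (both lists may be empty before
# _top_terms is implemented, in which case these checks pass vacuously).
def test_text_profile_list_fields_hold_strings():
    result = text_profile("alpha beta gamma alpha beta alpha.")
    assert all(isinstance(k, str) for k in result.keywords)
    assert all(isinstance(n, str) for n in result.top_ngrams)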


@pytest.mark.parametrize(
    "text,min_tokens",
    [
        ("One two three.", 3),
        ("This is a longer sentence with more words.", 7),
        ("Short", 1),
    ],
)
def test_text_profile_token_counts(text: str, min_tokens: int):
    """Parametrized test for token counting."""
    result = text_profile(text)
    # TODO: Uncomment after implementing _tokenize
    # assert result.token_count >= min_tokens
    # Stub: just check it returns a valid result for now
    assert isinstance(result, TextProfile)
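

# A hedged companion to the parametrized stub above: the same cases with the
# token-count assertion active, marked xfail so the intended minimums are
# documented without breaking the suite before _tokenize is implemented.
@pytest.mark.xfail(reason="_tokenize not yet implemented", strict=False)
@pytest.mark.parametrize(
    "text,min_tokens",
    [
        ("One two three.", 3),
        ("This is a longer sentence with more words.", 7),
        ("Short", 1),
    ],
)
def test_text_profile_minimum_token_counts(text: str, min_tokens: int):
    result = text_profile(text)
    assert result.token_count >= min_tokens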