# test_scraper.py - unit tests for the NFL transaction scraper
import sys
import os
import unittest
from unittest.mock import patch, MagicMock
import pandas as pd
import datetime
# Put the directory one level above this test file at the front of the import
# search path so the `scraper` module imported below can be found.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)),
)
from scraper import (
validate_date,
get_request_params,
has_next_page,
parse_transactions_table,
get_all_nfl_teams,
get_available_transaction_types
)
class TestScraper(unittest.TestCase):
    """Test cases for the NFL transaction scraper.

    Covers date validation, request-parameter construction, next-page
    detection, HTML table parsing (with ``pandas.read_html`` mocked) and the
    team / transaction-type listings exposed by the ``scraper`` module.

    Independent scenarios inside a test are wrapped in ``subTest`` so that one
    failing scenario does not hide the results of the others.
    """

    def test_validate_date(self):
        """validate_date accepts YYYY-MM-DD strings and rejects other input."""
        for value in ("2023-01-01", "2000-12-31"):
            with self.subTest(valid=value):
                self.assertTrue(validate_date(value))

        # Two wrongly ordered/separated dates and one non-date string.
        for value in ("01-01-2023", "2023/01/01", "not-a-date"):
            with self.subTest(invalid=value):
                self.assertFalse(validate_date(value))

    def test_get_request_params(self):
        """get_request_params builds the expected query parameters.

        Checks the full-argument case, the "All" transaction type, the
        defaults for team/player, and that a malformed start date raises
        ``ValueError``.
        """
        with self.subTest("all parameters supplied"):
            params = get_request_params(
                team="Patriots",
                player="Brady",
                start_date="2023-01-01",
                end_date="2023-12-31",
                transaction_type="Injury",
            )
            # Base parameters.
            self.assertEqual(params["Team"], "Patriots")
            self.assertEqual(params["Player"], "Brady")
            self.assertEqual(params["BeginDate"], "2023-01-01")
            self.assertEqual(params["EndDate"], "2023-12-31")
            self.assertEqual(params["submit"], "Search")
            # The "Injury" transaction type is expected to tick this checkbox.
            self.assertEqual(params["InjuriesChkBx"], "yes")

        with self.subTest('"All" transaction type'):
            params = get_request_params(
                start_date="2023-01-01",
                end_date="2023-12-31",
                transaction_type="All",
            )
            # "All" must not set the per-type checkbox parameters.
            self.assertNotIn("InjuriesChkBx", params)
            self.assertNotIn("PlayerMovementChkBx", params)

        with self.subTest("team and player omitted"):
            params = get_request_params(
                start_date="2023-01-01",
                end_date="2023-12-31",
            )
            self.assertEqual(params["Team"], "")
            self.assertEqual(params["Player"], "")

        with self.subTest("invalid start date"):
            with self.assertRaises(ValueError):
                get_request_params(
                    start_date="invalid-date",
                    end_date="2023-12-31",
                )

    def test_has_next_page(self):
        """has_next_page is true only when the page has a "next" link."""
        # The markup is assembled line by line so the string content does not
        # depend on how this method is indented.
        html_with_next = (
            "<html><body>\n"
            '<div class="pagination">\n'
            '<a href="...">1</a>\n'
            '<a href="..." class="next">Next</a>\n'
            "</div>\n"
            "</body></html>"
        )
        html_without_next = (
            "<html><body>\n"
            '<div class="pagination">\n'
            '<a href="...">1</a>\n'
            "</div>\n"
            "</body></html>"
        )

        with self.subTest("next link present"):
            self.assertTrue(has_next_page(html_with_next))
        with self.subTest("next link absent"):
            self.assertFalse(has_next_page(html_without_next))

    @patch('pandas.read_html')
    def test_parse_transactions_table(self, mock_read_html):
        """parse_transactions_table tags rows and degrades to an empty frame.

        ``pandas.read_html`` is replaced by a mock, so no real HTML parsing
        takes place.
        NOTE(review): the patch only takes effect if the scraper resolves
        ``read_html`` through the ``pandas`` module at call time -- confirm.
        """
        page = "<html>...</html>"
        test_df = pd.DataFrame({
            'Date': ['2023-01-01', '2023-01-02'],
            'Team': ['Patriots', 'Patriots'],
            'Acquired': ['Player A', 'Player B'],
            'Relinquished': ['', 'Player C'],
            'Notes': ['Signed as free agent', 'Traded'],
        })

        with self.subTest("one table found"):
            mock_read_html.return_value = [test_df]
            result = parse_transactions_table(page, "Player")
            self.assertEqual(len(result), 2)
            # Positional access: does not depend on the result's index labels.
            self.assertEqual(result['transaction_type'].iloc[0], "Player")

        with self.subTest("no tables found"):
            mock_read_html.return_value = []
            result = parse_transactions_table(page, "Player")
            self.assertTrue(result.empty)

        with self.subTest("read_html raises"):
            # side_effect takes precedence over return_value on the mock.
            mock_read_html.side_effect = Exception("Test error")
            result = parse_transactions_table(page, "Player")
            self.assertTrue(result.empty)

    def test_get_all_nfl_teams(self):
        """get_all_nfl_teams lists at least 32 teams, including known ones."""
        teams = get_all_nfl_teams()
        # assertGreaterEqual reports the actual count when the check fails.
        self.assertGreaterEqual(len(teams), 32)
        self.assertIn("New England Patriots", teams)
        self.assertIn("Kansas City Chiefs", teams)

    def test_get_available_transaction_types(self):
        """get_available_transaction_types includes the expected type names."""
        types = get_available_transaction_types()
        for name in ("Player", "Injury", "Legal", "All"):
            with self.subTest(transaction_type=name):
                self.assertIn(name, types)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()