MCP YouTube Intelligence

test_entities.py•6.57 KiB

"""Tests for entity extraction.""" import pytest from mcp_youtube_intelligence.core.entities import extract_entities, DEFAULT_ENTITY_DICT class TestExtractEntities: def test_empty_text(self): assert extract_entities("") == [] def test_single_korean_company(self): result = extract_entities("삼성전자가 신제품을 발표했습니다") names = [e["name"] for e in result] assert "Samsung Electronics" in names def test_single_english_company(self): result = extract_entities("Apple announced a new iPhone today") names = [e["name"] for e in result] assert "Apple" in names def test_count_accuracy(self): text = "테슬라 주가가 올랐습니다. 테슬라 CEO는 테슬라 공장을 방문했습니다." result = extract_entities(text) tesla = next(e for e in result if e["name"] == "Tesla") assert tesla["count"] == 3 def test_synonym_merging(self): text = "삼성전자와 삼성의 미래" result = extract_entities(text) samsung_entries = [e for e in result if e["name"] == "Samsung Electronics"] assert len(samsung_entries) == 1 assert samsung_entries[0]["count"] >= 2 def test_multiple_entity_types(self): text = "비트코인과 나스닥이 동반 상승했고 트럼프가 발언했습니다" result = extract_entities(text) types = {e["type"] for e in result} assert "crypto" in types assert "index" in types assert "person" in types def test_sorted_by_count(self): text = "AI AI AI 반도체 반도체 삼성전자" result = extract_entities(text) counts = [e["count"] for e in result] assert counts == sorted(counts, reverse=True) def test_extra_dict(self): extra = {"커피": ("commodity", "Coffee")} result = extract_entities("오늘 커피 가격이 올랐습니다", extra_dict=extra) names = [e["name"] for e in result] assert "Coffee" in names def test_no_false_positives(self): text = "오늘 날씨가 좋습니다" result = extract_entities(text) assert len(result) == 0 def test_entity_has_required_keys(self): result = extract_entities("Google은 좋은 회사입니다") for entity in result: assert "type" in entity assert "name" in entity assert "keyword" in entity assert "count" in entity # --- New tests for expanded dictionary and case-insensitive matching --- def 
test_case_insensitive_english(self): """English entities should match regardless of case.""" result = extract_entities("nvidia is beating amd in the GPU market") names = [e["name"] for e in result] assert "NVIDIA" in names assert "AMD" in names def test_word_boundary_go_no_false_match(self): """'Go' language should not match 'going' or 'good'.""" extra = {"Go": ("technology", "Go")} result = extract_entities("I'm going to do something good", extra_dict=extra) names = [e["name"] for e in result] assert "Go" not in names def test_word_boundary_go_exact_match(self): """'Go' should match when standalone.""" extra = {"Go": ("technology", "Go")} result = extract_entities("I use Go for backend development", extra_dict=extra) names = [e["name"] for e in result] assert "Go" in names def test_ai_no_false_match(self): """'AI' should not match 'AGAIN' or 'SAID'.""" result = extract_entities("AGAIN he SAID something") names = [e["name"] for e in result] assert "AI" not in names def test_ai_ml_terms(self): """AI/ML terms should be detected.""" text = "The Transformer architecture uses attention mechanism for deep learning" result = extract_entities(text) names = [e["name"] for e in result] assert "Transformer" in names assert "Deep Learning" in names def test_global_tech_companies(self): """Global tech companies should be detected.""" text = "Microsoft and Anthropic are investing heavily in OpenAI competitors" result = extract_entities(text) names = [e["name"] for e in result] assert "Microsoft" in names assert "Anthropic" in names def test_programming_languages(self): """Programming languages should be detected.""" text = "We migrated from Python to Rust with Docker and Kubernetes" result = extract_entities(text) names = [e["name"] for e in result] assert "Python" in names assert "Rust" in names assert "Docker" in names assert "Kubernetes" in names def test_crypto_entities(self): """Crypto entities should be detected.""" text = "Bitcoin and Solana are outperforming in the DeFi 
space" result = extract_entities(text) names = [e["name"] for e in result] assert "BTC" in names assert "SOL" in names assert "DeFi" in names def test_global_people(self): """Global notable people should be detected.""" text = "Sam Altman and Jensen Huang discussed AI at the conference" result = extract_entities(text) names = [e["name"] for e in result] assert "Sam Altman" in names assert "Jensen Huang" in names def test_finance_macro(self): """Finance/macro terms should be detected.""" text = "The Fed raised interest rate amid rising inflation and GDP growth" result = extract_entities(text) names = [e["name"] for e in result] assert "Federal Reserve" in names assert "Inflation" in names def test_sector_terms(self): """Sector terms should be detected.""" text = "semiconductor and cloud computing are driving the cybersecurity market" result = extract_entities(text) names = [e["name"] for e in result] assert "Semiconductor" in names assert "Cloud Computing" in names assert "Cybersecurity" in names def test_english_tech_video_not_empty(self): """Simulated English tech video transcript should yield entities.""" text = ( "Today we're going to look at how NVIDIA's new GPU compares to AMD. " "We'll be using Python and Docker to benchmark performance. " "OpenAI just released GPT-4o and it's incredible for machine learning tasks. " "Meanwhile Tesla stock is soaring on NASDAQ." ) result = extract_entities(text) assert len(result) >= 5 # Should find many entities

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/JangHyuckYun/mcp-youtube-intelligence'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_entities.py•6.57 KiB