edinet-mcp

edinet-mcp
tests

test_normalize.py•16.3 KiB

"""Tests for edinet_mcp._normalize.""" from __future__ import annotations import datetime from edinet_mcp._normalize import ( _extract_element, _extract_period, _extract_value, _load_taxonomy, _normalize_items, _strip_edinet_suffixes, get_taxonomy_labels, normalize_statement, ) from edinet_mcp.models import ( AccountingStandard, DocType, Filing, FinancialStatement, StatementData, ) # --------------------------------------------------------------------------- # Taxonomy loading # --------------------------------------------------------------------------- class TestLoadTaxonomy: def test_loads_all_statement_types(self) -> None: taxonomy = _load_taxonomy() assert "income_statement" in taxonomy assert "balance_sheet" in taxonomy assert "cash_flow" in taxonomy def test_items_have_required_fields(self) -> None: taxonomy = _load_taxonomy() for stmt_type, items in taxonomy.items(): for item in items: assert "id" in item, f"Missing 'id' in {stmt_type}" assert "label" in item, f"Missing 'label' in {stmt_type}" assert "elements" in item, f"Missing 'elements' in {stmt_type}" assert len(item["elements"]) > 0 # --------------------------------------------------------------------------- # Field extraction # --------------------------------------------------------------------------- class TestStripEdinetSuffixes: def test_no_suffix(self) -> None: assert _strip_edinet_suffixes("TotalAssets") == "TotalAssets" def test_ifrs_suffix(self) -> None: assert _strip_edinet_suffixes("CurrentAssetsIFRS") == "CurrentAssets" def test_summary_suffix(self) -> None: assert _strip_edinet_suffixes("NetSalesSummaryOfBusinessResults") == "NetSales" def test_ifrs_summary_suffix(self) -> None: result = _strip_edinet_suffixes("TotalAssetsIFRSSummaryOfBusinessResults") assert result == "TotalAssets" def test_key_financial_data_suffix(self) -> None: result = _strip_edinet_suffixes("OperatingRevenuesIFRSKeyFinancialData") assert result == "OperatingRevenues" def test_position_tag_ca(self) -> None: assert 
_strip_edinet_suffixes("OtherCurrentAssetsCAIFRS") == "OtherCurrentAssets" def test_position_tag_ncl(self) -> None: result = _strip_edinet_suffixes("RetirementBenefitLiabilityNCLIFRS") assert result == "RetirementBenefitLiability" def test_position_tag_nca(self) -> None: result = _strip_edinet_suffixes("OtherFinancialAssetsNCAIFRS") assert result == "OtherFinancialAssets" def test_opecf_not_stripped(self) -> None: """OpeCF is part of canonical taxonomy names — should NOT be stripped.""" result = _strip_edinet_suffixes("DepreciationAndAmortizationOpeCFIFRS") assert result == "DepreciationAndAmortizationOpeCF" def test_jgaap_suffix(self) -> None: assert _strip_edinet_suffixes("NetSalesJGAAP") == "NetSales" def test_usgaap_suffix(self) -> None: assert _strip_edinet_suffixes("RevenueUSGAAP") == "Revenue" class TestExtractElement: def test_from_element_key(self) -> None: assert _extract_element({"element": "Revenue"}) == "Revenue" def test_from_japanese_key(self) -> None: assert _extract_element({"要素ID": "NetSales"}) == "NetSales" def test_strips_namespace(self) -> None: assert _extract_element({"要素ID": "jppfs_cor:NetSales"}) == "NetSales" def test_none_when_missing(self) -> None: assert _extract_element({"other": "value"}) is None def test_strips_edinet_suffixes(self) -> None: """Element extraction also strips EDINET suffixes.""" item = {"element": "TotalAssetsIFRSSummaryOfBusinessResults"} assert _extract_element(item) == "TotalAssets" def test_strips_namespace_and_suffix(self) -> None: item = {"要素ID": "jpcrp_cor:SalesRevenuesIFRS"} assert _extract_element(item) == "SalesRevenues" class TestExtractValue: def test_int(self) -> None: assert _extract_value({"value": 12345}) == 12345 def test_float(self) -> None: assert _extract_value({"value": 12.5}) == 12.5 def test_string_int(self) -> None: assert _extract_value({"value": "12345"}) == 12345 def test_comma_separated(self) -> None: assert _extract_value({"value": "1,234,567"}) == 1234567 def test_japanese_key(self) -> 
None: assert _extract_value({"値": 999}) == 999 def test_none(self) -> None: assert _extract_value({"value": None}) is None assert _extract_value({}) is None class TestExtractPeriod: def test_japanese_key(self) -> None: assert _extract_period({"相対年度": "当期"}) == "当期" assert _extract_period({"相対年度": "前期"}) == "前期" def test_xbrl_context_prior(self) -> None: assert _extract_period({"context": "PriorYearDuration"}) == "前期" def test_xbrl_context_current(self) -> None: assert _extract_period({"context": "CurrentYearDuration"}) == "当期" def test_default_is_current(self) -> None: assert _extract_period({}) == "当期" def test_non_consolidated_returns_none(self) -> None: """Non-consolidated contexts should be skipped.""" item = {"context": "CurrentYearInstant_NonConsolidatedMember"} assert _extract_period(item) is None def test_non_consolidated_prior_returns_none(self) -> None: item = {"context": "Prior1YearDuration_NonConsolidatedMember"} assert _extract_period(item) is None def test_consolidated_context_works(self) -> None: """Plain context (consolidated) should work normally.""" assert _extract_period({"context": "CurrentYearInstant"}) == "当期" assert _extract_period({"context": "Prior1YearDuration"}) == "前期" # --------------------------------------------------------------------------- # Normalization # --------------------------------------------------------------------------- def _make_filing() -> Filing: return Filing( doc_id="S100TEST", edinet_code="E99999", company_name="テスト株式会社", doc_type=DocType.ANNUAL_REPORT, filing_date=datetime.date(2025, 6, 20), ) class TestNormalizeItems: def test_basic_pl_normalization(self) -> None: taxonomy = _load_taxonomy() raw = [ {"element": "Revenue", "value": 100000, "context": "CurrentYearDuration"}, {"element": "Revenue", "value": 90000, "context": "PriorYearDuration"}, {"element": "OperatingProfit", "value": 20000, "context": "CurrentYearDuration"}, ] result = _normalize_items(raw, "income_statement", taxonomy) assert len(result) == 2 
assert result[0]["科目"] == "売上高" assert result[0]["当期"] == 100000 assert result[0]["前期"] == 90000 assert result[1]["科目"] == "営業利益" assert result[1]["当期"] == 20000 def test_jgaap_elements(self) -> None: taxonomy = _load_taxonomy() raw = [ {"element": "NetSales", "value": 50000, "context": "Current"}, {"element": "OperatingIncome", "value": 10000, "context": "Current"}, ] result = _normalize_items(raw, "income_statement", taxonomy) assert result[0]["科目"] == "売上高" assert result[1]["科目"] == "営業利益" def test_preserves_display_order(self) -> None: taxonomy = _load_taxonomy() # Items in reverse order raw = [ {"element": "OperatingProfit", "value": 20000, "context": "Current"}, {"element": "Revenue", "value": 100000, "context": "Current"}, ] result = _normalize_items(raw, "income_statement", taxonomy) # Revenue should come before OperatingProfit in output labels = [r["科目"] for r in result] assert labels.index("売上高") < labels.index("営業利益") def test_unknown_elements_ignored(self) -> None: taxonomy = _load_taxonomy() raw = [ {"element": "Revenue", "value": 100000, "context": "Current"}, {"element": "UnknownElement", "value": 999, "context": "Current"}, ] result = _normalize_items(raw, "income_statement", taxonomy) assert len(result) == 1 def test_empty_input(self) -> None: taxonomy = _load_taxonomy() assert _normalize_items([], "income_statement", taxonomy) == [] def test_bs_normalization(self) -> None: taxonomy = _load_taxonomy() raw = [ {"element": "TotalAssets", "value": 500000, "context": "Current"}, {"element": "TotalLiabilities", "value": 300000, "context": "Current"}, ] result = _normalize_items(raw, "balance_sheet", taxonomy) labels = [r["科目"] for r in result] assert "資産合計" in labels assert "負債合計" in labels def test_cf_normalization(self) -> None: taxonomy = _load_taxonomy() raw = [ {"element": "CashFlowsFromOperatingActivities", "value": 80000, "context": "Current"}, ] result = _normalize_items(raw, "cash_flow", taxonomy) assert result[0]["科目"] == "営業活動によるキャッシュ・フロー" 
def test_ifrs_suffix_stripping_in_normalization(self) -> None: """IFRS suffixes should be stripped before matching taxonomy.""" taxonomy = _load_taxonomy() raw = [ { "element": "TotalAssetsIFRSSummaryOfBusinessResults", "value": 90000000, "context": "CurrentYearInstant", }, { "element": "TotalAssetsIFRSSummaryOfBusinessResults", "value": 80000000, "context": "Prior1YearInstant", }, ] result = _normalize_items(raw, "balance_sheet", taxonomy) assert len(result) >= 1 assert result[0]["科目"] == "資産合計" assert result[0]["当期"] == 90000000 assert result[0]["前期"] == 80000000 def test_ifrs_cf_normalization(self) -> None: """IFRS CF elements should be normalized after suffix stripping.""" taxonomy = _load_taxonomy() raw = [ { "element": "CashFlowsFromUsedInOperatingActivitiesIFRSSummaryOfBusinessResults", "value": 4000000, "context": "CurrentYearDuration", }, { "element": "NetCashProvidedByUsedInInvestingActivitiesIFRS", "value": -2000000, "context": "CurrentYearDuration", }, ] result = _normalize_items(raw, "cash_flow", taxonomy) labels = [r["科目"] for r in result] assert "営業活動によるキャッシュ・フロー" in labels assert "投資活動によるキャッシュ・フロー" in labels def test_non_consolidated_filtered_out(self) -> None: """Non-consolidated data should be skipped, keeping consolidated.""" taxonomy = _load_taxonomy() raw = [ { "element": "TotalAssets", "value": 90000000, "context": "CurrentYearInstant", }, { "element": "TotalAssets", "value": 30000000, "context": "CurrentYearInstant_NonConsolidatedMember", }, ] result = _normalize_items(raw, "balance_sheet", taxonomy) # Only consolidated value should be kept total_assets = next(r for r in result if r["科目"] == "資産合計") assert total_assets["当期"] == 90000000 class TestNormalizeStatement: def test_normalizes_all_statements(self) -> None: filing = _make_filing() stmt = FinancialStatement( filing=filing, income_statement=StatementData( items=[ {"element": "Revenue", "value": 100000, "context": "CurrentYearDuration"}, {"element": "Revenue", "value": 90000, "context": 
"PriorYearDuration"}, ], label="IncomeStatement", ), balance_sheet=StatementData( items=[ {"element": "TotalAssets", "value": 500000, "context": "Current"}, ], label="BalanceSheet", ), accounting_standard=AccountingStandard.IFRS, ) result = normalize_statement(stmt) # Income statement normalized assert result.income_statement["売上高"]["当期"] == 100000 assert result.income_statement["売上高"]["前期"] == 90000 # Balance sheet normalized assert result.balance_sheet["資産合計"]["当期"] == 500000 # Raw data preserved assert len(result.income_statement.raw_items) == 2 assert result.income_statement.raw_items[0]["element"] == "Revenue" # Filing preserved assert result.filing.doc_id == "S100TEST" assert result.accounting_standard == AccountingStandard.IFRS def test_empty_statement_unchanged(self) -> None: filing = _make_filing() stmt = FinancialStatement(filing=filing) result = normalize_statement(stmt) assert not result.income_statement assert not result.balance_sheet def test_unmatched_elements_keep_raw(self) -> None: filing = _make_filing() stmt = FinancialStatement( filing=filing, income_statement=StatementData( items=[{"element": "CompletelyUnknown", "value": 999}], label="IncomeStatement", ), ) result = normalize_statement(stmt) # When nothing matches, raw items are kept assert result.income_statement.items == [{"element": "CompletelyUnknown", "value": 999}] class TestStatementDataAccess: def test_getitem(self) -> None: data = StatementData( items=[ {"科目": "売上高", "当期": 100000, "前期": 90000}, {"科目": "営業利益", "当期": 20000}, ], label="IncomeStatement", ) assert data["売上高"] == {"当期": 100000, "前期": 90000} assert data["営業利益"] == {"当期": 20000} def test_getitem_missing_raises(self) -> None: import pytest data = StatementData( items=[{"科目": "売上高", "当期": 100000}], label="IncomeStatement", ) with pytest.raises(KeyError): data["存在しない科目"] def test_get_with_default(self) -> None: data = StatementData( items=[{"科目": "売上高", "当期": 100000}], label="IncomeStatement", ) assert data.get("売上高") == {"当期": 
100000} assert data.get("存在しない", None) is None def test_labels(self) -> None: data = StatementData( items=[ {"科目": "売上高", "当期": 100000}, {"科目": "営業利益", "当期": 20000}, ], ) assert data.labels == ["売上高", "営業利益"] def test_labels_empty(self) -> None: data = StatementData(items=[{"element": "Revenue", "value": 100}]) assert data.labels == [] class TestGetTaxonomyLabels: def test_income_statement(self) -> None: labels = get_taxonomy_labels("income_statement") assert len(labels) > 0 assert labels[0]["label"] == "売上高" assert labels[0]["label_en"] == "Revenue" assert labels[0]["id"] == "revenue" def test_balance_sheet(self) -> None: labels = get_taxonomy_labels("balance_sheet") ids = [item["id"] for item in labels] assert "total_assets" in ids assert "net_assets" in ids def test_cash_flow(self) -> None: labels = get_taxonomy_labels("cash_flow") ids = [item["id"] for item in labels] assert "operating_cf" in ids def test_cash_flow_statement_alias(self) -> None: """'cash_flow_statement' maps to 'cash_flow' in YAML.""" labels = get_taxonomy_labels("cash_flow_statement") assert len(labels) > 0 assert labels == get_taxonomy_labels("cash_flow") def test_unknown_returns_empty(self) -> None: assert get_taxonomy_labels("nonexistent") == []

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ajtgjmdjp/edinet-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_normalize.py•16.3 KiB