Skip to main content
Glama
jafforgehq

SEO Analytics MCP

by jafforgehq

analytics_data_quality_report

Analyze data quality by comparing Google Search Console and Google Analytics 4 metrics to identify coverage gaps and top URL mismatches for SEO optimization.

Instructions

Show merge coverage and top URL mismatches between GSC and GA4.

Input Schema

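Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| site_url | No | | |
| property_id | No | | |
| start_date | No | | |
| end_date | No | | |
| max_rows | No | | |
| top_n_unmatched | No | | |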

Implementation Reference

  • The analytics_data_quality_report tool handler - an MCP tool decorated function that fetches page data from GSC and GA4, then calls build_data_quality_report() to analyze merge coverage and URL mismatches between the two data sources.
    def analytics_data_quality_report(
        site_url: str | None = None,
        property_id: str | None = None,
        start_date: str | None = None,
        end_date: str | None = None,
        max_rows: int = 50000,
        top_n_unmatched: int = 20,
    ) -> dict[str, Any]:
        """Show merge coverage and top URL mismatches between GSC and GA4."""
        # Only the current period is fetched; the quality report does not
        # compare against a previous period.
        page_data = _fetch_page_data(
            site_url,
            property_id,
            start_date,
            end_date,
            include_previous_period=False,
            max_rows=max_rows,
        )

        # Echo back the resolved query context alongside the analysis.
        report: dict[str, Any] = {
            "ranges": page_data["ranges"],
            "site_url": page_data["site_url"],
            "property_id": page_data["property_id"],
        }

        # Always request at least one unmatched page per source.
        unmatched_limit = max(1, top_n_unmatched)
        report["quality"] = build_data_quality_report(
            page_data["merged_pages"],
            top_n_unmatched=unmatched_limit,
        )
        return report
  • Tool registration in the capabilities() function - lists 'analytics_data_quality_report' among the available tools in the tool registry.
        "tools": [
            "gsc_list_sites",
            "gsc_search_analytics_raw",
            "gsc_top_pages",
            "gsc_top_queries",
            "gsc_query_page_pairs",
            "ga4_run_report_raw",
            "ga4_landing_pages",
            "ga4_channel_report",
            "analytics_merge_page_metrics",
            "analytics_generate_action_items",
            "analytics_popularity_snapshot",
            "analytics_trend_report",
            "analytics_data_quality_report",
            "analytics_query_page_opportunities",
            "analytics_topic_clusters",
        ],
    }
  • The build_data_quality_report() helper function - analyzes merged pages to categorize data coverage, identifying pages with GSC only, GA4 only, or both sources, and returns counts and top unmatched pages.
    def build_data_quality_report(
        merged_pages: list[dict[str, Any]],
        *,
        top_n_unmatched: int = 20,
    ) -> dict[str, Any]:
        """Summarize how well GSC and GA4 page data overlap.

        A page counts as having GSC data when its ``gsc_impressions`` value is
        greater than zero, and as having GA4 data when its ``ga4_sessions``
        value is greater than zero. Missing, ``None`` or otherwise empty metric
        values are treated as 0.

        Args:
            merged_pages: Page records keyed by metric name. Records that end
                up in one of the "top" lists must also carry a ``"url"`` key.
            top_n_unmatched: Maximum number of pages to list per unmatched
                source. Negative values are treated as 0.

        Returns:
            A dict with three keys:

            * ``"counts"``: totals for all pages, pages with GSC data, pages
              with GA4 data, pages with both, GSC-only and GA4-only pages.
            * ``"top_gsc_only_pages"``: GSC-only pages ordered by impressions
              (descending), each with ``url``, ``gsc_impressions`` and
              ``gsc_clicks`` rounded to two decimals.
            * ``"top_ga4_only_pages"``: GA4-only pages ordered by sessions
              (descending), each with ``url``, ``ga4_sessions`` and
              ``ga4_conversions`` rounded to two decimals.

        Raises:
            KeyError: If a page selected for a top list has no ``"url"`` key.
        """

        def metric(page: dict[str, Any], key: str) -> float:
            # ``dict.get`` only uses its default when the key is absent, so a
            # key that is present with ``None`` must be mapped to 0.0 here.
            return float(page.get(key) or 0.0)

        # A negative limit would otherwise slice from the end of the list.
        limit = max(0, top_n_unmatched)

        with_gsc = 0
        with_ga4 = 0
        with_both = 0
        gsc_only: list[dict[str, Any]] = []
        ga4_only: list[dict[str, Any]] = []

        # Classify every page in a single pass.
        for page in merged_pages:
            impressions = metric(page, "gsc_impressions")
            sessions = metric(page, "ga4_sessions")
            has_gsc = impressions > 0
            has_ga4 = sessions > 0

            if has_gsc:
                with_gsc += 1
            if has_ga4:
                with_ga4 += 1

            if has_gsc and has_ga4:
                with_both += 1
            elif has_gsc and sessions <= 0:
                gsc_only.append(page)
            elif has_ga4 and impressions <= 0:
                ga4_only.append(page)

        # ``sorted`` is stable, so pages with equal values keep input order.
        gsc_only_top = sorted(
            gsc_only, key=lambda p: metric(p, "gsc_impressions"), reverse=True
        )[:limit]
        ga4_only_top = sorted(
            ga4_only, key=lambda p: metric(p, "ga4_sessions"), reverse=True
        )[:limit]

        return {
            "counts": {
                "total_merged_pages": len(merged_pages),
                "pages_with_gsc": with_gsc,
                "pages_with_ga4": with_ga4,
                "pages_with_both": with_both,
                "gsc_only_pages": len(gsc_only),
                "ga4_only_pages": len(ga4_only),
            },
            "top_gsc_only_pages": [
                {
                    "url": p["url"],
                    "gsc_impressions": round(metric(p, "gsc_impressions"), 2),
                    "gsc_clicks": round(metric(p, "gsc_clicks"), 2),
                }
                for p in gsc_only_top
            ],
            "top_ga4_only_pages": [
                {
                    "url": p["url"],
                    "ga4_sessions": round(metric(p, "ga4_sessions"), 2),
                    "ga4_conversions": round(metric(p, "ga4_conversions"), 2),
                }
                for p in ga4_only_top
            ],
        }
  • The _fetch_page_data() helper function - fetches and normalizes data from both GSC and GA4 connectors, then merges the page metrics together.
    def _fetch_page_data(
        site_url: str | None,
        property_id: str | None,
        start_date: str | None,
        end_date: str | None,
        *,
        include_previous_period: bool,
        max_rows: int,
    ) -> dict[str, Any]:
        """Fetch per-page rows from GSC and GA4, normalize them, and merge them.

        Each source is queried only when its ``enable_*`` setting is on (GA4
        additionally needs a resolved property id). The previous period is
        fetched only when ``include_previous_period`` is true.

        Returns a dict with the date ``ranges``, the resolved ``site_url`` and
        ``property_id``, the number of normalized ``gsc_pages`` and
        ``ga4_pages`` for the current period, and the ``merged_pages`` produced
        by ``merge_page_metrics``.
        """
        settings = _get_settings()
        window_start, window_end = _default_dates(start_date, end_date)
        ranges = current_and_previous_ranges(window_start, window_end, settings.default_lookback_days)

        # Pin the current range to the resolved dates regardless of what the
        # range helper returned for it.
        ranges["current"] = (window_start, window_end)

        periods = ("current", "previous") if include_previous_period else ("current",)

        # Normalized rows per period; a period stays absent when its source is
        # disabled or the period was not requested.
        gsc_by_period: dict[str, dict[str, dict[str, Any]]] = {}
        ga4_by_period: dict[str, dict[str, dict[str, Any]]] = {}

        resolved_site_url: str | None = site_url or settings.default_gsc_site_url
        resolved_property_id = property_id or settings.default_ga4_property_id

        if settings.enable_gsc:
            resolved_site_url = _resolve_site_url(site_url)
            gsc = _get_gsc_connector()
            for period in periods:
                gsc_resp = gsc.search_analytics_all(
                    resolved_site_url,
                    ranges[period][0],
                    ranges[period][1],
                    dimensions=["page"],
                    search_type="web",
                    aggregation_type="byPage",
                    max_rows=max_rows,
                )
                gsc_by_period[period] = normalize_gsc_rows_by_page(
                    gsc_resp["rows"],
                    dimensions=["page"],
                    base_url=settings.canonical_base_url,
                )

        if settings.enable_ga4 and resolved_property_id:
            ga4 = _get_ga4_connector()
            # The same report definition is reused for every period.
            report_kwargs: dict[str, Any] = {
                "dimensions": ["landingPagePlusQueryString"],
                "metrics": [
                    "sessions",
                    "engagedSessions",
                    "conversions",
                    "totalUsers",
                    "screenPageViews",
                    "userEngagementDuration",
                ],
                "order_bys": [{"metric": "sessions", "desc": True}],
                "max_rows": max_rows,
            }
            for period in periods:
                ga4_resp = ga4.run_report_all(
                    resolved_property_id,
                    ranges[period][0],
                    ranges[period][1],
                    **report_kwargs,
                )
                ga4_by_period[period] = normalize_ga4_rows_by_page(
                    ga4_resp["rows"],
                    base_url=settings.canonical_base_url,
                )

        gsc_current = gsc_by_period.get("current", {})
        ga4_current = ga4_by_period.get("current", {})

        # Previous-period data is passed as None when it was not requested,
        # and as an empty dict when requested but the source was skipped.
        if include_previous_period:
            gsc_previous = gsc_by_period.get("previous", {})
            ga4_previous = ga4_by_period.get("previous", {})
        else:
            gsc_previous = None
            ga4_previous = None

        merged = merge_page_metrics(
            gsc_current,
            ga4_current,
            gsc_previous=gsc_previous,
            ga4_previous=ga4_previous,
        )

        return {
            "ranges": ranges,
            "site_url": resolved_site_url,
            "property_id": resolved_property_id,
            "gsc_pages": len(gsc_current),
            "ga4_pages": len(ga4_current),
            "merged_pages": merged,
        }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jafforgehq/google-analytics-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.