
@arizeai/phoenix-mcp

Official, by Arize-ai
generate_sitemap.py (4.58 kB)
#!/usr/bin/env python3
"""
Generate sitemap.xml files from docs.json navigation structure.

This script parses docs.json and extracts all page URLs to create a standard
sitemap.xml file. The sitemap is written to both the repository root and
docs/phoenix/ directories.
"""

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from xml.etree import ElementTree as ET


def extract_pages(item: Any) -> list[str]:
    """
    Recursively extract page paths from the docs.json navigation structure.

    Pages can be:
    - A string (direct page path)
    - An object with "group" and "pages" keys (nested group)
    - An object with "tab" and "groups" keys (tab containing groups)
    """
    pages: list[str] = []

    if isinstance(item, str):
        # Direct page path
        pages.append(item)
    elif isinstance(item, dict):
        # Check for nested pages
        if "pages" in item:
            for page in item["pages"]:
                pages.extend(extract_pages(page))
        # Check for groups (in tabs)
        if "groups" in item:
            for group in item["groups"]:
                pages.extend(extract_pages(group))
        # Check for tabs (in languages)
        if "tabs" in item:
            for tab in item["tabs"]:
                pages.extend(extract_pages(tab))
    elif isinstance(item, list):
        for sub_item in item:
            pages.extend(extract_pages(sub_item))

    return pages


def indent_xml(elem: ET.Element, level: int = 0) -> None:
    """
    Add indentation to XML elements for pretty printing.
    """
    indent = "\n" + "  " * level
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = indent + "  "
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent
        for child in elem:
            indent_xml(child, level + 1)
        if not child.tail or not child.tail.strip():
            child.tail = indent
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = indent


def generate_sitemap_xml(urls: list[str], base_url: str = "https://arize.com") -> str:
    """
    Generate a standard sitemap.xml string from a list of URL paths.
    """
    # Create the root element with proper namespace
    urlset = ET.Element("urlset")
    urlset.set("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")

    # Get current timestamp for lastmod (ISO 8601 format)
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")

    # Add each URL
    for path in urls:
        url_element = ET.SubElement(urlset, "url")

        # Create the full URL - paths already include "docs/phoenix" prefix
        full_url = f"{base_url}/{path}"

        loc = ET.SubElement(url_element, "loc")
        loc.text = full_url

        lastmod = ET.SubElement(url_element, "lastmod")
        lastmod.text = now

    # Pretty print the XML
    indent_xml(urlset)

    # Convert to string with XML declaration
    xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>\n'
    xml_body = ET.tostring(urlset, encoding="unicode")

    return xml_declaration + xml_body


def main() -> None:
    # Determine paths
    script_dir = Path(__file__).parent
    repo_root = script_dir.parent
    docs_json_path = repo_root / "docs.json"
    docs_phoenix_dir = repo_root / "docs" / "phoenix"

    # Read docs.json
    with open(docs_json_path, encoding="utf-8") as f:
        docs_config = json.load(f)

    # Extract all pages from navigation
    navigation = docs_config.get("navigation", {})
    languages = navigation.get("languages", [])

    all_pages: list[str] = []
    for language in languages:
        pages = extract_pages(language)
        all_pages.extend(pages)

    # Remove duplicates while preserving order
    seen = set()
    unique_pages = []
    for page in all_pages:
        if page not in seen:
            seen.add(page)
            unique_pages.append(page)

    print(f"Found {len(unique_pages)} unique pages")

    # Generate sitemap XML
    sitemap_xml = generate_sitemap_xml(unique_pages)

    # Write to both locations
    output_paths = [
        repo_root / "sitemap.xml",
        docs_phoenix_dir / "sitemap.xml",
    ]

    for output_path in output_paths:
        # Ensure parent directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(sitemap_xml)

        print(f"Wrote sitemap to {output_path}")


if __name__ == "__main__":
    main()

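For reference, here is a minimal sketch of the navigation shape extract_pages() walks. The nesting keys (languages → tabs → groups → pages) come from the code above; the concrete group names and page paths are hypothetical, and the snippet assumes extract_pages() from generate_sitemap.py is in scope (same module or imported).

# Hypothetical docs.json fragment; only the keys mirror what extract_pages() checks.
example_language = {
    "language": "en",
    "tabs": [
        {
            "tab": "Guides",
            "groups": [
                {
                    "group": "Getting Started",
                    "pages": [
                        "docs/phoenix/quickstart",
                        {
                            "group": "Tracing",
                            "pages": ["docs/phoenix/tracing/overview"],
                        },
                    ],
                }
            ],
        }
    ],
}

# Strings and nested groups are flattened into a single ordered list of paths:
# ['docs/phoenix/quickstart', 'docs/phoenix/tracing/overview']
print(extract_pages(example_language))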

MCP directory API

We provide all the information about MCP servers via our MCP directory API. For example:

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'
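To call the endpoint from code rather than curl, the sketch below fetches the same URL with Python's standard library. It assumes only that the endpoint returns JSON; the response schema is not documented here, so the body is parsed generically.

import json
from urllib.request import urlopen

# Same endpoint as the curl command above.
URL = "https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix"

with urlopen(URL) as response:
    # Parse the body as generic JSON; no assumptions about its schema.
    server_info = json.load(response)

print(json.dumps(server_info, indent=2))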

If you have feedback or need assistance with the MCP directory API, please join our Discord server.