Skip to main content
Glama
fetch_public_apis.py (4.89 kB)
"""Parse API tables from a markdown document into a JSON list of records.

Every markdown table found in the input becomes a sequence of records (one
per data row).  Column names are derived from the table header, the nearest
preceding H2-H6 heading is attached as the record's ``category``, and each
record gets a deterministic ``id``.
"""

import argparse
import json
import re
import sys
import uuid
from typing import Dict, List, Optional, Sequence, Tuple

# Inline markdown link: [visible text](href)
LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")

# Patterns used inside loops are compiled once at import time.
_HTML_TAG_RE = re.compile(r"<[^>]+>")
_HTML_COMMENT_RE = re.compile(r"<!--.*?-->")
_WHITESPACE_RE = re.compile(r"\s+")
_NON_SLUG_RE = re.compile(r"[^a-z0-9]+")
_ALIGNMENT_CELL_RE = re.compile(r":?-{3,}:?")
_CATEGORY_HEADING_RE = re.compile(r"^(#{2,6})\s+(.+?)\s*$")
_ANY_HEADING_RE = re.compile(r"^\s*#{1,6}\s+")

# Heading that must not replace the current category.
_IGNORED_CATEGORY = "APILayer APIs"


def slugify_header(s: str) -> str:
    """Turn a table header cell into a lowercase ``snake_case`` key.

    Backticks are dropped, ``&`` becomes ``and``, and every run of characters
    outside ``[a-z0-9]`` collapses into a single underscore.  Returns ``"col"``
    when nothing usable is left.
    """
    s = s.replace("`", "").replace("&", " and ")
    s = _WHITESPACE_RE.sub(" ", s.strip()).lower()
    # A whole run (underscores included) is replaced by one "_", so no
    # consecutive underscores can remain and no second collapse is needed.
    s = _NON_SLUG_RE.sub("_", s).strip("_")
    return s or "col"


def strip_html(s: str) -> str:
    """Remove anything that looks like an HTML tag (``<...>``) from *s*."""
    return _HTML_TAG_RE.sub("", s)


def extract_first_link(cell: str) -> Tuple[str, Optional[str]]:
    """Split a table cell into its plain text and its first link target.

    Returns ``(text, href)`` where *text* is the cell with *all* markdown
    links replaced by their visible text, HTML tags and backticks removed and
    whitespace normalised, and *href* is the target of the first markdown
    link, or ``None`` when the cell contains no link.
    """
    first = LINK_RE.search(cell)
    href = first.group(2).strip() if first else None

    text = LINK_RE.sub(lambda mo: mo.group(1), cell)
    text = strip_html(text).replace("`", "")
    text = _WHITESPACE_RE.sub(" ", text).strip()
    return text, href


def is_alignment_row(line: str) -> bool:
    """Return True if *line* is a markdown table alignment row.

    Example: ``|:---|:---:|---:|``.  At least two columns are required and
    every column must consist of three or more dashes with optional colons.
    """
    line = line.strip()
    if "|" not in line:
        return False
    parts = [p.strip() for p in line.strip("|").strip().split("|")]
    if len(parts) < 2:
        return False
    return all(_ALIGNMENT_CELL_RE.fullmatch(p) for p in parts)


def split_md_row(line: str, expected: Optional[int] = None) -> List[str]:
    """Split one markdown table row into stripped cell strings.

    HTML comments are removed first.  When *expected* is given, the result is
    normalised to exactly that many cells: short rows are padded with empty
    strings and surplus cells are merged into the last one (joined by
    ``" | "``) so the column count stays stable.
    """
    s = _HTML_COMMENT_RE.sub("", line).strip()
    # Remove only ONE border pipe per side.  Stripping every leading/trailing
    # pipe would swallow empty edge cells ("|| a | b |") and shift the
    # remaining values into the wrong columns.
    if s.startswith("|"):
        s = s[1:]
    if s.endswith("|"):
        s = s[:-1]
    parts = [p.strip() for p in s.split("|")]

    if expected is not None:
        if len(parts) < expected:
            parts += [""] * (expected - len(parts))
        elif len(parts) > expected:
            parts = parts[: expected - 1] + [" | ".join(parts[expected - 1:])]
    return parts


def _row_to_record(
    headers: List[str], cells: List[str], category: Optional[str]
) -> Dict:
    """Build one output record from a data row's cells."""
    rec: Dict = {}
    for header, cell in zip(headers, cells):
        text, href = extract_first_link(cell)
        rec[header] = text
        if href:
            rec[f"{header}_link"] = href

    # Stable unique id derived from category + key fields + full row content.
    base = (
        f"{category or ''}|{rec.get('api', '')}|{rec.get('api_link', '')}"
        f"|{'||'.join(cells)}"
    )
    rec["id"] = str(uuid.uuid5(uuid.NAMESPACE_URL, base))
    rec["category"] = category or "Uncategorized"
    return rec


def parse_markdown_tables(md_text: str) -> List[Dict]:
    """Extract every table row in *md_text* as a dictionary.

    A table is recognised as a line immediately followed by an alignment row.
    The most recent H2-H6 heading (except "APILayer APIs") is used as the
    ``category`` of the rows below it; rows seen before any such heading are
    labelled ``"Uncategorized"``.

    Each record maps the slugified header names to the cell text, adds a
    ``<header>_link`` key for cells containing a markdown link, and carries
    ``id`` (a UUIDv5 of the row content) and ``category``.
    """
    lines = md_text.splitlines()
    results: List[Dict] = []
    category: Optional[str] = None

    i, n = 0, len(lines)
    while i < n:
        line = lines[i]

        # Track the most recent H2-H6 heading as the category.
        heading = _CATEGORY_HEADING_RE.match(line.strip())
        if heading:
            cat_text = LINK_RE.sub(lambda m: m.group(1), heading.group(2))
            cat_text = strip_html(cat_text).strip()
            if cat_text != _IGNORED_CATEGORY:
                category = cat_text
            i += 1
            continue

        # Detect a table by "header line" followed by an alignment row.
        if i + 1 < n and is_alignment_row(lines[i + 1]):
            headers = [slugify_header(h) for h in split_md_row(line)]
            i += 2  # move to first data row
            while i < n:
                row = lines[i]
                if not row.strip():
                    break
                if _ANY_HEADING_RE.match(row):  # next heading starts
                    break
                if is_alignment_row(row):  # stray alignment row
                    i += 1
                    continue
                if "|" not in row:
                    break
                cells = split_md_row(row, expected=len(headers))
                results.append(_row_to_record(headers, cells, category))
                i += 1
            # Do not advance here: the line that ended the table (for example
            # a heading) still has to be examined by the outer loop.
            continue

        i += 1

    return results


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Command-line entry point.

    Reads markdown from ``--input`` (or stdin when omitted), parses its
    tables and writes the records as JSON to ``--output`` (or stdout when
    omitted).  *argv* defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Parse API tables from markdown into JSON."
    )
    # Not required: the help text promises a stdin fallback.
    parser.add_argument(
        "-i", "--input", help="Markdown file path (defaults to stdin)"
    )
    parser.add_argument(
        "-o", "--output", help="Output JSON file path (defaults to stdout)"
    )
    parser.add_argument(
        "--indent", type=int, default=2, help="JSON indent (default: 2)"
    )
    args = parser.parse_args(argv)

    if args.input:
        with open(args.input, "r", encoding="utf-8") as f:
            md_text = f.read()
    else:
        md_text = sys.stdin.read()

    records = parse_markdown_tables(md_text)
    out_json = json.dumps(records, ensure_ascii=False, indent=args.indent)

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(out_json)
    else:
        print(out_json)


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/zazencodes/public-apis-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.