import argparse
import json
import sys

import requests
from bs4 import BeautifulSoup

# Constants
DEFAULT_URL = "https://docs.videodb.io"
HTML_PARSER = "html.parser"
DEFAULT_SELECTOR = "data-coda-ui-id"
DEFAULT_SELECTOR_VALUE = "page-list"  # Default attribute value to locate the parent element


def find_a_tags_with_depth(parent_tag, depth=0):
"""
Recursively find all <a> tags within a parent tag and track their depth level.
"""
results = []
for child in parent_tag.find_all(recursive=False): # Iterate over direct children
if child.name == "a":
results.append((child, depth)) # Store <a> tag with its depth
results.extend(find_a_tags_with_depth(child, depth + 1)) # Recurse deeper
return results
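
# Example (illustrative): for HTML like
#   <div><a href="/a">A</a><div><a href="/b">B</a></div></div>
# calling find_a_tags_with_depth on the outer <div> yields
# [(<a>A</a>, 0), (<a>B</a>, 1)]; A is a direct child, B sits one level deeper.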


def list_to_nested_json(data):
"""
Convert a list of tuples (element, depth) into a nested JSON-like structure,
where items with the smallest depth are at the top level and items of the same
depth become siblings.
Parameters:
data (list of tuple): Each tuple is (element, depth)
Returns:
list: A list of nested dictionaries representing the JSON structure.
"""
result = []
stack = []
for element, depth in data:
node = {
"element": element.get_text(strip=True),
"href": element.get("href"),
"children": [],
}
# Adjust the stack to match the current depth
while stack and stack[-1][1] >= depth:
stack.pop()
if stack:
parent_node, _ = stack[-1]
parent_node["children"].append(node)
else:
result.append(node)
stack.append((node, depth))
return result
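
# Example (illustrative): given tags A (depth 0), B (depth 1), C (depth 0),
# list_to_nested_json produces two top-level nodes, with B nested under A
# because its depth is greater, while C starts a new top-level sibling:
# [{"element": "A", ..., "children": [{"element": "B", ..., "children": []}]},
#  {"element": "C", ..., "children": []}]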


def fetch_and_parse(url):
"""
Fetch the webpage content from the given URL and parse it with BeautifulSoup.
"""
    response = requests.get(url, timeout=30)  # Timeout so an unresponsive server can't hang the script
    if response.status_code == 200:
        return BeautifulSoup(response.text, HTML_PARSER)
    raise RuntimeError(
        f"Failed to fetch the webpage. Status code: {response.status_code}"
    )
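
# Example (illustrative): fetch_and_parse("https://docs.videodb.io") returns a
# BeautifulSoup tree for the page, or raises RuntimeError on a non-200 status.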


def scrape_and_save(
output_file,
url=DEFAULT_URL,
selector=DEFAULT_SELECTOR,
selector_value=DEFAULT_SELECTOR_VALUE,
):
"""
Scrape the webpage, convert <a> tags into a nested JSON structure, and save it to a file.
Parameters:
output_file (str): Path to the output JSON file.
url (str): URL of the docs page to scrape.
selector (str): HTML attribute name to locate the parent element.
selector_value (str): Value for the attribute selector.
"""
soup = fetch_and_parse(url)
parent_tag = soup.find(attrs={selector: selector_value})
if not parent_tag:
raise Exception(f"Element with {selector}='{selector_value}' not found.")
a_tags_with_levels = find_a_tags_with_depth(parent_tag)
nested_json = list_to_nested_json(a_tags_with_levels)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(nested_json, f, indent=4, ensure_ascii=False)  # Keep non-ASCII text readable
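
# Example (illustrative): scrape_and_save("toc.json") fetches DEFAULT_URL,
# locates the element with data-coda-ui-id="page-list", and writes the nested
# link tree to toc.json.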


def main():
parser = argparse.ArgumentParser(
description="Scrape a webpage, convert <a> tags into a nested JSON structure, and save it to a file."
)
parser.add_argument("output", help="Path to the output JSON file")
parser.add_argument(
"--url", default=DEFAULT_URL, help="URL of the docs page (default: %(default)s)"
)
parser.add_argument(
"--selector",
default=DEFAULT_SELECTOR,
help="Attribute selector to locate the parent element (default: %(default)s)",
)
parser.add_argument(
"--selector-value",
default=DEFAULT_SELECTOR_VALUE,
help="Value for the attribute selector (default: %(default)s)",
)
args = parser.parse_args()
try:
scrape_and_save(
args.output,
url=args.url,
selector=args.selector,
selector_value=args.selector_value,
)
except Exception as e:
sys.exit(str(e))


if __name__ == "__main__":
main()
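
# Example usage (illustrative; assumes this file is saved as scrape_docs.py):
#   python scrape_docs.py sitemap.json
#   python scrape_docs.py sitemap.json --url https://docs.videodb.io \
#       --selector data-coda-ui-id --selector-value page-list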