QuantClaw Data

quantclaw-data
scripts

generate_index.py•8.57 KiB

#!/usr/bin/env python3
"""
Generate searchable markdown index of all QuantClaw Data modules.
Output: /home/quant/.openclaw/workspace/knowledge/quantclaw-data-index.md

This gets auto-indexed by zvec-watcher into vector search.
"""

import importlib
import inspect
import json
import os
import re
import sys
from pathlib import Path
from datetime import datetime, timezone

PROJECT_ROOT = Path(__file__).parent.parent
MODULES_DIR = PROJECT_ROOT / "modules"
OUTPUT_DIR = Path("/home/quant/.openclaw/workspace/knowledge")
OUTPUT_FILE = OUTPUT_DIR / "quantclaw-data-index.md"
CORRELATION_FILE = OUTPUT_DIR / "quantclaw-data-correlations.md"

# Make `modules.<name>` importable regardless of the current working directory.
sys.path.insert(0, str(PROJECT_ROOT))

# Compiled once; used to pull data-source URLs out of module source text.
_URL_RE = re.compile(r'https?://[a-zA-Z0-9._/-]+')

# Top-level packages that are not recorded in a module's "imports" list.
_IGNORED_IMPORTS = frozenset({
    "os", "sys", "json", "re", "time", "datetime", "pathlib",
    "typing", "collections", "functools", "math",
})

# Category -> keywords. Order matters: the first category with a matching
# keyword wins.
_CATEGORY_KEYWORDS = {
    "macro": ["gdp", "cpi", "inflation", "fed", "central_bank", "treasury",
              "yield_curve", "pboc", "ecb", "boj", "imf", "taylor_rule",
              "recession"],
    "equity": ["stock", "earnings", "screener", "company", "profile", "sec",
               "filing", "13f", "insider"],
    "fixed_income": ["bond", "credit", "cds", "spread", "duration",
                     "convexity", "muni", "clo", "abs", "mbs"],
    "derivatives": ["option", "gex", "greeks", "vol", "straddle", "implied",
                    "skew"],
    "quant": ["monte_carlo", "fama", "kalman", "black_litterman", "pairs",
              "cointegration", "factor", "backtest", "momentum",
              "mean_reversion", "stat_arb", "risk_parity", "kelly"],
    "crypto": ["crypto", "bitcoin", "ethereum", "defi", "nft", "liquidation",
               "funding_rate", "stablecoin"],
    "commodities": ["oil", "gold", "commodity", "opec", "eia", "usda",
                    "agriculture", "energy", "metal", "lng", "carbon"],
    "fx": ["forex", "currency", "fx", "carry_trade", "intervention"],
    "alt_data": ["satellite", "patent", "job", "congress", "social",
                 "sentiment", "dark_pool", "short_squeeze", "insider",
                 "shipping"],
    "ml_ai": ["neural", "lstm", "transformer", "xgboost", "random_forest",
              "ml", "deep_learning", "anomaly", "automl"],
    "real_time": ["websocket", "stream", "live", "real_time", "tick",
                  "heatmap"],
}


def _utc_stamp() -> str:
    """Return the current UTC time formatted for the generated file headers."""
    # datetime.utcnow() is deprecated since Python 3.12; use an aware datetime.
    return datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')


def _extract_domains(source: str) -> list:
    """Return up to 15 sorted, unique host names of URLs found in *source*.

    URLs containing "python" or "github" are skipped, as are hosts without a
    dot.
    """
    domains = set()
    for url in _URL_RE.findall(source):
        if "python" in url or "github" in url:
            continue
        # "scheme://host/..." -> element 2 of the "/"-split is the host.
        host = url.split("/")[2]
        if "." in host:
            domains.add(host)
    return sorted(domains)[:15]


def _extract_imports(source: str) -> list:
    """Return top-level package names imported at column 0 of *source*.

    Only the first package of each ``import``/``from`` line is recorded, in
    order of first appearance, excluding ``_IGNORED_IMPORTS``.
    """
    packages = []
    for line in source.splitlines():
        if not line.startswith(("import ", "from ")):
            continue
        parts = line.split()
        if len(parts) < 2:
            # A bare "import " / "from " line (e.g. wrapped docstring text).
            continue
        # "a.b.c" -> "a"; "a," (from "import a, b") -> "a"; "." -> "".
        pkg = parts[1].split(".")[0].rstrip(",")
        if not pkg:
            # Relative import such as "from . import x" has no package name.
            continue
        if pkg not in _IGNORED_IMPORTS and pkg not in packages:
            packages.append(pkg)
    return packages


def get_module_info(module_path: Path) -> dict:
    """Extract metadata from a Python module.

    The module is imported as ``modules.<stem>`` to read its docstring and
    public functions, then its source text is scanned for data-source URLs and
    imports.

    Args:
        module_path: Path to the module's ``.py`` file.

    Returns:
        A dict with keys ``name``, ``file``, ``size``, ``functions`` (list of
        ``{"name", "signature", "doc"}``), ``docstring``, ``data_sources``,
        ``imports`` and, when importing or reading the module failed,
        ``error`` (message truncated to 200 characters).

    Raises:
        OSError: If ``module_path.stat()`` fails.
    """
    mod_name = f"modules.{module_path.stem}"
    info = {
        "name": module_path.stem,
        "file": str(module_path),
        "size": module_path.stat().st_size,
        "functions": [],
        "docstring": "",
        "data_sources": [],
        "imports": [],
    }

    try:
        mod = importlib.import_module(mod_name)
        info["docstring"] = (mod.__doc__ or "").strip()
        # Get all public functions.
        # NOTE(review): getmembers also returns functions the module merely
        # imported from elsewhere — confirm whether those should be indexed.
        for name, obj in inspect.getmembers(mod, inspect.isfunction):
            if name.startswith("_"):
                continue
            try:
                sig = str(inspect.signature(obj))
            except (ValueError, TypeError):
                sig = ""
            doc = (obj.__doc__ or "").strip().split("\n")[0]  # First line only
            info["functions"].append({"name": name, "signature": sig, "doc": doc})
    except Exception as e:
        # Importing runs arbitrary module code; record the failure and keep
        # going so one broken module does not abort the whole index.
        info["error"] = str(e)[:200]

    # Scan source for data source URLs and imports (best effort).
    try:
        source = module_path.read_text(encoding="utf-8", errors="ignore")
    except OSError as e:
        info.setdefault("error", str(e)[:200])
        return info

    info["data_sources"] = _extract_domains(source)
    info["imports"] = _extract_imports(source)
    return info


def categorize_module(name: str, docstring: str) -> str:
    """Auto-categorize module by name/docstring.

    Args:
        name: Module name.
        docstring: Module docstring (may be empty).

    Returns:
        The first category in ``_CATEGORY_KEYWORDS`` having a keyword that
        occurs in the lower-cased name or docstring, else ``"other"``.
    """
    combined = name.lower() + " " + docstring.lower()
    # NOTE(review): this is plain substring matching, so short keywords such
    # as "ml" or "sec" also match inside longer words.
    for cat, keywords in _CATEGORY_KEYWORDS.items():
        if any(kw in combined for kw in keywords):
            return cat
    return "other"


def generate_index():
    """Generate the main searchable index.

    Scans every non-dunder ``*.py`` file in ``MODULES_DIR``, writes the
    markdown index to ``OUTPUT_FILE`` (creating ``OUTPUT_DIR`` if needed) and
    prints a summary line.

    Returns:
        A ``(modules, all_sources)`` tuple: the list of per-module info dicts
        (each with an added ``category`` key) and the set of all data-source
        host names.
    """
    modules = []
    for f in sorted(MODULES_DIR.glob("*.py")):
        if f.name.startswith("__"):
            continue
        info = get_module_info(f)
        info["category"] = categorize_module(info["name"], info["docstring"])
        modules.append(info)

    # Build markdown
    lines = [
        "# QuantClaw Data — Module Index",
        "",
        f"> Auto-generated: {_utc_stamp()}",
        f"> Total modules: {len(modules)}",
        "> Search this file via Zvec for instant recall of any data source, function, or capability.",
        "",
    ]

    # Summary by category, most populated first.
    cat_counts = {}
    for m in modules:
        cat_counts[m["category"]] = cat_counts.get(m["category"], 0) + 1

    lines.append("## Categories")
    for cat, count in sorted(cat_counts.items(), key=lambda x: -x[1]):
        lines.append(f"- **{cat}**: {count} modules")
    lines.append("")

    # All data sources (deduplicated)
    all_sources = set()
    for m in modules:
        all_sources.update(m["data_sources"])

    lines.append(f"## Data Sources ({len(all_sources)} unique)")
    for src in sorted(all_sources):
        lines.append(f"- {src}")
    lines.append("")

    # Per-module detail
    lines.append("## Modules\n")
    for m in modules:
        lines.append(f"### {m['name']}")
        lines.append(
            f"**Category:** {m['category']} | **Size:** {m['size']:,} bytes"
            f" | **Functions:** {len(m['functions'])}"
        )
        if m["docstring"]:
            lines.append(f"\n{m['docstring'][:300]}")
        if m["data_sources"]:
            lines.append(f"\n**Data sources:** {', '.join(m['data_sources'])}")
        if m["functions"]:
            lines.append("\n**Functions:**")
            for func in m["functions"][:10]:
                doc_part = f" — {func['doc']}" if func['doc'] else ""
                lines.append(f"- `{func['name']}{func['signature']}`{doc_part}")
        lines.append("")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: the content contains non-ASCII characters.
    OUTPUT_FILE.write_text("\n".join(lines), encoding="utf-8")
    print(f"✅ Index written: {OUTPUT_FILE} ({len(lines)} lines, {len(modules)} modules, {len(all_sources)} data sources)")
    return modules, all_sources


def generate_correlations(modules):
    """Generate a correlation map showing which modules share data sources.

    Writes the markdown report to ``CORRELATION_FILE`` (creating its parent
    directory if needed) and prints a summary line.

    Args:
        modules: Iterable of module info dicts; each must provide ``name``,
            ``category`` and ``data_sources``.
    """
    lines = [
        "# QuantClaw Data — Cross-Module Correlations",
        "",
        f"> Auto-generated: {_utc_stamp()}",
        "> Shows which modules share data sources — useful for building composite signals.",
        "",
    ]

    # Build source → modules map, and category → sources map in one pass.
    source_to_modules = {}
    category_sources = {}
    for m in modules:
        category_sources.setdefault(m["category"], set()).update(m["data_sources"])
        for src in m["data_sources"]:
            source_to_modules.setdefault(src, []).append(m["name"])

    # Find modules that share sources (potential correlations)
    lines.append("## Shared Data Sources (modules that can be correlated)\n")
    for src, mods in sorted(source_to_modules.items(), key=lambda x: -len(x[1])):
        if len(mods) >= 2:
            lines.append(f"### {src} ({len(mods)} modules)")
            for mod in sorted(mods):
                lines.append(f"- {mod}")
            lines.append("")

    # Category cross-references: every unordered pair of categories, in order
    # of first appearance.
    lines.append("## Cross-Category Opportunities\n")
    cats = list(category_sources)
    for i, cat1 in enumerate(cats):
        for cat2 in cats[i + 1:]:
            shared = category_sources[cat1] & category_sources[cat2]
            if shared:
                # Sort before truncating so the report is reproducible; set
                # iteration order for strings varies between interpreter runs.
                lines.append(f"**{cat1} ↔ {cat2}** (shared: {', '.join(sorted(shared)[:5])})")

    lines.append("")

    # This function may be called without generate_index() having run first.
    CORRELATION_FILE.parent.mkdir(parents=True, exist_ok=True)
    CORRELATION_FILE.write_text("\n".join(lines), encoding="utf-8")
    print(f"✅ Correlations written: {CORRELATION_FILE} ({len(lines)} lines)")


if __name__ == "__main__":
    modules, sources = generate_index()
    generate_correlations(modules)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yoniassia/quantclaw-data'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

generate_index.py•8.57 KiB