name: Index Famous Repositories

# Triggers: a weekly scheduled run plus an on-demand manual run.
on:
  schedule:
    # Every Sunday at 00:00 UTC.
    - cron: "0 0 * * 0"
  workflow_dispatch:
    inputs:
      # NOTE(review): no job visible in this file reads this input; the job
      # matrix lists its repositories explicitly. Confirm whether it should be
      # wired into the matrix.
      repos:
        description: "Repositories to index (comma-separated, e.g., numpy/numpy,pandas-dev/pandas)"
        required: false
        default: "numpy/numpy,pandas-dev/pandas,tiangolo/fastapi,psf/requests,pallets/flask"
# Default GITHUB_TOKEN scope for the jobs in this workflow.
permissions:
  # Creating a release requires write access to repository contents.
  contents: write
jobs:
  # Index every repository in the matrix with CodeGraphContext, export the
  # result as a .cgc bundle and upload it as a workflow artifact.
  index-and-release:
    runs-on: ubuntu-latest
    # This job runs the indexer over third-party code, so it only gets read
    # access; publishing is done by the create-release job.
    permissions:
      contents: read
    strategy:
      # Keep indexing the remaining repositories when one of them fails.
      fail-fast: false
      matrix:
        repo:
          - owner: numpy
            name: numpy
            language: python
          - owner: pandas-dev
            name: pandas
            language: python
          - owner: tiangolo
            name: fastapi
            language: python
          - owner: psf
            name: requests
            language: python
          - owner: pallets
            name: flask
            language: python
    steps:
      - name: Checkout CodeGraphContext
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install CodeGraphContext
        run: |
          pip install -e .
          pip install tree-sitter tree-sitter-language-pack

      - name: Checkout target repository
        uses: actions/checkout@v4
        with:
          repository: ${{ matrix.repo.owner }}/${{ matrix.repo.name }}
          path: target-repo
          # `git describe` in the next step needs the tags and the commits that
          # connect them to HEAD. A depth-1 clone has neither, which made the
          # tag lookup always fall back to "main". The blob filter keeps the
          # full-history fetch small (file contents are fetched on demand).
          fetch-depth: 0
          filter: blob:none

      - name: Get repository metadata
        id: repo-meta
        working-directory: target-repo
        run: |
          COMMIT_SHA=$(git rev-parse HEAD)
          COMMIT_SHORT=$(git rev-parse --short HEAD)
          echo "commit_sha=$COMMIT_SHA" >> "$GITHUB_OUTPUT"
          echo "commit_short=$COMMIT_SHORT" >> "$GITHUB_OUTPUT"
          # Try to get the latest tag reachable from HEAD
          TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "main")
          # Tag names may contain "/", which cannot be part of a file name
          TAG=${TAG//\//-}
          echo "tag=$TAG" >> "$GITHUB_OUTPUT"
          # Get current date
          DATE=$(date +%Y%m%d)
          echo "date=$DATE" >> "$GITHUB_OUTPUT"

      - name: Index repository
        working-directory: target-repo
        env:
          REPO_SLUG: ${{ matrix.repo.owner }}/${{ matrix.repo.name }}
        run: |
          echo "Indexing ${REPO_SLUG}..."
          cgc index .

      - name: Export to .cgc bundle
        id: export
        # Values derived from the target repository (its tag name in
        # particular) are handed over as environment variables instead of being
        # pasted into the script text, so they can never be parsed as shell.
        env:
          REPO_NAME: ${{ matrix.repo.name }}
          TAG: ${{ steps.repo-meta.outputs.tag }}
          COMMIT_SHORT: ${{ steps.repo-meta.outputs.commit_short }}
        run: |
          BUNDLE_NAME="${REPO_NAME}-${TAG}-${COMMIT_SHORT}.cgc"
          echo "Creating bundle: $BUNDLE_NAME"
          cgc bundle export "$BUNDLE_NAME" --repo "$(pwd)/target-repo"
          # Store bundle name for later steps
          echo "bundle_name=$BUNDLE_NAME" >> "$GITHUB_OUTPUT"

      - name: Generate bundle info
        env:
          REPO_OWNER: ${{ matrix.repo.owner }}
          REPO_NAME: ${{ matrix.repo.name }}
          REPO_LANGUAGE: ${{ matrix.repo.language }}
          COMMIT_SHA: ${{ steps.repo-meta.outputs.commit_sha }}
          TAG: ${{ steps.repo-meta.outputs.tag }}
          RELEASE_DATE: ${{ steps.repo-meta.outputs.date }}
          BUNDLE_NAME: ${{ steps.export.outputs.bundle_name }}
        # The heredoc is intentionally unquoted so the variables and the date
        # command expand; every literal backtick must therefore be escaped,
        # otherwise the shell treats it as command substitution.
        run: |
          cat > bundle-info.md << EOF
          # ${REPO_NAME} - CodeGraphContext Bundle
          ## Repository Information
          - **Repository**: ${REPO_OWNER}/${REPO_NAME}
          - **Commit**: ${COMMIT_SHA}
          - **Tag/Version**: ${TAG}
          - **Indexed**: $(date -u +"%Y-%m-%d %H:%M:%S UTC")
          - **Language**: ${REPO_LANGUAGE}
          ## Usage
          Download this bundle and load it instantly:
          \`\`\`bash
          # Download the bundle
          wget https://github.com/${GITHUB_REPOSITORY}/releases/download/bundles-${RELEASE_DATE}/${BUNDLE_NAME}
          # Load into your database
          cgc load ${BUNDLE_NAME}
          # Or clear existing data first
          cgc load ${BUNDLE_NAME} --clear
          \`\`\`
          ## What's Inside
          This bundle contains a pre-indexed graph of the ${REPO_NAME} codebase, including:
          - All source files and their relationships
          - Functions, classes, and variables
          - Import dependencies
          - Call graphs and inheritance hierarchies
          ## Benefits
          - ⚡ **Instant loading** - No need to index the repository yourself
          - 🎯 **Accurate structure** - Pre-analyzed code relationships
          - 🔍 **Query ready** - Start querying immediately with MCP tools
          - 📦 **Portable** - Works with any CodeGraphContext installation
          ## System Requirements
          - CodeGraphContext v0.1.0 or later
          - FalkorDB or Neo4j database
          ## Bundle Format
          This is a \`.cgc\` (CodeGraphContext Bundle) file - a portable graph snapshot format.
          Learn more: https://github.com/${GITHUB_REPOSITORY}
          EOF

      - name: Upload bundle as artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.repo.name }}-bundle
          path: |
            ${{ steps.export.outputs.bundle_name }}
            bundle-info.md
          retention-days: 7

  # Collect the bundles produced by the matrix job and publish them as a
  # single dated GitHub release. Runs only when every matrix leg succeeded.
  create-release:
    needs: index-and-release
    runs-on: ubuntu-latest
    if: success()
    # Publishing a release needs write access to repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download all bundles
        uses: actions/download-artifact@v4
        with:
          path: bundles

      - name: Get current date
        id: date
        run: echo "date=$(date +%Y%m%d)" >> "$GITHUB_OUTPUT"

      - name: Prepare bundle files
        run: |
          echo "Downloaded artifacts:"
          ls -la bundles/
          echo "Moving .cgc files to bundles root..."
          # Only files inside the per-artifact sub-directories need moving
          find bundles -mindepth 2 -type f -name "*.cgc" -exec mv {} bundles/ \;
          echo "Final bundle files:"
          # Fail here instead of publishing a release that has no assets
          if ! ls -la bundles/*.cgc; then
            echo "::error::No .cgc files found!"
            exit 1
          fi

      - name: Create release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: bundles-${{ steps.date.outputs.date }}
          name: Pre-indexed Bundles - ${{ steps.date.outputs.date }}
          body: |
            # CodeGraphContext Pre-indexed Bundles
            This release contains pre-indexed `.cgc` bundles for popular repositories.
            ## Available Bundles
            - **numpy** - Fundamental package for scientific computing
            - **pandas** - Data analysis and manipulation library
            - **fastapi** - Modern web framework for building APIs
            - **requests** - HTTP library for Python
            - **flask** - Lightweight WSGI web application framework
            ## Quick Start
            ```bash
            # Download a bundle
            gh release download bundles-${{ steps.date.outputs.date }} --repo ${{ github.repository }} --pattern 'numpy-*.cgc'
            # Load it
            cgc load numpy-*.cgc
            # Start querying
            cgc find name array
            ```
            ## What are .cgc bundles?
            `.cgc` files are portable, pre-indexed graph snapshots that can be loaded instantly
            without re-indexing. They contain the complete code structure, relationships, and
            metadata for a repository.
            **Benefits:**
            - ⚡ Instant loading (seconds vs minutes/hours)
            - 🎯 Pre-analyzed code relationships
            - 🔍 Query-ready for AI assistants
            - 📦 Portable across installations
            ## Requirements
            - CodeGraphContext v0.1.0+
            - FalkorDB (default) or Neo4j
            ---
            Generated: ${{ steps.date.outputs.date }}
            Commit: ${{ github.sha }}
          files: bundles/*.cgc
          fail_on_unmatched_files: true
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}