Repomix

Overview Schema Related Servers Score Discussions

gitHubArchive.ts•6.11 KiB

import { Readable, Transform } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import * as zlib from 'node:zlib';
import { extract as tarExtract } from 'tar';
import { RepomixError } from '../../shared/errorHandle.js';
import { logger } from '../../shared/logger.js';
import {
  buildGitHubArchiveUrl,
  buildGitHubMasterArchiveUrl,
  buildGitHubTagArchiveUrl,
  checkGitHubResponse,
} from './gitHubArchiveApi.js';
import type { GitHubRepoInfo } from './gitRemoteParse.js';

/**
 * Tunables for a single archive download.
 */
export interface ArchiveDownloadOptions {
  timeout?: number; // Download timeout in milliseconds (default: 30000)
  retries?: number; // Number of retry attempts (default: 3)
}

/**
 * Snapshot passed to the progress callback while the archive is streaming.
 * `total` and `percentage` are null when the response carries no usable content-length.
 */
export interface ArchiveDownloadProgress {
  downloaded: number;
  total: number | null;
  percentage: number | null;
}

export type ProgressCallback = (progress: ArchiveDownloadProgress) => void;

/**
 * Injectable collaborators, so the network and stream plumbing can be replaced by callers.
 */
export interface ArchiveDownloadDeps {
  fetch: typeof globalThis.fetch;
  pipeline: typeof pipeline;
  Transform: typeof Transform;
  tarExtract: typeof tarExtract;
  createGunzip: typeof zlib.createGunzip;
}

const defaultDeps: ArchiveDownloadDeps = {
  fetch: globalThis.fetch,
  pipeline,
  Transform,
  tarExtract,
  createGunzip: zlib.createGunzip,
};

/**
 * Downloads and extracts a GitHub repository archive using streaming tar.gz extraction.
 *
 * Candidate URLs are tried in order (default archive URL, master fallback, tag format).
 * Each URL is attempted up to `retries` times with exponential backoff between attempts.
 *
 * @param repoInfo Repository coordinates used to build the candidate archive URLs.
 * @param targetDirectory Directory the archive contents are extracted into.
 * @param options Timeout (ms, default 30000) and retry count (default 3).
 * @param onProgress Optional callback receiving throttled download progress.
 * @param deps Injectable fetch/stream implementations.
 * @throws {RepomixError} When no candidate URL could be downloaded and extracted.
 */
export const downloadGitHubArchive = async (
  repoInfo: GitHubRepoInfo,
  targetDirectory: string,
  options: ArchiveDownloadOptions = {},
  onProgress?: ProgressCallback,
  deps: ArchiveDownloadDeps = defaultDeps,
): Promise<void> => {
  const { timeout = 30000, retries = 3 } = options;
  let lastError: Error | null = null;

  // Try downloading with multiple URL formats: main branch, master branch (fallback), then tag format
  const archiveUrls = [
    buildGitHubArchiveUrl(repoInfo),
    buildGitHubMasterArchiveUrl(repoInfo),
    buildGitHubTagArchiveUrl(repoInfo),
  ].filter((url): url is string => Boolean(url));

  for (const archiveUrl of archiveUrls) {
    for (let attempt = 1; attempt <= retries; attempt++) {
      try {
        logger.trace(`Downloading GitHub archive from: ${archiveUrl} (attempt ${attempt}/${retries})`);
        await downloadAndExtractArchive(archiveUrl, targetDirectory, timeout, onProgress, deps);
        logger.trace('Successfully downloaded and extracted GitHub archive');
        return; // Success - exit early
      } catch (error: unknown) {
        // Anything can be thrown; normalize so `.message` is always available below.
        const failure = error instanceof Error ? error : new Error(String(error));
        lastError = failure;
        logger.trace(`Archive download attempt ${attempt} failed:`, failure.message);

        // A 404-like failure will not improve on retry. When more than one candidate URL
        // exists, stop retrying this URL and move on to the next candidate.
        const isNotFoundError =
          failure instanceof RepomixError &&
          (failure.message.includes('not found') || failure.message.includes('404'));
        if (isNotFoundError && archiveUrls.length > 1) {
          break;
        }

        // If it's the last attempt, don't wait
        if (attempt < retries) {
          const delay = Math.min(1000 * 2 ** (attempt - 1), 5000); // Exponential backoff, max 5s
          logger.trace(`Retrying in ${delay}ms...`);
          await new Promise<void>((resolve) => setTimeout(resolve, delay));
        }
      }
    }
  }

  // If we get here, all attempts failed
  throw new RepomixError(
    `Failed to download GitHub archive after ${retries} attempts. ${lastError?.message || 'Unknown error'}`,
  );
};

/**
 * Downloads and extracts a tar.gz archive from a single URL using streaming pipeline.
 * The HTTP response is streamed through gunzip and tar extract directly to disk,
 * without writing a temporary archive file.
 *
 * The timeout covers the whole operation (request plus streaming extraction); when it
 * fires, the request is aborted and a RepomixError describing the timeout is thrown.
 *
 * @param archiveUrl URL of the tar.gz archive.
 * @param targetDirectory Directory the archive contents are extracted into.
 * @param timeout Abort deadline in milliseconds.
 * @param onProgress Optional callback receiving download progress (at most every 100ms, plus a final update).
 * @param deps Injectable fetch/stream implementations.
 */
const downloadAndExtractArchive = async (
  archiveUrl: string,
  targetDirectory: string,
  timeout: number,
  onProgress?: ProgressCallback,
  deps: ArchiveDownloadDeps = defaultDeps,
): Promise<void> => {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await deps.fetch(archiveUrl, {
      signal: controller.signal,
    });

    checkGitHubResponse(response);

    if (!response.body) {
      throw new RepomixError('No response body received');
    }

    // A missing or malformed content-length must surface as null, never NaN.
    const contentLength = response.headers.get('content-length');
    const parsedLength = contentLength ? Number.parseInt(contentLength, 10) : Number.NaN;
    const total = Number.isNaN(parsedLength) ? null : parsedLength;

    let downloaded = 0;
    let lastProgressUpdate = 0;

    const nodeStream = Readable.fromWeb(response.body);

    // Transform stream for progress tracking
    const progressStream = new deps.Transform({
      transform(chunk, _encoding, callback) {
        downloaded += chunk.length;

        // Update progress at most every 100ms to avoid too frequent updates
        const now = Date.now();
        if (onProgress && now - lastProgressUpdate > 100) {
          lastProgressUpdate = now;
          onProgress({
            downloaded,
            total,
            percentage: total ? Math.round((downloaded / total) * 100) : null,
          });
        }

        callback(null, chunk);
      },
      flush(callback) {
        // Final update so consumers always see completion, regardless of throttling.
        if (onProgress) {
          onProgress({
            downloaded,
            total,
            percentage: total ? 100 : null,
          });
        }
        callback();
      },
    });

    // Stream: HTTP response -> progress tracking -> gunzip -> tar extract to disk
    // strip: 1 removes the top-level "repo-branch/" directory from archive paths
    const extractStream = deps.tarExtract({
      cwd: targetDirectory,
      strip: 1,
    });

    const gunzipStream = deps.createGunzip();

    try {
      await deps.pipeline(nodeStream, progressStream, gunzipStream, extractStream);
    } finally {
      // Explicitly destroy streams to release handles.
      // Bun's pipeline() may not fully clean up, causing subsequent worker_threads to hang.
      nodeStream.destroy();
      progressStream.destroy();
      gunzipStream.destroy();
    }
  } catch (error: unknown) {
    // The controller is only ever aborted by the timeout timer above, so an aborted
    // signal means the deadline elapsed. Report that instead of a generic abort error.
    // The message intentionally contains no digits so it cannot be mistaken for a
    // 404-like failure by substring checks on the message.
    if (controller.signal.aborted) {
      throw new RepomixError('Archive download timed out');
    }
    throw error;
  } finally {
    clearTimeout(timeoutId);
  }
};

/**
 * Checks if archive download is supported for the given repository info.
 *
 * @returns Always `true`; the argument is currently unused.
 */
export const isArchiveDownloadSupported = (_repoInfo: GitHubRepoInfo): boolean => {
  // Archive download is supported for all GitHub repositories
  // In the future, we might add conditions here (e.g., size limits, private repos)
  return true;
};

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yamadashy/repomix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

gitHubArchive.ts•6.11 KiB