GithubFinder.php•8.06 kB
<?php
declare(strict_types=1);
namespace Butschster\ContextGenerator\Source\Github;
use Butschster\ContextGenerator\Config\Exclude\ExcludeRegistryInterface;
use Butschster\ContextGenerator\Lib\Finder\FinderInterface;
use Butschster\ContextGenerator\Lib\Finder\FinderResult;
use Butschster\ContextGenerator\Lib\GithubClient\GithubClientInterface;
use Butschster\ContextGenerator\Lib\GithubClient\Model\GithubRepository;
use Butschster\ContextGenerator\Lib\PathFilter\ContentsFilter;
use Butschster\ContextGenerator\Lib\PathFilter\ExcludePathFilter;
use Butschster\ContextGenerator\Lib\PathFilter\FilePatternFilter;
use Butschster\ContextGenerator\Lib\PathFilter\FilterInterface;
use Butschster\ContextGenerator\Lib\PathFilter\PathFilter;
use Butschster\ContextGenerator\Lib\TreeBuilder\FileTreeBuilder;
use Butschster\ContextGenerator\Lib\Variable\VariableResolver;
use Butschster\ContextGenerator\Source\Fetcher\FilterableSourceInterface;
use Butschster\ContextGenerator\Application\Logger\LoggerPrefix;
use Psr\Log\LoggerInterface;
/**
 * GitHub content finder implementation
 *
 * Fetches and filters content from GitHub repositories. A single find() call
 * lists the configured source paths through the GitHub client, expands every
 * directory recursively, applies the path filters configured on the source,
 * wraps the remaining items in GithubFileInfo objects, applies the content
 * filter and the global exclusion registry, and finally builds a file tree.
 */
final class GithubFinder implements FinderInterface
{
    /**
     * Path filters built from the source given to the most recent find() call
     *
     * The list is reset at the start of every find() call, so applyFilters()
     * only has an effect after find() has initialized it.
     *
     * @var array<FilterInterface>
     */
    private array $filters = [];

    /**
     * Create a new GitHub finder
     *
     * @param GithubClientInterface $githubClient Client used to list directories and read file contents
     * @param ExcludeRegistryInterface $excludeRegistry Global exclusion rules checked against each file path
     * @param VariableResolver $variableResolver Resolves the token configured on the source before it is set on the client
     * @param FileTreeBuilder $fileTreeBuilder Builds the tree view returned alongside the files
     * @param LoggerInterface|null $logger Optional logger; nothing is logged when it is null
     */
    public function __construct(
        private readonly GithubClientInterface $githubClient,
        private readonly ExcludeRegistryInterface $excludeRegistry,
        private readonly VariableResolver $variableResolver = new VariableResolver(),
        private readonly FileTreeBuilder $fileTreeBuilder = new FileTreeBuilder(),
        #[LoggerPrefix(prefix: 'github-finder')]
        private readonly ?LoggerInterface $logger = null,
    ) {}

    /**
     * Find files in a GitHub repository based on source configuration
     *
     * @param FilterableSourceInterface $source Must be a GithubSource instance
     * @param string $basePath Not used by this implementation
     * @param array $options Passed through unchanged to the file tree builder
     * @return FinderResult Matching files (re-indexed as a list) and the tree built from their relative path names
     *
     * @throws \InvalidArgumentException When $source is not a GithubSource
     */
    public function find(FilterableSourceInterface $source, string $basePath = '', array $options = []): FinderResult
    {
        if (!$source instanceof GithubSource) {
            throw new \InvalidArgumentException('Source must be an instance of GithubSource');
        }

        // Only touch the client's token when the source actually configures one
        if ($source->githubToken) {
            $this->githubClient->setToken($this->variableResolver->resolve($source->githubToken));
        }

        // Parse repository from string
        $repository = new GithubRepository($source->repository, $source->branch);

        // Initialize path filters based on source configuration
        $this->initializePathFilters($source);

        // A single path may be configured as a plain string; normalize to a list
        $sourcePaths = $source->sourcePaths;
        if (\is_string($sourcePaths)) {
            $sourcePaths = [$sourcePaths];
        }

        // Recursively discover all files from repository paths
        $discoveredItems = $this->discoverRepositoryItems($repository, $sourcePaths);

        // Apply path-based filters
        $filteredItems = $this->applyFilters($discoveredItems);

        // Wrap the remaining API items in file info objects
        $files = $this->buildResultStructure($filteredItems, $repository);

        // Apply content filters
        $files = (new ContentsFilter(
            contains: $source->contains(),
            notContains: $source->notContains(),
        ))->apply($files);

        // Apply global exclusion registry
        $files = $this->applyGlobalExclusions($files);

        /** @psalm-suppress InvalidArgument */
        $tree = \array_map(static fn(GithubFileInfo $file): string => $file->getRelativePathname(), $files);

        // array_filter() keeps the original keys, so re-index the files for the result
        return new FinderResult(
            \array_values($files),
            $this->fileTreeBuilder->buildTree($tree, '', $options),
        );
    }

    /**
     * Apply all filters to the GitHub API response items
     *
     * Filters run in the order they were registered; each one receives the
     * output of the previous one.
     *
     * @param array $items Items as returned by the GitHub client
     * @return array Items that passed every registered filter
     */
    public function applyFilters(array $items): array
    {
        foreach ($this->filters as $filter) {
            $items = $filter->apply($items);
        }

        return $items;
    }

    /**
     * Apply global exclusion patterns to filter files
     *
     * Each excluded path is reported at debug level. Keys of the input array
     * are preserved, so the result may have gaps in its keys.
     *
     * @param array $files GithubFileInfo objects to check
     * @return array Files that are not excluded by the registry
     */
    private function applyGlobalExclusions(array $files): array
    {
        return \array_filter($files, function (GithubFileInfo $file): bool {
            $path = $file->getRelativePathname();

            if ($this->excludeRegistry->shouldExclude($path)) {
                $this->logger?->debug('File excluded by global exclusion pattern', [
                    'path' => $path,
                ]);

                return false;
            }

            return true;
        });
    }

    /**
     * Initialize path filters based on source configuration
     *
     * Replaces any filters left over from a previous call. A filter is only
     * registered when the corresponding source setting is non-empty.
     *
     * @param FilterableSourceInterface $source Source with filter criteria
     */
    private function initializePathFilters(FilterableSourceInterface $source): void
    {
        // Clear existing filters
        $this->filters = [];

        // Add file name pattern filter
        $filePattern = $source->name();
        if ($filePattern) {
            $this->filters[] = new FilePatternFilter($filePattern);
        }

        // Add path inclusion filter
        $path = $source->path();
        if ($path) {
            $this->filters[] = new PathFilter($path);
        }

        // Add path exclusion filter
        $excludePatterns = $source->notPath();
        if ($excludePatterns) {
            $this->filters[] = new ExcludePathFilter($excludePatterns);
        }
    }

    /**
     * Discover all items from repository paths recursively
     *
     * Results are concatenated in the order of $sourcePaths.
     *
     * @param GithubRepository $repository GitHub repository
     * @param array<string> $sourcePaths Source paths to discover
     * @return array<array<string, mixed>> Discovered items
     */
    private function discoverRepositoryItems(GithubRepository $repository, array $sourcePaths): array
    {
        $allItems = [];

        foreach ($sourcePaths as $path) {
            $items = $this->fetchDirectoryContents($repository, $path);

            // Append in place instead of array_merge() in a loop, which would
            // re-copy the whole accumulator for every source path
            foreach ($this->traverseDirectoryRecursively($items, $repository) as $discovered) {
                $allItems[] = $discovered;
            }
        }

        return $allItems;
    }

    /**
     * Traverse directory items recursively to discover all files
     *
     * Items whose 'type' is 'dir' are replaced by the (recursively expanded)
     * contents fetched for their 'path'; every other item, including one with
     * no 'type' key, is kept unchanged. The original ordering is preserved.
     *
     * @param array $items Items of a single directory listing
     * @param GithubRepository $repository GitHub repository
     * @return array Flat list of all non-directory items
     */
    private function traverseDirectoryRecursively(array $items, GithubRepository $repository): array
    {
        $result = [];

        foreach ($items as $item) {
            if (($item['type'] ?? '') !== 'dir') {
                $result[] = $item;
                continue;
            }

            $subItems = $this->fetchDirectoryContents($repository, $item['path']);

            // Append in place rather than merging, so the accumulator is not
            // copied again for every sub-directory
            foreach ($this->traverseDirectoryRecursively($subItems, $repository) as $nested) {
                $result[] = $nested;
            }
        }

        return $result;
    }

    /**
     * Build the list of file info objects for the given API items
     *
     * The content of each file is not fetched here: the file info receives a
     * closure that requests the content from the GitHub client when invoked.
     * Items that raise an exception while being wrapped are skipped and
     * reported through the logger instead of aborting the whole run.
     *
     * @param array $items Filtered GitHub API items; each is expected to carry a 'path' key
     * @param GithubRepository $repository GitHub repository
     * @return array<GithubFileInfo> File info objects in the order of $items
     */
    private function buildResultStructure(array $items, GithubRepository $repository): array
    {
        $files = [];

        foreach ($items as $item) {
            $path = $item['path'];

            try {
                // dirname() yields '.' for a file at the repository root
                $relativePath = \dirname((string) $path);
                if ($relativePath === '.') {
                    $relativePath = '';
                }

                $files[] = new GithubFileInfo(
                    $relativePath,
                    $path,
                    $item,
                    fn() => $this->fetchFileContent($repository, $path),
                );
            } catch (\Exception $e) {
                // Best effort: keep going, but leave a trace of what was dropped and why
                $this->logger?->warning('Skipping GitHub item that could not be processed', [
                    'path' => $path,
                    'error' => $e->getMessage(),
                ]);
            }
        }

        return $files;
    }

    /**
     * Fetch directory contents from GitHub API
     *
     * @param GithubRepository $repository GitHub repository
     * @param string $path Path to list; defaults to an empty string
     * @return array Items returned by the GitHub client for that path
     */
    private function fetchDirectoryContents(GithubRepository $repository, string $path = ''): array
    {
        return $this->githubClient->getContents($repository, $path);
    }

    /**
     * Fetch file content from GitHub API
     *
     * @param GithubRepository $repository GitHub repository
     * @param string $path Path of the file within the repository
     * @return string File content as returned by the GitHub client
     */
    private function fetchFileContent(GithubRepository $repository, string $path): string
    {
        return $this->githubClient->getFileContent($repository, $path);
    }
}