DocumentationIndex.php•23.8 kB
<?php
declare(strict_types=1);
namespace OpenFGA\MCP\Documentation;
use RuntimeException;
use function array_slice;
use function count;
use function is_string;
use function strlen;
/**
 * In-memory index over the Markdown documentation files found in DOCS_PATH.
 *
 * Files named `<NAME>_SDK.md` are parsed as SDK references (sections, classes
 * and methods); `AUTHORING_OPENFGA_MODELS.md` and `OPENFGA_DOCS.md` are parsed
 * as general guides (sections only). Every file is split into chunks that can
 * be fetched by ID, by section, by class/method, or found with a simple
 * term-frequency search. The index is built lazily on first use.
 */
final class DocumentationIndex
{
    /**
     * Maximum number of buffered lines before a chunk is flushed.
     *
     * NOTE(review): the value is compared against a line count, not a byte count.
     */
    private const int CHUNK_SIZE_LIMIT = 5000;

    private const string DOCS_PATH = __DIR__ . '/../../docs';

    /**
     * General (non-SDK) documents: file name => [index key, display name].
     */
    private const array GENERAL_DOCUMENTS = [
        'AUTHORING_OPENFGA_MODELS.md' => ['authoring', 'Model Authoring Guide'],
        'OPENFGA_DOCS.md' => ['general', 'OpenFGA Documentation'],
    ];

    /**
     * All chunks, keyed by chunk ID.
     *
     * @var array<string, array{id: string, sdk: string, content: string, metadata: array{section: string|null, class: string|null, method: string|null, line_count: int, size_bytes: int}, prev_chunk?: string, next_chunk?: string}>
     */
    private array $chunks = [];

    /**
     * Per-document structure, keyed by lower-case SDK name (or 'authoring' / 'general').
     *
     * @var array<string, array{name: string, file: string, sections: array<string, array{line_start: int, chunks: array<string>}>, classes: array<string, array{namespace: string|null, methods: array<string, array{signature: string|null, parameters: array<mixed>, returns: string|null, chunk_id: string|null}>, chunk_id: string|null}>, chunks: array<string>, source: string|null, generated: string|null}>
     */
    private array $index = [];

    private bool $initialized = false;

    /**
     * Lower-case names of the SDK documents that were found (general guides are not listed).
     *
     * @var array<string>
     */
    private array $sdkList = [];

    /**
     * Get a specific chunk by its ID, building the index first if necessary.
     *
     * @param string $chunkId
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array{id: string, sdk: string, content: string, metadata: array{section: string|null, class: string|null, method: string|null, line_count: int, size_bytes: int}, prev_chunk?: string, next_chunk?: string}|null
     */
    public function getChunk(string $chunkId): ?array
    {
        $this->ensureInitialized();

        return $this->chunks[$chunkId] ?? null;
    }

    /**
     * Alias of getChunk().
     *
     * Delegates to getChunk() so that the index is built on demand; reading
     * the chunk map directly would always yield null on a fresh instance.
     *
     * @param string $chunkId
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array{id: string, sdk: string, content: string, metadata: array{section: string|null, class: string|null, method: string|null, line_count: int, size_bytes: int}, prev_chunk?: string, next_chunk?: string}|null
     */
    public function getChunkById(string $chunkId): ?array
    {
        return $this->getChunk($chunkId);
    }

    /**
     * Get every chunk recorded for a section of a document, in file order.
     *
     * @param string $sdk     document key; matched case-insensitively
     * @param string $section section title; matched exactly
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array<array{id: string, sdk: string, content: string, metadata: array{section: string|null, class: string|null, method: string|null, line_count: int, size_bytes: int}, prev_chunk?: string, next_chunk?: string}>
     */
    public function getChunksBySection(string $sdk, string $section): array
    {
        $this->ensureInitialized();

        $chunkIds = $this->index[strtolower($sdk)]['sections'][$section]['chunks'] ?? [];
        $chunks = [];

        foreach ($chunkIds as $chunkId) {
            if (isset($this->chunks[$chunkId])) {
                $chunks[] = $this->chunks[$chunkId];
            }
        }

        return $chunks;
    }

    /**
     * Get the documentation recorded for a class.
     *
     * Returns null when the class is unknown or has no chunk linked to it.
     *
     * @param string $sdk       document key; matched case-insensitively
     * @param string $className class name; matched exactly
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array{class: string, sdk: string, namespace: string|null, methods: array<string, array{signature: string|null, parameters: array<mixed>, returns: string|null, chunk_id: string|null}>, content: string, metadata: array<mixed>}|null
     */
    public function getClassDocumentation(string $sdk, string $className): ?array
    {
        $this->ensureInitialized();

        $sdkKey = strtolower($sdk);
        $classInfo = $this->index[$sdkKey]['classes'][$className] ?? null;

        if (null === $classInfo) {
            return null;
        }

        $chunkId = $classInfo['chunk_id'];

        if (null === $chunkId || ! isset($this->chunks[$chunkId])) {
            return null;
        }

        $chunk = $this->chunks[$chunkId];

        return [
            'class' => $className,
            'sdk' => $sdkKey,
            'namespace' => $classInfo['namespace'],
            'methods' => $classInfo['methods'],
            'content' => $chunk['content'],
            'metadata' => $chunk['metadata'],
        ];
    }

    /**
     * Get the documentation recorded for a method of a class.
     *
     * Returns null when the class or method is unknown or has no chunk linked
     * to it. The 'sdk' value of the result is the argument exactly as passed.
     *
     * @param string $sdk        document key; matched case-insensitively
     * @param string $className  class name; matched exactly
     * @param string $methodName method heading; matched exactly
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array{method: string, class: string, sdk: string, signature: string|null, parameters: array<mixed>, returns: string|null, content: string}|null
     */
    public function getMethodDocumentation(string $sdk, string $className, string $methodName): ?array
    {
        $this->ensureInitialized();

        $classDoc = $this->getClassDocumentation($sdk, $className);

        if (null === $classDoc || ! isset($classDoc['methods'][$methodName])) {
            return null;
        }

        $methodInfo = $classDoc['methods'][$methodName];
        $chunkId = $methodInfo['chunk_id'];

        if (null === $chunkId || ! isset($this->chunks[$chunkId])) {
            return null;
        }

        return [
            'method' => $methodName,
            'class' => $className,
            'sdk' => $sdk,
            'signature' => $methodInfo['signature'],
            'parameters' => $methodInfo['parameters'],
            'returns' => $methodInfo['returns'],
            'content' => $this->chunks[$chunkId]['content'],
        ];
    }

    /**
     * List the lower-case names of all SDK documents that were indexed.
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array<string>
     */
    public function getSdkList(): array
    {
        $this->ensureInitialized();

        return $this->sdkList;
    }

    /**
     * Summarise one indexed document: its section titles, class names and chunk count.
     *
     * @param string $sdk document key; matched case-insensitively
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array{sdk: string, name: string, file: string, sections: array<string>, classes: array<string>, total_chunks: int, source: string|null, generated: string|null}|null
     */
    public function getSdkOverview(string $sdk): ?array
    {
        $this->ensureInitialized();

        $sdkKey = strtolower($sdk);

        if (! isset($this->index[$sdkKey])) {
            return null;
        }

        $sdkData = $this->index[$sdkKey];

        return [
            'sdk' => $sdkKey,
            'name' => $sdkData['name'],
            'file' => $sdkData['file'],
            'sections' => array_keys($sdkData['sections'] ?? []),
            'classes' => array_keys($sdkData['classes'] ?? []),
            'total_chunks' => count($sdkData['chunks'] ?? []),
            'source' => $sdkData['source'] ?? null,
            'generated' => $sdkData['generated'] ?? null,
        ];
    }

    /**
     * Scan and parse the documentation files; does nothing when already initialized.
     *
     * @throws RuntimeException when the documentation directory is missing or cannot be scanned
     */
    public function initialize(): void
    {
        if ($this->initialized) {
            return;
        }

        $this->scanDocumentationFiles();
        $this->buildIndex();
        $this->initialized = true;
    }

    /**
     * Whether initialize() has completed successfully.
     */
    public function isInitialized(): bool
    {
        return $this->initialized;
    }

    /**
     * Search chunk contents for the whitespace-separated terms of a query.
     *
     * Matching is case-insensitive. Results are ordered by descending score
     * and only chunks with a positive score are returned.
     *
     * @param string  $query
     * @param ?string $sdk   restrict the search to one document key (case-insensitive)
     * @param int     $limit maximum number of results; values below zero are treated as zero
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     *
     * @return array<array{chunk_id: string, sdk: string, score: float, preview: string, metadata: array<mixed>}>
     */
    public function searchChunks(string $query, ?string $sdk = null, int $limit = 10): array
    {
        $this->ensureInitialized();

        $queryLower = strtolower($query);
        $sdkFilter = null === $sdk ? null : strtolower($sdk);
        $results = [];

        foreach ($this->chunks as $chunkId => $chunk) {
            if (null !== $sdkFilter && $chunk['sdk'] !== $sdkFilter) {
                continue;
            }

            $score = $this->calculateRelevanceScore($queryLower, strtolower($chunk['content']), $chunk['metadata']);

            if ($score <= 0.0) {
                continue;
            }

            $results[] = [
                'chunk_id' => $chunkId,
                'sdk' => $chunk['sdk'],
                'score' => $score,
                'preview' => $this->generatePreview($chunk['content'], $query),
                'metadata' => $chunk['metadata'],
            ];
        }

        usort(
            $results,
            /**
             * @param array{chunk_id: string, sdk: string, score: float, preview: string, metadata: array<mixed>} $a
             * @param array{chunk_id: string, sdk: string, score: float, preview: string, metadata: array<mixed>} $b
             */
            static fn (array $a, array $b): int => $b['score'] <=> $a['score'],
        );

        // A negative length would make array_slice() drop results from the tail instead of limiting them.
        return array_slice($results, 0, max(0, $limit));
    }

    /**
     * Link each chunk to its predecessor and successor within the same document.
     */
    private function buildIndex(): void
    {
        foreach ($this->index as $sdkData) {
            $chunkIds = array_values($sdkData['chunks'] ?? []);
            $lastPosition = count($chunkIds) - 1;

            foreach ($chunkIds as $position => $currentId) {
                if (0 < $position) {
                    $this->chunks[$currentId]['prev_chunk'] = $chunkIds[$position - 1];
                }

                if ($position < $lastPosition) {
                    $this->chunks[$currentId]['next_chunk'] = $chunkIds[$position + 1];
                }
            }
        }
    }

    /**
     * Score a chunk against a query.
     *
     * Each term adds one point per occurrence in the content, plus 5 / 3 / 2
     * points when it occurs in the chunk's class / method / section name.
     *
     * @param string       $query    lower-cased query
     * @param string       $content  lower-cased chunk content
     * @param array<mixed> $metadata chunk metadata; only string 'class', 'method' and 'section' values are used
     */
    private function calculateRelevanceScore(string $query, string $content, array $metadata): float
    {
        // Collect the metadata labels once; they do not change from term to term.
        $labels = [];

        foreach (['class' => 5.0, 'method' => 3.0, 'section' => 2.0] as $field => $weight) {
            /** @var mixed $label */
            $label = $metadata[$field] ?? null;

            if (is_string($label)) {
                $labels[] = [$label, $weight];
            }
        }

        $score = 0.0;

        foreach ($this->splitQueryTerms($query) as $queryTerm) {
            $score += (float) substr_count($content, $queryTerm);

            foreach ($labels as [$label, $weight]) {
                if (false !== stripos($label, $queryTerm)) {
                    $score += $weight;
                }
            }
        }

        return $score;
    }

    /**
     * Store a chunk and link it into the section / class / method indexes.
     *
     * @param string                                $sdk
     * @param array<string>                         $lines
     * @param ?string                               $section
     * @param ?string                               $class
     * @param ?string                               $method
     * @param array<array{0: string, 1: string}>    $headedMethods [class, method] pairs whose heading lies inside this chunk
     */
    private function createChunk(string $sdk, array $lines, ?string $section, ?string $class, ?string $method, array $headedMethods = []): void
    {
        $content = implode("\n", $lines);
        $chunkId = $sdk . '_chunk_' . str_pad((string) count($this->chunks), 6, '0', STR_PAD_LEFT);

        $this->chunks[$chunkId] = [
            'id' => $chunkId,
            'sdk' => $sdk,
            'content' => $content,
            'metadata' => [
                'section' => $section,
                'class' => $class,
                'method' => $method,
                'line_count' => count($lines),
                'size_bytes' => strlen($content),
            ],
        ];
        $this->index[$sdk]['chunks'][] = $chunkId;

        if (null !== $section && isset($this->index[$sdk]['sections'][$section])) {
            $this->index[$sdk]['sections'][$section]['chunks'][] = $chunkId;
        }

        // Every method whose heading is inside this chunk must be resolvable,
        // not only the last one seen before the chunk was flushed.
        foreach ($headedMethods as [$ownerClass, $methodName]) {
            if (
                isset($this->index[$sdk]['classes'][$ownerClass]['methods'][$methodName])
                && null === $this->index[$sdk]['classes'][$ownerClass]['methods'][$methodName]['chunk_id']
            ) {
                $this->index[$sdk]['classes'][$ownerClass]['methods'][$methodName]['chunk_id'] = $chunkId;
            }
        }

        if (null === $class || ! isset($this->index[$sdk]['classes'][$class])) {
            return;
        }

        // The first chunk seen for a class becomes the class chunk.
        $this->index[$sdk]['classes'][$class]['chunk_id'] ??= $chunkId;

        if (null !== $method && isset($this->index[$sdk]['classes'][$class]['methods'][$method])) {
            $this->index[$sdk]['classes'][$class]['methods'][$method]['chunk_id'] = $chunkId;
        }
    }

    /**
     * Build the index on first use.
     *
     * @throws RuntimeException when the documentation directory cannot be scanned
     */
    private function ensureInitialized(): void
    {
        if (! $this->initialized) {
            $this->initialize();
        }
    }

    /**
     * Derive a class name from a source path: the file name without its extension.
     *
     * Only paths containing a directory separator and ending in one of the
     * listed source extensions are recognised; anything else yields null.
     */
    private function extractClassNameFromSource(string $sourceFile): ?string
    {
        if (1 === preg_match('/\/([^\/]+)\.(php|go|py|java|cs|js|ts)$/', $sourceFile, $matches)) {
            // When preg_match returns 1, capturing groups are guaranteed to be set
            /** @var array{0: non-falsy-string, 1: non-empty-string, 2: non-empty-string} $matches */
            return $matches[1];
        }

        return null;
    }

    /**
     * Cut an excerpt of the content around the first match of the query.
     *
     * The whole query is tried first, then its individual terms; when nothing
     * matches the excerpt starts at the beginning. An ellipsis marks each side
     * on which content was cut off.
     */
    private function generatePreview(string $content, string $query, int $previewLength = 200): string
    {
        $contentLower = strtolower($content);
        $queryLower = strtolower($query);
        $position = '' === $queryLower ? false : strpos($contentLower, $queryLower);

        if (false === $position) {
            // Empty terms are skipped: strpos() finds '' at offset 0, which would
            // anchor the preview at the start and hide a real match further on.
            foreach ($this->splitQueryTerms($queryLower) as $queryTerm) {
                $position = strpos($contentLower, $queryTerm);

                if (false !== $position) {
                    break;
                }
            }
        }

        if (false === $position) {
            $position = 0;
        }

        $length = strlen($content);
        $start = max(0, $position - 50);
        $end = min($length, $position + $previewLength);

        // Offsets are byte offsets; nudge them (by at most 3 bytes, the longest
        // UTF-8 tail) so the excerpt never begins or ends inside a character.
        for ($shift = 0; $shift < 3 && 0 < $start && 0x80 === (ord($content[$start]) & 0xC0); ++$shift) {
            --$start;
        }

        for ($shift = 0; $shift < 3 && $end < $length && 0x80 === (ord($content[$end]) & 0xC0); ++$shift) {
            ++$end;
        }

        $preview = substr($content, $start, $end - $start);

        if (0 < $start) {
            $preview = '...' . ltrim($preview);
        }

        if ($end < $length) {
            return rtrim($preview) . '...';
        }

        return $preview;
    }

    /**
     * Parse an SDK reference file into chunks, sections, classes and methods.
     *
     * Chunks are flushed at `<!-- Source: ... -->` / `<!-- End of ... -->`
     * markers (which are not stored), at `## ` headings, and whenever the
     * buffer reaches CHUNK_SIZE_LIMIT lines. An unreadable file is skipped.
     */
    private function parseDocumentationFile(string $file, string $sdk): void
    {
        $content = file_get_contents($file);

        if (false === $content) {
            return;
        }

        $currentSection = null;
        $currentClass = null;
        $currentMethod = null;
        $inSourceBlock = false;
        $buffer = [];
        $headedMethods = [];

        // Turns the buffered lines into a chunk, labelled with the parser state at flush time.
        $flush = function () use ($sdk, &$buffer, &$headedMethods, &$currentSection, &$currentClass, &$currentMethod): void {
            if ([] === $buffer) {
                return;
            }

            $this->createChunk($sdk, $buffer, $currentSection, $currentClass, $currentMethod, $headedMethods);
            $buffer = [];
            $headedMethods = [];
        };

        foreach (explode("\n", $content) as $offset => $line) {
            $lineNumber = $offset + 1;

            if (1 === preg_match('/^> Compiled from: (.+)$/', $line, $matches)) {
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $this->index[$sdk]['source'] = trim($matches[1]);
            }

            if (1 === preg_match('/^> Generated: (.+)$/', $line, $matches)) {
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $this->index[$sdk]['generated'] = trim($matches[1]);
            }

            if (1 === preg_match('/^<!-- Source: (.+) -->$/', $line, $matches)) {
                $flush();
                $inSourceBlock = true;
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $currentClass = $this->extractClassNameFromSource(trim($matches[1]));

                continue;
            }

            if (1 === preg_match('/^<!-- End of .+ -->$/', $line)) {
                $flush();
                $inSourceBlock = false;
                $currentClass = null;
                $currentMethod = null;

                continue;
            }

            if (1 === preg_match('/^## (.+)$/', $line, $matches)) {
                $flush();
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $currentSection = trim($matches[1]);
                $this->index[$sdk]['sections'][$currentSection] ??= [
                    'line_start' => $lineNumber,
                    'chunks' => [],
                ];
            }

            if ($inSourceBlock && 1 === preg_match('/^### (.+)$/', $line, $matches)) {
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $currentClass = trim($matches[1]);
                $this->registerClass($sdk, $currentClass);
            }

            if (null !== $currentClass && 1 === preg_match('/^##### (.+)$/', $line, $matches)) {
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $currentMethod = trim($matches[1]);
                // The class may only be known from the source path so far; give it
                // a complete entry before hanging a method off it.
                $this->registerClass($sdk, $currentClass);
                $this->index[$sdk]['classes'][$currentClass]['methods'][$currentMethod] ??= [
                    'signature' => null,
                    'parameters' => [],
                    'returns' => null,
                    'chunk_id' => null,
                ];
                $headedMethods[] = [$currentClass, $currentMethod];
            }

            $buffer[] = $line;

            if (self::CHUNK_SIZE_LIMIT <= count($buffer)) {
                $flush();
            }
        }

        $flush();
    }

    /**
     * Parse a general guide into chunks, one or more per `##` / `###` section.
     *
     * An unreadable file is skipped.
     */
    private function parseGeneralDocumentation(string $file, string $key): void
    {
        $content = file_get_contents($file);

        if (false === $content) {
            return;
        }

        $currentSection = null;
        $buffer = [];

        foreach (explode("\n", $content) as $offset => $line) {
            $lineNumber = $offset + 1;

            // Match both ## and ### headers for sections (but not #### or deeper)
            if (1 === preg_match('/^(##|###) (.+)$/', $line, $matches)) {
                if ([] !== $buffer) {
                    $this->createChunk($key, $buffer, $currentSection, null, null);
                    $buffer = [];
                }

                /** @var array{0: non-falsy-string, 1: '###'|'##', 2: non-empty-string} $matches */
                $currentSection = $this->cleanSectionTitle($matches[2], $lineNumber);
                $this->index[$key]['sections'][$currentSection] ??= [
                    'line_start' => $lineNumber,
                    'chunks' => [],
                ];
            }

            $buffer[] = $line;

            if (self::CHUNK_SIZE_LIMIT <= count($buffer)) {
                $this->createChunk($key, $buffer, $currentSection, null, null);
                $buffer = [];
            }
        }

        if ([] !== $buffer) {
            $this->createChunk($key, $buffer, $currentSection, null, null);
        }
    }

    /**
     * Turn a raw heading into a plain-text section title.
     *
     * `<ProductName .../>` becomes "OpenFGA"; other tags, `{...}` attribute
     * blocks and stray angle brackets are removed and whitespace is collapsed.
     * If nothing is left, a lighter clean-up of the raw heading is used, and
     * failing that the placeholder "Section <line number>".
     */
    private function cleanSectionTitle(string $rawTitle, int $lineNumber): string
    {
        $rawTitle = trim($rawTitle);

        $title = preg_replace('/<ProductName[^>]*\/>/', 'OpenFGA', $rawTitle) ?? $rawTitle;
        $title = strip_tags($title);
        $title = preg_replace('/\s*\{[^}]*\}\s*/', ' ', $title) ?? $title;
        $title = str_replace(['<', '>'], '', $title);
        $title = preg_replace('/\s+/', ' ', $title) ?? $title;
        $title = trim($title);

        if ('' !== $title) {
            return $title;
        }

        // Just remove the most problematic characters
        $title = trim(str_replace(['<', '>', '{', '}', '/'], '', $rawTitle));

        return '' === $title ? 'Section ' . $lineNumber : $title;
    }

    /**
     * Create the (empty) index entry for a document.
     */
    private function registerDocument(string $key, string $name, string $file): void
    {
        $this->index[$key] = [
            'name' => $name,
            'file' => $file,
            'sections' => [],
            'classes' => [],
            'chunks' => [],
            'source' => null,
            'generated' => null,
        ];
    }

    /**
     * Make sure a class has a complete index entry; an existing entry is left untouched.
     */
    private function registerClass(string $sdk, string $class): void
    {
        $this->index[$sdk]['classes'][$class] ??= [
            'namespace' => null,
            'methods' => [],
            'chunk_id' => null,
        ];
    }

    /**
     * Split a query into its non-empty, whitespace-separated terms.
     *
     * @return array<string>
     */
    private function splitQueryTerms(string $query): array
    {
        $terms = preg_split('/\s+/', $query, -1, PREG_SPLIT_NO_EMPTY);

        return false === $terms ? [] : $terms;
    }

    /**
     * Find and parse every recognised Markdown file directly inside DOCS_PATH.
     *
     * @throws RuntimeException when the directory is missing or cannot be scanned
     */
    private function scanDocumentationFiles(): void
    {
        if (! is_dir(self::DOCS_PATH)) {
            throw new RuntimeException('Documentation directory not found: ' . self::DOCS_PATH);
        }

        $files = glob(self::DOCS_PATH . '/*.md');

        if (false === $files) {
            throw new RuntimeException('Failed to scan documentation directory');
        }

        foreach ($files as $file) {
            $filename = basename($file);

            if (1 === preg_match('/^([A-Z]+)_SDK\.md$/', $filename, $matches)) {
                /** @var array{0: non-falsy-string, 1: non-empty-string} $matches */
                $sdkName = strtolower($matches[1]);
                $this->sdkList[] = $sdkName;
                $this->registerDocument($sdkName, $matches[1] . ' SDK', $file);
                $this->parseDocumentationFile($file, $sdkName);

                continue;
            }

            if (isset(self::GENERAL_DOCUMENTS[$filename])) {
                [$key, $name] = self::GENERAL_DOCUMENTS[$filename];
                $this->registerDocument($key, $name, $file);
                $this->parseGeneralDocumentation($file, $key);
            }
        }
    }
}