SelectorContentExtractor.php•1.63 kB
<?php
declare(strict_types=1);
namespace Butschster\ContextGenerator\Lib\Html;
/**
 * Extracts content from HTML using CSS selectors.
 *
 * Only three selector forms are recognised: `#id`, `.class` and a bare
 * element name. Any other selector text is appended to `//` and evaluated
 * as XPath unchanged.
 */
final class SelectorContentExtractor implements SelectorContentExtractorInterface
{
    /**
     * Returns the serialized HTML of every element matching the selector.
     *
     * @param string $html     Markup to search. An empty string is returned unchanged.
     * @param string $selector `#id`, `.class` or an element name; surrounding whitespace is ignored.
     *
     * @return string Each match serialized with DOMDocument::saveHTML() and followed by "\n";
     *                '' when nothing matches or the selector cannot be evaluated.
     */
    public function extract(string $html, string $selector): string
    {
        // DOMDocument::loadHTML() rejects an empty source. A strict comparison
        // (rather than empty()) keeps input such as "0" from being treated as
        // "no content" and echoed back without the selector being applied.
        if ($html === '') {
            return $html;
        }

        // A blank selector, or a bare "#" / "." with no name, cannot identify
        // anything; answer directly instead of evaluating a malformed or
        // match-everything XPath expression.
        $selector = \trim($selector);
        if ($selector === '' || $selector === '#' || $selector === '.') {
            return '';
        }

        $dom = new \DOMDocument();

        // libxml's error mode is global to the process: remember the caller's
        // setting and put it back afterwards so this method does not change
        // error reporting for unrelated libxml users.
        $previousErrorMode = \libxml_use_internal_errors(true);
        try {
            // NOTE(review): no charset hint is supplied here, so markup that
            // does not declare its own encoding is presumably not decoded as
            // UTF-8 — confirm against the inputs callers actually pass.
            $dom->loadHTML($html, \LIBXML_NOWARNING | \LIBXML_NOERROR);
        } finally {
            \libxml_clear_errors();
            \libxml_use_internal_errors($previousErrorMode);
        }

        $xpath = new \DOMXPath($dom);
        $elements = $xpath->query($this->cssToXPath($selector));
        if ($elements === false || $elements->length === 0) {
            return '';
        }

        $result = '';
        foreach ($elements as $element) {
            $result .= $dom->saveHTML($element) . "\n";
        }

        return $result;
    }

    /**
     * Very basic CSS to XPath converter (only handles simple selectors).
     *
     * - `#name`  → elements whose `id` attribute equals `name`
     * - `.name`  → elements whose `class` attribute contains `name` as a
     *              whole whitespace-separated token
     * - anything else is treated as an element name and prefixed with `//`
     *
     * @param string $selector Non-empty, already trimmed selector.
     *
     * @return string XPath expression for DOMXPath::query().
     */
    private function cssToXPath(string $selector): string
    {
        // Handle ID selector (#id)
        if (\str_starts_with($selector, '#')) {
            return '//*[@id=' . $this->xpathLiteral(\substr($selector, 1)) . ']';
        }

        // Handle class selector (.class). A plain contains(@class, 'x') is a
        // substring test, so ".btn" would also match class="btn-primary".
        // Padding both the normalised attribute and the wanted name with
        // spaces turns it into a whole-token comparison.
        if (\str_starts_with($selector, '.')) {
            $token = $this->xpathLiteral(' ' . \substr($selector, 1) . ' ');

            return "//*[contains(concat(' ', normalize-space(@class), ' '), {$token})]";
        }

        // Handle element selector (div, p, etc.)
        return "//{$selector}";
    }

    /**
     * Renders an arbitrary string as an XPath 1.0 string literal.
     *
     * XPath 1.0 literals have no escape sequences: a literal is delimited by
     * one quote character and may not contain it. A value containing both
     * quote characters is therefore assembled with concat().
     *
     * @param string $value Raw text to embed in an XPath expression.
     *
     * @return string A quoted literal or an equivalent concat(...) expression.
     */
    private function xpathLiteral(string $value): string
    {
        if (!\str_contains($value, "'")) {
            return "'{$value}'";
        }

        if (!\str_contains($value, '"')) {
            return "\"{$value}\"";
        }

        // Split on apostrophes, single-quote each apostrophe-free piece, and
        // rejoin the pieces with a double-quoted apostrophe between them.
        $parts = \array_map(
            static fn (string $part): string => "'{$part}'",
            \explode("'", $value),
        );

        return 'concat(' . \implode(", \"'\", ", $parts) . ')';
    }
}