Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
85.76% covered (warning)
85.76%
277 / 323
52.00% covered (warning)
52.00%
13 / 25
CRAP
0.00% covered (danger)
0.00%
0 / 1
SelectorParser
85.76% covered (warning)
85.76%
277 / 323
52.00% covered (warning)
52.00%
13 / 25
249.52
0.00% covered (danger)
0.00%
0 / 1
 parse
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 parseTokens
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parseComplexSelectorList
64.00% covered (warning)
64.00%
16 / 25
0.00% covered (danger)
0.00%
0 / 1
14.67
 parseComplexSelector
88.24% covered (warning)
88.24%
15 / 17
0.00% covered (danger)
0.00%
0 / 1
5.04
 parseCompoundSelector
91.67% covered (success)
91.67%
11 / 12
0.00% covered (danger)
0.00%
0 / 1
5.01
 tryParseTypeOrUniversal
100.00% covered (success)
100.00%
28 / 28
100.00% covered (success)
100.00%
1 / 1
17
 tryParseSubclassOrPseudo
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
8.02
 parseAttributeSelector
76.36% covered (warning)
76.36%
42 / 55
0.00% covered (danger)
0.00%
0 / 1
31.61
 parseAttrMatcher
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
12.04
 parsePseudoSelector
95.65% covered (success)
95.65%
22 / 23
0.00% covered (danger)
0.00%
0 / 1
8
 buildPseudoClassFunction
86.96% covered (warning)
86.96%
20 / 23
0.00% covered (danger)
0.00%
0 / 1
14.43
 buildPseudoElementFunction
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 parseTokensInner
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 parseCombinator
100.00% covered (success)
100.00%
24 / 24
100.00% covered (success)
100.00%
1 / 1
10
 startsCompound
90.00% covered (success)
90.00%
9 / 10
0.00% covered (danger)
0.00%
0 / 1
7.05
 collectUntilMatchingParen
85.71% covered (warning)
85.71%
12 / 14
0.00% covered (danger)
0.00%
0 / 1
5.07
 skipWhitespace
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 skipToNextSelector
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
90
 eof
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 peek
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 peekAt
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 serializeTokens
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 serializeTokenRange
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 serializeToken
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
18
1<?php
2
3declare(strict_types=1);
4
5namespace Phpdftk\Css\Selector;
6
7use Phpdftk\Css\Token\AtKeywordToken;
8use Phpdftk\Css\Token\ColonToken;
9use Phpdftk\Css\Token\CommaToken;
10use Phpdftk\Css\Token\DelimToken;
11use Phpdftk\Css\Token\DimensionToken;
12use Phpdftk\Css\Token\EofToken;
13use Phpdftk\Css\Token\FunctionToken;
14use Phpdftk\Css\Token\HashToken;
15use Phpdftk\Css\Token\HashTokenType;
16use Phpdftk\Css\Token\IdentToken;
17use Phpdftk\Css\Token\LeftBracketToken;
18use Phpdftk\Css\Token\NumberToken;
19use Phpdftk\Css\Token\NumberTokenType;
20use Phpdftk\Css\Token\RightBracketToken;
21use Phpdftk\Css\Token\RightParenToken;
22use Phpdftk\Css\Token\StringToken;
23use Phpdftk\Css\Token\Token;
24use Phpdftk\Css\Token\WhitespaceToken;
25use Phpdftk\Css\Tokenizer;
26
27/**
28 * Selectors-4 parser. Consumes the prelude token list of a style rule (or a
29 * raw string for `:is()` / `:where()` / `:not()` / `:has()` argument
30 * parsing) and produces a `SelectorList` of `ComplexSelector`s.
31 *
32 * Parsing follows the grammar in Selectors 4 §17 with the common-path subset
33 * needed for print rendering. Specifically supported:
34 *  - type, universal, id, class, attribute, pseudo-class, pseudo-element
35 *  - all combinators: descendant, `>`, `+`, `~`, `||`
36 *  - `:is`, `:not`, `:where`, `:has` argument lists (recursive)
37 *  - `:nth-child` family with An+B
38 *  - `:lang`, `:dir`, `:host`, `:host-context`, `::slotted`, `::part`,
39 *    `::theme` and other functional forms (parsed but match semantics live
40 *    in the matcher in Phase 1D.2).
41 *
42 * The parser is forgiving by default for top-level selector lists and
43 * non-forgiving for `:not` / `:has`. `:is` / `:where` are forgiving per spec.
44 */
45final class SelectorParser
46{
47    /** @var list<Token> */
48    private array $tokens;
49    private int $i = 0;
50    private int $count;
51
52    /** Parse a selector source string into a SelectorList. */
53    public static function parse(string $source): SelectorList
54    {
55        $tokenizer = new Tokenizer($source);
56        $tokens = array_values(array_filter(
57            $tokenizer->tokenize(),
58            static fn(Token $t): bool => !($t instanceof EofToken),
59        ));
60        return self::parseTokens($tokens, $source);
61    }
62
63    /**
64     * Parse a pre-tokenised prelude into a SelectorList. The CSS stylesheet
65     * parser calls this with the prelude tokens of a qualified rule.
66     *
67     * @param list<Token> $tokens
68     */
69    public static function parseTokens(array $tokens, string $sourceText = ''): SelectorList
70    {
71        $self = new self($tokens);
72        $selectors = $self->parseComplexSelectorList(forgiving: true);
73        return new SelectorList($sourceText, $selectors);
74    }
75
76    /** @param list<Token> $tokens */
77    private function __construct(array $tokens)
78    {
79        $this->tokens = $tokens;
80        $this->count = count($tokens);
81    }
82
83    /**
84     * `<complex-selector-list> = <complex-selector>#`
85     *
86     * @return list<ComplexSelector>
87     */
88    private function parseComplexSelectorList(bool $forgiving): array
89    {
90        $out = [];
91        while (true) {
92            $this->skipWhitespace();
93            if ($this->eof()) {
94                break;
95            }
96            $start = $this->i;
97            try {
98                $sel = $this->parseComplexSelector();
99                if ($sel !== null) {
100                    $out[] = $sel;
101                }
102            } catch (SelectorSyntaxException) {
103                if (!$forgiving) {
104                    throw new SelectorSyntaxException('Invalid selector');
105                }
106                // Skip to next comma or EOF.
107                $this->skipToNextSelector();
108            }
109            $this->skipWhitespace();
110            if ($this->eof()) {
111                break;
112            }
113            $next = $this->peek();
114            if ($next instanceof CommaToken) {
115                $this->i++;
116                continue;
117            }
118            // No comma but tokens remain โ€” recover or fail.
119            if ($this->i === $start) {
120                // Made no progress; bail out to avoid infinite loop.
121                $this->i++;
122                if (!$forgiving) {
123                    throw new SelectorSyntaxException('Unexpected token in selector list');
124                }
125            }
126        }
127        return $out;
128    }
129
130    private function parseComplexSelector(): ?ComplexSelector
131    {
132        $startTok = $this->i;
133        $compound = $this->parseCompoundSelector();
134        if ($compound === null) {
135            return null;
136        }
137        $parts = [];
138        while (true) {
139            $combinator = $this->parseCombinator();
140            if ($combinator === null) {
141                $parts[] = new CompoundSelectorWithCombinator($compound, null);
142                break;
143            }
144            $next = $this->parseCompoundSelector();
145            if ($next === null) {
146                throw new SelectorSyntaxException('Expected compound selector after combinator');
147            }
148            $parts[] = new CompoundSelectorWithCombinator($compound, $combinator);
149            $compound = $next;
150        }
151        $text = self::serializeTokenRange($this->tokens, $startTok, $this->i);
152        return new ComplexSelector($parts, trim($text));
153    }
154
155    private function parseCompoundSelector(): ?CompoundSelector
156    {
157        $components = [];
158
159        // Optional type/universal selector first.
160        $typed = $this->tryParseTypeOrUniversal();
161        if ($typed !== null) {
162            $components[] = $typed;
163        }
164        while (true) {
165            $sub = $this->tryParseSubclassOrPseudo();
166            if ($sub === null) {
167                break;
168            }
169            $components[] = $sub;
170        }
171        if ($components === []) {
172            return null;
173        }
174        return new CompoundSelector($components);
175    }
176
177    private function tryParseTypeOrUniversal(): ?SimpleSelector
178    {
179        $save = $this->i;
180        $prefix = null;
181
182        // Look for ns-prefix: ident|, *|, or |.
183        if ($this->peek() instanceof IdentToken
184            && $this->peekAt(1) instanceof DelimToken
185            && $this->peekAt(1)->value === '|'
186            && !($this->peekAt(2) instanceof DelimToken && $this->peekAt(2)->value === '=')
187        ) {
188            $prefix = $this->peek()->value;
189            $this->i += 2;
190        } elseif ($this->peek() instanceof DelimToken
191            && $this->peek()->value === '*'
192            && $this->peekAt(1) instanceof DelimToken
193            && $this->peekAt(1)->value === '|'
194        ) {
195            $prefix = '*';
196            $this->i += 2;
197        } elseif ($this->peek() instanceof DelimToken
198            && $this->peek()->value === '|'
199            && !($this->peekAt(1) instanceof DelimToken && $this->peekAt(1)->value === '|')
200        ) {
201            $prefix = '';
202            $this->i++;
203        }
204
205        $tok = $this->peek();
206        if ($tok instanceof IdentToken) {
207            $this->i++;
208            return new TypeSelector($tok->value, $prefix);
209        }
210        if ($tok instanceof DelimToken && $tok->value === '*') {
211            $this->i++;
212            return new UniversalSelector($prefix);
213        }
214        // Not a type selector โ€” rewind.
215        $this->i = $save;
216        return null;
217    }
218
219    private function tryParseSubclassOrPseudo(): ?SimpleSelector
220    {
221        $tok = $this->peek();
222        if ($tok instanceof HashToken && $tok->type === HashTokenType::Id) {
223            $this->i++;
224            return new IdSelector($tok->value);
225        }
226        if ($tok instanceof DelimToken && $tok->value === '.') {
227            if ($this->peekAt(1) instanceof IdentToken) {
228                $this->i += 2;
229                /** @var IdentToken $ident */
230                $ident = $this->tokens[$this->i - 1];
231                return new ClassSelector($ident->value);
232            }
233            return null;
234        }
235        if ($tok instanceof LeftBracketToken) {
236            return $this->parseAttributeSelector();
237        }
238        if ($tok instanceof ColonToken) {
239            return $this->parsePseudoSelector();
240        }
241        return null;
242    }
243
244    private function parseAttributeSelector(): AttributeSelector
245    {
246        // Caller verified the `[`.
247        $this->i++;
248        $this->skipWhitespace();
249        $prefix = null;
250        // Optional namespace prefix. A `|` followed by `=` is the |= match
251        // operator, not a namespace separator.
252        if ($this->peek() instanceof IdentToken
253            && $this->peekAt(1) instanceof DelimToken
254            && $this->peekAt(1)->value === '|'
255            && !($this->peekAt(2) instanceof DelimToken && $this->peekAt(2)->value === '=')
256        ) {
257            $prefix = $this->peek()->value;
258            $this->i += 2;
259        } elseif ($this->peek() instanceof DelimToken && $this->peek()->value === '*'
260            && $this->peekAt(1) instanceof DelimToken && $this->peekAt(1)->value === '|'
261            && !($this->peekAt(2) instanceof DelimToken && $this->peekAt(2)->value === '=')
262        ) {
263            $prefix = '*';
264            $this->i += 2;
265        } elseif ($this->peek() instanceof DelimToken && $this->peek()->value === '|'
266            && !($this->peekAt(1) instanceof DelimToken && in_array($this->peekAt(1)->value, ['|', '='], true))
267        ) {
268            $prefix = '';
269            $this->i++;
270        }
271
272        $nameTok = $this->peek();
273        if (!$nameTok instanceof IdentToken) {
274            throw new SelectorSyntaxException('Expected attribute name');
275        }
276        $name = $nameTok->value;
277        $this->i++;
278        $this->skipWhitespace();
279
280        // `]` โ†’ existence only.
281        if ($this->peek() instanceof RightBracketToken) {
282            $this->i++;
283            return new AttributeSelector($name, AttributeMatchType::Exists, namespacePrefix: $prefix);
284        }
285        $matcher = $this->parseAttrMatcher();
286        $this->skipWhitespace();
287        $valueTok = $this->peek();
288        $value = null;
289        if ($valueTok instanceof StringToken) {
290            $value = $valueTok->value;
291            $this->i++;
292        } elseif ($valueTok instanceof IdentToken) {
293            $value = $valueTok->value;
294            $this->i++;
295        } else {
296            throw new SelectorSyntaxException('Expected attribute value');
297        }
298        $this->skipWhitespace();
299        $ci = false;
300        $modifier = $this->peek();
301        if ($modifier instanceof IdentToken) {
302            $lower = strtolower($modifier->value);
303            if ($lower === 'i') {
304                $ci = true;
305                $this->i++;
306                $this->skipWhitespace();
307            } elseif ($lower === 's') {
308                $ci = false;
309                $this->i++;
310                $this->skipWhitespace();
311            }
312        }
313        if (!($this->peek() instanceof RightBracketToken)) {
314            throw new SelectorSyntaxException('Expected `]`');
315        }
316        $this->i++;
317        return new AttributeSelector($name, $matcher, $value, $prefix, $ci);
318    }
319
320    private function parseAttrMatcher(): AttributeMatchType
321    {
322        $tok = $this->peek();
323        if ($tok instanceof DelimToken && $tok->value === '=') {
324            $this->i++;
325            return AttributeMatchType::Equals;
326        }
327        if ($tok instanceof DelimToken && in_array($tok->value, ['~', '|', '^', '$', '*'], true)) {
328            $next = $this->peekAt(1);
329            if ($next instanceof DelimToken && $next->value === '=') {
330                $this->i += 2;
331                return match ($tok->value) {
332                    '~' => AttributeMatchType::Includes,
333                    '|' => AttributeMatchType::DashMatch,
334                    '^' => AttributeMatchType::PrefixMatch,
335                    '$' => AttributeMatchType::SuffixMatch,
336                    '*' => AttributeMatchType::SubstringMatch,
337                };
338            }
339        }
340        throw new SelectorSyntaxException('Expected attribute matcher operator');
341    }
342
343    private function parsePseudoSelector(): SimpleSelector
344    {
345        $this->i++; // consume `:`
346        $isPseudoElement = false;
347        if ($this->peek() instanceof ColonToken) {
348            $isPseudoElement = true;
349            $this->i++;
350        }
351        $next = $this->peek();
352        if ($next instanceof IdentToken) {
353            $this->i++;
354            $name = strtolower($next->value);
355            // Per CSS 2.1 legacy: ::before/::after/::first-line/::first-letter
356            // can be written with a single colon; route those to pseudo-element.
357            if (!$isPseudoElement
358                && in_array($name, ['before', 'after', 'first-line', 'first-letter'], true)
359            ) {
360                $isPseudoElement = true;
361            }
362            return $isPseudoElement
363                ? new PseudoElementSelector($name)
364                : new PseudoClassSelector($name);
365        }
366        if ($next instanceof FunctionToken) {
367            $this->i++;
368            $name = strtolower($next->name);
369            $argTokens = $this->collectUntilMatchingParen();
370            return $isPseudoElement
371                ? $this->buildPseudoElementFunction($name, $argTokens)
372                : $this->buildPseudoClassFunction($name, $argTokens);
373        }
374        throw new SelectorSyntaxException('Expected identifier or function after `:`');
375    }
376
377    /** @param list<Token> $argTokens */
378    private function buildPseudoClassFunction(string $name, array $argTokens): PseudoClassSelector
379    {
380        switch ($name) {
381            case 'is':
382            case 'where':
383            case 'has':
384            case 'not':
385                $forgiving = in_array($name, ['is', 'where'], true);
386                $inner = self::parseTokensInner($argTokens, $forgiving);
387                return new PseudoClassSelector($name, $inner);
388            case 'nth-child':
389            case 'nth-last-child':
390            case 'nth-of-type':
391            case 'nth-last-of-type':
392                [$anb, $of] = AnPlusBParser::parseWithOf($argTokens);
393                return new PseudoClassSelector($name, $of, $anb);
394            case 'lang':
395            case 'dir':
396                $argText = self::serializeTokens($argTokens);
397                return new PseudoClassSelector($name, argText: trim($argText));
398            case 'host':
399            case 'host-context':
400                $inner = self::parseTokensInner($argTokens, false);
401                return new PseudoClassSelector($name, $inner);
402            default:
403                // Unknown functional pseudo-class โ€” keep as raw text.
404                $argText = self::serializeTokens($argTokens);
405                return new PseudoClassSelector($name, argText: trim($argText));
406        }
407    }
408
409    /** @param list<Token> $argTokens */
410    private function buildPseudoElementFunction(string $name, array $argTokens): PseudoElementSelector
411    {
412        $inner = match ($name) {
413            'slotted', 'part', 'theme' => self::parseTokensInner($argTokens, false),
414            default => self::parseTokensInner($argTokens, true),
415        };
416        return new PseudoElementSelector($name, $inner);
417    }
418
419    /** @param list<Token> $tokens */
420    private static function parseTokensInner(array $tokens, bool $forgiving): SelectorList
421    {
422        $self = new self($tokens);
423        $sels = $self->parseComplexSelectorList($forgiving);
424        return new SelectorList(self::serializeTokens($tokens), $sels);
425    }
426
427    private function parseCombinator(): ?Combinator
428    {
429        $hadWhitespace = $this->skipWhitespace();
430        $tok = $this->peek();
431        if ($tok instanceof DelimToken) {
432            switch ($tok->value) {
433                case '>':
434                    $this->i++;
435                    $this->skipWhitespace();
436                    return Combinator::Child;
437                case '+':
438                    $this->i++;
439                    $this->skipWhitespace();
440                    return Combinator::NextSibling;
441                case '~':
442                    $this->i++;
443                    $this->skipWhitespace();
444                    return Combinator::SubsequentSibling;
445                case '|':
446                    if ($this->peekAt(1) instanceof DelimToken && $this->peekAt(1)->value === '|') {
447                        $this->i += 2;
448                        $this->skipWhitespace();
449                        return Combinator::Column;
450                    }
451            }
452        }
453        // Descendant: whitespace followed by something that starts a compound.
454        if ($hadWhitespace && $this->startsCompound($tok)) {
455            return Combinator::Descendant;
456        }
457        return null;
458    }
459
460    private function startsCompound(?Token $tok): bool
461    {
462        if ($tok === null) {
463            return false;
464        }
465        if ($tok instanceof IdentToken
466            || $tok instanceof HashToken
467            || $tok instanceof LeftBracketToken
468            || $tok instanceof ColonToken
469        ) {
470            return true;
471        }
472        if ($tok instanceof DelimToken) {
473            return in_array($tok->value, ['.', '*', '|'], true);
474        }
475        return false;
476    }
477
478    /**
479     * Collect tokens until the matching `)` of a function-token argument
480     * list. The opening `(` was consumed as part of the FunctionToken.
481     *
482     * @return list<Token>
483     */
484    private function collectUntilMatchingParen(): array
485    {
486        $depth = 1;
487        $out = [];
488        while ($this->i < $this->count) {
489            $t = $this->tokens[$this->i];
490            if ($t instanceof RightParenToken) {
491                $depth--;
492                if ($depth === 0) {
493                    $this->i++;
494                    return $out;
495                }
496            }
497            if ($t instanceof FunctionToken) {
498                $depth++;
499            }
500            $out[] = $t;
501            $this->i++;
502        }
503        // Implicit closer at EOF per CSS Syntax 3.
504        return $out;
505    }
506
507    private function skipWhitespace(): bool
508    {
509        $had = false;
510        while ($this->i < $this->count && $this->tokens[$this->i] instanceof WhitespaceToken) {
511            $this->i++;
512            $had = true;
513        }
514        return $had;
515    }
516
517    private function skipToNextSelector(): void
518    {
519        $depth = 0;
520        while ($this->i < $this->count) {
521            $t = $this->tokens[$this->i];
522            if ($t instanceof CommaToken && $depth === 0) {
523                return;
524            }
525            if ($t instanceof FunctionToken || $t instanceof LeftBracketToken) {
526                $depth++;
527            } elseif ($t instanceof RightParenToken || $t instanceof RightBracketToken) {
528                if ($depth > 0) {
529                    $depth--;
530                }
531            }
532            $this->i++;
533        }
534    }
535
536    private function eof(): bool
537    {
538        return $this->i >= $this->count;
539    }
540
541    private function peek(): ?Token
542    {
543        return $this->tokens[$this->i] ?? null;
544    }
545
546    private function peekAt(int $offset): ?Token
547    {
548        return $this->tokens[$this->i + $offset] ?? null;
549    }
550
551    /** @param list<Token> $tokens */
552    private static function serializeTokens(array $tokens): string
553    {
554        $out = '';
555        foreach ($tokens as $t) {
556            $out .= self::serializeToken($t);
557        }
558        return $out;
559    }
560
561    /** @param list<Token> $tokens */
562    private static function serializeTokenRange(array $tokens, int $from, int $to): string
563    {
564        $slice = array_slice($tokens, $from, $to - $from);
565        return self::serializeTokens($slice);
566    }
567
568    private static function serializeToken(Token $t): string
569    {
570        return match (true) {
571            $t instanceof IdentToken => $t->value,
572            $t instanceof AtKeywordToken => '@' . $t->value,
573            $t instanceof HashToken => '#' . $t->value,
574            $t instanceof StringToken => '"' . str_replace('"', '\\"', $t->value) . '"',
575            $t instanceof DelimToken => $t->value,
576            $t instanceof CommaToken => ',',
577            $t instanceof ColonToken => ':',
578            $t instanceof WhitespaceToken => ' ',
579            $t instanceof LeftBracketToken => '[',
580            $t instanceof RightBracketToken => ']',
581            $t instanceof FunctionToken => $t->name . '(',
582            $t instanceof RightParenToken => ')',
583            $t instanceof NumberToken => $t->type === NumberTokenType::Integer
584                ? (string) (int) $t->value
585                : (string) $t->value,
586            $t instanceof DimensionToken => ($t->type === NumberTokenType::Integer
587                ? (string) (int) $t->value
588                : (string) $t->value) . $t->unit,
589            default => '',
590        };
591    }
592}