Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
82.89% |
315 / 380 |
|
44.00% |
11 / 25 |
CRAP | |
0.00% |
0 / 1 |
| BoxGenerator | |
82.89% |
315 / 380 |
|
44.00% |
11 / 25 |
421.45 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| generate | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
| buildElementBox | |
94.23% |
98 / 104 |
|
0.00% |
0 / 1 |
44.37 | |||
| applyPictureSourceOverride | |
90.48% |
19 / 21 |
|
0.00% |
0 / 1 |
14.17 | |||
| firstSrcsetUrl | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
| makePseudoBox | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| resolvePseudoContent | |
83.33% |
15 / 18 |
|
0.00% |
0 / 1 |
9.37 | |||
| resolveQuotePair | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
| contentItemAsString | |
93.94% |
31 / 33 |
|
0.00% |
0 / 1 |
21.10 | |||
| applyCounterReset | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| applyCounterIncrement | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| forEachCounterPair | |
30.00% |
6 / 20 |
|
0.00% |
0 / 1 |
44.30 | |||
| formatCounter | |
42.86% |
3 / 7 |
|
0.00% |
0 / 1 |
16.14 | |||
| bijectiveBase26 | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| roman | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
5.01 | |||
| flushInlineGroup | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
| makeBox | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
8 | |||
| displayKeyword | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
| applyPresentationalAttributes | |
76.79% |
43 / 56 |
|
0.00% |
0 / 1 |
58.03 | |||
| isAutoLength | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| naturalImageSize | |
66.67% |
14 / 21 |
|
0.00% |
0 / 1 |
15.48 | |||
| resolveLocalImagePath | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| parseHtmlLength | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
3.33 | |||
| mixesBlockAndInline | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
| isInlineLevel | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
4 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\HtmlToPdf\Box; |
| 6 | |
| 7 | use Phpdftk\Css\Cascade\Cascade; |
| 8 | use Phpdftk\Css\Cascade\CascadedValues; |
| 9 | use Phpdftk\Css\Sheet\Stylesheet; |
| 10 | use Phpdftk\Css\Value\Keyword; |
| 11 | use Phpdftk\Html\Dom\Document; |
| 12 | use Phpdftk\Html\Dom\Element; |
| 13 | use Phpdftk\Html\Dom\Text; |
| 14 | |
| 15 | /** |
| 16 | * Walks a parsed HTML document, runs the CSS cascade against each element, |
| 17 | * and emits the box tree. |
| 18 | * |
| 19 | * Phase 1E.1 implements the common path of CSS Display 3 box generation: |
| 20 | * - `display: block` / `list-item` → {@see BlockBox} |
| 21 | * - `display: inline` → {@see InlineBox} |
| 22 | * - `display: inline-block` and replaced elements → {@see AtomicInlineBox} |
| 23 | * - `display: none` → element + subtree skipped |
| 24 | * - Text nodes inside any element → {@see TextBox} |
| 25 | * - Anonymous block wrapping per CSS Display 3 §3.4 when a block parent |
| 26 | * has mixed inline + block children |
| 27 | * |
| 28 | * Display values without a dedicated box class yet (e.g. grid, ruby) fall
| 29 | * through to BlockBox as a sensible default — layout will reject them in a
| 30 | * dedicated message until those sub-phases ship.
| 31 | * |
| 32 | * The flat-tree composition that Q11 calls for (slot distribution + |
| 33 | * shadow-tree traversal) lives in 1E.2; this version walks the light DOM. |
| 34 | */ |
| 35 | final class BoxGenerator |
| 36 | { |
| 37 | /** |
| 38 | * Live CSS counter state during a single `generate()` walk. Keyed by |
| 39 | * counter name, value is the current count. Reset per generate; not |
| 40 | * shared across documents. |
| 41 | * |
| 42 | * @var array<string, int> |
| 43 | */ |
| 44 | private array $counters = []; |
| 45 | |
/**
 * @param Cascade     $cascade Cascade used to compute the values of every
 *                             element and pseudo-element visited.
 * @param string|null $baseDir Base directory for resolving local-file
 *                             `<img src>` paths when reading intrinsic
 *                             image dimensions. Same posture as the
 *                             painter's `baseDir`: `null` disables
 *                             local-file lookups (only `data:` URLs supply
 *                             natural sizes); a non-null value joins
 *                             relative paths and rejects any escape via
 *                             `realpath()`.
 */
public function __construct(
    private readonly Cascade $cascade = new Cascade(),
    private readonly ?string $baseDir = null,
) {}
| 57 | |
/**
 * Build the box tree for a parsed HTML document, styled by the given
 * stylesheets (listed in cascade-origin order).
 *
 * The counter state is cleared before each walk, so counters never leak
 * from one document into the next.
 *
 * @param list<Stylesheet> $sheets
 *
 * @return Box|null Root box, or null when the document has no document
 *                  element or the root element generates no box.
 */
public function generate(Document $document, array $sheets): ?Box
{
    $documentElement = $document->documentElement;
    if ($documentElement !== null) {
        $this->counters = [];

        return $this->buildElementBox($documentElement, $sheets, null);
    }

    return null;
}
| 73 | |
/**
 * Build the box for one element and, recursively, for its subtree.
 *
 * Runs the cascade for `$element`, folds in presentational attributes and
 * returns null when the computed `display` is `none` (the subtree is then
 * skipped). Counter properties are applied next. `<br>`, `<wbr>`, `<img>`
 * with non-empty `alt`, the supported `<input>` types and `<select>` are
 * lowered to dedicated inline boxes and return early; every other element
 * gets a box from {@see makeBox()} that is filled with its `::before`,
 * child and `::after` boxes, wrapped in anonymous blocks when a block
 * parent mixes block-level and inline-level children.
 *
 * @param list<Stylesheet>    $sheets       Stylesheets in cascade-origin order.
 * @param CascadedValues|null $parentValues Cascaded values of the parent
 *                                          element; null for the root.
 *
 * @return Box|null The generated box, or null for `display: none`.
 */
private function buildElementBox(
    Element $element,
    array $sheets,
    ?CascadedValues $parentValues,
): ?Box {
    $values = $this->cascade->computeFor($sheets, $element, $parentValues);
    $this->applyPresentationalAttributes($element, $values);
    $display = $this->displayKeyword($values);
    if ($display === 'none') {
        return null;
    }

    // CSS Generated Content 3 §2: apply counter-reset (set named
    // counters at this scope) then counter-increment (bump them) so
    // any `::before` content that reads `counter()` sees the post-
    // increment value at this element's position in document order.
    $this->applyCounterReset($values);
    $this->applyCounterIncrement($values);

    // Lower-cased once; every special case below dispatches on it.
    $tag = strtolower($element->localName);

    // HTML `<br>` produces a sentinel line-break box — a hard break
    // inside the parent inline formatting context that survives
    // whitespace collapsing under `white-space: normal`.
    if ($tag === 'br') {
        return new LineBreakBox($element, $values);
    }

    // HTML 5 §4.5.27: `<wbr>` (Word Break Opportunity) is a void inline
    // element that just marks a permissible line break. Lower it to an
    // `InlineBox` carrying a U+200B (zero-width space) TextBox — it has
    // zero advance width but the line breaker recognises it as a break
    // opportunity, so a long unbroken token containing a `<wbr>` can
    // split at that point.
    if ($tag === 'wbr') {
        $inline = new InlineBox($element, $values);
        $inline->addChild(new TextBox($element, $values, "\u{200B}"));
        return $inline;
    }

    // HTML 5 §4.8.3: `<img alt="...">` — until image painting lands,
    // emit the alt text as a synthetic InlineBox + TextBox so the
    // fallback content takes part in inline layout. Empty `alt=""`
    // is intentional "decorative image, hide from a11y" — leave as
    // the regular atomic inline.
    if ($tag === 'img') {
        // HTML 5 §4.8.4.2 — when the `<img>` is wrapped in a
        // `<picture>`, walk the preceding `<source>` siblings to
        // pick the best one for the print medium. Phase-1 honours
        // a `media` attribute containing `print` or `all` (or an
        // absent media attribute, which means "all media").
        $this->applyPictureSourceOverride($element);
        $alt = $element->getAttribute('alt');
        if ($alt !== null && $alt !== '') {
            $inline = new InlineBox($element, $values);
            $inline->addChild(new TextBox($element, $values, $alt));
            return $inline;
        }
    }

    // HTML 5 §4.10.5.1: `<input type="text|email|search|...">` renders
    // its `value` attribute as static text for print output. PDF
    // AcroForm field generation is a Phase 2 task. Emit as an
    // `InlineBox` so the text child flows through inline layout.
    // Input types not listed below fall through to the generic path.
    if ($tag === 'input') {
        $type = strtolower($element->getAttribute('type') ?? 'text');
        $textTypes = ['text', 'email', 'search', 'tel', 'url', 'number', 'date', 'time', 'datetime-local'];
        if (in_array($type, $textTypes, true)) {
            $inline = new InlineBox($element, $values);
            $value = $element->getAttribute('value') ?? '';
            if ($value !== '') {
                $inline->addChild(new TextBox($element, $values, $value));
            }
            return $inline;
        }
        // HTML 5 §4.10.5.1.18: button-type inputs render the
        // `value` as the button label. Phase 2 will paint them as
        // proper PDF widget annotations; for now we just emit the
        // label text inline.
        $buttonTypes = ['button', 'submit', 'reset'];
        if (in_array($type, $buttonTypes, true)) {
            $inline = new InlineBox($element, $values);
            $label = $element->getAttribute('value');
            if ($label === null || $label === '') {
                // HTML 5 default labels when `value` is missing.
                $label = match ($type) {
                    'submit' => 'Submit',
                    'reset' => 'Reset',
                    default => '',
                };
            }
            if ($label !== '') {
                $inline->addChild(new TextBox($element, $values, $label));
            }
            return $inline;
        }
        // Checkbox / radio — render an ASCII visual indicator so
        // form-print output stays informative without depending on
        // ☐/☑ glyphs in the user's font.
        if ($type === 'checkbox' || $type === 'radio') {
            $checked = $element->getAttribute('checked') !== null;
            $marker = $type === 'checkbox'
                ? ($checked ? '[x] ' : '[ ] ')
                : ($checked ? '(o) ' : '( ) ');
            $inline = new InlineBox($element, $values);
            $inline->addChild(new TextBox($element, $values, $marker));
            return $inline;
        }
    }

    // HTML 5 §4.10.7: `<select>` renders only its currently-selected
    // `<option>` in static print output (no dropdown widget). When
    // no option has the `selected` attribute, the first option is
    // implicitly selected per spec. Subsequent options are skipped
    // so the print form just shows the chosen value.
    if ($tag === 'select') {
        $selectedOption = null;
        $firstOption = null;
        foreach ($element->children() as $child) {
            if (!$child instanceof Element) {
                continue;
            }
            if (strtolower($child->localName) !== 'option') {
                continue;
            }
            if ($firstOption === null) {
                $firstOption = $child;
            }
            if ($selectedOption === null && $child->getAttribute('selected') !== null) {
                $selectedOption = $child;
            }
        }
        $picked = $selectedOption ?? $firstOption;
        $inline = new InlineBox($element, $values);
        if ($picked !== null) {
            $text = $picked->textContent();
            if ($text !== '') {
                $inline->addChild(new TextBox($element, $values, $text));
            }
        }
        return $inline;
    }

    $box = $this->makeBox($element, $values, $display);

    // Walk children, building child boxes. Text nodes become TextBoxes.
    // `::before` is generated content prepended to the element's own
    // children; `::after` is appended. Each comes from makePseudoBox(),
    // which returns null when the pseudo-element's `content` generates
    // no box.
    $rawChildren = [];
    $before = $this->makePseudoBox($element, $sheets, $values, 'before');
    if ($before !== null) {
        $rawChildren[] = $before;
    }
    for ($n = $element->firstChild; $n !== null; $n = $n->nextSibling) {
        if ($n instanceof Element) {
            $child = $this->buildElementBox($n, $sheets, $values);
            if ($child !== null) {
                $rawChildren[] = $child;
            }
        } elseif ($n instanceof Text) {
            if ($n->data === '') {
                continue;
            }
            $rawChildren[] = new TextBox($element, $values, $n->data);
        }
        // Comments and other node types are dropped.
    }
    $after = $this->makePseudoBox($element, $sheets, $values, 'after');
    if ($after !== null) {
        $rawChildren[] = $after;
    }

    // Anonymous-block wrapping per CSS Display 3 §3.4: only inside
    // block-context parents whose children mix block + inline.
    $needsAnonymous = $box instanceof BlockBox && $this->mixesBlockAndInline($rawChildren);
    if (!$needsAnonymous) {
        foreach ($rawChildren as $child) {
            $box->addChild($child);
        }
        return $box;
    }

    // Run through children; group contiguous inline-ish children under
    // an AnonymousBlockBox sharing the parent's style.
    $inlineGroup = [];
    foreach ($rawChildren as $child) {
        if ($this->isInlineLevel($child)) {
            $inlineGroup[] = $child;
            continue;
        }
        $this->flushInlineGroup($box, $values, $inlineGroup);
        $inlineGroup = [];
        $box->addChild($child);
    }
    $this->flushInlineGroup($box, $values, $inlineGroup);
    return $box;
}
| 281 | |
/**
 * HTML 5 §4.8.4.2 — when an `<img>` is the fallback inside a
 * `<picture>`, the browser walks the `<source>` elements that precede
 * it and picks the first one whose `media` attribute matches. For
 * print rendering: pick the first preceding `<source>` with a `media`
 * value of `all`, one containing `print`, or no/empty `media`
 * attribute, and use the first URL of its `srcset` as the effective
 * `src`. Sources without a usable `srcset` are skipped.
 *
 * Mutates the element's `src` attribute in place — feels intrusive
 * but means code that reads `$element->getAttribute('src')` sees the
 * chosen source without any extra plumbing through the box tree.
 * Does nothing when the parent is not a `<picture>` element or no
 * source qualifies.
 */
private function applyPictureSourceOverride(Element $img): void
{
    $parent = $img->parentNode;
    if (!($parent instanceof Element)
        || strtolower($parent->localName) !== 'picture'
    ) {
        return;
    }
    foreach ($parent->children() as $sibling) {
        if ($sibling === $img) {
            // Only sources that come before the <img> take part in
            // selection; anything after it is ignored.
            break;
        }
        // Guard against non-element children (e.g. whitespace text
        // between the tags), mirroring the `<select>` option walk.
        if (!$sibling instanceof Element
            || strtolower($sibling->localName) !== 'source'
        ) {
            continue;
        }
        $media = $sibling->getAttribute('media');
        if ($media !== null && $media !== '') {
            $lower = strtolower(trim($media));
            // NOTE(review): substring match only — a query such as
            // `not print` is also accepted. Full media-query
            // evaluation is not attempted here.
            if ($lower !== 'all' && !str_contains($lower, 'print')) {
                continue;
            }
        }
        $srcset = $sibling->getAttribute('srcset');
        if ($srcset === null || trim($srcset) === '') {
            continue;
        }
        $url = $this->firstSrcsetUrl($srcset);
        if ($url !== null && $url !== '') {
            $img->setAttribute('src', $url);
            return;
        }
    }
}
| 328 | |
/**
 * Extract the first URL from a `srcset` value
 * (`url1 [descriptor], url2 [descriptor]`).
 *
 * Follows the URL-collection step of the HTML "parse a srcset
 * attribute" algorithm: skip leading ASCII whitespace and commas, take
 * the run of non-whitespace characters, then drop trailing commas.
 * Collecting up to whitespace rather than up to the first comma keeps
 * URLs that legitimately contain commas intact, e.g.
 * `data:image/png;base64,....`.
 *
 * @return string|null The first candidate URL, or null when the value
 *                     holds no URL at all.
 */
private function firstSrcsetUrl(string $srcset): ?string
{
    // ASCII whitespace as defined by HTML: space, tab, LF, FF, CR.
    $whitespace = " \t\n\f\r";

    $candidate = ltrim($srcset, $whitespace . ',');
    if ($candidate === '') {
        return null;
    }

    // The URL runs to the first whitespace; a descriptor such as `2x`
    // may follow and is discarded.
    $url = substr($candidate, 0, strcspn($candidate, $whitespace));

    // A comma glued to the URL is the candidate separator, not part
    // of the URL.
    $url = rtrim($url, ',');

    return $url === '' ? null : $url;
}
| 346 | |
/**
 * Create the box for a `::before` / `::after` pseudo-element of
 * `$element`.
 *
 * The pseudo-element's values are cascaded with the host's values as
 * parent and its `content` is resolved to text. When the content
 * resolves to null no box is generated. Otherwise a box matching the
 * pseudo-element's `display` is created and — unless the text is the
 * empty string — given a single TextBox child holding the text.
 *
 * @param list<Stylesheet> $sheets
 */
private function makePseudoBox(
    Element $element,
    array $sheets,
    CascadedValues $hostValues,
    string $pseudoName,
): ?Box {
    $styles = $this->cascade->computeFor($sheets, $element, $hostValues, $pseudoName);
    $generated = $this->resolvePseudoContent($styles->get('content'), $element, $styles);
    if ($generated === null) {
        return null;
    }

    // displayKeyword() yields `inline` when no `display` keyword is set.
    $box = $this->makeBox($element, $styles, $this->displayKeyword($styles));
    if ($generated === '') {
        return $box;
    }

    $box->addChild(new TextBox($element, $styles, $generated));

    return $box;
}
| 375 | |
/**
 * Turn a cascaded `content` value into the text of a pseudo-element.
 *
 * Returns null — meaning "generate no box" — when the value is absent,
 * is the `none` / `normal` keyword, or contains anything that
 * {@see contentItemAsString()} cannot translate. Returns the empty
 * string when the content legitimately resolves to nothing (the box is
 * still generated).
 *
 * A single item is translated directly; a `ValueList` is translated
 * item by item and the pieces are concatenated without a separator.
 */
private function resolvePseudoContent(?\Phpdftk\Css\Value\Value $value, Element $host, CascadedValues $values): ?string
{
    if ($value === null) {
        return null;
    }

    // `none` and `normal` suppress the pseudo-element altogether; any
    // other keyword is handed to contentItemAsString() below.
    if ($value instanceof Keyword
        && in_array(strtolower($value->name), ['none', 'normal'], true)
    ) {
        return null;
    }

    $single = $this->contentItemAsString($value, $host, $values);
    if ($single !== null) {
        return $single;
    }

    if (!$value instanceof \Phpdftk\Css\Value\ValueList) {
        return null;
    }

    $pieces = [];
    foreach ($value->values as $component) {
        $piece = $this->contentItemAsString($component, $host, $values);
        if ($piece === null) {
            // One untranslatable component invalidates the whole list.
            return null;
        }
        $pieces[] = $piece;
    }

    return implode('', $pieces);
}
| 417 | |
/**
 * Resolve CSS Generated Content 3 §3.1 `quotes` to the
 * `[openQuote, closeQuote]` pair for `open-quote` / `close-quote`
 * content keywords.
 *
 * - `none` yields two empty strings: the quote keywords then produce
 *   no quotation marks.
 * - An explicit value is a list of strings paired open/close — Phase-1
 *   uses the first pair only; nested quote depth tracking lands later.
 * - Anything else (including `auto`, the initial value) defers to the
 *   typographic default U+201C / U+201D ("smart quotes").
 *
 * @return array{0:string, 1:string}
 */
private function resolveQuotePair(CascadedValues $values): array
{
    $value = $values->get('quotes');
    if ($value instanceof Keyword && strtolower($value->name) === 'none') {
        return ['', ''];
    }
    if ($value instanceof \Phpdftk\Css\Value\ValueList) {
        $strings = [];
        foreach ($value->values as $v) {
            if ($v instanceof \Phpdftk\Css\Value\StringValue) {
                $strings[] = $v->value;
            }
        }
        if (count($strings) >= 2) {
            return [$strings[0], $strings[1]];
        }
    }
    // U+201C LEFT DOUBLE QUOTATION MARK + U+201D RIGHT DOUBLE
    // QUOTATION MARK — the typographic default for English. Other
    // locales (German „..." / French «...») are Phase 2 once the
    // cascade tracks `:lang()`-driven UA stylesheets.
    return ["\u{201C}", "\u{201D}"];
}

/**
 * Translate a single content-list item into a plain string, returning
 * null when the item isn't a supported producer.
 *
 * Supported items:
 * - `<string>` — used verbatim;
 * - `open-quote` / `close-quote` — the matching half of
 *   {@see resolveQuotePair()};
 * - `no-open-quote` / `no-close-quote` — the empty string;
 * - `attr(name)` — the host element's attribute value, or the empty
 *   string when the attribute is absent;
 * - `counter(name [, style])` — the current value of the named counter
 *   (0 when it was never set) formatted by {@see formatCounter()},
 *   `decimal` when no style keyword is given.
 */
private function contentItemAsString(\Phpdftk\Css\Value\Value $value, Element $host, CascadedValues $values): ?string
{
    if ($value instanceof \Phpdftk\Css\Value\StringValue) {
        return $value->value;
    }
    if ($value instanceof Keyword) {
        return match (strtolower($value->name)) {
            'open-quote' => $this->resolveQuotePair($values)[0],
            'close-quote' => $this->resolveQuotePair($values)[1],
            'no-open-quote', 'no-close-quote' => '',
            default => null,
        };
    }
    if (!$value instanceof \Phpdftk\Css\Value\CssFunction || $value->arguments === []) {
        return null;
    }

    $function = strtolower($value->name);
    $firstArg = $value->arguments[0];

    if ($function === 'attr') {
        // The attribute name may be written as an identifier or a string.
        $name = null;
        if ($firstArg instanceof Keyword) {
            $name = $firstArg->name;
        } elseif ($firstArg instanceof \Phpdftk\Css\Value\StringValue) {
            $name = $firstArg->value;
        }
        if ($name === null || $name === '') {
            return null;
        }
        return $host->getAttribute($name) ?? '';
    }

    if ($function === 'counter' && $firstArg instanceof Keyword) {
        $count = $this->counters[$firstArg->name] ?? 0;
        $style = isset($value->arguments[1]) && $value->arguments[1] instanceof Keyword
            ? strtolower($value->arguments[1]->name)
            : 'decimal';
        return $this->formatCounter($count, $style);
    }

    return null;
}
| 504 | |
/**
 * Honour `counter-reset: <name> [<int>]?`: every named counter is set in
 * {@see counters} to the given integer, or to 0 when none is given.
 * Several name/value pairs in one declaration are supported.
 */
private function applyCounterReset(CascadedValues $values): void
{
    $assign = function (string $counter, int $start): void {
        $this->counters[$counter] = $start;
    };

    $this->forEachCounterPair($values->get('counter-reset'), $assign, defaultValue: 0);
}
| 516 | |
/**
 * Honour `counter-increment: <name> [<int>]?`: every named counter is
 * advanced by the given integer, or by 1 when none is given. A counter
 * that has not been set yet starts from 0.
 */
private function applyCounterIncrement(CascadedValues $values): void
{
    $advance = function (string $counter, int $step): void {
        $current = $this->counters[$counter] ?? 0;
        $this->counters[$counter] = $current + $step;
    };

    $this->forEachCounterPair($values->get('counter-increment'), $advance, defaultValue: 1);
}
| 528 | |
/**
 * Iterate the `<name> [<int>]?` pairs of a `counter-reset` /
 * `counter-increment` value, calling `$cb(name, integer)` for each.
 *
 * A lone Keyword is one counter name paired with `$defaultValue`, except
 * for `none`, which yields nothing. In a `ValueList`, each Keyword is a
 * name; an Integer immediately after it is its value, otherwise
 * `$defaultValue` is used. Items that are not Keywords (and are not
 * consumed as a value) are ignored. Any other value shape, including
 * null, yields nothing.
 *
 * @param \Closure(string, int): void $cb
 */
private function forEachCounterPair(?\Phpdftk\Css\Value\Value $value, \Closure $cb, int $defaultValue): void
{
    if ($value instanceof Keyword) {
        if (strtolower($value->name) !== 'none') {
            $cb($value->name, $defaultValue);
        }
        return;
    }
    if (!$value instanceof \Phpdftk\Css\Value\ValueList) {
        return;
    }

    $items = $value->values;
    $total = count($items);
    for ($index = 0; $index < $total; $index++) {
        $item = $items[$index];
        if (!$item instanceof Keyword) {
            continue;
        }
        $following = $index + 1 < $total ? $items[$index + 1] : null;
        if ($following instanceof \Phpdftk\Css\Value\Integer) {
            $cb($item->name, $following->value);
            // The integer belongs to this pair; step over it.
            $index++;
            continue;
        }
        $cb($item->name, $defaultValue);
    }
}
| 568 | |
/**
 * Render a counter value in the requested list style.
 *
 * Recognised styles: `decimal-leading-zero` (zero-padded to two
 * characters), `lower-alpha` / `lower-latin` / `upper-alpha` /
 * `upper-latin` (letters) and `lower-roman` / `upper-roman` (roman
 * numerals). Every other style name, `decimal` included, renders the
 * plain integer.
 */
private function formatCounter(int $count, string $style): string
{
    // All lowercase variants share the `lower-` prefix.
    $lowercase = str_starts_with($style, 'lower-');

    return match ($style) {
        'decimal-leading-zero' => sprintf('%02d', $count),
        'lower-alpha', 'lower-latin', 'upper-alpha', 'upper-latin'
            => $this->bijectiveBase26($count, lower: $lowercase),
        'lower-roman', 'upper-roman'
            => $lowercase ? strtolower($this->roman($count)) : $this->roman($count),
        default => (string) $count,
    };
}
| 585 | |
/**
 * Render a positive integer as letters: 1 → a, 26 → z, 27 → aa, …
 * (upper-case when `$lower` is false). Zero and negative numbers have
 * no letter form and are returned as their decimal string.
 */
private function bijectiveBase26(int $n, bool $lower): string
{
    if ($n < 1) {
        return (string) $n;
    }

    $firstLetter = ord($lower ? 'a' : 'A');
    $letters = '';
    // Each pass peels off the least significant letter; subtracting one
    // first is what makes the numbering start at 1 with no zero digit.
    for ($rest = $n; $rest > 0; $rest = intdiv($rest - 1, 26)) {
        $letters = chr($firstLetter + (($rest - 1) % 26)) . $letters;
    }

    return $letters;
}
| 599 | |
/**
 * Render an integer as an upper-case roman numeral. Values outside
 * 1..3999 are returned as their decimal string.
 */
private function roman(int $n): string
{
    if ($n <= 0 || $n >= 4000) {
        return (string) $n;
    }

    // Parallel tables, largest weight first, subtractive forms included.
    $weights = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1];
    $symbols = ['M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I'];

    $numeral = '';
    $remaining = $n;
    foreach ($weights as $index => $weight) {
        $times = intdiv($remaining, $weight);
        $numeral .= str_repeat($symbols[$index], $times);
        $remaining -= $times * $weight;
    }

    return $numeral;
}
| 619 | |
/**
 * Wrap a run of boxes in a new AnonymousBlockBox (created with
 * `$values` and no element) and append that wrapper to `$parent`.
 * Does nothing for an empty run.
 *
 * @param list<Box> $inlineGroup
 */
private function flushInlineGroup(Box $parent, CascadedValues $values, array $inlineGroup): void
{
    if (count($inlineGroup) === 0) {
        return;
    }

    $wrapper = new AnonymousBlockBox(null, $values);
    foreach ($inlineGroup as $member) {
        $wrapper->addChild($member);
    }

    $parent->addChild($wrapper);
}
| 632 | |
/**
 * Instantiate the box class that corresponds to a `display` keyword.
 * Keywords without a dedicated class fall back to BlockBox.
 */
private function makeBox(Element $element, CascadedValues $values, string $display): Box
{
    $boxClass = match ($display) {
        'inline' => InlineBox::class,
        'inline-block', 'inline-table', 'inline-flex', 'inline-grid' => AtomicInlineBox::class,
        'table' => TableBox::class,
        'table-row' => TableRowBox::class,
        'table-cell' => TableCellBox::class,
        'flex' => FlexBox::class,
        default => BlockBox::class,
    };

    return new $boxClass($element, $values);
}
| 646 | |
/**
 * Lower-cased `display` keyword from the cascaded values; `inline` when
 * `display` is not a Keyword.
 */
private function displayKeyword(CascadedValues $values): string
{
    $declared = $values->get('display');

    return $declared instanceof Keyword
        ? strtolower($declared->name)
        : 'inline';
}
| 655 | |
/**
 * Fold pre-CSS HTML "presentational attributes" (HTML 5 §15.3) into the
 * cascaded values, writing only where the cascade left a gap.
 *
 * - `<img>` / `<embed>` / `<iframe>` / `<video>`: a `width` / `height`
 *   attribute is converted to a px length when the matching CSS
 *   property is missing or `auto`.
 * - `<img>` only: when width and/or height is still missing or `auto`,
 *   the natural size of the image behind `src` fills the gap — both
 *   sides when neither is known, or the missing side derived from the
 *   known side and the natural aspect ratio (CSS Images 3 §3.3 "used
 *   image dimensions").
 * - `<ol>` / `<ul>`: a recognised `type` attribute becomes the
 *   `list-style-type` keyword when that property is missing or still
 *   `disc` / `decimal`.
 */
private function applyPresentationalAttributes(Element $element, CascadedValues $values): void
{
    $tag = strtolower($element->localName);

    if (in_array($tag, ['img', 'embed', 'iframe', 'video'], true)) {
        foreach (['width', 'height'] as $dimension) {
            // A non-`auto` value from the cascade takes precedence.
            $sizedByCss = $values->has($dimension) && !$this->isAutoLength($values->get($dimension));
            $raw = $sizedByCss ? null : $element->getAttribute($dimension);
            if ($raw === null) {
                continue;
            }
            $px = $this->parseHtmlLength($raw);
            if ($px === null) {
                continue;
            }
            $values->set($dimension, new \Phpdftk\Css\Value\Length($px, \Phpdftk\Css\Value\LengthUnit::Px));
        }

        if ($tag === 'img') {
            $width = $values->get('width');
            $height = $values->get('height');
            $needsWidth = !$values->has('width') || $this->isAutoLength($width);
            $needsHeight = !$values->has('height') || $this->isAutoLength($height);

            // Only look at the image itself when a side is still open.
            $natural = ($needsWidth || $needsHeight)
                ? $this->naturalImageSize($element->getAttribute('src'))
                : null;

            if ($natural !== null) {
                [$naturalWidth, $naturalHeight] = $natural;
                if ($needsWidth && $needsHeight) {
                    $values->set(
                        'width',
                        new \Phpdftk\Css\Value\Length((float) $naturalWidth, \Phpdftk\Css\Value\LengthUnit::Px),
                    );
                    $values->set(
                        'height',
                        new \Phpdftk\Css\Value\Length((float) $naturalHeight, \Phpdftk\Css\Value\LengthUnit::Px),
                    );
                } elseif ($needsWidth && $height instanceof \Phpdftk\Css\Value\Length && $naturalHeight > 0) {
                    // Height known: scale the width by the natural ratio.
                    $values->set(
                        'width',
                        new \Phpdftk\Css\Value\Length(
                            $height->value * ($naturalWidth / $naturalHeight),
                            \Phpdftk\Css\Value\LengthUnit::Px,
                        ),
                    );
                } elseif ($needsHeight && $width instanceof \Phpdftk\Css\Value\Length && $naturalWidth > 0) {
                    // Width known: scale the height by the natural ratio.
                    $values->set(
                        'height',
                        new \Phpdftk\Css\Value\Length(
                            $width->value * ($naturalHeight / $naturalWidth),
                            \Phpdftk\Css\Value\LengthUnit::Px,
                        ),
                    );
                }
            }
        }
    }

    // HTML 5 §4.4.5.1: `<ol type="A">` / `"a"` / `"I"` / `"i"` / `"1"`
    // maps to a `list-style-type` keyword. `<ul type="...">` is the
    // older HTML 4 form, still seen in real-world documents.
    if ($tag !== 'ol' && $tag !== 'ul') {
        return;
    }
    $type = $element->getAttribute('type');
    if ($type === null || $type === '') {
        return;
    }

    $listStyles = [
        '1' => 'decimal',
        'A' => 'upper-alpha',
        'a' => 'lower-alpha',
        'I' => 'upper-roman',
        'i' => 'lower-roman',
        'disc' => 'disc',
        'circle' => 'circle',
        'square' => 'square',
    ];
    $keyword = $listStyles[$type] ?? null;
    if ($keyword === null) {
        return;
    }

    // Only apply when the cascade has not already produced a
    // non-default value.
    $current = $values->get('list-style-type');
    $stillDefault = $current instanceof Keyword
        && in_array(strtolower($current->name), ['disc', 'decimal'], true);
    if ($stillDefault || !$values->has('list-style-type')) {
        $values->set('list-style-type', new Keyword($keyword));
    }
}
| 749 | |
/**
 * Tell whether a cascaded value is the `auto` keyword.
 *
 * Returns false for null and for any value that is not a `Keyword`;
 * the keyword name is compared case-insensitively.
 */
private function isAutoLength(?\Phpdftk\Css\Value\Value $v): bool
{
    if (!($v instanceof Keyword)) {
        return false;
    }

    return strtolower($v->name) === 'auto';
}
| 754 | |
/**
 * Read the natural pixel dimensions for an `<img src>` value.
 *
 * Supported sources:
 *  - `data:image/{png,jpeg,jpg}[;param...][;base64],<data>` (RFC 2397):
 *    the payload is decoded and handed to `ImageParser::parseString`.
 *  - anything else is treated as a local path: resolution (and any
 *    safety gating) is delegated to `resolveLocalImagePath()`, and the
 *    resolved path is handed to `ImageParser::parse`.
 *
 * Returns null when `$src` is null/empty, when the data URL is not a
 * recognised variant or decodes to nothing, when the local path cannot
 * be resolved, or when the parser throws.
 *
 * @return array{int, int}|null `[width, height]`
 */
private function naturalImageSize(?string $src): ?array
{
    if ($src === null || $src === '') {
        return null;
    }

    if (str_starts_with($src, 'data:')) {
        $payload = $this->decodeImageDataUrl($src);
        if ($payload === null) {
            return null;
        }
        try {
            $info = \Phpdftk\ImageMetadata\ImageParser::parseString($payload);
        } catch (\Throwable) {
            // Best effort: undecodable bytes simply mean "no intrinsic size".
            return null;
        }
        return [$info->width, $info->height];
    }

    $resolved = $this->resolveLocalImagePath($src);
    if ($resolved === null) {
        return null;
    }
    try {
        $info = \Phpdftk\ImageMetadata\ImageParser::parse($resolved);
    } catch (\Throwable) {
        // Best effort: an unreadable/unsupported file means "no intrinsic size".
        return null;
    }
    return [$info->width, $info->height];
}

/**
 * Extract the raw image bytes from a PNG/JPEG `data:` URL.
 *
 * Follows the RFC 2397 shape `data:<mediatype>[;param]*[;base64],<data>`:
 * the header ends at the first comma, and the payload is base64 only
 * when the last header parameter is `;base64`; otherwise it is
 * percent-encoded.
 *
 * NOTE(review): the painter decodes the same URLs elsewhere — confirm it
 * accepts the same forms so layout and painting stay in agreement.
 *
 * @return string|null Decoded bytes, or null when the URL is not a
 *                     supported image data URL or decodes to nothing.
 */
private function decodeImageDataUrl(string $src): ?string
{
    // Split by hand so the regex only ever sees the short header, never
    // the (potentially multi-megabyte) payload.
    $comma = strpos($src, ',');
    if ($comma === false) {
        return null;
    }

    $header = substr($src, 0, $comma);
    if (preg_match('~^data:image/(?:png|jpeg|jpg)((?:;[^;]*)*)\z~', $header, $m) !== 1) {
        return null;
    }

    $encoded = substr($src, $comma + 1);
    // rawurldecode, not urldecode: a literal `+` in a data URL is a plus
    // sign, not an encoded space.
    $payload = str_ends_with($m[1], ';base64')
        ? base64_decode($encoded, strict: true)
        : rawurldecode($encoded);

    return ($payload === false || $payload === '') ? null : $payload;
}
| 803 | |
/**
 * Map an `<img src>` value to a local-file path, or null when the
 * resolver declines it.
 *
 * All of the work is handed to `Phpdftk\Filesystem\ResourceLoader`,
 * constructed with this generator's `baseDir`. Using the shared
 * resolver keeps the BoxGenerator (which decides layout) and the
 * painter (which fetches the bytes) in agreement on what is loadable.
 */
private function resolveLocalImagePath(string $src): ?string
{
    $loader = new \Phpdftk\Filesystem\ResourceLoader($this->baseDir);

    return $loader->resolveLocalPath($src);
}
| 816 | |
/**
 * Parse a legacy HTML `width` / `height` attribute value.
 *
 * After trimming surrounding whitespace, an unsigned decimal number
 * (digits, optionally followed by `.` and more digits) is returned as
 * a float. Anything else — an empty string, a trailing `%`, a sign,
 * units, or other text — yields null.
 */
private function parseHtmlLength(string $raw): ?float
{
    $candidate = trim($raw);

    // The pattern needs at least one digit, so '' falls through to null.
    return preg_match('/^\d+(?:\.\d+)?$/', $candidate) === 1
        ? (float) $candidate
        : null;
}
| 834 | |
/**
 * Report whether the list holds at least one inline-level box and at
 * least one box that is not inline-level.
 *
 * Scanning stops as soon as both kinds have been seen.
 *
 * @param list<Box> $boxes
 */
private function mixesBlockAndInline(array $boxes): bool
{
    $sawInline = false;
    $sawBlock = false;

    foreach ($boxes as $candidate) {
        $inline = $this->isInlineLevel($candidate);
        $sawInline = $sawInline || $inline;
        $sawBlock = $sawBlock || !$inline;

        if ($sawInline && $sawBlock) {
            return true;
        }
    }

    return false;
}
| 852 | |
/**
 * Tell whether a box counts as inline-level: an `InlineBox`, `TextBox`,
 * `AtomicInlineBox`, or `LineBreakBox`. Every other box type is not.
 */
private function isInlineLevel(Box $box): bool
{
    return match (true) {
        $box instanceof InlineBox,
        $box instanceof TextBox,
        $box instanceof AtomicInlineBox,
        $box instanceof LineBreakBox => true,
        default => false,
    };
}
| 859 | } |
| 860 | } |