Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
84.83% |
917 / 1081 |
|
27.66% |
13 / 47 |
CRAP | |
0.00% |
0 / 1 |
| Renderer | |
84.83% |
917 / 1081 |
|
27.66% |
13 / 47 |
1107.89 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| render | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| renderInto | |
91.45% |
139 / 152 |
|
0.00% |
0 / 1 |
21.28 | |||
| collectHeadings | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
7 | |||
| collectTextContent | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
| emitOutline | |
100.00% |
73 / 73 |
|
100.00% |
1 / 1 |
19 | |||
| collectAnchors | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
11.03 | |||
| parse | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| parseStylesheet | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| collectStylesheets | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
4 | |||
| expandImports | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
9.02 | |||
| loadImport | |
76.19% |
16 / 21 |
|
0.00% |
0 / 1 |
12.63 | |||
| fetchImportSource | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| resourceLoader | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| emitLinkAnnotations | |
94.44% |
34 / 36 |
|
0.00% |
0 / 1 |
10.02 | |||
| resolveFragmentDestination | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
5.01 | |||
| applyDocumentMetadata | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
5.01 | |||
| formatPdfDate | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
4.54 | |||
| findTextOfFirstElement | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
9.02 | |||
| findMetaContent | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
10.03 | |||
| extractAuthorCss | |
96.00% |
24 / 25 |
|
0.00% |
0 / 1 |
12 | |||
| collectCodepoints | |
73.68% |
28 / 38 |
|
0.00% |
0 / 1 |
11.82 | |||
| countUnpaintableImages | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
9.02 | |||
| isPaintableImageSrc | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
4.13 | |||
| documentHasText | |
87.50% |
14 / 16 |
|
0.00% |
0 / 1 |
8.12 | |||
| loadFontFaces | |
57.35% |
39 / 68 |
|
0.00% |
0 / 1 |
55.21 | |||
| fontFamilyName | |
17.65% |
3 / 17 |
|
0.00% |
0 / 1 |
78.58 | |||
| splitSrcList | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
7 | |||
| extractFormatHint | |
72.73% |
8 / 11 |
|
0.00% |
0 / 1 |
7.99 | |||
| loadLinkedStylesheet | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
10.80 | |||
| mediaPreludeMatches | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
8.70 | |||
| fetchFontSource | |
37.50% |
9 / 24 |
|
0.00% |
0 / 1 |
61.85 | |||
| collectPageMarginBoxes | |
88.12% |
89 / 101 |
|
0.00% |
0 / 1 |
40.42 | |||
| resolvePageSize | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
10 | |||
| resolvePageBackground | |
82.61% |
19 / 23 |
|
0.00% |
0 / 1 |
13.89 | |||
| pageSelectorAppliesTo | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
9.49 | |||
| resolvePageNames | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
9.02 | |||
| resolvePageMargins | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
13.13 | |||
| parsePageSize | |
82.69% |
43 / 52 |
|
0.00% |
0 / 1 |
23.29 | |||
| parseFontWeight | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
15.40 | |||
| parseFontStyle | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| normalisePageSelector | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
4.03 | |||
| resolvePageMarginBoxes | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
7 | |||
| parseContentValue | |
92.86% |
26 / 28 |
|
0.00% |
0 / 1 |
13.06 | |||
| splitCounterArgs | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
| paintPageMarginBoxes | |
96.15% |
75 / 78 |
|
0.00% |
0 / 1 |
30 | |||
| maybeThrow | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\HtmlToPdf; |
| 6 | |
| 7 | use Phpdftk\Css\Cascade\Cascade; |
| 8 | use Phpdftk\Css\Cascade\LengthContext; |
| 9 | use Phpdftk\Css\Cascade\PropertyRegistry; |
| 10 | use Phpdftk\Css\Parser as CssParser; |
| 11 | use Phpdftk\Css\Sheet\Origin; |
| 12 | use Phpdftk\Css\Sheet\Stylesheet; |
| 13 | use Phpdftk\Html\Dom\Document; |
| 14 | use Phpdftk\Html\Parser as HtmlParser; |
| 15 | use Phpdftk\HtmlToPdf\Box\BoxGenerator; |
| 16 | use Phpdftk\HtmlToPdf\Layout\BlockLayout; |
| 17 | use Phpdftk\HtmlToPdf\Layout\LayoutContext; |
| 18 | use Phpdftk\HtmlToPdf\Painter\Painter; |
| 19 | use Phpdftk\Pdf\Writer\PdfWriter; |
| 20 | |
| 21 | /** |
| 22 | * Top-level façade for `phpdftk/html-to-pdf`. Wires parse → cascade → |
| 23 | * box generation → layout → paint into one call. Holds no state between |
| 24 | * invocations — every `render()` produces a fresh `PdfWriter`. |
| 25 | * |
| 26 | * Usage: |
| 27 | * |
| 28 | * $result = (new Renderer())->render($html, $css); |
| 29 | * $result->writer->save('out.pdf'); |
| 30 | * |
| 31 | * Or render into an existing writer (the path `Pdf::addHtml` will use): |
| 32 | * |
| 33 | * $warnings = (new Renderer())->renderInto($writer, $html, $css); |
| 34 | * |
| 35 |  * Phase-1 simplifications: text is only painted when `RendererOptions`
| 36 |  * carries a `defaultFont`; without one the renderer still produces a
| 37 |  * structurally-valid PDF with background + border content. `@font-face`
| 38 |  * rules are loaded in `renderInto()` and merged over the configured font
| 39 |  * map. Output is paginated by slicing the laid-out document into
| 40 |  * page-height strips; `@page { size }` overrides the default page size.
| 41 | */ |
| 42 | final class Renderer |
| 43 | { |
| 44 |     private readonly HtmlParser $htmlParser; // parses the HTML input (renderInto(), parse())
| 45 |     private readonly CssParser $cssParser; // parses UA, caller-supplied and document stylesheets
| 46 |     private readonly Cascade $cascade; // shared by the box generator and the block layout
| 47 |     private readonly BoxGenerator $boxGenerator; // builds the box tree from document + sheets
| 48 |     private readonly BlockLayout $layout; // lays out the generated box tree
| 49 |     private ?\Phpdftk\Filesystem\ResourceLoader $cachedResourceLoader = null; // lazily created by resourceLoader()
| 50 | |
| 51 |     public function __construct(
| 52 |         public readonly RendererOptions $options = new RendererOptions(),
| 53 |     ) {
| 54 |         $this->htmlParser = new HtmlParser();
| 55 |         $this->cssParser = new CssParser();
| 56 |         $cascade = $this->cascade = new Cascade(PropertyRegistry::default()); // one cascade, shared below
| 57 |         $this->boxGenerator = new BoxGenerator($cascade, $options->baseDir);
| 58 |         $this->layout = new BlockLayout($cascade);
| 59 |     }
| 60 | |
| 61 | /** |
| 62 | * Render `$html` (with optional author CSS) into a fresh `PdfWriter`. |
| 63 | * Returns a {@see RenderResult} carrying both the writer and any |
| 64 | * diagnostics that came up. |
| 65 | */ |
| 66 |     public function render(string $html, ?string $css = null): RenderResult
| 67 |     {
| 68 |         // renderInto() mutates the writer in place and hands back the diagnostics.
| 69 |         $pdf = new PdfWriter();
| 70 |         return new RenderResult($pdf, $this->renderInto($pdf, $html, $css));
| 71 |     }
| 72 | |
| 73 | /** |
| 74 | * Render into an existing `PdfWriter`. Returns the diagnostics |
| 75 | * emitted; the writer mutation is the visible side effect. |
| 76 | * |
| 77 | * @return list<Warning> |
| 78 | */ |
| 79 |     public function renderInto(PdfWriter $writer, string $html, ?string $css = null): array
| 80 |     {
| 81 |         $warnings = [];
| 82 |
| 83 |         $document = $this->htmlParser->parseDocument($html);
| 84 |         $this->applyDocumentMetadata($document, $writer);
| 85 |         $sheets = $this->collectStylesheets($css, $document);
| 86 |         // @font-face parsing: walk every sheet for `@font-face` rules,
| 87 |         // decode their `data:font/*` sources, and merge the parsed
| 88 |         // OpenTypeData into a copy of the configured fontMap before the
| 89 |         // FontResolver gets built. Authored `@font-face` wins over any
| 90 |         // entry in `RendererOptions::fontMap` that shares its family.
| 91 |         $fontMap = $this->options->fontMap;
| 92 |         $faceWarnings = [];
| 93 |         foreach ($this->loadFontFaces($sheets, $faceWarnings) as $name => $data) {
| 94 |             $fontMap[strtolower($name)] = $data;
| 95 |         }
| 96 |         $warnings = array_merge($warnings, $faceWarnings);
| 97 |         // CSS Paged Media 3 §6.1: `@page { size: ... }` overrides the
| 98 |         // renderer's default page dimensions when set. Read it before
| 99 |         // building the layout context so block layout sees the right
| 100 |         // containing-block width / height for `%` resolution and the
| 101 |         // pagination math works against the actual page slot.
| 102 |         $pageSize = $this->resolvePageSize($sheets);
| 103 |         $pageWidth = $pageSize['width'];
| 104 |         $pageHeight = $pageSize['height'];
| 105 |         // CSS Paged Media 3 §6.2: `@page { margin: ... }` declares the
| 106 |         // page margins. Phase-1 uses the margin only for positioning the
| 107 |         // running headers/footers — the body still gets its layout origin
| 108 |         // from `body { margin }` so existing fixtures stay stable.
| 109 |         $pageMargins = $this->resolvePageMargins($sheets);
| 110 |         $root = $this->boxGenerator->generate($document, $sheets);
| 111 |         if ($root === null) {
| 112 |             $warnings[] = new Warning(
| 113 |                 WarningCode::UnsupportedDisplayType,
| 114 |                 'Document has no <html> root element',
| 115 |                 WarningSeverity::Error,
| 116 |             );
| 117 |             $this->maybeThrow($warnings);
| 118 |             return $warnings;
| 119 |         }
| 120 |
| 121 |         $fontResolver = new \Phpdftk\HtmlToPdf\Layout\FontResolver(
| 122 |             $fontMap,
| 123 |             $this->options->defaultFont,
| 124 |             $this->options->faceMap,
| 125 |         );
| 126 |         $layoutCtx = new LayoutContext(
| 127 |             containingBlockWidth: $pageWidth,
| 128 |             containingBlockHeight: $pageHeight,
| 129 |             originX: 0.0,
| 130 |             originY: 0.0,
| 131 |             lengthContext: new LengthContext(),
| 132 |             defaultFont: $this->options->defaultFont,
| 133 |             fontResolver: $fontResolver,
| 134 |         );
| 135 |         $this->layout->layout($root, $layoutCtx);
| 136 |
| 137 |         // If the document contains non-whitespace text but no font was
| 138 |         // wired in, warn — the text won't render. Lenient mode still
| 139 |         // produces a valid PDF (background + border content only).
| 140 |         if ($this->options->defaultFont === null && $this->documentHasText($document)) {
| 141 |             $warnings[] = new Warning(
| 142 |                 WarningCode::MissingFont,
| 143 |                 'No default font configured — text content will not render. '
| 144 |                 . 'Pass a font via RendererOptions::withDefaultFont().',
| 145 |                 WarningSeverity::Warning,
| 146 |             );
| 147 |         }
| 148 |
| 149 |         // `<img>` without a paintable `data:image/png|jpeg` URL or `alt`
| 150 |         // fallback won't appear in the output — emit one warning carrying
| 151 |         // the count so callers can surface the missing-resource state.
| 152 |         $unpaintableImgs = $this->countUnpaintableImages($document);
| 153 |         if ($unpaintableImgs > 0) {
| 154 |             $warnings[] = new Warning(
| 155 |                 WarningCode::MissingResource,
| 156 |                 sprintf(
| 157 |                     '%d <img> element%s without an embeddable data: URL — '
| 158 |                     . 'remote / file:// image fetching lands with Phase 1L\'s '
| 159 |                     . 'resource loader. Add `alt="..."` so the fallback flows.',
| 160 |                     $unpaintableImgs,
| 161 |                     $unpaintableImgs === 1 ? '' : 's',
| 162 |                 ),
| 163 |                 WarningSeverity::Warning,
| 164 |             );
| 165 |         }
| 166 |
| 167 |         $totalHeight = max($pageHeight, $root->geometry->outerHeight());
| 168 |         $pageCount = (int) max(1, ceil($totalHeight / $pageHeight));
| 169 |
| 170 |         // CSS Paged Media 3 §3.4: when a block declares `page: foo`,
| 171 |         // the page containing its first fragment is tagged "foo" and
| 172 |         // picks up `@page foo` overrides (background / margins /
| 173 |         // margin-boxes). Walk the laid-out box tree once to build
| 174 |         // `pageIndex → name`; later, per-page resolvers overlay the
| 175 |         // named rules on top of the defaults.
| 176 |         $pageNames = $this->resolvePageNames($root, $pageHeight, $pageCount);
| 177 |
| 178 |         // Build an `id → layoutY` map so `<a href="#anchor">` links can
| 179 |         // resolve to PDF named destinations. Walk the post-layout box tree
| 180 |         // once.
| 181 |         $anchorMap = $this->collectAnchors($root);
| 182 |
| 183 |         // Collect heading boxes ahead of pagination so we can emit a PDF
| 184 |         // outline once page refs are known.
| 185 |         $headings = $this->collectHeadings($root);
| 186 |
| 187 |         // Pre-add all pages up-front so we have a stable PdfReference for
| 188 |         // every page before any annotation is emitted — a link on page 1
| 189 |         // may target an anchor on page 3.
| 190 |         /** @var list<\Phpdftk\Pdf\Writer\Page> $pages */
| 191 |         $pages = [];
| 192 |         for ($i = 0; $i < $pageCount; $i++) {
| 193 |             $pages[] = $writer->addPage($pageWidth, $pageHeight);
| 194 |         }
| 195 |
| 196 |         // Register the outline now that page refs exist. `emitOutline()`
| 197 |         // nests each `<hN>` under the nearest preceding lower-N heading.
| 198 |         $this->emitOutline($headings, $pages, $pageHeight, $writer);
| 199 |
| 200 |         // CSS Paged Media 3 §3 + Generated Content for Paged Media 3 §2:
| 201 |         // collect `@page { @<position> { content: "..." } }` blocks once
| 202 |         // up-front. Phase-1 subset supports static `content: <string>` in
| 203 |         // the 6 corner / centre margin boxes (top-left / top-center /
| 204 |         // top-right / bottom-left / bottom-center / bottom-right). The
| 205 |         // other 10 positions (corner + side rails) and `counter(page)` /
| 206 |         // `element()` substitution land in follow-ups.
| 207 |         $pageMarginBoxes = $this->collectPageMarginBoxes($sheets);
| 208 |
| 209 |         // Depends on `$html` only, so collect once instead of once per page.
| 210 |         $codepoints = $this->collectCodepoints($html);
| 211 |         for ($i = 0; $i < $pageCount; $i++) {
| 212 |             $page = $pages[$i];
| 213 |             $stream = $writer->addContentStream($page);
| 214 |             $registeredFont = null;
| 215 |             /** @var array<string, \Phpdftk\Pdf\Core\Font\RegisteredFont> $registeredMap */
| 216 |             $registeredMap = [];
| 217 |             if ($this->options->defaultFont !== null) {
| 218 |                 $registeredFont = $writer->addOpenTypeFont(
| 219 |                     $this->options->defaultFont,
| 220 |                     $codepoints,
| 221 |                     $page,
| 222 |                 );
| 223 |                 $registeredMap[$this->options->defaultFont->postScriptName] = $registeredFont;
| 224 |             }
| 225 |             // Register every alternate font in the map so per-fragment
| 226 |             // font-family switching has a `Tf` resource to reference.
| 227 |             foreach ($fontMap as $alt) {
| 228 |                 if ($alt === $this->options->defaultFont) {
| 229 |                     continue;
| 230 |                 }
| 231 |                 if (isset($registeredMap[$alt->postScriptName])) {
| 232 |                     continue;
| 233 |                 }
| 234 |                 $registeredMap[$alt->postScriptName] = $writer->addOpenTypeFont(
| 235 |                     $alt,
| 236 |                     $codepoints,
| 237 |                     $page,
| 238 |                 );
| 239 |             }
| 240 |             // Register every weight/style face the resolver might pick.
| 241 |             // Same key as defaultFont/fontMap so the painter looks up the
| 242 |             // right RegisteredFont by postScriptName.
| 243 |             foreach ($this->options->faceMap as $faces) {
| 244 |                 foreach ($faces as $face) {
| 245 |                     if (isset($registeredMap[$face->data->postScriptName])) {
| 246 |                         continue;
| 247 |                     }
| 248 |                     $registeredMap[$face->data->postScriptName] = $writer->addOpenTypeFont(
| 249 |                         $face->data,
| 250 |                         $codepoints,
| 251 |                         $page,
| 252 |                     );
| 253 |                 }
| 254 |             }
| 255 |
| 256 |             // Clip every page's drawing to its own MediaBox so the
| 257 |             // "paint the whole tree, let the viewport drop what's
| 258 |             // off-page" pagination strategy doesn't leak content past
| 259 |             // the page boundaries in viewers that don't crop automatically.
| 260 |             $stream->rectangle(0, 0, $pageWidth, $pageHeight);
| 261 |             $stream->clip();
| 262 |             $stream->endPath();
| 263 |             // CSS Paged Media 3 §3.1: `@page { background-color }`
| 264 |             // fills the entire page sheet before any content paints.
| 265 |             // Sits inside the clip so the colour stays on this page
| 266 |             // even when later content draws over it. When this page
| 267 |             // is tagged with a name (via a `page: foo` block on it),
| 268 |             // overlay `@page foo` onto the default rule.
| 269 |             $pageBgForThis = $this->resolvePageBackground($sheets, $pageNames[$i] ?? null);
| 270 |             if ($pageBgForThis !== null) {
| 271 |                 $stream->saveGraphicsState();
| 272 |                 $stream->setFillColorRGB(
| 273 |                     $pageBgForThis->r,
| 274 |                     $pageBgForThis->g,
| 275 |                     $pageBgForThis->b,
| 276 |                 );
| 277 |                 $stream->rectangle(0, 0, $pageWidth, $pageHeight);
| 278 |                 $stream->fill();
| 279 |                 $stream->restoreGraphicsState();
| 280 |             }
| 281 |
| 282 |             // Page i wants layout-Y rows [i*pageHeight .. (i+1)*pageHeight)
| 283 |             // to appear at the top of the PDF page. The painter computes
| 284 |             // PDF Y as `pageHeightConstant - layoutY`; setting the constant
| 285 |             // to `(i+1)*pageHeight` makes layoutY=i*pageHeight land at
| 286 |             // PDF Y = pageHeight (top of MediaBox), and layoutY=(i+1)*pageHeight
| 287 |             // land at PDF Y = 0 (bottom).
| 288 |             $painter = new Painter(
| 289 |                 ($i + 1) * $pageHeight,
| 290 |                 $registeredFont,
| 291 |                 $page,
| 292 |                 pageRangeStart: $i * $pageHeight,
| 293 |                 pageRangeEnd: ($i + 1) * $pageHeight,
| 294 |                 writer: $writer,
| 295 |                 baseDir: $this->options->baseDir,
| 296 |                 registeredFonts: $registeredMap,
| 297 |             );
| 298 |             $painter->paint($root, $stream);
| 299 |             // Per-page link annotations — emit one /Link per `<a href>` rect
| 300 |             // the painter collected on this page, clipping to MediaBox so
| 301 |             // multi-page paint passes don't leak annotations onto unrelated
| 302 |             // pages.
| 303 |             $this->emitLinkAnnotations(
| 304 |                 $painter->collectedLinks,
| 305 |                 $writer,
| 306 |                 $page,
| 307 |                 $pageHeight,
| 308 |                 $anchorMap,
| 309 |                 $pages,
| 310 |             );
| 311 |             // Paint `@page` margin boxes (running headers / footers) once
| 312 |             // per page, after the main content stream so they sit on top.
| 313 |             // Uses the writer's default-font GID map so text shapes against
| 314 |             // the same font subset as the rest of the document. Per-page
| 315 |             // selector resolution happens here so `@page :first` only
| 316 |             // applies to page 0 and `@page :left`/`:right` alternate.
| 317 |             if ($pageMarginBoxes !== [] && $registeredFont !== null && $this->options->defaultFont !== null) {
| 318 |                 $resolved = $this->resolvePageMarginBoxes($pageMarginBoxes, $i, $pageNames[$i] ?? null);
| 319 |                 if ($resolved !== []) {
| 320 |                     $this->paintPageMarginBoxes(
| 321 |                         $stream,
| 322 |                         $resolved,
| 323 |                         $pageWidth,
| 324 |                         $pageHeight,
| 325 |                         $this->options->defaultFont,
| 326 |                         $registeredFont,
| 327 |                         pageIndex: $i,
| 328 |                         pageCount: $pageCount,
| 329 |                         fontResolver: $fontResolver,
| 330 |                         registeredMap: $registeredMap,
| 331 |                         marginTop: $pageMargins['top'],
| 332 |                         marginRight: $pageMargins['right'],
| 333 |                         marginBottom: $pageMargins['bottom'],
| 334 |                         marginLeft: $pageMargins['left'],
| 335 |                     );
| 336 |                 }
| 337 |             }
| 338 |         }
| 339 |         // NOTE(review): maybeThrow() is only invoked on the no-root path above — confirm that is intended.
| 340 |         return $warnings;
| 341 |     }
| 342 | |
| 343 | /** |
| 344 | * Collect every `<h1>`–`<h6>` box in document order, capturing the |
| 345 | * heading level, the rendered text content, and the box's top-edge Y |
| 346 | * in layout space. Headings without text content are skipped. |
| 347 | * |
| 348 | * @return list<array{level: int, text: string, layoutY: float}> |
| 349 | */ |
| 350 |     private function collectHeadings(\Phpdftk\HtmlToPdf\Box\Box $root): array
| 351 |     {
| 352 |         $out = [];
| 353 |         // Pre-order DFS in document order. `$stack` is used LIFO, so children
| 354 |         // are pushed in reverse and the first child is popped before its
| 355 |         // siblings. Only `BlockBox` is matched so TextBox / InlineBox children
| 356 |         // (which share their parent's element ref) don't get double-counted.
| 357 |         $stack = [$root];
| 358 |         while ($stack !== []) {
| 359 |             $node = array_pop($stack);
| 360 |             $element = $node->element;
| 361 |             if ($node instanceof \Phpdftk\HtmlToPdf\Box\BlockBox
| 362 |                 && $element !== null
| 363 |                 && preg_match('/^h([1-6])$/', strtolower($element->localName), $m) === 1
| 364 |             ) {
| 365 |                 $text = trim($this->collectTextContent($node));
| 366 |                 if ($text !== '') {
| 367 |                     $out[] = [
| 368 |                         'level' => (int) $m[1],
| 369 |                         'text' => $text,
| 370 |                         'layoutY' => $node->geometry->y,
| 371 |                     ];
| 372 |                 }
| 373 |             }
| 374 |             // Appending + popping at the tail avoids re-indexing the whole array per node.
| 375 |             foreach (array_reverse($node->children) as $c) {
| 376 |                 $stack[] = $c;
| 377 |             }
| 378 |         }
| 379 |         return $out;
| 380 |     }
| 381 | |
| 382 | /** Recursively collect TextBox content under a box. */ |
| 383 |     private function collectTextContent(\Phpdftk\HtmlToPdf\Box\Box $box): string
| 384 |     {
| 385 |         if ($box instanceof \Phpdftk\HtmlToPdf\Box\TextBox) { // a TextBox contributes its own text only
| 386 |             return $box->text;
| 387 |         }
| 388 |         $text = '';
| 389 |         foreach ($box->children as $child) {
| 390 |             $text .= $this->collectTextContent($child); // depth-first, in child order
| 391 |         }
| 392 |         return $text;
| 393 |     }
| 394 | |
| 395 | /** |
| 396 | * Register a PDF outline (bookmarks tree) from the collected headings, |
| 397 | * nesting `<hN>` under the most recent heading of lower N (so `<h2>`s |
| 398 | * appear under their preceding `<h1>`, etc.). Headings that open at a |
| 399 | * deeper level than any prior sibling create an implicit parent chain |
| 400 | * at the outline root — matching what browsers do for "reader mode" |
| 401 | * outlines. |
| 402 | * |
| 403 | * @param list<array{level: int, text: string, layoutY: float}> $headings |
| 404 | * @param list<\Phpdftk\Pdf\Writer\Page> $pages |
| 405 | */ |
| 406 |     private function emitOutline(array $headings, array $pages, float $pageHeight, PdfWriter $writer): void
| 407 |     {
| 408 |         if ($headings === [] || $pages === []) {
| 409 |             return;
| 410 |         }
| 411 |         $outline = new \Phpdftk\Pdf\Core\Document\Outline();
| 412 |         $writer->register($outline);
| 413 |         $outlineRef = new \Phpdftk\Pdf\Core\PdfReference($outline->objectNumber);
| 414 |
| 415 |         /**
| 416 |          * Each entry: ['item' => OutlineItem, 'level' => int, 'parent' => ?int
| 417 |          * index into $entries, 'children' => list<int> indices into $entries].
| 418 |          *
| 419 |          * @var list<array{item: \Phpdftk\Pdf\Core\Document\OutlineItem, level: int, parent: ?int, children: list<int>}> $entries
| 420 |          */
| 421 |         $entries = [];
| 422 |         /** @var list<int> $stack stack of $entries indices being descended into */
| 423 |         $stack = [];
| 424 |         foreach ($headings as $h) {
| 425 |             $pageIdx = max(0, min(count($pages) - 1, (int) floor($h['layoutY'] / $pageHeight))); // clamped to an existing page
| 426 |             $localY = $h['layoutY'] - $pageIdx * $pageHeight;
| 427 |             $top = max(0.0, min($pageHeight, $pageHeight - $localY)); // measured from the page bottom, clamped to [0, pageHeight]
| 428 |             $pageRef = new \Phpdftk\Pdf\Core\PdfReference($pages[$pageIdx]->corePage()->objectNumber);
| 429 |             $item = new \Phpdftk\Pdf\Core\Document\OutlineItem($h['text']);
| 430 |             $item->dest = \Phpdftk\Pdf\Core\Document\Destination::xyz($pageRef, null, $top);
| 431 |             $writer->register($item);
| 432 |             // Pop stack while top.level >= this.level so we ascend to the
| 433 |             // nearest open heading with a strictly lower level (the parent).
| 434 |             while ($stack !== [] && $entries[$stack[array_key_last($stack)]]['level'] >= $h['level']) {
| 435 |                 array_pop($stack);
| 436 |             }
| 437 |             $parentIdx = $stack === [] ? null : $stack[array_key_last($stack)];
| 438 |             $idx = count($entries);
| 439 |             $entries[] = [
| 440 |                 'item' => $item,
| 441 |                 'level' => $h['level'],
| 442 |                 'parent' => $parentIdx,
| 443 |                 'children' => [],
| 444 |             ];
| 445 |             if ($parentIdx !== null) {
| 446 |                 $entries[$parentIdx]['children'][] = $idx;
| 447 |             }
| 448 |             $stack[] = $idx;
| 449 |         }
| 450 |
| 451 |         // Wire references now that every item has an object number.
| 452 |         $rootChildren = [];
| 453 |         foreach ($entries as $idx => $entry) {
| 454 |             $item = $entry['item'];
| 455 |             $parentIdx = $entry['parent'];
| 456 |             $item->parent = $parentIdx === null
| 457 |                 ? $outlineRef
| 458 |                 : new \Phpdftk\Pdf\Core\PdfReference($entries[$parentIdx]['item']->objectNumber);
| 459 |             $children = $entry['children'];
| 460 |             if ($children !== []) {
| 461 |                 $first = $entries[$children[0]]['item'];
| 462 |                 $last = $entries[$children[array_key_last($children)]]['item'];
| 463 |                 $item->first = new \Phpdftk\Pdf\Core\PdfReference($first->objectNumber);
| 464 |                 $item->last = new \Phpdftk\Pdf\Core\PdfReference($last->objectNumber);
| 465 |                 $item->count = count($children); // direct children only. NOTE(review): PDF 32000-1 Table 153 uses a negative /Count for a closed item — confirm how OutlineItem serialises this.
| 466 |             }
| 467 |             if ($parentIdx === null) {
| 468 |                 $rootChildren[] = $idx;
| 469 |             }
| 470 |         }
| 471 |
| 472 |         // Sibling prev/next chains per parent. `$entries` is captured by value; only properties of the item objects are set.
| 473 |         $linkSiblings = static function (array $sibIdxs) use ($entries): void {
| 474 |             $n = count($sibIdxs);
| 475 |             for ($i = 0; $i < $n; $i++) {
| 476 |                 $item = $entries[$sibIdxs[$i]]['item'];
| 477 |                 if ($i > 0) {
| 478 |                     $item->prev = new \Phpdftk\Pdf\Core\PdfReference(
| 479 |                         $entries[$sibIdxs[$i - 1]]['item']->objectNumber,
| 480 |                     );
| 481 |                 }
| 482 |                 if ($i < $n - 1) {
| 483 |                     $item->next = new \Phpdftk\Pdf\Core\PdfReference(
| 484 |                         $entries[$sibIdxs[$i + 1]]['item']->objectNumber,
| 485 |                     );
| 486 |                 }
| 487 |             }
| 488 |         };
| 489 |         $linkSiblings($rootChildren);
| 490 |         foreach ($entries as $entry) {
| 491 |             if ($entry['children'] !== []) {
| 492 |                 $linkSiblings($entry['children']);
| 493 |             }
| 494 |         }
| 495 |
| 496 |         if ($rootChildren !== []) {
| 497 |             $outline->first = new \Phpdftk\Pdf\Core\PdfReference(
| 498 |                 $entries[$rootChildren[0]]['item']->objectNumber,
| 499 |             );
| 500 |             $outline->last = new \Phpdftk\Pdf\Core\PdfReference(
| 501 |                 $entries[$rootChildren[array_key_last($rootChildren)]]['item']->objectNumber,
| 502 |             );
| 503 |         }
| 504 |         $outline->count = count($rootChildren);
| 505 |         $catalog = $writer->getCatalog();
| 506 |         $catalog->outlines = $outlineRef;
| 507 |         // Open the outline pane by default so users see the bookmark tree
| 508 |         // when the PDF opens. A page mode that is already set on the
| 509 |         // catalog is left untouched.
| 510 |         if ($catalog->pageMode === null) {
| 511 |             $catalog->pageMode = new \Phpdftk\Pdf\Core\PdfName('UseOutlines');
| 512 |         }
| 513 |     }
| 514 | |
| 515 | /** |
| 516 | * Walk the laid-out box tree and record `id → layoutY` for every box |
| 517 | * whose originating element has an `id` attribute. Also captures the |
| 518 | * legacy HTML4 `<a name="...">` form. The Y is the top edge of the |
| 519 | * box's content area in layout-space (top-down) — that's what we want |
| 520 | * to scroll-to for a `#anchor` jump. |
| 521 | * |
| 522 | * @return array<string, float> |
| 523 | */ |
| 524 |     private function collectAnchors(\Phpdftk\HtmlToPdf\Box\Box $root): array
| 525 |     {
| 526 |         $map = [];
| 527 |         $stack = [$root];
| 528 |         while ($stack !== []) {
| 529 |             $node = array_pop($stack);
| 530 |             $element = $node->element;
| 531 |             if ($element !== null) {
| 532 |                 $keys = [$element->getAttribute('id')];
| 533 |                 if (strtolower($element->localName) === 'a') {
| 534 |                     $keys[] = $element->getAttribute('name');
| 535 |                 }
| 536 |                 foreach ($keys as $key) {
| 537 |                     if ($key !== null && $key !== '' && !isset($map[$key])) {
| 538 |                         $map[$key] = $node->geometry->y;
| 539 |                     }
| 540 |                 }
| 541 |             }
| 542 |             // Reverse-push so the LIFO stack pops children in document order: the FIRST duplicate id/name wins.
| 543 |             foreach (array_reverse($node->children) as $c) {
| 544 |                 $stack[] = $c;
| 545 |             }
| 546 |         }
| 547 |         return $map;
| 548 |     }
| 549 | |
| 550 | /** |
| 551 | * Parse the HTML into a DOM for inspection / manipulation by callers. |
| 552 | * Useful for hand-tweaking output before re-rendering. |
| 553 | */ |
| 554 |     public function parse(string $html): Document
| 555 |     {
| 556 |         return $this->htmlParser->parseDocument($html); // same parser call renderInto() starts with
| 557 |     }
| 558 | |
| 559 |     public function parseStylesheet(string $css): Stylesheet
| 560 |     {
| 561 |         return $this->cssParser->parseStylesheet($css); // no Origin argument is passed here, unlike collectStylesheets()
| 562 |     }
| 563 | |
| 564 | /** |
| 565 | * Build the cascade-ordered list: UA, then the caller-supplied author |
| 566 | * CSS (when non-empty), then every embedded `<style>` element's content |
| 567 | * in document order. Document `<style>` rules win over the explicit |
| 568 | * `$authorCss` (later source wins per CSS Cascade 5 §6.3) so authors |
| 569 | * can use `$authorCss` to inject defaults the document can override. |
| 570 | * |
| 571 | * @return list<Stylesheet> |
| 572 | */ |
| 573 |     private function collectStylesheets(?string $authorCss, Document $document): array
| 574 |     {
| 575 |         // The order of the returned list is the cascade order: UA sheet first.
| 576 |         $sheets = [];
| 577 |         $sheets[] = $this->expandImports($this->cssParser->parseStylesheet(
| 578 |             $this->options->effectiveUserAgentStylesheet(),
| 579 |             Origin::UserAgent,
| 580 |         ));
| 581 |         if ($authorCss !== null && $authorCss !== '') {
| 582 |             $callerSheet = $this->cssParser->parseStylesheet($authorCss, Origin::Author);
| 583 |             $sheets[] = $this->expandImports($callerSheet);
| 584 |         }
| 585 |         // Embedded sources come last, in the order extractAuthorCss() yields them.
| 586 |         foreach ($this->extractAuthorCss($document) as $embeddedCss) {
| 587 |             $embeddedSheet = $this->cssParser->parseStylesheet($embeddedCss, Origin::Author);
| 588 |             $sheets[] = $this->expandImports($embeddedSheet);
| 589 |         }
| 590 |         return $sheets;
| 591 |     }
| 592 | |
| 593 | /** |
| 594 | * Resolve every top-of-sheet `@import url(...) [media]` at-rule by |
| 595 | * fetching the target CSS, parsing it, recursing for nested imports, |
| 596 | * and splicing the imported rules in at the `@import` position so |
| 597 | * cascade source-order is preserved. Per CSS Cascade 5 §6.3 the |
| 598 | * imported sheet's rules behave as if pasted at the import point, |
| 599 | * with later rules in the importing sheet still winning ties. |
| 600 | * |
| 601 | * Recursion depth-capped at 16 per `docs/plans/html-and-svg.md` |
| 602 | * Security defaults to prevent `a.css → b.css → a.css` loops blowing |
| 603 | * the stack. Unloadable imports drop silently — the renderer keeps |
| 604 | * going with the remaining rules. |
| 605 | */ |
| 606 |     private function expandImports(\Phpdftk\Css\Sheet\Stylesheet $sheet, int $depth = 0): \Phpdftk\Css\Sheet\Stylesheet
| 607 |     {
| 608 |         if ($depth >= 16) {
| 609 |             return $sheet;
| 610 |         }
| 611 |         $newRules = [];
| 612 |         $importsAllowed = true;
| 613 |         foreach ($sheet->rules as $rule) {
| 614 |             $atName = $rule instanceof \Phpdftk\Css\Sheet\AtRule ? strtolower($rule->name) : null; // null: not an at-rule
| 615 |             if ($atName === 'import' && $importsAllowed) {
| 616 |                 $imported = $this->loadImport($rule, $sheet->origin);
| 617 |                 if ($imported !== null) {
| 618 |                     $expanded = $this->expandImports($imported, $depth + 1);
| 619 |                     foreach ($expanded->rules as $r) {
| 620 |                         $newRules[] = $r;
| 621 |                     }
| 622 |                 }
| 623 |                 continue;
| 624 |             }
| 625 |             // `@import` is only honoured ahead of every other rule, ignoring `@charset` and
| 626 |             // `@layer`. A style rule or any other at-rule (`@media`, `@font-face`, …) closes
| 627 |             // the window; an `@import` after that point is kept verbatim and never fetched.
| 628 |             if ($rule instanceof \Phpdftk\Css\Sheet\StyleRule
| 629 |                 || ($atName !== null && !in_array($atName, ['import', 'charset', 'layer'], true))
| 630 |             ) {
| 631 |                 $importsAllowed = false;
| 632 |             }
| 633 |             $newRules[] = $rule;
| 634 |         }
| 635 |         return new \Phpdftk\Css\Sheet\Stylesheet($newRules, $sheet->origin);
| 636 |     }
| 637 | |
| 638 | /** |
| 639 | * Load a single `@import` at-rule's target CSS and return the |
| 640 | * parsed Stylesheet, or null when the URL fails to resolve, the |
| 641 | * fetch fails, or a media-query filter rejects the sheet. |
| 642 | * |
| 643 | * The prelude is hand-extracted with a small grammar: |
| 644 | * - `url("…")` / `url('…')` / `url(…)` (quoted or bare) |
| 645 | * - bare `"…"` / `'…'` |
| 646 | * - optional trailing media-query list |
| 647 | */ |
| 648 |     private function loadImport(
| 649 |         \Phpdftk\Css\Sheet\AtRule $rule,
| 650 |         \Phpdftk\Css\Sheet\Origin $origin,
| 651 |     ): ?\Phpdftk\Css\Sheet\Stylesheet {
| 652 |         $prelude = trim($rule->prelude);
| 653 |         $href = null;
| 654 |         $remainder = '';
| 655 |         // The `s` modifier lets the trailing `(.*)` span newlines, so a media-query
| 656 |         // list wrapped over several lines no longer makes the whole match fail.
| 657 |         if (preg_match('~^url\(\s*("([^"]*)"|\'([^\']*)\'|([^)\s]*))\s*\)\s*(.*)$~is', $prelude, $m) === 1) {
| 658 |             $href = $m[2] !== '' ? $m[2] : ($m[3] !== '' ? $m[3] : $m[4]);
| 659 |             $remainder = $m[5];
| 660 |         } elseif (preg_match('~^(?:"([^"]*)"|\'([^\']*)\')\s*(.*)$~s', $prelude, $m) === 1) {
| 661 |             $href = $m[1] !== '' ? $m[1] : $m[2]; // bare string form: double- or single-quoted
| 662 |             $remainder = $m[3];
| 663 |         }
| 664 |         if ($href === null || $href === '') {
| 665 |             return null;
| 666 |         }
| 667 |         // Optional trailing media query. Phase-1 matcher accepts
| 668 |         // `print` / `all` / lists containing either.
| 669 |         $remainder = trim($remainder);
| 670 |         if ($remainder !== '' && !$this->mediaPreludeMatches($remainder)) {
| 671 |             return null;
| 672 |         }
| 673 |         // Resolve the URL (data:text/css… or relative under baseDir).
| 674 |         $css = $this->fetchImportSource($href);
| 675 |         if ($css === null) {
| 676 |             return null;
| 677 |         }
| 678 |         // The imported sheet is parsed with the importing sheet's origin.
| 679 |         return $this->cssParser->parseStylesheet($css, $origin);
| 680 |     }
| 681 | |
/**
 * Fetch the raw CSS text behind an `@import` href.
 *
 * Delegates to the renderer's shared `ResourceLoader`, passing
 * `text/css` as the only allowed MIME type.
 *
 * @return string|null Whatever the loader returns for the href (null on failure).
 */
private function fetchImportSource(string $href): ?string
{
    $loader = $this->resourceLoader();

    return $loader->load($href, allowedMimes: ['text/css']);
}
| 693 | |
/**
 * Return the renderer's `ResourceLoader`, creating it on first use.
 *
 * The instance is built from `RendererOptions::baseDir` and memoised in
 * `$this->cachedResourceLoader`, so every later call gets the same object.
 */
private function resourceLoader(): \Phpdftk\Filesystem\ResourceLoader
{
    if (!isset($this->cachedResourceLoader)) {
        $this->cachedResourceLoader = new \Phpdftk\Filesystem\ResourceLoader($this->options->baseDir);
    }

    return $this->cachedResourceLoader;
}
| 705 | |
/**
 * Emit `/Link` annotations on `$page` for the supplied link rects.
 *
 * Each rect is clipped vertically to `[0, $pageHeight]`; rects that lie
 * entirely above or below that band, or that end up with a non-positive
 * width or height, are skipped. Horizontal extents are passed through
 * unchanged. A link whose href resolves to an in-document anchor gets a
 * destination; every other link gets a URI action carrying the href.
 *
 * @param list<array{href: string, llx: float, lly: float, urx: float, ury: float, title: ?string}> $links
 * @param array<string, float> $anchorMap
 * @param list<\Phpdftk\Pdf\Writer\Page> $allPages
 */
private function emitLinkAnnotations(
    array $links,
    PdfWriter $writer,
    \Phpdftk\Pdf\Writer\Page $page,
    float $pageHeight,
    array $anchorMap,
    array $allPages,
): void {
    if ($links === []) {
        return;
    }

    $target = $page->corePage();
    $toNumber = static fn (int|float $n): \Phpdftk\Pdf\Core\PdfNumber => new \Phpdftk\Pdf\Core\PdfNumber($n);

    foreach ($links as $entry) {
        $rawBottom = $entry['lly'];
        $rawTop = $entry['ury'];
        // Nothing of this rect falls inside the page's vertical band.
        if ($rawTop <= 0.0 || $rawBottom >= $pageHeight) {
            continue;
        }

        $left = $entry['llx'];
        $right = $entry['urx'];
        $bottom = max(0.0, $rawBottom);
        $top = min($pageHeight, $rawTop);
        // Degenerate after clipping: no clickable area left.
        if ($top - $bottom <= 0.0 || $right - $left <= 0.0) {
            continue;
        }

        $annotation = new \Phpdftk\Pdf\Core\Annotation\LinkAnnotation(
            new \Phpdftk\Pdf\Core\PdfArray(array_map($toNumber, [$left, $bottom, $right, $top])),
        );
        // An all-zero border array suppresses the frame PDF readers
        // would otherwise draw around the link; the text already
        // carries its own link styling.
        $annotation->border = new \Phpdftk\Pdf\Core\PdfArray(array_map($toNumber, [0, 0, 0]));

        $tooltip = $entry['title'] ?? null;
        if ($tooltip !== null && $tooltip !== '') {
            $annotation->contents = new \Phpdftk\Pdf\Core\PdfString($tooltip);
        }

        $destination = $this->resolveFragmentDestination($entry['href'], $anchorMap, $allPages, $pageHeight);
        if ($destination === null) {
            // Not an in-document anchor: fall back to a URI action.
            $action = new \Phpdftk\Pdf\Core\PdfDictionary();
            $action->set('Type', new \Phpdftk\Pdf\Core\PdfName('Action'));
            $action->set('S', new \Phpdftk\Pdf\Core\PdfName('URI'));
            $action->set('URI', new \Phpdftk\Pdf\Core\PdfString($entry['href']));
            $annotation->a = $action;
        } else {
            $annotation->dest = $destination;
        }

        $writer->register($annotation);
        $target->annots[] = new \Phpdftk\Pdf\Core\PdfReference($annotation->objectNumber);
    }
}
| 780 | |
/**
 * Resolve a same-document fragment href (`#anchor`) to a PDF destination.
 *
 * The anchor is looked up in `$anchorMap` first verbatim and then, if
 * that fails, percent-decoded, so `#caf%C3%A9` finds an anchor stored as
 * `café`. The anchor's layout Y is split into a page index
 * (`floor(y / pageHeight)`) and an offset within that page; the
 * destination's top coordinate is `pageHeight - offset`.
 *
 * @param string $href Link target as written in the document.
 * @param array<string, float> $anchorMap Anchor name => layout Y.
 * @param list<\Phpdftk\Pdf\Writer\Page> $allPages
 * @param float $pageHeight Height used to split layout Y into pages.
 *
 * @return \Phpdftk\Pdf\Core\Document\Destination|null Null when `$href` is not
 *         a fragment, the anchor is unknown, `$pageHeight` is not positive,
 *         or the computed page index is outside `$allPages`; the caller
 *         then falls back to a URI action.
 */
private function resolveFragmentDestination(
    string $href,
    array $anchorMap,
    array $allPages,
    float $pageHeight,
): ?\Phpdftk\Pdf\Core\Document\Destination {
    if (!str_starts_with($href, '#')) {
        return null;
    }
    // A non-positive page height cannot be mapped onto a page index and
    // would raise DivisionByZeroError below when it is exactly zero.
    if ($pageHeight <= 0.0) {
        return null;
    }
    $anchor = substr($href, 1);
    if (!isset($anchorMap[$anchor])) {
        // Retry with the percent-decoded fragment before giving up.
        $decoded = rawurldecode($anchor);
        if (!isset($anchorMap[$decoded])) {
            return null;
        }
        $anchor = $decoded;
    }
    $layoutY = $anchorMap[$anchor];
    $pageIdx = (int) floor($layoutY / $pageHeight);
    if ($pageIdx < 0 || $pageIdx >= count($allPages)) {
        return null;
    }
    // Layout Y is measured downward from the top of the page; the
    // destination's top coordinate is measured upward from the bottom.
    $localY = $layoutY - $pageIdx * $pageHeight;
    $top = $pageHeight - $localY;
    $pageRef = new \Phpdftk\Pdf\Core\PdfReference($allPages[$pageIdx]->corePage()->objectNumber);
    return \Phpdftk\Pdf\Core\Document\Destination::xyz($pageRef, null, $top);
}
| 813 | |
/**
 * Build a PDF `/Info` object from the document's `<title>` and `<meta>`
 * tags and hand it to the writer.
 *
 * Mapping:
 *   - `<title>`                   → title
 *   - `<meta name="author">`      → author
 *   - `<meta name="description">` → subject
 *   - `<meta name="keywords">`    → keywords
 *
 * A field is only set when its lookup returned a non-null value. Creator,
 * producer and creation date are always set.
 *
 * NOTE(review): a freshly built Info is always passed to
 * `PdfWriter::setInfo`; whether that merges with or replaces an Info the
 * caller set earlier depends on the writer — confirm.
 */
private function applyDocumentMetadata(Document $document, PdfWriter $writer): void
{
    $extracted = [
        'title' => $this->findTextOfFirstElement($document, 'title'),
        'author' => $this->findMetaContent($document, 'author'),
        'subject' => $this->findMetaContent($document, 'description'),
        'keywords' => $this->findMetaContent($document, 'keywords'),
    ];

    $info = new \Phpdftk\Pdf\Core\Document\Info();
    foreach ($extracted as $field => $text) {
        if ($text === null) {
            continue;
        }
        $info->{$field} = new \Phpdftk\Pdf\Core\PdfString($text);
    }

    // Creator / producer identify this pipeline even for documents
    // that carry no <title> or <meta> tags of their own.
    $info->creator = new \Phpdftk\Pdf\Core\PdfString('phpdftk/html-to-pdf');
    $info->producer = new \Phpdftk\Pdf\Core\PdfString('phpdftk');

    // Creation timestamp in the PDF date string form produced by
    // formatPdfDate(): `D:YYYYMMDDHHmmSS` followed by `Z` or a signed
    // `HH'mm'` offset.
    $now = new \DateTimeImmutable();
    $info->creationDate = new \Phpdftk\Pdf\Core\PdfString($this->formatPdfDate($now));

    $writer->setInfo($info);
}
| 856 | |
/**
 * Format a timestamp as a PDF date string.
 *
 * Output is `D:YYYYMMDDHHmmSS` followed by `Z` when the UTC offset is
 * zero, otherwise by a sign and the offset as `HH'mm'`.
 */
private function formatPdfDate(\DateTimeImmutable $dt): string
{
    $stamp = 'D:' . $dt->format('YmdHis');
    $offsetSeconds = $dt->getOffset();
    if ($offsetSeconds === 0) {
        return $stamp . 'Z';
    }

    $magnitude = abs($offsetSeconds);

    return $stamp . sprintf(
        "%s%02d'%02d'",
        $offsetSeconds > 0 ? '+' : '-',
        intdiv($magnitude, 3600),
        intdiv($magnitude % 3600, 60),
    );
}
| 871 | |
/**
 * Find the first element named `$localName` and return its text.
 *
 * Elements are visited breadth-first starting at the document element.
 * For the first element whose lower-cased local name equals
 * `$localName`, the data of its direct Text children is concatenated and
 * trimmed. The search stops at that first match even when its text is
 * empty.
 *
 * The queue is walked with a cursor rather than `array_shift`, which
 * re-indexes the whole array on every dequeue.
 *
 * @param string $localName Lower-case element name to look for.
 *
 * @return string|null Trimmed text, or null when no such element exists
 *         or its text is empty.
 */
private function findTextOfFirstElement(Document $document, string $localName): ?string
{
    $queue = [$document->documentElement];
    for ($head = 0; $head < count($queue); $head++) {
        $node = $queue[$head];
        if ($node === null) {
            continue;
        }
        if (strtolower($node->localName) === $localName) {
            $text = '';
            for ($t = $node->firstChild; $t !== null; $t = $t->nextSibling) {
                if ($t instanceof \Phpdftk\Html\Dom\Text) {
                    $text .= $t->data;
                }
            }
            $text = trim($text);
            return $text === '' ? null : $text;
        }
        for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
            if ($child instanceof \Phpdftk\Html\Dom\Element) {
                $queue[] = $child;
            }
        }
    }
    return null;
}
| 898 | |
/**
 * Return the `content` of the first `<meta name="…">` tag that matches
 * `$nameAttr` and has non-blank content.
 *
 * Elements are visited breadth-first starting at the document element.
 * The `name` attribute is compared case-insensitively. A matching tag
 * whose `content` is missing or blank is skipped and the search goes on.
 *
 * The queue is walked with a cursor rather than `array_shift`, which
 * re-indexes the whole array on every dequeue.
 *
 * @param string $nameAttr Lower-case value expected in the `name` attribute.
 *
 * @return string|null Trimmed content, or null when nothing matches.
 */
private function findMetaContent(Document $document, string $nameAttr): ?string
{
    $queue = [$document->documentElement];
    for ($head = 0; $head < count($queue); $head++) {
        $node = $queue[$head];
        if ($node === null) {
            continue;
        }
        if (strtolower($node->localName) === 'meta') {
            $name = $node->getAttribute('name');
            if ($name !== null && strtolower($name) === $nameAttr) {
                $content = $node->getAttribute('content');
                if ($content !== null && trim($content) !== '') {
                    return trim($content);
                }
            }
        }
        for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
            if ($child instanceof \Phpdftk\Html\Dom\Element) {
                $queue[] = $child;
            }
        }
    }
    return null;
}
| 924 | |
/**
 * Collect the author CSS of a document in document order.
 *
 * Walks the element tree depth-first (pre-order) and gathers:
 *   - the text of every `<style>` element whose content is not blank, and
 *   - the CSS loaded by `loadLinkedStylesheet` for every `<link>` element
 *     (which returns null for links that do not apply or fail to load).
 *
 * Document order matters because the cascade's later-source-wins rule
 * depends on it.
 *
 * A `<style media="…">` element is filtered with the same
 * `mediaPreludeMatches` predicate that `loadLinkedStylesheet` applies to
 * `<link media="…">`, so a screen-only `<style>` block is no longer
 * applied while a screen-only `<link>` is dropped.
 *
 * @return list<string> CSS source chunks in document order.
 */
private function extractAuthorCss(Document $document): array
{
    $out = [];
    // LIFO stack: the top is the end of the array, so push/pop never
    // re-index the array.
    $stack = [$document->documentElement];
    while ($stack !== []) {
        $node = array_pop($stack);
        if ($node === null) {
            continue;
        }
        // Push element children in reverse so the first child is on
        // top of the stack and therefore processed next.
        $children = [];
        for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
            if ($child instanceof \Phpdftk\Html\Dom\Element) {
                $children[] = $child;
            }
        }
        foreach (array_reverse($children) as $c) {
            $stack[] = $c;
        }
        $local = strtolower($node->localName);
        if ($local === 'style') {
            // Honour `<style media>` the same way `<link media>` is honoured.
            $media = $node->getAttribute('media');
            if ($media !== null && $media !== '' && !$this->mediaPreludeMatches($media)) {
                continue;
            }
            $text = '';
            for ($t = $node->firstChild; $t !== null; $t = $t->nextSibling) {
                if ($t instanceof \Phpdftk\Html\Dom\Text) {
                    $text .= $t->data;
                }
            }
            if (trim($text) !== '') {
                $out[] = $text;
            }
        } elseif ($local === 'link') {
            $css = $this->loadLinkedStylesheet($node);
            if ($css !== null) {
                $out[] = $css;
            }
        }
    }
    return $out;
}
| 981 | |
/**
 * Collect the codepoints the document may need, so font registration can
 * subset to them.
 *
 * The HTML is reduced to text with `strip_tags` and scanned byte-wise as
 * UTF-8. Because `strip_tags` leaves character references untouched,
 * the text is additionally entity-decoded and scanned a second time, so
 * that `&eacute;` or `&#x4E2D;` contribute the character they stand for.
 * The raw text is still scanned, so the result is a superset of what the
 * raw scan alone produces.
 *
 * A fixed seed set is always included: ASCII digits and letters, the
 * punctuation `. , : ;`, space, U+2026 and U+200B.
 *
 * Malformed UTF-8 is tolerated: stray continuation bytes are skipped and
 * missing trail bytes are read as zero.
 *
 * @return list<int> Distinct codepoints, seed set first, then in order of first appearance.
 */
private function collectCodepoints(string $html): array
{
    $stripped = strip_tags($html);
    $decoded = html_entity_decode($stripped, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    // Only scan a second time when decoding actually changed something.
    $texts = $decoded === $stripped ? [$stripped] : [$stripped, $decoded];
    $seen = [];
    // Always include the characters that may be needed for counter-style
    // list markers (decimal / alpha / roman) and basic punctuation —
    // even if the document body doesn't use them — so `<ol>` markers
    // can shape against the registered font subset.
    foreach (range(ord('0'), ord('9')) as $cp) {
        $seen[$cp] = true;
    }
    foreach (range(ord('a'), ord('z')) as $cp) {
        $seen[$cp] = true;
    }
    foreach (range(ord('A'), ord('Z')) as $cp) {
        $seen[$cp] = true;
    }
    foreach ([ord('.'), ord(','), ord(':'), ord(';'), ord(' ')] as $cp) {
        $seen[$cp] = true;
    }
    // U+2026 HORIZONTAL ELLIPSIS — emitted by `text-overflow: ellipsis`
    // and useful punctuation in body text.
    $seen[0x2026] = true;
    // U+200B ZERO-WIDTH SPACE — emitted by the `<wbr>` lowering as
    // a soft-break opportunity that fonts may or may not support;
    // request the glyph so the subset captures it when present.
    $seen[0x200B] = true;
    foreach ($texts as $text) {
        $i = 0;
        $bytes = strlen($text);
        while ($i < $bytes) {
            $b = ord($text[$i]);
            if ($b < 0x80) {
                // Single-byte (ASCII).
                $seen[$b] = true;
                $i++;
            } elseif ($b < 0xC0) {
                // Stray continuation byte: skip.
                $i++;
            } elseif ($b < 0xE0) {
                // Two-byte sequence.
                $cp = (($b & 0x1F) << 6) | (ord($text[$i + 1] ?? "\x00") & 0x3F);
                $seen[$cp] = true;
                $i += 2;
            } elseif ($b < 0xF0) {
                // Three-byte sequence.
                $cp = (($b & 0x0F) << 12)
                    | ((ord($text[$i + 1] ?? "\x00") & 0x3F) << 6)
                    | (ord($text[$i + 2] ?? "\x00") & 0x3F);
                $seen[$cp] = true;
                $i += 3;
            } else {
                // Four-byte sequence.
                $cp = (($b & 0x07) << 18)
                    | ((ord($text[$i + 1] ?? "\x00") & 0x3F) << 12)
                    | ((ord($text[$i + 2] ?? "\x00") & 0x3F) << 6)
                    | (ord($text[$i + 3] ?? "\x00") & 0x3F);
                $seen[$cp] = true;
                $i += 4;
            }
        }
    }
    return array_keys($seen);
}
| 1047 | |
/**
 * Count the `<img>` elements that can be neither painted nor replaced by
 * alt text.
 *
 * An image is counted when `isPaintableImageSrc` rejects its `src` and
 * its `alt` attribute is missing or empty. The whole element tree under
 * the document element is visited.
 */
private function countUnpaintableImages(Document $document): int
{
    $unpaintable = 0;
    $pending = [$document->documentElement];
    while ($pending !== []) {
        $element = array_pop($pending);
        if ($element === null) {
            continue;
        }

        if (strtolower($element->localName) === 'img') {
            $source = $element->getAttribute('src');
            $altText = $element->getAttribute('alt');
            $hasAltText = $altText !== null && $altText !== '';
            if (!$this->isPaintableImageSrc($source) && !$hasAltText) {
                $unpaintable++;
            }
        }

        $child = $element->firstChild;
        while ($child !== null) {
            if ($child instanceof \Phpdftk\Html\Dom\Element) {
                $pending[] = $child;
            }
            $child = $child->nextSibling;
        }
    }

    return $unpaintable;
}
| 1077 | |
/**
 * Decide whether an `<img src>` value points at something drawable.
 *
 * A missing or empty `src` is never paintable. A `src` beginning with
 * `data:image/png;`, `data:image/jpeg;` or `data:image/jpg;` is accepted
 * outright. Anything else is paintable only when the resource loader's
 * `resolveLocalPath` returns a non-null path for it.
 *
 * NOTE(review): intended to mirror the painter's own decision — keep the
 * two in step; the painter is not visible from here.
 */
private function isPaintableImageSrc(?string $src): bool
{
    if ($src === null || $src === '') {
        return false;
    }

    $isInlineRaster = preg_match('~^data:image/(png|jpeg|jpg);~', $src) === 1;

    return $isInlineRaster
        || $this->resourceLoader()->resolveLocalPath($src) !== null;
}
| 1095 | |
/**
 * Report whether the document contains any non-whitespace text outside
 * of non-rendering elements.
 *
 * Subtrees rooted at `head`, `script`, `style`, `title`, `meta`, `link`
 * and `base` are not descended into. Returns as soon as one Text node
 * with non-blank data is found.
 */
private function documentHasText(Document $document): bool
{
    $nonRendering = ['head', 'script', 'style', 'title', 'meta', 'link', 'base'];
    $pending = [$document->documentElement];
    while ($pending !== []) {
        $current = array_pop($pending);
        if ($current === null) {
            continue;
        }

        $child = $current->firstChild;
        while ($child !== null) {
            if ($child instanceof \Phpdftk\Html\Dom\Text) {
                if (trim($child->data) !== '') {
                    return true;
                }
            } elseif (
                $child instanceof \Phpdftk\Html\Dom\Element
                && !in_array(strtolower($child->localName), $nonRendering, true)
            ) {
                $pending[] = $child;
            }
            $child = $child->nextSibling;
        }
    }

    return false;
}
| 1127 | |
/**
 * Walk every supplied stylesheet for top-level `@font-face` rules and
 * lazily yield `family-name => OpenTypeData` for each face that loads.
 *
 * For each `@font-face` rule with a block:
 *   1. Read `font-family` (via `fontFamilyName`) and `src` (via
 *      `splitSrcList`); when a property is declared more than once the
 *      last declaration wins. A rule missing either is dropped with an
 *      `UnsupportedCssValue` warning.
 *   2. Try the `src` candidates left to right. A candidate carrying a
 *      `format(...)` hint that is not in `SUPPORTED_FONT_FORMATS` is
 *      skipped without being fetched. Otherwise the bytes are fetched
 *      with `fetchFontSource`; bytes recognised by `WoffParser::isWoff`
 *      are unwrapped with `WoffParser::decompressBytes` before being
 *      handed to `OpenTypeParser`.
 *   3. The first candidate that parses wins. Decompress and parse
 *      failures each add an `UnsupportedCssValue` warning and the next
 *      candidate is tried.
 *   4. When no candidate parses, a `MissingResource` warning is added and
 *      the face is dropped; the remaining rules are still processed.
 *
 * Because this is a generator, nothing runs (and no warning is appended)
 * until the caller iterates the result. The same family name may be
 * yielded more than once if several rules declare it.
 *
 * @param list<Stylesheet> $sheets
 * @param list<Warning> $warnings Appended to by reference as faces are processed.
 * @return iterable<string, \Phpdftk\FontParser\OpenTypeData>
 */
private function loadFontFaces(array $sheets, array &$warnings): iterable
{
    foreach ($sheets as $sheet) {
        foreach ($sheet->rules as $rule) {
            // Only top-level `@font-face` at-rules that carry a block are considered.
            if (!$rule instanceof \Phpdftk\Css\Sheet\AtRule) {
                continue;
            }
            if (strtolower($rule->name) !== 'font-face') {
                continue;
            }
            if ($rule->block === null) {
                continue;
            }
            $family = null;
            /** @var list<\Phpdftk\Css\Value\Value> $srcCandidates */
            $srcCandidates = [];
            foreach ($rule->block->contents as $item) {
                if (!$item instanceof \Phpdftk\Css\Sheet\Declaration) {
                    continue;
                }
                // A later declaration of the same property overwrites an earlier one.
                if ($item->property === 'font-family') {
                    $family = $this->fontFamilyName($item->value);
                } elseif ($item->property === 'src') {
                    $srcCandidates = $this->splitSrcList($item->value);
                }
            }
            if ($family === null || $family === '' || $srcCandidates === []) {
                $warnings[] = new Warning(
                    WarningCode::UnsupportedCssValue,
                    '@font-face rule missing `font-family` or `src` — face dropped.',
                    WarningSeverity::Warning,
                );
                continue;
            }
            // Stays null until one candidate has been parsed successfully.
            $data = null;
            foreach ($srcCandidates as $candidate) {
                // Honour the CSS Fonts 4 §4.3 `format()` hint when one
                // is supplied. An unsupported hint skips the source
                // without touching the fetch path — useful for authors
                // shipping `url(font.woff2) format("woff2"),
                // url(font.otf) format("opentype")` fallback chains.
                if ($candidate['format'] !== null
                    && !in_array($candidate['format'], self::SUPPORTED_FONT_FORMATS, true)
                ) {
                    continue;
                }
                $bytes = $this->fetchFontSource($candidate['url'], $warnings);
                if ($bytes === null) {
                    continue;
                }
                // WOFF 1.0 wraps OTF/TTF in a zlib-compressed
                // container; transparently unwrap so the downstream
                // OpenTypeParser sees the original SFNT.
                if (\Phpdftk\FontParser\WoffParser::isWoff($bytes)) {
                    try {
                        $bytes = \Phpdftk\FontParser\WoffParser::decompressBytes($bytes);
                    } catch (\Throwable $e) {
                        $warnings[] = new Warning(
                            WarningCode::UnsupportedCssValue,
                            sprintf(
                                '@font-face `%s` WOFF source failed to decompress: %s',
                                $family,
                                $e->getMessage(),
                            ),
                            WarningSeverity::Warning,
                        );
                        // Move on to the next candidate in the `src` list.
                        continue;
                    }
                }
                try {
                    $data = \Phpdftk\FontParser\OpenTypeParser::fromBytes($bytes)->parse();
                    // First source that parses wins; later candidates are not tried.
                    break;
                } catch (\Throwable $e) {
                    // Record the failure and fall through to the next candidate.
                    $warnings[] = new Warning(
                        WarningCode::UnsupportedCssValue,
                        sprintf(
                            '@font-face `%s` source failed to parse: %s',
                            $family,
                            $e->getMessage(),
                        ),
                        WarningSeverity::Warning,
                    );
                }
            }
            if ($data === null) {
                $warnings[] = new Warning(
                    WarningCode::MissingResource,
                    sprintf(
                        '@font-face `%s` has no loadable source — face dropped.',
                        $family,
                    ),
                    WarningSeverity::Warning,
                );
                continue;
            }
            yield $family => $data;
        }
    }
}
| 1246 | |
/**
 * Derive the family name from a `font-family` value inside an
 * `@font-face` block.
 *
 * Accepted shapes:
 *   - a `StringValue` (`"My Font"`),
 *   - a single `Keyword`,
 *   - a space-separated `ValueList`, whose `Keyword` and `StringValue`
 *     members are joined with single spaces (members of any other type
 *     are ignored).
 *
 * @return string|null The trimmed name, or null for any other value
 *         shape or when the name is empty after trimming.
 */
private function fontFamilyName(\Phpdftk\Css\Value\Value $value): ?string
{
    $raw = null;
    if ($value instanceof \Phpdftk\Css\Value\StringValue) {
        $raw = $value->value;
    } elseif ($value instanceof \Phpdftk\Css\Value\Keyword) {
        $raw = $value->name;
    } elseif (
        $value instanceof \Phpdftk\Css\Value\ValueList
        && $value->separator === \Phpdftk\Css\Value\ListSeparator::Space
    ) {
        $words = [];
        foreach ($value->values as $member) {
            if ($member instanceof \Phpdftk\Css\Value\Keyword) {
                $words[] = $member->name;
                continue;
            }
            if ($member instanceof \Phpdftk\Css\Value\StringValue) {
                $words[] = $member->value;
            }
        }
        $raw = implode(' ', $words);
    }

    if ($raw === null) {
        return null;
    }

    $trimmed = trim($raw);

    return $trimmed !== '' ? $trimmed : null;
}
| 1279 | |
/**
 * Break a `src:` value into its candidate sources.
 *
 * A comma-separated `ValueList` contributes one candidate per member;
 * any other value is treated as a single candidate. For a candidate that
 * is itself a non-empty space-separated `ValueList`, `url` is its first
 * member and `format` is whatever `extractFormatHint` finds among its
 * members. For every other candidate, `url` is the candidate itself and
 * `format` is null.
 *
 * @return list<array{url: \Phpdftk\Css\Value\Value, format: ?string}>
 */
private function splitSrcList(\Phpdftk\Css\Value\Value $value): array
{
    $isCommaList = $value instanceof \Phpdftk\Css\Value\ValueList
        && $value->separator === \Phpdftk\Css\Value\ListSeparator::Comma;
    $entries = $isCommaList ? $value->values : [$value];

    $result = [];
    foreach ($entries as $entry) {
        $isSpaceGroup = $entry instanceof \Phpdftk\Css\Value\ValueList
            && $entry->separator === \Phpdftk\Css\Value\ListSeparator::Space
            && $entry->values !== [];

        $result[] = $isSpaceGroup
            ? ['url' => $entry->values[0], 'format' => $this->extractFormatHint($entry->values)]
            : ['url' => $entry, 'format' => null];
    }

    return $result;
}
| 1314 | |
/**
 * Pull the `format(...)` hint out of one space-separated `src` candidate.
 *
 * Scans the siblings for a `format` function (name matched
 * case-insensitively) with at least one argument and returns that first
 * argument lower-cased when it is a `StringValue` or a `Keyword`. A
 * `format()` whose first argument is of another type is passed over and
 * the scan continues.
 *
 * @param list<\Phpdftk\Css\Value\Value> $siblings
 *
 * @return string|null The lower-cased hint, or null when none is found.
 */
private function extractFormatHint(array $siblings): ?string
{
    foreach ($siblings as $sibling) {
        $isFormatCall = $sibling instanceof \Phpdftk\Css\Value\CssFunction
            && strtolower($sibling->name) === 'format'
            && $sibling->arguments !== [];
        if (!$isFormatCall) {
            continue;
        }

        $hint = $sibling->arguments[0];
        if ($hint instanceof \Phpdftk\Css\Value\StringValue) {
            return strtolower($hint->value);
        }
        if ($hint instanceof \Phpdftk\Css\Value\Keyword) {
            return strtolower($hint->name);
        }
    }

    return null;
}
| 1342 | |
/**
 * The `format(...)` hints for which `loadFontFaces` will attempt a load.
 * A `src` candidate whose hint is not in this list is skipped before any
 * fetch happens; a candidate with no hint at all is always attempted.
 *
 * `opentype` / `opentype-variations` bytes go straight to
 * `OpenTypeParser`. `woff` bytes are first unwrapped by `WoffParser`
 * inside `loadFontFaces`. Every other hint (for example `woff2`) is
 * treated as unsupported.
 */
private const SUPPORTED_FONT_FORMATS = [
    'opentype',
    'opentype-variations',
    'woff',
];
| 1355 | |
/**
 * Load the CSS text behind a `<link>` element.
 *
 * Returns null unless all of the following hold:
 *   - `rel` is present and its whitespace-separated, lower-cased token
 *     list contains `stylesheet`;
 *   - `href` is present and non-empty;
 *   - `media` is absent, empty, or accepted by `mediaPreludeMatches`.
 *
 * `data:` hrefs are loaded with a `text/css` MIME allowlist; every other
 * href is handed to the resource loader without one.
 *
 * @return string|null The loader's result, or null when the link does not apply.
 */
private function loadLinkedStylesheet(\Phpdftk\Html\Dom\Element $link): ?string
{
    $rel = $link->getAttribute('rel');
    if ($rel === null) {
        return null;
    }
    // `rel` is a token list; `stylesheet` may appear anywhere in it.
    $tokens = preg_split('/\s+/', strtolower(trim($rel))) ?: [];
    $isStylesheet = in_array('stylesheet', $tokens, true);
    if (!$isStylesheet) {
        return null;
    }

    $target = $link->getAttribute('href');
    if ($target === null || $target === '') {
        return null;
    }

    // Same media-type predicate as `@import` trailing media lists.
    $mediaAttr = $link->getAttribute('media');
    $hasMediaFilter = $mediaAttr !== null && $mediaAttr !== '';
    if ($hasMediaFilter && !$this->mediaPreludeMatches($mediaAttr)) {
        return null;
    }

    $loader = $this->resourceLoader();
    if (!str_starts_with($target, 'data:')) {
        // Filesystem path: no MIME restriction is passed.
        return $loader->load($target);
    }

    // Inline payload: must declare `text/css`.
    return $loader->load($target, allowedMimes: ['text/css']);
}
| 1398 | |
/**
 * Phase-1 media matcher: does a media-query list apply to this renderer?
 *
 * An empty prelude or exactly `all` matches. Otherwise the lower-cased
 * prelude is split on commas, each part on whitespace, and the prelude
 * matches when any resulting token is exactly `print` or `all`. No
 * further media-query grammar is interpreted.
 *
 * NOTE(review): described in the file as a duplicate of the cascade's
 * private matcher — keep the two in sync when either changes.
 */
private function mediaPreludeMatches(string $prelude): bool
{
    $normalized = strtolower(trim($prelude));
    if (in_array($normalized, ['', 'all'], true)) {
        return true;
    }

    $accepted = ['print', 'all'];
    foreach (explode(',', $normalized) as $query) {
        $words = preg_split('/\s+/', trim($query)) ?: [];
        if (array_intersect($words, $accepted) !== []) {
            return true;
        }
    }

    return false;
}
| 1421 | |
/**
 * Turn one `@font-face` `src` candidate into raw font bytes.
 *
 * The URL is taken from a bare `Url` value, or from the first argument
 * of a `url(...)` function when that argument is a `Url` or a
 * `StringValue`. Any other candidate shape, or an empty URL, yields null.
 *
 * `data:` URLs are only accepted when they contain `;base64,`; they are
 * otherwise rejected before reaching the loader. When the loader returns
 * null for a non-`data:` URL and a `baseDir` is configured, a
 * `MissingResource` warning naming the URL is appended. Failed `data:`
 * loads add no warning here.
 *
 * @param list<Warning> $warnings Appended to by reference.
 *
 * @return string|null The bytes returned by the resource loader, or null.
 */
private function fetchFontSource(\Phpdftk\Css\Value\Value $candidate, array &$warnings): ?string
{
    $location = null;
    if ($candidate instanceof \Phpdftk\Css\Value\Url) {
        $location = $candidate->url;
    } elseif (
        $candidate instanceof \Phpdftk\Css\Value\CssFunction
        && strtolower($candidate->name) === 'url'
        && isset($candidate->arguments[0])
    ) {
        $argument = $candidate->arguments[0];
        if ($argument instanceof \Phpdftk\Css\Value\Url) {
            $location = $argument->url;
        } elseif ($argument instanceof \Phpdftk\Css\Value\StringValue) {
            $location = $argument->value;
        }
    }
    if ($location === null || $location === '') {
        return null;
    }

    $isDataUrl = str_starts_with($location, 'data:');
    // Font payloads are binary, so only base64 `data:` URLs are allowed through.
    if ($isDataUrl && stripos($location, ';base64,') === false) {
        return null;
    }

    $payload = $this->resourceLoader()->load($location);
    $localReadFailed = $payload === null && !$isDataUrl;
    if ($localReadFailed && $this->options->baseDir !== null) {
        // Tell the author which file-based source could not be read.
        $warnings[] = new Warning(
            WarningCode::MissingResource,
            sprintf('@font-face src `%s` could not be read.', $location),
            WarningSeverity::Warning,
        );
    }

    return $payload;
}
| 1475 | |
/**
 * Gather the margin-box specs declared inside every `@page` at-rule of
 * the supplied stylesheets.
 *
 * For each `@page` rule with a block and a selector accepted by
 * `normalisePageSelector()`, the nested margin-box at-rules (e.g.
 * `@top-center`) are read. Each box's `content` value is parsed via
 * `parseContentValue()` into parts, and its `font-size`, `color`,
 * `text-align`, `font-family`, `font-weight` and `font-style`
 * declarations are captured alongside. A box is recorded only when its
 * parsed `content` is non-empty; a later box with the same selector
 * and position replaces the earlier one.
 *
 * The `@page` rule's own `font-size` / `color` / `font-family` /
 * `font-weight` / `font-style` declarations seed the defaults of its
 * nested boxes (otherwise 10pt, black, weight 400, style `normal`);
 * the box's own declarations override them. `text-align` is only read
 * from the margin box itself.
 *
 * Page selectors handled at Phase 1: unscoped, `:first` (page index
 * 0), `:right` (0-based indices 0, 2, 4, ...), `:left` (0-based
 * indices 1, 3, 5, ...) and bare page names. Rules whose selector
 * normalises to null are skipped. The per-page overlay of the
 * different selectors happens in `resolvePageMarginBoxes`.
 *
 * @param list<\Phpdftk\Css\Sheet\Stylesheet> $sheets
 * @return array<string, array<string, array{
 *     parts: list<array{kind: string, value: string}>,
 *     fontSize: float,
 *     color: \Phpdftk\Css\Value\Color,
 *     textAlign: ?string,
 *     fontFamily: ?\Phpdftk\Css\Value\Value,
 *     fontWeight: int,
 *     fontStyle: string,
 * }>> selector → position → spec
 */
private function collectPageMarginBoxes(array $sheets): array
{
    $knownPositions = [
        'top-left-corner', 'top-left', 'top-center', 'top-right', 'top-right-corner',
        'bottom-left-corner', 'bottom-left', 'bottom-center', 'bottom-right', 'bottom-right-corner',
    ];
    // Accepted `text-align` keywords mapped to the physical side that is stored.
    $alignments = [
        'left' => 'left',
        'right' => 'right',
        'center' => 'center',
        'start' => 'left',
        'end' => 'right',
    ];
    $collected = [];
    foreach ($sheets as $sheet) {
        foreach ($sheet->rules as $rule) {
            $isPageRule = $rule instanceof \Phpdftk\Css\Sheet\AtRule
                && strtolower($rule->name) === 'page'
                && $rule->block !== null;
            if (!$isPageRule) {
                continue;
            }
            $selector = $this->normalisePageSelector($rule->prelude);
            if ($selector === null) {
                continue;
            }
            $collected[$selector] ??= [];

            // First pass: the `@page` rule's own typography becomes the
            // starting point for every margin box nested inside it.
            $baseSize = 10.0;
            $baseColor = new \Phpdftk\Css\Value\Color(0.0, 0.0, 0.0, 1.0);
            $baseFamily = null;
            $baseWeight = 400;
            $baseStyle = 'normal';
            foreach ($rule->block->contents as $entry) {
                if (!$entry instanceof \Phpdftk\Css\Sheet\Declaration) {
                    continue;
                }
                $name = $entry->property;
                if ($name === 'font-size') {
                    if ($entry->value instanceof \Phpdftk\Css\Value\Length) {
                        $baseSize = max(1.0, $entry->value->value);
                    }
                } elseif ($name === 'color') {
                    if ($entry->value instanceof \Phpdftk\Css\Value\Color) {
                        $baseColor = $entry->value;
                    }
                } elseif ($name === 'font-family') {
                    $baseFamily = $entry->value;
                } elseif ($name === 'font-weight') {
                    $baseWeight = $this->parseFontWeight($entry->value);
                } elseif ($name === 'font-style') {
                    $baseStyle = $this->parseFontStyle($entry->value);
                }
            }

            // Second pass: one spec per supported nested margin-box rule.
            foreach ($rule->block->contents as $nested) {
                if (!$nested instanceof \Phpdftk\Css\Sheet\AtRule || $nested->block === null) {
                    continue;
                }
                $position = strtolower($nested->name);
                if (!in_array($position, $knownPositions, true)) {
                    continue;
                }
                // Key order here is the key order of the emitted spec.
                $spec = [
                    'parts' => null,
                    'fontSize' => $baseSize,
                    'color' => $baseColor,
                    'textAlign' => null,
                    'fontFamily' => $baseFamily,
                    'fontWeight' => $baseWeight,
                    'fontStyle' => $baseStyle,
                ];
                foreach ($nested->block->contents as $entry) {
                    if (!$entry instanceof \Phpdftk\Css\Sheet\Declaration) {
                        continue;
                    }
                    $name = $entry->property;
                    if ($name === 'content') {
                        $spec['parts'] = $this->parseContentValue($entry->value);
                    } elseif ($name === 'font-size') {
                        if ($entry->value instanceof \Phpdftk\Css\Value\Length) {
                            $spec['fontSize'] = max(1.0, $entry->value->value);
                        }
                    } elseif ($name === 'color') {
                        if ($entry->value instanceof \Phpdftk\Css\Value\Color) {
                            $spec['color'] = $entry->value;
                        }
                    } elseif ($name === 'text-align') {
                        if ($entry->value instanceof \Phpdftk\Css\Value\Keyword) {
                            $keyword = strtolower($entry->value->name);
                            if (isset($alignments[$keyword])) {
                                $spec['textAlign'] = $alignments[$keyword];
                            }
                        }
                    } elseif ($name === 'font-family') {
                        $spec['fontFamily'] = $entry->value;
                    } elseif ($name === 'font-weight') {
                        $spec['fontWeight'] = $this->parseFontWeight($entry->value);
                    } elseif ($name === 'font-style') {
                        $spec['fontStyle'] = $this->parseFontStyle($entry->value);
                    }
                }
                // Boxes without renderable content are not recorded.
                if ($spec['parts'] !== null && $spec['parts'] !== []) {
                    $collected[$selector][$position] = $spec;
                }
            }
        }
    }
    return $collected;
}
| 1637 | |
// NOTE: `@page <prelude>` selector normalisation lives in
// normalisePageSelector() further down in this class. It reduces the
// prelude to `''` (unscoped / default), `:first`, `:left`, `:right`, or
// `name:<ident>` for a bare page name, and returns null for anything
// else (e.g. `:blank`) so callers can drop the rule rather than
// mis-apply it.
/**
 * Resolve the effective page width / height from CSS Paged Media 3
 * §6.1 `@page { size: ... }` declarations. Falls back to the
 * `RendererOptions` defaults (`pageWidth` / `pageHeight`) when no
 * `size` is declared or the declared value isn't recognised by
 * `parsePageSize()`. Multiple unscoped `@page` rules merge — the last
 * resolvable `size` declaration wins per source order.
 *
 * Only unscoped `@page` rules contribute. This method resolves a
 * single document-wide size, so a `size` inside a scoped rule
 * (`@page :first`, `@page :left`, `@page <name>`, or a selector that
 * `normalisePageSelector()` rejects) is skipped instead of being
 * applied to every page — the same policy `resolvePageBackground()`
 * follows for parity / first overlays and unsupported selectors.
 *
 * Supported forms:
 * - `auto` — use defaults
 * - `<length>{1,2}` — width [height]; one length sets a square
 * - `<page-size>` — A3/A4/A5/B4/B5/JIS-B4/JIS-B5/letter/legal/ledger
 * - `<page-size> <orientation>` or `<orientation> <page-size>`
 * - `<orientation>` alone (rotates the default size)
 *
 * @param list<\Phpdftk\Css\Sheet\Stylesheet> $sheets
 * @return array{width: float, height: float}
 */
private function resolvePageSize(array $sheets): array
{
    $width = $this->options->pageWidth;
    $height = $this->options->pageHeight;
    foreach ($sheets as $sheet) {
        foreach ($sheet->rules as $rule) {
            if (!$rule instanceof \Phpdftk\Css\Sheet\AtRule
                || strtolower($rule->name) !== 'page'
                || $rule->block === null
            ) {
                continue;
            }
            // Scoped or unsupported selectors must not leak into the
            // document-wide page geometry.
            if ($this->normalisePageSelector($rule->prelude) !== '') {
                continue;
            }
            foreach ($rule->block->contents as $decl) {
                if (!$decl instanceof \Phpdftk\Css\Sheet\Declaration
                    || $decl->property !== 'size'
                ) {
                    continue;
                }
                $resolved = $this->parsePageSize($decl->value);
                // An unresolvable value (including `auto`) keeps
                // whatever size is currently in effect.
                if ($resolved !== null) {
                    [$width, $height] = $resolved;
                }
            }
        }
    }
    return ['width' => $width, 'height' => $height];
}
| 1688 | |
/**
 * Work out the page-level background colour declared by `@page` rules.
 *
 * Every applicable rule's `background-color` declaration, and the
 * `background-color` component produced by expanding a `background`
 * shorthand, is considered in source order; the last one that yields a
 * `Color` wins. Returns null when no applicable rule supplies a colour.
 *
 * Phase-1 simplification: colour only. `background-image`,
 * `background-repeat`, etc. are not handled here.
 *
 * A rule is applicable when `pageSelectorAppliesTo()` accepts its
 * normalised selector for `$pageName`: the unscoped rule always is,
 * and an `@page <name>` rule is when `$pageName` matches, so a later
 * named rule overlays the default (CSS Paged Media 3 §3.4).
 *
 * @param list<\Phpdftk\Css\Sheet\Stylesheet> $sheets
 */
private function resolvePageBackground(array $sheets, ?string $pageName = null): ?\Phpdftk\Css\Value\Color
{
    $shorthand = new \Phpdftk\Css\Cascade\ShorthandExpander();
    $background = null;
    foreach ($sheets as $sheet) {
        foreach ($sheet->rules as $rule) {
            $isPageRule = $rule instanceof \Phpdftk\Css\Sheet\AtRule
                && strtolower($rule->name) === 'page'
                && $rule->block !== null;
            if (!$isPageRule) {
                continue;
            }
            $selector = $this->normalisePageSelector($rule->prelude);
            if (!$this->pageSelectorAppliesTo($selector, $pageName)) {
                continue;
            }
            foreach ($rule->block->contents as $entry) {
                if (!$entry instanceof \Phpdftk\Css\Sheet\Declaration) {
                    continue;
                }
                // Pick the colour candidate this declaration offers, if any.
                $candidate = match ($entry->property) {
                    'background-color' => $entry->value,
                    'background' => $shorthand->expand('background', $entry->value)['background-color'] ?? null,
                    default => null,
                };
                if ($candidate instanceof \Phpdftk\Css\Value\Color) {
                    $background = $candidate;
                }
            }
        }
    }
    return $background;
}
| 1743 | |
/**
 * Tell whether an `@page` rule with the given normalised selector
 * applies to a page tagged `$pageName`.
 *
 * - null (unsupported selector): never applies.
 * - `''` (unscoped): always applies.
 * - `:first` / `:left` / `:right`: never applies here. Phase-1 ignores
 *   parity / first overlays in this predicate; the
 *   resolvePageMarginBoxes pipeline honours them separately via its
 *   own selector overlay.
 * - `name:<ident>`: applies only when `$pageName` is non-null and
 *   equals `<ident>`.
 * - anything else: does not apply.
 */
private function pageSelectorAppliesTo(?string $selector, ?string $pageName): bool
{
    return match (true) {
        $selector === null => false,
        $selector === '' => true,
        in_array($selector, [':first', ':left', ':right'], true) => false,
        str_starts_with($selector, 'name:') => $pageName !== null && substr($selector, 5) === $pageName,
        default => false,
    };
}
| 1766 | |
/**
 * Build a page-index → page-name map from the laid-out box tree.
 *
 * The tree is walked once in document order (pre-order). A box whose
 * `page` style is a keyword other than `auto` tags the page whose
 * index is `floor(geometry->y / $pageHeight)` with the lower-cased
 * keyword (CSS Paged Media 3 §3.4 — the first fragment determines the
 * page type). The first box to claim an index keeps it; indices
 * outside `[0, $pageCount)` are ignored. A non-positive `$pageHeight`
 * yields an empty map.
 *
 * @return array<int, string>
 */
private function resolvePageNames(\Phpdftk\HtmlToPdf\Box\Box $root, float $pageHeight, int $pageCount): array
{
    $names = [];
    if ($pageHeight <= 0.0) {
        return $names;
    }
    $pending = [$root];
    while (count($pending) > 0) {
        $box = array_pop($pending);
        $pageProp = $box->style->get('page');
        $isNamed = $pageProp instanceof \Phpdftk\Css\Value\Keyword
            && strtolower($pageProp->name) !== 'auto';
        if ($isNamed) {
            $index = (int) floor($box->geometry->y / $pageHeight);
            $inRange = $index >= 0 && $index < $pageCount;
            if ($inRange && !isset($names[$index])) {
                $names[$index] = strtolower($pageProp->name);
            }
        }
        // The stack pops from the end, so queue the children last-to-first
        // to have the first child visited next.
        $childCount = count($box->children);
        for ($offset = 1; $offset <= $childCount; $offset++) {
            $pending[] = $box->children[$childCount - $offset];
        }
    }
    return $names;
}
| 1801 | |
/**
 * Resolve effective page margins (in PDF points) from the unscoped
 * `@page` rules' margin declarations. Honours the `margin` shorthand
 * (expanded through `ShorthandExpander`) and the per-side longhands
 * (`margin-top` / -right / -bottom / -left); later declarations win
 * per source order. Only `Length` values are applied. Defaults to
 * 36pt on all sides.
 *
 * Only unscoped `@page` rules contribute. The result is a single
 * document-wide margin set, so margins declared inside a scoped rule
 * (`@page :first`, `@page :left`, `@page <name>`, or a selector that
 * `normalisePageSelector()` rejects) are skipped instead of being
 * applied to every page — the same policy `resolvePageBackground()`
 * follows for parity / first overlays and unsupported selectors.
 *
 * @param list<\Phpdftk\Css\Sheet\Stylesheet> $sheets
 * @return array{top: float, right: float, bottom: float, left: float}
 */
private function resolvePageMargins(array $sheets): array
{
    $expander = new \Phpdftk\Css\Cascade\ShorthandExpander();
    $margins = ['top' => 36.0, 'right' => 36.0, 'bottom' => 36.0, 'left' => 36.0];
    foreach ($sheets as $sheet) {
        foreach ($sheet->rules as $rule) {
            if (!$rule instanceof \Phpdftk\Css\Sheet\AtRule
                || strtolower($rule->name) !== 'page'
                || $rule->block === null
            ) {
                continue;
            }
            // Scoped or unsupported selectors must not leak into the
            // document-wide margins.
            if ($this->normalisePageSelector($rule->prelude) !== '') {
                continue;
            }
            foreach ($rule->block->contents as $decl) {
                if (!$decl instanceof \Phpdftk\Css\Sheet\Declaration) {
                    continue;
                }
                $prop = $decl->property;
                if ($prop === 'margin') {
                    $expanded = $expander->expand('margin', $decl->value);
                    foreach (['top', 'right', 'bottom', 'left'] as $side) {
                        $sideValue = $expanded['margin-' . $side] ?? null;
                        if ($sideValue instanceof \Phpdftk\Css\Value\Length) {
                            $margins[$side] = $sideValue->value;
                        }
                    }
                } elseif (in_array($prop, ['margin-top', 'margin-right', 'margin-bottom', 'margin-left'], true)) {
                    if ($decl->value instanceof \Phpdftk\Css\Value\Length) {
                        // Strip the 7-character `margin-` prefix to get the side key.
                        $margins[substr($prop, 7)] = $decl->value->value;
                    }
                }
            }
        }
    }
    return $margins;
}
| 1848 | |
/**
 * Parse a single `@page { size }` value into `[width, height]`, or
 * null when the value can't be resolved (including a lone `auto`,
 * which means "keep the defaults").
 *
 * Resolution order:
 * 1. one `Length` → square; two `Length`s → width, height
 *    (the `Length::$value` numbers are returned as-is —
 *    NOTE(review): presumably already in PDF points; confirm);
 * 2. a named `<page-size>` keyword, optionally combined with
 *    `portrait` / `landscape` in either order;
 * 3. an orientation keyword alone, which rotates the default size
 *    from `options->pageWidth` / `options->pageHeight`.
 *
 * @return array{0: float, 1: float}|null
 */
private function parsePageSize(\Phpdftk\Css\Value\Value $value): ?array
{
    // Named sizes from the CSS Paged Media 3 `size` property table,
    // in PDF points (1in = 72pt, 1mm = 72/25.4 pt), rounded to the
    // nearest point. `b4` / `b5` are the ISO sizes (250×353mm and
    // 176×250mm); the slightly larger JIS variants have their own
    // `jis-b4` / `jis-b5` keywords.
    $named = [
        'a3' => [842.0, 1191.0],
        'a4' => [595.0, 842.0],
        'a5' => [420.0, 595.0],
        'b4' => [709.0, 1001.0],
        'b5' => [499.0, 709.0],
        'jis-b4' => [729.0, 1032.0],
        'jis-b5' => [516.0, 729.0],
        'letter' => [612.0, 792.0],
        'legal' => [612.0, 1008.0],
        'ledger' => [792.0, 1224.0],
    ];
    $items = $value instanceof \Phpdftk\Css\Value\ValueList
        && $value->separator === \Phpdftk\Css\Value\ListSeparator::Space
        ? $value->values
        : [$value];
    // Single `auto` keyword → use defaults.
    if (count($items) === 1
        && $items[0] instanceof \Phpdftk\Css\Value\Keyword
        && strtolower($items[0]->name) === 'auto'
    ) {
        return null;
    }
    // Single length → square. Two lengths → width + height.
    $lengths = array_values(array_filter(
        $items,
        static fn($v) => $v instanceof \Phpdftk\Css\Value\Length,
    ));
    if (count($lengths) === 1) {
        return [$lengths[0]->value, $lengths[0]->value];
    }
    if (count($lengths) === 2) {
        return [$lengths[0]->value, $lengths[1]->value];
    }
    // Otherwise scan keywords: `<page-size>` + optional orientation.
    // When a kind of keyword repeats, the last occurrence wins.
    $size = null;
    $orientation = null;
    foreach ($items as $item) {
        if (!$item instanceof \Phpdftk\Css\Value\Keyword) {
            continue;
        }
        $kw = strtolower($item->name);
        if (isset($named[$kw])) {
            $size = $named[$kw];
        } elseif ($kw === 'landscape' || $kw === 'portrait') {
            $orientation = $kw;
        }
    }
    if ($size === null) {
        if ($orientation === null) {
            return null;
        }
        // Orientation alone: rotate the default size. Normalise the
        // defaults to portrait (short edge first) before applying it.
        $defaultPortrait = [$this->options->pageWidth, $this->options->pageHeight];
        if ($defaultPortrait[0] > $defaultPortrait[1]) {
            [$defaultPortrait[0], $defaultPortrait[1]] = [$defaultPortrait[1], $defaultPortrait[0]];
        }
        return $orientation === 'landscape'
            ? [$defaultPortrait[1], $defaultPortrait[0]]
            : $defaultPortrait;
    }
    // Swap the named size's edges only when it does not already have
    // the requested orientation.
    if ($orientation === 'landscape' && $size[0] < $size[1]) {
        return [$size[1], $size[0]];
    }
    if ($orientation === 'portrait' && $size[0] > $size[1]) {
        return [$size[1], $size[0]];
    }
    return $size;
}
| 1928 | |
/**
 * Convert a CSS `font-weight` value to an integer weight.
 *
 * Keywords: `bold` and `bolder` give 700, `lighter` gives 100, every
 * other keyword (including `normal`) gives 400. `Integer` / `Number`
 * values are truncated to int and clamped to the 1–1000 range of CSS
 * Fonts 4. Any other value type gives 400.
 */
private function parseFontWeight(\Phpdftk\Css\Value\Value $value): int
{
    if ($value instanceof \Phpdftk\Css\Value\Keyword) {
        $keywordWeights = ['bold' => 700, 'bolder' => 700, 'lighter' => 100];
        return $keywordWeights[strtolower($value->name)] ?? 400;
    }
    $isNumeric = $value instanceof \Phpdftk\Css\Value\Integer
        || $value instanceof \Phpdftk\Css\Value\Number;
    return $isNumeric
        ? max(1, min(1000, (int) $value->value))
        : 400;
}
| 1950 | |
/**
 * Convert a CSS `font-style` value to `normal`, `italic` or `oblique`.
 *
 * Only an `italic` / `oblique` keyword (case-insensitive) is passed
 * through, lower-cased; every other keyword or value type yields
 * `normal`.
 */
private function parseFontStyle(\Phpdftk\Css\Value\Value $value): string
{
    if (!$value instanceof \Phpdftk\Css\Value\Keyword) {
        return 'normal';
    }
    $style = strtolower($value->name);
    return ($style === 'italic' || $style === 'oblique') ? $style : 'normal';
}
| 1965 | |
/**
 * Reduce an `@page <prelude>` selector text to one of the supported
 * keys, after trimming and lower-casing it:
 *
 * - `''` for an empty prelude (unscoped / default rule);
 * - `:first`, `:left` or `:right` for those pseudo-classes;
 * - `name:<ident>` for a bare page name (CSS Paged Media 3 §3.4)
 *   made of `a-z`, `0-9`, `_` and `-`, not starting with a digit or
 *   `-`.
 *
 * Returns null for everything else — `:blank`, combined forms such as
 * `<ident>:first`, selector lists — so the caller can drop the rule
 * entirely rather than mis-apply it.
 */
private function normalisePageSelector(string $prelude): ?string
{
    $selector = strtolower(trim($prelude));
    if ($selector === '' || in_array($selector, [':first', ':left', ':right'], true)) {
        return $selector;
    }
    // The pattern is anchored at both ends, so a match means the whole
    // selector is a single identifier; `<ident>:first` does not match
    // and falls through to null.
    return preg_match('/^([a-z_][a-z0-9_-]*)$/', $selector) === 1
        ? 'name:' . $selector
        : null;
}
| 1984 | |
/**
 * Produce the position → spec map of margin boxes for one page by
 * layering the selector-scoped maps on top of each other.
 *
 * Layers, lowest to highest priority: the unscoped (`''`) rules, then
 * the facing-side rules (`:right` for even 0-based indices, `:left`
 * otherwise — the first page of a document is a right page), then
 * `:first` (page index 0 only), then `name:<pageName>` when the page
 * carries a name. The overlay is keyed by position, so a
 * `:first { @top-center }` override keeps the default
 * `@bottom-center` box.
 *
 * @param array<string, array<string, array{
 *     parts: list<array{kind: string, value: string}>,
 *     fontSize: float,
 *     color: \Phpdftk\Css\Value\Color,
 *     textAlign: ?string,
 *     fontFamily: ?\Phpdftk\Css\Value\Value,
 *     fontWeight: int,
 *     fontStyle: string,
 * }>> $marginBoxes
 * @return array<string, array{
 *     parts: list<array{kind: string, value: string}>,
 *     fontSize: float,
 *     color: \Phpdftk\Css\Value\Color,
 *     textAlign: ?string,
 *     fontFamily: ?\Phpdftk\Css\Value\Value,
 *     fontWeight: int,
 *     fontStyle: string,
 * }>
 */
private function resolvePageMarginBoxes(array $marginBoxes, int $pageIndex, ?string $pageName = null): array
{
    // Overlay keys in ascending priority; the unscoped base is applied first.
    $overlays = [$pageIndex % 2 === 0 ? ':right' : ':left'];
    if ($pageIndex === 0) {
        $overlays[] = ':first';
    }
    if ($pageName !== null) {
        $overlays[] = 'name:' . $pageName;
    }

    $layered = $marginBoxes[''] ?? [];
    foreach ($overlays as $key) {
        if (isset($marginBoxes[$key])) {
            $layered = array_merge($layered, $marginBoxes[$key]);
        }
    }
    return $layered;
}
| 2033 | |
/**
 * Turn a CSS `content` value into the list of parts the paint pass
 * resolves per page.
 *
 * The value is either a single item or a space-separated `ValueList`
 * of items. Each `StringValue` becomes a `literal` part. Each
 * `counter(...)` function with at least one argument whose first
 * positional argument is the keyword `page` or `pages` becomes a
 * `pagenumber` / `totalpages` part respectively. Everything else is
 * ignored, so the result is empty when nothing renderable is present.
 *
 * `counter(name [, style])` arguments are flattened through
 * `splitCounterArgs()`. When the second positional argument is a
 * keyword (a `<counter-style>` such as `lower-roman`), its lower-cased
 * name is stored as the part's `style`; otherwise `decimal` is used.
 *
 * @return list<array{kind: string, value: string, style?: string}>
 */
private function parseContentValue(\Phpdftk\Css\Value\Value $value): array
{
    $isSpaceList = $value instanceof \Phpdftk\Css\Value\ValueList
        && $value->separator === \Phpdftk\Css\Value\ListSeparator::Space;
    $tokens = $isSpaceList ? $value->values : [$value];

    $result = [];
    foreach ($tokens as $token) {
        if ($token instanceof \Phpdftk\Css\Value\StringValue) {
            $result[] = ['kind' => 'literal', 'value' => $token->value];
            continue;
        }
        $isCounterCall = $token instanceof \Phpdftk\Css\Value\CssFunction
            && strtolower($token->name) === 'counter'
            && $token->arguments !== [];
        if (!$isCounterCall) {
            continue;
        }
        $positional = $this->splitCounterArgs($token->arguments);
        $nameArg = $positional[0] ?? null;
        $counter = $nameArg instanceof \Phpdftk\Css\Value\Keyword
            ? strtolower($nameArg->name)
            : null;
        // Only the two page counters are understood here.
        if (!in_array($counter, ['page', 'pages'], true)) {
            continue;
        }
        $styleArg = $positional[1] ?? null;
        $result[] = [
            'kind' => $counter === 'pages' ? 'totalpages' : 'pagenumber',
            'value' => '',
            'style' => $styleArg instanceof \Phpdftk\Css\Value\Keyword
                ? strtolower($styleArg->name)
                : 'decimal',
        ];
    }
    return $result;
}
| 2085 | |
/**
 * Flatten the argument list of a `counter(...)` call.
 *
 * `counter(page, lower-roman)` arrives as a CssFunction whose single
 * `arguments[0]` is a comma-separated `ValueList` holding the real
 * positional arguments. In that case the list's values are returned
 * so the caller can index by position. Any other shape (no arguments,
 * several arguments, or a single argument that is not a
 * comma-separated list) is returned unchanged.
 *
 * @param list<\Phpdftk\Css\Value\Value> $arguments
 * @return list<\Phpdftk\Css\Value\Value>
 */
private function splitCounterArgs(array $arguments): array
{
    if (count($arguments) === 1) {
        $only = $arguments[0];
        $isCommaList = $only instanceof \Phpdftk\Css\Value\ValueList
            && $only->separator === \Phpdftk\Css\Value\ListSeparator::Comma;
        if ($isCommaList) {
            return $only->values;
        }
    }
    return $arguments;
}
| 2108 | |
| 2109 | /** |
| 2110 | * Paint the collected `@page` margin boxes on the current page. |
| 2111 | * Phase-1 positioning: a fixed 36pt (0.5") page margin band; text |
| 2112 | * baseline sits halfway through the margin. Each position picks an |
| 2113 | * anchor point and a horizontal alignment: |
| 2114 | * - top-left / bottom-left → left-aligned at the margin |
| 2115 | * - top-center / bottom-center → centred on the page width |
| 2116 | * - top-right / bottom-right → right-aligned at the margin |
| 2117 | * Uses the document's default font at 10pt. Author-driven sizing / |
| 2118 | * styling lands when we cascade margin-box rules into a proper |
| 2119 | * mini-layout (follow-up). |
| 2120 | * |
| 2121 | * @param array<string, array{ |
| 2122 | * parts: list<array{kind: string, value: string}>, |
| 2123 | * fontSize: float, |
| 2124 | * color: \Phpdftk\Css\Value\Color, |
| 2125 | * textAlign: ?string, |
| 2126 | * fontFamily: ?\Phpdftk\Css\Value\Value, |
| 2127 | * fontWeight: int, |
| 2128 | * fontStyle: string, |
| 2129 | * }> $boxes |
| 2130 | * @param array<string, \Phpdftk\Pdf\Core\Font\RegisteredFont> $registeredMap |
| 2131 | */ |
| 2132 | private function paintPageMarginBoxes( |
| 2133 | \Phpdftk\Pdf\Core\Content\ContentStream $stream, |
| 2134 | array $boxes, |
| 2135 | float $pageWidth, |
| 2136 | float $pageHeight, |
| 2137 | \Phpdftk\FontParser\OpenTypeData $font, |
| 2138 | \Phpdftk\Pdf\Core\Font\RegisteredFont $registered, |
| 2139 | int $pageIndex, |
| 2140 | int $pageCount, |
| 2141 | ?\Phpdftk\HtmlToPdf\Layout\FontResolver $fontResolver = null, |
| 2142 | array $registeredMap = [], |
| 2143 | float $marginTop = 36.0, |
| 2144 | float $marginRight = 36.0, |
| 2145 | float $marginBottom = 36.0, |
| 2146 | float $marginLeft = 36.0, |
| 2147 | ): void { |
| 2148 | $shaper = new \Phpdftk\Text\Shaper(); |
| 2149 | foreach ($boxes as $position => $spec) { |
| 2150 | // Resolve the per-page-variable parts. `counter(page)` becomes |
| 2151 | // the 1-based page number, `counter(pages)` the total, both |
| 2152 | // formatted through the optional `<counter-style>` argument |
| 2153 | // (`decimal` / `lower-roman` / `upper-alpha` / ...). |
| 2154 | $text = ''; |
| 2155 | foreach ($spec['parts'] as $part) { |
| 2156 | if ($part['kind'] === 'pagenumber') { |
| 2157 | $text .= \Phpdftk\HtmlToPdf\Layout\CounterFormat::format( |
| 2158 | $pageIndex + 1, |
| 2159 | $part['style'] ?? 'decimal', |
| 2160 | ); |
| 2161 | } elseif ($part['kind'] === 'totalpages') { |
| 2162 | $text .= \Phpdftk\HtmlToPdf\Layout\CounterFormat::format( |
| 2163 | $pageCount, |
| 2164 | $part['style'] ?? 'decimal', |
| 2165 | ); |
| 2166 | } else { |
| 2167 | $text .= $part['value']; |
| 2168 | } |
| 2169 | } |
| 2170 | if ($text === '') { |
| 2171 | continue; |
| 2172 | } |
| 2173 | // Resolve a per-position `font-family` (+ weight + style) |
| 2174 | // through the same FontResolver the body uses. When a real |
| 2175 | // bold/italic face matches, the painter skips the synthetic |
| 2176 | // fake-bold / fake-italic fallbacks; otherwise the |
| 2177 | // FontMatch's match flags drive whether those fire. |
| 2178 | $faceFont = $font; |
| 2179 | $faceRegistered = $registered; |
| 2180 | $needsFakeBold = $spec['fontWeight'] >= 600; |
| 2181 | $needsFakeItalic = $spec['fontStyle'] !== 'normal'; |
| 2182 | if ($spec['fontFamily'] !== null && $fontResolver !== null) { |
| 2183 | $match = $fontResolver->resolveMatch( |
| 2184 | $spec['fontFamily'], |
| 2185 | $spec['fontWeight'], |
| 2186 | $spec['fontStyle'], |
| 2187 | ); |
| 2188 | if ($match !== null |
| 2189 | && isset($registeredMap[$match->face->data->postScriptName]) |
| 2190 | ) { |
| 2191 | $faceFont = $match->face->data; |
| 2192 | $faceRegistered = $registeredMap[$match->face->data->postScriptName]; |
| 2193 | if ($match->matchesWeight) { |
| 2194 | $needsFakeBold = false; |
| 2195 | } |
| 2196 | if ($match->matchesStyle) { |
| 2197 | $needsFakeItalic = false; |
| 2198 | } |
| 2199 | } |
| 2200 | } |
| 2201 | $shapingCtx = new \Phpdftk\Text\ShapingContext($faceFont, $spec['fontSize']); |
| 2202 | $shaped = $shaper->shapeRun($text, $shapingCtx); |
| 2203 | if ($shaped->glyphs === []) { |
| 2204 | continue; |
| 2205 | } |
| 2206 | $width = $shaped->totalAdvance; |
| 2207 | // Y bands: top boxes sit centred in the top margin |
| 2208 | // (pageHeight - marginTop / 2); bottom boxes centred in the |
| 2209 | // bottom margin (marginBottom / 2). |
| 2210 | $yPdf = match (true) { |
| 2211 | str_starts_with($position, 'top-') => $pageHeight - $marginTop / 2, |
| 2212 | default => $marginBottom / 2, |
| 2213 | }; |
| 2214 | // Corner boxes sit in their respective margin corner area; |
| 2215 | // default alignment centres the text inside that area. |
| 2216 | // Author `text-align` still overrides. |
| 2217 | $isCorner = str_ends_with($position, '-corner'); |
| 2218 | $alignment = $spec['textAlign'] ?? match (true) { |
| 2219 | $isCorner => 'center', |
| 2220 | str_ends_with($position, '-left') => 'left', |
| 2221 | str_ends_with($position, '-right') => 'right', |
| 2222 | default => 'center', |
| 2223 | }; |
| 2224 | $xPdf = match (true) { |
| 2225 | $isCorner && str_contains($position, '-left-') => max(0.0, ($marginLeft - $width) / 2), |
| 2226 | $isCorner && str_contains($position, '-right-') |
| 2227 | => $pageWidth - $marginRight + max(0.0, ($marginRight - $width) / 2), |
| 2228 | $alignment === 'left' => $marginLeft, |
| 2229 | $alignment === 'right' => $pageWidth - $marginRight - $width, |
| 2230 | default => ($pageWidth - $width) / 2, |
| 2231 | }; |
| 2232 | $stream->saveGraphicsState(); |
| 2233 | $stream->setFillColorRGB($spec['color']->r, $spec['color']->g, $spec['color']->b); |
| 2234 | $stream->setFont($faceRegistered, $spec['fontSize']); |
| 2235 | $stream->beginText(); |
| 2236 | // Fake-italic via a 12° skew in the Tm `c` slot when no real |
| 2237 | // italic face matched — same trick used in the body painter. |
| 2238 | $skew = $needsFakeItalic ? 0.213 : 0.0; |
| 2239 | $stream->setTextMatrix(1, 0, $skew, 1, $xPdf, $yPdf); |
| 2240 | if ($needsFakeBold) { |
| 2241 | $stream->setStrokeColorRGB( |
| 2242 | $spec['color']->r, |
| 2243 | $spec['color']->g, |
| 2244 | $spec['color']->b, |
| 2245 | ); |
| 2246 | $stream->setLineWidth($spec['fontSize'] * 0.04); |
| 2247 | $stream->setTextRenderingMode(2); |
| 2248 | } else { |
| 2249 | $stream->setTextRenderingMode(0); |
| 2250 | } |
| 2251 | $gidMap = $faceRegistered instanceof \Phpdftk\Pdf\Writer\Font |
| 2252 | ? $faceRegistered->getOldToNewGidMap() |
| 2253 | : []; |
| 2254 | $hexParts = []; |
| 2255 | foreach ($shaped->glyphs as $glyph) { |
| 2256 | $newGid = $gidMap[$glyph->glyphId] ?? $glyph->glyphId; |
| 2257 | $hexParts[] = sprintf('%04X', $newGid); |
| 2258 | } |
| 2259 | $stream->showTextHex(implode('', $hexParts)); |
| 2260 | $stream->endText(); |
| 2261 | $stream->restoreGraphicsState(); |
| 2262 | } |
| 2263 | } |
| 2264 | |
/**
 * Escalate the first error-severity warning to an exception in strict mode.
 *
 * Does nothing when `$this->options->strict` is falsy. Otherwise the
 * warnings are scanned in list order, and the first one whose severity is
 * `WarningSeverity::Error` is passed to a new StrictModeException, which is
 * thrown immediately; later warnings are not inspected.
 *
 * @param list<Warning> $warnings
 *
 * @throws StrictModeException
 */
private function maybeThrow(array $warnings): void
{
    if ($this->options->strict) {
        foreach ($warnings as $warning) {
            // Anything below error severity never aborts rendering.
            if ($warning->severity !== WarningSeverity::Error) {
                continue;
            }
            throw new StrictModeException($warning);
        }
    }
}
| 2277 | } |