Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
86.00% |
43 / 50 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
| ResourceLoader | |
86.00% |
43 / 50 |
|
25.00% |
1 / 4 |
26.71 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| load | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
| resolveLocalPath | |
77.78% |
14 / 18 |
|
0.00% |
0 / 1 |
9.89 | |||
| decodeDataUrl | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
10 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Filesystem; |
| 6 | |
| 7 | /** |
| 8 | * Centralised entry point for fetching the bytes of every URL-shaped |
| 9 | * resource an html-to-pdf / svg-to-pdf render walks into — fonts via |
| 10 | * `@font-face src: url(...)`, images via `<img src>` / |
| 11 | * `background-image: url(...)`, stylesheets via `<link rel="stylesheet">` |
| 12 | * / `@import`. Replaces the per-call-site data-URL + baseDir + realpath |
| 13 | * boilerplate that had drifted into four near-identical resolvers. |
| 14 | * |
| 15 | * Phase-1 supported sources: |
| 16 | * - `data:<mime>[;base64],<payload>` URLs (MIME-validated when the |
| 17 | * caller supplies an allowlist) |
| 18 | * - Relative paths joined with the configured `$baseDir` |
| 19 | * - Absolute filesystem paths resolved under the same `$baseDir` |
| 20 | * |
| 21 | * All security gates documented in `docs/plans/html-and-svg.md` |
| 22 | * apply uniformly: `realpath` escape rejection (no `..` walks out of |
| 23 | * `baseDir`), stream-wrapper rejection (`php://`, `phar://`, etc.) via |
| 24 | * the underlying `LocalFilesystem::assertLocalPath`, and explicit URL |
| 25 | * scheme rejection (`http://`, `https://`, `ftp://`, ...) — remote |
| 26 | * fetching lands in Phase 2 behind the same surface, with SSRF gates |
| 27 | * added then. |
| 28 | * |
| 29 | * `data:` URLs are accepted unconditionally; the caller's allowlist |
| 30 | * (`$allowedMimes`) is a *MIME match* check, not a security gate — |
| 31 | * binary payloads still get the same treatment as the on-disk path. |
| 32 | */ |
final readonly class ResourceLoader
{
    /**
     * @param string|null $baseDir Directory every local path must resolve
     *     inside. When null, local-path resolution always yields null, so
     *     only `data:` URLs can be loaded.
     */
    public function __construct(public ?string $baseDir = null) {}

    /**
     * Resolve `$url` to its raw bytes. Returns null when:
     *  - `$url` is empty;
     *  - the URL is neither a `data:` URL nor a local path that resolves
     *    under `$baseDir`;
     *  - `$allowedMimes` is non-empty and the declared MIME of a `data:`
     *    URL isn't in the list (disk paths carry no transport-level MIME,
     *    so the allowlist is not applied to them);
     *  - the underlying read throws (stream-wrapper, permissions, etc.).
     *
     * @param list<string>|null $allowedMimes MIME types, compared
     *     case-insensitively. When null or empty, any `data:` MIME is
     *     accepted. When set, only those MIMEs pass through.
     */
    public function load(string $url, ?array $allowedMimes = null): ?string
    {
        if ($url === '') {
            return null;
        }
        // URL schemes are case-insensitive, so `DATA:` must take the same
        // branch as `data:` instead of falling through to the filesystem.
        if (strncasecmp($url, 'data:', 5) === 0) {
            return $this->decodeDataUrl($url, $allowedMimes);
        }
        $resolved = $this->resolveLocalPath($url);
        if ($resolved === null) {
            return null;
        }
        try {
            return LocalFilesystem::readFile($resolved);
        } catch (\Throwable) {
            // Deliberate best-effort: an unreadable resource is reported
            // as "absent" (null) rather than surfaced as an exception.
            return null;
        }
    }

    /**
     * Resolve `$url` to a real filesystem path, or null when it can't
     * be confirmed safe under `$baseDir`. Useful when the caller needs
     * the path itself rather than the bytes.
     *
     * Returns null when:
     *  - no `$baseDir` is configured;
     *  - `$url` contains a NUL byte;
     *  - `$url` starts with any `scheme://` prefix (this includes
     *    `file://` — only plain paths are accepted);
     *  - `realpath` fails for the candidate or for `$baseDir`;
     *  - the resolved path is neither `$baseDir` itself nor below it;
     *  - {@see LocalFilesystem::assertLocalPath} throws for the path.
     *
     * A `$url` starting with `/` is taken as-is; anything else is joined
     * onto `$baseDir`. Either way the result must end up inside `$baseDir`.
     */
    public function resolveLocalPath(string $url): ?string
    {
        if ($this->baseDir === null) {
            return null;
        }
        // realpath() raises ValueError on NUL bytes; honour the documented
        // "null when unsafe" contract instead of letting that escape.
        if (str_contains($url, "\0")) {
            return null;
        }
        if (preg_match('~^[a-zA-Z][a-zA-Z0-9+.-]*://~', $url) === 1) {
            return null;
        }
        $candidate = str_starts_with($url, '/')
            ? $url
            : $this->baseDir . DIRECTORY_SEPARATOR . $url;
        $resolved = realpath($candidate);
        $base = realpath($this->baseDir);
        if ($resolved === false || $base === false) {
            return null;
        }
        // Normalise the trailing separator so that a root base directory
        // yields the prefix "/" rather than "//", which nothing matches.
        $prefix = rtrim($base, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
        if ($resolved !== $base && !str_starts_with($resolved, $prefix)) {
            return null;
        }
        try {
            LocalFilesystem::assertLocalPath($resolved);
        } catch (\Throwable) {
            return null;
        }
        return $resolved;
    }

    /**
     * Decode a `data:<mime>[;params][;base64],<payload>` URL into raw bytes.
     *
     * The payload is percent-decoded first; when the header carries a
     * `base64` parameter the result is then strictly base64-decoded.
     * A literal `+` is preserved (data URLs are percent-encoded, not
     * form-encoded, and `+` is a base64 alphabet character).
     *
     * When `$allowedMimes` is non-empty, the declared MIME must match one
     * entry exactly (case-insensitive); a payload claiming `image/png`
     * passes for an `['image/png']` allowlist but not for `['image/jpeg']`.
     * A URL with no declared MIME is compared as the empty string.
     *
     * @param list<string>|null $allowedMimes
     *
     * @return string|null Decoded bytes, or null when the URL has no comma,
     *     the MIME is not allowed, or the base64 payload is invalid.
     */
    private function decodeDataUrl(string $url, ?array $allowedMimes): ?string
    {
        $commaPos = strpos($url, ',');
        if ($commaPos === false) {
            return null;
        }
        // Offset 5 skips the leading `data:` scheme prefix.
        $header = substr($url, 5, $commaPos - 5);
        $payload = substr($url, $commaPos + 1);

        // First segment is the MIME type; the rest are `;`-separated params.
        $parts = explode(';', $header);
        $mime = strtolower(trim($parts[0]));
        $isBase64 = false;
        foreach (array_slice($parts, 1) as $param) {
            if (strtolower(trim($param)) === 'base64') {
                $isBase64 = true;
                break;
            }
        }

        if ($allowedMimes !== null && $allowedMimes !== []) {
            $allowed = array_map(strtolower(...), $allowedMimes);
            if (!in_array($mime, $allowed, true)) {
                return null;
            }
        }

        // rawurldecode (not urldecode): `+` must stay a literal plus sign.
        // For a clean base64 payload this is a no-op; it only matters when
        // characters such as `=` arrive percent-escaped.
        $payload = rawurldecode($payload);
        if (!$isBase64) {
            return $payload;
        }
        $decoded = base64_decode($payload, true);
        return $decoded === false ? null : $decoded;
    }
}