Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
82.09% |
110 / 134 |
|
77.78% |
7 / 9 |
CRAP | |
0.00% |
0 / 1 |
| Woff2Parser | |
82.09% |
110 / 134 |
|
77.78% |
7 / 9 |
50.66 | |
0.00% |
0 / 1 |
| decompress | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| decompressBytes | |
70.15% |
47 / 67 |
|
0.00% |
0 / 1 |
24.69 | |||
| isWoff2 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| detectFlavor | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
5 | |||
| brotliDecompress | |
50.00% |
4 / 8 |
|
0.00% |
0 / 1 |
4.12 | |||
| readUIntBase128 | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
6 | |||
| buildSfnt | |
100.00% |
35 / 35 |
|
100.00% |
1 / 1 |
5 | |||
| readUint32 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| readUint16 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\FontParser; |
| 6 | |
| 7 | use Phpdftk\Filesystem\LocalFilesystem; |
| 8 | |
/**
 * WOFF 2.0 (Web Open Font Format 2.0) decompressor.
 *
 * Parses a WOFF2 container, decompresses the Brotli-compressed table
 * data, and reconstructs sfnt (TrueType/OpenType) bytes. The result can
 * be passed to TrueTypeParser::fromBytes() or OpenTypeParser::fromBytes().
 *
 * Limitations: table transforms (glyf/loca/hmtx) are NOT reversed. Tables
 * stored with the null transform are copied verbatim. A transformed
 * glyf/loca stream is only padded or truncated to its declared original
 * length, so the resulting glyf/loca bytes are not the original tables.
 * The head table's checkSumAdjustment field is left as stored in the font.
 *
 * Brotli decompression requires ext-brotli (PECL). This class deliberately
 * does not shell out to external `brotli` binaries — font input is treated
 * as untrusted and the security cost of `proc_open` outweighs the
 * convenience. If ext-brotli is not installed, decompression throws.
 *
 * @see https://www.w3.org/TR/WOFF2/
 */
final class Woff2Parser
{
    private const WOFF2_SIGNATURE = 0x774F4632; // 'wOF2'

    /** Size of the fixed WOFF2 header; the table directory starts right after it. */
    private const HEADER_SIZE = 48;

    /** Flag value (bits 0-5) announcing an explicit 4-byte tag instead of a known-tag index. */
    private const ARBITRARY_TAG_INDEX = 63;

    /** Bytes summed per unpack() call when checksumming; must be a multiple of 4. */
    private const CHECKSUM_CHUNK = 65536;

    // Known table tags for compact tag encoding (Table 1 in spec)
    private const KNOWN_TAGS = [
        0 => 'cmap', 1 => 'head', 2 => 'hhea', 3 => 'hmtx',
        4 => 'maxp', 5 => 'name', 6 => 'OS/2', 7 => 'post',
        8 => 'cvt ', 9 => 'fpgm', 10 => 'glyf', 11 => 'loca',
        12 => 'prep', 13 => 'CFF ', 14 => 'VORG', 15 => 'EBDT',
        16 => 'EBLC', 17 => 'gasp', 18 => 'hdmx', 19 => 'kern',
        20 => 'LTSH', 21 => 'PCLT', 22 => 'VDMX', 23 => 'vhea',
        24 => 'vmtx', 25 => 'BASE', 26 => 'GDEF', 27 => 'GPOS',
        28 => 'GSUB', 29 => 'EBSC', 30 => 'JSTF', 31 => 'MATH',
        32 => 'CBDT', 33 => 'CBLC', 34 => 'COLR', 35 => 'CPAL',
        36 => 'SVG ', 37 => 'sbix', 38 => 'acnt', 39 => 'avar',
        40 => 'bdat', 41 => 'bloc', 42 => 'bsln', 43 => 'cvar',
        44 => 'fdsc', 45 => 'feat', 46 => 'fmtx', 47 => 'fvar',
        48 => 'gvar', 49 => 'hsty', 50 => 'just', 51 => 'lcar',
        52 => 'mort', 53 => 'morx', 54 => 'opbd', 55 => 'prop',
        56 => 'trak', 57 => 'Zapf', 58 => 'Silf', 59 => 'Glat',
        60 => 'Gloc', 61 => 'Feat', 62 => 'Sill',
    ];

    /**
     * Decompress a WOFF2 file to raw sfnt (TTF/OTF) bytes.
     *
     * @param string $woff2Path Path handed to LocalFilesystem::readFile().
     *
     * @throws \RuntimeException If the bytes are not a decodable WOFF2 container.
     */
    public static function decompress(string $woff2Path): string
    {
        $data = LocalFilesystem::readFile($woff2Path, "font file");
        return self::decompressBytes($data);
    }

    /**
     * Decompress WOFF2 bytes to raw sfnt (TTF/OTF) bytes.
     *
     * Steps: validate the header, parse the variable-length table directory,
     * Brotli-decompress the single data stream, slice it into tables, then
     * assemble an sfnt with freshly computed per-table checksums.
     *
     * @param string $data Complete WOFF2 file contents.
     *
     * @return string Reconstructed sfnt bytes.
     *
     * @throws \RuntimeException On a short or invalid header, a truncated table
     *                           directory, a malformed UIntBase128 value, Brotli
     *                           failure or absence, or an empty table directory.
     */
    public static function decompressBytes(string $data): string
    {
        $dataLength = strlen($data);
        if ($dataLength < self::HEADER_SIZE) {
            throw new \RuntimeException('WOFF2 data too short for header');
        }

        // WOFF2 Header (48 bytes). Only the fields needed for reconstruction
        // are read: signature @0, flavor @4, numTables @12, totalCompressedSize @20.
        $signature = self::readUint32($data, 0);
        if ($signature !== self::WOFF2_SIGNATURE) {
            throw new \RuntimeException(sprintf(
                'Not a WOFF2 file (signature=0x%08X); expected 0x%08X',
                $signature,
                self::WOFF2_SIGNATURE,
            ));
        }

        $flavor = self::readUint32($data, 4);
        $numTables = self::readUint16($data, 12);
        $totalCompressedSize = self::readUint32($data, 20);

        // Parse table directory (variable-length entries starting at offset 48)
        $offset = self::HEADER_SIZE;
        $tables = [];

        for ($i = 0; $i < $numTables; $i++) {
            if ($offset >= $dataLength) {
                throw new \RuntimeException('WOFF2 table directory truncated');
            }

            $flags = ord($data[$offset]);
            $offset++;

            // Bits 0-5: tag index (0-62 = known tag, 63 = arbitrary 4-byte tag)
            $tagIndex = $flags & 0x3F;
            if ($tagIndex === self::ARBITRARY_TAG_INDEX) {
                // Untrusted input: make sure all four tag bytes really exist.
                if ($offset + 4 > $dataLength) {
                    throw new \RuntimeException('WOFF2 table directory truncated');
                }
                $tag = substr($data, $offset, 4);
                $offset += 4;
            } else {
                // Indices 0-62 are all present in KNOWN_TAGS.
                $tag = self::KNOWN_TAGS[$tagIndex];
            }

            // Bits 6-7: preprocessing transform version
            $transformVersion = ($flags >> 6) & 0x03;

            // origLength (UIntBase128)
            [$origLength, $offset] = self::readUIntBase128($data, $offset);

            // For glyf and loca, transform version 0 means "transformed";
            // for every other table, version 0 means "no transform".
            $isTransformed = ($tag === 'glyf' || $tag === 'loca')
                ? $transformVersion === 0
                : $transformVersion !== 0;

            // transformLength is only present in the directory for transformed
            // tables; it is then the table's length inside the data stream.
            $streamLength = $origLength;
            if ($isTransformed) {
                [$streamLength, $offset] = self::readUIntBase128($data, $offset);
            }

            $tables[] = [
                'tag' => $tag,
                'origLength' => $origLength,
                'streamLength' => $streamLength,
                'isTransformed' => $isTransformed,
            ];
        }

        // The compressed data stream starts right after the table directory
        $compressedData = substr($data, $offset, $totalCompressedSize);
        $decompressed = self::brotliDecompress($compressedData);

        // Split the decompressed stream into individual tables
        $streamOffset = 0;
        $decompressedTables = [];

        foreach ($tables as $table) {
            // Best effort on a short stream: substr() simply yields whatever
            // bytes remain (possibly none) instead of failing.
            $tableData = substr($decompressed, $streamOffset, $table['streamLength']);
            $streamOffset += $table['streamLength'];

            // The glyf/loca transform is not reversed here. The transformed
            // stream is zero-padded up to origLength so the sfnt entry has the
            // declared size; the bytes are NOT the original table contents.
            if ($table['isTransformed'] && ($table['tag'] === 'glyf' || $table['tag'] === 'loca')) {
                $tableData = str_pad($tableData, $table['origLength'], "\x00");
            }

            // No table is ever emitted longer than its declared original length.
            $tableData = substr($tableData, 0, $table['origLength']);

            $decompressedTables[] = [
                'tag' => $table['tag'],
                // WOFF2 does not store table checksums, so compute them here.
                'checksum' => self::tableChecksum($table['tag'], $tableData),
                'data' => $tableData,
            ];
        }

        // Reconstruct sfnt
        return self::buildSfnt($flavor, $decompressedTables);
    }

    /**
     * Detect whether bytes are a WOFF2 file.
     *
     * @return bool True when the first four bytes are the 'wOF2' signature.
     */
    public static function isWoff2(string $data): bool
    {
        return strlen($data) >= 4 && self::readUint32($data, 0) === self::WOFF2_SIGNATURE;
    }

    /**
     * Detect the flavor (TrueType or OpenType CFF) of a WOFF2 file.
     *
     * Only the flavor field at offset 4 is inspected; the signature is not checked.
     *
     * @return string 'truetype', 'opentype', or 'unknown' (also for input shorter than 8 bytes).
     */
    public static function detectFlavor(string $data): string
    {
        if (strlen($data) < 8) {
            return 'unknown';
        }
        $flavor = self::readUint32($data, 4);
        return match ($flavor) {
            0x00010000 => 'truetype',
            0x4F54544F => 'opentype',
            default => 'unknown',
        };
    }

    /**
     * Decompress data using Brotli.
     *
     * Uses ext-brotli. Production code intentionally does not shell out
     * to external Brotli binaries for untrusted font input.
     *
     * @throws \RuntimeException If ext-brotli is missing or reports failure.
     */
    private static function brotliDecompress(string $data): string
    {
        if (function_exists('brotli_uncompress')) {
            $result = brotli_uncompress($data);
            if ($result === false) {
                throw new \RuntimeException('Brotli decompression failed (ext-brotli)');
            }
            return $result;
        }

        throw new \RuntimeException(
            'WOFF2 requires Brotli decompression. Install ext-brotli.',
        );
    }

    /**
     * Read a UIntBase128 variable-length integer (WOFF2 spec §5.2).
     *
     * Each byte contributes 7 value bits, most significant group first; the
     * high bit marks "more bytes follow". At most 5 bytes are consumed and the
     * decoded value must fit in 32 bits.
     *
     * @return array{int, int} [value, newOffset]
     *
     * @throws \RuntimeException On truncated input, a leading zero group,
     *                           32-bit overflow, or more than 5 bytes.
     */
    private static function readUIntBase128(string $data, int $offset): array
    {
        $length = strlen($data);
        $value = 0;

        for ($i = 0; $i < 5; $i++) {
            if ($offset >= $length) {
                throw new \RuntimeException('UIntBase128 read past end of data');
            }
            $byte = ord($data[$offset]);
            $offset++;

            // Leading zeros are not allowed (except for value 0)
            if ($i === 0 && $byte === 0x80) {
                throw new \RuntimeException('UIntBase128 has leading zero');
            }

            // If any of the top 7 bits are already set, shifting by 7 would
            // push the value past 32 bits.
            if (($value & 0xFE000000) !== 0) {
                throw new \RuntimeException('UIntBase128 exceeds 32 bits');
            }

            $value = ($value << 7) | ($byte & 0x7F);

            if (($byte & 0x80) === 0) {
                return [$value, $offset];
            }
        }

        throw new \RuntimeException('UIntBase128 exceeds 5 bytes');
    }

    /**
     * Compute the sfnt checksum of a single table.
     *
     * The checksum is the sum, modulo 2^32, of the table read as big-endian
     * uint32 words after zero-padding to a multiple of four bytes. For the
     * 'head' table the checkSumAdjustment field (bytes 8-11) counts as zero.
     */
    private static function tableChecksum(string $tag, string $data): int
    {
        if ($tag === 'head' && strlen($data) >= 12) {
            $data = substr_replace($data, "\x00\x00\x00\x00", 8, 4);
        }
        $data .= str_repeat("\x00", (4 - strlen($data) % 4) % 4);

        $sum = 0;
        $length = strlen($data);
        // Sum in bounded chunks so a large table never becomes one huge array.
        for ($pos = 0; $pos < $length; $pos += self::CHECKSUM_CHUNK) {
            $words = unpack('N*', substr($data, $pos, self::CHECKSUM_CHUNK));
            $sum = ($sum + (int) array_sum($words)) & 0xFFFFFFFF;
        }

        return $sum;
    }

    /**
     * Reconstruct an sfnt file from decompressed tables.
     *
     * Table data is laid out in the order given, each table padded to a
     * 4-byte boundary. The table directory is written sorted ascending by
     * tag, as the OpenType table directory requires.
     *
     * @param int $flavor sfnt version written as the first four bytes.
     * @param array<array{tag: string, checksum: int, data: string}> $tables
     *
     * @throws \RuntimeException If $tables is empty.
     */
    private static function buildSfnt(int $flavor, array $tables): string
    {
        $numTables = count($tables);
        if ($numTables === 0) {
            throw new \RuntimeException('No tables found in WOFF2 data');
        }

        // entrySelector = floor(log2(numTables)), computed with integers only.
        $entrySelector = 0;
        while ((2 << $entrySelector) <= $numTables) {
            $entrySelector++;
        }
        $searchRange = (1 << $entrySelector) * 16;
        $rangeShift = $numTables * 16 - $searchRange;

        // 12-byte offset table followed by one 16-byte record per table.
        $offset = 12 + $numTables * 16;

        $directory = [];
        $body = '';
        foreach ($tables as $table) {
            $length = strlen($table['data']);
            $directory[] = [
                'tag' => $table['tag'],
                'checksum' => $table['checksum'],
                'offset' => $offset,
                'length' => $length,
            ];

            $padding = (4 - ($length % 4)) % 4;
            $body .= $table['data'] . str_repeat("\x00", $padding);
            $offset += $length + $padding;
        }

        // Only the directory records are reordered; recorded offsets still
        // point at the data in its original order.
        usort(
            $directory,
            static fn (array $a, array $b): int => strcmp($a['tag'], $b['tag']),
        );

        $sfnt = pack('Nnnnn', $flavor, $numTables, $searchRange, $entrySelector, $rangeShift);
        foreach ($directory as $entry) {
            $sfnt .= str_pad($entry['tag'], 4, "\x00");
            $sfnt .= pack('NNN', $entry['checksum'], $entry['offset'], $entry['length']);
        }

        return $sfnt . $body;
    }

    /** Read a big-endian unsigned 32-bit integer at $offset. */
    private static function readUint32(string $data, int $offset): int
    {
        return unpack('N', $data, $offset)[1];
    }

    /** Read a big-endian unsigned 16-bit integer at $offset. */
    private static function readUint16(string $data, int $offset): int
    {
        return unpack('n', $data, $offset)[1];
    }
}