Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.82% |
238 / 251 |
|
63.64% |
7 / 11 |
CRAP | |
0.00% |
0 / 1 |
| OpenTypeParser | |
94.82% |
238 / 251 |
|
63.64% |
7 / 11 |
79.87 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| fromBytes | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
| parse | |
94.48% |
171 / 181 |
|
0.00% |
0 / 1 |
49.40 | |||
| parseCmapFormat4 | |
97.30% |
36 / 37 |
|
0.00% |
0 / 1 |
11 | |||
| parseCmapFormat12 | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| win1252ToUnicode | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
5 | |||
| readUint16 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| readInt16 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| readUint32 | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| readInt32 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| tableOffset | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\FontParser; |
| 6 | |
| 7 | use Phpdftk\Filesystem\LocalFilesystem; |
| 8 | |
/**
 * Parser for OpenType fonts with CFF outlines (sfVersion "OTTO").
 *
 * Reads the sfnt table directory and the head, hhea, OS/2, post, name, maxp,
 * hmtx and cmap tables (plus vhea/vmtx, GPOS/kern and GSUB when present).
 * The CFF table is not interpreted: its raw bytes are extracted for embedding.
 *
 * The parsed result is an OpenTypeData object containing metrics,
 * glyph widths, Unicode mappings, and raw CFF bytes.
 *
 * All multi-byte reads are bounds-checked; a truncated or corrupt font raises
 * \RuntimeException instead of emitting string-offset warnings and silently
 * producing zeros.
 */
final class OpenTypeParser
{
    /** sfVersion of an OpenType font with CFF outlines: the ASCII tag "OTTO". */
    private const SFNT_VERSION_OTTO = 0x4F54544F;

    /** Highest valid Unicode code point; cmap ranges are clamped to it. */
    private const MAX_CODEPOINT = 0x10FFFF;

    // Font descriptor flag bits written into OpenTypeData::$flags.
    private const FLAG_FIXED_PITCH = 1;
    private const FLAG_NONSYMBOLIC = 32;
    private const FLAG_ITALIC = 64;
    private const FLAG_FORCE_BOLD = 262144;

    // Windows-1252 byte => Unicode codepoint for bytes 128-159
    private const WIN1252_MAP = [
        128 => 0x20AC, 130 => 0x201A, 131 => 0x0192, 132 => 0x201E,
        133 => 0x2026, 134 => 0x2020, 135 => 0x2021, 136 => 0x02C6,
        137 => 0x2030, 138 => 0x0160, 139 => 0x2039, 140 => 0x0152,
        142 => 0x017D, 145 => 0x2018, 146 => 0x2019, 147 => 0x201C,
        148 => 0x201D, 149 => 0x2022, 150 => 0x2013, 151 => 0x2014,
        152 => 0x02DC, 153 => 0x2122, 154 => 0x0161, 155 => 0x203A,
        156 => 0x0153, 158 => 0x017E, 159 => 0x0178,
    ];

    /** Complete font file contents; populated by parse(). */
    private string $data = '';

    /** @var array<string, array{offset:int, length:int}> Table directory keyed by trimmed tag. */
    private array $tables = [];

    /** Font bytes supplied through fromBytes(); null when the font is read from $path. */
    private ?string $inMemoryBytes = null;

    public function __construct(private readonly string $path) {}

    /**
     * Create a parser from raw font bytes instead of a file path.
     *
     * The bytes are kept in memory and handed to parse() directly, so no
     * temporary file is created (and therefore none is left behind).
     */
    public static function fromBytes(string $fontBytes): self
    {
        $parser = new self('');
        $parser->inMemoryBytes = $fontBytes;

        return $parser;
    }

    /**
     * Parse the font and return its metrics, mappings and raw table data.
     *
     * @throws \RuntimeException If the data is not an "OTTO" font, a required
     *                           table is missing, unitsPerEm is zero, or a read
     *                           would fall outside the font data.
     */
    public function parse(): OpenTypeData
    {
        $this->data = $this->inMemoryBytes ?? LocalFilesystem::readFile($this->path, "font file");
        // Start from a clean directory so a repeated parse() never sees stale entries.
        $this->tables = [];

        // Validate OpenType CFF signature
        $sfVersion = $this->readUint32(0);
        if ($sfVersion !== self::SFNT_VERSION_OTTO) {
            throw new \RuntimeException(sprintf(
                'Not an OpenType CFF font (sfVersion=0x%08X); expected 0x4F54544F ("OTTO")',
                $sfVersion,
            ));
        }

        $this->readTableDirectory();

        // Extract raw CFF table (the directory tag "CFF " is stored right-trimmed)
        if (!isset($this->tables['CFF'])) {
            throw new \RuntimeException('OpenType font has no CFF table');
        }
        $cff = $this->tables['CFF'];
        $cffBytes = substr($this->data, $cff['offset'], $cff['length']);

        // head
        $headBase = $this->tableOffset('head');
        $unitsPerEm = $this->readUint16($headBase + 18);
        if ($unitsPerEm === 0) {
            // Every metric below is divided by unitsPerEm.
            throw new \RuntimeException('Invalid font: unitsPerEm in head table is 0');
        }
        $xMin = $this->readInt16($headBase + 36);
        $yMin = $this->readInt16($headBase + 38);
        $xMax = $this->readInt16($headBase + 40);
        $yMax = $this->readInt16($headBase + 42);

        // hhea
        $hheaBase = $this->tableOffset('hhea');
        $hheaAscender = $this->readInt16($hheaBase + 4);
        $hheaDescender = $this->readInt16($hheaBase + 6);
        $numberOfHMetrics = $this->readUint16($hheaBase + 34);

        // OS/2
        $os2Base = $this->tableOffset('OS/2');
        $os2Version = $this->readUint16($os2Base);
        $usWeightClass = $this->readUint16($os2Base + 4);
        $fsType = $this->readUint16($os2Base + 8);
        $fsSelection = $this->readUint16($os2Base + 62);
        $sTypoAscender = $this->readInt16($os2Base + 68);
        $sTypoDescender = $this->readInt16($os2Base + 70);

        // sxHeight / sCapHeight are only read for OS/2 version 2 and later.
        $hasHeights = $os2Version >= 2;
        $sxHeight = $hasHeights ? $this->readInt16($os2Base + 86) : 0;
        $sCapHeight = $hasHeights ? $this->readInt16($os2Base + 88) : 0;

        // post
        $postBase = $this->tableOffset('post');
        $italicAngle = $this->readInt32($postBase + 4) / 65536.0; // 16.16 fixed point
        $underlinePosition = $this->readInt16($postBase + 8);
        $underlineThickness = $this->readInt16($postBase + 10);
        $isFixedPitch = $this->readUint32($postBase + 12);

        // name
        [$familyName, $postScriptName] = $this->parseNames();

        // maxp + hmtx
        $numGlyphs = $this->readUint16($this->tableOffset('maxp') + 4);
        $hmtxWidths = $this->readLongMetrics($this->tableOffset('hmtx'), $numberOfHMetrics, $numGlyphs);

        // cmap
        $unicodeToGid = $this->parseCmap();

        // Font units -> 1000-unit glyph space.
        $scale = static fn(int $v): int => (int) round($v * 1000 / $unitsPerEm);

        // Fall back to the hhea values when the OS/2 typo metrics are zero.
        $ascent = $scale($sTypoAscender !== 0 ? $sTypoAscender : $hheaAscender);
        $descent = $scale($sTypoDescender !== 0 ? $sTypoDescender : $hheaDescender);
        $capHeight = $hasHeights ? $scale($sCapHeight) : (int) round($ascent * 0.7);
        $xHeight = $hasHeights ? $scale($sxHeight) : (int) round($ascent * 0.5);
        $stemV = max(50, min(220, 50 + (int) ($usWeightClass / 65.0)));

        $flags = self::FLAG_NONSYMBOLIC;
        if ($isFixedPitch !== 0) {
            $flags |= self::FLAG_FIXED_PITCH;
        }
        if ($italicAngle !== 0.0 || ($fsSelection & 0x01) !== 0) {
            $flags |= self::FLAG_ITALIC;
        }
        if (($fsSelection & 0x20) !== 0) {
            $flags |= self::FLAG_FORCE_BOLD;
        }

        $fontBBox = [$scale($xMin), $scale($yMin), $scale($xMax), $scale($yMax)];

        // Build WinAnsi charWidths and unicodeMap for bytes 32..255.
        $charWidths = [];
        $unicodeMap = [];
        for ($byte = 32; $byte <= 255; $byte++) {
            $codepoint = $this->win1252ToUnicode($byte);
            $gid = $codepoint === null ? null : ($unicodeToGid[$codepoint] ?? null);
            if ($gid === null) {
                // Undefined Windows-1252 byte, or no glyph for it in this font.
                $charWidths[$byte] = 0;
                continue;
            }
            $charWidths[$byte] = $scale($hmtxWidths[$gid] ?? 0);
            $unicodeMap[$byte] = $codepoint;
        }

        $embeddingAllowed = ($fsType & 0x000E) !== 2;

        // Vertical metrics (vhea + vmtx tables) if both are present
        $verticalWidths = null;
        if (isset($this->tables['vhea'], $this->tables['vmtx'])) {
            $numOfLongVerMetrics = $this->readUint16($this->tables['vhea']['offset'] + 34);
            $verticalWidths = $this->readLongMetrics(
                $this->tables['vmtx']['offset'],
                $numOfLongVerMetrics,
                $numGlyphs,
            );
        }

        // Kerning data (GPOS or legacy kern table)
        $kernPairs = null;
        if (isset($this->tables['GPOS']) || isset($this->tables['kern'])) {
            $kernPairs = (new KerningParser())->parse($this->data, $this->tables) ?: null;
        }

        // GSUB ligature data
        $ligatures = null;
        if (isset($this->tables['GSUB'])) {
            $ligatures = (new GsubParser())->parse($this->data, $this->tables) ?: null;
        }

        return new OpenTypeData(
            postScriptName: $postScriptName,
            familyName: $familyName,
            ascent: $ascent,
            descent: $descent,
            capHeight: $capHeight,
            xHeight: $xHeight,
            italicAngle: $italicAngle,
            stemV: $stemV,
            flags: $flags,
            fontBBox: $fontBBox,
            charWidths: $charWidths,
            unicodeMap: $unicodeMap,
            cffBytes: $cffBytes,
            fontBytes: $this->data,
            embeddingAllowed: $embeddingAllowed,
            unitsPerEm: $unitsPerEm,
            fullUnicodeToGid: $unicodeToGid,
            glyphWidths: $hmtxWidths,
            kernPairs: $kernPairs,
            ligatures: $ligatures,
            verticalWidths: $verticalWidths,
            underlinePosition: $underlinePosition,
            underlineThickness: $underlineThickness,
        );
    }

    /**
     * Read the sfnt table directory (16-byte records starting at byte 12)
     * into $this->tables, keyed by the right-trimmed 4-byte tag.
     */
    private function readTableDirectory(): void
    {
        $numTables = $this->readUint16(4);
        for ($i = 0; $i < $numTables; $i++) {
            $base = 12 + $i * 16;
            $tag = rtrim(substr($this->data, $base, 4));
            $this->tables[$tag] = [
                'offset' => $this->readUint32($base + 8),
                'length' => $this->readUint32($base + 12),
            ];
        }
    }

    /**
     * Extract the family name (nameID 1) and PostScript name (nameID 6).
     *
     * Windows Unicode records (platform 3, encoding 1) are converted from
     * UTF-16BE to UTF-8 and take precedence; the last such record wins.
     * Otherwise the first platform-1 record is used, as raw bytes.
     *
     * @return array{0: string, 1: string} [familyName, postScriptName]
     */
    private function parseNames(): array
    {
        $nameBase = $this->tableOffset('name');
        $count = $this->readUint16($nameBase + 2);
        $storageBase = $nameBase + $this->readUint16($nameBase + 4);

        $records = [
            1 => ['win' => null, 'mac' => null],
            6 => ['win' => null, 'mac' => null],
        ];

        for ($i = 0; $i < $count; $i++) {
            $recBase = $nameBase + 6 + $i * 12;
            $nameID = $this->readUint16($recBase + 6);
            if (!isset($records[$nameID])) {
                continue;
            }

            $platformID = $this->readUint16($recBase);
            $encodingID = $this->readUint16($recBase + 2);
            $length = $this->readUint16($recBase + 8);
            $stringOffset = $this->readUint16($recBase + 10);
            $raw = substr($this->data, $storageBase + $stringOffset, $length);

            if ($platformID === 3 && $encodingID === 1) {
                $records[$nameID]['win'] = mb_convert_encoding($raw, 'UTF-8', 'UTF-16BE');
            } elseif ($platformID === 1 && $records[$nameID]['mac'] === null) {
                $records[$nameID]['mac'] = $raw;
            }
        }

        return [
            $records[1]['win'] ?? $records[1]['mac'] ?? '',
            $records[6]['win'] ?? $records[6]['mac'] ?? '',
        ];
    }

    /**
     * Read an hmtx/vmtx-style advance array.
     *
     * The first $longCount glyphs have 4-byte records whose leading uint16 is
     * the advance; every remaining glyph up to $numGlyphs repeats the last
     * advance that was read.
     *
     * @return array<int, int> GID => advance in font units
     */
    private function readLongMetrics(int $base, int $longCount, int $numGlyphs): array
    {
        $advances = [];
        $last = 0;
        for ($gid = 0; $gid < $longCount; $gid++) {
            $last = $this->readUint16($base + $gid * 4);
            $advances[$gid] = $last;
        }
        for ($gid = $longCount; $gid < $numGlyphs; $gid++) {
            $advances[$gid] = $last;
        }

        return $advances;
    }

    /**
     * Pick the best Unicode cmap subtable and decode it.
     *
     * Subtables covering the full Unicode range are preferred over BMP-only
     * ones, so that code points above U+FFFF are not lost. Only subtables in a
     * format this parser can decode (4 or 12) are candidates, so an
     * undecodable high-priority record never hides a usable lower-priority one.
     *
     * @return array<int, int> Unicode codepoint => GID (empty if nothing usable)
     */
    private function parseCmap(): array
    {
        $cmapBase = $this->tableOffset('cmap');
        $numSubtables = $this->readUint16($cmapBase + 2);

        $bestOffset = null;
        $bestFormat = 0;
        $bestPriority = -1;

        for ($i = 0; $i < $numSubtables; $i++) {
            $recBase = $cmapBase + 4 + $i * 8;
            $priority = self::cmapPriority(
                $this->readUint16($recBase),
                $this->readUint16($recBase + 2),
            );
            if ($priority <= $bestPriority) {
                continue;
            }

            $subtableOffset = $cmapBase + $this->readUint32($recBase + 4);
            $format = $this->readUint16($subtableOffset);
            if ($format !== 4 && $format !== 12) {
                continue;
            }

            $bestPriority = $priority;
            $bestOffset = $subtableOffset;
            $bestFormat = $format;
        }

        if ($bestOffset === null) {
            return [];
        }

        return $bestFormat === 12
            ? $this->parseCmapFormat12($bestOffset)
            : $this->parseCmapFormat4($bestOffset);
    }

    /**
     * Rank a cmap encoding record; higher is better, -1 means "not Unicode, ignore".
     */
    private static function cmapPriority(int $platformId, int $encodingId): int
    {
        return match (true) {
            $platformId === 3 && $encodingId === 10 => 4, // Windows full Unicode (UCS-4)
            $platformId === 0 && ($encodingId === 4 || $encodingId === 6) => 3, // Unicode full repertoire
            $platformId === 3 && $encodingId === 1 => 2, // Windows BMP UCS-2
            $platformId === 0 && $encodingId === 3 => 1, // Unicode BMP
            $platformId === 0 && $encodingId === 0 => 0, // Unicode default semantics
            default => -1,
        };
    }

    /**
     * Decode a format-4 (segment mapping to delta values) cmap subtable.
     *
     * Code points that resolve to GID 0 are omitted from the result.
     *
     * @return array<int, int> Unicode codepoint => GID
     */
    private function parseCmapFormat4(int $offset): array
    {
        $subtableLength = $this->readUint16($offset + 2);
        $segCountX2 = $this->readUint16($offset + 6);
        // Integer division keeps loop bounds and indices as ints even for a malformed odd value.
        $segCount = intdiv($segCountX2, 2);

        $endCodesBase = $offset + 14;
        $startCodesBase = $offset + 16 + $segCountX2; // +2 skips reservedPad
        $idDeltaBase = $offset + 16 + 2 * $segCountX2;
        $idRangeOffsetBase = $offset + 16 + 3 * $segCountX2;
        $glyphIdArrayBase = $offset + 16 + 4 * $segCountX2;

        // Trust the declared subtable length only as far as the data actually extends.
        $declaredEntries = intdiv($subtableLength - (16 + 4 * $segCountX2), 2);
        $availableEntries = intdiv(strlen($this->data) - $glyphIdArrayBase, 2);
        $glyphIdArrayLen = max(0, min($declaredEntries, $availableEntries));

        $map = [];
        for ($seg = 0; $seg < $segCount; $seg++) {
            $start = $this->readUint16($startCodesBase + $seg * 2);
            if ($start === 0xFFFF) {
                // Terminating segment.
                continue;
            }
            $end = $this->readUint16($endCodesBase + $seg * 2);
            $delta = $this->readInt16($idDeltaBase + $seg * 2);
            $rangeOffset = $this->readUint16($idRangeOffsetBase + $seg * 2);

            for ($cp = $start; $cp <= $end; $cp++) {
                if ($rangeOffset === 0) {
                    $gid = ($cp + $delta) & 0xFFFF;
                } else {
                    // idRangeOffset is relative to its own slot; rebase it onto glyphIdArray.
                    $idx = intdiv($rangeOffset, 2) + ($cp - $start) + $seg - $segCount;
                    $gid = 0;
                    if ($idx >= 0 && $idx < $glyphIdArrayLen) {
                        $gid = $this->readUint16($glyphIdArrayBase + $idx * 2);
                        if ($gid !== 0) {
                            $gid = ($gid + $delta) & 0xFFFF;
                        }
                    }
                }
                if ($gid !== 0) {
                    $map[$cp] = $gid;
                }
            }
        }

        return $map;
    }

    /**
     * Decode a format-12 (segmented coverage) cmap subtable.
     *
     * Group end codes are clamped to U+10FFFF so that a corrupt or hostile
     * range cannot force billions of iterations / array entries.
     *
     * @return array<int, int> Unicode codepoint => GID
     */
    private function parseCmapFormat12(int $offset): array
    {
        $nGroups = $this->readUint32($offset + 12);
        $map = [];
        for ($i = 0; $i < $nGroups; $i++) {
            $base = $offset + 16 + $i * 12;
            $startCharCode = $this->readUint32($base);
            $endCharCode = min($this->readUint32($base + 4), self::MAX_CODEPOINT);
            $startGlyphID = $this->readUint32($base + 8);
            for ($cp = $startCharCode; $cp <= $endCharCode; $cp++) {
                $map[$cp] = $startGlyphID + ($cp - $startCharCode);
            }
        }

        return $map;
    }

    /**
     * Map a Windows-1252 byte to its Unicode code point.
     *
     * Bytes 32-127 and 160-255 map to themselves; 128-159 go through
     * WIN1252_MAP. Returns null for bytes with no mapping.
     */
    private function win1252ToUnicode(int $byte): ?int
    {
        if (($byte >= 32 && $byte <= 127) || ($byte >= 160 && $byte <= 255)) {
            return $byte;
        }

        return self::WIN1252_MAP[$byte] ?? null;
    }

    /**
     * Ensure $length bytes starting at $offset lie inside the font data.
     *
     * @throws \RuntimeException If the range is negative or runs past the end.
     */
    private function assertReadable(int $offset, int $length): void
    {
        $size = strlen($this->data);
        if ($offset < 0 || $offset + $length > $size) {
            throw new \RuntimeException(sprintf(
                'Font data is truncated or corrupt: cannot read %d byte(s) at offset %d (size %d)',
                $length,
                $offset,
                $size,
            ));
        }
    }

    /** Read a big-endian unsigned 16-bit integer. */
    private function readUint16(int $offset): int
    {
        $this->assertReadable($offset, 2);

        return (ord($this->data[$offset]) << 8) | ord($this->data[$offset + 1]);
    }

    /** Read a big-endian signed (two's complement) 16-bit integer. */
    private function readInt16(int $offset): int
    {
        $v = $this->readUint16($offset);

        return $v >= 0x8000 ? $v - 0x10000 : $v;
    }

    /** Read a big-endian unsigned 32-bit integer. */
    private function readUint32(int $offset): int
    {
        $this->assertReadable($offset, 4);

        return ((ord($this->data[$offset]) << 24)
            | (ord($this->data[$offset + 1]) << 16)
            | (ord($this->data[$offset + 2]) << 8)
            | ord($this->data[$offset + 3])) & 0xFFFFFFFF;
    }

    /** Read a big-endian signed (two's complement) 32-bit integer. */
    private function readInt32(int $offset): int
    {
        $v = $this->readUint32($offset);

        return $v >= 0x80000000 ? (int) ($v - 0x100000000) : (int) $v;
    }

    /**
     * Return the file offset of a required table.
     *
     * @throws \RuntimeException If the table is not in the directory.
     */
    private function tableOffset(string $tag): int
    {
        if (!isset($this->tables[$tag])) {
            throw new \RuntimeException("Required table '{$tag}' not found in font");
        }

        return $this->tables[$tag]['offset'];
    }
}