Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
83.96% |
178 / 212 |
|
14.29% |
1 / 7 |
CRAP | |
0.00% |
0 / 1 |
| CffParser | |
83.96% |
178 / 212 |
|
14.29% |
1 / 7 |
96.59 | |
0.00% |
0 / 1 |
| parse | |
83.54% |
66 / 79 |
|
0.00% |
0 / 1 |
11.54 | |||
| parseIndex | |
94.44% |
17 / 18 |
|
0.00% |
0 / 1 |
4.00 | |||
| parseDictData | |
90.54% |
67 / 74 |
|
0.00% |
0 / 1 |
39.22 | |||
| parseCharset | |
63.33% |
19 / 30 |
|
0.00% |
0 / 1 |
23.66 | |||
| readOffset | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| readUint16 | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| getTopDictInt | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\FontParser; |
| 6 | |
/**
 * Parses a CFF (Compact Font Format) binary table into a CffData structure.
 *
 * Implements parsing per Adobe Technical Note #5176 (CFF spec) and
 * #5177 (Type 2 Charstring format). Does NOT interpret charstring
 * bytecode — charstrings are stored as opaque byte strings.
 *
 * The layout read here is: header, Name INDEX, Top DICT INDEX, String INDEX,
 * Global Subr INDEX, followed by the structures the Top DICT points at
 * (CharStrings, Private DICT, charset, FDArray, FDSelect). Only the first
 * Top DICT entry is decoded.
 *
 * Structurally invalid input (header shorter than four bytes, bad INDEX
 * offsets, truncated DICT operands, out-of-range structure offsets) is
 * reported with \RuntimeException rather than being decoded into garbage.
 */
final class CffParser
{
    /** Raw CFF table bytes of the font currently being parsed. */
    private string $data = '';

    /** Byte length of $data. */
    private int $length = 0;

    /**
     * Parse a complete CFF table.
     *
     * @param string $cffBytes Raw bytes of the CFF table.
     *
     * @return CffData Header fields, raw INDEX blobs, decoded Top DICT
     *                 operators, per-glyph charstrings and the GID => SID/CID charset.
     *
     * @throws \RuntimeException If the data is truncated or structurally invalid,
     *                           or the Top DICT has no CharStrings operator (17).
     */
    public function parse(string $cffBytes): CffData
    {
        $this->data = $cffBytes;
        $this->length = strlen($cffBytes);

        if ($this->length < 4) {
            throw new \RuntimeException('CFF data too short to contain a header');
        }

        // Header: major(1) minor(1) hdrSize(1) offSize(1)
        $major = ord($this->data[0]);
        $minor = ord($this->data[1]);
        $hdrSize = ord($this->data[2]);

        // Name INDEX starts right after the header.
        $nameIndex = $this->parseIndex($hdrSize);
        $nameIndexData = $this->sliceBytes($hdrSize, $nameIndex['nextOffset']);

        // Top DICT INDEX (first entry only — single-font CFF)
        $topDictIndex = $this->parseIndex($nameIndex['nextOffset']);
        $topDictOperators = isset($topDictIndex['entries'][0])
            ? $this->parseDictData($topDictIndex['entries'][0])
            : [];

        // String INDEX
        $stringIndexStart = $topDictIndex['nextOffset'];
        $stringIndex = $this->parseIndex($stringIndexStart);
        $stringIndexData = $this->sliceBytes($stringIndexStart, $stringIndex['nextOffset']);

        // Global Subr INDEX
        $globalSubrStart = $stringIndex['nextOffset'];
        $globalSubrIndex = $this->parseIndex($globalSubrStart);
        $globalSubrIndexData = $this->sliceBytes($globalSubrStart, $globalSubrIndex['nextOffset']);

        // CharStrings INDEX (located via Top DICT operator 17); entry position == GID.
        $charStringsOffset = $this->getTopDictInt($topDictOperators, 17);
        if ($charStringsOffset === null) {
            throw new \RuntimeException('CFF Top DICT missing CharStrings operator (17)');
        }
        $charStrings = $this->parseIndex($charStringsOffset)['entries'];

        // Private DICT (operator 18 = [size, offset])
        $privateDictData = '';
        $localSubrIndexData = '';
        $privateOp = $topDictOperators[18] ?? null;
        if (is_array($privateOp) && count($privateOp) === 2) {
            $privateSize = (int) $privateOp[0];
            $privateOffset = (int) $privateOp[1];
            // A negative offset would make substr() count from the end of the data.
            if ($privateSize < 0 || $privateOffset < 0 || $privateOffset + $privateSize > $this->length) {
                throw new \RuntimeException('CFF Private DICT lies outside the table data');
            }
            $privateDictData = substr($this->data, $privateOffset, $privateSize);

            // Local Subr INDEX (Private DICT operator 19, relative to the Private DICT start)
            $privateDictOps = $this->parseDictData($privateDictData);
            $localSubrOffset = $this->getTopDictInt($privateDictOps, 19);
            if ($localSubrOffset !== null) {
                $localSubrStart = $privateOffset + $localSubrOffset;
                $localSubrIndex = $this->parseIndex($localSubrStart);
                $localSubrIndexData = $this->sliceBytes($localSubrStart, $localSubrIndex['nextOffset']);
            }
        }

        // Charset (operator 15)
        $charset = $this->parseCharset(
            $this->getTopDictInt($topDictOperators, 15),
            count($charStrings),
        );

        // FDArray (operator 12 36): stored as the raw INDEX bytes.
        $fdArrayData = null;
        $fdArrayOffset = $this->getTopDictInt($topDictOperators, '12.36');
        if ($fdArrayOffset !== null) {
            $fdArrayIndex = $this->parseIndex($fdArrayOffset);
            $fdArrayData = $this->sliceBytes($fdArrayOffset, $fdArrayIndex['nextOffset']);
        }

        // FDSelect (operator 12 37): length is derived from the format byte;
        // for any other format everything up to the end of the data is kept.
        $fdSelectData = null;
        $fdSelectOffset = $this->getTopDictInt($topDictOperators, '12.37');
        if ($fdSelectOffset !== null) {
            $fdSelectLen = match ($this->readUint8($fdSelectOffset)) {
                // format byte + one FD index per glyph
                0 => 1 + count($charStrings),
                // format + nRanges + ranges (3 bytes each) + sentinel
                3 => 1 + 2 + $this->readUint16($fdSelectOffset + 1) * 3 + 2,
                default => $this->length - $fdSelectOffset,
            };
            $fdSelectData = substr($this->data, $fdSelectOffset, $fdSelectLen);
        }

        return new CffData(
            major: $major,
            minor: $minor,
            hdrSize: $hdrSize,
            nameIndexData: $nameIndexData,
            topDictOperators: $topDictOperators,
            stringIndexData: $stringIndexData,
            globalSubrIndexData: $globalSubrIndexData,
            charStrings: $charStrings,
            privateDictData: $privateDictData,
            localSubrIndexData: $localSubrIndexData,
            charset: $charset,
            fdArrayData: $fdArrayData,
            fdSelectData: $fdSelectData,
        );
    }

    /**
     * Parse a CFF INDEX structure: count(2), offSize(1), count+1 offsets, data.
     *
     * An INDEX whose count is 0 consists of the count field only. Because
     * readUint16() yields 0 past the end of the data, an INDEX that starts
     * at or beyond the end is reported as empty.
     *
     * @param int $offset Absolute position of the INDEX in the CFF data.
     *
     * @return array{entries: list<string>, nextOffset: int} The entry byte strings
     *         in order, and the absolute offset of the first byte after the INDEX.
     *
     * @throws \RuntimeException If the offset is negative, offSize is not 1..4,
     *                           the offset array is truncated or not ascending,
     *                           or the entry data runs past the end of the table.
     */
    private function parseIndex(int $offset): array
    {
        if ($offset < 0) {
            throw new \RuntimeException("CFF INDEX offset {$offset} is negative");
        }

        $count = $this->readUint16($offset);
        $offset += 2;

        if ($count === 0) {
            return ['entries' => [], 'nextOffset' => $offset];
        }

        $offSize = $this->readUint8($offset);
        $offset++;
        if ($offSize < 1 || $offSize > 4) {
            throw new \RuntimeException("CFF INDEX has invalid offSize {$offSize}");
        }

        // Read count+1 offsets; they are 1-based and must never decrease.
        $offsets = [];
        $previous = 1;
        for ($i = 0; $i <= $count; $i++) {
            $value = $this->readOffset($offset, $offSize);
            if ($value < $previous) {
                throw new \RuntimeException("CFF INDEX offset #{$i} ({$value}) is out of order");
            }
            $offsets[] = $value;
            $previous = $value;
            $offset += $offSize;
        }

        // Offsets are relative to the byte preceding the object data.
        $dataBase = $offset - 1;
        $nextOffset = $dataBase + $offsets[$count];
        if ($nextOffset > $this->length) {
            throw new \RuntimeException('CFF INDEX data extends past the end of the table');
        }

        $entries = [];
        for ($i = 0; $i < $count; $i++) {
            $entries[] = substr($this->data, $dataBase + $offsets[$i], $offsets[$i + 1] - $offsets[$i]);
        }

        return ['entries' => $entries, 'nextOffset' => $nextOffset];
    }

    /**
     * Parse DICT binary data into an operator => operand(s) map.
     *
     * One-byte operators are keyed by their integer value, two-byte (escape 12)
     * operators by the string "12.<second byte>". An operator with exactly one
     * operand maps to that scalar; otherwise it maps to the operand list (which
     * is empty when no operands preceded the operator). Reserved bytes
     * (22–27, 31, 255) are skipped.
     *
     * @param string $data Raw DICT bytes.
     *
     * @return array<int|string, int|float|array<int, int|float>>
     *
     * @throws \RuntimeException If an operand or escape operator is cut off by the end of $data.
     */
    private function parseDictData(string $data): array
    {
        $operators = [];
        $operandStack = [];
        $pos = 0;
        $len = strlen($data);

        while ($pos < $len) {
            $b0 = ord($data[$pos]);

            if ($b0 <= 21) {
                // Operator: consumes every operand collected so far.
                $pos++;
                $key = $b0;
                if ($b0 === 12) {
                    if ($pos >= $len) {
                        throw new \RuntimeException('CFF DICT truncated inside a two-byte operator');
                    }
                    $key = '12.' . ord($data[$pos]);
                    $pos++;
                }
                $operators[$key] = count($operandStack) === 1 ? $operandStack[0] : $operandStack;
                $operandStack = [];
                continue;
            }

            if ($b0 === 30) {
                // Real number: packed BCD nibbles following the prefix byte.
                [$real, $pos] = $this->decodeReal($data, $pos + 1);
                $operandStack[] = $real;
                continue;
            }

            // Encoded size (including b0) of an integer operand; 0 = reserved byte.
            $size = match (true) {
                $b0 === 28 => 3,
                $b0 === 29 => 5,
                $b0 >= 32 && $b0 <= 246 => 1,
                $b0 >= 247 && $b0 <= 254 => 2,
                default => 0,
            };

            if ($size === 0) {
                // Unknown byte — skip
                $pos++;
                continue;
            }
            if ($pos + $size > $len) {
                throw new \RuntimeException("CFF DICT operand at byte {$pos} is truncated");
            }

            if ($size === 1) {
                // Single-byte integer: value = b0 - 139
                $value = $b0 - 139;
            } elseif ($size === 2) {
                $b1 = ord($data[$pos + 1]);
                $value = $b0 <= 250
                    ? ($b0 - 247) * 256 + $b1 + 108      // two-byte positive
                    : -($b0 - 251) * 256 - $b1 - 108;    // two-byte negative
            } elseif ($size === 3) {
                // 16-bit big-endian two's complement
                $value = (ord($data[$pos + 1]) << 8) | ord($data[$pos + 2]);
                if ($value >= 0x8000) {
                    $value -= 0x10000;
                }
            } else {
                // 32-bit big-endian two's complement
                $value = (ord($data[$pos + 1]) << 24) | (ord($data[$pos + 2]) << 16)
                    | (ord($data[$pos + 3]) << 8) | ord($data[$pos + 4]);
                if ($value >= 0x80000000) {
                    $value = (int) ($value - 0x100000000);
                }
            }

            $operandStack[] = $value;
            $pos += $size;
        }

        return $operators;
    }

    /**
     * Decode the nibble-packed real number operand that follows a 30 prefix byte.
     *
     * Nibbles 0–9 are digits, 0xA is '.', 0xB is 'E', 0xC is 'E-', 0xD is
     * reserved (ignored), 0xE is '-', and 0xF ends the number. If $data ends
     * before a terminator is seen, the digits read so far are used.
     *
     * @param string $data DICT bytes.
     * @param int    $pos  Position of the first byte after the 30 prefix.
     *
     * @return array{0: float, 1: int} The decoded value and the position of the
     *         first byte after the operand.
     */
    private function decodeReal(string $data, int $pos): array
    {
        $text = '';
        $len = strlen($data);

        while ($pos < $len) {
            $byte = ord($data[$pos]);
            $pos++;
            foreach ([$byte >> 4, $byte & 0x0F] as $nibble) {
                if ($nibble === 0x0F) {
                    return [(float) $text, $pos];
                }
                $text .= match ($nibble) {
                    0x0A => '.',
                    0x0B => 'E',
                    0x0C => 'E-',
                    0x0D => '', // reserved
                    0x0E => '-',
                    default => (string) $nibble,
                };
            }
        }

        return [(float) $text, $pos];
    }

    /**
     * Parse the charset structure.
     *
     * A missing offset, or an offset of 0, 1 or 2 (predefined ISOAdobe, Expert,
     * ExpertSubset charsets), is not expanded to the real predefined tables:
     * sequential IDs (GID n => n) are generated instead. A format other than
     * 0, 1 or 2 leaves only the .notdef mapping in the result.
     *
     * @param int|null $offset  Absolute charset offset from Top DICT operator 15, if any.
     * @param int      $nGlyphs Number of glyphs (CharStrings entries).
     *
     * @return array<int, int> GID => SID/CID
     *
     * @throws \RuntimeException If the format byte (or a format 1 range count)
     *                           lies outside the table data.
     */
    private function parseCharset(?int $offset, int $nGlyphs): array
    {
        $charset = [0 => 0]; // GID 0 is always .notdef (SID 0)

        if ($offset === null || $offset <= 2) {
            // For subsetting purposes, generate sequential SIDs.
            for ($gid = 1; $gid < $nGlyphs; $gid++) {
                $charset[$gid] = $gid;
            }
            return $charset;
        }

        $format = $this->readUint8($offset);
        $pos = $offset + 1;

        if ($format === 0) {
            // Format 0: one 16-bit SID per glyph after .notdef
            for ($gid = 1; $gid < $nGlyphs; $gid++) {
                $charset[$gid] = $this->readUint16($pos);
                $pos += 2;
            }
            return $charset;
        }

        if ($format !== 1 && $format !== 2) {
            return $charset;
        }

        // Formats 1 and 2: ranges of (first SID, nLeft); nLeft is 1 byte in
        // format 1 and 2 bytes in format 2. Each range covers nLeft + 1 glyphs.
        $gid = 1;
        while ($gid < $nGlyphs) {
            $first = $this->readUint16($pos);
            if ($format === 1) {
                $nLeft = $this->readUint8($pos + 2);
                $pos += 3;
            } else {
                $nLeft = $this->readUint16($pos + 2);
                $pos += 4;
            }
            for ($i = 0; $i <= $nLeft && $gid < $nGlyphs; $i++, $gid++) {
                $charset[$gid] = $first + $i;
            }
        }

        return $charset;
    }

    /**
     * Return the bytes of the CFF data in the half-open range [$start, $end).
     */
    private function sliceBytes(int $start, int $end): string
    {
        return substr($this->data, $start, $end - $start);
    }

    /**
     * Read a big-endian unsigned integer of $offSize bytes at $offset.
     *
     * @throws \RuntimeException If the bytes are not fully inside the table data.
     */
    private function readOffset(int $offset, int $offSize): int
    {
        if ($offset < 0 || $offset + $offSize > $this->length) {
            throw new \RuntimeException("CFF data truncated: cannot read {$offSize}-byte offset at {$offset}");
        }

        $val = 0;
        for ($i = 0; $i < $offSize; $i++) {
            $val = ($val << 8) | ord($this->data[$offset + $i]);
        }
        return $val;
    }

    /**
     * Read one unsigned byte at $offset.
     *
     * @throws \RuntimeException If $offset is outside the table data.
     */
    private function readUint8(int $offset): int
    {
        if ($offset < 0 || $offset >= $this->length) {
            throw new \RuntimeException("CFF data truncated: cannot read byte at offset {$offset}");
        }
        return ord($this->data[$offset]);
    }

    /**
     * Read a big-endian unsigned 16-bit integer at $offset.
     *
     * Returns 0 instead of failing when the two bytes are not fully inside the
     * data; parseIndex() therefore treats an INDEX at the very end of the data
     * as empty. Negative offsets are treated the same way, because PHP would
     * otherwise index the string from its end.
     */
    private function readUint16(int $offset): int
    {
        if ($offset < 0 || $offset + 1 >= $this->length) {
            return 0;
        }
        return (ord($this->data[$offset]) << 8) | ord($this->data[$offset + 1]);
    }

    /**
     * Fetch a DICT operator's (first) operand as an integer.
     *
     * @param array<int|string, int|float|array<int, int|float>> $operators Map produced by parseDictData().
     * @param int|string $key Operator key: an int for one-byte operators, "12.<n>" for two-byte ones.
     *
     * @return int|null The operand cast to int, or null when the operator is
     *                  absent or was stored with no operands.
     */
    private function getTopDictInt(array $operators, int|string $key): ?int
    {
        if (!isset($operators[$key])) {
            return null;
        }

        $val = $operators[$key];
        if (is_array($val)) {
            // An operator that had no operands is stored as an empty list.
            return isset($val[0]) ? (int) $val[0] : null;
        }
        return (int) $val;
    }
}