Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.44% |
34 / 36 |
|
0.00% |
0 / 1 |
CRAP | |
0.00% |
0 / 1 |
| CMapParser | |
94.44% |
34 / 36 |
|
0.00% |
0 / 1 |
15.04 | |
0.00% |
0 / 1 |
| parse | |
94.44% |
34 / 36 |
|
0.00% |
0 / 1 |
15.04 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Encoding; |
| 6 | |
/**
 * Parses the bfchar / bfrange sections of a PDF ToUnicode CMap stream.
 *
 * Entries are located by scanning each section for hex-string tokens rather
 * than splitting it into lines, so the parser does not depend on the end-of-line
 * convention (LF, CRLF or bare CR) or on one entry per line.
 */
final class CMapParser
{
    /**
     * Upper bound on the number of codes a single simple bfrange may expand to.
     *
     * Unicode has 0x110000 code points, so a longer range cannot be meaningful;
     * the cap keeps a hostile stream (e.g. <00000000> <FFFFFFFF> <0000>) from
     * exhausting memory.
     */
    private const MAX_RANGE_SPAN = 0x110000;

    /**
     * Parse a PDF CMap stream and return character code to Unicode codepoint mapping.
     *
     * All bfchar sections are applied first, then all bfrange sections, so a
     * bfrange entry overrides a bfchar entry for the same character code.
     *
     * @param string $cmapStream decoded CMap stream contents
     *
     * @return array<int, int> character code => Unicode codepoint
     */
    public function parse(string $cmapStream): array
    {
        $result = [];

        // bfchar entries: <srcCode> <dstCode>
        foreach ($this->extractSections($cmapStream, 'bfchar') as $section) {
            if (!preg_match_all('/<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>/', $section, $pairs, PREG_SET_ORDER)) {
                continue;
            }
            foreach ($pairs as $pair) {
                $result[(int) hexdec($pair[1])] = $this->decodeDestination($pair[2]);
            }
        }

        // bfrange entries: <startCode> <endCode> <startDst>
        //              or: <startCode> <endCode> [<dst1> <dst2> ...]
        $rangePattern = '/<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>\s*(?:<([0-9A-Fa-f]+)>|\[([^\]]*)\])/';
        foreach ($this->extractSections($cmapStream, 'bfrange') as $section) {
            if (!preg_match_all($rangePattern, $section, $entries, PREG_SET_ORDER)) {
                continue;
            }
            foreach ($entries as $entry) {
                $startCode = (int) hexdec($entry[1]);
                $endCode = (int) hexdec($entry[2]);

                // Group 3 is non-empty only for the simple (single destination) form.
                if (($entry[3] ?? '') !== '') {
                    if ($endCode - $startCode >= self::MAX_RANGE_SPAN) {
                        continue;
                    }
                    $startDst = $this->decodeDestination($entry[3]);
                    for ($code = $startCode; $code <= $endCode; $code++) {
                        $result[$code] = $startDst + ($code - $startCode);
                    }
                    continue;
                }

                // Array form: the n-th destination belongs to startCode + n.
                // Codes without a listed destination are left unmapped.
                preg_match_all('/<([0-9A-Fa-f]+)>/', $entry[4] ?? '', $dstMatches);
                foreach ($dstMatches[1] as $idx => $hex) {
                    $code = $startCode + $idx;
                    if ($code > $endCode) {
                        break;
                    }
                    $result[$code] = $this->decodeDestination($hex);
                }
            }
        }

        return $result;
    }

    /**
     * Return the bodies of every "begin<keyword> ... end<keyword>" section.
     *
     * @return list<string>
     */
    private function extractSections(string $cmapStream, string $keyword): array
    {
        $pattern = '/begin' . preg_quote($keyword, '/') . '(.*?)end' . preg_quote($keyword, '/') . '/s';
        if (!preg_match_all($pattern, $cmapStream, $matches)) {
            return [];
        }

        return $matches[1];
    }

    /**
     * Convert a destination hex string to a single Unicode codepoint.
     *
     * Destinations are UTF-16BE, so an 8-digit value made of a high and a low
     * surrogate is combined into the supplementary-plane codepoint it encodes.
     * Any other value is returned as its plain integer value.
     */
    private function decodeDestination(string $hex): int
    {
        if (strlen($hex) === 8) {
            $high = (int) hexdec(substr($hex, 0, 4));
            $low = (int) hexdec(substr($hex, 4, 4));
            if ($high >= 0xD800 && $high <= 0xDBFF && $low >= 0xDC00 && $low <= 0xDFFF) {
                return 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
            }
        }

        return (int) hexdec($hex);
    }
}