Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
94.44% covered (success)
94.44%
34 / 36
0.00% covered (danger)
0.00%
0 / 1
CRAP
0.00% covered (danger)
0.00%
0 / 1
CMapParser
94.44% covered (success)
94.44%
34 / 36
0.00% covered (danger)
0.00%
0 / 1
15.04
0.00% covered (danger)
0.00%
0 / 1
 parse
94.44% covered (success)
94.44%
34 / 36
0.00% covered (danger)
0.00%
0 / 1
15.04
1<?php
2
3declare(strict_types=1);
4
5namespace Phpdftk\Encoding;
6
final class CMapParser
{
    /**
     * Parse a PDF CMap stream and return character code to Unicode codepoint mapping.
     *
     * Every beginbfchar/endbfchar section is processed first, then every
     * beginbfrange/endbfrange section, so a range entry overrides a bfchar
     * entry for the same character code.
     *
     * Entries are recognised by scanning the hex-string tokens of a section
     * rather than by splitting it into lines, so several entries on one line,
     * entries without whitespace between the hex strings, and destination
     * arrays that wrap across lines are all handled.
     *
     * Limitation: the return shape holds a single integer per code. A
     * destination that is not a surrogate pair is converted as one hex number,
     * so a multi-character destination (e.g. a ligature) is not split into
     * its individual characters.
     *
     * @param string $cmapStream Decoded CMap stream content.
     *
     * @return array<int, int> character code => Unicode codepoint
     */
    public function parse(string $cmapStream): array
    {
        $result = [];

        // Sections are delimited by keywords only; the body may be empty or
        // may directly abut the keywords without whitespace.
        if (preg_match_all('/\bbeginbfchar\b(.*?)\bendbfchar\b/s', $cmapStream, $sections) > 0) {
            foreach ($sections[1] as $section) {
                foreach ($this->bfCharEntries($section) as $code => $codepoint) {
                    $result[$code] = $codepoint;
                }
            }
        }

        if (preg_match_all('/\bbeginbfrange\b(.*?)\bendbfrange\b/s', $cmapStream, $sections) > 0) {
            foreach ($sections[1] as $section) {
                foreach ($this->bfRangeEntries($section) as $code => $codepoint) {
                    $result[$code] = $codepoint;
                }
            }
        }

        return $result;
    }

    /**
     * Yield the "code => codepoint" pairs of one bfchar section body.
     *
     * Each entry has the form `<srcCode> <dstCode>`; the whitespace between
     * the two hex strings is optional.
     *
     * @return \Generator<int, int>
     */
    private function bfCharEntries(string $section): \Generator
    {
        preg_match_all('/<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>/', $section, $pairs, PREG_SET_ORDER);

        foreach ($pairs as $pair) {
            yield (int) hexdec($pair[1]) => $this->decodeDestination($pair[2]);
        }
    }

    /**
     * Yield the "code => codepoint" pairs of one bfrange section body.
     *
     * Two entry forms are supported:
     *  - `<startCode> <endCode> <startDst>`: consecutive codes map to
     *    consecutive codepoints starting at startDst;
     *  - `<startCode> <endCode> [<dst1> <dst2> ...]`: the n-th code of the
     *    range maps to the n-th array element. Codes without a matching
     *    element are skipped, and surplus elements are ignored.
     *
     * @return \Generator<int, int>
     */
    private function bfRangeEntries(string $section): \Generator
    {
        // Group 3 is the scalar destination, group 4 the array body. `[^\]]*`
        // also matches line breaks, so wrapped arrays are captured whole.
        $pattern = '/<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>\s*(?:<([0-9A-Fa-f]+)>|\[([^\]]*)\])/';
        preg_match_all($pattern, $section, $ranges, PREG_SET_ORDER);

        foreach ($ranges as $range) {
            $startCode = (int) hexdec($range[1]);
            $endCode   = (int) hexdec($range[2]);

            // A hex group cannot match an empty string, so '' means the
            // array alternative matched instead of the scalar one.
            if ($range[3] !== '') {
                $startDst = $this->decodeDestination($range[3]);
                for ($code = $startCode; $code <= $endCode; $code++) {
                    yield $code => $startDst + ($code - $startCode);
                }
                continue;
            }

            preg_match_all('/<([0-9A-Fa-f]+)>/', $range[4] ?? '', $destinations);
            foreach ($destinations[1] as $offset => $destination) {
                $code = $startCode + $offset;
                if ($code > $endCode) {
                    break;
                }
                yield $code => $this->decodeDestination($destination);
            }
        }
    }

    /**
     * Convert a destination hex string to a Unicode codepoint.
     *
     * An 8-digit value made of a UTF-16 high surrogate (D800-DBFF) followed by
     * a low surrogate (DC00-DFFF) is combined into the supplementary-plane
     * codepoint it encodes. Any other value is converted as one hex number.
     */
    private function decodeDestination(string $hex): int
    {
        if (strlen($hex) === 8) {
            $high = (int) hexdec(substr($hex, 0, 4));
            $low  = (int) hexdec(substr($hex, 4, 4));

            if ($high >= 0xD800 && $high <= 0xDBFF && $low >= 0xDC00 && $low <= 0xDFFF) {
                return 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
            }
        }

        return (int) hexdec($hex);
    }
}