Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
94.95% covered (success)
94.95%
94 / 99
57.14% covered (warning)
57.14%
4 / 7
CRAP
33.33% covered (danger)
33.33%
1 / 3
LzwFilter
94.59% covered (success)
94.59%
70 / 74
33.33% covered (danger)
33.33%
1 / 3
28.12
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 decode
93.02% covered (success)
93.02%
40 / 43
0.00% covered (danger)
0.00%
0 / 1
16.09
 encode
96.67% covered (success)
96.67%
29 / 30
0.00% covered (danger)
0.00%
0 / 1
11
LzwBitReader
92.31% covered (success)
92.31%
12 / 13
50.00% covered (danger)
50.00%
1 / 2
5.01
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 read
91.67% covered (success)
91.67%
11 / 12
0.00% covered (danger)
0.00%
0 / 1
4.01
LzwBitWriter
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
2 / 2
5
100.00% covered (success)
100.00%
1 / 1
 write
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
3
 finish
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3declare(strict_types=1);
4
5namespace Phpdftk\Filters;
6
/**
 * LZW filter — decode/encode per ISO 32000-2 §7.4.4.2.
 *
 * PDF LZW uses MSB-first bit packing, variable code widths from
 * 9 to 12 bits, clear code = 256, EOD code = 257.
 *
 * With the "early change" convention (the default) the code width grows
 * one table entry sooner than strictly necessary. The decoder builds its
 * table one entry behind the encoder, so in both modes the decoder's
 * width-transition threshold is one lower than the encoder's.
 */
final class LzwFilter implements FilterInterface
{
    private const CLEAR_CODE = 256;
    private const EOD_CODE = 257;
    private const FIRST_CODE = 258;
    private const MIN_CODE_SIZE = 9;
    private const MAX_CODE_SIZE = 12;
    private const TABLE_LIMIT = 4096;

    /**
     * @param int $earlyChange When 1 (default), code-size transition uses
     *                         "early change" convention per PDF spec. Any
     *                         other value is treated as 0: the transition
     *                         happens one code later.
     */
    public function __construct(
        private readonly int $earlyChange = 1,
    ) {}

    /**
     * Decodes an LZW-compressed byte string.
     *
     * Decoding stops at the EOD code, when the input runs out of bits, or
     * when the first code after a clear is not a literal byte; whatever was
     * decoded up to that point is returned.
     *
     * @param string $data LZW-compressed bytes.
     *
     * @return string The decoded bytes.
     */
    public function decode(string $data): string
    {
        $reader = new LzwBitReader($data);
        $early = $this->earlyOffset();

        $table = self::initialDecodeTable();
        $nextCode = self::FIRST_CODE;
        $codeSize = self::MIN_CODE_SIZE;
        $prevEntry = null;
        $result = '';

        while (true) {
            $code = $reader->read($codeSize);
            if ($code === null || $code === self::EOD_CODE) {
                break;
            }

            if ($code === self::CLEAR_CODE) {
                $table = self::initialDecodeTable();
                $nextCode = self::FIRST_CODE;
                $codeSize = self::MIN_CODE_SIZE;
                $prevEntry = null;
                continue;
            }

            if ($prevEntry === null) {
                // First code after a clear: it must be a literal, and no
                // table entry is created because there is no previous
                // sequence to extend.
                if (!isset($table[$code])) {
                    break;
                }
                $prevEntry = $table[$code];
                $result .= $prevEntry;
                continue;
            }

            // A code not yet in the table is the KwKwK case: the sequence is
            // the previous one followed by its own first byte.
            $entry = $table[$code] ?? ($prevEntry . $prevEntry[0]);
            $result .= $entry;

            if ($nextCode < self::TABLE_LIMIT) {
                $table[$nextCode] = $prevEntry . $entry[0];
                $nextCode++;
            }

            // The decoder's table is one entry behind the encoder's, so the
            // threshold here is one lower than the encoder's:
            //   early change: nextCode + 1 >= 2^codeSize
            //   otherwise:    nextCode     >= 2^codeSize
            if ($codeSize < self::MAX_CODE_SIZE && ($nextCode + $early) >= (1 << $codeSize)) {
                $codeSize++;
            }

            $prevEntry = $entry;
        }

        return $result;
    }

    /**
     * Encodes a byte string with LZW.
     *
     * The output starts with a clear code and ends with an EOD code; the
     * final partial byte is zero-padded by the bit writer. Once the table
     * holds 4096 codes no further entries are added.
     *
     * @param string $data Raw bytes to compress.
     *
     * @return string The LZW-compressed bytes.
     */
    public function encode(string $data): string
    {
        $writer = new LzwBitWriter();
        $early = $this->earlyOffset();
        $codeSize = self::MIN_CODE_SIZE;

        // Sequence => code. Start with one entry per byte value.
        $table = [];
        for ($i = 0; $i < 256; $i++) {
            $table[chr($i)] = $i;
        }
        $nextCode = self::FIRST_CODE;

        $writer->write(self::CLEAR_CODE, $codeSize);

        $len = strlen($data);
        if ($len === 0) {
            $writer->write(self::EOD_CODE, $codeSize);
            return $writer->finish();
        }

        $w = $data[0];

        for ($i = 1; $i < $len; $i++) {
            $c = $data[$i];
            $wc = $w . $c;

            if (isset($table[$wc])) {
                $w = $wc;
                continue;
            }

            $writer->write($table[$w], $codeSize);

            if ($nextCode < self::TABLE_LIMIT) {
                $table[$wc] = $nextCode;
                $nextCode++;

                // Encoder-side width transition:
                //   early change: nextCode >= 2^codeSize
                //   otherwise:    nextCode >  2^codeSize
                if ($codeSize < self::MAX_CODE_SIZE && ($nextCode + $early) > (1 << $codeSize)) {
                    $codeSize++;
                }
            }

            $w = $c;
        }

        // Emit code for the remaining sequence.
        $writer->write($table[$w], $codeSize);

        // decode() adds one more table entry after reading that final code
        // and may widen the code size before it reads EOD. Apply the same
        // transition here so EOD is written at the width decode() expects.
        if ($codeSize < self::MAX_CODE_SIZE && ($nextCode + 1 + $early) > (1 << $codeSize)) {
            $codeSize++;
        }

        $writer->write(self::EOD_CODE, $codeSize);

        return $writer->finish();
    }

    /**
     * Returns 1 under the early-change convention, 0 otherwise.
     */
    private function earlyOffset(): int
    {
        return $this->earlyChange === 1 ? 1 : 0;
    }

    /**
     * Builds the initial decode table mapping codes 0..255 to single bytes.
     *
     * @return array<int, string>
     */
    private static function initialDecodeTable(): array
    {
        $table = [];
        for ($i = 0; $i < 256; $i++) {
            $table[$i] = chr($i);
        }

        return $table;
    }
}
181
/**
 * @internal MSB-first bit reader for LZW decode.
 *
 * Bits are consumed from the most significant bit of each byte downward.
 */
final class LzwBitReader
{
    private int $byteIndex = 0;
    private int $bitIndex = 0;
    private readonly int $byteCount;

    public function __construct(private readonly string $data)
    {
        $this->byteCount = strlen($data);
    }

    /**
     * Reads the next $bits bits as an unsigned integer, first bit read
     * being the most significant.
     *
     * @return int|null The value read, or null when the input is exhausted
     *                  before $bits bits could be read.
     */
    public function read(int $bits): ?int
    {
        $value = 0;
        $remaining = $bits;

        while ($remaining > 0) {
            if ($this->byteIndex >= $this->byteCount) {
                return null;
            }

            // Mask selecting the current bit, counting from the MSB.
            $mask = 0x80 >> $this->bitIndex;
            $isSet = (ord($this->data[$this->byteIndex]) & $mask) !== 0;
            $value = ($value << 1) | ($isSet ? 1 : 0);

            $this->bitIndex++;
            if ($this->bitIndex === 8) {
                $this->bitIndex = 0;
                $this->byteIndex++;
            }

            $remaining--;
        }

        return $value;
    }
}
216
/**
 * @internal MSB-first bit writer for LZW encode.
 *
 * Bits are appended most significant first. Completed bytes move to the
 * output buffer; a partially filled byte is held until finish().
 */
final class LzwBitWriter
{
    private string $buffer = '';
    private int $currentByte = 0;
    private int $bitPos = 0;

    /**
     * Appends the low $bits bits of $code, most significant bit first.
     */
    public function write(int $code, int $bits): void
    {
        for ($i = $bits - 1; $i >= 0; $i--) {
            $bit = ($code >> $i) & 1;
            $this->currentByte = ($this->currentByte << 1) | $bit;
            $this->bitPos++;

            if ($this->bitPos >= 8) {
                $this->buffer .= chr($this->currentByte);
                $this->currentByte = 0;
                $this->bitPos = 0;
            }
        }
    }

    /**
     * Returns everything written so far, zero-padding a trailing partial
     * byte.
     *
     * The writer's state is left untouched, so repeated calls return the
     * same string instead of appending another padding byte each time.
     */
    public function finish(): string
    {
        if ($this->bitPos === 0) {
            return $this->buffer;
        }

        // Left-align the pending bits; the low bits become zero padding.
        return $this->buffer . chr($this->currentByte << (8 - $this->bitPos));
    }
}
250}