Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.95% |
94 / 99 |
|
57.14% |
4 / 7 |
CRAP | |
33.33% |
1 / 3 |
| LzwFilter | |
94.59% |
70 / 74 |
|
33.33% |
1 / 3 |
28.12 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| decode | |
93.02% |
40 / 43 |
|
0.00% |
0 / 1 |
16.09 | |||
| encode | |
96.67% |
29 / 30 |
|
0.00% |
0 / 1 |
11 | |||
| LzwBitReader | |
92.31% |
12 / 13 |
|
50.00% |
1 / 2 |
5.01 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| read | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
4.01 | |||
| LzwBitWriter | |
100.00% |
12 / 12 |
|
100.00% |
2 / 2 |
5 | |
100.00% |
1 / 1 |
| write | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
| finish | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Filters; |
| 6 | |
/**
 * LZW filter — decode/encode per ISO 32000-2 §7.4.4.2.
 *
 * PDF LZW packs codes MSB-first, uses variable code widths from 9 to
 * 12 bits, and reserves code 256 as "clear table" and 257 as "end of data".
 *
 * Code-width timing: the encoder assigns a table entry right after every
 * code it emits, while the decoder can only assign that same entry after it
 * has read the FOLLOWING code. The decoder's next free code is therefore
 * always one behind the encoder's. Both directions share a single width rule
 * expressed in terms of the encoder's next free code (see widenCodeSize()),
 * and the decoder feeds it "its own next free code + 1".
 */
final class LzwFilter implements FilterInterface
{
    private const CLEAR_CODE = 256;
    private const EOD_CODE = 257;
    private const FIRST_CODE = 258;
    private const INITIAL_CODE_SIZE = 9;
    private const MAX_CODE_SIZE = 12;
    /** One past the highest code that fits in MAX_CODE_SIZE bits. */
    private const TABLE_LIMIT = 4096;

    /**
     * @param int $earlyChange When 1 (default), code-size transition uses
     *                         "early change" convention per PDF spec. Any
     *                         other value selects the late transition, which
     *                         happens one code later.
     */
    public function __construct(
        private readonly int $earlyChange = 1,
    ) {}

    /**
     * Decode an LZW-compressed byte string.
     *
     * Decoding stops at the EOD code, when the input runs out of bits, or
     * when the first code after a clear is not a single-byte literal. Whatever
     * was decoded up to that point is returned.
     *
     * @param string $data LZW-compressed bytes.
     *
     * @return string The decoded bytes.
     */
    public function decode(string $data): string
    {
        $reader = new LzwBitReader($data);
        $codeSize = self::INITIAL_CODE_SIZE;
        $table = self::initialDecodeTable();
        $nextCode = self::FIRST_CODE;
        $prevEntry = null;
        $result = '';

        while (true) {
            $code = $reader->read($codeSize);
            if ($code === null || $code === self::EOD_CODE) {
                break;
            }

            if ($code === self::CLEAR_CODE) {
                $table = self::initialDecodeTable();
                $nextCode = self::FIRST_CODE;
                $codeSize = self::INITIAL_CODE_SIZE;
                $prevEntry = null;
                continue;
            }

            if ($prevEntry === null) {
                // First code after a clear: the encoder had no previous
                // sequence to extend, so no table entry is created here.
                if (!isset($table[$code])) {
                    break;
                }
                $prevEntry = $table[$code];
                $result .= $prevEntry;
                continue;
            }

            // A code that is not in the table yet is treated as the KwKwK
            // case: the sequence is the previous one plus its own first byte.
            // Codes beyond $nextCode are not validated separately.
            $entry = $table[$code] ?? $prevEntry . $prevEntry[0];
            $result .= $entry;

            if ($nextCode < self::TABLE_LIMIT) {
                $table[$nextCode] = $prevEntry . $entry[0];
                $nextCode++;
            }

            // The encoder is one entry ahead of this table, so evaluate the
            // width rule against $nextCode + 1. For the late-change mode this
            // widens at $nextCode >= 2^codeSize; the previous "> 2^codeSize"
            // test widened one code after the encoder already had.
            $codeSize = $this->widenCodeSize($nextCode + 1, $codeSize);

            $prevEntry = $entry;
        }

        return $result;
    }

    /**
     * Encode a byte string with LZW.
     *
     * The output always starts with a clear code and ends with an EOD code;
     * the last byte is zero-padded. No clear code is emitted mid-stream: once
     * the table is full, encoding continues with the existing entries.
     *
     * @param string $data Raw bytes to compress.
     *
     * @return string The LZW-compressed bytes.
     */
    public function encode(string $data): string
    {
        $writer = new LzwBitWriter();
        $codeSize = self::INITIAL_CODE_SIZE;

        // Sequence => code lookup, seeded with every single-byte sequence.
        $table = [];
        for ($i = 0; $i < 256; $i++) {
            $table[chr($i)] = $i;
        }
        $nextCode = self::FIRST_CODE;
        $len = strlen($data);

        $writer->write(self::CLEAR_CODE, $codeSize);

        if ($len === 0) {
            $writer->write(self::EOD_CODE, $codeSize);
            return $writer->finish();
        }

        $w = $data[0];

        for ($i = 1; $i < $len; $i++) {
            $c = $data[$i];
            $wc = $w . $c;

            if (isset($table[$wc])) {
                // Keep extending the current sequence while it is known.
                $w = $wc;
                continue;
            }

            $writer->write($table[$w], $codeSize);

            if ($nextCode < self::TABLE_LIMIT) {
                $table[$wc] = $nextCode;
                $nextCode++;
                $codeSize = $this->widenCodeSize($nextCode, $codeSize);
            }

            $w = $c;
        }

        // Emit the code for the trailing sequence.
        $writer->write($table[$w], $codeSize);

        // A decoder still adds one table entry after reading that final code
        // and may widen before it reads EOD. Mirror that step so EOD is
        // written at the width the decoder will actually use.
        if ($nextCode < self::TABLE_LIMIT) {
            $codeSize = $this->widenCodeSize($nextCode + 1, $codeSize);
        }

        $writer->write(self::EOD_CODE, $codeSize);

        return $writer->finish();
    }

    /**
     * Build the decoder table holding the 256 single-byte sequences.
     *
     * @return array<int, string>
     */
    private static function initialDecodeTable(): array
    {
        return array_map(chr(...), range(0, 255));
    }

    /**
     * Apply the code-width rule shared by encode() and decode().
     *
     * @param int $encoderNextCode Next free code from the encoder's point of
     *                             view (the decoder passes its own + 1).
     * @param int $codeSize        Current code width in bits.
     *
     * @return int The width to use for the next code (never above 12).
     */
    private function widenCodeSize(int $encoderNextCode, int $codeSize): int
    {
        if ($codeSize >= self::MAX_CODE_SIZE) {
            return $codeSize;
        }

        $threshold = 1 << $codeSize;
        $mustWiden = $this->earlyChange === 1
            ? $encoderNextCode >= $threshold
            : $encoderNextCode > $threshold;

        return $mustWiden ? $codeSize + 1 : $codeSize;
    }
}
| 181 | |
/**
 * @internal Pulls fixed-width codes out of a byte string, most significant
 *           bit first, for the LZW decoder.
 */
final class LzwBitReader
{
    /** Index of the byte currently being consumed. */
    private int $byteIndex = 0;
    /** Number of bits already taken from the current byte (0-7). */
    private int $bitOffset = 0;
    private readonly int $size;

    public function __construct(private readonly string $data)
    {
        $this->size = strlen($data);
    }

    /**
     * Read the next $bits bits as an unsigned integer.
     *
     * Returns null as soon as the input is exhausted; bits consumed before
     * that point stay consumed.
     */
    public function read(int $bits): ?int
    {
        $value = 0;
        $remaining = $bits;

        while ($remaining > 0) {
            if ($this->byteIndex >= $this->size) {
                return null;
            }

            $shift = 7 - $this->bitOffset;
            $current = ord($this->data[$this->byteIndex]);
            $value = ($value << 1) | (($current >> $shift) & 1);

            if (++$this->bitOffset >= 8) {
                // Current byte fully consumed: move on to the next one.
                $this->bitOffset = 0;
                $this->byteIndex++;
            }

            $remaining--;
        }

        return $value;
    }
}
| 216 | |
/**
 * @internal Packs variable-width codes into a byte string, most significant
 *           bit first, for the LZW encoder.
 */
final class LzwBitWriter
{
    /** Completed output bytes. */
    private string $bytes = '';
    /** Bits collected for the byte under construction. */
    private int $pending = 0;
    /** How many bits $pending currently holds (0-7). */
    private int $pendingBits = 0;

    /**
     * Append the lowest $bits bits of $code, highest bit first.
     */
    public function write(int $code, int $bits): void
    {
        $shift = $bits;

        while ($shift > 0) {
            $shift--;
            $this->pending = ($this->pending << 1) | (($code >> $shift) & 1);
            $this->pendingBits++;

            if ($this->pendingBits < 8) {
                continue;
            }

            // A full byte has been assembled: flush it and start over.
            $this->bytes .= chr($this->pending);
            $this->pending = 0;
            $this->pendingBits = 0;
        }
    }

    /**
     * Flush a partially filled last byte (zero-padded on the right) and
     * return everything written so far.
     */
    public function finish(): string
    {
        if ($this->pendingBits === 0) {
            return $this->bytes;
        }

        $this->pending <<= (8 - $this->pendingBits);
        $this->bytes .= chr($this->pending);

        return $this->bytes;
    }
}