Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
81.63% covered (warning)
81.63%
80 / 98
54.55% covered (warning)
54.55%
6 / 11
CRAP
0.00% covered (danger)
0.00%
0 / 1
StreamParser
81.63% covered (warning)
81.63%
80 / 98
54.55% covered (warning)
54.55%
6 / 11
77.13
0.00% covered (danger)
0.00%
0 / 1
 setResolver
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 decode
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
12
 decodeFlate
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 decodeLzw
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 decodeCCITTFax
81.82% covered (warning)
81.82%
9 / 11
0.00% covered (danger)
0.00%
0 / 1
9.49
 decodeJbig2
36.36% covered (danger)
36.36%
4 / 11
0.00% covered (danger)
0.00%
0 / 1
15.28
 boolParam
37.50% covered (danger)
37.50%
3 / 8
0.00% covered (danger)
0.00%
0 / 1
7.91
 applyPredictor
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
3
 intParam
83.33% covered (warning)
83.33%
5 / 6
0.00% covered (danger)
0.00%
0 / 1
3.04
 resolveFilterNames
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
6
 resolveDecodeParms
78.57% covered (warning)
78.57%
11 / 14
0.00% covered (danger)
0.00%
0 / 1
10.98
1<?php
2
3declare(strict_types=1);
4
5namespace Phpdftk\Pdf\Reader\Parser;
6
7use Phpdftk\Filters\Ascii85Filter;
8use Phpdftk\Filters\AsciiHexFilter;
9use Phpdftk\Filters\CCITTFaxFilter;
10use Phpdftk\Filters\FlateFilter;
11use Phpdftk\Filters\Jbig2Filter;
12use Phpdftk\Filters\LzwFilter;
13use Phpdftk\Filters\PredictorFilter;
14use Phpdftk\Filters\RunLengthFilter;
15use Phpdftk\Pdf\Core\PdfArray;
16use Phpdftk\Pdf\Core\PdfDictionary;
17use Phpdftk\Pdf\Core\PdfName;
18use Phpdftk\Pdf\Core\PdfNumber;
19use Phpdftk\Pdf\Core\PdfReference;
20use Phpdftk\Pdf\Reader\Exception\UnsupportedFilterException;
21use Phpdftk\Pdf\Reader\ObjectResolver;
22
/**
 * Applies the filter pipeline from a stream dictionary's `/Filter`
 * entry to decompress raw stream data.
 *
 * Filters are applied in the order they are declared. Each filter is paired
 * with the `/DecodeParms` entry at the same position (if any). Indirect
 * references inside `/Filter` and `/DecodeParms` are only followed when an
 * {@see ObjectResolver} has been supplied through {@see self::setResolver()}.
 */
final class StreamParser
{
    /** Resolver used to follow indirect references; null until setResolver() is called. */
    private ?ObjectResolver $resolver = null;

    /**
     * Set the object resolver used to follow indirect references found in
     * /Filter, /DecodeParms and /JBIG2Globals.
     */
    public function setResolver(ObjectResolver $resolver): void
    {
        $this->resolver = $resolver;
    }

    /**
     * Decode stream data using the filter(s) declared in $dict.
     *
     * Returns $data unchanged when the dictionary has no /Filter entry or when
     * the entry yields no filter names. DCTDecode and JPXDecode data is passed
     * through untouched.
     *
     * @throws UnsupportedFilterException when a declared filter name is not recognised
     */
    public function decode(string $data, PdfDictionary $dict): string
    {
        $filter = $dict->get('Filter');
        if ($filter === null) {
            return $data;
        }

        $filterNames = $this->resolveFilterNames($filter);
        $decodeParms = $this->resolveDecodeParms($dict->get('DecodeParms'), count($filterNames));

        // $index is the filter's position inside /Filter, so it lines up with
        // the matching /DecodeParms entry even when invalid entries were skipped.
        foreach ($filterNames as $index => $name) {
            $params = $decodeParms[$index] ?? null;

            $data = match ($name) {
                'FlateDecode', 'Fl'      => $this->decodeFlate($data, $params),
                'LZWDecode', 'LZW'       => $this->decodeLzw($data, $params),
                'ASCII85Decode', 'A85'   => (new Ascii85Filter())->decode($data),
                'ASCIIHexDecode', 'AHx'  => (new AsciiHexFilter())->decode($data),
                'RunLengthDecode', 'RL'  => (new RunLengthFilter())->decode($data),
                'CCITTFaxDecode', 'CCF'  => $this->decodeCCITTFax($data, $params),
                'JBIG2Decode'            => $this->decodeJbig2($data, $params),
                // Image-specific filters: return data as-is (the raw bytes ARE the image)
                'DCTDecode', 'DCT',
                'JPXDecode'              => $data,
                default                  => throw new UnsupportedFilterException(
                    "Unsupported stream filter: $name",
                ),
            };
        }

        return $data;
    }

    /**
     * Decode FlateDecode with optional predictor post-processing.
     */
    private function decodeFlate(string $data, ?PdfDictionary $params): string
    {
        $inflated = (new FlateFilter())->decode($data);

        return $this->applyPredictor($inflated, $params);
    }

    /**
     * Decode LZWDecode with optional predictor post-processing.
     *
     * /EarlyChange defaults to 1 when absent.
     */
    private function decodeLzw(string $data, ?PdfDictionary $params): string
    {
        $earlyChange = $this->intParam($params, 'EarlyChange', 1);
        $expanded = (new LzwFilter($earlyChange))->decode($data);

        return $this->applyPredictor($expanded, $params);
    }

    /**
     * Decode CCITTFaxDecode with parameters from DecodeParms.
     *
     * Missing parameters fall back to: K=0, Columns=1728, Rows=0,
     * EndOfLine=false, EncodedByteAlign=false, EndOfBlock=true, BlackIs1=false.
     * Decoding is best-effort: if the filter throws, the input is returned as-is.
     */
    private function decodeCCITTFax(string $data, ?PdfDictionary $params): string
    {
        $filter = new CCITTFaxFilter(
            $this->intParam($params, 'K', 0),
            $this->intParam($params, 'Columns', 1728),
            $this->intParam($params, 'Rows', 0),
            $this->boolParam($params, 'EndOfLine', false),
            $this->boolParam($params, 'EncodedByteAlign', false),
            $this->boolParam($params, 'EndOfBlock', true),
            $this->boolParam($params, 'BlackIs1', false),
        );

        try {
            return $filter->decode($data);
        } catch (\Throwable) {
            // If decoding fails, return raw data (image streams are often usable as-is)
            return $data;
        }
    }

    /**
     * Decode JBIG2Decode with optional globals from DecodeParms.
     *
     * The globals segment is taken from /JBIG2Globals when that entry yields a
     * stream object; otherwise an empty globals string is used. Decoding is
     * best-effort: if the filter throws, the input is returned as-is.
     */
    private function decodeJbig2(string $data, ?PdfDictionary $params): string
    {
        $globals = '';
        $globalsObj = $this->dereference($params?->get('JBIG2Globals'));
        if ($globalsObj instanceof \Phpdftk\Pdf\Core\PdfStream) {
            $globals = $globalsObj->data;
        }

        $filter = new Jbig2Filter($globals);

        try {
            return $filter->decode($data);
        } catch (\Throwable) {
            // Deliberate best-effort fallback, same as CCITTFaxDecode.
            return $data;
        }
    }

    /**
     * Extract a boolean parameter from a DecodeParms dictionary.
     *
     * Accepts a PdfBoolean, a PdfName spelled `true`, or a native bool.
     * Returns $default when $dict is null or the entry has any other type.
     */
    private function boolParam(?PdfDictionary $dict, string $key, bool $default): bool
    {
        $val = $dict?->get($key);

        return match (true) {
            $val instanceof \Phpdftk\Pdf\Core\PdfBoolean => $val->value,
            $val instanceof PdfName                      => $val->value === 'true',
            is_bool($val)                                => $val,
            default                                      => $default,
        };
    }

    /**
     * Apply predictor un-filtering if the DecodeParms specify one.
     *
     * A missing dictionary or a /Predictor value of 1 or less leaves the data
     * untouched. Defaults: Columns=1, Colors=1, BitsPerComponent=8.
     */
    private function applyPredictor(string $data, ?PdfDictionary $params): string
    {
        $predictor = $this->intParam($params, 'Predictor', 1);
        if ($predictor <= 1) {
            return $data;
        }

        $filter = new PredictorFilter(
            $predictor,
            $this->intParam($params, 'Columns', 1),
            $this->intParam($params, 'Colors', 1),
            $this->intParam($params, 'BitsPerComponent', 8),
        );

        return $filter->decode($data);
    }

    /**
     * Extract an integer parameter from a DecodeParms dictionary.
     *
     * Accepts a PdfNumber (its serialised form is cast to int) or a native int.
     * Returns $default when $dict is null or the entry has any other type.
     */
    private function intParam(?PdfDictionary $dict, string $key, int $default): int
    {
        $val = $dict?->get($key);
        if ($val instanceof PdfNumber) {
            return (int) $val->toPdf();
        }
        if (is_int($val)) {
            return $val;
        }

        return $default;
    }

    /**
     * Turn a /Filter value into filter names keyed by their position.
     *
     * Indirect references (the value itself or individual array items) are
     * followed when a resolver is available. Array entries that are not names
     * are skipped, but the remaining names keep their original positions so
     * they still pair with the correct /DecodeParms entry.
     *
     * @return array<int, string>
     */
    private function resolveFilterNames(mixed $filter): array
    {
        $filter = $this->dereference($filter);

        if ($filter instanceof PdfName) {
            return [$filter->value];
        }

        if ($filter instanceof PdfArray) {
            $names = [];
            $position = 0;
            foreach ($filter->items as $item) {
                $item = $this->dereference($item);
                if ($item instanceof PdfName) {
                    $names[$position] = $item->value;
                }
                // Advance for every entry, valid or not, to mirror the
                // sequential indexing used for /DecodeParms.
                $position++;
            }

            return $names;
        }

        if (is_string($filter)) {
            return [ltrim($filter, '/')];
        }

        return [];
    }

    /**
     * Resolve /DecodeParms into a per-filter array of PdfDictionary|null.
     *
     * A single dictionary is returned as a one-element list, so it applies to
     * the first filter only. Array entries that are not dictionaries become
     * null. Any other value yields $filterCount nulls.
     *
     * @return list<PdfDictionary|null>
     */
    private function resolveDecodeParms(mixed $parms, int $filterCount): array
    {
        $parms = $this->dereference($parms);

        if ($parms instanceof PdfDictionary) {
            return [$parms];
        }

        if ($parms instanceof PdfArray) {
            $result = [];
            foreach ($parms->items as $item) {
                $item = $this->dereference($item);
                $result[] = $item instanceof PdfDictionary ? $item : null;
            }

            return $result;
        }

        return array_fill(0, $filterCount, null);
    }

    /**
     * Follow an indirect reference when a resolver is available.
     *
     * Any value that is not a PdfReference, or any value at all when no
     * resolver has been set, is returned unchanged.
     */
    private function dereference(mixed $value): mixed
    {
        if ($value instanceof PdfReference && $this->resolver !== null) {
            return $this->resolver->resolveReference($value);
        }

        return $value;
    }
}