Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
97.44% |
38 / 39 |
|
50.00% |
1 / 2 |
CRAP | |
0.00% |
0 / 1 |
| ObjectStreamParser | |
97.44% |
38 / 39 |
|
50.00% |
1 / 2 |
12 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| unpack | |
97.37% |
37 / 38 |
|
0.00% |
0 / 1 |
11 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Pdf\Reader\Parser; |
| 6 | |
| 7 | use Phpdftk\Pdf\Core\PdfDictionary; |
| 8 | use Phpdftk\Pdf\Core\PdfNumber; |
| 9 | use Phpdftk\Pdf\Core\PdfStream; |
| 10 | use Phpdftk\Pdf\Core\Serializable; |
| 11 | use Phpdftk\Pdf\Reader\Exception\InvalidPdfException; |
| 12 | use Phpdftk\Pdf\Reader\Tokenizer\StringSource; |
| 13 | use Phpdftk\Pdf\Reader\Tokenizer\Tokenizer; |
| 14 | |
/**
 * Unpacks objects stored inside a /Type /ObjStm stream —
 * ISO 32000-2 §7.5.7.
 *
 * The decoded stream data is treated as two parts: a header occupying the
 * first /First bytes, holding /N pairs of integers (object number, byte
 * offset relative to /First), followed by the serialized objects.
 */
final class ObjectStreamParser
{
    /**
     * @param StreamParser $streamParser used to run the stream's filter chain
     */
    public function __construct(
        private readonly StreamParser $streamParser,
    ) {}

    /**
     * Unpack all objects from an ObjStm.
     *
     * Returns an empty array when /N or /First is missing, non-numeric,
     * zero, or negative.
     *
     * @return array<int, Serializable> objNum => parsed value
     *
     * @throws InvalidPdfException when an embedded object has a negative
     *                             offset or cannot be parsed
     */
    public function unpack(PdfStream $objStm): array
    {
        $dict = $objStm->dictionary;

        // The stream data may already be decompressed by ObjectResolver::resolveInUse().
        // Try decoding via the filter chain; if it fails, assume data is already raw.
        // Swallowing the failure here is a deliberate best-effort fallback.
        $data = $objStm->data;
        if ($dict->get('Filter') !== null) {
            try {
                $data = $this->streamParser->decode($objStm->data, $dict);
            } catch (\Throwable) {
                $data = $objStm->data;
            }
        }

        $nVal = $dict->get('N');
        $n = ($nVal instanceof PdfNumber) ? (int) $nVal->toPdf() : 0;

        $firstVal = $dict->get('First');
        $first = ($firstVal instanceof PdfNumber) ? (int) $firstVal->toPdf() : 0;

        // Negative values are as meaningless as zero; a negative /First would
        // otherwise make substr() truncate the header from the end of the data.
        if ($n <= 0 || $first <= 0) {
            return [];
        }

        // Parse the header: N pairs of (objNum offset)
        // NOTE(review): tokens are cast to int without checking their type;
        // confirm what Tokenizer::nextToken() yields at end of input.
        $headerTokenizer = new Tokenizer(new StringSource(substr($data, 0, $first)));

        $objNums = [];
        $offsets = [];
        for ($i = 0; $i < $n; $i++) {
            $numToken = $headerTokenizer->nextToken();
            $offToken = $headerTokenizer->nextToken();
            $objNums[] = (int) $numToken->value;
            $offsets[] = (int) $offToken->value;
        }

        // Parse each embedded object from the body (after /First)
        $body = substr($data, $first);
        $bodyLength = strlen($body);
        $result = [];

        for ($i = 0; $i < $n; $i++) {
            $objNum = $objNums[$i];
            $start = $offsets[$i];

            // A negative offset would make substr() count from the end of the
            // body and silently parse unrelated bytes.
            if ($start < 0) {
                throw new InvalidPdfException(
                    "Failed to parse object {$objNum} inside ObjStm: negative offset {$start}",
                );
            }

            // Normally an object ends where the next one begins. If the header
            // offsets are not strictly increasing, fall back to the end of the
            // body instead of handing substr() a negative length.
            $next = $offsets[$i + 1] ?? $bodyLength;
            $end = ($next > $start) ? $next : $bodyLength;
            $slice = substr($body, $start, $end - $start);

            $objSource = new StringSource($slice);
            $objParser = new ObjectParser(new Tokenizer($objSource), $objSource);

            try {
                $result[$objNum] = $objParser->parseValue();
            } catch (\Throwable $e) {
                // Chain the original failure so the root cause and its stack
                // trace stay available to callers and logs.
                throw new InvalidPdfException(
                    "Failed to parse object {$objNum} inside ObjStm: {$e->getMessage()}",
                    0,
                    $e,
                );
            }
        }

        return $result;
    }
}