Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
98.28% |
57 / 58 |
|
50.00% |
1 / 2 |
CRAP | |
0.00% |
0 / 1 |
| XrefStreamParser | |
98.28% |
57 / 58 |
|
50.00% |
1 / 2 |
21 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| parseXrefStream | |
98.25% |
56 / 57 |
|
0.00% |
0 / 1 |
20 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Pdf\Reader\Parser; |
| 6 | |
| 7 | use Phpdftk\Pdf\Core\PdfArray; |
| 8 | use Phpdftk\Pdf\Core\PdfDictionary; |
| 9 | use Phpdftk\Pdf\Core\PdfNumber; |
| 10 | use Phpdftk\Pdf\Core\PdfStream; |
| 11 | use Phpdftk\Pdf\Reader\Exception\InvalidPdfException; |
| 12 | use Phpdftk\Pdf\Reader\Tokenizer\Source; |
| 13 | use Phpdftk\Pdf\Reader\Tokenizer\Tokenizer; |
| 14 | use Phpdftk\Pdf\Reader\XrefEntry; |
| 15 | |
| 16 | /** |
| 17 | * Parses a cross-reference stream (/Type /XRef) — ISO 32000-2 §7.5.8. |
| 18 | */ |
final class XrefStreamParser
{
    /** Widest field that is decoded; 8 bytes is the most a 64-bit PHP integer can hold. */
    private const MAX_FIELD_WIDTH = 8;

    public function __construct(
        private readonly Tokenizer $tokenizer,
        private readonly Source $source,
        private readonly ObjectParser $objectParser,
        private readonly StreamParser $streamParser,
    ) {}

    /**
     * Parse a cross-reference stream at the given byte offset.
     *
     * The indirect object at $offset must be a stream. Its data is decoded,
     * then split into fixed-width binary entries according to the /W, /Size
     * and /Index entries of the stream dictionary.
     *
     * @param int $offset Byte offset of the indirect object holding the xref stream.
     *
     * @return array{0: array<int, XrefEntry>, 1: PdfDictionary} Entries keyed by
     *     object number, followed by the stream dictionary.
     *
     * @throws InvalidPdfException If the object is not a stream, the data cannot
     *     be decoded, or /W is missing, malformed or describes zero-length entries.
     */
    public function parseXrefStream(int $offset): array
    {
        $this->tokenizer->seek($offset);

        // The object and generation numbers of the xref stream itself are not used.
        [, , $value] = $this->objectParser->parseIndirectObject();

        if (!$value instanceof PdfStream) {
            // get_debug_type() also copes with non-object values, where ::class would raise a TypeError.
            throw new InvalidPdfException(
                "Expected a stream at xref stream offset $offset, got " . get_debug_type($value),
            );
        }

        $dict = $value->dictionary;

        // Decode before validating the dictionary so that decoding failures are reported first.
        try {
            $data = $this->streamParser->decode($value->data, $dict);
        } catch (\Throwable $e) {
            throw new InvalidPdfException(
                "Failed to decompress xref stream at offset $offset: " . $e->getMessage(),
                0,
                $e,
            );
        }

        $widths = $this->readFieldWidths($dict);
        $indexPairs = $this->readIndexPairs($dict);

        return [$this->unpackEntries($data, $widths, $indexPairs), $dict];
    }

    /**
     * Read the three field widths from /W, each clamped to 0..MAX_FIELD_WIDTH bytes.
     *
     * @return array{0: int, 1: int, 2: int}
     *
     * @throws InvalidPdfException If /W is not a three-element array or every width is zero.
     */
    private function readFieldWidths(PdfDictionary $dict): array
    {
        $wArr = $dict->get('W');
        if (!$wArr instanceof PdfArray || count($wArr->items) !== 3) {
            throw new InvalidPdfException('Xref stream missing or invalid /W array');
        }

        $widths = [];
        foreach ($wArr->items as $item) {
            $widths[] = max(0, min(self::intValue($item), self::MAX_FIELD_WIDTH));
        }

        // Zero-width entries consume no data, so nothing would bound the number of
        // entries except the (untrusted) /Size or /Index values. Reject them outright.
        if ($widths[0] + $widths[1] + $widths[2] === 0) {
            throw new InvalidPdfException('Xref stream /W array describes zero-length entries');
        }

        return $widths;
    }

    /**
     * Read the [first object number, entry count] subsections from /Index,
     * falling back to a single subsection [0, /Size] when /Index is absent.
     *
     * A trailing unpaired /Index item is ignored.
     *
     * @return list<array{0: int, 1: int}>
     */
    private function readIndexPairs(PdfDictionary $dict): array
    {
        $indexArr = $dict->get('Index');
        if (!$indexArr instanceof PdfArray) {
            return [[0, self::intValue($dict->get('Size'))]];
        }

        $items = $indexArr->items;
        $lastPairStart = count($items) - 1;
        $pairs = [];
        for ($i = 0; $i < $lastPairStart; $i += 2) {
            $pairs[] = [self::intValue($items[$i]), self::intValue($items[$i + 1])];
        }

        return $pairs;
    }

    /**
     * Decode the big-endian, fixed-width entries of every subsection.
     *
     * @param string                        $data       Decoded stream data.
     * @param array{0: int, 1: int, 2: int} $widths     Field widths in bytes; their sum must be positive.
     * @param list<array{0: int, 1: int}>   $indexPairs Subsections as [first object number, count].
     *
     * @return array<int, XrefEntry> Entries keyed by object number.
     */
    private function unpackEntries(string $data, array $widths, array $indexPairs): array
    {
        $entryWidth = $widths[0] + $widths[1] + $widths[2];
        $dataLen = strlen($data);
        $dataPos = 0;
        $entries = [];

        foreach ($indexPairs as [$firstObj, $count]) {
            // Never produce more entries than the remaining bytes can back. Ceiling
            // division still admits a truncated final entry (its missing bytes read
            // as zero) but yields nothing once the data is exhausted.
            $remaining = $dataLen - $dataPos;
            $count = min($count, intdiv($remaining + $entryWidth - 1, $entryWidth));

            for ($i = 0; $i < $count; $i++) {
                $fields = [];
                foreach ($widths as $f => $width) {
                    if ($width === 0) {
                        // An absent field defaults to 1 for field 0 (type = in use) and 0 otherwise.
                        $fields[] = ($f === 0) ? 1 : 0;
                        continue;
                    }

                    $val = 0;
                    for ($b = 0; $b < $width; $b++) {
                        $val <<= 8;
                        if ($dataPos < $dataLen) {
                            $val |= ord($data[$dataPos++]);
                        }
                    }
                    $fields[] = $val;
                }

                $entries[$firstObj + $i] = new XrefEntry(
                    type: $fields[0],
                    offset: $fields[1],
                    generation: $fields[2],
                );
            }
        }

        return $entries;
    }

    /**
     * Integer value of a PdfNumber, or 0 for anything else.
     */
    private static function intValue(mixed $value): int
    {
        return $value instanceof PdfNumber ? (int) $value->toPdf() : 0;
    }
}