Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
95.04% |
115 / 121 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
| HintTableParser | |
95.04% |
115 / 121 |
|
33.33% |
1 / 3 |
20 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| parsePageOffsetTable | |
93.98% |
78 / 83 |
|
0.00% |
0 / 1 |
13.04 | |||
| parseSharedObjectTable | |
97.30% |
36 / 37 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Pdf\Reader\Parser; |
| 6 | |
/**
 * Parses the binary hint table data from linearized PDF hint streams.
 *
 * The hint stream body contains the page offset hint table (required)
 * and optionally a shared object hint table. Each table starts with a
 * header of 32-bit big-endian unsigned integers and is followed by
 * bit-packed, variable-width entries that are decoded with BitReader.
 *
 * NOTE(review): the original header comment cites ISO 32000-2 Annex F §F.4
 * as the layout definition, but the headers read here (11-14 and 5 four-byte
 * fields) do not appear to match the mixed 16/32-bit headers of that annex.
 * Confirm against whatever produces the streams this parser consumes.
 */
final class HintTableParser
{
    /** Size in bytes of one header field. */
    private const UINT32_BYTES = 4;

    /** Number of header fields that are always read for the page offset table. */
    private const PAGE_OFFSET_HEADER_FIELDS = 11;

    /** Number of trailing page offset header fields read only when enough data follows. */
    private const PAGE_OFFSET_OPTIONAL_FIELDS = 3;

    /** Number of header fields read for the shared object table. */
    private const SHARED_OBJECT_HEADER_FIELDS = 5;

    /**
     * @param string $data Raw (already decoded) hint stream bytes
     */
    public function __construct(private readonly string $data) {}

    /**
     * Parse the page offset hint table starting at the given byte offset.
     *
     * The header is 11 four-byte integers (44 bytes), optionally followed by
     * three more (items 12-14), followed by per-page bit-packed entries that
     * are stored column by column: one pass over all pages per field, with the
     * reader re-aligned to a byte boundary between passes.
     *
     * @param int $offset   Byte offset within the hint stream data
     * @param int $numPages Number of pages in the document
     *
     * @throws \RuntimeException If $offset is negative or the data is too
     *                           short to hold the 44-byte header
     */
    public function parsePageOffsetTable(int $offset, int $numPages): PageOffsetHintTable
    {
        $this->assertOffsetNotNegative($offset);

        $headerBytes = self::PAGE_OFFSET_HEADER_FIELDS * self::UINT32_BYTES;
        if (strlen($this->data) < $offset + $headerBytes) {
            throw new \RuntimeException('Hint stream too short for page offset table header');
        }

        [
            $minObjectsPerPage,      // Item 1
            $firstPageLocation,      // Item 2
            $bitsForObjectCount,     // Item 3
            $minPageLength,          // Item 4
            $bitsForPageLength,      // Item 5
            $minSharedRefsPerPage,   // Item 6
            $bitsForSharedRefCount,  // Item 7
            $minSharedObjId,         // Item 8
            $bitsForSharedObjId,     // Item 9
            $bitsForSharedObjNum,    // Item 10
            $minContentStreamOffset, // Item 11
        ] = $this->readUInt32Fields($offset, self::PAGE_OFFSET_HEADER_FIELDS);
        $pos = $offset + $headerBytes;

        // Items 12-14 default to zero and are only read when at least 12 more
        // bytes follow item 11.
        // NOTE(review): when they are absent, those same bytes are the start of
        // the bit-packed entries, so this length check cannot tell the two
        // layouts apart once the entry data itself is 12 bytes or longer.
        $bitsForContentOffset = 0;
        $minContentStreamLength = 0;
        $bitsForContentLength = 0;

        $optionalBytes = self::PAGE_OFFSET_OPTIONAL_FIELDS * self::UINT32_BYTES;
        if (strlen($this->data) >= $pos + $optionalBytes) {
            [
                $bitsForContentOffset,   // Item 12
                $minContentStreamLength, // Item 13
                $bitsForContentLength,   // Item 14
            ] = $this->readUInt32Fields($pos, self::PAGE_OFFSET_OPTIONAL_FIELDS);
            $pos += $optionalBytes;
        }

        // Everything after the header is bit-packed per-page data.
        $reader = new BitReader(substr($this->data, $pos));

        // First pass: object count deltas for all pages
        $objectCountDeltas = $this->readColumn($reader, $numPages, $bitsForObjectCount);
        $reader->alignToByte();

        // Second pass: page length deltas
        $pageLengthDeltas = $this->readColumn($reader, $numPages, $bitsForPageLength);
        $reader->alignToByte();

        // Third pass: shared ref count deltas
        $sharedRefCountDeltas = $this->readColumn($reader, $numPages, $bitsForSharedRefCount);
        $reader->alignToByte();

        // Fourth pass: shared object IDs. The number of IDs differs per page:
        // it is the header minimum plus that page's delta from the third pass.
        $sharedObjIds = [];
        for ($page = 0; $page < $numPages; $page++) {
            $refCount = $minSharedRefsPerPage + $sharedRefCountDeltas[$page];
            $ids = [];
            for ($ref = 0; $ref < $refCount; $ref++) {
                $ids[] = $minSharedObjId + $reader->readBits($bitsForSharedObjId);
            }
            $sharedObjIds[] = $ids;
        }
        $reader->alignToByte();

        // Fifth pass: shared object numerators. One value is read per shared
        // reference, but only their per-page sum is kept.
        $sharedObjNumDeltas = [];
        for ($page = 0; $page < $numPages; $page++) {
            $refCount = $minSharedRefsPerPage + $sharedRefCountDeltas[$page];
            $sum = 0;
            for ($ref = 0; $ref < $refCount; $ref++) {
                $sum += $reader->readBits($bitsForSharedObjNum);
            }
            $sharedObjNumDeltas[] = $sum;
        }
        $reader->alignToByte();

        // Sixth pass: content stream offset deltas
        $contentOffsetDeltas = $this->readColumn($reader, $numPages, $bitsForContentOffset);
        $reader->alignToByte();

        // Seventh pass: content stream length deltas (last column, no re-alignment)
        $contentLengthDeltas = $this->readColumn($reader, $numPages, $bitsForContentLength);

        // Transpose the columns into one entry per page.
        $entries = [];
        for ($page = 0; $page < $numPages; $page++) {
            $entries[] = new PageHintEntry(
                objectCountDelta: $objectCountDeltas[$page],
                pageLengthDelta: $pageLengthDeltas[$page],
                sharedRefCountDelta: $sharedRefCountDeltas[$page],
                sharedObjIds: $sharedObjIds[$page],
                sharedObjNumeratorDelta: $sharedObjNumDeltas[$page],
                contentStreamOffsetDelta: $contentOffsetDeltas[$page],
                contentStreamLengthDelta: $contentLengthDeltas[$page],
            );
        }

        return new PageOffsetHintTable(
            minObjectsPerPage: $minObjectsPerPage,
            firstPageLocation: $firstPageLocation,
            minPageLength: $minPageLength,
            minSharedRefsPerPage: $minSharedRefsPerPage,
            minSharedObjId: $minSharedObjId,
            minContentStreamOffset: $minContentStreamOffset,
            minContentStreamLength: $minContentStreamLength,
            entries: $entries,
        );
    }

    /**
     * Parse the shared object hint table starting at the given byte offset.
     *
     * The header is 5 four-byte integers (20 bytes), followed by bit-packed
     * per-group data: a column of length deltas, then a column of 1-bit
     * signature flags.
     *
     * @param int $offset Byte offset within the hint stream data
     *
     * @throws \RuntimeException If $offset is negative or the data is too
     *                           short to hold the 20-byte header
     */
    public function parseSharedObjectTable(int $offset): SharedObjectHintTable
    {
        $this->assertOffsetNotNegative($offset);

        $headerBytes = self::SHARED_OBJECT_HEADER_FIELDS * self::UINT32_BYTES;
        if (strlen($this->data) < $offset + $headerBytes) {
            throw new \RuntimeException('Hint stream too short for shared object table header');
        }

        [
            $firstSharedObjNumber, // Item 1
            $firstSharedObjOffset, // Item 2
            $numSharedGroups,      // Item 3
            $minGroupLength,       // Item 4
            $bitsForGroupLength,   // Item 5
        ] = $this->readUInt32Fields($offset, self::SHARED_OBJECT_HEADER_FIELDS);

        // Everything after the header is bit-packed per-group data.
        $reader = new BitReader(substr($this->data, $offset + $headerBytes));

        // First pass: length deltas
        $lengthDeltas = $this->readColumn($reader, $numSharedGroups, $bitsForGroupLength);
        $reader->alignToByte();

        // Second pass: signature flags (1 bit each)
        $sigFlags = [];
        for ($group = 0; $group < $numSharedGroups; $group++) {
            $sigFlags[] = $reader->readBits(1) === 1;
        }
        $reader->alignToByte();

        // Build entries. The object count is NOT read from the stream here:
        // groups flagged as signature objects are recorded with 0 objects and
        // every other group with 1 (simplified — the full spec has more logic).
        $entries = [];
        for ($group = 0; $group < $numSharedGroups; $group++) {
            $entries[] = new SharedObjectHintEntry(
                lengthDelta: $lengthDeltas[$group],
                isSignatureObject: $sigFlags[$group],
                numObjects: $sigFlags[$group] ? 0 : 1,
            );
        }

        return new SharedObjectHintTable(
            firstSharedObjNumber: $firstSharedObjNumber,
            firstSharedObjOffset: $firstSharedObjOffset,
            numSharedGroups: $numSharedGroups,
            minGroupLength: $minGroupLength,
            entries: $entries,
        );
    }

    /**
     * Reject negative table offsets.
     *
     * substr() interprets a negative offset as "count from the end of the
     * string", so without this guard a negative offset would slip past the
     * header length checks and decode unrelated trailing bytes.
     *
     * @throws \RuntimeException If $offset is negative
     */
    private function assertOffsetNotNegative(int $offset): void
    {
        if ($offset < 0) {
            throw new \RuntimeException('Hint table offset must not be negative');
        }
    }

    /**
     * Decode $count consecutive 32-bit big-endian unsigned integers.
     *
     * @param int $offset Byte offset of the first field within the data
     * @param int $count  Number of fields to decode
     *
     * @return list<int> The decoded values, in stream order
     *
     * @throws \RuntimeException If unpack() cannot decode the requested fields
     */
    private function readUInt32Fields(int $offset, int $count): array
    {
        $fields = unpack('N' . $count, substr($this->data, $offset, $count * self::UINT32_BYTES));
        if ($fields === false) {
            throw new \RuntimeException('Hint stream truncated while reading header fields');
        }

        // unpack() keys an unnamed repeated format 1..$count; re-index from 0.
        return array_values($fields);
    }

    /**
     * Read $count consecutive fixed-width values from the bit reader.
     *
     * Does not re-align the reader; callers decide when to call alignToByte().
     *
     * @param BitReader $reader   Reader positioned at the first value
     * @param int       $count    Number of values to read
     * @param int       $bitWidth Width of each value in bits
     *
     * @return list<int> Values in stream order (assumes BitReader::readBits()
     *                   returns int — TODO confirm)
     */
    private function readColumn(BitReader $reader, int $count, int $bitWidth): array
    {
        $values = [];
        for ($i = 0; $i < $count; $i++) {
            $values[] = $reader->readBits($bitWidth);
        }

        return $values;
    }
}