Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
90.12% |
520 / 577 |
|
74.29% |
26 / 35 |
CRAP | |
0.00% |
0 / 1 |
| PdfFileWriter | |
90.12% |
520 / 577 |
|
74.29% |
26 / 35 |
170.40 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| setCompressStreams | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setEncryption | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
| getRegistry | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getVersion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getPdfVersion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setVersion | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| setStrictVersionMode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setDeprecationHandler | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setStrictDeprecation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getVersionWarnings | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setCeilingVersion | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| setCatalog | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| setInfo | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getInfo | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| register | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| setSigner | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
| setTimestamper | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
| setTsaClient | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| generate | |
96.24% |
128 / 133 |
|
0.00% |
0 / 1 |
42 | |||
| generateLinearized | |
95.53% |
171 / 179 |
|
0.00% |
0 / 1 |
23 | |||
| emitPaddedLinearizationDict | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
| patchPaddedNumber | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
| patchHintArray | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
2.01 | |||
| buildMinimalHintStream | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 | |||
| buildSubsectionXref | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| toBytes | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| writeTo | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
3.01 | |||
| save | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| applyStreamCompression | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
| applySignature | |
73.91% |
34 / 46 |
|
0.00% |
0 / 1 |
7.87 | |||
| checkVersionRequirements | |
70.27% |
26 / 37 |
|
0.00% |
0 / 1 |
15.78 | |||
| enforceRemoval | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
5.12 | |||
| applyCeilingStripping | |
56.41% |
22 / 39 |
|
0.00% |
0 / 1 |
30.23 | |||
| applyVersionRequirement | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Pdf\Core\File; |
| 6 | |
| 7 | use Phpdftk\Filesystem\LocalFilesystem; |
| 8 | use Phpdftk\Pdf\Core\Document\Catalog; |
| 9 | use Phpdftk\Pdf\Core\Document\Info; |
| 10 | use Phpdftk\Pdf\Core\Interactive\Signature\Pkcs7Signer; |
| 11 | use Phpdftk\Pdf\Core\Interactive\Signature\SignatureValue; |
| 12 | use Phpdftk\Pdf\Core\Interactive\Signature\TsaClient; |
| 13 | use Phpdftk\Pdf\Core\Security\PdfEncryptor; |
| 14 | use Phpdftk\Pdf\Core\Content\ContentStream; |
| 15 | use Phpdftk\Pdf\Core\Document\CrossReferenceStream; |
| 16 | use Phpdftk\Pdf\Core\Document\ObjectStream; |
| 17 | use Phpdftk\Filters\FlateFilter; |
| 18 | use Phpdftk\Pdf\Core\PdfArray; |
| 19 | use Phpdftk\Pdf\Core\PdfName; |
| 20 | use Phpdftk\Pdf\Core\PdfObject; |
| 21 | use Phpdftk\Pdf\Core\PdfReference; |
| 22 | use Phpdftk\Pdf\Core\PdfStream; |
| 23 | use Phpdftk\Pdf\Core\PdfString; |
| 24 | use Phpdftk\Pdf\Core\PdfVersion; |
| 25 | use Phpdftk\Pdf\Core\Serializable; |
| 26 | |
| 27 | /** |
| 28 | * Byte-level PDF file emitter — ISO 32000-2 §7.5. |
| 29 | * |
| 30 | * Given a {@see Catalog} root, an {@see Info} (optional), a working |
| 31 | * {@see ObjectRegistry}, and optionally a signer, `generate()` produces |
| 32 | * the exact bytes of a spec-compliant PDF file: header, binary comment, |
| 33 | * indirect-object body, cross-reference table, trailer, `startxref`, |
| 34 | * `%%EOF`. With a signer configured, the output is post-processed to |
| 35 | * compute `/ByteRange` and patch `/Contents` in place. |
| 36 | * |
| 37 | * This class deliberately knows nothing about pages, fonts, resources, |
| 38 | * or any other high-level document assembly concern. Those live in |
| 39 | * `Phpdftk\Pdf\Writer\PdfWriter`, which composes an instance of this |
| 40 | * class. |
| 41 | * |
| 42 | * @api |
| 43 | */ |
| 44 | class PdfFileWriter |
| 45 | { |
| 46 | public const DEFAULT_VERSION = '1.7'; |
| 47 | public const DEFAULT_PDF_VERSION = PdfVersion::V1_7; |
| 48 | |
| 49 | private ObjectRegistry $registry; |
| 50 | private ?Catalog $catalog = null; |
| 51 | private ?Info $info = null; |
| 52 | |
| 53 | private ?SignatureValue $signatureValue = null; |
| 54 | private ?Pkcs7Signer $signer = null; |
| 55 | private int $signaturePlaceholderBytes = 8192; |
| 56 | private ?TsaClient $tsaClient = null; |
| 57 | private bool $compressStreams = true; |
| 58 | private bool $useXRefStream = false; |
| 59 | private bool $useObjectStreams = false; |
| 60 | private ?PdfEncryptor $encryptor = null; |
| 61 | private PdfVersion $version; |
| 62 | private bool $strictVersionMode = false; |
| 63 | private ?PdfVersion $ceilingVersion = null; |
| 64 | |
| 65 | /** @var list<string> */ |
| 66 | private array $versionWarnings = []; |
| 67 | |
| 68 | /** @var (\Closure(string): void)|null */ |
| 69 | private ?\Closure $deprecationHandler = null; |
| 70 | |
| 71 | private bool $strictDeprecation = false; |
| 72 | |
/**
 * Create a writer backed by a fresh, empty object registry.
 *
 * @param bool              $compressStreams  Initial value of the stream-compression flag.
 * @param bool              $useXRefStream    Request a cross-reference stream; forced on
 *                                            whenever object streams are requested.
 * @param bool              $useObjectStreams Request object-stream packing.
 * @param PdfVersion|string $version          Target version. A string that
 *                                            PdfVersion::tryFrom() does not recognise
 *                                            falls back to DEFAULT_PDF_VERSION.
 */
public function __construct(
    bool $compressStreams = true,
    bool $useXRefStream = false,
    bool $useObjectStreams = false,
    PdfVersion|string $version = self::DEFAULT_PDF_VERSION,
) {
    // Normalise the version argument to a PdfVersion up front.
    if (!$version instanceof PdfVersion) {
        $version = PdfVersion::tryFrom($version) ?? self::DEFAULT_PDF_VERSION;
    }

    $this->registry = new ObjectRegistry();
    $this->version = $version;
    $this->compressStreams = $compressStreams;
    $this->useObjectStreams = $useObjectStreams;
    // Object streams need type 2 xref entries, so they imply an xref stream.
    $this->useXRefStream = $useObjectStreams || $useXRefStream;
}
| 88 | |
| 89 | /** |
| 90 | * Enable or disable automatic FlateDecode compression of streams |
| 91 | * that have no filter already set. |
| 92 | */ |
public function setCompressStreams(bool $compress): void
{
    // Only the flag is recorded here; it is consulted when the file is generated.
    $this->compressStreams = $compress;
}
| 97 | |
| 98 | /** |
| 99 | * Configure encryption for the generated PDF. |
| 100 | * |
| 101 | * The encrypt dictionary is registered as an indirect object and |
| 102 | * referenced from the trailer. All string values and stream data |
| 103 | * are encrypted per-object during generation. |
| 104 | * |
| 105 | * @param PdfEncryptor $encryptor A configured encryptor (use PdfEncryptor::rc4128() or ::aes128()) |
| 106 | */ |
public function setEncryption(PdfEncryptor $encryptor): void
{
    // Validate against the ceiling BEFORE touching any writer state. If the
    // encryptor needs a newer PDF version than the ceiling allows, the
    // exception now leaves the writer exactly as it was: no active
    // encryptor and no stray encrypt dictionary in the registry. Previously
    // a caller that caught the exception would still get an encrypted file
    // from the next generate() call.
    $required = $encryptor->getMinimumPdfVersion();
    if ($this->ceilingVersion !== null && $required->isGreaterThan($this->ceilingVersion)) {
        throw new CeilingVersionException(PdfEncryptor::class, $required, $this->ceilingVersion);
    }

    $this->encryptor = $encryptor;

    // The encrypt dictionary must be an indirect object; the encryptor is
    // told its object number once registration has taken place.
    $encryptDict = $encryptor->getEncryptDictionary();
    $this->register($encryptDict);
    $encryptor->setEncryptDictObjNum($encryptDict->objectNumber);

    $this->applyVersionRequirement($required, PdfEncryptor::class);
}
| 120 | |
| 121 | /** |
| 122 | * The underlying object registry. Exposed so callers that want |
| 123 | * fine-grained control over registration order can drive it |
| 124 | * directly. Most callers should use {@see register()}. |
| 125 | */ |
public function getRegistry(): ObjectRegistry
{
    // Hands out the writer's own registry instance, not a copy.
    $registry = $this->registry;

    return $registry;
}
| 130 | |
/**
 * The target PDF version as a string, i.e. the `value` of the
 * {@see PdfVersion} currently held by this writer.
 */
public function getVersion(): string
{
    $current = $this->version;

    return $current->value;
}
| 135 | |
/**
 * The target PDF version as a {@see PdfVersion}.
 */
public function getPdfVersion(): PdfVersion
{
    $current = $this->version;

    return $current;
}
| 140 | |
/**
 * Replace the target PDF version.
 *
 * @param PdfVersion|string $version A PdfVersion is stored as-is. A string is
 *                                   resolved with PdfVersion::tryFrom(); an
 *                                   unrecognised string falls back to
 *                                   DEFAULT_PDF_VERSION.
 */
public function setVersion(PdfVersion|string $version): void
{
    if ($version instanceof PdfVersion) {
        $this->version = $version;

        return;
    }

    $this->version = PdfVersion::tryFrom($version) ?? self::DEFAULT_PDF_VERSION;
}
| 147 | |
/**
 * Store the strict-version-mode flag (defaults to enabling it).
 *
 * Note that {@see setCeilingVersion()} switches this flag off again when
 * it is given a non-null ceiling.
 */
public function setStrictVersionMode(bool $strict = true): void
{
    $this->strictVersionMode = $strict;
}
| 152 | |
/**
 * Store the closure to use as deprecation handler.
 *
 * @param \Closure(string): void $handler Receives a single string argument.
 */
public function setDeprecationHandler(\Closure $handler): void
{
    $this->deprecationHandler = $handler;
}
| 157 | |
/**
 * Store the strict-deprecation flag (defaults to enabling it).
 */
public function setStrictDeprecation(bool $strict = true): void
{
    $this->strictDeprecation = $strict;
}
| 162 | |
| 163 | /** @return list<string> */ |
public function getVersionWarnings(): array
{
    // Warnings accumulated so far, e.g. the xref-stream downgrade notice
    // recorded by generate().
    $warnings = $this->versionWarnings;

    return $warnings;
}
| 168 | |
| 169 | /** |
| 170 | * Set a ceiling version — properties requiring a higher version are |
| 171 | * silently stripped (set to null) during registration. Objects whose |
| 172 | * class-level requirement exceeds the ceiling throw CeilingVersionException. |
| 173 | * |
| 174 | * Mutually exclusive with strict mode — setting a ceiling disables strict. |
| 175 | */ |
public function setCeilingVersion(?PdfVersion $ceiling): void
{
    $this->ceilingVersion = $ceiling;

    if ($ceiling === null) {
        // Clearing the ceiling leaves strict mode and the target version untouched.
        return;
    }

    // A ceiling and strict mode are mutually exclusive, and the target
    // version is pinned to the ceiling itself.
    $this->strictVersionMode = false;
    $this->version = $ceiling;
}
| 184 | |
| 185 | /** |
| 186 | * Register the document catalog and return its reference. The |
| 187 | * catalog becomes the `/Root` of the emitted file. |
| 188 | */ |
public function setCatalog(Catalog $catalog): PdfReference
{
    $this->catalog = $catalog;

    // Same two steps as register(): version check first, then registration.
    $this->checkVersionRequirements($catalog);
    $this->registry->register($catalog);

    // The object number is read only after the registry has seen the catalog.
    $reference = new PdfReference($catalog->objectNumber);

    return $reference;
}
| 196 | |
| 197 | /** |
| 198 | * Set the document info dict. Pass null to clear. Registers the |
| 199 | * object if non-null. |
| 200 | */ |
public function setInfo(?Info $info): void
{
    $this->info = $info;

    if ($info === null) {
        // Clearing: nothing to validate or register.
        return;
    }

    $this->checkVersionRequirements($info);
    $this->registry->register($info);
}
| 209 | |
| 210 | /** Return the Info dict, if set. */ |
public function getInfo(): ?Info
{
    // Null until setInfo() has been given a non-null Info.
    $info = $this->info;

    return $info;
}
| 215 | |
| 216 | /** |
| 217 | * Register any PdfObject and return a reference to it. |
| 218 | */ |
public function register(PdfObject $object): PdfReference
{
    // Version requirements are checked before the registry sees the object.
    $this->checkVersionRequirements($object);
    $this->registry->register($object);

    // The object number is read only after registration.
    $reference = new PdfReference($object->objectNumber);

    return $reference;
}
| 225 | |
| 226 | /** |
| 227 | * Configure a signer. After calling this, {@see generate()} will: |
| 228 | * |
| 229 | * 1. Serialize the document with a fixed-size `/Contents` hex |
| 230 | * placeholder and a 4-element `/ByteRange` of zero-padded |
| 231 | * 10-digit slots inside `$signatureValue`. |
| 232 | * 2. Locate the placeholders in the serialized bytes. |
| 233 | * 3. Patch `/ByteRange` with the real offsets (same byte length). |
| 234 | * 4. Feed the two byte ranges to `$signer` to produce a PKCS#7 |
| 235 | * SignedData blob. |
| 236 | * 5. Patch `/Contents` with the DER bytes (hex-encoded, zero-padded |
| 237 | * to the placeholder length). |
| 238 | * |
| 239 | * `$signatureValue` must already be registered as an indirect object |
| 240 | * and referenced by a `SignatureField::$v` (directly or via |
| 241 | * reference). `$placeholderBytes` bounds the maximum signature size |
| 242 | * — 8 KiB is more than enough for typical RSA/ECDSA PKCS#7 blobs. |
| 243 | */ |
public function setSigner(
    SignatureValue $signatureValue,
    Pkcs7Signer $signer,
    int $placeholderBytes = 8192,
): void {
    $this->signatureValue = $signatureValue;
    $this->signer = $signer;
    $this->signaturePlaceholderBytes = $placeholderBytes;

    // Fixed-width placeholders, patched in place after serialization:
    // a hex string of $placeholderBytes NUL bytes for /Contents ...
    $blankContents = str_repeat("\x00", $placeholderBytes);
    $signatureValue->contents = new PdfString($blankContents, hex: true);

    // ... and four zero-padded 10-digit slots for /ByteRange.
    $zeroSlots = array_fill(0, 4, '0000000000');
    $signatureValue->byteRange = new PdfArray($zeroSlots);
}
| 265 | |
| 266 | /** |
| 267 | * Configure a document-level timestamp using a TSA client. |
| 268 | * |
| 269 | * This is the timestamp equivalent of {@see setSigner()}: it installs |
| 270 | * a DocTimeStamp signature value and TSA client, then patches |
| 271 | * /ByteRange and /Contents at generation time with the RFC 3161 |
| 272 | * timestamp token from the TSA. |
| 273 | * |
| 274 | * @param SignatureValue $docTimeStamp A DocTimeStamp instance to hold the token |
| 275 | * @param TsaClient $tsaClient The TSA client to request the token from |
| 276 | * @param int $placeholderBytes Size of the /Contents placeholder |
| 277 | */ |
public function setTimestamper(
    SignatureValue $docTimeStamp,
    TsaClient $tsaClient,
    int $placeholderBytes = 16384,
): void {
    $this->signatureValue = $docTimeStamp;
    $this->tsaClient = $tsaClient;
    $this->signaturePlaceholderBytes = $placeholderBytes;

    // Same fixed-width placeholder scheme as setSigner(): a hex string of
    // $placeholderBytes NUL bytes for /Contents ...
    $blankContents = str_repeat("\x00", $placeholderBytes);
    $docTimeStamp->contents = new PdfString($blankContents, hex: true);

    // ... and four zero-padded 10-digit slots for /ByteRange.
    $zeroSlots = array_fill(0, 4, '0000000000');
    $docTimeStamp->byteRange = new PdfArray($zeroSlots);
}
| 299 | |
| 300 | /** |
| 301 | * Configure a TSA client for RFC 3161 timestamping. |
| 302 | * |
| 303 | * When set alongside a signer, the timestamp token will be embedded |
| 304 | * in the PKCS#7 signature. When set without a signer (with a |
| 305 | * DocTimeStamp signature value), produces a document-level timestamp. |
| 306 | */ |
public function setTsaClient(TsaClient $tsaClient): void
{
    // Only stores the client; unlike setTimestamper() no placeholders are installed here.
    $this->tsaClient = $tsaClient;
}
| 311 | |
| 312 | /** |
| 313 | * Generate the complete PDF as a binary string. |
| 314 | * |
| 315 | * Assembles the file as an array of string chunks and implodes once at |
| 316 | * the end -- this is O(N) in total output size, whereas repeated string |
| 317 | * concatenation would be O(N^2) because PHP copies the growing string |
| 318 | * on every `.=`. |
| 319 | * |
| 320 | * The chunk order follows ISO 32000-2 section 7.5: |
| 321 | * 1. %PDF-X.Y header |
| 322 | * 2. Binary comment (%\xE2\xE3\xCF\xD3) so transfer tools treat |
| 323 | * the file as binary rather than ASCII |
| 324 | * 3. Indirect-object body (one chunk per registered object) |
| 325 | * 4. Cross-reference table (or xref stream for PDF >= 1.5) |
| 326 | * 5. Trailer dictionary |
| 327 | * 6. startxref pointer |
| 328 | * 7. %%EOF marker |
| 329 | * |
| 330 | * When a signer is configured, the serialized bytes are post-processed |
| 331 | * by {@see applySignature()} to fill /ByteRange and /Contents. |
| 332 | */ |
public function generate(): string
{
    if ($this->catalog === null) {
        throw new \RuntimeException(
            'PdfFileWriter::generate() called before setCatalog()',
        );
    }

    // Auto-bump version for xref/object streams before emission
    if ($this->useXRefStream) {
        if ($this->ceilingVersion !== null && !$this->ceilingVersion->isAtLeast(PdfVersion::V1_5)) {
            // Ceiling is below 1.5 — downgrade to classic xref. The flags
            // are changed on the writer itself, so the downgrade (and its
            // warning) happens once, not on every generate() call.
            $this->useXRefStream = false;
            $this->useObjectStreams = false;
            $this->versionWarnings[] = 'Downgraded from xref stream to classic xref (ceiling < 1.5)';
        } else {
            $this->applyVersionRequirement(PdfVersion::V1_5, 'XRefStream');
        }
    }

    // Sync catalog /Version for PDF > 1.4 (ISO 32000 §7.2.2)
    if ($this->version->isGreaterThan(PdfVersion::V1_4)) {
        $this->catalog->version = new PdfName($this->version->value);
    }

    $xref = new CrossReferenceTable();
    $chunks = [];

    // PDF header
    $chunks[] = '%PDF-' . $this->version->value . "\n";
    // Binary comment — 4 bytes > 127 to signal binary file
    $chunks[] = "%\xE2\xE3\xCF\xD3\n";

    // Track byte offset without concatenating the full string each iteration
    $offset = strlen($chunks[0]) + strlen($chunks[1]);

    // Build the array of objects to serialize. When encryption is
    // active we clone every object so the originals stay untouched,
    // allowing generate() to be called multiple times (idempotency).
    // Compression is applied on the clones as well so that the spec-
    // mandated order (compress → encrypt) is respected.
    $serializableObjects = $this->registry->getAll();

    if ($this->encryptor !== null) {
        $clones = [];
        $flate = $this->compressStreams ? new FlateFilter() : null;

        foreach ($serializableObjects as $objNum => $object) {
            $clone = clone $object;

            // Deep-clone the dictionary for PdfStream subclasses so
            // we don't mutate the original's dictionary.
            if ($clone instanceof PdfStream) {
                $clone->dictionary = clone $clone->dictionary;
            }

            // Materialize ContentStream operators into data
            if ($clone instanceof ContentStream) {
                $clone->data = implode("\n", $clone->getOperators());
                $clone->clearOperators();
            }

            // Compress before encrypting (spec order: compress → encrypt).
            // We do this manually on the clone instead of via setFilter()
            // so that toPdf() won't try to re-compress later.
            if (
                $flate !== null
                && $clone instanceof PdfStream
                && !$clone->dictionary->has('Filter')
                && $clone->data !== ''
            ) {
                $clone->data = $flate->encode($clone->data);
                $clone->dictionary->set('Filter', new PdfName('FlateDecode'));
            }

            $this->encryptor->encryptObject($clone);
            $clones[$objNum] = $clone;
        }
        $serializableObjects = $clones;
    } elseif ($this->compressStreams) {
        // No encryption — apply compression on the originals (safe:
        // setFilter is idempotent due to the has('Filter') guard).
        $this->applyStreamCompression();
    }

    // Object stream packing: group eligible objects into ObjectStream containers
    // Per spec §7.5.7, objects with streams and the encrypt dict cannot be packed.
    /** @var array<int, int> objNum => objStmObjNum for type 2 xref entries */
    $compressedEntries = [];
    /** @var array<int, int> objNum => index within the object stream */
    $compressedIndices = [];

    if ($this->useObjectStreams) {
        $encryptObjNum = $this->encryptor !== null
            ? $this->encryptor->getEncryptDictionary()->objectNumber
            : 0;

        // The signature value carries the /ByteRange and /Contents
        // placeholders that are located and patched in the final bytes.
        // Packed into a (normally Flate-compressed) object stream those
        // placeholders would not appear in the output at all, so the
        // signature dictionary has to stay a plain indirect object.
        // Matching is by object number so it also works for the clones
        // made when encryption is active.
        $signatureObjNum = $this->signatureValue !== null
            ? $this->signatureValue->objectNumber
            : null;

        $eligible = [];
        $ineligible = [];
        foreach ($serializableObjects as $objNum => $object) {
            // Cannot pack: streams (have their own data), catalog (must be direct),
            // encrypt dict (must be accessible before decryption),
            // signature value (must be patchable in the raw file bytes)
            if (
                $object instanceof PdfStream
                || $objNum === $this->catalog->objectNumber
                || $objNum === $encryptObjNum
                || $objNum === $signatureObjNum
            ) {
                $ineligible[$objNum] = $object;
            } else {
                $eligible[$objNum] = $object;
            }
        }

        if ($eligible !== []) {
            // Create ObjectStream(s) — pack up to 200 objects per stream
            $nextObjNum = $this->registry->getSize();
            $objStreamBatches = array_chunk($eligible, 200, true);
            foreach ($objStreamBatches as $batch) {
                $objStm = new ObjectStream();
                $objStmObjNum = $nextObjNum++;
                $objStm->objectNumber = $objStmObjNum;
                $objStm->generationNumber = 0;

                $index = 0;
                foreach ($batch as $objNum => $object) {
                    $objStm->addObject($object);
                    $compressedEntries[$objNum] = $objStmObjNum;
                    $compressedIndices[$objNum] = $index;
                    $index++;
                }

                // Compress the object stream
                if ($this->compressStreams) {
                    $objStm->setFilter(new FlateFilter(), 'FlateDecode');
                }

                // Add to ineligible so it gets written as a normal indirect object
                $ineligible[$objStmObjNum] = $objStm;
            }

            $serializableObjects = $ineligible;
        }
    }

    // Write all objects in registration order
    foreach ($serializableObjects as $objNum => $object) {
        $xref->add($objNum, $offset);
        $chunk = $object->toIndirectObject() . "\n";
        $chunks[] = $chunk;
        $offset += strlen($chunk);
    }

    // Trailer — use encryptor's file ID if encrypting, otherwise derive
    // the ID from the body bytes so identical content produces identical
    // PDFs. ISO 32000-2 §14.4 specifies that the file identifier "shall
    // be based on the contents of the file at the time it was originally
    // created", so hashing the body satisfies the spec and removes a
    // source of nondeterminism in test/build outputs.
    $id = $this->encryptor !== null
        ? $this->encryptor->getFileId()
        : md5(implode('', $chunks), true);

    $idArray = new PdfArray([
        new PdfString($id, hex: true),
        new PdfString($id, hex: true),
    ]);

    if ($this->useXRefStream) {
        // PDF 1.5+ cross-reference stream — trailer entries are in the stream dict
        $xrefStream = new CrossReferenceStream();

        // Build entries: object 0 is free, then in-use or compressed entries
        $xrefStream->addFreeEntry(0, 65535);
        $xrefEntries = $xref->getEntries();
        $maxObjNum = max(
            $this->registry->getSize() - 1,
            $compressedEntries !== [] ? max(array_keys($compressedEntries)) : 0,
            $xrefEntries !== [] ? max(array_keys($xrefEntries)) : 0,
        );
        for ($i = 1; $i <= $maxObjNum; $i++) {
            if (isset($compressedEntries[$i])) {
                $xrefStream->addCompressedEntry($compressedEntries[$i], $compressedIndices[$i]);
            } elseif (isset($xrefEntries[$i])) {
                $xrefStream->addInUseEntry($xrefEntries[$i]);
            } else {
                // Gap in the numbering: emit a free entry so later
                // entries keep their positions.
                $xrefStream->addFreeEntry(0, 0);
            }
        }

        // The xref stream itself is the next sequential object number.
        $xrefStreamObjNum = $maxObjNum + 1;
        $xrefStream->objectNumber = $xrefStreamObjNum;
        $xrefStream->size = $xrefStreamObjNum + 1;
        $xrefStream->root = new PdfReference($this->catalog->objectNumber);
        $xrefStream->id = $idArray;
        if ($this->info !== null) {
            $xrefStream->info = new PdfReference($this->info->objectNumber);
        }
        if ($this->encryptor !== null) {
            $encryptDict = $this->encryptor->getEncryptDictionary();
            $xrefStream->encrypt = new PdfReference($encryptDict->objectNumber);
        }

        // Record the xref stream's own offset
        $xrefOffset = $offset;
        $xrefStream->addInUseEntry($xrefOffset);

        // Compress the xref stream data via setFilter (applied during toPdf)
        if ($this->compressStreams) {
            $xrefStream->setFilter(new FlateFilter(), 'FlateDecode');
        }

        $chunks[] = $xrefStream->toIndirectObject() . "\n";
        $chunks[] = "startxref\n" . $xrefOffset . "\n";
        $chunks[] = '%%EOF';
    } else {
        // Classic cross-reference table + trailer
        $xrefOffset = $offset;
        $xrefChunk = $xref->build($this->registry->getSize());
        $chunks[] = $xrefChunk;

        $trailer = new TrailerDictionary(new PdfReference($this->catalog->objectNumber));
        $trailer->size = $this->registry->getSize();
        if ($this->info !== null) {
            $trailer->info = new PdfReference($this->info->objectNumber);
        }
        if ($this->encryptor !== null) {
            $encryptDict = $this->encryptor->getEncryptDictionary();
            $trailer->encrypt = new PdfReference($encryptDict->objectNumber);
        }
        $trailer->id = $idArray;

        $chunks[] = "trailer\n" . $trailer->toPdf() . "\n";
        $chunks[] = "startxref\n" . $xrefOffset . "\n";
        $chunks[] = '%%EOF';
    }

    // Single join at the end; see the method docblock for the rationale.
    $pdf = implode('', $chunks);

    // Post-process only when there is both a signature value and something
    // able to produce its contents (a signer or a TSA client).
    if ($this->signatureValue !== null && ($this->signer !== null || $this->tsaClient !== null)) {
        $pdf = $this->applySignature($pdf);
    }

    return $pdf;
}
| 578 | |
| 579 | /** |
| 580 | * Generate a linearized (web-optimized) PDF per ISO 32000-2 Annex F. |
| 581 | * |
| 582 | * Linearized PDFs place the first page's objects at the front of the |
| 583 | * file so a viewer can display it before downloading the rest. The |
| 584 | * structure is: |
| 585 | * |
| 586 | * 1. Header + linearization parameters dict (object 1) |
| 587 | * 2. First-page cross-reference section |
| 588 | * 3. Catalog, page tree, first page, and its resources |
| 589 | * 4. Hint stream |
| 590 | * 5. Remaining pages and objects |
| 591 | * 6. Main cross-reference section + trailer |
| 592 | * |
| 593 | * @param list<int> $firstPageObjectNumbers Object numbers belonging to the first page |
| 594 | * (page, content streams, fonts, images). |
| 595 | * If empty, auto-detected from object order. |
| 596 | */ |
| 597 | public function generateLinearized(array $firstPageObjectNumbers = []): string |
| 598 | { |
| 599 | if ($this->catalog === null) { |
| 600 | throw new \RuntimeException( |
| 601 | 'PdfFileWriter::generateLinearized() called before setCatalog()', |
| 602 | ); |
| 603 | } |
| 604 | |
| 605 | // Sync catalog /Version for PDF > 1.4 |
| 606 | if ($this->version->isGreaterThan(PdfVersion::V1_4)) { |
| 607 | $this->catalog->version = new PdfName($this->version->value); |
| 608 | } |
| 609 | |
| 610 | // Apply compression |
| 611 | if ($this->compressStreams) { |
| 612 | $this->applyStreamCompression(); |
| 613 | } |
| 614 | |
| 615 | $allObjects = $this->registry->getAll(); |
| 616 | |
| 617 | // Partition objects into first-page set and remaining set. |
| 618 | // The first-page set always includes the catalog and page tree. |
| 619 | $catalogNum = $this->catalog->objectNumber; |
| 620 | $firstPageSet = []; |
| 621 | $remainingSet = []; |
| 622 | |
| 623 | // Build the set of first-page object numbers |
| 624 | $fpNums = array_flip($firstPageObjectNumbers); |
| 625 | // Always include catalog in first-page set |
| 626 | $fpNums[$catalogNum] = true; |
| 627 | |
| 628 | // If no explicit first-page objects given, use heuristic: |
| 629 | // catalog + page tree + first few objects after page tree |
| 630 | if ($firstPageObjectNumbers === []) { |
| 631 | // Include all objects — they'll all be in the "first page" section |
| 632 | // This is a valid minimal linearization where all objects are first-page |
| 633 | foreach ($allObjects as $objNum => $_) { |
| 634 | $fpNums[$objNum] = true; |
| 635 | } |
| 636 | } |
| 637 | |
| 638 | foreach ($allObjects as $objNum => $object) { |
| 639 | if (isset($fpNums[$objNum])) { |
| 640 | $firstPageSet[$objNum] = $object; |
| 641 | } else { |
| 642 | $remainingSet[$objNum] = $object; |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | // Count pages by looking at the page tree kids |
| 647 | $pageCount = 0; |
| 648 | foreach ($allObjects as $object) { |
| 649 | if ($object instanceof \Phpdftk\Pdf\Core\Document\PageTree) { |
| 650 | $pageCount = $object->count; |
| 651 | break; |
| 652 | } |
| 653 | } |
| 654 | if ($pageCount === 0) { |
| 655 | $pageCount = 1; |
| 656 | } |
| 657 | |
| 658 | // Find first page object number |
| 659 | $firstPageObjNum = $catalogNum; // fallback |
| 660 | foreach ($allObjects as $object) { |
| 661 | if ($object instanceof \Phpdftk\Pdf\Core\Document\Page) { |
| 662 | $firstPageObjNum = $object->objectNumber; |
| 663 | break; |
| 664 | } |
| 665 | } |
| 666 | |
| 667 | // === PASS 1: Build the PDF to determine offsets === |
| 668 | // We need two passes because the linearization parameters dict |
| 669 | // contains the total file length and hint stream offsets, which |
| 670 | // are only known after serialization. |
| 671 | |
| 672 | // We'll build the PDF structure, then patch the linearization dict. |
| 673 | |
| 674 | $chunks = []; |
| 675 | |
| 676 | // 1. Header |
| 677 | $chunks[] = '%PDF-' . $this->version->value . "\n"; |
| 678 | $chunks[] = "%\xE2\xE3\xCF\xD3\n"; |
| 679 | $offset = strlen($chunks[0]) + strlen($chunks[1]); |
| 680 | |
| 681 | // 2. Linearization parameters dict (placeholder — will be patched) |
| 682 | $linParams = new \Phpdftk\Pdf\Core\Document\LinearizationParameters(); |
| 683 | $linParams->objectNumber = 0; // will use a synthetic object number |
| 684 | $linParams->generationNumber = 0; |
| 685 | |
| 686 | // Use object number = max + 1 for the linearization dict |
| 687 | $maxObjNum = max(array_keys($allObjects)); |
| 688 | $linObjNum = $maxObjNum + 1; |
| 689 | $linParams->objectNumber = $linObjNum; |
| 690 | |
| 691 | // Serialize a placeholder with max-width numbers (10 digits each) |
| 692 | $linParams->linearized = 1.0; |
| 693 | $linParams->l = 0; // patched later |
| 694 | $linParams->n = $pageCount; |
| 695 | $linParams->o = $firstPageObjNum; |
| 696 | $linParams->e = 0; // patched later |
| 697 | $linParams->t = 0; // patched later |
| 698 | $linParams->h = new \Phpdftk\Pdf\Core\PdfArray([ |
| 699 | new \Phpdftk\Pdf\Core\PdfNumber(0), |
| 700 | new \Phpdftk\Pdf\Core\PdfNumber(0), |
| 701 | ]); |
| 702 | |
| 703 | // Emit the linearization dict with padded numbers so the byte |
| 704 | // offsets don't shift when we patch real values. |
| 705 | $linDictChunk = $this->emitPaddedLinearizationDict($linObjNum, $linParams); |
| 706 | $linDictOffset = $offset; |
| 707 | $chunks[] = $linDictChunk; |
| 708 | $offset += strlen($linDictChunk); |
| 709 | |
| 710 | // 3. First-page objects |
| 711 | $firstPageXref = new CrossReferenceTable(); |
| 712 | |
| 713 | // Record linearization dict in first-page xref |
| 714 | $firstPageXref->add($linObjNum, $linDictOffset); |
| 715 | |
| 716 | foreach ($firstPageSet as $objNum => $object) { |
| 717 | $firstPageXref->add($objNum, $offset); |
| 718 | $chunk = $object->toIndirectObject() . "\n"; |
| 719 | $chunks[] = $chunk; |
| 720 | $offset += strlen($chunk); |
| 721 | } |
| 722 | |
| 723 | $endOfFirstPage = $offset; // /E value |
| 724 | |
| 725 | // 4. Hint stream (minimal — page offset table only) |
| 726 | $hintObjNum = $linObjNum + 1; |
| 727 | $hintStreamOffset = $offset; |
| 728 | $hintData = $this->buildMinimalHintStream($pageCount); |
| 729 | $hintStream = new \Phpdftk\Pdf\Core\Document\HintStream($hintData); |
| 730 | $hintStream->objectNumber = $hintObjNum; |
| 731 | $hintStream->generationNumber = 0; |
| 732 | $hintStream->p = 0; // page offset table starts at byte 0 of stream data |
| 733 | $hintStream->s = strlen($hintData); // shared object table at end (empty) |
| 734 | |
| 735 | $firstPageXref->add($hintObjNum, $offset); |
| 736 | $hintChunk = $hintStream->toIndirectObject() . "\n"; |
| 737 | $chunks[] = $hintChunk; |
| 738 | $offset += strlen($hintChunk); |
| 739 | |
| 740 | // 5. First-page xref section |
| 741 | $firstPageXrefOffset = $offset; |
| 742 | $totalSize = $hintObjNum + 1; // total object count including lin + hint |
| 743 | |
| 744 | // Build xref for first-page objects only (subsection format) |
| 745 | $xrefSection = $this->buildSubsectionXref($firstPageXref, $totalSize); |
| 746 | $chunks[] = $xrefSection; |
| 747 | $offset += strlen($xrefSection); |
| 748 | |
| 749 | // 6. Remaining-page objects |
| 750 | $mainXref = new CrossReferenceTable(); |
| 751 | // Re-record first-page objects for the main xref too |
| 752 | foreach ($firstPageXref->getEntries() as $on => $off) { |
| 753 | $mainXref->add($on, $off); |
| 754 | } |
| 755 | |
| 756 | foreach ($remainingSet as $objNum => $object) { |
| 757 | $mainXref->add($objNum, $offset); |
| 758 | $chunk = $object->toIndirectObject() . "\n"; |
| 759 | $chunks[] = $chunk; |
| 760 | $offset += strlen($chunk); |
| 761 | } |
| 762 | |
| 763 | // 7. Main xref section |
| 764 | $mainXrefOffset = $offset; |
| 765 | $mainXrefChunk = $mainXref->build($totalSize); |
| 766 | $chunks[] = $mainXrefChunk; |
| 767 | $offset += strlen($mainXrefChunk); |
| 768 | |
| 769 | // File ID — content-based (ISO 32000-2 §14.4) so identical linearized |
| 770 | // bodies produce identical PDFs. |
| 771 | $id = md5(implode('', $chunks), true); |
| 772 | $idArray = new \Phpdftk\Pdf\Core\PdfArray([ |
| 773 | new PdfString($id, hex: true), |
| 774 | new PdfString($id, hex: true), |
| 775 | ]); |
| 776 | |
| 777 | // 8. First-page trailer (between first-page xref and remaining objects) |
| 778 | // This trailer has /Prev pointing to the main xref |
| 779 | $firstPageTrailer = new TrailerDictionary(new PdfReference($catalogNum)); |
| 780 | $firstPageTrailer->size = $totalSize; |
| 781 | $firstPageTrailer->prev = $mainXrefOffset; |
| 782 | $firstPageTrailer->id = $idArray; |
| 783 | if ($this->info !== null) { |
| 784 | $firstPageTrailer->info = new PdfReference($this->info->objectNumber); |
| 785 | } |
| 786 | |
| 787 | // Insert the first-page trailer right after the first-page xref |
| 788 | // We need to insert it before the remaining objects |
| 789 | $fpTrailerStr = "trailer\n" . $firstPageTrailer->toPdf() . "\n" |
| 790 | . "startxref\n" . $firstPageXrefOffset . "\n" |
| 791 | . "%%EOF\n"; |
| 792 | |
| 793 | // 9. Main trailer |
| 794 | $mainTrailer = new TrailerDictionary(new PdfReference($catalogNum)); |
| 795 | $mainTrailer->size = $totalSize; |
| 796 | $mainTrailer->id = $idArray; |
| 797 | if ($this->info !== null) { |
| 798 | $mainTrailer->info = new PdfReference($this->info->objectNumber); |
| 799 | } |
| 800 | |
| 801 | $chunks[] = "trailer\n" . $mainTrailer->toPdf() . "\n"; |
| 802 | $chunks[] = "startxref\n" . $mainXrefOffset . "\n"; |
| 803 | $chunks[] = '%%EOF'; |
| 804 | |
| 805 | // Now we need to insert the first-page trailer after the first-page xref. |
| 806 | // Find the index where the first-page xref was emitted and insert after it. |
| 807 | // The structure in chunks is: |
| 808 | // [0] header, [1] binary comment, [2] lin dict, [3..N] first page objects, |
| 809 | // [N+1] hint stream, [N+2] first-page xref, [N+3..] remaining objects, main xref, main trailer |
| 810 | |
| 811 | // Actually, let me restructure: the linearized format requires the first-page |
| 812 | // trailer to come right after the first-page xref, THEN the remaining objects. |
| 813 | // Let me rebuild the chunks array properly. |
| 814 | |
| 815 | // === REBUILD with correct ordering === |
| 816 | $chunks = []; |
| 817 | $offset = 0; |
| 818 | |
| 819 | // Part 1: Header |
| 820 | $header = '%PDF-' . $this->version->value . "\n%\xE2\xE3\xCF\xD3\n"; |
| 821 | $chunks[] = $header; |
| 822 | $offset += strlen($header); |
| 823 | |
| 824 | // Part 2: Linearization dict |
| 825 | $linDictOffset = $offset; |
| 826 | $chunks[] = $linDictChunk; |
| 827 | $offset += strlen($linDictChunk); |
| 828 | |
| 829 | // Part 3: First-page objects |
| 830 | $fpXref = new CrossReferenceTable(); |
| 831 | $fpXref->add($linObjNum, $linDictOffset); |
| 832 | |
| 833 | foreach ($firstPageSet as $objNum => $object) { |
| 834 | $fpXref->add($objNum, $offset); |
| 835 | $chunk = $object->toIndirectObject() . "\n"; |
| 836 | $chunks[] = $chunk; |
| 837 | $offset += strlen($chunk); |
| 838 | } |
| 839 | |
| 840 | $endOfFirstPage = $offset; |
| 841 | |
| 842 | // Part 4: Hint stream |
| 843 | $hintStreamOffset = $offset; |
| 844 | $fpXref->add($hintObjNum, $hintStreamOffset); |
| 845 | $hintChunk = $hintStream->toIndirectObject() . "\n"; |
| 846 | $chunks[] = $hintChunk; |
| 847 | $offset += strlen($hintChunk); |
| 848 | |
| 849 | // Part 5: First-page xref |
| 850 | $fpXrefOffset = $offset; |
| 851 | $fpXrefSection = $this->buildSubsectionXref($fpXref, $totalSize); |
| 852 | $chunks[] = $fpXrefSection; |
| 853 | $offset += strlen($fpXrefSection); |
| 854 | |
| 855 | // Part 6: First-page trailer |
| 856 | $fpTrailer = new TrailerDictionary(new PdfReference($catalogNum)); |
| 857 | $fpTrailer->size = $totalSize; |
| 858 | $fpTrailer->id = $idArray; |
| 859 | if ($this->info !== null) { |
| 860 | $fpTrailer->info = new PdfReference($this->info->objectNumber); |
| 861 | } |
| 862 | |
| 863 | // We'll patch /Prev after we know the main xref offset — for now use placeholder |
| 864 | $fpTrailerStr = "trailer\n" . $fpTrailer->toPdf() . "\n" |
| 865 | . "startxref\n" . $fpXrefOffset . "\n" |
| 866 | . "%%EOF\n"; |
| 867 | $chunks[] = $fpTrailerStr; |
| 868 | $offset += strlen($fpTrailerStr); |
| 869 | |
| 870 | // Part 7: Remaining objects |
| 871 | $mainXref = new CrossReferenceTable(); |
| 872 | // Copy first-page entries to main xref |
| 873 | foreach ($fpXref->getEntries() as $on => $off) { |
| 874 | $mainXref->add($on, $off); |
| 875 | } |
| 876 | |
| 877 | foreach ($remainingSet as $objNum => $object) { |
| 878 | $mainXref->add($objNum, $offset); |
| 879 | $chunk = $object->toIndirectObject() . "\n"; |
| 880 | $chunks[] = $chunk; |
| 881 | $offset += strlen($chunk); |
| 882 | } |
| 883 | |
| 884 | // Part 8: Main xref |
| 885 | $mainXrefOffset = $offset; |
| 886 | $mainXrefChunk = $mainXref->build($totalSize); |
| 887 | $chunks[] = $mainXrefChunk; |
| 888 | $offset += strlen($mainXrefChunk); |
| 889 | |
| 890 | // Part 9: Main trailer — /Prev points to first-page xref |
| 891 | $mainTrailer2 = new TrailerDictionary(new PdfReference($catalogNum)); |
| 892 | $mainTrailer2->size = $totalSize; |
| 893 | $mainTrailer2->id = $idArray; |
| 894 | if ($this->info !== null) { |
| 895 | $mainTrailer2->info = new PdfReference($this->info->objectNumber); |
| 896 | } |
| 897 | |
| 898 | $chunks[] = "trailer\n" . $mainTrailer2->toPdf() . "\n"; |
| 899 | $chunks[] = "startxref\n" . $mainXrefOffset . "\n"; |
| 900 | $chunks[] = '%%EOF'; |
| 901 | |
| 902 | $pdf = implode('', $chunks); |
| 903 | |
| 904 | // === PATCH linearization parameters === |
| 905 | $totalLen = strlen($pdf); |
| 906 | |
| 907 | // Patch /L (file length) |
| 908 | $pdf = $this->patchPaddedNumber($pdf, '/L ', $totalLen); |
| 909 | // Patch /E (end of first page) |
| 910 | $pdf = $this->patchPaddedNumber($pdf, '/E ', $endOfFirstPage); |
| 911 | // Patch /T (main xref offset) |
| 912 | $pdf = $this->patchPaddedNumber($pdf, '/T ', $mainXrefOffset); |
| 913 | // Patch /H hint stream offset and length |
| 914 | $hintLength = strlen($hintChunk); |
| 915 | $pdf = $this->patchHintArray($pdf, $hintStreamOffset, $hintLength); |
| 916 | |
| 917 | return $pdf; |
| 918 | } |
| 919 | |
/**
 * Serialize the linearization parameter dictionary as an indirect object
 * whose patchable entries have a fixed width.
 *
 * /L, /E, /T and both /H array slots are written as zero-padded 10-digit
 * integers so they can be overwritten in place later without moving any
 * byte that follows them. /O and /N are written at their natural width.
 * The two /H slots are always emitted as zero placeholders; $params->h is
 * not consulted here.
 *
 * @param int $objNum Object number of the dictionary (generation is always 0).
 * @param \Phpdftk\Pdf\Core\Document\LinearizationParameters $params Supplies the l, o, e, n and t values.
 *
 * @return string The complete "N 0 obj ... endobj\n" chunk.
 */
private function emitPaddedLinearizationDict(int $objNum, \Phpdftk\Pdf\Core\Document\LinearizationParameters $params): string
{
    $template = "%d 0 obj\n<< /Linearized 1 /L %010d /H [ %010d %010d ] /O %d /E %010d /N %d /T %010d >>\nendobj\n";

    // Listed in the order the template consumes them.
    $fields = [
        $objNum,
        $params->l,
        0, // /H hint offset placeholder
        0, // /H hint length placeholder
        $params->o,
        $params->e,
        $params->n,
        $params->t,
    ];

    return sprintf($template, ...$fields);
}
| 939 | |
/**
 * Overwrite the 10-character zero-padded integer that follows the first
 * occurrence of $key in $pdf.
 *
 * The replacement is exactly as wide as the field it overwrites, so no
 * byte after it changes position. A value that cannot be rendered in
 * exactly 10 digits is rejected instead of being written, because a wider
 * replacement would shift every subsequent byte offset in the file.
 *
 * @param string $pdf   Serialized PDF bytes.
 * @param string $key   Literal text immediately preceding the number, e.g. '/L '.
 * @param int    $value Non-negative value to write; must fit in 10 decimal digits.
 *
 * @return string The patched bytes, or $pdf unchanged when $key does not occur.
 *
 * @throws \RuntimeException When $value is negative or wider than 10 digits.
 */
private function patchPaddedNumber(string $pdf, string $key, int $value): string
{
    $width = 10;

    // Only the first occurrence is patched; callers rely on the target
    // dictionary being the first place the key appears.
    $pos = strpos($pdf, $key);
    if ($pos === false) {
        return $pdf;
    }

    $digits = sprintf('%010d', $value);
    if ($value < 0 || strlen($digits) !== $width) {
        throw new \RuntimeException(sprintf(
            'Padded number replacement length mismatch for %s (value %d does not fit in %d digits)',
            trim($key),
            $value,
            $width,
        ));
    }

    return substr_replace($pdf, $digits, $pos + strlen($key), $width);
}
| 953 | |
/**
 * Fill the two 10-digit placeholders of the first "/H [ " array in $pdf
 * with the hint stream's offset and length.
 *
 * Both numbers are written zero-padded to exactly 10 digits so the patch
 * never moves any following byte. Values that cannot be rendered at that
 * width are rejected, since a wider replacement would shift every later
 * byte offset in the file.
 *
 * @param string $pdf    Serialized PDF bytes.
 * @param int    $offset Value for the first array slot (hint stream offset).
 * @param int    $length Value for the second array slot (hint stream length).
 *
 * @return string The patched bytes, or $pdf unchanged when "/H [ " does not occur.
 *
 * @throws \RuntimeException When either value is negative or wider than 10 digits.
 */
private function patchHintArray(string $pdf, int $offset, int $length): string
{
    $marker = '/H [ ';
    $width = 10;

    $pos = strpos($pdf, $marker);
    if ($pos === false) {
        return $pdf;
    }

    $offsetDigits = sprintf('%010d', $offset);
    $lengthDigits = sprintf('%010d', $length);
    if (
        $offset < 0
        || $length < 0
        || strlen($offsetDigits) !== $width
        || strlen($lengthDigits) !== $width
    ) {
        throw new \RuntimeException(sprintf(
            'Hint array replacement length mismatch (offset %d, length %d must each fit in %d digits)',
            $offset,
            $length,
            $width,
        ));
    }

    $first = $pos + strlen($marker);
    // The second slot starts one separator character after the first.
    $second = $first + $width + 1;

    $pdf = substr_replace($pdf, $offsetDigits, $first, $width);

    return substr_replace($pdf, $lengthDigits, $second, $width);
}
| 969 | |
/**
 * Build the raw data of a minimal hint stream for a linearized PDF.
 *
 * The stream consists solely of a page offset hint table header made of
 * eleven 32-bit values. Every bit-count field is zero, so no per-page
 * entries follow the header.
 *
 * NOTE(review): $pageCount is accepted but never read by this method.
 * NOTE(review): ISO 32000 Table F.3 appears to define 13 header items with
 * mixed 16/32-bit widths; this layout (11 x 32 bits) should be confirmed
 * against the specification.
 *
 * @param int $pageCount Number of pages in the document (currently unused).
 *
 * @return string The byte-aligned hint table data.
 */
private function buildMinimalHintStream(int $pageCount): string
{
    // Header values in emission order. All minimums are 1, all bit counts
    // are 0 (no per-page variance).
    $headerFields = [
        1, // 1: min objects per page
        0, // 2: largest page object count - min (dummy)
        0, // 3: bit count for object count delta
        1, // 4: min page length
        0, // 5: largest page length - min (dummy)
        0, // 6: bit count for page length delta
        0, // 7: min content stream offset
        0, // 8: bit count for content stream offset delta
        0, // 9: min content stream length
        0, // 10: bit count for content stream length delta
        0, // 11: bit count for shared object refs
    ];

    $writer = new BitWriter();
    foreach ($headerFields as $field) {
        $writer->writeUint32($field);
    }

    // With every bit count at 0 there is nothing to emit per page.
    $writer->alignToByte();

    return $writer->getData();
}
| 1000 | |
/**
 * Render a cross-reference section for the given table.
 *
 * Delegates entirely to CrossReferenceTable::build(); the section is
 * presumably a single subsection spanning objects 0 to $size - 1, but that
 * layout is decided by build() and is not visible from this method.
 *
 * @param CrossReferenceTable $xref Table holding the recorded object offsets.
 * @param int                 $size Total object count passed through to build().
 *
 * @return string The serialized cross-reference section.
 */
private function buildSubsectionXref(CrossReferenceTable $xref, int $size): string
{
    $section = $xref->build($size);

    return $section;
}
| 1009 | |
/**
 * Return the generated PDF as a raw byte string.
 *
 * Convenience alias for {@see generate()}.
 */
public function toBytes(): string
{
    $bytes = $this->generate();

    return $bytes;
}
| 1017 | |
/**
 * Write the generated PDF to an open stream resource (anything
 * `fwrite()` accepts: a file handle, `php://memory`, `php://output`,
 * a socket, ...). Returns the number of bytes written.
 *
 * A single `fwrite()` call may accept only part of the buffer (notably on
 * sockets and pipes), so the write is repeated until every byte has been
 * delivered. A call that fails or makes no progress aborts with an
 * exception rather than leaving a silently truncated document behind.
 *
 * @param resource $stream Open, writable stream.
 *
 * @return int Total number of bytes written (the full length of the PDF).
 *
 * @throws \InvalidArgumentException When $stream is not an open resource.
 * @throws \RuntimeException         When the stream rejects the data or stops accepting bytes.
 */
public function writeTo($stream): int
{
    if (!is_resource($stream)) {
        throw new \InvalidArgumentException(
            'PdfFileWriter::writeTo() expects an open stream resource',
        );
    }

    $pdf = $this->generate();
    $total = strlen($pdf);
    $written = 0;

    while ($written < $total) {
        // On the first pass this is the whole buffer; afterwards only the
        // unsent tail is offered again.
        $remaining = $written === 0 ? $pdf : substr($pdf, $written);
        $count = fwrite($stream, $remaining);

        // A zero-byte write would loop forever, so treat it like a failure.
        if ($count === false || $count === 0) {
            throw new \RuntimeException('Failed to write PDF bytes to stream');
        }
        $written += $count;
    }

    return $written;
}
| 1039 | |
/**
 * Generate the PDF and store it at $path.
 *
 * The write is delegated to LocalFilesystem::writeFile() with
 * createDirectories enabled, so missing parent directories are created.
 *
 * @param string $path Destination file path.
 */
public function save(string $path): void
{
    $bytes = $this->generate();

    LocalFilesystem::writeFile($path, $bytes, createDirectories: true);
}
| 1049 | |
/**
 * Attach a FlateDecode filter to every registered PdfStream that does not
 * already declare a /Filter entry.
 *
 * Metadata streams are left untouched: they must not be compressed
 * (ISO 32000 §14.3.2, ISO 19005 clause 6.7.2) so they remain
 * searchable/readable.
 */
private function applyStreamCompression(): void
{
    // One filter instance is shared by all streams.
    $filter = new FlateFilter();

    foreach ($this->registry->getAll() as $candidate) {
        // Eligible: a stream, not metadata, and without a filter configured
        // via setFilter() or an explicit /Filter dictionary entry.
        $compressible = $candidate instanceof PdfStream
            && !$candidate instanceof \Phpdftk\Pdf\Core\Document\MetadataStream
            && !$candidate->dictionary->has('Filter');

        if ($compressible) {
            $candidate->setFilter($filter, 'FlateDecode');
        }
    }
}
| 1074 | |
/**
 * Fill in a signature's /ByteRange and /Contents placeholders in an
 * already serialized PDF.
 *
 * This is the second pass of signing: /ByteRange has to state the exact
 * byte offsets of the signed data, and those are only known once the whole
 * document has been serialized. Both placeholders are fixed-width, so
 * patching them moves no other byte.
 *
 * Steps:
 *  1. Locate the all-zero "/Contents <...>" placeholder, sized from
 *     $this->signaturePlaceholderBytes.
 *  2. Write the real /ByteRange — everything before the '<' and everything
 *     after the '>' of the /Contents value.
 *  3. Hand the concatenation of those two ranges to the signer, or to the
 *     TSA client when no signer is set.
 *  4. Hex-encode the returned DER blob, right-pad it with '0' to the
 *     placeholder width and write it between the angle brackets.
 *
 * @param string $pdf Serialized PDF containing both placeholders.
 *
 * @return string The PDF with /ByteRange and /Contents filled in.
 *
 * @throws \RuntimeException When a placeholder is missing, the ByteRange
 *                           replacement has the wrong width, neither a signer
 *                           nor a TSA client is configured, or the DER blob
 *                           does not fit the placeholder.
 */
private function applySignature(string $pdf): string
{
    $capacityBytes = $this->signaturePlaceholderBytes;
    $hexWidth = $capacityBytes * 2;
    $hexZeros = str_repeat('0', $hexWidth);

    $markerPos = strpos($pdf, '/Contents <' . $hexZeros . '>');
    if ($markerPos === false) {
        throw new \RuntimeException('Signature /Contents placeholder not found in serialized PDF');
    }

    // $valueStart is the index of '<'; $valueEnd is the index just past '>'.
    $valueStart = $markerPos + strlen('/Contents ');
    $valueEnd = $valueStart + strlen('<' . $hexZeros . '>');

    // Two (offset, length) pairs covering everything except the /Contents value.
    $ranges = [0, $valueStart, $valueEnd, strlen($pdf) - $valueEnd];

    $rangePlaceholder = '[ 0000000000 0000000000 0000000000 0000000000 ]';
    $rangePos = strpos($pdf, $rangePlaceholder);
    if ($rangePos === false) {
        throw new \RuntimeException('Signature /ByteRange placeholder not found in serialized PDF');
    }

    $rangeText = sprintf('[ %010d %010d %010d %010d ]', ...$ranges);
    if (strlen($rangeText) !== strlen($rangePlaceholder)) {
        throw new \RuntimeException('ByteRange replacement length mismatch');
    }
    $pdf = substr_replace($pdf, $rangeText, $rangePos, strlen($rangePlaceholder));

    // The data to sign is taken after /ByteRange has been patched, since
    // /ByteRange itself lies inside the signed ranges.
    $signedData = substr($pdf, 0, $valueStart) . substr($pdf, $valueEnd);

    // A signer takes precedence. With only a TSA client this is a
    // document-level timestamp (DocTimeStamp): no PKCS#7 signature, just an
    // RFC 3161 timestamp token over the byte ranges.
    $der = match (true) {
        $this->signer !== null => $this->signer->sign($signedData),
        $this->tsaClient !== null => $this->tsaClient->timestamp($signedData),
        default => throw new \RuntimeException('No signer or TSA client configured'),
    };

    $derHex = bin2hex($der);
    if (strlen($derHex) > $hexWidth) {
        throw new \RuntimeException(sprintf(
            'Signature DER (%d bytes) exceeds placeholder capacity (%d bytes); '
            . 'call setSigner() with a larger $placeholderBytes',
            strlen($der),
            $capacityBytes,
        ));
    }

    // The hex digits start one character after '<'.
    return substr_replace(
        $pdf,
        str_pad($derHex, $hexWidth, '0', STR_PAD_RIGHT),
        $valueStart + 1,
        $hexWidth,
    );
}
| 1162 | |
/**
 * Validate one object against the writer's version policy.
 *
 * Order of operations:
 *  1. Report the object's own deprecation, if any (warning, optional
 *     handler callback, removal enforcement).
 *  2. In ceiling mode, delegate to applyCeilingStripping() and stop; the
 *     steps below are skipped.
 *  3. Otherwise apply the object's effective version requirement, then do
 *     the same for each initialized public property holding a Serializable
 *     that is not itself a PdfObject, and report that child's deprecation.
 *
 * @param PdfObject $object Object to check.
 */
private function checkVersionRequirements(PdfObject $object): void
{
    // Shared by the object and its inline children: record the warning,
    // notify the handler when one is set, then enforce removal.
    $reportDeprecation = function (object $subject): void {
        $notice = VersionRequirementResolver::getDeprecation($subject);
        if ($notice === null) {
            return;
        }

        $warning = sprintf(
            '%s is deprecated since PDF %s%s',
            $subject::class,
            $notice->since,
            $notice->replacement ? "; use {$notice->replacement} instead" : '',
        );
        $this->versionWarnings[] = $warning;
        if ($this->deprecationHandler !== null) {
            ($this->deprecationHandler)($warning);
        }

        $this->enforceRemoval($subject::class, $notice);
    };

    $reportDeprecation($object);

    // Ceiling mode strips incompatible properties instead of bumping.
    if ($this->ceilingVersion !== null) {
        $this->applyCeilingStripping($object);
        return;
    }

    // Requirement of the object itself (class + non-null properties).
    $ownRequirement = VersionRequirementResolver::getEffectiveRequirement($object);
    $this->applyVersionRequirement($ownRequirement, $object::class);

    $reflection = new \ReflectionClass($object);
    foreach ($reflection->getProperties(\ReflectionProperty::IS_PUBLIC) as $property) {
        if (!$property->isInitialized($object)) {
            continue;
        }

        $child = $property->getValue($object);
        // Only inline Serializable values qualify; PdfObject values are skipped.
        if (!$child instanceof Serializable || $child instanceof PdfObject) {
            continue;
        }

        $childRequirement = VersionRequirementResolver::getEffectiveRequirement($child);
        $this->applyVersionRequirement($childRequirement, $child::class);

        $reportDeprecation($child);
    }
}
| 1225 | |
/**
 * Throw when a deprecated feature has been removed in the targeted PDF
 * version and enforcement is active.
 *
 * The target is the ceiling version when one is set, otherwise the current
 * document version. Enforcement is active under strict deprecation or in
 * ceiling mode; in every other case this method returns silently.
 *
 * @param string $class Class name of the deprecated feature, used in the exception.
 * @param \Phpdftk\Pdf\Core\DeprecatedPdfFeature $deprecation Deprecation metadata.
 *
 * @throws DeprecatedFeatureException When the feature is removed at the target version and enforcement is active.
 */
private function enforceRemoval(string $class, \Phpdftk\Pdf\Core\DeprecatedPdfFeature $deprecation): void
{
    // Deprecated but never removed: nothing to enforce.
    if ($deprecation->removedInVersion === null) {
        return;
    }

    $target = $this->ceilingVersion ?? $this->version;
    if (!$target->isAtLeast($deprecation->removedInVersion)) {
        return;
    }

    $enforced = $this->strictDeprecation || $this->ceilingVersion !== null;
    if ($enforced) {
        throw new DeprecatedFeatureException($class, $deprecation, $target);
    }
}
| 1243 | |
/**
 * Ceiling-mode handling for one object: refuse objects that cannot exist
 * below the ceiling and strip whatever exceeds it.
 *
 *  - Throws when the object's class requirement, or its runtime
 *    PdfVersionAware minimum, is above the ceiling.
 *  - Strips the object's incompatible properties and records a warning
 *    for each.
 *  - For every initialized public property holding a Serializable that is
 *    not a PdfObject: sets the property to null when the child's class is
 *    above the ceiling, otherwise strips the child's own incompatible
 *    properties. Each case records a warning.
 *
 * @param PdfObject $object Object to check and strip in place.
 *
 * @throws CeilingVersionException When the object as a whole requires a version above the ceiling.
 */
private function applyCeilingStripping(PdfObject $object): void
{
    $limit = $this->ceilingVersion;

    // Records one warning per stripped property name of the given subject.
    $warnStripped = function (object $subject, $names) use ($limit): void {
        foreach ($names as $name) {
            $this->versionWarnings[] = sprintf(
                'Stripped property %s::$%s (requires PDF > %s)',
                $subject::class,
                $name,
                $limit->value,
            );
        }
    };

    // Class-level check: the entire object is above the ceiling.
    $classRequirement = VersionRequirementResolver::getClassRequirement($object);
    if ($classRequirement !== null && $classRequirement->isGreaterThan($limit)) {
        throw new CeilingVersionException($object::class, $classRequirement, $limit);
    }

    // Runtime check for objects that report their own minimum version.
    if ($object instanceof \Phpdftk\Pdf\Core\PdfVersionAware) {
        $runtimeRequirement = $object->getMinimumPdfVersion();
        if ($runtimeRequirement !== null && $runtimeRequirement->isGreaterThan($limit)) {
            throw new CeilingVersionException($object::class, $runtimeRequirement, $limit);
        }
    }

    $warnStripped($object, VersionRequirementResolver::stripIncompatibleProperties($object, $limit));

    // Inline Serializable children get the same treatment.
    $reflection = new \ReflectionClass($object);
    foreach ($reflection->getProperties(\ReflectionProperty::IS_PUBLIC) as $property) {
        if (!$property->isInitialized($object)) {
            continue;
        }

        $child = $property->getValue($object);
        if (!$child instanceof Serializable || $child instanceof PdfObject) {
            continue;
        }

        $childRequirement = VersionRequirementResolver::getClassRequirement($child);
        if ($childRequirement === null || !$childRequirement->isGreaterThan($limit)) {
            // The child itself is allowed; only its properties may need stripping.
            $warnStripped($child, VersionRequirementResolver::stripIncompatibleProperties($child, $limit));
            continue;
        }

        // The whole child is above the ceiling: clear the property holding it.
        $object->{$property->getName()} = null;
        $this->versionWarnings[] = sprintf(
            'Stripped inline %s from %s (requires PDF > %s)',
            $child::class,
            $object::class,
            $limit->value,
        );
    }
}
| 1309 | |
/**
 * Reconcile a required PDF version with the document's current version.
 *
 * Nothing happens when the requirement is not greater than the current
 * version. Otherwise strict version mode throws, and non-strict mode
 * raises the document version to the requirement and records a warning.
 *
 * @param PdfVersion $required Version demanded by $source.
 * @param string     $source   Name of whatever imposes the requirement (used in messages).
 *
 * @throws VersionRequirementException In strict version mode when $required exceeds the current version.
 */
private function applyVersionRequirement(PdfVersion $required, string $source): void
{
    // Already satisfied by the current version.
    if (!$required->isGreaterThan($this->version)) {
        return;
    }

    if ($this->strictVersionMode) {
        throw new VersionRequirementException($source, $required, $this->version);
    }

    $this->version = $required;
    $this->versionWarnings[] = sprintf(
        'Auto-bumped PDF version to %s for %s',
        $required->value,
        $source,
    );
}
| 1327 | } |