Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
91.60% covered (success)
91.60%
229 / 250
72.22% covered (warning)
72.22%
13 / 18
CRAP
0.00% covered (danger)
0.00%
0 / 1
IncrementalWriter
91.60% covered (success)
91.60%
229 / 250
72.22% covered (warning)
72.22%
13 / 18
90.38
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 setEncryption
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 fromReader
92.59% covered (success)
92.59%
25 / 27
0.00% covered (danger)
0.00%
0 / 1
6.01
 getPdfVersion
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 wasVersionBumped
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setStrictVersionMode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setDeprecationHandler
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setStrictDeprecation
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getVersionWarnings
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 addModifiedObject
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 deleteObject
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 addNewObject
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 generate
95.73% covered (success)
95.73%
112 / 117
0.00% covered (danger)
0.00%
0 / 1
38
 save
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 checkVersionRequirements
72.50% covered (warning)
72.50%
29 / 40
0.00% covered (danger)
0.00%
0 / 1
14.99
 enforceRemoval
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
4.25
 buildIncrementalXref
100.00% covered (success)
100.00%
26 / 26
100.00% covered (success)
100.00%
1 / 1
9
 findStartxrefOffset
77.78% covered (warning)
77.78%
7 / 9
0.00% covered (danger)
0.00%
0 / 1
3.10
1<?php
2
3declare(strict_types=1);
4
5namespace Phpdftk\Pdf\Core\File;
6
7use Phpdftk\Filesystem\LocalFilesystem;
8use Phpdftk\Filters\FlateFilter;
9use Phpdftk\Pdf\Core\Content\ContentStream;
10use Phpdftk\Pdf\Core\Document\CrossReferenceStream;
11use Phpdftk\Pdf\Core\Security\PdfEncryptor;
12use Phpdftk\Pdf\Core\PdfArray;
13use Phpdftk\Pdf\Core\PdfName;
14use Phpdftk\Pdf\Core\PdfNumber;
15use Phpdftk\Pdf\Core\PdfObject;
16use Phpdftk\Pdf\Core\PdfReference;
17use Phpdftk\Pdf\Core\PdfStream;
18use Phpdftk\Pdf\Core\PdfString;
19use Phpdftk\Pdf\Core\PdfVersion;
20use Phpdftk\Pdf\Core\Serializable;
21
22/**
23 * Incremental update writer — ISO 32000-2 §7.5.6.
24 *
25 * Appends modified/new objects to an existing PDF without rewriting
26 * the original file content. The result is a valid PDF where the
27 * original bytes are preserved intact, followed by:
28 *
29 *   1. New/modified indirect objects
30 *   2. A new cross-reference section (covering only the changed objects)
31 *   3. A new trailer with `/Prev` pointing to the original xref offset
32 *   4. `startxref` + `%%EOF`
33 *
34 * This preserves existing digital signatures and is significantly
35 * more efficient than a full rewrite for small modifications (e.g.,
36 * form filling, annotation additions).
37 */
38final class IncrementalWriter
39{
40    /** @var array<int, PdfObject> Objects to append (objNum => object) */
41    private array $objects = [];
42
43    /** @var list<int> Object numbers to mark as free (deleted) */
44    private array $deletedObjects = [];
45
46    private int $nextObjectNumber;
47    private bool $compressStreams;
48    private bool $useXRefStream;
49    private ?PdfEncryptor $encryptor = null;
50    private PdfVersion $version;
51    private bool $versionBumped = false;
52    private bool $strictVersionMode = false;
53    /** @var list<string> */
54    private array $versionWarnings = [];
55    /** @var (\Closure(string): void)|null */
56    private ?\Closure $deprecationHandler = null;
57    private bool $strictDeprecation = false;
58
59    /**
60     * @param string $originalPdf The complete bytes of the original PDF
61     * @param int    $originalSize Total object count from the original trailer /Size
62     * @param int    $originalXrefOffset The startxref byte offset from the original PDF
63     * @param PdfReference $rootRef The /Root reference from the original trailer
64     * @param PdfReference|null $infoRef The /Info reference (if present)
65     * @param PdfArray|null $idArray The /ID array from the original trailer
66     * @param PdfReference|null $encryptRef The /Encrypt reference (if present)
67     */
68    public function __construct(
69        private readonly string $originalPdf,
70        private readonly int $originalSize,
71        private readonly int $originalXrefOffset,
72        private readonly PdfReference $rootRef,
73        private readonly ?PdfReference $infoRef = null,
74        private readonly ?PdfArray $idArray = null,
75        private readonly ?PdfReference $encryptRef = null,
76        bool $compressStreams = true,
77        bool $useXRefStream = false,
78        PdfVersion $version = PdfVersion::V1_7,
79    ) {
80        $this->nextObjectNumber = $originalSize;
81        $this->compressStreams = $compressStreams;
82        $this->useXRefStream = $useXRefStream;
83        $this->version = $version;
84    }
85
86    /**
87     * Configure encryption for new/modified objects in the incremental update.
88     *
89     * The encryptor must use the same key as the original PDF's encryption.
90     * New and modified objects will be encrypted before serialization.
91     */
92    public function setEncryption(PdfEncryptor $encryptor): void
93    {
94        $this->encryptor = $encryptor;
95    }
96
97    /**
98     * Create an IncrementalWriter from a PdfReader instance.
99     *
100     * Extracts the necessary metadata (size, xref offset, root, info,
101     * ID, encrypt) from the reader's trailer.
102     */
103    public static function fromReader(
104        \Phpdftk\Pdf\Reader\PdfReader $reader,
105        string $originalPdf,
106        bool $compressStreams = true,
107        bool $useXRefStream = false,
108    ): self {
109        $trailer = $reader->getTrailer();
110
111        // Extract original startxref offset
112        $xrefOffset = self::findStartxrefOffset($originalPdf);
113
114        // Extract /Size
115        $sizeVal = $trailer->get('Size');
116        $size = $sizeVal instanceof \Phpdftk\Pdf\Core\PdfNumber
117            ? (int) $sizeVal->toPdf()
118            : 0;
119
120        // Extract /Root
121        $root = $trailer->get('Root');
122        if (!$root instanceof PdfReference) {
123            throw new \RuntimeException('Original PDF trailer missing /Root reference');
124        }
125
126        // Extract optional /Info
127        $info = $trailer->get('Info');
128        $infoRef = $info instanceof PdfReference ? $info : null;
129
130        // Extract optional /ID
131        $id = $trailer->get('ID');
132        $idArray = $id instanceof PdfArray ? $id : null;
133
134        // Extract optional /Encrypt
135        $encrypt = $trailer->get('Encrypt');
136        $encryptRef = $encrypt instanceof PdfReference ? $encrypt : null;
137
138        return new self(
139            $originalPdf,
140            $size,
141            $xrefOffset,
142            $root,
143            $infoRef,
144            $idArray,
145            $encryptRef,
146            $compressStreams,
147            $useXRefStream,
148            $reader->getPdfVersion(),
149        );
150    }
151
152    public function getPdfVersion(): PdfVersion
153    {
154        return $this->version;
155    }
156
157    /**
158     * Whether the version was auto-bumped during object registration.
159     *
160     * When true, callers that have access to the Catalog should update
161     * its /Version entry via an incremental update, since the original
162     * PDF header bytes are immutable.
163     */
164    public function wasVersionBumped(): bool
165    {
166        return $this->versionBumped;
167    }
168
169    public function setStrictVersionMode(bool $strict = true): void
170    {
171        $this->strictVersionMode = $strict;
172    }
173
174    public function setDeprecationHandler(\Closure $handler): void
175    {
176        $this->deprecationHandler = $handler;
177    }
178
179    public function setStrictDeprecation(bool $strict = true): void
180    {
181        $this->strictDeprecation = $strict;
182    }
183
184    /** @return list<string> */
185    public function getVersionWarnings(): array
186    {
187        return $this->versionWarnings;
188    }
189
190    /**
191     * Add a modified object (preserving its original object number).
192     *
193     * The object must already have its `objectNumber` set to the
194     * original value from the PDF being updated.
195     */
196    public function addModifiedObject(PdfObject $object): void
197    {
198        if ($object->objectNumber <= 0) {
199            throw new \InvalidArgumentException(
200                'Modified object must have its original objectNumber set',
201            );
202        }
203        $this->checkVersionRequirements($object);
204        $this->objects[$object->objectNumber] = $object;
205    }
206
207    /**
208     * Mark an object as deleted (free) in the incremental update.
209     *
210     * The object will appear as a free entry in the new xref section.
211     */
212    public function deleteObject(int $objNum): void
213    {
214        if ($objNum <= 0) {
215            throw new \InvalidArgumentException('Cannot delete object 0');
216        }
217        $this->deletedObjects[] = $objNum;
218    }
219
220    /**
221     * Add a new object (assigns a new sequential object number).
222     *
223     * @return PdfReference Reference to the newly assigned object
224     */
225    public function addNewObject(PdfObject $object): PdfReference
226    {
227        $this->checkVersionRequirements($object);
228        $objNum = $this->nextObjectNumber++;
229        $object->objectNumber = $objNum;
230        $object->generationNumber = 0;
231        $this->objects[$objNum] = $object;
232        return new PdfReference($objNum);
233    }
234
235    /**
236     * Generate the incremental update and return the complete PDF
237     * (original bytes + appended update).
238     */
239    public function generate(): string
240    {
241        if (empty($this->objects) && empty($this->deletedObjects)) {
242            return $this->originalPdf;
243        }
244
245        $chunks = [];
246
247        // Start with the original PDF bytes
248        $chunks[] = $this->originalPdf;
249
250        // Ensure we start on a new line after %%EOF
251        if (!str_ends_with($this->originalPdf, "\n")) {
252            $chunks[] = "\n";
253        }
254
255        $offset = strlen(implode('', $chunks));
256
257        // Optionally compress streams
258        if ($this->compressStreams) {
259            $flate = new FlateFilter();
260            foreach ($this->objects as $object) {
261                if ($object instanceof ContentStream) {
262                    // Materialize operators
263                    $ops = $object->getOperators();
264                    if (!empty($ops)) {
265                        $object->data = implode("\n", $ops);
266                        $object->clearOperators();
267                    }
268                }
269                if ($object instanceof PdfStream
270                    && !$object->dictionary->has('Filter')
271                    && $object->data !== ''
272                ) {
273                    $object->setFilter($flate, 'FlateDecode');
274                }
275            }
276        }
277
278        // Write modified/new objects and build xref entries.
279        // When encryption is active, clone objects and encrypt the clones
280        // so originals stay untouched (same pattern as PdfFileWriter).
281        $objectsToWrite = $this->objects;
282        if ($this->encryptor !== null) {
283            $flate = $this->compressStreams ? new FlateFilter() : null;
284            $clones = [];
285            foreach ($objectsToWrite as $objNum => $object) {
286                $clone = clone $object;
287                if ($clone instanceof PdfStream) {
288                    $clone->dictionary = clone $clone->dictionary;
289                }
290                // Materialize ContentStream operators into data
291                if ($clone instanceof ContentStream) {
292                    $ops = $clone->getOperators();
293                    if (!empty($ops)) {
294                        $clone->data = implode("\n", $ops);
295                        $clone->clearOperators();
296                    }
297                }
298                // Compress before encrypting (spec order: compress → encrypt)
299                if (
300                    $flate !== null
301                    && $clone instanceof PdfStream
302                    && !$clone->dictionary->has('Filter')
303                    && $clone->data !== ''
304                ) {
305                    $clone->data = $flate->encode($clone->data);
306                    $clone->dictionary->set('Filter', new PdfName('FlateDecode'));
307                }
308                $this->encryptor->encryptObject($clone);
309                $clones[$objNum] = $clone;
310            }
311            $objectsToWrite = $clones;
312        }
313
314        $xrefEntries = [];
315        foreach ($objectsToWrite as $objNum => $object) {
316            $xrefEntries[$objNum] = $offset;
317            $chunk = $object->toIndirectObject() . "\n";
318            $chunks[] = $chunk;
319            $offset += strlen($chunk);
320        }
321
322        // Compute new /Size
323        $newSize = max($this->nextObjectNumber, $this->originalSize);
324        foreach (array_keys($xrefEntries) as $objNum) {
325            $newSize = max($newSize, $objNum + 1);
326        }
327
328        // Updated /ID — first element is preserved (permanent identifier),
329        // second element is recomputed from the incremental body bytes so
330        // identical updates produce identical output (ISO 32000-2 §14.4).
331        $newIdArray = null;
332        if ($this->idArray !== null) {
333            $firstId = $this->idArray->items[0] ?? new PdfString('', hex: true);
334            $secondId = new PdfString(md5(implode('', $chunks), true), hex: true);
335            $newIdArray = new PdfArray([$firstId, $secondId]);
336        }
337
338        if ($this->useXRefStream) {
339            // PDF 1.5+ cross-reference stream for the incremental update
340            $xrefStream = new CrossReferenceStream();
341
342            // Add entries for modified/new objects
343            foreach ($xrefEntries as $objNum => $byteOffset) {
344                $xrefStream->addInUseEntry($byteOffset);
345            }
346            // Add free entries for deleted objects
347            foreach ($this->deletedObjects as $delObjNum) {
348                $xrefStream->addFreeEntry(0, 0);
349            }
350
351            // The xref stream itself is a new object
352            $xrefStreamObjNum = $newSize;
353            $xrefStream->objectNumber = $xrefStreamObjNum;
354            $newSize = $xrefStreamObjNum + 1;
355            $xrefStream->size = $newSize;
356            $xrefStream->prev = $this->originalXrefOffset;
357            $xrefStream->root = $this->rootRef;
358            if ($this->infoRef !== null) {
359                $xrefStream->info = $this->infoRef;
360            }
361            if ($this->encryptRef !== null) {
362                $xrefStream->encrypt = $this->encryptRef;
363            }
364            if ($newIdArray !== null) {
365                $xrefStream->id = $newIdArray;
366            }
367
368            // Build /Index array with subsection ranges
369            $allObjNums = array_keys($xrefEntries);
370            foreach ($this->deletedObjects as $d) {
371                $allObjNums[] = $d;
372            }
373            $allObjNums[] = $xrefStreamObjNum;
374            sort($allObjNums);
375
376            $indexPairs = [];
377            $currentStart = -1;
378            $currentCount = 0;
379            $lastNum = -2;
380            foreach ($allObjNums as $num) {
381                if ($num !== $lastNum + 1) {
382                    if ($currentCount > 0) {
383                        $indexPairs[] = new PdfNumber($currentStart);
384                        $indexPairs[] = new PdfNumber($currentCount);
385                    }
386                    $currentStart = $num;
387                    $currentCount = 1;
388                } else {
389                    $currentCount++;
390                }
391                $lastNum = $num;
392            }
393            $indexPairs[] = new PdfNumber($currentStart);
394            $indexPairs[] = new PdfNumber($currentCount);
395            $xrefStream->index = new PdfArray($indexPairs);
396
397            // Record the xref stream's own offset and add its entry
398            $xrefOffset = $offset;
399            $xrefStream->addInUseEntry($xrefOffset);
400
401            if ($this->compressStreams) {
402                $xrefStream->setFilter(new FlateFilter(), 'FlateDecode');
403            }
404
405            $chunks[] = $xrefStream->toIndirectObject() . "\n";
406            $chunks[] = "startxref\n" . $xrefOffset . "\n";
407            $chunks[] = '%%EOF';
408        } else {
409            // Classic cross-reference table
410            $xrefOffset = $offset;
411            $xrefChunk = $this->buildIncrementalXref($xrefEntries);
412            $chunks[] = $xrefChunk;
413
414            $trailer = new TrailerDictionary($this->rootRef);
415            $trailer->size = $newSize;
416            $trailer->prev = $this->originalXrefOffset;
417
418            if ($this->infoRef !== null) {
419                $trailer->info = $this->infoRef;
420            }
421            if ($this->encryptRef !== null) {
422                $trailer->encrypt = $this->encryptRef;
423            }
424            if ($newIdArray !== null) {
425                $trailer->id = $newIdArray;
426            }
427
428            $chunks[] = "trailer\n" . $trailer->toPdf() . "\n";
429            $chunks[] = "startxref\n" . $xrefOffset . "\n";
430            $chunks[] = '%%EOF';
431        }
432
433        return implode('', $chunks);
434    }
435
436    /**
437     * Write the incrementally updated PDF to a file.
438     */
439    public function save(string $path): void
440    {
441        LocalFilesystem::writeFile($path, $this->generate(), createDirectories: true);
442    }
443
444    /**
445     * Check version requirements and deprecation status for an object.
446     */
447    private function checkVersionRequirements(PdfObject $object): void
448    {
449        $deprecation = VersionRequirementResolver::getDeprecation($object);
450        if ($deprecation !== null) {
451            $msg = sprintf(
452                '%s is deprecated since PDF %s%s',
453                $object::class,
454                $deprecation->since,
455                $deprecation->replacement ? "; use {$deprecation->replacement} instead" : '',
456            );
457            $this->versionWarnings[] = $msg;
458            if ($this->deprecationHandler !== null) {
459                ($this->deprecationHandler)($msg);
460            }
461
462            $this->enforceRemoval($object::class, $deprecation);
463        }
464
465        $required = VersionRequirementResolver::getEffectiveRequirement($object);
466        if ($required->isGreaterThan($this->version)) {
467            if ($this->strictVersionMode) {
468                throw new VersionRequirementException($object::class, $required, $this->version);
469            }
470            $this->versionWarnings[] = sprintf(
471                'Auto-bumped PDF version to %s for %s',
472                $required->value,
473                $object::class,
474            );
475            $this->version = $required;
476            $this->versionBumped = true;
477        }
478
479        // Walk public Serializable properties for inline children
480        $ref = new \ReflectionClass($object);
481        foreach ($ref->getProperties(\ReflectionProperty::IS_PUBLIC) as $prop) {
482            if (!$prop->isInitialized($object)) {
483                continue;
484            }
485            $value = $prop->getValue($object);
486            if ($value instanceof Serializable && !$value instanceof PdfObject) {
487                $childRequired = VersionRequirementResolver::getEffectiveRequirement($value);
488                if ($childRequired->isGreaterThan($this->version)) {
489                    if ($this->strictVersionMode) {
490                        throw new VersionRequirementException($value::class, $childRequired, $this->version);
491                    }
492                    $this->versionWarnings[] = sprintf(
493                        'Auto-bumped PDF version to %s for %s',
494                        $childRequired->value,
495                        $value::class,
496                    );
497                    $this->version = $childRequired;
498                    $this->versionBumped = true;
499                }
500            }
501        }
502    }
503
504    /**
505     * Enforce removal: throw if the feature has a removedIn version and
506     * the target version is at or above it (in strict deprecation mode).
507     */
508    private function enforceRemoval(string $class, \Phpdftk\Pdf\Core\DeprecatedPdfFeature $deprecation): void
509    {
510        if ($deprecation->removedInVersion === null) {
511            return;
512        }
513
514        if ($this->version->isAtLeast($deprecation->removedInVersion) && $this->strictDeprecation) {
515            throw new DeprecatedFeatureException($class, $deprecation, $this->version);
516        }
517    }
518
519    /**
520     * Build a cross-reference table covering only the updated objects.
521     *
522     * Uses subsection format: each contiguous range of object numbers
523     * gets its own subsection header.
524     *
525     * @param array<int, int> $entries objNum => byte offset
526     */
527    private function buildIncrementalXref(array $entries): string
528    {
529        // Merge deleted objects as free entries (type 'f')
530        $allEntries = [];
531        foreach ($entries as $objNum => $byteOffset) {
532            $allEntries[$objNum] = sprintf("%010d 00000 n \r\n", $byteOffset);
533        }
534        foreach ($this->deletedObjects as $objNum) {
535            // Free entry: next free obj = 0, generation = 0
536            $allEntries[$objNum] = sprintf("%010d 00000 f \r\n", 0);
537        }
538        ksort($allEntries);
539
540        $xref = "xref\n";
541
542        // Group into contiguous subsections
543        $subsections = [];
544        $currentStart = -1;
545        $currentEntries = [];
546        $lastObjNum = -2;
547
548        foreach ($allEntries as $objNum => $line) {
549            if ($objNum !== $lastObjNum + 1) {
550                if (!empty($currentEntries)) {
551                    $subsections[] = [$currentStart, $currentEntries];
552                }
553                $currentStart = $objNum;
554                $currentEntries = [];
555            }
556            $currentEntries[] = $line;
557            $lastObjNum = $objNum;
558        }
559        if (!empty($currentEntries)) {
560            $subsections[] = [$currentStart, $currentEntries];
561        }
562
563        foreach ($subsections as [$start, $entryLines]) {
564            $xref .= sprintf("%d %d\n", $start, count($entryLines));
565            foreach ($entryLines as $line) {
566                $xref .= $line;
567            }
568        }
569
570        return $xref;
571    }
572
573    /**
574     * Find the startxref byte offset from the end of a PDF.
575     */
576    private static function findStartxrefOffset(string $pdf): int
577    {
578        $tailLen = min(1024, strlen($pdf));
579        $tail = substr($pdf, -$tailLen);
580        $pos = strrpos($tail, 'startxref');
581        if ($pos === false) {
582            throw new \RuntimeException('Cannot find startxref in PDF');
583        }
584        $after = substr($tail, $pos + strlen('startxref'));
585        if (!preg_match('/\s+(\d+)/', $after, $m)) {
586            throw new \RuntimeException('Cannot parse startxref offset');
587        }
588        return (int) $m[1];
589    }
590}