Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
91.38% |
106 / 116 |
|
50.00% |
2 / 4 |
CRAP | |
0.00% |
0 / 1 |
| Shaper | |
91.38% |
106 / 116 |
|
50.00% |
2 / 4 |
30.58 | |
0.00% |
0 / 1 |
| shapeRun | |
100.00% |
51 / 51 |
|
100.00% |
1 / 1 |
12 | |||
| lookupGid | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| applyLigaturesWithMap | |
94.44% |
34 / 36 |
|
0.00% |
0 / 1 |
11.02 | |||
| decodeUtf8 | |
71.43% |
20 / 28 |
|
0.00% |
0 / 1 |
6.84 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Phpdftk\Text; |
| 6 | |
| 7 | use Phpdftk\FontParser\OpenTypeData; |
| 8 | |
/**
 * OpenType text shaper.
 *
 * Phase-1 implementation: cmap-based codepoint → glyph mapping, GSUB
 * ligature substitution driven by the font's `ligatures` table, and pair
 * kerning via the font's `kernPairs` table (legacy kern; modern `GPOS`
 * lookups will land in Phase 2 alongside Arabic/Indic shaping).
 *
 * For glyphs whose codepoint isn't in the font's `fullUnicodeToGid` map,
 * the shaper emits glyph 0 (`.notdef`). Higher-level paragraph shaping —
 * which would attempt font fallback before falling back to `.notdef` —
 * lives at the layout layer once font stacks are wired in.
 *
 * Advances are reported in PDF user-space units (1pt = 1/72in), already
 * scaled by `fontSizePt / unitsPerEm`. Layout consumers do not need to
 * know about font design units.
 */
final class Shaper
{
    /**
     * Shape one run of UTF-8 text into positioned glyphs.
     *
     * Every emitted glyph records the byte range of `$text` it was produced
     * from, so a ligature glyph covers the bytes of all of its components.
     * Malformed UTF-8 is decoded as U+FFFD, one replacement per bad byte.
     *
     * @param string         $text    UTF-8 encoded input; may be empty.
     * @param ShapingContext $context Supplies the font, font size, direction
     *                                and the enabled feature tags (`liga`,
     *                                `kern`).
     * @return ShapedRun The glyph list plus the sum of all glyph advances.
     */
    public function shapeRun(string $text, ShapingContext $context): ShapedRun
    {
        $font = $context->font;
        if ($text === '') {
            return new ShapedRun(
                $font,
                $context->fontSizePt,
                $context->direction,
                [],
                0.0,
            );
        }

        $codepoints = self::decodeUtf8($text);

        // Track which codepoint each output GID came from, so ligature
        // substitution can preserve byte offsets for the consolidated glyph.
        // `sourceMap` is per output-glyph index → [startCpIdx, endCpIdxExclusive].
        $gids = [];
        $sourceMap = [];
        foreach ($codepoints as $index => $cp) {
            $gids[] = self::lookupGid($cp['codepoint'], $font);
            $sourceMap[] = [$index, $index + 1];
        }

        if (in_array('liga', $context->features, true) && $font->ligatures !== null) {
            [$gids, $sourceMap] = self::applyLigaturesWithMap($gids, $sourceMap, $font->ligatures);
        }

        // Fall back to 1000 units per em when the font reports a
        // non-positive value, so the division below is always defined.
        $scale = $context->fontSizePt / ($font->unitsPerEm > 0 ? $font->unitsPerEm : 1000);
        $applyKern = in_array('kern', $context->features, true) && $font->kernPairs !== null;
        $kernPairs = $font->kernPairs ?? [];

        $glyphs = [];
        $totalAdvance = 0.0;
        $count = count($gids);
        $codepointCount = count($codepoints);
        $textLength = strlen($text);
        for ($i = 0; $i < $count; $i++) {
            $gid = $gids[$i];
            $advanceUnits = $font->glyphWidths[$gid] ?? 0;
            if ($applyKern && $i + 1 < $count) {
                // The pair adjustment is folded into the left glyph's advance.
                $advanceUnits += $kernPairs[$gid][$gids[$i + 1]] ?? 0;
            }
            $advanceX = $advanceUnits * $scale;

            [$startIdx, $endIdx] = $sourceMap[$i];
            $startByte = $codepoints[$startIdx]['byteOffset'];
            // The last glyph has no following codepoint; it ends at the end
            // of the input string.
            $endByte = $endIdx < $codepointCount
                ? $codepoints[$endIdx]['byteOffset']
                : $textLength;

            $glyphs[] = new ShapedGlyph(
                glyphId: $gid,
                sourceOffset: $startByte,
                sourceLength: $endByte - $startByte,
                advanceX: $advanceX,
            );
            $totalAdvance += $advanceX;
        }

        return new ShapedRun(
            $font,
            $context->fontSizePt,
            $context->direction,
            $glyphs,
            $totalAdvance,
        );
    }

    /**
     * Map a Unicode codepoint to a glyph id, or 0 (`.notdef`) when the font's
     * `fullUnicodeToGid` map has no entry for it.
     */
    private static function lookupGid(int $codepoint, OpenTypeData $font): int
    {
        return $font->fullUnicodeToGid[$codepoint] ?? 0;
    }

    /**
     * Apply ligature substitution while keeping a parallel sourceMap, so each
     * surviving glyph still points back to the codepoint range (and therefore
     * the byte range) in the original input.
     *
     * `$ligatures` is keyed by the first glyph of a ligature; each rule's
     * `components` lists the glyphs that must FOLLOW that first glyph. Rules
     * are tried in array order and the first full match wins.
     *
     * @param list<int> $gids
     * @param array<int, array{int, int}> $sourceMap
     * @param array<int, list<array{components: int[], ligature: int}>> $ligatures
     * @return array{list<int>, array<int, array{int, int}>}
     */
    private static function applyLigaturesWithMap(array $gids, array $sourceMap, array $ligatures): array
    {
        if ($gids === [] || $ligatures === []) {
            return [$gids, $sourceMap];
        }
        $outGids = [];
        $outMap = [];
        $i = 0;
        $len = count($gids);
        while ($i < $len) {
            $gid = $gids[$i];
            $matched = false;
            foreach ($ligatures[$gid] ?? [] as $rule) {
                $components = $rule['components'];
                $compLen = count($components);
                // The trailing components occupy indices $i+1 … $i+$compLen,
                // all of which must exist.
                if ($i + $compLen >= $len) {
                    continue;
                }
                $allMatch = true;
                for ($j = 0; $j < $compLen; $j++) {
                    if ($gids[$i + 1 + $j] !== $components[$j]) {
                        $allMatch = false;
                        break;
                    }
                }
                if ($allMatch) {
                    $outGids[] = $rule['ligature'];
                    // The ligature spans from the first glyph's start to the
                    // last component's end.
                    $outMap[] = [
                        $sourceMap[$i][0],
                        $sourceMap[$i + $compLen][1],
                    ];
                    $i += 1 + $compLen;
                    $matched = true;
                    break;
                }
            }
            if (!$matched) {
                $outGids[] = $gid;
                $outMap[] = $sourceMap[$i];
                $i++;
            }
        }
        return [$outGids, $outMap];
    }

    /**
     * Decode a UTF-8 byte string into codepoints with their byte offsets.
     *
     * Well-formed sequences decode to their scalar value. Any byte that
     * cannot start a well-formed sequence — a stray continuation byte, an
     * invalid lead byte (0xC0, 0xC1, 0xF5–0xFF), a lead byte whose
     * continuation bytes are missing or malformed, an overlong form, a
     * UTF-16 surrogate, or a value above U+10FFFF — yields one U+FFFD entry
     * and consumes exactly that one byte, so valid text following the damage
     * is never swallowed.
     *
     * @return list<array{codepoint: int, byteOffset: int}>
     */
    private static function decodeUtf8(string $text): array
    {
        $out = [];
        $bytes = strlen($text);
        $i = 0;
        while ($i < $bytes) {
            $byte = ord($text[$i]);
            if ($byte < 0x80) {
                $out[] = ['codepoint' => $byte, 'byteOffset' => $i];
                $i++;
                continue;
            }

            // [continuation byte count, lead-byte payload, smallest codepoint
            // that legitimately needs this sequence length].
            [$extra, $cp, $min] = match (true) {
                $byte >= 0xC2 && $byte <= 0xDF => [1, $byte & 0x1F, 0x80],
                $byte >= 0xE0 && $byte <= 0xEF => [2, $byte & 0x0F, 0x800],
                $byte >= 0xF0 && $byte <= 0xF4 => [3, $byte & 0x07, 0x10000],
                default => [0, 0, 0],
            };

            // All continuation bytes must exist inside the string.
            $valid = $extra > 0 && $i + $extra < $bytes;
            for ($j = 1; $valid && $j <= $extra; $j++) {
                $next = ord($text[$i + $j]);
                if (($next & 0xC0) !== 0x80) {
                    $valid = false;
                } else {
                    $cp = ($cp << 6) | ($next & 0x3F);
                }
            }
            if ($valid && ($cp < $min || $cp > 0x10FFFF || ($cp >= 0xD800 && $cp <= 0xDFFF))) {
                // Overlong encoding, out-of-range value, or surrogate.
                $valid = false;
            }

            if ($valid) {
                $out[] = ['codepoint' => $cp, 'byteOffset' => $i];
                $i += 1 + $extra;
            } else {
                $out[] = ['codepoint' => 0xFFFD, 'byteOffset' => $i];
                $i++;
            }
        }
        return $out;
    }
}