blob: 1d10e0ccb8ed4e0f62b041af8e9de03f14da95df [file] [log] [blame]
Matthias Andreas Benkard7b2a3a12021-08-16 10:57:25 +02001<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Translation;
13
14use Symfony\Contracts\Translation\TranslatorInterface;
15
/**
 * This translator should only be used in a development environment.
 *
 * It decorates another translator and alters every translated string it
 * returns (accented look-alike characters, padding, surrounding brackets) so
 * that untranslated or badly laid out strings are easy to spot.
 */
final class PseudoLocalizationTranslator implements TranslatorInterface
{
    private const EXPANSION_CHARACTER = '~';

    private TranslatorInterface $translator;
    private bool $accents;
    private float $expansionFactor;
    private bool $brackets;
    private bool $parseHTML;

    /**
     * @var string[]
     */
    private array $localizableHTMLAttributes;

    /**
     * Available options:
     *  * accents:
     *      type: boolean
     *      default: true
     *      description: replace ASCII characters of the translated string with accented versions or similar characters
     *      example: if true, "foo" => "ƒöö".
     *
     *  * expansion_factor:
     *      type: float
     *      default: 1
     *      validation: it must be greater than or equal to 1
     *      description: expand the translated string by the given factor with spaces and tildes
     *      example: if 2 (with the "accents" and "brackets" options set to false), "foo" => "foo ~~"
     *
     *  * brackets:
     *      type: boolean
     *      default: true
     *      description: wrap the translated string with brackets
     *      example: if true, "foo" => "[foo]"
     *
     *  * parse_html:
     *      type: boolean
     *      default: false
     *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
     *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - e.g., "foo <div>bar" => "foo <div>bar</div>"
     *
     *  * localizable_html_attributes:
     *      type: string[]
     *      default: []
     *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
     *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
     *
     * @throws \InvalidArgumentException When the expansion factor is lower than 1
     */
    public function __construct(TranslatorInterface $translator, array $options = [])
    {
        $this->translator = $translator;
        $this->accents = $options['accents'] ?? true;

        if (1.0 > ($this->expansionFactor = $options['expansion_factor'] ?? 1.0)) {
            throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
        }

        $this->brackets = $options['brackets'] ?? true;

        $this->parseHTML = $options['parse_html'] ?? false;
        if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
            // Neither accents nor expansion will run, so nothing needs the
            // parsed structure: skip the HTML parsing altogether.
            $this->parseHTML = false;
        }

        $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
    }

    /**
     * {@inheritdoc}
     *
     * Translates the message with the decorated translator, then applies, in
     * order: accents on the localizable parts, expansion based on the visible
     * text, and finally the surrounding brackets.
     */
    public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
    {
        $trans = '';
        $visibleText = '';

        foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
            if ($visible) {
                $visibleText .= $text;
            }

            if (!$localizable) {
                // Markup and protected attribute data are copied verbatim.
                $trans .= $text;

                continue;
            }

            $this->addAccents($trans, $text);
        }

        $this->expand($trans, $visibleText);

        $this->addBrackets($trans);

        return $trans;
    }

    /**
     * Returns the locale of the decorated translator.
     */
    public function getLocale(): string
    {
        return $this->translator->getLocale();
    }

    /**
     * Splits a translated string into parts.
     *
     * @return array<int, array{0: bool, 1: bool, 2: string}> A list of [visible, localizable, text] triples
     */
    private function getParts(string $originalTrans): array
    {
        if (!$this->parseHTML) {
            return [[true, true, $originalTrans]];
        }

        // Turn every non-ASCII character into a numeric entity so the HTML
        // parser does not have to guess the charset of the fragment. This
        // replaces mb_convert_encoding(..., 'HTML-ENTITIES'), which is
        // deprecated since PHP 8.2.
        $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

        // Collect libxml errors internally while parsing, then restore the caller's setting.
        $useInternalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        $dom->loadHTML('<trans>'.$html.'</trans>');

        libxml_clear_errors();
        libxml_use_internal_errors($useInternalErrors);

        // Expected tree: item(1) is <html> (item(0) being the doctype), whose
        // first child is <body>, whose first child is the <trans> wrapper.
        return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
    }

    /**
     * Recursively flattens the children of a DOM node into parts.
     *
     * Non-element children are visible and localizable; tag markup is neither;
     * attribute values are never visible and are localizable only when the
     * attribute is listed in the "localizable_html_attributes" option.
     *
     * @return array<int, array{0: bool, 1: bool, 2: string}> A list of [visible, localizable, text] triples
     */
    private function parseNode(\DOMNode $node): array
    {
        $parts = [];

        foreach ($node->childNodes as $childNode) {
            if (!$childNode instanceof \DOMElement) {
                $parts[] = [true, true, $childNode->nodeValue];

                continue;
            }

            $parts[] = [false, false, '<'.$childNode->tagName];

            /** @var \DOMAttr $attribute */
            foreach ($childNode->attributes as $attribute) {
                $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                // With PREG_SPLIT_DELIM_CAPTURE, even offsets hold plain text
                // and odd offsets hold the escaped entities, which must never
                // be altered.
                foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                    if ('' === $match) {
                        continue;
                    }

                    $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                }

                $parts[] = [false, false, '"'];
            }

            $parts[] = [false, false, '>'];

            $parts = array_merge($parts, $this->parseNode($childNode));

            $parts[] = [false, false, '</'.$childNode->tagName.'>'];
        }

        return $parts;
    }

    /**
     * Appends $text to $trans, replacing printable ASCII characters with
     * similar-looking characters when the "accents" option is enabled.
     */
    private function addAccents(string &$trans, string $text): void
    {
        if (!$this->accents) {
            $trans .= $text;

            return;
        }

        $trans .= strtr($text, [
            // The replacement below is a non-ASCII space look-alike: keep the literal intact.
            ' ' => ' ',
            '!' => '¡',
            '"' => '″',
            '#' => '♯',
            '$' => '€',
            '%' => '‰',
            '&' => '⅋',
            '\'' => '´',
            '(' => '{',
            ')' => '}',
            '*' => '⁎',
            '+' => '⁺',
            ',' => '،',
            '-' => '‐',
            '.' => '·',
            '/' => '⁄',
            '0' => '⓪',
            '1' => '①',
            '2' => '②',
            '3' => '③',
            '4' => '④',
            '5' => '⑤',
            '6' => '⑥',
            '7' => '⑦',
            '8' => '⑧',
            '9' => '⑨',
            ':' => '∶',
            ';' => '⁏',
            '<' => '≤',
            '=' => '≂',
            '>' => '≥',
            '?' => '¿',
            '@' => '՞',
            'A' => 'Å',
            'B' => 'Ɓ',
            'C' => 'Ç',
            'D' => 'Ð',
            'E' => 'É',
            'F' => 'Ƒ',
            'G' => 'Ĝ',
            'H' => 'Ĥ',
            'I' => 'Î',
            'J' => 'Ĵ',
            'K' => 'Ķ',
            'L' => 'Ļ',
            'M' => 'Ṁ',
            'N' => 'Ñ',
            'O' => 'Ö',
            'P' => 'Þ',
            'Q' => 'Ǫ',
            'R' => 'Ŕ',
            'S' => 'Š',
            'T' => 'Ţ',
            'U' => 'Û',
            'V' => 'Ṽ',
            'W' => 'Ŵ',
            'X' => 'Ẋ',
            'Y' => 'Ý',
            'Z' => 'Ž',
            '[' => '⁅',
            '\\' => '∖',
            ']' => '⁆',
            '^' => '˄',
            '_' => '‿',
            '`' => '‵',
            'a' => 'å',
            'b' => 'ƀ',
            'c' => 'ç',
            'd' => 'ð',
            'e' => 'é',
            'f' => 'ƒ',
            'g' => 'ĝ',
            'h' => 'ĥ',
            'i' => 'î',
            'j' => 'ĵ',
            'k' => 'ķ',
            'l' => 'ļ',
            'm' => 'ɱ',
            'n' => 'ñ',
            'o' => 'ö',
            'p' => 'þ',
            'q' => 'ǫ',
            'r' => 'ŕ',
            's' => 'š',
            't' => 'ţ',
            'u' => 'û',
            'v' => 'ṽ',
            'w' => 'ŵ',
            'x' => 'ẋ',
            'y' => 'ý',
            'z' => 'ž',
            '{' => '(',
            '|' => '¦',
            '}' => ')',
            '~' => '˞',
        ]);
    }

    /**
     * Pads $trans with fake "words" made of the expansion character until the
     * visible length reaches ceil(visible length * expansion factor).
     *
     * Fake word lengths are drawn at random from the lengths of the words of
     * the visible text, weighted by how often each length occurs.
     */
    private function expand(string &$trans, string $visibleText): void
    {
        if (1.0 >= $this->expansionFactor) {
            return;
        }

        $visibleLength = $this->strlen($visibleText);
        $missingLength = (int) (ceil($visibleLength * $this->expansionFactor)) - $visibleLength;
        if ($this->brackets) {
            // The two brackets added afterwards count towards the target length.
            $missingLength -= 2;
        }

        if (0 >= $missingLength) {
            return;
        }

        // Histogram "word length => occurrences", limited to words that leave
        // room for the separating space.
        $words = [];
        $wordsCount = 0;
        foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
            $wordLength = $this->strlen($word);

            if ($wordLength >= $missingLength) {
                continue;
            }

            if (!isset($words[$wordLength])) {
                $words[$wordLength] = 0;
            }

            ++$words[$wordLength];
            ++$wordsCount;
        }

        if (!$words) {
            $trans .= $this->getTrailingPadding($missingLength);

            return;
        }

        // Most frequent lengths first (keys are preserved).
        arsort($words, \SORT_NUMERIC);

        $longestWordLength = max(array_keys($words));

        while (true) {
            // Weighted random pick of a word length.
            $r = mt_rand(1, $wordsCount);

            foreach ($words as $length => $count) {
                $r -= $count;
                if ($r <= 0) {
                    break;
                }
            }

            $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

            $missingLength -= $length + 1;

            if (0 === $missingLength) {
                return;
            }

            // Drop the lengths that no longer fit in the remaining space.
            while ($longestWordLength >= $missingLength) {
                $wordsCount -= $words[$longestWordLength];
                unset($words[$longestWordLength]);

                if (!$words) {
                    $trans .= $this->getTrailingPadding($missingLength);

                    return;
                }

                $longestWordLength = max(array_keys($words));
            }
        }
    }

    /**
     * Builds the last padding chunk, exactly $missingLength characters long:
     * a lone expansion character, or a space followed by expansion characters.
     */
    private function getTrailingPadding(int $missingLength): string
    {
        if (1 === $missingLength) {
            return self::EXPANSION_CHARACTER;
        }

        return ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);
    }

    /**
     * Wraps $trans in square brackets when the "brackets" option is enabled.
     */
    private function addBrackets(string &$trans): void
    {
        if (!$this->brackets) {
            return;
        }

        $trans = '['.$trans.']';
    }

    /**
     * Returns the length of $s in characters of its detected encoding, or in
     * bytes when no encoding can be detected.
     */
    private function strlen(string $s): int
    {
        return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
    }
}
368}