blob: 49f122eb83b344b53b80176f2098c2d49b31276d [file] [log] [blame]
Matthias Andreas Benkard7b2a3a12021-08-16 10:57:25 +02001<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Translation;
13
14use Symfony\Contracts\Translation\TranslatorInterface;
15
/**
 * Decorates another translator and "pseudo-localizes" the strings it returns:
 * characters can be replaced by accented look-alikes, the string can be padded
 * to simulate longer translations, and the result can be wrapped in brackets.
 *
 * This translator should only be used in a development environment.
 */
final class PseudoLocalizationTranslator implements TranslatorInterface
{
    /** Character used to pad the translated string when expanding it. */
    private const EXPANSION_CHARACTER = '~';

    /** @var TranslatorInterface The decorated translator that provides the real translation. */
    private $translator;

    /** @var bool Whether ASCII characters are replaced with accented/similar characters. */
    private $accents;

    /** @var float Factor (>= 1) by which the visible text length is expanded. */
    private $expansionFactor;

    /** @var bool Whether the result is wrapped in "[" and "]". */
    private $brackets;

    /** @var bool Whether the translated string is parsed as HTML so that markup is left untouched. */
    private $parseHTML;

    /** @var string[] Names of the HTML attributes whose values may be altered. */
    private $localizableHTMLAttributes;

    /**
     * Available options:
     *  * accents:
     *      type: boolean
     *      default: true
     *      description: replace ASCII characters of the translated string with accented versions or similar characters
     *      example: if true, "foo" => "ƒöö".
     *
     *  * expansion_factor:
     *      type: float
     *      default: 1
     *      validation: it must be greater than or equal to 1
     *      description: expand the translated string by the given factor with spaces and tildes appended to it
     *      example: if 2 (with the "accents" and "brackets" options set to false), "foo" => "foo ~~"
     *
     *  * brackets:
     *      type: boolean
     *      default: true
     *      description: wrap the translated string with brackets
     *      example: if true, "foo" => "[foo]"
     *
     *  * parse_html:
     *      type: boolean
     *      default: false
     *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows them to be preserved from alterations - it also allows computing the visible translated string length, which is useful to correctly expand it when it contains HTML
     *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
     *
     *  * localizable_html_attributes:
     *      type: string[]
     *      default: []
     *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
     *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
     *
     * @throws \InvalidArgumentException When the expansion factor is lower than 1
     */
    public function __construct(TranslatorInterface $translator, array $options = [])
    {
        $this->translator = $translator;
        $this->accents = $options['accents'] ?? true;

        // Normalize to float so that an integer factor (e.g. 1) also satisfies
        // the strict "1.0 ===" comparison used below.
        $this->expansionFactor = (float) ($options['expansion_factor'] ?? 1.0);
        if (1.0 > $this->expansionFactor) {
            throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
        }

        $this->brackets = $options['brackets'] ?? true;

        $this->parseHTML = $options['parse_html'] ?? false;
        if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
            // Neither accents nor expansion would touch the string: parsing the HTML would be wasted work.
            $this->parseHTML = false;
        }

        $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
    }

    /**
     * {@inheritdoc}
     */
    public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null)
    {
        $trans = '';
        $visibleText = '';

        // Each part is a [visible, localizable, text] triple produced by getParts().
        foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
            if ($visible) {
                // Only visible text counts towards the length used for expansion.
                $visibleText .= $text;
            }

            if (!$localizable) {
                $trans .= $text;

                continue;
            }

            $this->addAccents($trans, $text);
        }

        $this->expand($trans, $visibleText);

        $this->addBrackets($trans);

        return $trans;
    }

    /**
     * Returns the locale of the decorated translator.
     */
    public function getLocale(): string
    {
        return $this->translator->getLocale();
    }

    /**
     * Splits the translated string into [visible, localizable, text] triples.
     *
     * Without HTML parsing the whole string is a single visible and localizable part.
     * With HTML parsing the string is loaded into a DOM document and flattened by parseNode().
     *
     * @return array<int, array{0: bool, 1: bool, 2: string}>
     */
    private function getParts(string $originalTrans): array
    {
        if (!$this->parseHTML) {
            return [[true, true, $originalTrans]];
        }

        // Encode every non-ASCII character as a numeric entity before handing the markup to
        // DOMDocument::loadHTML(). This replaces mb_convert_encoding(..., 'HTML-ENTITIES', ...),
        // which is deprecated as of PHP 8.2.
        $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

        // Silence libxml warnings while parsing, then restore the previous setting.
        $useInternalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        $dom->loadHTML('<trans>'.$html.'</trans>');

        libxml_clear_errors();
        libxml_use_internal_errors($useInternalErrors);

        // NOTE(review): this path assumes the loaded document is doctype, then <html> > <body> > <trans> - confirm for edge cases such as an empty string.
        return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
    }

    /**
     * Recursively flattens the children of a DOM node into [visible, localizable, text] triples.
     *
     * Tags and attribute markup are emitted as non-visible, non-localizable parts; attribute values
     * are localizable only when the attribute name is listed in the "localizable_html_attributes" option.
     *
     * @return array<int, array{0: bool, 1: bool, 2: string}>
     */
    private function parseNode(\DOMNode $node): array
    {
        $parts = [];

        foreach ($node->childNodes as $childNode) {
            if (!$childNode instanceof \DOMElement) {
                // NOTE(review): nodeValue is emitted as-is (not re-escaped) - confirm this is intended for text containing entities.
                $parts[] = [true, true, $childNode->nodeValue];

                continue;
            }

            $parts[] = [false, false, '<'.$childNode->tagName];

            /** @var \DOMAttr $attribute */
            foreach ($childNode->attributes as $attribute) {
                $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                // With PREG_SPLIT_DELIM_CAPTURE, even indexes hold plain text and odd indexes hold the
                // captured entities, which must never be altered.
                foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                    if ('' === $match) {
                        continue;
                    }

                    $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                }

                $parts[] = [false, false, '"'];
            }

            $parts[] = [false, false, '>'];

            $parts = array_merge($parts, $this->parseNode($childNode));

            $parts[] = [false, false, '</'.$childNode->tagName.'>'];
        }

        return $parts;
    }

    /**
     * Appends $text to $trans, replacing printable ASCII characters with accented or
     * similar-looking characters when the "accents" option is enabled.
     */
    private function addAccents(string &$trans, string $text): void
    {
        // NOTE(review): the first entry appears to map a space to a plain space (a no-op) - confirm whether a wider Unicode space was intended.
        $trans .= $this->accents ? strtr($text, [
            ' ' => ' ',
            '!' => '¡',
            '"' => '″',
            '#' => '♯',
            '$' => '€',
            '%' => '‰',
            '&' => '⅋',
            '\'' => '´',
            '(' => '{',
            ')' => '}',
            '*' => '⁎',
            '+' => '⁺',
            ',' => '،',
            '-' => '‐',
            '.' => '·',
            '/' => '⁄',
            '0' => '⓪',
            '1' => '①',
            '2' => '②',
            '3' => '③',
            '4' => '④',
            '5' => '⑤',
            '6' => '⑥',
            '7' => '⑦',
            '8' => '⑧',
            '9' => '⑨',
            ':' => '∶',
            ';' => '⁏',
            '<' => '≤',
            '=' => '≂',
            '>' => '≥',
            '?' => '¿',
            '@' => '՞',
            'A' => 'Å',
            'B' => 'Ɓ',
            'C' => 'Ç',
            'D' => 'Ð',
            'E' => 'É',
            'F' => 'Ƒ',
            'G' => 'Ĝ',
            'H' => 'Ĥ',
            'I' => 'Î',
            'J' => 'Ĵ',
            'K' => 'Ķ',
            'L' => 'Ļ',
            'M' => 'Ṁ',
            'N' => 'Ñ',
            'O' => 'Ö',
            'P' => 'Þ',
            'Q' => 'Ǫ',
            'R' => 'Ŕ',
            'S' => 'Š',
            'T' => 'Ţ',
            'U' => 'Û',
            'V' => 'Ṽ',
            'W' => 'Ŵ',
            'X' => 'Ẋ',
            'Y' => 'Ý',
            'Z' => 'Ž',
            '[' => '⁅',
            '\\' => '∖',
            ']' => '⁆',
            '^' => '˄',
            '_' => '‿',
            '`' => '‵',
            'a' => 'å',
            'b' => 'ƀ',
            'c' => 'ç',
            'd' => 'ð',
            'e' => 'é',
            'f' => 'ƒ',
            'g' => 'ĝ',
            'h' => 'ĥ',
            'i' => 'î',
            'j' => 'ĵ',
            'k' => 'ķ',
            'l' => 'ļ',
            'm' => 'ɱ',
            'n' => 'ñ',
            'o' => 'ö',
            'p' => 'þ',
            'q' => 'ǫ',
            'r' => 'ŕ',
            's' => 'š',
            't' => 'ţ',
            'u' => 'û',
            'v' => 'ṽ',
            'w' => 'ŵ',
            'x' => 'ẋ',
            'y' => 'ý',
            'z' => 'ž',
            '{' => '(',
            '|' => '¦',
            '}' => ')',
            '~' => '˞',
        ]) : $text;
    }

    /**
     * Appends padding "words" made of the expansion character to $trans so that the visible
     * length grows by the configured expansion factor.
     *
     * The lengths of the padding words are drawn at random from the lengths of the words found
     * in $visibleText; when brackets are enabled, two characters are reserved for them.
     */
    private function expand(string &$trans, string $visibleText): void
    {
        if (1.0 >= $this->expansionFactor) {
            return;
        }

        $visibleLength = $this->strlen($visibleText);
        $missingLength = (int) ceil($visibleLength * $this->expansionFactor) - $visibleLength;
        if ($this->brackets) {
            // The two brackets added later count towards the expanded length.
            $missingLength -= 2;
        }

        if (0 >= $missingLength) {
            return;
        }

        // Histogram: word length => number of words of that length. Only words that still leave
        // room for the separating space are kept as candidates.
        $words = [];
        $wordsCount = 0;
        foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
            $wordLength = $this->strlen($word);

            if ($wordLength >= $missingLength) {
                continue;
            }

            if (!isset($words[$wordLength])) {
                $words[$wordLength] = 0;
            }

            ++$words[$wordLength];
            ++$wordsCount;
        }

        if (!$words) {
            // No candidate length fits: fill the remaining room in one go.
            $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

            return;
        }

        // Most frequent lengths first.
        arsort($words, \SORT_NUMERIC);

        $longestWordLength = max(array_keys($words));

        while (true) {
            // Weighted random pick of a word length, proportional to how often it occurs.
            $r = mt_rand(1, $wordsCount);

            foreach ($words as $length => $count) {
                $r -= $count;
                if ($r <= 0) {
                    break;
                }
            }

            $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

            // The padding word plus its leading space have been consumed.
            $missingLength -= $length + 1;

            if (0 === $missingLength) {
                return;
            }

            // Drop the lengths that no longer fit in the remaining room.
            while ($longestWordLength >= $missingLength) {
                $wordsCount -= $words[$longestWordLength];
                unset($words[$longestWordLength]);

                if (!$words) {
                    $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

                    return;
                }

                $longestWordLength = max(array_keys($words));
            }
        }
    }

    /**
     * Wraps $trans in "[" and "]" when the "brackets" option is enabled.
     */
    private function addBrackets(string &$trans): void
    {
        if (!$this->brackets) {
            return;
        }

        $trans = '['.$trans.']';
    }

    /**
     * Returns the length of $s in characters of its detected encoding, falling back to the
     * byte length when the encoding cannot be detected.
     */
    private function strlen(string $s): int
    {
        return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
    }
}
364}