Matthias Andreas Benkard | 7b2a3a1 | 2021-08-16 10:57:25 +0200 | [diff] [blame^] | 1 | <?php |
| 2 | |
| 3 | /* |
| 4 | * This file is part of the Symfony package. |
| 5 | * |
| 6 | * (c) Fabien Potencier <fabien@symfony.com> |
| 7 | * |
| 8 | * For the full copyright and license information, please view the LICENSE |
| 9 | * file that was distributed with this source code. |
| 10 | */ |
| 11 | |
| 12 | namespace Symfony\Component\Translation; |
| 13 | |
| 14 | use Symfony\Contracts\Translation\TranslatorInterface; |
| 15 | |
/**
 * Decorates another translator and pseudo-localizes every message it returns
 * (accented look-alike characters, length expansion, surrounding brackets).
 *
 * This translator should only be used in a development environment.
 */
final class PseudoLocalizationTranslator implements TranslatorInterface
{
    private const EXPANSION_CHARACTER = '~';

    /** @var TranslatorInterface the decorated translator that produces the real message */
    private $translator;

    /** @var bool whether localizable text goes through the accent map */
    private $accents;

    /** @var float expansion factor, always >= 1.0 */
    private $expansionFactor;

    /** @var bool whether the final string is wrapped in "[" and "]" */
    private $brackets;

    /** @var bool whether the message is parsed as HTML before being altered */
    private $parseHTML;

    /** @var string[] names of the HTML attributes whose values may be altered */
    private $localizableHTMLAttributes;

    /**
     * Available options:
     *  * accents:
     *      type: boolean
     *      default: true
     *      description: replace ASCII characters of the translated string with accented versions or similar characters
     *      example: if true, "foo" => "ƒöö".
     *
     *  * expansion_factor:
     *      type: float
     *      default: 1
     *      validation: it must be greater than or equal to 1
     *      description: expand the translated string by the given factor by appending spaces and tildes to it
     *      example: if 2, with the "accents" and "brackets" options set to false, "foo" => "foo ~~"
     *
     *  * brackets:
     *      type: boolean
     *      default: true
     *      description: wrap the translated string with brackets
     *      example: if true, "foo" => "[foo]"
     *
     *  * parse_html:
     *      type: boolean
     *      default: false
     *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but it keeps them from being altered - it also makes it possible to compute the length of the visible translated string, which is needed to expand it correctly when it contains HTML
     *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
     *
     *  * localizable_html_attributes:
     *      type: string[]
     *      default: []
     *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
     *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
     *
     * @throws \InvalidArgumentException when the expansion factor is lower than 1
     */
    public function __construct(TranslatorInterface $translator, array $options = [])
    {
        $this->translator = $translator;
        $this->accents = $options['accents'] ?? true;

        // Normalize to float: the option is documented with the integer default 1,
        // and the strict "1.0 ===" comparison below would never match an integer.
        $this->expansionFactor = (float) ($options['expansion_factor'] ?? 1.0);
        if (1.0 > $this->expansionFactor) {
            throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
        }

        $this->brackets = $options['brackets'] ?? true;

        $this->parseHTML = $options['parse_html'] ?? false;
        if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
            // Neither accents nor expansion would use the parsed structure, so skip the parsing.
            $this->parseHTML = false;
        }

        $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
    }

    /**
     * {@inheritdoc}
     */
    public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
    {
        $trans = '';
        $visibleText = '';

        foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
            if ($visible) {
                // Only the visible text counts when computing the expansion length.
                $visibleText .= $text;
            }

            if (!$localizable) {
                // Markup and protected attribute data are copied through untouched.
                $trans .= $text;

                continue;
            }

            $this->addAccents($trans, $text);
        }

        $this->expand($trans, $visibleText);

        $this->addBrackets($trans);

        return $trans;
    }

    /**
     * Returns the locale reported by the decorated translator.
     */
    public function getLocale(): string
    {
        return $this->translator->getLocale();
    }

    /**
     * Splits a translated message into parts.
     *
     * Each part is a [bool $visible, bool $localizable, string $text] triple. When
     * HTML parsing is disabled, the whole message is a single visible, localizable part.
     */
    private function getParts(string $originalTrans): array
    {
        if (!$this->parseHTML) {
            return [[true, true, $originalTrans]];
        }

        // Turn every non-ASCII character into a numeric entity so that the HTML
        // parser does not have to guess the encoding. This replaces
        // mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated as of PHP 8.2.
        $encoding = mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8';
        $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], $encoding);

        // Collect parser errors internally, then restore the previous setting.
        $useInternalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        $dom->loadHTML('<trans>'.$html.'</trans>');

        libxml_clear_errors();
        libxml_use_internal_errors($useInternalErrors);

        // Walks down to the <trans> wrapper; presumably document -> <html> (index 1,
        // after the doctype) -> <body> -> <trans>.
        return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
    }

    /**
     * Recursively flattens the children of a DOM node into
     * [bool $visible, bool $localizable, string $text] parts.
     *
     * Non-element children are emitted as visible, localizable text. Elements are
     * re-serialized as non-visible parts; their attribute values are localizable
     * only when the attribute name is listed in the "localizable_html_attributes" option.
     */
    private function parseNode(\DOMNode $node): array
    {
        $parts = [];

        foreach ($node->childNodes as $childNode) {
            if (!$childNode instanceof \DOMElement) {
                $parts[] = [true, true, $childNode->nodeValue];

                continue;
            }

            $parts[] = [false, false, '<'.$childNode->tagName];

            /** @var \DOMAttr $attribute */
            foreach ($childNode->attributes as $attribute) {
                $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                // With PREG_SPLIT_DELIM_CAPTURE, even indexes hold the text between
                // escaped entities and odd indexes hold the entities themselves,
                // which must never be altered.
                foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                    if ('' === $match) {
                        continue;
                    }

                    $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                }

                $parts[] = [false, false, '"'];
            }

            $parts[] = [false, false, '>'];

            $parts = array_merge($parts, $this->parseNode($childNode));

            $parts[] = [false, false, '</'.$childNode->tagName.'>'];
        }

        return $parts;
    }

    /**
     * Appends $text to $trans, mapping printable ASCII characters to look-alike
     * characters when the "accents" option is enabled.
     */
    private function addAccents(string &$trans, string $text): void
    {
        $trans .= $this->accents ? strtr($text, [
            ' ' => ' ',
            '!' => '¡',
            '"' => '″',
            '#' => '♯',
            '$' => '€',
            '%' => '‰',
            '&' => '⅋',
            '\'' => '´',
            '(' => '{',
            ')' => '}',
            '*' => '⁎',
            '+' => '⁺',
            ',' => '،',
            '-' => '‐',
            '.' => '·',
            '/' => '⁄',
            '0' => '⓪',
            '1' => '①',
            '2' => '②',
            '3' => '③',
            '4' => '④',
            '5' => '⑤',
            '6' => '⑥',
            '7' => '⑦',
            '8' => '⑧',
            '9' => '⑨',
            ':' => '∶',
            ';' => '⁏',
            '<' => '≤',
            '=' => '≂',
            '>' => '≥',
            '?' => '¿',
            '@' => '՞',
            'A' => 'Å',
            'B' => 'Ɓ',
            'C' => 'Ç',
            'D' => 'Ð',
            'E' => 'É',
            'F' => 'Ƒ',
            'G' => 'Ĝ',
            'H' => 'Ĥ',
            'I' => 'Î',
            'J' => 'Ĵ',
            'K' => 'Ķ',
            'L' => 'Ļ',
            'M' => 'Ṁ',
            'N' => 'Ñ',
            'O' => 'Ö',
            'P' => 'Þ',
            'Q' => 'Ǫ',
            'R' => 'Ŕ',
            'S' => 'Š',
            'T' => 'Ţ',
            'U' => 'Û',
            'V' => 'Ṽ',
            'W' => 'Ŵ',
            'X' => 'Ẋ',
            'Y' => 'Ý',
            'Z' => 'Ž',
            '[' => '⁅',
            '\\' => '∖',
            ']' => '⁆',
            '^' => '˄',
            '_' => '‿',
            '`' => '‵',
            'a' => 'å',
            'b' => 'ƀ',
            'c' => 'ç',
            'd' => 'ð',
            'e' => 'é',
            'f' => 'ƒ',
            'g' => 'ĝ',
            'h' => 'ĥ',
            'i' => 'î',
            'j' => 'ĵ',
            'k' => 'ķ',
            'l' => 'ļ',
            'm' => 'ɱ',
            'n' => 'ñ',
            'o' => 'ö',
            'p' => 'þ',
            'q' => 'ǫ',
            'r' => 'ŕ',
            's' => 'š',
            't' => 'ţ',
            'u' => 'û',
            'v' => 'ṽ',
            'w' => 'ŵ',
            'x' => 'ẋ',
            'y' => 'ý',
            'z' => 'ž',
            '{' => '(',
            '|' => '¦',
            '}' => ')',
            '~' => '˞',
        ]) : $text;
    }

    /**
     * Appends filler "words" made of the expansion character to $trans so that the
     * visible length grows by the configured expansion factor.
     *
     * The filler words reuse the word lengths found in $visibleText, picked at
     * random and weighted by how often each length occurs. The two brackets added
     * later by addBrackets() are counted as part of the expansion.
     */
    private function expand(string &$trans, string $visibleText): void
    {
        if (1.0 >= $this->expansionFactor) {
            return;
        }

        $visibleLength = $this->strlen($visibleText);
        $missingLength = (int) ceil($visibleLength * $this->expansionFactor) - $visibleLength;
        if ($this->brackets) {
            $missingLength -= 2;
        }

        if (0 >= $missingLength) {
            return;
        }

        // Map of word length => number of visible words having that length. Only
        // words strictly shorter than the missing length are kept, so that a
        // filler word plus its leading space always fits.
        $words = [];
        $wordsCount = 0;
        foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
            $wordLength = $this->strlen($word);

            if ($wordLength >= $missingLength) {
                continue;
            }

            if (!isset($words[$wordLength])) {
                $words[$wordLength] = 0;
            }

            ++$words[$wordLength];
            ++$wordsCount;
        }

        if (!$words) {
            $trans .= $this->buildFiller($missingLength);

            return;
        }

        // Most frequent lengths first.
        arsort($words, \SORT_NUMERIC);

        $longestWordLength = max(array_keys($words));

        while (true) {
            // Weighted random pick of a word length.
            $r = mt_rand(1, $wordsCount);

            foreach ($words as $length => $count) {
                $r -= $count;
                if ($r <= 0) {
                    break;
                }
            }

            $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

            // The filler word and its leading space.
            $missingLength -= $length + 1;

            if (0 === $missingLength) {
                return;
            }

            // Drop the lengths that no longer fit in the remaining space.
            while ($longestWordLength >= $missingLength) {
                $wordsCount -= $words[$longestWordLength];
                unset($words[$longestWordLength]);

                if (!$words) {
                    $trans .= $this->buildFiller($missingLength);

                    return;
                }

                $longestWordLength = max(array_keys($words));
            }
        }
    }

    /**
     * Builds the last filler of exactly $length characters: a single expansion
     * character when $length is 1, otherwise a space followed by $length - 1
     * expansion characters.
     */
    private function buildFiller(int $length): string
    {
        return 1 === $length ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $length - 1);
    }

    /**
     * Wraps $trans in square brackets when the "brackets" option is enabled.
     */
    private function addBrackets(string &$trans): void
    {
        if (!$this->brackets) {
            return;
        }

        $trans = '['.$trans.']';
    }

    /**
     * Returns the length of $s in characters, falling back to its byte length
     * when the encoding cannot be detected.
     */
    private function strlen(string $s): int
    {
        return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
    }
}