Matthias Andreas Benkard | 7b2a3a1 | 2021-08-16 10:57:25 +0200 | [diff] [blame^] | 1 | <?php |
| 2 | |
| 3 | /* |
| 4 | * This file is part of the Symfony package. |
| 5 | * |
| 6 | * (c) Fabien Potencier <fabien@symfony.com> |
| 7 | * |
| 8 | * For the full copyright and license information, please view the LICENSE |
| 9 | * file that was distributed with this source code. |
| 10 | */ |
| 11 | |
| 12 | namespace Symfony\Component\Translation\Extractor; |
| 13 | |
| 14 | use Symfony\Component\Finder\Finder; |
| 15 | use Symfony\Component\Translation\MessageCatalogue; |
| 16 | |
/**
 * PhpExtractor extracts translation messages from a PHP template.
 *
 * Each file is tokenized with token_get_all() and every relevant token
 * position is tested against the call patterns listed in $sequences.
 *
 * @author Michel Salib <michelsalib@hotmail.com>
 */
class PhpExtractor extends AbstractFileExtractor implements ExtractorInterface
{
    /*
     * Placeholders usable inside $sequences. They are integers, while the
     * normalized token they are compared against (strictly) is token text,
     * so they never match literally and are handled by dedicated branches
     * in parseTokens().
     */
    public const MESSAGE_TOKEN = 300;
    public const METHOD_ARGUMENTS_TOKEN = 1000;
    public const DOMAIN_TOKEN = 1001;

    /**
     * Prefix for new found message.
     *
     * @var string
     */
    private $prefix = '';

    /**
     * The sequence that captures translation messages.
     *
     * Every entry is an ordered list of literal token texts and/or the
     * *_TOKEN placeholders above. Sequences are tried in declaration order
     * and the first one that yields a message wins, which is why each
     * "message, arguments, domain" variant is listed before its shorter
     * "message only" counterpart.
     *
     * @var array
     */
    protected $sequences = [
        // ->trans(message, arguments, domain)
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        // ->trans(message)
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
        ],
        // new TranslatableMessage(message, arguments, domain)
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        // new TranslatableMessage(message)
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        // Fully qualified class name, tokenized as separate name parts.
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        // Fully qualified class name, tokenized as one single name token.
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        // t(message, arguments, domain)
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        // t(message)
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
        ],
    ];

    /**
     * {@inheritdoc}
     */
    public function extract($resource, MessageCatalogue $catalog)
    {
        $files = $this->extractFiles($resource);
        foreach ($files as $file) {
            $this->parseTokens(token_get_all(file_get_contents($file)), $catalog, $file);

            // Release cached memory after each file has been processed.
            gc_mem_caches();
        }
    }

    /**
     * {@inheritdoc}
     */
    public function setPrefix(string $prefix)
    {
        $this->prefix = $prefix;
    }

    /**
     * Normalizes a token.
     *
     * Array tokens are reduced to their text (index 1); anything else is
     * returned untouched. The two-character string token 'b"' is excluded
     * explicitly because isset($token[1]) is also true for it and would
     * otherwise reduce it to its second character.
     *
     * @param mixed $token
     *
     * @return string|null
     */
    protected function normalizeToken($token)
    {
        if (isset($token[1]) && 'b"' !== $token) {
            return $token[1];
        }

        return $token;
    }

    /**
     * Seeks to a non-whitespace token.
     *
     * The iterator is left on the first token that is not T_WHITESPACE, or
     * becomes invalid when only whitespace remains.
     */
    private function seekToNextRelevantToken(\Iterator $tokenIterator): void
    {
        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if (\T_WHITESPACE !== $t[0]) {
                break;
            }
        }
    }

    /**
     * Skips one method argument.
     *
     * The iterator stops on the ',' that terminates the argument at nesting
     * depth zero, or on the ')' that closes the surrounding call.
     */
    private function skipMethodArgument(\Iterator $tokenIterator): void
    {
        $openBraces = 0;

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();

            if ('[' === $t[0] || '(' === $t[0]) {
                ++$openBraces;
            }

            if (']' === $t[0] || ')' === $t[0]) {
                --$openBraces;
            }

            if ((0 === $openBraces && ',' === $t[0]) || (-1 === $openBraces && ')' === $t[0])) {
                break;
            }
        }
    }

    /**
     * Extracts the message from the iterator while the tokens
     * match allowed message tokens.
     *
     * String literals, heredoc/nowdoc bodies and the '.' operator joining
     * them are consumed; the first token of any other kind ends the value.
     *
     * @return string The concatenated value, '' when no string was found
     */
    private function getValue(\Iterator $tokenIterator): string
    {
        $message = '';
        $docToken = '';
        $docPart = '';

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if ('.' === $t) {
                // Concatenate with next token
                continue;
            }
            if (!isset($t[1])) {
                // Any other single-character token ends the value.
                break;
            }

            switch ($t[0]) {
                case \T_START_HEREDOC:
                    $docToken = $t[1];
                    break;
                case \T_ENCAPSED_AND_WHITESPACE:
                case \T_CONSTANT_ENCAPSED_STRING:
                    if ('' === $docToken) {
                        $message .= PhpStringTokenParser::parse($t[1]);
                    } else {
                        // Inside a heredoc: keep the raw body until the closing marker is seen.
                        $docPart = $t[1];
                    }
                    break;
                case \T_END_HEREDOC:
                    // The closing marker may be indented with spaces or tabs (PHP 7.3+);
                    // the same number of leading characters is removed from every body line.
                    if ($indentation = strspn($t[1], " \t")) {
                        $docPartWithLineBreaks = $docPart;
                        $docPart = '';

                        foreach (preg_split('~(\r\n|\n|\r)~', $docPartWithLineBreaks, -1, \PREG_SPLIT_DELIM_CAPTURE) as $str) {
                            if (\in_array($str, ["\r\n", "\n", "\r"], true)) {
                                // Line breaks are captured delimiters and are kept verbatim.
                                $docPart .= $str;
                            } else {
                                $docPart .= substr($str, $indentation);
                            }
                        }
                    }

                    $message .= PhpStringTokenParser::parseDocString($docToken, $docPart);
                    $docToken = '';
                    $docPart = '';
                    break;
                case \T_WHITESPACE:
                    break;
                default:
                    break 2;
            }
        }

        return $message;
    }

    /**
     * Extracts trans message from PHP tokens.
     *
     * Each found message is stored in the catalogue (prefixed with the
     * configured prefix) and a "file:line" reference is appended to its
     * "sources" metadata.
     */
    protected function parseTokens(array $tokens, MessageCatalogue $catalog, string $filename)
    {
        $tokenIterator = new \ArrayIterator($tokens);

        for ($key = 0; $key < $tokenIterator->count(); ++$key) {
            // Never anchor a sequence on whitespace: seekToNextRelevantToken() would skip it
            // and match the call starting at the following token, which is visited on its own
            // anyway. Anchoring here only produced a duplicate source entry carrying the line
            // of the whitespace token.
            if (\is_array($tokens[$key] ?? null) && \T_WHITESPACE === $tokens[$key][0]) {
                continue;
            }

            foreach ($this->sequences as $sequence) {
                $message = '';
                $domain = 'messages';
                $tokenIterator->seek($key);

                foreach ($sequence as $sequenceKey => $item) {
                    $this->seekToNextRelevantToken($tokenIterator);

                    if ($this->normalizeToken($tokenIterator->current()) === $item) {
                        $tokenIterator->next();
                        continue;
                    } elseif (self::MESSAGE_TOKEN === $item) {
                        $message = $this->getValue($tokenIterator);

                        if (\count($sequence) === ($sequenceKey + 1)) {
                            break;
                        }
                    } elseif (self::METHOD_ARGUMENTS_TOKEN === $item) {
                        $this->skipMethodArgument($tokenIterator);
                    } elseif (self::DOMAIN_TOKEN === $item) {
                        $domainToken = $this->getValue($tokenIterator);
                        if ('' !== $domainToken) {
                            $domain = $domainToken;
                        }

                        break;
                    } else {
                        // Literal mismatch: the rest of this sequence does not apply.
                        break;
                    }
                }

                // Compared against '' (like the domain above) so that the message '0' is kept.
                if ('' !== $message) {
                    $catalog->set($message, $this->prefix.$message, $domain);
                    $metadata = $catalog->getMetadata($message, $domain) ?? [];
                    $normalizedFilename = preg_replace('{[\\\\/]+}', '/', $filename);
                    // Index 2 of an array token is the line it starts on.
                    $metadata['sources'][] = $normalizedFilename.':'.$tokens[$key][2];
                    $catalog->setMetadata($message, $metadata, $domain);
                    break;
                }
            }
        }
    }

    /**
     * Tells whether the given path is a file with the "php" extension.
     *
     * @return bool
     *
     * @throws \InvalidArgumentException
     */
    protected function canBeExtracted(string $file)
    {
        return $this->isFile($file) && 'php' === pathinfo($file, \PATHINFO_EXTENSION);
    }

    /**
     * {@inheritdoc}
     */
    protected function extractFromDirectory($directory)
    {
        $finder = new Finder();

        return $finder->files()->name('*.php')->in($directory);
    }
}