blob: c5efb5f3b5b4b4df928b16a6be07d9db5a1c4a6e [file] [log] [blame]
Matthias Andreas Benkard7b2a3a12021-08-16 10:57:25 +02001<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Translation\Extractor;
13
14use Symfony\Component\Finder\Finder;
15use Symfony\Component\Translation\MessageCatalogue;
16
/**
 * PhpExtractor extracts translation messages from a PHP template.
 *
 * Every candidate file is tokenized with token_get_all() and the resulting
 * token stream is compared, position by position, against the patterns listed
 * in {@see PhpExtractor::$sequences}. The first pattern that yields a message
 * at a given position wins; the message is stored in the catalogue together
 * with a "file:line" source entry in its metadata.
 *
 * @author Michel Salib <michelsalib@hotmail.com>
 */
class PhpExtractor extends AbstractFileExtractor implements ExtractorInterface
{
    /**
     * Placeholder used in a sequence where the translation message is expected.
     */
    public const MESSAGE_TOKEN = 300;

    /**
     * Placeholder used in a sequence for an argument that must be skipped.
     */
    public const METHOD_ARGUMENTS_TOKEN = 1000;

    /**
     * Placeholder used in a sequence where the translation domain is expected.
     */
    public const DOMAIN_TOKEN = 1001;

    /**
     * Prefix for new found message.
     *
     * @var string
     */
    private $prefix = '';

    /**
     * The sequence that captures translation messages.
     *
     * Each entry is a list of literal token texts and/or the *_TOKEN
     * placeholders above. Entries are tried in order, so the longer variant of
     * a call (message, arguments, domain) is listed before its message-only
     * counterpart.
     *
     * @var array
     */
    protected $sequences = [
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
        ],
    ];

    /**
     * {@inheritdoc}
     */
    public function extract($resource, MessageCatalogue $catalog)
    {
        $files = $this->extractFiles($resource);
        foreach ($files as $file) {
            $content = file_get_contents($file);

            // An unreadable file has nothing to tokenize; skip it explicitly
            // instead of handing `false` to token_get_all().
            if (false !== $content) {
                $this->parseTokens(token_get_all($content), $catalog, $file);
            }

            // Release the engine's internal memory caches after every file.
            gc_mem_caches();
        }
    }

    /**
     * {@inheritdoc}
     */
    public function setPrefix(string $prefix)
    {
        $this->prefix = $prefix;
    }

    /**
     * Normalizes a token.
     *
     * Anything that has an offset 1 is reduced to that offset, which for the
     * [id, text, line] arrays produced by token_get_all() is the token text.
     * Everything else (single-character string tokens, null) is returned
     * untouched. The literal `b"` is excluded explicitly: it is a plain string
     * that has an offset 1 and would otherwise be reduced to `"`.
     *
     * @param mixed $token
     *
     * @return string|null
     */
    protected function normalizeToken($token)
    {
        if (isset($token[1]) && 'b"' !== $token) {
            return $token[1];
        }

        return $token;
    }

    /**
     * Seeks to a non-whitespace token.
     *
     * The iterator is left untouched when it already points at a
     * non-whitespace token or is exhausted.
     */
    private function seekToNextRelevantToken(\Iterator $tokenIterator): void
    {
        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if (\T_WHITESPACE !== $t[0]) {
                break;
            }
        }
    }

    /**
     * Advances the iterator past one call argument.
     *
     * Brackets and parentheses are counted so that commas nested inside them
     * are ignored. The iterator stops on the comma that ends the argument, or
     * on the parenthesis that closes the surrounding call.
     */
    private function skipMethodArgument(\Iterator $tokenIterator): void
    {
        $openBraces = 0;

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();

            // For array tokens $t[0] is an integer id, so these strict string
            // comparisons only ever match single-character string tokens.
            if ('[' === $t[0] || '(' === $t[0]) {
                ++$openBraces;
            }

            if (']' === $t[0] || ')' === $t[0]) {
                --$openBraces;
            }

            if ((0 === $openBraces && ',' === $t[0]) || (-1 === $openBraces && ')' === $t[0])) {
                break;
            }
        }
    }

    /**
     * Extracts the message from the iterator while the tokens
     * match allowed message tokens.
     *
     * Quoted strings, heredoc/nowdoc blocks and `.` concatenations of them are
     * accepted; the first token of any other kind ends the value. An empty
     * string is returned when no string literal is found.
     */
    private function getValue(\Iterator $tokenIterator): string
    {
        $message = '';
        $docToken = '';
        $docPart = '';

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if ('.' === $t) {
                // Concatenate with next token
                continue;
            }
            if (!isset($t[1])) {
                // Any other single-character token ends the value.
                break;
            }

            switch ($t[0]) {
                case \T_START_HEREDOC:
                    $docToken = $t[1];
                    break;
                case \T_ENCAPSED_AND_WHITESPACE:
                case \T_CONSTANT_ENCAPSED_STRING:
                    if ('' === $docToken) {
                        $message .= PhpStringTokenParser::parse($t[1]);
                    } else {
                        // Inside a heredoc/nowdoc: keep the raw body until the
                        // closing marker reveals how much indentation to strip.
                        $docPart = $t[1];
                    }
                    break;
                case \T_END_HEREDOC:
                    // The closing marker may be indented with spaces or with
                    // tabs; that same amount is removed from every body line.
                    $indentation = strspn($t[1], " \t");
                    if ($indentation > 0) {
                        $docPartWithLineBreaks = $docPart;
                        $docPart = '';

                        foreach (preg_split('~(\r\n|\n|\r)~', $docPartWithLineBreaks, -1, \PREG_SPLIT_DELIM_CAPTURE) as $str) {
                            if (\in_array($str, ["\r\n", "\n", "\r"], true)) {
                                $docPart .= $str;
                            } else {
                                // substr() may return false for lines shorter
                                // than the indentation on older PHP versions.
                                $docPart .= (string) substr($str, $indentation);
                            }
                        }
                    }

                    $message .= PhpStringTokenParser::parseDocString($docToken, $docPart);
                    $docToken = '';
                    $docPart = '';
                    break;
                case \T_WHITESPACE:
                    break;
                default:
                    break 2;
            }
        }

        return $message;
    }

    /**
     * Extracts trans message from PHP tokens.
     *
     * Every token position is tried against every sequence, in declaration
     * order, until one of them produces a message. The message is then added
     * to the catalogue (with the configured prefix as its translation) and the
     * "file:line" of the position is appended to its "sources" metadata.
     */
    protected function parseTokens(array $tokens, MessageCatalogue $catalog, string $filename)
    {
        $tokenIterator = new \ArrayIterator($tokens);
        $tokenCount = \count($tokens);
        // Collapse any run of slashes/backslashes into a single forward slash.
        $normalizedFilename = preg_replace('{[\\\\/]+}', '/', $filename);

        for ($key = 0; $key < $tokenCount; ++$key) {
            foreach ($this->sequences as $sequence) {
                $message = '';
                $domain = 'messages';
                $tokenIterator->seek($key);

                foreach ($sequence as $sequenceKey => $item) {
                    $this->seekToNextRelevantToken($tokenIterator);

                    if ($this->normalizeToken($tokenIterator->current()) === $item) {
                        $tokenIterator->next();
                        continue;
                    } elseif (self::MESSAGE_TOKEN === $item) {
                        $message = $this->getValue($tokenIterator);

                        if (\count($sequence) === ($sequenceKey + 1)) {
                            break;
                        }
                    } elseif (self::METHOD_ARGUMENTS_TOKEN === $item) {
                        $this->skipMethodArgument($tokenIterator);
                    } elseif (self::DOMAIN_TOKEN === $item) {
                        $domainToken = $this->getValue($tokenIterator);
                        if ('' !== $domainToken) {
                            $domain = $domainToken;
                        }

                        break;
                    } else {
                        break;
                    }
                }

                // Strict comparison on purpose: "0" is a valid message but is
                // falsy, so a plain truthiness test would silently drop it.
                if ('' !== $message) {
                    $catalog->set($message, $this->prefix.$message, $domain);
                    $metadata = $catalog->getMetadata($message, $domain) ?? [];
                    $metadata['sources'][] = $normalizedFilename.':'.$tokens[$key][2];
                    $catalog->setMetadata($message, $metadata, $domain);
                    break;
                }
            }
        }
    }

    /**
     * Tells whether the given path is a file with a "php" extension.
     *
     * @return bool
     *
     * @throws \InvalidArgumentException
     */
    protected function canBeExtracted(string $file)
    {
        return $this->isFile($file) && 'php' === pathinfo($file, \PATHINFO_EXTENSION);
    }

    /**
     * {@inheritdoc}
     */
    protected function extractFromDirectory($directory)
    {
        $finder = new Finder();

        return $finder->files()->name('*.php')->in($directory);
    }
}
336}