blob: 4d8dcf40ed6a2818b0110f6efcdc912d2035f1aa [file] [log] [blame]
Matthias Andreas Benkardb382b102021-01-02 15:32:21 +01001<?php
/**
 * Abstract minifier class
 *
 * Please report bugs on https://github.com/matthiasmullie/minify/issues
 *
 * @author Matthias Mullie <minify@mullie.eu>
 * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
 * @license MIT License
 */
11namespace MatthiasMullie\Minify;
12
13use MatthiasMullie\Minify\Exceptions\IOException;
14use Psr\Cache\CacheItemInterface;
15
/**
 * Abstract minifier class.
 *
 * Collects source fragments (inline code or file contents), lets concrete
 * subclasses register PCRE patterns, and offers helpers to run those patterns
 * over the content, to protect string literals with placeholders, and to
 * write the result to a file, a gzip stream or a PSR-6 cache item.
 *
 * Please report bugs on https://github.com/matthiasmullie/minify/issues
 *
 * @package Minify
 * @author Matthias Mullie <minify@mullie.eu>
 * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
 * @license MIT License
 */
abstract class Minify
{
    /**
     * The data to be minified, keyed by source path (for files) or by a
     * numeric index (for inline code).
     *
     * @var string[]
     */
    protected $data = array();

    /**
     * Registered patterns; every entry is array($pattern, $replacement).
     *
     * @var array[]
     */
    protected $patterns = array();

    /**
     * This array will hold content of strings and regular expressions that have
     * been extracted from the source code, so we can reliably match "code",
     * without having to worry about potential "code-like" characters inside.
     * Keys are the placeholders, values the original text.
     *
     * Public because the extraction closure accesses it through a captured
     * instance instead of $this.
     *
     * @var string[]
     */
    public $extracted = array();

    /**
     * Init the minify class - optionally, code may be passed along already.
     *
     * Every argument is forwarded to add().
     */
    public function __construct(/* $data = null, ... */)
    {
        // it's possible to add the source through the constructor as well ;)
        if (func_num_args()) {
            call_user_func_array(array($this, 'add'), func_get_args());
        }
    }

    /**
     * Add a file or straight-up code to be minified.
     *
     * Accepts any number of arguments; each one is either a string (a path
     * to a readable file, or the code itself) or an array of such values,
     * which may be nested and may use arbitrary keys.
     *
     * @param string|string[] $data
     *
     * @return static
     */
    public function add($data /* $data = null, ... */)
    {
        // $data is declared so the method cannot be called without argument;
        // all arguments (including $data) are then read via func_get_args()
        $args = array($data) + func_get_args();

        foreach ($args as $data) {
            if (is_array($data)) {
                // Add the elements one by one instead of spreading the array
                // into a call: string keys would be treated as named
                // arguments on PHP 8 and an empty array would result in a
                // call without arguments.
                foreach ($data as $item) {
                    $this->add($item);
                }
                continue;
            }

            $data = (string) $data;

            // load() returns the file content when $data is a readable file,
            // and $data itself otherwise
            $value = $this->load($data);

            // content that came from a file is stored under its path, inline
            // code under the next numeric index; strict comparison, so that
            // numeric-looking strings are never considered equal by accident
            $key = ($data !== $value) ? $data : count($this->data);

            // replace CR linefeeds etc.
            // @see https://github.com/matthiasmullie/minify/pull/139
            $value = str_replace(array("\r\n", "\r"), "\n", $value);

            $this->data[$key] = $value;
        }

        return $this;
    }

    /**
     * Add a file to be minified.
     *
     * Same calling convention as add(), but every value must be a path that
     * can be read.
     *
     * @param string|string[] $data
     *
     * @return static
     *
     * @throws IOException When one of the paths can not be read
     */
    public function addFile($data /* $data = null, ... */)
    {
        // see add() for why $data is declared but func_get_args() is used
        $args = array($data) + func_get_args();

        foreach ($args as $path) {
            if (is_array($path)) {
                // see add(): iterate instead of spreading the array
                foreach ($path as $item) {
                    $this->addFile($item);
                }
                continue;
            }

            $path = (string) $path;

            // check if we can read the file
            if (!$this->canImportFile($path)) {
                throw new IOException('The file "'.$path.'" could not be opened for reading. Check if PHP has enough permissions.');
            }

            $this->add($path);
        }

        return $this;
    }

    /**
     * Minify the data & (optionally) saves it to a file.
     *
     * @param string|null $path Path to write the data to
     *
     * @return string The minified data
     */
    public function minify($path = null)
    {
        $content = $this->execute($path);

        // save to path
        if ($path !== null) {
            $this->save($content, $path);
        }

        return $content;
    }

    /**
     * Minify & gzip the data & (optionally) saves it to a file.
     *
     * @param string|null $path  Path to write the data to
     * @param int         $level Compression level, from 0 to 9
     *
     * @return string The minified & gzipped data
     */
    public function gzip($path = null, $level = 9)
    {
        $content = $this->execute($path);
        $content = gzencode($content, $level, FORCE_GZIP);

        // save to path
        if ($path !== null) {
            $this->save($content, $path);
        }

        return $content;
    }

    /**
     * Minify the data & write it to a CacheItemInterface object.
     *
     * @param CacheItemInterface $item Cache item to write the data to
     *
     * @return CacheItemInterface Cache item with the minifier data
     */
    public function cache(CacheItemInterface $item)
    {
        $content = $this->execute();
        $item->set($content);

        return $item;
    }

    /**
     * Minify the data.
     *
     * @param string|null $path Path to write the data to
     *
     * @return string The minified data
     */
    abstract public function execute($path = null);

    /**
     * Load data.
     *
     * @param string $data Either a path to a file or the content itself
     *
     * @return string The file content (without UTF-8 BOM) when $data is a
     *                readable file, $data itself otherwise
     */
    protected function load($data)
    {
        // check if the data is a file
        if ($this->canImportFile($data)) {
            $data = file_get_contents($data);

            // strip BOM, if any
            if (substr($data, 0, 3) === "\xef\xbb\xbf") {
                $data = substr($data, 3);
            }
        }

        return $data;
    }

    /**
     * Save to file.
     *
     * @param string $content The minified data
     * @param string $path    The path to save the minified data to
     *
     * @throws IOException
     */
    protected function save($content, $path)
    {
        $handler = $this->openFileForWriting($path);

        try {
            // pass $path along, so a failure names the file it happened on
            $this->writeToFile($handler, $content, $path);
        } catch (\Exception $e) {
            // don't leave the handle open when writing failed
            @fclose($handler);

            throw $e;
        }

        @fclose($handler);
    }

    /**
     * Register a pattern to execute against the source content.
     *
     * @param string          $pattern     PCRE pattern
     * @param string|callable $replacement Replacement value for matched pattern
     */
    protected function registerPattern($pattern, $replacement = '')
    {
        // study the pattern, we'll execute it more than once
        $pattern .= 'S';

        $this->patterns[] = array($pattern, $replacement);
    }

    /**
     * We can't "just" run some regular expressions against JavaScript: it's a
     * complex language. E.g. having an occurrence of // xyz would be a comment,
     * unless it's used within a string. Or you could have something that looks
     * like a 'string', but inside a comment.
     * The only way to accurately replace these pieces is to repeatedly find
     * out which of the registered patterns matches first, apply only that
     * one, and continue with whatever comes after its match.
     *
     * @param string $content The content to replace patterns in
     *
     * @return string The (manipulated) content
     */
    protected function replace($content)
    {
        $content = (string) $content;
        $processed = '';
        $positions = array_fill(0, count($this->patterns), -1);
        $matches = array();

        // compare against '' explicitly: a plain truthiness check would also
        // stop on (and thereby drop) remaining content that is exactly "0"
        while ($content !== '') {
            // find first match for all patterns
            foreach ($this->patterns as $i => $pattern) {
                $regex = $pattern[0];

                // we can safely ignore patterns for positions we've unset earlier,
                // because we know these won't show up anymore
                if (!array_key_exists($i, $positions)) {
                    continue;
                }

                // no need to re-run matches that are still in the part of the
                // content that hasn't been processed
                if ($positions[$i] >= 0) {
                    continue;
                }

                $match = null;
                if (preg_match($regex, $content, $match, PREG_OFFSET_CAPTURE)) {
                    $matches[$i] = $match;

                    // we'll store the match position as well; that way, we
                    // don't have to redo all preg_matches after changing only
                    // the first (we'll still know where those others are)
                    $positions[$i] = $match[0][1];
                } else {
                    // if the pattern couldn't be matched, there's no point in
                    // executing it again in later runs on this same content;
                    // ignore this one until we reach end of content
                    unset($matches[$i], $positions[$i]);
                }
            }

            // no more matches to find: everything's been processed, break out
            if (!$matches) {
                $processed .= $content;
                break;
            }

            // see which of the patterns actually found the first thing (we'll
            // only want to execute that one, since we're unsure if what the
            // other found was not inside what the first found)
            $discardLength = min($positions);
            $firstPattern = array_search($discardLength, $positions, true);
            $match = $matches[$firstPattern][0][0];

            // execute the pattern that matches earliest in the content string
            list($regex, $replacement) = $this->patterns[$firstPattern];
            $replacement = $this->replacePattern($regex, $replacement, $content);

            // figure out which part of the string was unmatched; that's the
            // part we'll execute the patterns on again next
            $content = (string) substr($content, $discardLength);
            $unmatched = (string) substr($content, strpos($content, $match) + strlen($match));

            // move the replaced part to $processed and prepare $content to
            // again match batch of patterns against
            $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
            $content = $unmatched;

            // first match has been replaced & that content is to be left alone,
            // the next matches will start after this replacement, so we should
            // fix their offsets
            $consumed = $discardLength + strlen($match);
            foreach (array_keys($positions) as $i) {
                $positions[$i] -= $consumed;
            }
        }

        return $processed;
    }

    /**
     * This is where a pattern is matched against $content and its first
     * match is replaced by the respective value.
     *
     * @param string          $pattern     Pattern to match
     * @param string|callable $replacement Replacement value
     * @param string          $content     Content to match pattern against
     *
     * @return string $content with the first match replaced
     */
    protected function replacePattern($pattern, $replacement, $content)
    {
        if (is_callable($replacement)) {
            return preg_replace_callback($pattern, $replacement, $content, 1);
        }

        return preg_replace($pattern, $replacement, $content, 1);
    }

    /**
     * Strings are a pattern we need to match, in order to ignore potential
     * code-like content inside them, but we just want all of the string
     * content to remain untouched.
     *
     * This method registers a pattern that replaces all string content with a
     * simple quoted placeholder (prefix + sequence number), so we've rid all
     * strings from characters that may be misinterpreted. Original string
     * content will be saved in $this->extracted and after doing all other
     * minifying, we can restore the original content via
     * restoreExtractedData().
     *
     * @param string $chars             Characters that delimit a string
     * @param string $placeholderPrefix Text to put in front of the sequence
     *                                  number inside the placeholder
     */
    protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
    {
        // PHP only supports $this inside anonymous functions since 5.4
        $minifier = $this;
        $callback = function ($match) use ($minifier, $placeholderPrefix) {
            // check the second index here, because the first always contains a quote
            if ($match[2] === '') {
                /*
                 * Empty strings need no placeholder; they can't be confused for
                 * anything else anyway.
                 * But we still needed to match them, for the extraction routine
                 * to skip over this particular string.
                 */
                return $match[0];
            }

            $count = count($minifier->extracted);
            $placeholder = $match[1].$placeholderPrefix.$count.$match[1];
            $minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];

            return $placeholder;
        };

        /*
         * The \\ messiness explained:
         * * Don't count ' or " as end-of-string if it's escaped (has backslash
         * in front of it)
         * * Unless... that backslash itself is escaped (another leading slash),
         * in which case it's no longer escaping the ' or "
         * * So there can be either no backslash, or an even number
         * * multiply all of that times 4, to account for the escaping that has
         * to be done to pass the backslash into the PHP string without it being
         * considered as escape-char (times 2) and to get it in the regex,
         * escaped (times 2)
         */
        $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
    }

    /**
     * This method will restore all extracted data (strings, regexes) that were
     * replaced with placeholder text in extract*(). The original content was
     * saved in $this->extracted, which is emptied afterwards.
     *
     * @param string $content
     *
     * @return string
     */
    protected function restoreExtractedData($content)
    {
        if (!$this->extracted) {
            // nothing was extracted, nothing to restore
            return $content;
        }

        $content = strtr($content, $this->extracted);

        $this->extracted = array();

        return $content;
    }

    /**
     * Check if the path is a regular file and can be read.
     *
     * @param string $path
     *
     * @return bool
     */
    protected function canImportFile($path)
    {
        // Cheap rejections first. $path may just as well be a complete
        // source file: anything too long to be a path can't be a file, and
        // neither can anything with a NUL byte (which filesystem functions
        // may reject with an exception that @ does not suppress).
        if (strlen($path) >= PHP_MAXPATHLEN || strpos($path, "\0") !== false) {
            return false;
        }

        $parsed = parse_url($path);
        if (
            // file is elsewhere
            isset($parsed['host']) ||
            // file responds to queries (may change, or need to bypass cache)
            isset($parsed['query'])
        ) {
            return false;
        }

        return @is_file($path) && is_readable($path);
    }

    /**
     * Attempts to open file specified by $path for writing.
     *
     * @param string $path The path to the file
     *
     * @return resource Specifier for the target file
     *
     * @throws IOException
     */
    protected function openFileForWriting($path)
    {
        if ($path === '' || ($handler = @fopen($path, 'w')) === false) {
            throw new IOException('The file "'.$path.'" could not be opened for writing. Check if PHP has enough permissions.');
        }

        return $handler;
    }

    /**
     * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
     *
     * @param resource $handler The resource to write to
     * @param string   $content The content to write
     * @param string   $path    The path to the file (for exception printing only)
     *
     * @throws IOException When $handler is no resource, or the content could
     *                     not be written completely
     */
    protected function writeToFile($handler, $content, $path = '')
    {
        if (
            !is_resource($handler) ||
            ($result = @fwrite($handler, $content)) === false ||
            ($result < strlen($content))
        ) {
            throw new IOException('The file "'.$path.'" could not be written to. Check your disk space and file permissions.');
        }
    }
}