Matthias Andreas Benkard | b382b10 | 2021-01-02 15:32:21 +0100 | [diff] [blame] | 1 | <?php |
| 2 | /** |
| 3 | * Abstract minifier class |
| 4 | * |
| 5 | * Please report bugs on https://github.com/matthiasmullie/minify/issues |
| 6 | * |
| 7 | * @author Matthias Mullie <minify@mullie.eu> |
| 8 | * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved |
| 9 | * @license MIT License |
| 10 | */ |
| 11 | namespace MatthiasMullie\Minify; |
| 12 | |
| 13 | use MatthiasMullie\Minify\Exceptions\IOException; |
| 14 | use Psr\Cache\CacheItemInterface; |
| 15 | |
/**
 * Abstract minifier class.
 *
 * Collects the source data (inline code or file paths) and provides the
 * shared plumbing for pattern matching, string extraction and writing the
 * result to disk. Concrete subclasses implement execute() to do the actual
 * minification.
 *
 * Please report bugs on https://github.com/matthiasmullie/minify/issues
 *
 * @package Minify
 * @author Matthias Mullie <minify@mullie.eu>
 * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
 * @license MIT License
 */
abstract class Minify
{
    /**
     * The data to be minified.
     *
     * Entries loaded from a file are keyed by their path; inline code is
     * keyed by a numeric index.
     *
     * @var string[]
     */
    protected $data = array();

    /**
     * Registered patterns, each one an array(pattern, replacement) pair as
     * stored by registerPattern().
     *
     * @var array[]
     */
    protected $patterns = array();

    /**
     * This array will hold content of strings and regular expressions that have
     * been extracted from the source code, so we can reliably match "code",
     * without having to worry about potential "code-like" characters inside.
     * It maps placeholder => original content.
     *
     * Public only because the extraction closure has to write to it through a
     * captured reference to this object (see extractStrings()).
     *
     * @var string[]
     */
    public $extracted = array();

    /**
     * Init the minify class - optionally, code may be passed along already.
     *
     * Any arguments given are forwarded, as-is, to add().
     */
    public function __construct(/* $data = null, ... */)
    {
        // it's possible to add the source through the constructor as well
        if (func_num_args()) {
            call_user_func_array(array($this, 'add'), func_get_args());
        }
    }

    /**
     * Add a file or straight-up code to be minified.
     *
     * Accepts any number of arguments; every argument may be a string (a
     * readable file path or the code itself) or a (nested) array of those.
     *
     * @param string|string[] $data
     *
     * @return static
     */
    public function add($data /* $data = null, ... */)
    {
        // $data is declared only so the method can't be called without an
        // argument; the complete (variadic) argument list is collected here
        $args = array($data) + func_get_args();

        // this method can be overloaded
        foreach ($args as $data) {
            if (is_array($data)) {
                // skip empty arrays (they would call add() without arguments)
                // and drop the keys (string keys would be interpreted as named
                // arguments on PHP 8)
                if ($data) {
                    call_user_func_array(array($this, 'add'), array_values($data));
                }
                continue;
            }

            // redefine var
            $data = (string) $data;

            // load data; if $data was a readable file, $value is its content
            $value = $this->load($data);

            // content that came from a file is keyed by its path, inline code
            // by index; compare strictly so that numeric-looking strings
            // (e.g. "1" vs "1.0") are not mistaken for being identical
            $key = ($data !== $value) ? $data : count($this->data);

            // replace CR linefeeds etc.
            // @see https://github.com/matthiasmullie/minify/pull/139
            $value = str_replace(array("\r\n", "\r"), "\n", $value);

            // store data
            $this->data[$key] = $value;
        }

        return $this;
    }

    /**
     * Add a file to be minified.
     *
     * Works like add(), but every (nested) argument must be a path to a
     * readable file.
     *
     * @param string|string[] $data
     *
     * @return static
     *
     * @throws IOException When one of the paths can not be read
     */
    public function addFile($data /* $data = null, ... */)
    {
        // $data is declared only so the method can't be called without an
        // argument; the complete (variadic) argument list is collected here
        $args = array($data) + func_get_args();

        // this method can be overloaded
        foreach ($args as $path) {
            if (is_array($path)) {
                // same precautions as in add(): no empty call, no named args
                if ($path) {
                    call_user_func_array(array($this, 'addFile'), array_values($path));
                }
                continue;
            }

            // redefine var
            $path = (string) $path;

            // check if we can read the file
            if (!$this->canImportFile($path)) {
                throw new IOException('The file "'.$path.'" could not be opened for reading. Check if PHP has enough permissions.');
            }

            $this->add($path);
        }

        return $this;
    }

    /**
     * Minify the data & (optionally) saves it to a file.
     *
     * @param string[optional] $path Path to write the data to
     *
     * @return string The minified data
     */
    public function minify($path = null)
    {
        $content = $this->execute($path);

        // save to path
        if ($path !== null) {
            $this->save($content, $path);
        }

        return $content;
    }

    /**
     * Minify & gzip the data & (optionally) saves it to a file.
     *
     * @param string[optional] $path  Path to write the data to
     * @param int[optional]    $level Compression level, from 0 to 9
     *
     * @return string The minified & gzipped data
     */
    public function gzip($path = null, $level = 9)
    {
        $content = $this->execute($path);
        $content = gzencode($content, $level, FORCE_GZIP);

        // save to path
        if ($path !== null) {
            $this->save($content, $path);
        }

        return $content;
    }

    /**
     * Minify the data & write it to a CacheItemInterface object.
     *
     * @param CacheItemInterface $item Cache item to write the data to
     *
     * @return CacheItemInterface Cache item with the minifier data
     */
    public function cache(CacheItemInterface $item)
    {
        $content = $this->execute();
        $item->set($content);

        return $item;
    }

    /**
     * Minify the data.
     *
     * @param string[optional] $path Path to write the data to
     *
     * @return string The minified data
     */
    abstract public function execute($path = null);

    /**
     * Load data.
     *
     * When $data is the path of a readable local file, the file's content is
     * returned (without a leading UTF-8 byte order mark); otherwise $data is
     * returned untouched.
     *
     * @param string $data Either a path to a file or the content itself
     *
     * @return string
     */
    protected function load($data)
    {
        // check if the data is a file
        if ($this->canImportFile($data)) {
            $data = file_get_contents($data);

            // strip BOM, if any
            if (substr($data, 0, 3) === "\xef\xbb\xbf") {
                $data = substr($data, 3);
            }
        }

        return $data;
    }

    /**
     * Save to file.
     *
     * @param string $content The minified data
     * @param string $path    The path to save the minified data to
     *
     * @throws IOException When the file can't be opened or written to
     */
    protected function save($content, $path)
    {
        $handler = $this->openFileForWriting($path);

        try {
            // pass the path along, so a failure names the file it was about
            $this->writeToFile($handler, $content, $path);
        } catch (\Exception $e) {
            // don't leak the file handle when writing failed
            @fclose($handler);

            throw $e;
        }

        @fclose($handler);
    }

    /**
     * Register a pattern to execute against the source content.
     *
     * @param string          $pattern     PCRE pattern
     * @param string|callable $replacement Replacement value for matched pattern
     */
    protected function registerPattern($pattern, $replacement = '')
    {
        // study the pattern, we'll execute it more than once
        $pattern .= 'S';

        $this->patterns[] = array($pattern, $replacement);
    }

    /**
     * We can't "just" run some regular expressions against JavaScript: it's a
     * complex language. E.g. having an occurrence of // xyz would be a comment,
     * unless it's used within a string. Or you could have something that looks
     * like a 'string', but inside a comment.
     * The only way to accurately replace these pieces is to walk through the
     * content, each time figuring out which of the registered patterns matches
     * first, apply only that one, and continue right after its match.
     *
     * @param string $content The content to replace patterns in
     *
     * @return string The (manipulated) content
     */
    protected function replace($content)
    {
        $content = (string) $content;
        $processed = '';
        $positions = array_fill(0, count($this->patterns), -1);
        $matches = array();

        // compare against '' explicitly: a plain truthiness check would treat
        // a remaining "0" as "nothing left" and silently drop it
        while ($content !== '') {
            // find first match for all patterns
            foreach ($this->patterns as $i => $definition) {
                // we can safely ignore patterns for positions we've unset
                // earlier, because we know these won't show up anymore
                if (!array_key_exists($i, $positions)) {
                    continue;
                }

                // no need to re-run matches that are still in the part of the
                // content that hasn't been processed
                if ($positions[$i] >= 0) {
                    continue;
                }

                $match = null;
                if (preg_match($definition[0], $content, $match, PREG_OFFSET_CAPTURE)) {
                    $matches[$i] = $match;

                    // we'll store the match position as well; that way, we
                    // don't have to redo all preg_matches after changing only
                    // the first (we'll still know where those others are)
                    $positions[$i] = $match[0][1];
                } else {
                    // if the pattern couldn't be matched, there's no point in
                    // executing it again in later runs on this same content;
                    // ignore this one until we reach end of content
                    unset($matches[$i], $positions[$i]);
                }
            }

            // no more matches to find: everything's been processed, break out
            if (!$matches) {
                $processed .= $content;
                break;
            }

            // see which of the patterns actually found the first thing (we'll
            // only want to execute that one, since we're unsure if what the
            // other found was not inside what the first found)
            $discardLength = min($positions);
            $firstPattern = array_search($discardLength, $positions, true);
            $match = $matches[$firstPattern][0][0];

            // execute the pattern that matches earliest in the content string
            list($pattern, $replacement) = $this->patterns[$firstPattern];
            $replacement = $this->replacePattern($pattern, $replacement, $content);

            // figure out which part of the string was unmatched; that's the
            // part we'll execute the patterns on again next
            // (once everything in front of it is discarded, the match sits at
            // the very start of the content)
            $content = (string) substr($content, $discardLength);
            $unmatched = (string) substr($content, strlen($match));

            // move the replaced part to $processed and prepare $content to
            // again match batch of patterns against
            $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
            $content = $unmatched;

            // first match has been replaced & that content is to be left alone,
            // the next matches will start after this replacement, so we should
            // fix their offsets (a negative offset means the match fell inside
            // the consumed part & the pattern will be re-run)
            $consumed = $discardLength + strlen($match);
            foreach ($positions as $i => $position) {
                $positions[$i] = $position - $consumed;
            }
        }

        return $processed;
    }

    /**
     * This is where a pattern is matched against $content and the first match
     * is replaced by its respective value.
     *
     * @param string          $pattern     Pattern to match
     * @param string|callable $replacement Replacement value
     * @param string          $content     Content to match pattern against
     *
     * @return string
     */
    protected function replacePattern($pattern, $replacement, $content)
    {
        // only ever replace the first occurrence: replace() will decide what
        // happens with the rest of the content
        if (is_callable($replacement)) {
            return preg_replace_callback($pattern, $replacement, $content, 1);
        }

        return preg_replace($pattern, $replacement, $content, 1);
    }

    /**
     * Strings are a pattern we need to match, in order to ignore potential
     * code-like content inside them, but we just want all of the string
     * content to remain untouched.
     *
     * This method will replace all string content with a simple placeholder
     * (quote + prefix + sequence number + quote), so we've rid all strings
     * from characters that may be misinterpreted. Original string content will
     * be saved in $this->extracted and after doing all other minifying, we can
     * restore the original content via restoreExtractedData().
     *
     * @param string[optional] $chars             Quote characters that delimit a string
     * @param string[optional] $placeholderPrefix Text to put in front of the sequence number
     */
    protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
    {
        // PHP only supports $this inside anonymous functions since 5.4
        $minifier = $this;
        $callback = function ($match) use ($minifier, $placeholderPrefix) {
            // check the second index here, because the first always contains a quote
            if ($match[2] === '') {
                /*
                 * Empty strings need no placeholder; they can't be confused for
                 * anything else anyway.
                 * But we still needed to match them, for the extraction routine
                 * to skip over this particular string.
                 */
                return $match[0];
            }

            $count = count($minifier->extracted);
            $placeholder = $match[1].$placeholderPrefix.$count.$match[1];
            $minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];

            return $placeholder;
        };

        /*
         * The \\ messiness explained:
         * * Don't count ' or " as end-of-string if it's escaped (has backslash
         * in front of it)
         * * Unless... that backslash itself is escaped (another leading slash),
         * in which case it's no longer escaping the ' or "
         * * So there can be either no backslash, or an even number
         * * multiply all of that times 4, to account for the escaping that has
         * to be done to pass the backslash into the PHP string without it being
         * considered as escape-char (times 2) and to get it in the regex,
         * escaped (times 2)
         */
        $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
    }

    /**
     * This method will restore all extracted data (strings, regexes) that were
     * replaced with placeholder text in extract*(). The original content was
     * saved in $this->extracted, which is emptied afterwards.
     *
     * @param string $content
     *
     * @return string
     */
    protected function restoreExtractedData($content)
    {
        if (!$this->extracted) {
            // nothing was extracted, nothing to restore
            return $content;
        }

        $content = strtr($content, $this->extracted);

        $this->extracted = array();

        return $content;
    }

    /**
     * Check if the path is a regular file and can be read.
     *
     * @param string $path
     *
     * @return bool
     */
    protected function canImportFile($path)
    {
        $parsed = parse_url($path);
        if (
            // file is elsewhere
            isset($parsed['host']) ||
            // file responds to queries (may change, or need to bypass cache)
            isset($parsed['query'])
        ) {
            return false;
        }

        // the length check comes first, so overly long input (most likely
        // code rather than a path) never reaches the filesystem functions
        return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
    }

    /**
     * Attempts to open file specified by $path for writing.
     *
     * @param string $path The path to the file
     *
     * @return resource Specifier for the target file
     *
     * @throws IOException When $path is empty or the file can't be opened
     */
    protected function openFileForWriting($path)
    {
        if ($path === '' || ($handler = @fopen($path, 'w')) === false) {
            throw new IOException('The file "'.$path.'" could not be opened for writing. Check if PHP has enough permissions.');
        }

        return $handler;
    }

    /**
     * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
     *
     * @param resource $handler The resource to write to
     * @param string   $content The content to write
     * @param string   $path    The path to the file (for exception printing only)
     *
     * @throws IOException When the handler is invalid or not all content got written
     */
    protected function writeToFile($handler, $content, $path = '')
    {
        if (
            !is_resource($handler) ||
            ($result = @fwrite($handler, $content)) === false ||
            // a short write (e.g. disk full) is a failure as well
            ($result < strlen($content))
        ) {
            throw new IOException('The file "'.$path.'" could not be written to. Check your disk space and file permissions.');
        }
    }
}