mailcow/src/mailcow-dockerized/data/web/inc/lib/sieve/SieveScanner.php - kubeia - Gitiles

 <?php namespace Sieve;

 include_once('SieveToken.php');

 /**
  * Splits a Sieve script into a flat list of SieveToken objects and hands them
  * out one at a time.
  *
  * Comment and whitespace tokens are kept in the list but are skipped by
  * peekNextToken() and nextToken().
  */
 class SieveScanner
 {
     /**
      * Creates a scanner and tokenizes $script straight away unless it is null.
      *
      * @param string|null $script Sieve source text; only read, never written to.
      */
     public function __construct(&$script)
     {
         if ($script === null)
             return;

         $this->tokenize($script);
     }

     /**
      * Registers a callback that receives every comment/whitespace token that
      * nextToken() skips over.
      *
      * The value is stored only when it is callable or loosely equal to null;
      * anything else is ignored and the previous callback stays in place.
      *
      * @param callable|null $callback
      */
     public function setPassthroughFunc($callback)
     {
         if ($callback == null || is_callable($callback))
             $this->ptFn_ = $callback;
     }

     /**
      * Tokenizes $script, replacing any previously scanned token list and
      * rewinding the read position.
      *
      * When the input is consumed completely the list ends with a ScriptEnd
      * token. When an Unknown token is produced, scanning stops right there and
      * no ScriptEnd token is appended.
      *
      * @param string|null $script Sieve source text; only read, never written to.
      */
     public function tokenize(&$script)
     {
         // Start from a clean slate: without this a second call would append
         // after the previous ScriptEnd token and keep the old read position.
         $this->tokens_ = [];
         $this->tokenPos_ = 0;

         $pos = 0;
         $line = 1;

         // Cast so that a null script is scanned as an empty one.
         $unprocessedScript = (string) $script;
         $scriptLength = mb_strlen($unprocessedScript);

         //create one regex to find the right match
         //avoids looping over all possible tokens: increases performance
         //every token pattern becomes a named group (A, B, C, ...) anchored at
         //the start of the remaining input
         $nameToType = [];
         $regex = [];
         // chr(65) == 'A'
         $i = 65;

         foreach ($this->tokenMatch_ as $type => $subregex) {
             $nameToType[chr($i)] = $type;
             $regex[] = "(?P<". chr($i) . ">^$subregex)";
             $i++;
         }

         $regex = '/' . join('|', $regex) . '/';

         // Keeps only named groups that captured text. The comparison is against
         // '' on purpose: a plain array_filter() would also throw away a captured
         // "0", because "0" is falsy in PHP.
         $isMatchedNamedGroup = static function ($text, $name) {
             return is_string($name) && $text !== '';
         };

         while ($pos < $scriptLength)
         {
             if (!preg_match($regex, $unprocessedScript, $match)) {
                 $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
                 return;
             }

             // only one alternative can match, so at most one named group is left
             // and its name leads to the token type through nameToType
             $filterMatch = array_filter($match, $isMatchedNamedGroup, ARRAY_FILTER_USE_BOTH);

             if (empty($filterMatch)) {
                 // Defensive: nothing was captured, so the position cannot advance.
                 $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
                 return;
             }

             $type = $nameToType[key($filterMatch)];
             $currentMatch = current($filterMatch);

             //create the token
             $this->tokens_[] = new SieveToken($type, $currentMatch, $line);

             if ($type == SieveToken::Unknown)
                 return;

             // just remove the part that we parsed: don't extract the new substring using script length
             // as mb_strlen is \theta(pos)  (it's linear in the position)
             $matchLength = mb_strlen($currentMatch);
             $unprocessedScript = mb_substr($unprocessedScript, $matchLength);

             $pos += $matchLength;
             $line += mb_substr_count($currentMatch, "\n");
         }

         $this->tokens_[] = new SieveToken(SieveToken::ScriptEnd, '', $line);
     }

     /**
      * Tells whether the next significant token is of the given type.
      *
      * @param int $type Value forwarded unchanged to SieveToken::is().
      * @return mixed Whatever SieveToken::is() returns for the peeked token.
      */
     public function nextTokenIs($type)
     {
         return $this->peekNextToken()->is($type);
     }

     /**
      * Returns the next token that is neither a comment nor whitespace without
      * moving the read position.
      *
      * NOTE(review): there is no bounds check; this relies on a terminating
      * token following the current position - confirm callers never peek on
      * an empty or exhausted scanner.
      *
      * @return SieveToken
      */
     public function peekNextToken()
     {
         $offset = 0;
         do {
             $next = $this->tokens_[$this->tokenPos_ + $offset++];
         } while ($next->is(SieveToken::Comment|SieveToken::Whitespace));

         return $next;
     }

     /**
      * Consumes and returns the next token that is neither a comment nor
      * whitespace.
      *
      * Every skipped comment/whitespace token is handed to the passthrough
      * callback when one is set.
      *
      * NOTE(review): there is no bounds check here either; see peekNextToken().
      *
      * @return SieveToken
      */
     public function nextToken()
     {
         $token = $this->tokens_[$this->tokenPos_++];

         while ($token->is(SieveToken::Comment|SieveToken::Whitespace))
         {
             if ($this->ptFn_ != null)
                 call_user_func($this->ptFn_, $token);

             $token = $this->tokens_[$this->tokenPos_++];
         }

         return $token;
     }

     // Callback invoked by nextToken() for skipped comment/whitespace tokens.
     protected $ptFn_ = null;
     // Index into $tokens_ of the next token nextToken() will read.
     protected $tokenPos_ = 0;
     // Tokens produced by tokenize(), in script order.
     protected $tokens_ = array();
     // Token type => PCRE fragment. Order matters: tokenize() tries the
     // fragments as alternatives in this order, so Unknown must stay last.
     protected $tokenMatch_ = array (
         SieveToken::LeftBracket       =>  '\[',
         SieveToken::RightBracket      =>  '\]',
         SieveToken::BlockStart        =>  '\{',
         SieveToken::BlockEnd          =>  '\}',
         SieveToken::LeftParenthesis   =>  '\(',
         SieveToken::RightParenthesis  =>  '\)',
         SieveToken::Comma             =>  ',',
         SieveToken::Semicolon         =>  ';',
         SieveToken::Whitespace        =>  '[ \r\n\t]+',
         SieveToken::Tag               =>  ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
         /*
         "                           # match a quotation mark
         (                           # start matching parts that include an escaped quotation mark
         ([^"]*[^"\\\\])             # match a string without quotation marks and not ending with a backslash
         ?                           # this also includes the empty string
         (\\\\\\\\)*                 # match any groups of even number of backslashes
                                     # (thus the character after these groups are not escaped)
         \\\\"                       # match an escaped quotation mark
         )*                          # accept any number of strings that end with an escaped quotation mark
         [^"]*                       # accept any trailing part that does not contain any quotation marks
         "                           # end of the quoted string
         */
         SieveToken::QuotedString      =>  '"(([^"]*[^"\\\\])?(\\\\\\\\)*\\\\")*[^"]*"',
         SieveToken::Number            =>  '[[:digit:]]+(?:[KMG])?(?=\b)',
         SieveToken::Comment           =>  '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?(\n|$))',
         SieveToken::MultilineString   =>  'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.][^\r\n]*\r?\n)*\.\r?(\n|$)',
         SieveToken::Identifier        =>  '[[:alpha:]_][[:alnum:]_]*(?=\b)',
         SieveToken::Unknown           =>  '[^ \r\n\t]+'
     );
 }
	<?php namespace Sieve;

	include_once('SieveToken.php');

	/**
	 * Splits a Sieve script into a flat list of SieveToken objects and hands them
	 * out one at a time.
	 *
	 * Comment and whitespace tokens are kept in the list but are skipped by
	 * peekNextToken() and nextToken().
	 */
	class SieveScanner
	{
	    /**
	     * Creates a scanner and tokenizes $script straight away unless it is null.
	     *
	     * @param string|null $script Sieve source text; only read, never written to.
	     */
	    public function __construct(&$script)
	    {
	        if ($script === null)
	            return;

	        $this->tokenize($script);
	    }

	    /**
	     * Registers a callback that receives every comment/whitespace token that
	     * nextToken() skips over.
	     *
	     * The value is stored only when it is callable or loosely equal to null;
	     * anything else is ignored and the previous callback stays in place.
	     *
	     * @param callable|null $callback
	     */
	    public function setPassthroughFunc($callback)
	    {
	        if ($callback == null || is_callable($callback))
	            $this->ptFn_ = $callback;
	    }

	    /**
	     * Tokenizes $script, replacing any previously scanned token list and
	     * rewinding the read position.
	     *
	     * When the input is consumed completely the list ends with a ScriptEnd
	     * token. When an Unknown token is produced, scanning stops right there and
	     * no ScriptEnd token is appended.
	     *
	     * @param string|null $script Sieve source text; only read, never written to.
	     */
	    public function tokenize(&$script)
	    {
	        // Start from a clean slate: without this a second call would append
	        // after the previous ScriptEnd token and keep the old read position.
	        $this->tokens_ = [];
	        $this->tokenPos_ = 0;

	        $pos = 0;
	        $line = 1;

	        // Cast so that a null script is scanned as an empty one.
	        $unprocessedScript = (string) $script;
	        $scriptLength = mb_strlen($unprocessedScript);

	        //create one regex to find the right match
	        //avoids looping over all possible tokens: increases performance
	        //every token pattern becomes a named group (A, B, C, ...) anchored at
	        //the start of the remaining input
	        $nameToType = [];
	        $regex = [];
	        // chr(65) == 'A'
	        $i = 65;

	        foreach ($this->tokenMatch_ as $type => $subregex) {
	            $nameToType[chr($i)] = $type;
	            $regex[] = "(?P<". chr($i) . ">^$subregex)";
	            $i++;
	        }

	        $regex = '/' . join('|', $regex) . '/';

	        // Keeps only named groups that captured text. The comparison is against
	        // '' on purpose: a plain array_filter() would also throw away a captured
	        // "0", because "0" is falsy in PHP.
	        $isMatchedNamedGroup = static function ($text, $name) {
	            return is_string($name) && $text !== '';
	        };

	        while ($pos < $scriptLength)
	        {
	            if (!preg_match($regex, $unprocessedScript, $match)) {
	                $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
	                return;
	            }

	            // only one alternative can match, so at most one named group is left
	            // and its name leads to the token type through nameToType
	            $filterMatch = array_filter($match, $isMatchedNamedGroup, ARRAY_FILTER_USE_BOTH);

	            if (empty($filterMatch)) {
	                // Defensive: nothing was captured, so the position cannot advance.
	                $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
	                return;
	            }

	            $type = $nameToType[key($filterMatch)];
	            $currentMatch = current($filterMatch);

	            //create the token
	            $this->tokens_[] = new SieveToken($type, $currentMatch, $line);

	            if ($type == SieveToken::Unknown)
	                return;

	            // just remove the part that we parsed: don't extract the new substring using script length
	            // as mb_strlen is \theta(pos) (it's linear in the position)
	            $matchLength = mb_strlen($currentMatch);
	            $unprocessedScript = mb_substr($unprocessedScript, $matchLength);

	            $pos += $matchLength;
	            $line += mb_substr_count($currentMatch, "\n");
	        }

	        $this->tokens_[] = new SieveToken(SieveToken::ScriptEnd, '', $line);
	    }

	    /**
	     * Tells whether the next significant token is of the given type.
	     *
	     * @param int $type Value forwarded unchanged to SieveToken::is().
	     * @return mixed Whatever SieveToken::is() returns for the peeked token.
	     */
	    public function nextTokenIs($type)
	    {
	        return $this->peekNextToken()->is($type);
	    }

	    /**
	     * Returns the next token that is neither a comment nor whitespace without
	     * moving the read position.
	     *
	     * NOTE(review): there is no bounds check; this relies on a terminating
	     * token following the current position - confirm callers never peek on
	     * an empty or exhausted scanner.
	     *
	     * @return SieveToken
	     */
	    public function peekNextToken()
	    {
	        $offset = 0;
	        do {
	            $next = $this->tokens_[$this->tokenPos_ + $offset++];
	        } while ($next->is(SieveToken::Comment|SieveToken::Whitespace));

	        return $next;
	    }

	    /**
	     * Consumes and returns the next token that is neither a comment nor
	     * whitespace.
	     *
	     * Every skipped comment/whitespace token is handed to the passthrough
	     * callback when one is set.
	     *
	     * NOTE(review): there is no bounds check here either; see peekNextToken().
	     *
	     * @return SieveToken
	     */
	    public function nextToken()
	    {
	        $token = $this->tokens_[$this->tokenPos_++];

	        while ($token->is(SieveToken::Comment|SieveToken::Whitespace))
	        {
	            if ($this->ptFn_ != null)
	                call_user_func($this->ptFn_, $token);

	            $token = $this->tokens_[$this->tokenPos_++];
	        }

	        return $token;
	    }

	    // Callback invoked by nextToken() for skipped comment/whitespace tokens.
	    protected $ptFn_ = null;
	    // Index into $tokens_ of the next token nextToken() will read.
	    protected $tokenPos_ = 0;
	    // Tokens produced by tokenize(), in script order.
	    protected $tokens_ = array();
	    // Token type => PCRE fragment. Order matters: tokenize() tries the
	    // fragments as alternatives in this order, so Unknown must stay last.
	    protected $tokenMatch_ = array (
	        SieveToken::LeftBracket       =>  '\[',
	        SieveToken::RightBracket      =>  '\]',
	        SieveToken::BlockStart        =>  '\{',
	        SieveToken::BlockEnd          =>  '\}',
	        SieveToken::LeftParenthesis   =>  '\(',
	        SieveToken::RightParenthesis  =>  '\)',
	        SieveToken::Comma             =>  ',',
	        SieveToken::Semicolon         =>  ';',
	        SieveToken::Whitespace        =>  '[ \r\n\t]+',
	        SieveToken::Tag               =>  ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
	        /*
	        "                           # match a quotation mark
	        (                           # start matching parts that include an escaped quotation mark
	        ([^"]*[^"\\\\])             # match a string without quotation marks and not ending with a backslash
	        ?                           # this also includes the empty string
	        (\\\\\\\\)*                 # match any groups of even number of backslashes
	                                    # (thus the character after these groups are not escaped)
	        \\\\"                       # match an escaped quotation mark
	        )*                          # accept any number of strings that end with an escaped quotation mark
	        [^"]*                       # accept any trailing part that does not contain any quotation marks
	        "                           # end of the quoted string
	        */
	        SieveToken::QuotedString      =>  '"(([^"]*[^"\\\\])?(\\\\\\\\)*\\\\")*[^"]*"',
	        SieveToken::Number            =>  '[[:digit:]]+(?:[KMG])?(?=\b)',
	        SieveToken::Comment           =>  '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?(\n|$))',
	        SieveToken::MultilineString   =>  'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.][^\r\n]*\r?\n)*\.\r?(\n|$)',
	        SieveToken::Identifier        =>  '[[:alpha:]_][[:alnum:]_]*(?=\b)',
	        SieveToken::Unknown           =>  '[^ \r\n\t]+'
	    );
	}