Blame - mailcow/src/mailcow-dockerized/data/web/inc/lib/sieve/SieveScanner.php - kubeia

blob: a0fa57a27c316d8e2d8874d78fd3922b18bce539 [file] [log] [blame]

Matthias Andreas Benkard	b382b10	2021-01-02 15:32:21 +0100	[diff] [blame^]	1	<?php namespace Sieve;
				2
				3	include_once('SieveToken.php');
				4
				5	class SieveScanner
				6	{
				7	public function __construct(&$script)
				8	{
				9	if ($script === null)
				10	return;
				11
				12	$this->tokenize($script);
				13	}
				14
				15	public function setPassthroughFunc($callback)
				16	{
				17	if ($callback == null \|\| is_callable($callback))
				18	$this->ptFn_ = $callback;
				19	}
				20
				21	public function tokenize(&$script)
				22	{
				23	$pos = 0;
				24	$line = 1;
				25
				26	$scriptLength = mb_strlen($script);
				27
				28	$unprocessedScript = $script;
				29
				30
				31	//create one regex to find the right match
				32	//avoids looping over all possible tokens: increases performance
				33	$nameToType = [];
				34	$regex = [];
				35	// chr(65) == 'A'
				36	$i = 65;
				37
				38	foreach ($this->tokenMatch_ as $type => $subregex) {
				39	$nameToType[chr($i)] = $type;
				40	$regex[] = "(?P<". chr($i) . ">^$subregex)";
				41	$i++;
				42	}
				43
				44	$regex = '/' . join('\|', $regex) . '/';
				45
				46	while ($pos < $scriptLength)
				47	{
				48	if (preg_match($regex, $unprocessedScript, $match)) {
				49
				50	// only keep the group that match and we only want matches with group names
				51	// we can use the group name to find the token type using nameToType
				52	$filterMatch = array_filter(array_filter($match), 'is_string', ARRAY_FILTER_USE_KEY);
				53
				54	// the first element in filterMatch will contain the matched group and the key will be the name
				55	$type = $nameToType[key($filterMatch)];
				56	$currentMatch = current($filterMatch);
				57
				58	//create the token
				59	$token = new SieveToken($type, $currentMatch, $line);
				60	$this->tokens_[] = $token;
				61
				62	if ($type == SieveToken::Unknown)
				63	return;
				64
				65	// just remove the part that we parsed: don't extract the new substring using script length
				66	// as mb_strlen is \theta(pos) (it's linear in the position)
				67	$matchLength = mb_strlen($currentMatch);
				68	$unprocessedScript = mb_substr($unprocessedScript, $matchLength);
				69
				70	$pos += $matchLength;
				71	$line += mb_substr_count($currentMatch, "\n");
				72	} else {
				73	$this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
				74	return;
				75	}
				76
				77	}
				78
				79	$this->tokens_[] = new SieveToken(SieveToken::ScriptEnd, '', $line);
				80	}
				81
				82	public function nextTokenIs($type)
				83	{
				84	return $this->peekNextToken()->is($type);
				85	}
				86
				87	public function peekNextToken()
				88	{
				89	$offset = 0;
				90	do {
				91	$next = $this->tokens_[$this->tokenPos_ + $offset++];
				92	} while ($next->is(SieveToken::Comment\|SieveToken::Whitespace));
				93
				94	return $next;
				95	}
				96
				97	public function nextToken()
				98	{
				99	$token = $this->tokens_[$this->tokenPos_++];
				100
				101	while ($token->is(SieveToken::Comment\|SieveToken::Whitespace))
				102	{
				103	if ($this->ptFn_ != null)
				104	call_user_func($this->ptFn_, $token);
				105
				106	$token = $this->tokens_[$this->tokenPos_++];
				107	}
				108
				109	return $token;
				110	}
				111
				112	protected $ptFn_ = null;
				113	protected $tokenPos_ = 0;
				114	protected $tokens_ = array();
				115	protected $tokenMatch_ = array (
				116	SieveToken::LeftBracket => '\[',
				117	SieveToken::RightBracket => '\]',
				118	SieveToken::BlockStart => '\{',
				119	SieveToken::BlockEnd => '\}',
				120	SieveToken::LeftParenthesis => '\(',
				121	SieveToken::RightParenthesis => '\)',
				122	SieveToken::Comma => ',',
				123	SieveToken::Semicolon => ';',
				124	SieveToken::Whitespace => '[ \r\n\t]+',
				125	SieveToken::Tag => ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
				126	/*
				127	" # match a quotation mark
				128	( # start matching parts that include an escaped quotation mark
				129	([^"]*[^"\\\\]) # match a string without quotation marks and not ending with a backlash
				130	? # this also includes the empty string
				131	(\\\\\\\\)* # match any groups of even number of backslashes
				132	# (thus the character after these groups are not escaped)
				133	\\\\" # match an escaped quotation mark
				134	)* # accept any number of strings that end with an escaped quotation mark
				135	[^"]* # accept any trailing part that does not contain any quotation marks
				136	" # end of the quoted string
				137	*/
				138	SieveToken::QuotedString => '"(([^"][^"\\\\])?(\\\\\\\\)\\\\")[^"]"',
				139	SieveToken::Number => '[[:digit:]]+(?:[KMG])?(?=\b)',
				140	SieveToken::Comment => '(?:\/\(?:[^\]\|\(?=[^\/]))\\/\|#[^\r\n]\r?(\n\|$))',
				141	SieveToken::MultilineString => 'text:[ \t](?:#[^\r\n])?\r?\n(\.[^\r\n]+\r?\n\|[^\.][^\r\n]\r?\n)\.\r?(\n\|$)',
				142	SieveToken::Identifier => '[[:alpha:]_][[:alnum:]_]*(?=\b)',
				143	SieveToken::Unknown => '[^ \r\n\t]+'
				144	);
				145	}