blob: a0fa57a27c316d8e2d8874d78fd3922b18bce539 [file] [log] [blame]
Matthias Andreas Benkardb382b102021-01-02 15:32:21 +01001<?php namespace Sieve;
2
3include_once('SieveToken.php');
4
/**
 * Splits a Sieve script into a flat stream of SieveToken objects and hands
 * them out one at a time.
 *
 * The stream produced by tokenize() always ends with a terminal token: either
 * a ScriptEnd token (the whole input was consumed) or an Unknown token (the
 * scanner hit input it could not classify and stopped there).
 *
 * Comment and Whitespace tokens are kept in the stream, but peekNextToken()
 * and nextToken() skip over them. nextToken() forwards every skipped token to
 * the optional passthrough callback.
 */
class SieveScanner
{
    /** @var callable|null Receives each Comment/Whitespace token skipped by nextToken(). */
    protected $ptFn_ = null;

    /** @var int Index into $tokens_ of the next token to hand out. */
    protected $tokenPos_ = 0;

    /** @var SieveToken[] Tokens produced by the most recent tokenize() call. */
    protected $tokens_ = [];

    /**
     * Token type => PCRE fragment (no delimiters, no anchor).
     *
     * ORDER MATTERS: tokenize() joins the fragments into one alternation and
     * PCRE takes the first alternative that matches, so the catch-all Unknown
     * pattern has to stay last and MultilineString ("text:") has to stay in
     * front of Identifier.
     *
     * Every fragment consumes at least one character; tokenize() relies on
     * that to tell the matching group apart from the unmatched (empty) ones.
     *
     * @var array
     */
    protected $tokenMatch_ = [
        SieveToken::LeftBracket => '\[',
        SieveToken::RightBracket => '\]',
        SieveToken::BlockStart => '\{',
        SieveToken::BlockEnd => '\}',
        SieveToken::LeftParenthesis => '\(',
        SieveToken::RightParenthesis => '\)',
        SieveToken::Comma => ',',
        SieveToken::Semicolon => ';',
        SieveToken::Whitespace => '[ \r\n\t]+',
        SieveToken::Tag => ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
        // Quoted string, shown here in the same PHP-escaped form as the literal below:
        //
        //   "                     match a quotation mark
        //   (                     start matching parts that include an escaped quotation mark
        //     ([^"]*[^"\\\\])     match a string without quotation marks and not ending with a backslash
        //     ?                   this also includes the empty string
        //     (\\\\\\\\)*         match any groups of even number of backslashes
        //                         (thus the character after these groups is not escaped)
        //     \\\\"               match an escaped quotation mark
        //   )*                    accept any number of strings that end with an escaped quotation mark
        //   [^"]*                 accept any trailing part that does not contain any quotation marks
        //   "                     end of the quoted string
        SieveToken::QuotedString => '"(([^"]*[^"\\\\])?(\\\\\\\\)*\\\\")*[^"]*"',
        SieveToken::Number => '[[:digit:]]+(?:[KMG])?(?=\b)',
        SieveToken::Comment => '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?(\n|$))',
        SieveToken::MultilineString => 'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.][^\r\n]*\r?\n)*\.\r?(\n|$)',
        SieveToken::Identifier => '[[:alpha:]_][[:alnum:]_]*(?=\b)',
        SieveToken::Unknown => '[^ \r\n\t]+',
    ];

    /**
     * @param string|null $script Script text to tokenize immediately. Pass null
     *                            to create an empty scanner and call tokenize()
     *                            later. Taken by reference only to keep the
     *                            existing signature; it is never modified.
     */
    public function __construct(&$script)
    {
        if ($script === null) {
            return;
        }

        $this->tokenize($script);
    }

    /**
     * Installs (or clears) the callback that nextToken() invokes with every
     * Comment/Whitespace token it skips.
     *
     * @param callable|null $callback Anything that is neither callable nor
     *                                loosely equal to null is silently ignored
     *                                and the previous callback stays in place.
     */
    public function setPassthroughFunc($callback)
    {
        // Loose comparison kept on purpose: callers may already clear the
        // callback with other falsy values, and nextToken() tests the stored
        // value with the same loose comparison.
        if ($callback == null || is_callable($callback)) {
            $this->ptFn_ = $callback;
        }
    }

    /**
     * Scans $script from start to end and stores the resulting tokens,
     * replacing whatever an earlier call produced and rewinding the read
     * position.
     *
     * Scanning stops early, leaving an Unknown token as the last entry, when
     * the remaining input matches the Unknown pattern or when the combined
     * regex does not match at all (which also covers preg_match() failing).
     * Otherwise a ScriptEnd token is appended once the input is exhausted.
     *
     * @param string $script Script text. Taken by reference only to keep the
     *                       existing signature; it is never modified.
     */
    public function tokenize(&$script)
    {
        // Without this a second call would append after the previous terminal
        // token and leave tokenPos_ pointing into the old stream.
        $this->tokens_ = [];
        $this->tokenPos_ = 0;

        $pos = 0;
        $line = 1;

        $unprocessedScript = (string) $script;
        $scriptLength = mb_strlen($unprocessedScript);

        // Build ONE anchored alternation out of all token patterns so each
        // step costs a single preg_match() instead of one call per token type.
        // Each alternative is wrapped in a named group (A, B, C, ...); the
        // name of the group that matched tells us the token type.
        $nameToType = [];
        $alternatives = [];
        $code = ord('A');

        foreach ($this->tokenMatch_ as $type => $subregex) {
            $name = chr($code++);
            $nameToType[$name] = $type;
            $alternatives[] = "(?P<{$name}>^{$subregex})";
        }

        $regex = '/' . implode('|', $alternatives) . '/';

        while ($pos < $scriptLength) {
            if (!preg_match($regex, $unprocessedScript, $match)) {
                $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
                return;
            }

            // Exactly one named group took part in the match. Groups that did
            // not participate are reported as '' (or are missing when they
            // trail the matching one), so compare against '' explicitly.
            // A plain truthiness test would also discard the text "0" and
            // thereby lose a Number token whose value is zero.
            $type = null;
            $currentMatch = '';
            foreach ($nameToType as $name => $candidateType) {
                if (isset($match[$name]) && $match[$name] !== '') {
                    $type = $candidateType;
                    $currentMatch = $match[$name];
                    break;
                }
            }

            if ($type === null) {
                // Cannot happen while every pattern consumes at least one
                // character; bail out instead of looping without progress.
                $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
                return;
            }

            $this->tokens_[] = new SieveToken($type, $currentMatch, $line);

            if ($type == SieveToken::Unknown) {
                return;
            }

            // Drop only the part that was just consumed. Re-slicing from the
            // original script by absolute position would make mb_substr()
            // walk over the already processed prefix on every iteration.
            $matchLength = mb_strlen($currentMatch);
            $unprocessedScript = mb_substr($unprocessedScript, $matchLength);

            $pos += $matchLength;
            $line += mb_substr_count($currentMatch, "\n");
        }

        $this->tokens_[] = new SieveToken(SieveToken::ScriptEnd, '', $line);
    }

    /**
     * Tells whether the next significant token is of the given type.
     *
     * @param int $type Token type (or bit mask of types) passed on to SieveToken::is().
     * @return mixed Whatever SieveToken::is() returns for the peeked token.
     */
    public function nextTokenIs($type)
    {
        return $this->peekNextToken()->is($type);
    }

    /**
     * Returns the next token that is neither Comment nor Whitespace without
     * consuming anything and without invoking the passthrough callback.
     *
     * When no such token is left, the terminal token is returned (see
     * terminalToken()).
     *
     * @return SieveToken
     */
    public function peekNextToken()
    {
        $index = $this->tokenPos_;

        while (($candidate = $this->tokenAt($index)) !== null) {
            if (!$candidate->is(SieveToken::Comment | SieveToken::Whitespace)) {
                return $candidate;
            }
            $index++;
        }

        return $this->terminalToken();
    }

    /**
     * Consumes and returns the next token that is neither Comment nor
     * Whitespace. Every Comment/Whitespace token skipped on the way is
     * consumed too and passed to the passthrough callback, if one is set.
     *
     * When the stream is exhausted, the terminal token is returned (see
     * terminalToken()) and the read position is left unchanged.
     *
     * @return SieveToken
     */
    public function nextToken()
    {
        while (($token = $this->tokenAt($this->tokenPos_)) !== null) {
            $this->tokenPos_++;

            if (!$token->is(SieveToken::Comment | SieveToken::Whitespace)) {
                return $token;
            }

            if ($this->ptFn_ != null) {
                call_user_func($this->ptFn_, $token);
            }
        }

        return $this->terminalToken();
    }

    /**
     * Bounds-checked access to the token stream.
     *
     * @param int $index
     * @return SieveToken|null Null when $index lies outside the stream.
     */
    private function tokenAt($index)
    {
        return isset($this->tokens_[$index]) ? $this->tokens_[$index] : null;
    }

    /**
     * Token handed out once the stream has nothing more to offer: the last
     * token of the stream, or a fresh ScriptEnd token on line 1 when nothing
     * has been tokenized yet.
     *
     * @return SieveToken
     */
    private function terminalToken()
    {
        $count = count($this->tokens_);

        if ($count > 0 && isset($this->tokens_[$count - 1])) {
            return $this->tokens_[$count - 1];
        }

        return new SieveToken(SieveToken::ScriptEnd, '', 1);
    }
}