template re-work: expression tokenization and parsing
[bse.git] / site / cgi-bin / modules / Squirrel / Template / Expr.pm
CommitLineData
4697e5c3
TC
1package Squirrel::Template::Expr;
2use strict;
3
4our $VERSION = "1.000";
5
6package Squirrel::Template::Expr::Parser;
7
8sub new {
9 my ($class) = @_;
10
11 return bless {}, $class;
12}
13
14sub parse {
15 my ($self, $text) = @_;
16
17 my $tokenizer = Squirrel::Template::Expr::Tokenizer->new($text);
18 return $self->_parse_expr($tokenizer);
19}
20
21sub _parse_expr {
22 my ($self, $tok) = @_;
23
24 return $self->_parse_or($tok);
25}
26
27my %ops =
28 (
29 "op+" => "+",
30 "op-" => "-",
31 "op*" => "*",
32 "op/" => "/",
33 "div" => "div",
34 "mod" => "mod",
35 "op_" => "concat",
36 );
37
38sub _parse_or {
39 my ($self, $tok) = @_;
40
41 my $result = $self->_parse_and($tok);
42 while ($tok->peektype eq 'or') {
43 my $op = $tok->get;
44 my $other = $self->_parse_and($tok);
45 $result = [ or => $result, $other ];
46 }
47
48 return $result;
49}
50
51sub _parse_and {
52 my ($self, $tok) = @_;
53
54 my $result = $self->_parse_rel($tok);
55 while ($tok->peektype eq 'and') {
56 my $op = $tok->get;
57 my $other = $self->_parse_rel($tok);
58 $result = [ and => $result, $other ];
59 }
60
61 return $result;
62}
63
64my %relops = map {; "op$_" => 1 } qw(eq ne gt lt ge le == != < > >= <= =~ !~);
65
66sub _parse_rel {
67 my ($self, $tok) = @_;
68
69 my $result = $self->_parse_additive($tok);
70 my $nexttype = $tok->peektype;
71 while ($relops{$nexttype}) {
72 my $op = $tok->get;
73 my $other = $self->_parse_additive($tok);
74 $result = [ substr($nexttype, 2), $result, $other ];
75 $nexttype = $tok->peektype;
76 }
77 return $result;
78}
79
80sub _parse_additive {
81 my ($self, $tok) = @_;
82
83 my $result = $self->_parse_mult($tok);
84 my $nexttype = $tok->peektype;
85 while ($nexttype eq 'op+' || $nexttype eq 'op-' || $nexttype eq 'op_') {
86 my $op = $tok->get;
87 my $other = $self->_parse_mult($tok);
88 $result = [ $ops{$nexttype}, $result, $other ];
89 $nexttype = $tok->peektype;
90 }
91 return $result;
92}
93
94sub _parse_mult {
95 my ($self, $tok) = @_;
96
97 my $result = $self->_parse_prefix($tok);
98 my $nexttype = $tok->peektype;
99 while ($nexttype eq 'op*' || $nexttype eq 'op/'
100 || $nexttype eq 'div' || $nexttype eq 'mod') {
101 my $op = $tok->get;
102 my $other = $self->_parse_prefix($tok);
103 $result = [ $ops{$op->[0]}, $result, $other ];
104 $nexttype = $tok->peektype;
105 }
106 return $result;
107}
108
109sub _parse_prefix {
110 my ($self, $tok) = @_;
111
112 my $nexttype = $tok->peektype('TERM');
113 if ($nexttype eq 'op(') {
114 $tok->get;
115 my $r = $self->_parse_expr($tok);
116 my $close = $tok->get;
117 unless ($close->[0] eq 'op)') {
118 die [ error => "Expected ')' but found $close->[0]" ];
119 }
120 return $r;
121 }
122 elsif ($nexttype eq 'op-') {
123 $tok->get;
124 return [ uminus => $self->_parse_prefix($tok) ];
125 }
126 elsif ($nexttype eq 'op+') {
127 $tok->get;
128 return $self->_parse_prefix($tok);
129 }
130 elsif ($nexttype eq 'op!' || $nexttype eq 'opnot') {
131 $tok->get;
132 return [ not => $self->_parse_prefix($tok) ];
133 }
134 else {
135 return $self->_parse_call($tok);
136 }
137}
138
139sub _parse_list {
140 my ($self, $tok) = @_;
141
142 $tok->peektype eq 'op)'
143 and return [];
144
145 my @list;
146 push @list, $self->_parse_expr($tok);
147 my $peek = $tok->peektype;
148 while ($peek eq 'op,' || $peek eq '..') {
149 $tok->get;
150 if ($peek eq '..') {
151 my $start = pop @list;
152 $start->[0] ne 'range'
153 or die [ error => "Can't use a range as the start of a range" ];
154 my $end = $self->_parse_expr($tok);
155 push @list, [ range => $start, $end ];
156 }
157 else {
158 push @list, $self->_parse_expr($tok);
159 }
160 $peek = $tok->peektype;
161 }
162
163 return \@list;
164}
165
166sub _parse_paren_list {
167 my ($self, $tok, $what) = @_;
168
169 my $open = $tok->get;
170 $open->[0] eq 'op('
171 or die [ error => "Expected '(' for $what but found $open->[0]" ];
172 my $list = $self->_parse_list($tok);
173 my $close = $tok->get;
174 $close->[0] eq 'op)'
175 or die [ error => "Expected ')' for $what but found $close->[0]" ];
176
177 return $list;
178}
179
180sub _parse_call {
181 my ($self, $tok) = @_;
182
183 my $result = $self->_parse_postfix($tok);
184 my $next = $tok->peektype;
185 while ($next eq 'op.' || $next eq 'op[') {
186 if ($next eq 'op.') {
187 $tok->get;
188 my $name = $tok->get;
189 $name->[0] eq 'id'
190 or die [ error => "Expected method name after '.'" ];
191 my $list = [];
192 if ($tok->peektype eq 'op(') {
193 $list = $self->_parse_paren_list($tok, "method");
194 }
195 $result = [ call => $name->[2], $result, $list ];
196 }
197 elsif ($next eq 'op[') {
198 $tok->get;
199 my $index = $self->_parse_expr($tok);
200 my $close = $tok->get;
201 $close->[0] eq 'op]'
202 or die [ error => "Expected list end ']' but got $close->[0]" ];
203 $result = [ subscript => $result, $index ];
204 }
205 $next = $tok->peektype;
206 }
207
208 return $result;
209}
210
211sub _parse_postfix {
212 my ($self, $tok) = @_;
213
214 return $self->_parse_primary($tok);
215}
216
217sub _parse_primary {
218 my ($self, $tok) = @_;
219
220 my $t = $tok->get('TERM');
221 if ($t->[0] eq 'str' || $t->[0] eq 'num') {
222 return [ const => $t->[2] ];
223 }
224 elsif ($t->[0] eq 'id') {
225 return [ var => $t->[2] ];
226 }
227 elsif ($t->[0] eq 'op[') {
228 my $list = $self->_parse_list($tok);
229 my $close = $tok->get;
230 $close->[0] eq 'op]'
231 or die [ error => "Expected ] but got $close->[0]" ];
232 return [ list => $list ];
233 }
234 elsif ($t->[0] eq 're') {
235 return [ re => $t->[2], $t->[3] ];
236 }
237 else {
238 die [ error => "Expected term but got $t->[0]" ];
239 }
240}
241
242package Squirrel::Template::Expr::Tokenizer;
243
244use constant TEXT => 0;
245use constant QUEUE => 1;
246
247sub new {
248 my ($class, $text) = @_;
249
250 return bless [ $text, [] ], $class;
251}
252
253my %escapes =
254 (
255 n => "\n",
256 "\\" => "\\",
257 t => "\t",
258 '"' => '"',
259 );
260
261sub get {
262 my ($self, $want) = @_;
263
264 my $queue = $self->[QUEUE];
265 @$queue
266 and return shift @$queue;
267 length $self->[TEXT]
268 or return;
269
270 $want ||= '';
271
272 if ($want ne 'TERM' &&
273 $self->[TEXT] =~ s/\A(\s*(div|mod|\.\.|and|or)\s*)//) {
274 push @$queue, [ $2 => $1 ];
275 }
276 elsif ($self->[TEXT] =~ s/\A(\s*(0x[0-9A-Fa-f]+)\s*)//) {
277 push @$queue, [ num => $1, oct $2 ];
278 }
279 elsif ($self->[TEXT] =~ s/\A(\s*(0b[01]+)\s*)//) {
280 push @$queue, [ num => $1, oct $2 ];
281 }
282 elsif ($self->[TEXT] =~ s/\A(\s*0o([0-7]+)\s*)//) {
283 push @$queue, [ num => $1, oct $2 ];
284 }
285 elsif ($self->[TEXT] =~ s/\A(\s*((?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)(?:[Ee][+-]?[0-9]+)?)\s*)//) {
286 push @$queue, [ num => $1, $2 ];
287 }
288 elsif ($want eq 'TERM' &&
289 $self->[TEXT] =~ s!\A(\s*/((?:[^/\\]|\\.)+)/([ismx]*\s)?\s*)!!) {
290 push @$queue, [ re => $1, $2, $3 || "" ];
291 }
292 elsif ($self->[TEXT] =~ s/\A(\s*(not|eq|ne|le|lt|ge|gt|<=|>=|[!=]\=|\=\~|[_\?:,\[\]\(\)<>=!.*\/+-])\s*)//) {
293 push @$queue, [ "op$2" => $1 ];
294 }
295 elsif ($self->[TEXT] =~ s/\A(\s*([A-Za-z_][a-zA-Z_0-9]*)\s*)//) {
296 push @$queue, [ id => $1, $2 ];
297 }
298 elsif ($self->[TEXT] =~ s/\A(\s*\"((?:[^"\\]|\\["\\nt]|\\x[0-9a-fA-F]{2}|\\x\{[0-9a-fA-F]+\}|\\N\{[A-Za-z0-9 ]+\})*)\"\s*)//) {
299 my $orig = $1;
300 my $str = _process_escapes($2);
301 push @$queue, [ str => $1, $str ];
302 }
303 elsif ($self->[TEXT] =~ s/\A(\s*\'([^\']*)\'\s*)//) {
304 push @$queue, [ str => $1, $2 ];
305 }
306 else {
307 die [ error => "Unknown token '$self->[TEXT]'" ];
308 }
309
310 unless (length $self->[TEXT]) {
311 push @$queue, [ eof => "" ];
312 }
313
314 return shift @$queue;
315}
316
317sub unget {
318 my ($self, $tok) = @_;
319
320 unshift @{$self->[QUEUE]}, $tok;
321}
322
323sub peek {
324 my ($self, $what) = @_;
325
326 unless (@{$self->[QUEUE]}) {
327 my $t = $self->get($what)
328 or return;
329 unshift @{$self->[QUEUE]}, $t;
330 }
331
332 return $self->[QUEUE][0];
333}
334
335sub peektype {
336 my ($self, $what) = @_;
337
338 return $self->peek($what)->[0];
339}
340
341sub _process_escapes {
342 my ($str) = @_;
343
344 $str =~
345 s(
346 \\([nt\\\"])
347 |
348 \\x\{([0-9A-Fa-f]+)\}
349 |
350 \\x([0-9A-Fa-f]{2})
351 |
352 \\N\{([A-Za-z0-9\ ]+)\}
353 )(
354 $1 ? $escapes{$1} :
355 $2 ? chr(hex($2)) :
356 $3 ? chr(hex($3)) :
357 _vianame($4)
358 )gex;
359
360 return $str;
361}
362
363my $charnames_loaded;
364sub _vianame {
365 my ($name, $errors) = @_;
366
367 require charnames;
368 my $code = charnames::vianame($name);
369 unless (defined $code) {
370 die [ error => "Unknown \\N name '$name'" ];
371 }
372 return chr($code);
373}
374
3751;