/usr/local/CPAN/WAP-wmls/WAP/wmls/lexer.pm
#
# WMLScript Language Specification Version 1.1
#
# Lexer module
#
package WAP::wmls::lexer;
use strict;
use warnings;
use bigint;
use bignum;
sub _DoubleStringLexer {
my ($parser) = @_;
my $str = q{};
my $type = 'STRING_LITERAL';
while ($parser->YYData->{INPUT}) {
for ($parser->YYData->{INPUT}) {
s/^\"//
and return ($type, $str);
s/^([^"\\]+)//
and $str .= $1,
last;
s/^\\(['"\\\/])//
and $str .= $1, # single quote, double quote, backslash, slash
last;
s/^\\b//
and $str .= "\b", # backspace
last;
s/^\\f//
and $str .= "\f", # form feed
last;
s/^\\n//
and $str .= "\n", # new line
last;
s/^\\r//
and $str .= "\r", # carriage return
last;
s/^\\t//
and $str .= "\t", # horizontal tab
last;
if ($type eq 'UTF8_STRING_LITERAL') {
s/^\\([0-7]{1,2})//
and $str .= chr oct $1,
last;
s/^\\([0-3][0-7]{2})//
and $str .= chr oct $1,
last;
s/^\\x([0-9A-Fa-f]{2})//
and $str .= chr hex $1,
last;
}
else {
if ($parser->YYData->{encoding} eq 'iso-8859-1') {
s/^\\([0-7]{1,2})//
and $str .= chr oct $1,
last;
s/^\\([0-3][0-7]{2})//
and $str .= chr oct $1,
last;
s/^\\x([0-9A-Fa-f]{2})//
and $str .= chr hex $1,
last;
}
else {
s/^\\([0-7]{1,2})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= chr oct $1,
last;
s/^\\([0-3][0-7]{2})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= chr oct $1,
last;
s/^\\x([0-9A-Fa-f]{2})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= chr hex $1,
last;
}
}
if ($type eq 'UTF8_STRING_LITERAL') {
s/^\\u([0-9A-Fa-f]{4})//
and $str .= pack('U', hex $1),
last;
}
else {
s/^\\u([0-9A-Fa-f]{4})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= pack('U', hex $1),
last;
}
s/^\\//
and $parser->Error("Invalid escape sequence $_ .\n"),
last;
}
}
$parser->Error("Untermined string.\n");
$parser->YYData->{lineno} ++;
return ($type, $str);
}
sub _SingleStringLexer {
my ($parser) = @_;
my $str = q{};
my $type = 'STRING_LITERAL';
while ($parser->YYData->{INPUT}) {
for ($parser->YYData->{INPUT}) {
s/^'//
and return ($type, $str);
s/^([^'\\]+)//
and $str .= $1,
last;
s/^\\(['"\\\/])//
and $str .= $1, # single quote, double quote, backslash, slash
last;
s/^\\b//
and $str .= "\b", # backspace
last;
s/^\\f//
and $str .= "\f", # form feed
last;
s/^\\n//
and $str .= "\n", # new line
last;
s/^\\r//
and $str .= "\r", # carriage return
last;
s/^\\t//
and $str .= "\t", # horizontal tab
last;
if ($type eq 'UTF8_STRING_LITERAL') {
s/^\\([0-7]{1,2})//
and $str .= chr oct $1,
last;
s/^\\([0-3][0-7]{2})//
and $str .= chr oct $1,
last;
s/^\\x([0-9A-Fa-f]{2})//
and $str .= chr hex $1,
last;
}
else {
if ($parser->YYData->{encoding} eq 'iso-8859-1') {
s/^\\([0-7]{1,2})//
and $str .= chr oct $1,
last;
s/^\\([0-3][0-7]{2})//
and $str .= chr oct $1,
last;
s/^\\x([0-9A-Fa-f]{2})//
and $str .= chr hex $1,
last;
}
else {
s/^\\([0-7]{1,2})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= chr oct $1,
last;
s/^\\([0-3][0-7]{2})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= chr oct $1,
last;
s/^\\x([0-9A-Fa-f]{2})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= chr hex $1,
last;
}
}
if ($type eq 'UTF8_STRING_LITERAL') {
s/^\\u([0-9A-Fa-f]{4})//
and $str .= pack('U', hex $1),
last;
}
else {
s/^\\u([0-9A-Fa-f]{4})//
and $type = 'UTF8_STRING_LITERAL',
and $str .= pack('U', hex $1),
last;
}
s/^\\//
and $parser->Error("Invalid escape sequence $_ .\n"),
last;
}
}
$parser->Error("Untermined string.\n");
$parser->YYData->{lineno} ++;
return ($type, $str);
}
sub _Identifier {
my ($parser, $ident) = @_;
if (exists $parser->YYData->{keyword}{$ident}) {
return ($parser->YYData->{keyword}{$ident}, $ident);
}
elsif (exists $parser->YYData->{invalid_keyword}{$ident}) {
$parser->Error("Invalid keyword '$ident'.\n");
}
return ('IDENTIFIER', $ident);
}
sub _OctInteger {
my ($parser, $str) = @_;
my $val = 0;
foreach (split //, $str) {
$val = $val * 8 + oct $_;
}
return ('INTEGER_LITERAL', $val);
}
sub _HexInteger {
my ($parser, $str) = @_;
my $val = 0;
foreach (split //, $str) {
$val = $val * 16 + hex $_;
}
return ('INTEGER_LITERAL', $val);
}
sub _CommentLexer {
my ($parser) = @_;
while (1) {
$parser->YYData->{INPUT}
or $parser->YYData->{INPUT} = readline $parser->YYData->{fh}
or return;
for ($parser->YYData->{INPUT}) {
s/^\n//
and $parser->YYData->{lineno} ++,
last;
s/^\*\///
and return;
s/^.//
and last;
}
}
}
sub _DocLexer {
my ($parser) = @_;
$parser->YYData->{doc} = q{};
my $flag = 1;
while (1) {
$parser->YYData->{INPUT}
or $parser->YYData->{INPUT} = readline $parser->YYData->{fh}
or return;
for ($parser->YYData->{INPUT}) {
s/^(\n)//
and $parser->YYData->{lineno} ++,
$parser->YYData->{doc} .= $1,
$flag = 0,
last;
s/^\*\///
and return;
unless ($flag) {
s/^\*//
and $flag = 1,
last;
}
s/^([ \r\t\f\013]+)//
and $parser->YYData->{doc} .= $1,
last;
s/^(.)//
and $parser->YYData->{doc} .= $1,
$flag = 1,
last;
}
}
}
sub Lexer {
my ($parser) = @_;
while (1) {
$parser->YYData->{INPUT}
or $parser->YYData->{INPUT} = readline $parser->YYData->{fh}
or return ('', undef);
for ($parser->YYData->{INPUT}) {
s/^[ \r\t\f\013]+//; # Whitespace
s/^\n//
and $parser->YYData->{lineno} ++,
last;
s/^\/\*\*// # documentation
and _DocLexer($parser),
last;
s/^\/\*// # MultiLineComment
and _CommentLexer($parser),
last;
s/^\/\/(.*)\n// # SingleLineComment
and $parser->YYData->{lineno} ++,
last;
s/^([0-9]+\.[0-9]+[Ee][+\-]?[0-9]+)//
and return ('FLOAT_LITERAL', $1);
s/^([0-9]+[Ee][+\-]?[0-9]+)//
and return ('FLOAT_LITERAL', $1);
s/^(\.[0-9]+[Ee][+\-]?[0-9]+)//
and return ('FLOAT_LITERAL', $1);
s/^([0-9]+\.[0-9]+)//
and return ('FLOAT_LITERAL', $1);
s/^([0-9]+\.)//
and return ('FLOAT_LITERAL', $1);
s/^(\.[0-9]+)//
and return ('FLOAT_LITERAL', $1);
s/^0([0-7]+)//
and return _OctInteger($parser, $1);
s/^0[Xx]([A-Fa-f0-9]+)//
and return _HexInteger($parser, $1);
s/^(0)//
and return ('INTEGER_LITERAL', $1);
s/^([1-9][0-9]*)//
and return ('INTEGER_LITERAL', $1);
s/^\"//
and return _DoubleStringLexer($parser);
s/^\'//
and return _SingleStringLexer($parser);
s/^([A-Z_a-z][0-9A-Z_a-z]*)//
and return _Identifier($parser, $1);
s/^(\+=)//
and return ($1, $1);
s/^(\-=)//
and return ($1, $1);
s/^(\*=)//
and return ($1, $1);
s/^(\/=)//
and return ($1, $1);
s/^(&=)//
and return ($1, $1);
s/^(\|=)//
and return ($1, $1);
s/^(\^=)//
and return ($1, $1);
s/^(%=)//
and return ($1, $1);
s/^(<<=)//
and return ($1, $1);
s/^(>>=)//
and return ($1, $1);
s/^(>>>=)//
and return ($1, $1);
s/^(div=)//
and return ($1, $1);
s/^(&&)//
and return ($1, $1);
s/^(\|\|)//
and return ($1, $1);
s/^(\+\+)//
and return ($1, $1);
s/^(\-\-)//
and return ($1, $1);
s/^(<<)//
and return ($1, $1);
s/^(>>>)//
and return ($1, $1);
s/^(>>)//
and return ($1, $1);
s/^(<=)//
and return ($1, $1);
s/^(>=)//
and return ($1, $1);
s/^(==)//
and return ($1, $1);
s/^(!=)//
and return ($1, $1);
s/^([=><,!~\?:\.\+\-\*\/&\|\^%\(\)\{\};#])//
and return ($1, $1); # punctuator
s/^([\S]+)//
and $parser->Error("lexer error $1.\n"),
last;
}
}
}
sub InitLexico {
my ($parser) = @_;
my %keywords = (
# Literal
'true' => 'TRUE_LITERAL',
'false' => 'FALSE_LITERAL',
'invalid' => 'INVALID_LITERAL',
# Keyword
'access' => 'ACCESS',
'agent' => 'AGENT',
'break' => 'BREAK',
'continue' => 'CONTINUE',
'div' => 'DIV',
'domain' => 'DOMAIN',
'else' => 'ELSE',
'equiv' => 'EQUIV',
'extern' => 'EXTERN',
'for' => 'FOR',
'function' => 'FUNCTION',
'header' => 'HEADER',
'http' => 'HTTP',
'if' => 'IF',
'isvalid' => 'ISVALID',
'meta' => 'META',
'name' => 'NAME',
'path' => 'PATH',
'return' => 'RETURN',
'typeof' => 'TYPEOF',
'use' => 'USE',
'user' => 'USER',
'var' => 'VAR',
'while' => 'WHILE',
'url' => 'URL',
);
my %invalid_keywords = (
# Keyword not used
'delete' => 'DELETE',
'in' => 'IN',
'lib' => 'LIB',
'new' => 'NEW',
'null' => 'NULL',
'this' => 'THIS',
'void' => 'VOID',
'with' => 'WITH',
# Future reserved word
'case' => 'CASE',
'catch' => 'CATCH',
'class' => 'CLASS',
'const' => 'CONST',
'debugger' => 'DEBUGGER',
'default' => 'DEFAULT',
'do' => 'DO',
'enum' => 'ENUM',
'export' => 'EXPORT',
'extends' => 'EXTENDS',
'finally' => 'FINALLY',
'import' => 'IMPORT',
'private' => 'PRIVATE',
'public' => 'PUBLIC',
'sizeof' => 'SIZEOF',
'struct' => 'STRUCT',
'super' => 'SUPER',
'switch' => 'SWITCH',
'throw' => 'THROW',
'try' => 'TRY',
);
$parser->YYData->{keyword} = \%keywords;
$parser->YYData->{invalid_keyword} = \%invalid_keywords;
return;
}
1;