/usr/local/CPAN/FLAT/FLAT/Regex/Parser.pm
package FLAT::Regex::Parser;
use strict;
#### Is this one level of abstraction too far? Parser generator generators..
#### TODO: try YAPP, since recursive descent is SLOOOW
use Parse::RecDescent;
use FLAT::Regex::Op;
use vars '$CHAR';
$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x;
sub new {
my $pkg = shift;
my @ops = sort { $a->{prec} <=> $b->{prec} }
map {{
pkg => "FLAT::Regex::Op::$_",
prec => "FLAT::Regex::Op::$_"->precedence,
spec => "FLAT::Regex::Op::$_"->parse_spec,
short => $_
}} @_;
my $lowest = shift @ops;
my $grammar = qq!
parse:
$lowest->{short} /^\\Z/ { \$item[1] }
!;
my $prev = $lowest;
for (@ops) {
my $spec = sprintf $prev->{spec}, $_->{short};
$grammar .= qq!
$prev->{short}:
$spec { $prev->{pkg}\->from_parse(\@item) }
| $_->{short} { \$item[1] }
!;
$prev = $_;
}
my $spec = sprintf $prev->{spec}, "atomic";
$grammar .= qq!
$prev->{short}:
$spec { $prev->{pkg}\->from_parse(\@item) }
| atomic { \$item[1] }
atomic:
"(" $lowest->{short} ")" { \$item[2] }
| /\$FLAT::Regex::Parser::CHAR/
{ FLAT::Regex::Op::atomic->from_parse(\@item) }
!;
Parse::RecDescent->new($grammar);
}
1;
__END__
original parser:
use vars '$CHAR';
$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x;
my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die;
parse:
alt /^\Z/ { $item[1] }
alt:
concat(2.. /[+|]/) { FLAT::Regex::Op::alt->from_parse(@item) }
| concat { $item[1] }
concat:
star(2..) { FLAT::Regex::Op::concat->from_parse(@item) }
| star { $item[1] }
star :
atomic '*' { FLAT::Regex::Op::star->from_parse(@item) }
| atomic { $item[1] }
atomic:
"(" alt ")" { $item[2] }
| /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) }
__EOG__