/usr/local/CPAN/Parse-Pyapp/Parse/Pyapp/Parser.pm
package Parse::Pyapp::Parser;
use 5.006;
use strict;
our $VERSION = '0.01';
#use Data::Dumper;
sub addrule {
my $pkg = shift;
my $lhs = shift;
foreach (@_){
my $sub = pop @$_ if ref $_->[-1] eq 'CODE';
push @{$pkg->{grammar}->{$lhs}}, { rhs => $_, callback => $sub };
if(ref $sub eq 'CODE'){
$pkg->{rcb}->{join q/,/, $lhs, @{$_}[0..$#$_-1]} = $sub;
}
}
}
sub addlex {
my $pkg = shift;
my $lhs = shift;
# lexical callback function
$pkg->{lcb}->{$lhs} = pop @_ if( ref $_[-1] eq 'CODE' );
foreach (@_){
$pkg->{lexidx}->{$_->[0]}->{$lhs} = $_->[1];
push @{$pkg->{grammar}->{$lhs}}, { rhs => $_ };
}
}
sub start {
die "Unknown symbol $_[1]\n" unless exists $_[0]->{grammar}->{$_[1]};
$_[0]->{start} = $_[1];
}
use B::Deparse;
sub stringify {
my $pkg = shift;
my $grammar;
my $deparse = B::Deparse->new();
foreach my $lhs (keys %{$pkg->{grammar}}){
my $sum = 0;
$grammar .=
join q//,
"$lhs : \n\t",
join( qq/\n\t | \n\t/,
map {
my $body;
if(ref( $_->{callback}) eq 'CODE'){
$body = $deparse->coderef2text($_->{callback});
$body =~ s/^(.+)$/\t$1/mg;
$body = "\n$body";
}
join q/ /, grep{$_}
@{$_->{rhs}}[0..$#{$_->{rhs}}-1],
"[".$_->{rhs}->[-1]."]",
$body;
}
@{$pkg->{grammar}->{$lhs}})."\n\t;\n",
$/
;
}
$grammar
}
sub toCNF {
die unless caller eq __PACKAGE__;
my $pkg = shift;
my $maxsym;
do{
$maxsym = 0;
foreach my $lhs (keys %{$pkg->{grammar}}){
foreach (@{$pkg->{grammar}->{$lhs}}){
if(@{$_->{rhs}} > 3){
$maxsym = @{$_->{rhs}} if(@{$_->{rhs}} > $maxsym);
$pkg->addrule("%%".$pkg->{symcount}, [splice(@{$_->{rhs}}, 1, -1, "%%".$pkg->{symcount}), 1]);
$pkg->{symcount}++;
}
}
}
}while($maxsym > 3);
# building rules' index
foreach my $lhs (keys %{$pkg->{grammar}}){
foreach (@{$pkg->{grammar}->{$lhs}}){
$pkg->{rulidx }->{join q/,/,$lhs, @{$_->{rhs}}[0..$#{$_->{rhs}}-1]} = $_->{rhs}->[-1];
}
}
}
sub visit {
my $pkg = shift;
$pkg->{var} = {};
$pkg->{tree} = {};
@{$pkg->{nonterm}} = ();
$pkg->_visit(join( q/,/, 0, $pkg->{lastidx}, $pkg->{start}));
}
sub _visit {
my ($pkg, $key) = @_;
my @L = split /,/, $key;
my $root = (split /,/,$key)[-1];
my @R = split /,/, $pkg->{bp}->{$key};
if(!defined $pkg->{bp}->{$key} && $L[0] == $L[1]){
if(ref($pkg->{lcb}->{$root}) eq 'CODE'){
$pkg->{lhs} = $root;
$pkg->{lcb}->{$root}->($pkg, $pkg->{token}->[$L[0]]);
}
return;
}
# left
$pkg->_visit(join( q/,/, $L[0], $R[0], $R[1]));
# right
$pkg->_visit(join( q/,/, $R[0]+1, $L[1], $R[2])) if $R[2];
# root
if($root !~ /^%%/o){
$pkg->{pos} = [ $root, $R[1], @{$pkg->{nonterm}} ];
if(ref($pkg->{rcb}->{join q/,/, @{$pkg->{pos}}}) eq 'CODE'){
$pkg->{lhs} = $root;
$pkg->{rcb}->{join q/,/, @{$pkg->{pos}}}->($pkg, @{$pkg->{token}}[$L[0]..$L[1]]);
}
@{$pkg->{nonterm}} = ();
@{$pkg->{pos}} = ();
}
else{
unshift @{$pkg->{nonterm}}, grep{$_!~/^%%/o} $R[1], $R[2];
# print @{$pkg->{nonterm}},$/;
}
}
sub parse($@) {
my $pkg = shift;
$pkg->toCNF;
my @nont = keys %{$pkg->{grammar}};
$pkg->{lastidx} = $#_;
$pkg->{token} = \@_;
# probability matrix
$pkg->{pi} = undef;
# back pointers
$pkg->{bp} = undef;
####################
# base case
####################
foreach my $i (0..$#_){
foreach (keys %{$pkg->{grammar}}){
$pkg->{pi}->{"$i,$i,$_"} = $pkg->{lexidx}->{$_[$i]}->{$_} if $pkg->{lexidx}->{$_[$i]}->{$_};
}
}
####################
# recursive case
####################
foreach my $span (0..$#_){
foreach my $begin (0..$#_-$span){
my $end = $begin + $span;
foreach my $m ($begin..$end){
foreach my $A (@nont){
foreach my $B (@nont){
foreach my $C (@nont){
my $prob = $pkg->{pi}->{"$begin,$m,$B"} *
$pkg->{pi}->{join q/,/,$m+1,$end,$C} *
$pkg->{rulidx}->{join q/,/, $A, $B, $C};
if($prob && $prob > $pkg->{pi}->{"$begin,$end,$A"}){
$pkg->{pi}->{"$begin,$end,$A"} = $prob;
$pkg->{bp}->{"$begin,$end,$A"} = "$m,$B,$C";
}
}
########################################
# for a single right hand derivation
########################################
if($pkg->{rulidx}->{join q/,/, $A, $B}){
my $prob = $pkg->{pi}->{"$begin,$m,$B"} * $pkg->{rulidx}->{join q/,/, $A, $B};
if($prob && $prob > $pkg->{pi}->{"$begin,$end,$A"}){
$pkg->{pi}->{"$begin,$end,$A"} = $prob;
$pkg->{bp}->{"$begin,$end,$A"} = "$begin,$B";
}
}
}
}
}
}
}
return unless ($pkg->{bp}->{join(q/,/,0,$pkg->{lastidx},$pkg->{start})});
$pkg->visit;
1;
}
1;
__END__