/usr/local/CPAN/Lingua-LinkParser-MatchPath/Lingua/LinkParser/MatchPath/SM.pm


package Lingua::LinkParser::MatchPath::SM;
use strict;

use Data::Dumper;

use Lingua::LinkParser::MatchPath::BuildSM;
use Lingua::LinkParser::MatchPath::SMContext;
our @ISA =
    qw(
     Lingua::LinkParser::MatchPath::BuildSM
     Lingua::LinkParser::MatchPath::SMContext
       );

sub print_stat {
#    print @_;
}

sub word_content_and_pos {
    shift() =~ /^(.+?)(?:\[.\])?(?:\.([pavding]))?$/o;
    print_stat "## $1 , $2\n";
    ($1, $2);
}


sub check_wordmatch {
    my ($wordpattern, $content_ref, $pos) = @_;
    my $match = 0;

    if($wordpattern->[0] eq 'W'){
	$match = 1 if $$content_ref eq $wordpattern->[1];
    }
    # word regexp
    elsif($wordpattern->[0] eq 'WR'){
	if( $$content_ref =~ /$wordpattern->[1]/ ){
	    $match = 1;
	    $$content_ref = $1 if $#+ >= 1;
	}
    }
    # pos tag
    elsif($wordpattern->[0] eq 'P'){
	$match = 1 if $pos eq $wordpattern->[1];
    }
    # negative word
    elsif ($wordpattern->[0] eq 'NW'){
	$match = 1 if $$content_ref ne $wordpattern->[1];
    }
    # negative word regexp
    elsif ($wordpattern->[0] eq 'NWR'){
	if( $$content_ref !~ /$wordpattern->[1]/ ){
	    $match = 1;
	}
    }
    # negative pos tag
    elsif ($wordpattern->[0] eq 'NP'){
	$match = 1 if $pos ne $wordpattern->[1];
    }

    return $match;
}

sub check_labelmatch {
    my ($labelpattern, $content) = @_;
    my $match = 0;
    if($labelpattern->[0] eq 'L'){
	return 1 if $content eq $labelpattern->[1];
    }
    elsif($labelpattern->[0] eq 'LR'){
	return 1 if $content =~ /$labelpattern->[1]/;
    }
}


sub get_arcs {
    my $self = shift;
    my $curr_state = shift;
    return ref($self->{_arc}->{$curr_state}) ?
	@{$self->{_arc}->{$curr_state}} : ();
}

sub get_arctype {
    my $self = shift;
    my $arc = shift;
    if($arc->{label}){
	return 'L';
    }
    elsif($arc->{word}){
	return 'W';
    }
    elsif($arc->{branch}){
	return 'EB' if $arc->{branch} eq 'E';
	return 'LB' if $arc->{branch} eq 'L';
    }
    return 'N';
}

sub get_branchtype {
    my $self = shift;
    my $arc = shift;
    $arc->{branch_type};
}

sub match_first_word {
    my $self = shift;
    my $arc = shift;

    my $word_index = 0;

    foreach my $w ($self->{_linkage}->words){
        $word_index++;
	my ($content, $pos) = word_content_and_pos($w->text);

	if (check_wordmatch(
			     $arc->{word}, \$content, $pos
			     )){
	    print_stat "First Word: ", Dumper $w;
	    print_stat "($content, $pos)\n";
	    # word capturing
	    if($arc->{word}->[2]){
		push @{$self->{_item}}, $content;
	    }
	    return $w;
	}
    }
}

sub match_word {
    my $self = shift;
    my $arc = shift;

    if(!$self->{_wordptr}){
	$self->{_wordptr} = $self->match_first_word($arc);
	if($self->{_wordptr}){
	    return (1, $arc->{next_state});
	}
	else {
	    return (0, $arc->{next_state});
	}
    }
    elsif($self->{_wordptr}) {
	my $link = ($self->{_wordptr}->links)[$self->{_label_num}];
	my ($content, $pos) = word_content_and_pos(
						    $link->linkword
						    );

	if($link &&
	   check_wordmatch ($arc->{word}, \$content, $pos)
	   ){
	    $self->{_wordptr} = ($self->{_linkage}->words)[$link->linkposition];
	    print_stat "WORD MATCH";
	    print_stat Dumper $self->{_wordptr};
            # word capturing
            if($arc->{word}->[2]){
                push @{$self->{_item}}, $content;
            }
	    return (1, $arc->{next_state});
	}
	else {
	    return (0, $arc->{next_state});
	}
    }
    else {
	return (0, $arc->{next_state});
    }
}

sub match_label {
    my $self = shift;
    my $arc = shift;

    my @links = $self->{_wordptr}->links;
    my $match;
    foreach my $link_num (0..$#links){
	my $link = $links[$link_num];

	# skip visited labels
	next if $self->{_visited}->{ $link->{index} };

	if(check_labelmatch($arc->{label}, $link->linklabel)){
	    print_stat "LABEL MATCH: ", Dumper $link_num,$/;
	    $self->{_label_num} = $link_num;
	    $self->{_visited}->{ $link->{index} } = 1;
	    $match = 1;
	    last;
	}
    }

    print_stat "VISITED: ", Dumper $self->{_visited},$/;
    if($match){
	(1, $arc->{next_state});
    }
    else {
	(0, $arc->{next_state});
    }
}

sub go {
    my $self = shift;
    my $linkage = shift;

    my $curr_wordptr;
    my $curr_state = 0;
    my $next_state;
    my @arc_stack;
    my @arcs;
    my $cnt;
    my @state_stack;
    my $result;
    my @branchtype_stack;

    $self->{_linkage} = $linkage;

    while(1){
	print_stat Dumper \@state_stack;
	unless($self->{_built_arc_stack}->{$curr_state}){
	    print_stat "State: $curr_state\n";
	    my @arcs = $self->get_arcs($curr_state);
	    if( @arcs > 1 ){
		print_stat scalar(@arcs)." MULTI-BRANCHES\n";
		push @state_stack, $curr_state;
	    }
	    $self->{_arc_stack}->{$curr_state} = \@arcs;
	    $self->{_built_arc_stack}->{$curr_state} = 1;
	}

	my $arc = shift @{$self->{_arc_stack}->{$curr_state}};
	if(!$arc){
	    pop @state_stack;
	    $curr_state = $state_stack[-1];
	    if(!defined $curr_state){
		$self->{_failed} = 1;
		return 0;
	    }
	    next;
	}
	print_stat Dumper $arc;
	my $arctype = $self->get_arctype($arc);
	print_stat Dumper $arctype;

	$result = 1;
	if($arctype eq 'W'){
	    ($result, $next_state) = $self->match_word($arc);
	    print_stat "($result, $next_state)\n";
	}
	elsif($arctype eq 'L'){
	    ($result, $next_state) = $self->match_label($arc);
	}
	elsif($arctype eq 'EB'){
	    my $branchtype = $self->get_branchtype($arc);
	    if($branchtype == 1 || $branchtype == 2){
		push @{$self->{_branch_entrance}}, $curr_state;
		push @{$self->{_wordptr_stack}}, $self->{_wordptr};
		print_stat "PUSH BRANCHTYPE STACK";
		push @branchtype_stack, $branchtype;
	    }
	    $next_state = $arc->{next_state};
	}
	elsif($arctype eq 'LB'){
	    my $branchtype = $self->get_branchtype($arc);
	    if($branchtype == 1 || $branchtype == 2){
		$next_state = pop @{$self->{_branch_entrance}};
		print_stat "POP BRANCHTYPE STACK";
		pop @branchtype_stack;
		print_stat "<<", Dumper($self->{_wordptr_stack}), ">>";
		$self->{_wordptr} = pop @{$self->{_wordptr_stack}};
	    }
	    $next_state = $arc->{next_state};
	}
	elsif($arctype eq 'N') {
	    $next_state = $arc->{next_state};
	}
	else {
	    die;
	}

	if(!$result){
	    if($branchtype_stack[-1] != 2){
		$next_state = $state_stack[-1];
		if(!defined $next_state){
		    $self->{_failed} = 1;
		}
		print_stat "=> MATCH FAILURE, go back to state $next_state\n\n";
	    }
	    else {
		print_stat "SUCCESSFUL MATCHING IN NEGATIVE BRANCH\n";
		print_stat "NEXT_STATE: $next_state\n";
	    }
	}
	$curr_state = $next_state;
	return 1 if $self->accepted($curr_state);
	return 0 if $self->failed;
    }
    1;
}



our $PRINT_DIAGRAM;
our $PRINT_SUCCESSFUL_DIAGRAM;
our $PRINT_DIAGRAM_TO_FILE;
our $PRINT_SUCCESSFUL_DIAGRAM_TO_FILE;

sub match($$) {
    my $self = shift;
    my $Lparser = $self->{parser};
    my $sentence = ref($_[0]) ? shift() : $Lparser->create_sentence(shift);

    my $num_words;
    my $linkage_count = 0;
    $self->{_start_position} = 0;
    $self->{_diagram_printed} = 0;

    # iterate through linkages
    foreach my $linkage ($sentence->linkages){
	$linkage_count++;
	last if $linkage_count > 1;

	$num_words = $linkage->num_words;

	print_stat $Lparser->get_diagram($linkage) if $PRINT_DIAGRAM;


	if( ref($self->{_fh}) && $PRINT_DIAGRAM_TO_FILE ){
	    $self->{_fh}->dump('DIAGRAM', $Lparser->get_diagram($linkage));
	    $self->{_diagram_printed} = 1;
	}

	# clean up temporary state information
	$self->reset;
	my $cool = $self->go($linkage);
	    
	if($cool){
	    print_stat "** COOL **\n";
	    print_stat $Lparser->get_diagram($linkage) if $PRINT_SUCCESSFUL_DIAGRAM;
	    print_stat $self->{_fh}->dump(
					  'SUCCESSFUL_DIAGRAM', 
					  $Lparser->get_diagram($linkage)
					  )
		if ref $self->{_fh} && $PRINT_SUCCESSFUL_DIAGRAM_TO_FILE;
	    
	    return 1;
	}
	else {
	    print_stat "FAILED\n";
	}
    }
}


sub accepted {
    my $self = shift;
    my $curr_state = shift;
    print_stat "CURR_STATE: $curr_state, ACCEPT_STATE: $self->{_final_state}\n";
    $curr_state == $self->{_final_state};
}

sub failed { $_[0]->{_failed} }


sub item { @_ ? @{$_[0]->{_item}}[@_] : @{$_[0]->{_item}} }

1;


__END__