Lingua::Treebank::HeadFinder - Head-finding in Lingua::Treebank


Lingua-Treebank documentation Contained in the Lingua-Treebank distribution.

Index


Code Index:

NAME

Top

Lingua::Treebank::HeadFinder - Head-finding in Lingua::Treebank

SYNOPSIS

Top

  use Lingua::Treebank;

  my @utterances = Lingua::Treebank->from_penn_file($filename);

  foreach (@utterances) {
    # $_ is a Lingua::Treebank::Const now

    foreach ($_->get_all_terminals) {
      # $_ is a Lingua::Treebank::Const that is a terminal (word)

      print $_->word(), ' ' $_->tag(), "\n";
    }

    print "\n\n";

  }

ABSTRACT

Top

  Lingua::Treebank::HeadFinder is an object that reads a
  Magerman-style head-finding list and performs headfinding on
  Lingua::Treebank::Const trees.

DESCRIPTION

Top



  The L::TB::HeadFinder object is initialized from a list like the one in 

To do


Lingua-Treebank documentation Contained in the Lingua-Treebank distribution.

package Lingua::Treebank::HeadFinder;
use strict;
use Carp;
use Lingua::Treebank;
#################################
sub new {
    my $class = shift;
    my %args = @_;

    my $self = bless \%args, $class;

    croak "format not defined" unless defined $self->{format};

    if ($self->{format} eq 'roark') {
	$self->read_roark_rules($self->{file});
    }
    elsif ($self->{format} eq 'charniak') {
	$self->read_charniak_rules($self->{file});
    }

    return $self;
}
#################################
sub read_charniak_rules {
  my $self = shift;
  my $file = shift;
  open my $fh, "<", $file
    or die "Can't open charniak rules file '$file' for reading: $!\n";
  while (<$fh>) {
    chomp;
    next if /^\s*#/;

    s/^(\S+)\s+//;
    my $parent = $1;
    if ($parent eq '*default*') {
      $parent = 'DEFAULT';
    }

    while (length $_) {
      s/\( ([^()]+?) \)//x;
      my $ruleset = $1;

      my ($direction, @candidates) = split " ", $ruleset;
      s/^\s+//;
      s/\s+$//;
      if ($direction eq 'r') {
	$direction = 'right';
      }
      elsif ($direction eq 'l') {
	$direction = 'left';
      }
      croak "line $. of $file direction is wrong\n"
	unless $direction eq 'right' or $direction eq 'left';

      $self->add_rule(parent => $parent,
		      direction => $direction,
		      candidates => \@candidates,
		      text => $_);
    }
  } # loop over lines
  close $fh
    or die "can't close rules file '$file' after reading: $!\n";
}
#################################
sub read_roark_rules {
    my ($self) = shift;
    my ($file) = @_;

    open (my $fh, "<", $file)
      or die "can't open rules file '$file' for reading: $!\n";

    while (<$fh>) {
	chomp;

	# comment lines begin with a sharp
	next if /^\s*#/;

	# lines look like: NP right NP NN NT

	# and a second line with the same target will be used only if
	# the first line fails, so fallback choices can be encoded,
	# e.g.:

	# NP right AJ AAJ

	# also there can be a DEFAULT right NP NN NT  -- used last

	my ($parent, $direction, @candidates) = split;

	$direction = lc $direction; # don't care about caps

	if ($direction eq 'r') {
	    $direction = 'right';
	}
	elsif ($direction eq 'l') {
	    $direction = 'left';
	}

	if ($parent eq '*default*') {
	    $parent = 'DEFAULT';
	}

	croak "line $. of $file direction is wrong\n"
	  unless $direction eq 'right' or $direction eq 'left';

	$self->add_rule(parent => $parent,
			direction => $direction,
			candidates => \@candidates,
			text => $_);

    }
    close $fh
      or die "can't close rules file '$file' after reading: $!\n";
}
#################################
sub add_rule {
    my $self = shift;
    my %args = @_;

    my %cands = map { $_ => 1 } @{$args{candidates}};

    my $rule =  [ $args{direction},
		  \%cands,
		  $args{text},
		];
    push @{$self->{$args{parent}}}, $rule;
}
#################################
sub select_head {
    my $self = shift;
    my Lingua::Treebank::Const $tree = shift;
    my $tag = $tree->tag();
    my @children = @{$tree->children()};

  RULE:
    for my $rule (@{$self->{$tag}}, @{$self->{DEFAULT}}) {
	my @search;
	if ($rule->[0] eq 'left' ) {
	    @search =  @children ;
	}
	else {
	    @search = reverse @children;
	}
	for my $tok (@search) {
	    if (not keys %{$rule->[1]}) {
		# rule is empty but has direction; match anything so
		# pick the first one
		return $tok;
	    }
	    my $child_tag = $tok->tag();
	    if ($rule->[1]{$child_tag}) {
		# carp "selecting $child_tag as head of $tag";
		return $tok;
	    }
	}
	# carp "rejecting rule $rule->[2]";
    }
    carp "no rule found for $tag";
}
#################################
sub annotate_heads {
    # recursively annotate a tree
    my $self = shift;
    my Lingua::Treebank::Const $tree = shift;

    return if $tree->is_terminal();

    my $headchild = $self->select_head($tree);
    $tree->headchild($headchild);

    for my $branch (@{$tree->children()}) {
	$self->annotate_heads($branch);
    }
}
#################################

1;

__END__