Lingua::LinkParser::Simple - Experiments with some high-level link grammar processing.


Lingua-LinkParser documentation Contained in the Lingua-LinkParser distribution.

Index


Code Index:

NAME

Top

Lingua::LinkParser::Simple - Experiments with some high-level link grammar processing.

SYNOPSIS

Top

  use Lingua::LinkParser::Simple;
  @subjects = extract_subject(sentence => $sentence, verb => $verb);

DESCRIPTION

Top

This module allows simple but incomplete access to the features provided by Lingua::LinkParser, and should be considered purely experimental. If you have any cool functions you'd like added here, let me know.

This function tries to parse the sentence, find the specified verb, and return all words (or noun phrases) that are subjects for that verb.

AUTHOR

Top

Danny Brian <danny@brians.org>

SEE ALSO

Top

perl.


Lingua-LinkParser documentation Contained in the Lingua-LinkParser distribution.

package Lingua::LinkParser::Simple;

use 5.006;
use strict;

require Exporter;
use AutoLoader qw(AUTOLOAD);
use Lingua::LinkParser;

our @ISA = qw(Exporter);

our @EXPORT = qw(
  extract_subject
);
our $VERSION = '1.16';

sub new {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{parser} = new Lingua::LinkParser;
  $self->{parser}->opts(
      'max_sentence_length' => 70,
      'panic_mode'          => 'TRUE',
      'max_parse_time'      => 20,
      'linkage_limit'       => 50,
      'short_length'        => 10,
      'disjunct_cost'       => 2,
      'min_null_count'      => 0,
      'max_null_count'      => 0,
  );
  $self;
}

sub extract_subject {
  my $self = shift;
  my %args = @_;
  my $sentence = $self->{parser}->create_sentence($args{sentence});
  return unless ($sentence);
  if ($sentence->num_linkages == 0) {
    $self->{parser}->opts('min_null_count' => 1,
                  'max_null_count' => $sentence->length);
    $sentence = $self->{parser}->create_sentence($args{sentence});
    return unless ($sentence);
      # print "null linkages found: ", $sentence->num_linkages, "\n";

      if ($sentence->num_linkages == 0) {
        $self->{parser}->opts('disjunct_cost'    => 3,
                    'min_null_count'   => 1,
                    'max_null_count'   => 30,
                    'max_parse_time'   => 20,
                    'islands_ok'       => 1,
                    'short_length'     => 6,
                    'all_short_connectors' => 1,
                    'linkage_limit'    => 50
        );
       $sentence = $self->{parser}->create_sentence($args{$sentence});
       return unless ($sentence);
    }
  }

  my $verb  = $args{verb};
  my $linkage  = $sentence->linkage(1);
  return unless ($linkage);
  # computing the union and then using the last sublinkage
  # permits conjunctions.
  $linkage->compute_union;
  my $sublinkage = $linkage->sublinkage($linkage->num_sublinkages);
  return unless ($sublinkage);

  my $subject = 'S[s|p]' .                   # singular and plural subject
                '(?:[\w\*]{1,2})*' .         # any optional subscripts
                ':(\d+):' .                  # number of the word
                '(\w+(?:\.\w)*)';            # and save the word itself
  my $other      = '[^\)]+';                 # junk, within the parenthesis
  my $verbre     = '"(' . $args{verb} . '*)\.v"';
                                             # singular and plural verbs
  my $no_objects = '(?![^\)]* O.{1,3}:)';    # don't match objects

  my $pattern = "$subject $other $verbre $no_objects";

  my $wordtxt;
  my @wordlist;
  if ($sublinkage =~ /$pattern/mx) {
    my $wordobj  = $sublinkage->word($1); # the stored word number
    $wordtxt  = $2;
    $verb     = $3;
    foreach my $link ($wordobj->links) { # process array of links
        # proper nouns and noun modifiers
      if ($link->linklabel =~ /^G|AN|A/)
      {
        $wordlist[$link->linkposition] = $link->linkword;
      }
      # possessive pronouns, via a noun determiner
      if ($link->linklabel =~ /^D[s|m]/)
      {
        my $wword = $sublinkage->word($link->linkposition);
        foreach my $llink ($wword->links)
        {
          if ($llink->linklabel =~ /^YS/)
          {
            $wordlist[$llink->linkposition] = $llink->linkword;
            $wordlist[$link->linkposition]  = $link->linkword;
            my $wwword = $sublinkage->word($llink->linkposition);
            foreach my $lllink ($wwword->links)
            {
              if ($lllink->linklabel =~ /^G|AN/)
              {
                $wordlist[$lllink->linkposition] = $lllink->linkword;
              }
            }
          }
        }
      }
    } 
    return join (" ", @wordlist) . " $wordtxt";
  }
}

1;
__END__