WordNet::SenseRelate::TargetWord - Perl module for performing word sense disambiguation.


WordNet-SenseRelate-TargetWord documentation Contained in the WordNet-SenseRelate-TargetWord distribution.

Index


Code Index:

NAME

Top

WordNet::SenseRelate::TargetWord - Perl module for performing word sense disambiguation.

SYNOPSIS

Top

  use WordNet::SenseRelate::TargetWord;

  $tool = WordNet::SenseRelate::TargetWord->new();

  $sense = $tool->disambiguate($instance);

DESCRIPTION

Top

WordNet::SenseRelate::TargetWord combines the different parts of the word sense disambiguation process. It allows the user to select the disambiguation algorithm, the context selection algorithm, and other data processing tasks. This module applies these to the context and returns the selected sense.

USING THE API (WITH EXAMPLE CODE)

Top

The WordNet::SenseRelate::TargetWord module handles the managerial task of initializing the processing modules, initializing the data and passing it between modules. The following pieces of code can serve as a guide for using the module to disambiguate a word within its context.

We would start by initializing the module:

  use WordNet::SenseRelate::TargetWord;

  # Create a hash with the config options
  my %wsd_options = (preprocess => [],
                     preprocessconfig => [],
                     context => 'WordNet::SenseRelate::Context::NearestWords',
                     contextconfig => {(windowsize => 5,
                                       contextpos => 'n')},
                     algorithm => 'WordNet::SenseRelate::Algorithm::Local',
                     algorithmconfig => {(measure => 'WordNet::Similarity::res')});

  # Initialize the object
  my ($wsd, $error) = WordNet::SenseRelate::TargetWord->new(\%wsd_options, 0);

In the current implementation, an "instance" is a hash reference with these fields: "text", "words", "head", "target", "wordobjects", "lexelt", "id", "answer" and "targetpos". The values of the hash reference corresponding to "text", "words" and "wordobjects" are array references. The remaining values are scalars. So an instance object can be created like so:

  my $hashRef = {};             # Creates a reference to an empty hash.
  $hashRef->{text} = [];        # Value is an empty array ref.
  $hashRef->{words} = [];       # Value is an empty array ref.
  $hashRef->{wordobjects} = []; # Value is an empty array ref.
  $hashRef->{head} = -1;        # Index into the text array (initialized to -1)
  $hashRef->{target} = -1;      # Index into the words & wordobjects arrays (initialized to -1)
  $hashRef->{lexelt} = "";      # Lexical element (terminology from Senseval2)
  $hashRef->{id} = "";          # Some ID assigned to this instance
  $hashRef->{answer} = "";      # Answer key (only required for evaluation)
  $hashRef->{targetpos} = "";   # Part-of-speech of the target word (if known).

The ones that are important to us are wordobjects and target. The wordobjects array is an array of WordNet::SenseRelate::Word objects. Given a word (say "bank"), a WordNet::SenseRelate::Word object can be created like this:

  use WordNet::SenseRelate::Word;

  my $wordobj = WordNet::SenseRelate::Word->new("bank");

The wordobject array represents a sentence/paragraph containing the word to be disambiguated. The target field is an index into this array, pointing to the word to be disambiguated. So, for a given example sentence, the disambiguation code would be as follows:

  my @sentence = ("The", "boat", "ran", "aground", "on", "the", "river", "bank");
  foreach my $theword (@sentence)
  {
    my $wordobj = WordNet::SenseRelate::Word->new($theword);
    push(@{$hashRef->{wordobjects}}, $wordobj);
    push(@{$hashRef->{words}}, $theword);
  }
  $hashRef->{target} = 7;        # Index of "bank"
  $hashRef->{id} = "Instance1";  # ID can be any string.

The remaining fields are not really used by the system, but they could be initialized (for use later in the system):

  $hashRef->{lexelt} = "bank.n";
  $hashRef->{answer} = "bank#n#1";
  $hashRef->{targetpos} = "n";        # n, v, a or r
  $hashRef->{text} = [("The boat ran aground on the river", "bank")];
  $hashRef->{head} = 1;               # Index to bank

Finally, the disambiguation is done as follows:

  my ($sense, $error) = $wsd->disambiguate($hashRef);
  print "$sense\n";

The scalar $sense contains the selected sense of the target word, and can be processed as required.

EXPORT

Top

None by default.

SEE ALSO

Top

perl(1)

WordNet::Similarity(3)

http://wordnet.princeton.edu

http://senserelate.sourceforge.net

http://groups.yahoo.com/group/senserelate

AUTHOR

Top

Ted Pedersen, tpederse at d.umn.edu

Siddharth Patwardhan, sidd at cs.utah.edu

Satanjeev Banerjee, banerjee+ at cs.cmu.edu

COPYRIGHT AND LICENSE

Top


WordNet-SenseRelate-TargetWord documentation Contained in the WordNet-SenseRelate-TargetWord distribution.

# WordNet::SenseRelate::TargetWord v0.09
# (Last Updated $Id: TargetWord.pm,v 1.16 2006/12/24 12:39:19 sidz1979 Exp $)
package WordNet::SenseRelate::TargetWord;

use 5.006;
use strict;
use warnings;

use WordNet::SenseRelate::Tools;

require Exporter;

our @ISA         = qw(Exporter);
our %EXPORT_TAGS = ('all' => [qw()]);
our @EXPORT_OK   = (@{$EXPORT_TAGS{'all'}});
our @EXPORT      = qw();
our $VERSION     = '0.09';

# CONSTRUCTOR: Creates new SenseRelate::TargetWord object.
# Returns the created object.
sub new
{
    my $class   = shift;
    my $self    = {};
    my $modules = {};

    # Create the TargetWord object
    $class = ref $class || $class;
    bless($self, $class);

    # Set the default options first
    $modules->{preprocess} = [];

    # Compound detection no longer done by default
    # push(
    #      @{$modules->{preprocess}},
    #      "WordNet::SenseRelate::Preprocess::Compounds"
    # );
    $modules->{preprocessconfig} = [];
    $modules->{context}       = "WordNet::SenseRelate::Context::NearestWords";
    $modules->{contextconfig} = undef;
    $modules->{postprocess}   = [];
    $modules->{postprocessconfig} = [];
    $modules->{algorithm}         = "WordNet::SenseRelate::Algorithm::Local";
    $modules->{algorithmconfig}   = undef;
    $modules->{wntools}           = undef;

    # Get the options
    my $options = shift;
    my $trace   = shift;
    $trace = 0 if (!defined $trace);
    if(defined $options && ref $options eq "HASH")
    {
        # Get the Preprocessor modules
        if(defined $options->{preprocess} && ref $options->{preprocess} eq "ARRAY")
        {
            $modules->{preprocess} = [];
            push(@{$modules->{preprocess}}, @{$options->{preprocess}});
        }
	elsif(defined $options->{preprocess})
        {
            return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (preprocess).");
        }

        # Get configuration options for preprocessor modules
        if(defined $options->{preprocessconfig} && ref $options->{preprocessconfig} eq "ARRAY")
        {
            $modules->{preprocessconfig} = [];
            push(@{$modules->{preprocessconfig}}, @{$options->{preprocessconfig}});
        }
	elsif(defined $options->{preprocessconfig})
        {
            return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (preprocessconfig).");
        }

        # Get context selection module
        $modules->{context} = $options->{context}
          if(defined $options->{context} && !ref $options->{context});
        return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (context).")
          if(defined $options->{context} && ref $options->{context});

        # Get configuration options for context selection module
        if(defined $options->{contextconfig} && ref $options->{contextconfig} eq "HASH")
        {
            $modules->{contextconfig} = $options->{contextconfig};
        }
	elsif(defined $options->{contextconfig})
        {
            return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (contextconfig).");
        }

        # Get postprocess modules
        if(defined $options->{postprocess} && ref $options->{postprocess} eq "ARRAY")
        {
            $modules->{postprocess} = [];
            push(@{$modules->{postprocess}}, @{$options->{postprocess}});
        }
	elsif(defined $options->{postprocess})
        {
            return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (postprocess).");
        }

        # Get configuration options for postprocess modules
        if(defined $options->{postprocessconfig} && ref $options->{postprocessconfig} eq "ARRAY")
        {
            $modules->{postprocessconfig} = [];
            push(@{$modules->{postprocessconfig}}, @{$options->{postprocessconfig}});
        }
	elsif(defined $options->{postprocessconfig})
        {
            return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (postprocessconfig).");
        }

        # Get algorithm module
        $modules->{algorithm} = $options->{algorithm}
          if(defined $options->{algorithm} && !ref $options->{algorithm});
        return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (algorithm).")
          if(defined $options->{algorithm} && ref $options->{algorithm});

        # Get configuration options for algorithm module
        if(defined $options->{algorithmconfig} && ref $options->{algorithmconfig} eq "HASH")
        {
          $modules->{algorithmconfig} = $options->{algorithmconfig}
        }
	elsif(defined $options->{algorithmconfig})
        {
            return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure (algorithmconfig).");
        }

        # Get the WordNet::SenseRelate::Tools
        $modules->{wntools} = $options->{wntools}
          if(defined $options->{wntools} && ref($options->{wntools}) eq "WordNet::SenseRelate::Tools");
        return (undef, "WordNet::SenseRelate::TargetWord->new() -- Unknown/illegal WordNet::SenseRelate::Tools object given.")
          if(defined $options->{wntools} && ref $options->{wntools} ne "WordNet::SenseRelate::Tools");
    }
    elsif(defined($options))
    {
        return (undef, "WordNet::SenseRelate::TargetWord->new() -- Malformed 'options' data structure.");
    }

    # Load WordNet::SenseRelate::Tools
    my $wntools = $modules->{wntools};
    if(!defined $wntools || !ref $wntools || ref($wntools) ne "WordNet::SenseRelate::Tools")
    {
        $wntools = WordNet::SenseRelate::Tools->new($wntools);
        return (undef, "WordNet::SenseRelate::TargetWord->new() -- Unable to load WordNet::SenseRelate::Tools")
          if (!defined $wntools);
    }
    $self->{wntools} = $wntools;

    # Load all the modules
    my $module;
    my $modulePath;

    # Load Preprocessor modules
    $self->{preprocess} = [];
    foreach my $i (0 .. scalar(@{$modules->{preprocess}}) - 1)
    {
        my $preproc = $modules->{preprocess}->[$i];
        $modulePath = $modules->{preprocess}->[$i];
        $modulePath =~ s/::/\//g;
        $modulePath .= ".pm";
        require $modulePath;
        $module =
          $preproc->new($wntools, $trace, $modules->{preprocessconfig}->[$i]);
        return (
            undef,
"WordNet::SenseRelate::TargetWord->new() -- Unable to load preprocess module $preproc"
          )
          if (!defined($module));
        push(@{$self->{preprocess}}, $module);
    }

    # Load Context Selection module
    $modulePath = $modules->{context};
    $modulePath =~ s/::/\//g;
    $modulePath .= ".pm";
    require $modulePath;
    $module =
      $modules->{context}->new($wntools, $trace, $modules->{contextconfig});
    return (undef,
"WordNet::SenseRelate::TargetWord->new() -- Unable to load context selection module "
          . ($modules->{context}))
      if (!defined($module));
    $self->{context} = $module;

    # Load Postprocessor modules
    $self->{postprocess} = [];
    foreach my $i (0 .. scalar(@{$modules->{postprocess}}) - 1)
    {
        my $postproc = $modules->{postprocess}->[$i];
        $modulePath = $postproc;
        $modulePath =~ s/::/\//g;
        $modulePath .= ".pm";
        require $modulePath;
        $module =
          $postproc->new($wntools, $trace, $modules->{postprocessconfig}->[$i]);
        return (
            undef,
"WordNet::SenseRelate::TargetWord->new() -- Unable to load postprocess module $postproc"
          )
          if (!defined($module));
        push(@{$self->{postprocess}}, $module);
    }

    # Load Disambiguation Algorithm module
    $modulePath = $modules->{algorithm};
    $modulePath =~ s/::/\//g;
    $modulePath .= ".pm";
    require $modulePath;
    $module =
      $modules->{algorithm}->new($wntools, $trace, $modules->{algorithmconfig});
    return (undef,
"WordNet::SenseRelate::TargetWord->new() -- Unable to load disambiguation module "
          . ($modules->{algorithm}))
      if (!defined($module));
    $self->{algorithm}  = $module;
    $self->{contextpos} = $module->{contextpos};

    # Initialize the trace string
    $self->{trace}       = $trace;
    $self->{tracestring} = "";
    if ($trace)
    {
        foreach my $tmpModName (@{$modules->{preprocess}})
        {
            $self->{tracestring} .=
"WordNet::SenseRelate::TargetWord ~ Loaded preprocess module $tmpModName\n";
        }
        $self->{tracestring} .=
          "WordNet::SenseRelate::TargetWord ~ Loaded context selection module "
          . ($modules->{context}) . "\n";
        foreach my $tmpModName (@{$modules->{postprocess}})
        {
            $self->{tracestring} .=
"WordNet::SenseRelate::TargetWord ~ Loaded postprocess module $tmpModName\n";
        }
        $self->{tracestring} .=
          "WordNet::SenseRelate::TargetWord ~ Loaded algorithm module "
          . ($modules->{algorithm}) . "\n";
    }

    return ($self, undef);
}

# Takes an instance object and disambiguates the target word
# Returns the selected sense of the target word
sub disambiguate
{
    my $self     = shift;
    my $instance = shift;
    my $sense;
    my $wntools = $self->{wntools};
    my $trace   = $self->{trace};
    $trace = 0 if (!defined $trace);

    return (
        undef,
"WordNet::SenseRelate::TargetWord->disambiguate() -- TargetWord object not found."
      )
      if (   !defined($self)
          || !ref($self)
          || ref($self) ne "WordNet::SenseRelate::TargetWord");
    return (
        undef,
"WordNet::SenseRelate::TargetWord->disambiguate() -- No instance specified."
      )
      if (!defined($instance) || !ref($instance));

    # Preprocess the instance
    foreach my $preproc (@{$self->{preprocess}})
    {
        $instance = $preproc->preprocess($instance);
        return (
            undef,
"WordNet::SenseRelate::TargetWord->disambiguate() -- Error preprocessing instance."
          )
          if (!defined $instance);
        if ($trace)
        {
            $self->{tracestring} .=
              "WordNet::SenseRelate::TargetWord ~ Preprocessing instance ("
              . ($instance->{id}) . ").\n";
            $self->{tracestring} .= $preproc->getTraceString();
        }
    }

    # Required processing of words:
    # (a) Get the base forms of all words
    # (b) Get the possible parts of speech
    # (c) Get the possible senses
    foreach my $i (0 .. scalar(@{$instance->{wordobjects}}) - 1)
    {
      # Get the sense for all the words
      $instance->{wordobjects}->[$i]->retrieveSenses($wntools->{wn});

      # Apply contextpos (POS-capability of sim module) to all words
      $instance->{wordobjects}->[$i]->restrictSenses($self->{contextpos});

      # Apply targetpos to target wordobject
      $instance->{wordobjects}->[$i]->restrictSenses($instance->{targetpos})
        if(defined $instance->{target}
           && $instance->{target} == $i
           && defined $instance->{targetpos});
    }

    # Select context
    $instance = $self->{context}->process($instance);
    return (undef, "WordNet::SenseRelate::TargetWord->disambiguate() -- Error selecting the context.")
      if (!defined $instance);
    $self->{tracestring} .= $self->{context}->getTraceString() if ($trace);

    # Postprocess the instance
    foreach my $postproc (@{$self->{postprocess}})
    {
        $instance = $postproc->postprocess($instance);
        return (undef, "WordNet::SenseRelate::TargetWord->disambiguate() -- Error postprocessing instance.")
          if (!defined $instance);
        if ($trace)
        {
            $self->{tracestring} .= "WordNet::SenseRelate::TargetWord ~ Postprocessing instance (".($instance->{id}).").\n";
            $self->{tracestring} .= $postproc->getTraceString();
        }
    }

    # Debug output...
    #foreach my $wobj (@{$instance->{contextwords}})
    #{
    #  my $daWord = $wobj->getWord();
    #  my @daSenses = $wobj->getSenses();
    #  print STDERR "$daWord: ".(join(", ", @daSenses))."\n";
    #}

    # Get target sense
    $sense = $self->{algorithm}->disambiguate($instance);
    $self->{tracestring} .= $self->{algorithm}->getTraceString() if ($trace);

    return ($sense, undef);
}

# Get the trace string, and reset the trace
sub getTraceString
{
    my $self = shift;
    return ""
      if (   !defined $self
          || !ref($self)
          || ref($self) ne "WordNet::SenseRelate::TargetWord");
    my $returnString = "";
    $returnString = $self->{tracestring} if (defined $self->{tracestring});
    $self->{tracestring} = "";
    return $returnString;
}

1;

__END__