| WordNet-SenseRelate-WordToSet documentation | Contained in the WordNet-SenseRelate-WordToSet distribution. |
WordNet::SenseRelate::WordToSet - Find the WordNet Sense of a Target Word that is Most Related to a Given Set of Words
use WordNet::SenseRelate::WordToSet;
use WordNet::QueryData;
my $qd = WordNet::QueryData->new;
my %options = (measure => 'WordNet::Similarity::jcn',
wordnet => $qd);
my $mod = WordNet::SenseRelate::WordToSet->new (%options);
my $res = $mod->disambiguate (target => 'bank',
context => [qw/money cash dollar/]);
# all senses for target and their scores are returned
# we will just print the sense most related to the set
$best_score = -100;
foreach my $key (keys %$res) {
next unless defined $res->{$key};
if ($res->{$key} > $best_score) {
$best_score = $res->{$key};
$best = $key;
}
}
# let's call WordNet::QueryData to get the gloss of the most
# related sense of the target to the set
print "$best : ", join(", ", $qd->querySense($best, "glos")), "\n";
my $res = $mod->disambiguate (target => 'bank',
context => [qw/river shore slope water/]);
# all senses for target and their scores are returned
# we will just print the sense most related to the set
$best_score = -100;
foreach my $key (keys %$res) {
next unless defined $res->{$key};
if ($res->{$key} > $best_score) {
$best_score = $res->{$key};
$best = $key;
}
}
# let's call WordNet::QueryData to get the gloss of the most
# related sense of the target to the set
print "$best : ", join(", ", $qd->querySense($best, "glos")), "\n";
WordNet::SenseRelate::WordToSet finds the sense of a given target word that is most related to the words in a given set.
The methods below will die() on serious errors. Wrap calls to these methods in an eval BLOCK to catch the exceptions. See 'perldoc -f eval' for more information.
The constructor for this class.
Parameters:
wordnet => REFERENCE : WordNet::QueryData object (required) measure => STRING : name of a WordNet::Similarity measure (required) config => FILENAME : path to a config file for above measure trace => INTEGER : generate traces (default : 0) threshold => NUMBER : similarity scores less than this are ignored
Returns:
A reference to the constructed object or undef on error.
The trace levels are:
1 show non-zero scores from the semantic relatedness measure 2 show zero & undefined scores from the relatedness measure (no effect unless combined with level 1) 4 show traces from the semantic relatedness measure
Note: the trace levels can be added together to achieve a combined effect. For example, to show the non-zero scores, the zero scores, and the traces from the measure, use level 7.
Disambiguates the target word
Parameters:
target => STRING : the target word to disambiguate (required) context => REFERENCE : a reference to an array of context words
Returns:
A hash reference. The keys of the hash will be the senses of the target word, and the values will be the score for each sense.
Gets the current trace string and resets it to "".
Parameters:
None
Returns:
The current trace string (before resetting it). If the returned string is not empty, it will end with a newline.
Example:
my $str = $wsd->getTrace (); print $str;
L<http://senserelate.sourceforge.net/> Ted Pedersen, Satanjeev Banerjee, and Siddharth Patwardhan (2005) Maximizing Semantic Relatedness to Perform Word Sense Disambiguation, University of Minnesota Supercomputing Institute Research Report UMSI 2005/25, March. L<http://www.msi.umn.edu/general/Reports/rptfiles/2005-25.pdf>
Ted Pedersen, University of Minnesota, Duluth tpederse at d.umn.edu Jason Michelizzi
Last modified by : $Id: WordToSet.pm,v 1.10 2008/04/07 03:29:47 tpederse Exp $
Copyright (C) 2005-2008 by Jason Michelizzi and Ted Pedersen
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
| WordNet-SenseRelate-WordToSet documentation | Contained in the WordNet-SenseRelate-WordToSet distribution. |
package WordNet::SenseRelate::WordToSet;
use 5.006; use strict; use warnings; use Carp; our @ISA = (); our $VERSION = '0.04'; my %wordnet; my %simMeasure; my %trace; my %wnformat; my %threshold; # constants used to specify trace levels #use constant TR_CONTEXT => 1; # show the context window #use constant TR_BESTSCORE => 2; # show the best score #use constant TR_ALLSCORES => 4; # show all non-zero scores # the previous three levels don't make a lot of sense for WordToSet # * The context should be obvious # * All the scores are returned from disambiguate() use constant TR_PAIRWISE => 1; # show all the non-zero similarity scores use constant TR_ZERO => 2; use constant TR_MEASURE => 4; # show similarity measure traces
sub new { my $class = shift; my %args = @_; $class = ref $class || $class; my $qd; my $measure; my $measure_config; my $threshold = 0; my $trace; my $wnformat = 0; while (my ($key, $val) = each %args) { if ($key eq 'wordnet') { $qd = $val; } elsif ($key eq 'measure') { $measure = $val; } elsif ($key eq 'config') { $measure_config = $val; } elsif ($key eq 'threshold') { $threshold = $val; } elsif ($key eq 'trace') { $trace = $val; } elsif ($key eq 'wnformat') { $wnformat = $val; } else { croak "Unknown parameter type '$key'"; } } unless (ref $qd) { croak "No WordNet::QueryData object supplied"; } unless ($measure) { croak "No relatedness measure supplied"; } my $self = bless [], $class; # initialize tracing if (defined $trace) { $trace{$self} = {level => $trace, string => ''}; if (($trace & TR_ZERO) and !($trace & TR_PAIRWISE)) { warn ("Warning: trace level ", TR_ZERO, " has no effect unless combined with level ", TR_PAIRWISE, ".\n"); } } else { $trace{$self} = {level => 0, string => ''}; } # setup relatedness measure my $file = $measure; $file =~ s/::/\//g; require "${file}.pm"; if (defined $measure_config) { $simMeasure{$self} = $measure->new ($qd, $measure_config); } else { $simMeasure{$self} = $measure->new ($qd); } # check for errors my ($errCode, $errStr) = $simMeasure{$self}->getError; if ($errCode) { croak $errStr; } # turn on traces in the relatedness measure if required if ($trace{$self}->{level} & TR_MEASURE) { $simMeasure{$self}->{trace} = 1; } else { $simMeasure{$self}->{trace} = 0; } $wordnet{$self} = $qd; # store threshold value $threshold{$self} = $threshold; $wnformat{$self} = $wnformat; return $self; } sub DESTROY { my $self = shift; delete $wordnet{$self}; delete $simMeasure{$self}; delete $threshold{$self}; delete $trace{$self}; delete $wnformat{$self}; 1; }
sub disambiguate { my $self = shift; my %options = @_; # local vars my @context; my $target; while (my ($key, $val) = each %options) { if ($key eq 'target') { $target = $val; } elsif ($key eq 'context') { if ('ARRAY' eq ref $val) { @context = @$val; } else { carp "Value for option 'context' is not an array reference"; return undef; } } elsif ($key eq 'threshold') { $threshold{$self} = $val; } else { croak "Unknown option '$key'"; } } my $tagged = 0; # quick sanity check to ensure that all words are in WordNet my $qd = $wordnet{$self}; if ($wnformat{$self}) { foreach my $word ($target, @context) { my @t = $qd->querySense ($word); unless (scalar @t) { warn "'$word' is not found in WordNet\n"; return undef; } } } else { foreach my $word ($target, @context) { my @t = $qd->validForms ($target); unless (scalar @t) { warn "'$word' is not found in WordNet\n"; return undef; } } } my $result; $result = $self->doNormal ($target, @context); return $result; } sub doNormal { my $self = shift; my $target = shift; my @context = @_; my $measure = $simMeasure{$self}; my $threshold = $threshold{$self}; my $tracelevel = $trace{$self}->{level}; my @traces; # get senses for the target and context words my @targetsenses = $self->_getSenses ($target); my @contextsenses; for my $i (0..$#context) { $contextsenses[$i] = [$self->_getSenses ($context[$i])]; } # now disambiguate the target my @sums; for my $targetsense (0..$#targetsenses) { $sums[$targetsense] = 0; for my $i (0..$#contextsenses) { next if 0 == scalar $contextsenses[$i]; my @tempScores; for my $k (0..$#{$contextsenses[$i]}) { unless (defined $contextsenses[$i][$k]) { warn "\$contextsenses[$i][$k] is undef"; } $tempScores[$k] = $measure->getRelatedness ($targetsenses[$targetsense], $contextsenses[$i][$k]); } my $max = -1; my $maxidx = -1; for my $n (0..$#tempScores) { if ($tracelevel & TR_PAIRWISE) { if (($tempScores[$n] && $tempScores[$n] > 0) || ($tracelevel & TR_ZERO)) { unless (defined $contextsenses[$i][$n]) { warn "\$contextsenses[$i][$n] is undef"; } my $s = " " . $targetsenses[$targetsense] . ' ' . $contextsenses[$i][$n] . ' ' . (defined $tempScores[$n] ? $tempScores[$n] : 'undef'); push @{$traces[$targetsense]}, $s; } } if ($tracelevel & TR_MEASURE) { if (($tempScores[$n] && $tempScores[$n] > 0) || ($tracelevel & TR_ZERO)) { push @{$traces[$targetsense]}, $measure->getTraceString; } } $measure->getError; # clear errors from relatedness object if (defined $tempScores[$n] && ($tempScores[$n] > $max)) { $max = $tempScores[$n]; $maxidx = $n; } } $sums[$targetsense] += $max if $max > $threshold; } } my $max = -1; my $maxidx = -1; foreach my $p (0..$#sums) { if ($sums[$p] > $max) { $maxidx = $p; $max = $sums[$p]; } # if ($tracelevel & TR_ALLSCORES # && (($sums[$p] > 0) or ($tracelevel & TR_ZERO))) { # $trace{$self}->{string} .= " $targetsenses[$p]: $sums[$p]\n"; # } if (($tracelevel & TR_MEASURE or $tracelevel & TR_PAIRWISE) && defined $traces[$p]) { for my $str (@{$traces[$p]}) { $trace{$self}->{string} .= $str . "\n"; } } } my %rhash; my $best_sense = ''; my $best_score = -1; foreach my $i (0..$#sums) { if ($sums[$i] > $best_score) { $best_sense = $targetsenses[$i]; $best_score = $sums[$i]; } $rhash{$targetsenses[$i]} = $sums[$i] if $sums[$i] > $threshold; } # if ($tracelevel & TR_BESTSCORE) { # if ($best_score >= 0) { # $trace{$self}->{string} .= " Winning sense: $best_sense\n"; # $trace{$self}->{string} .= " Winning score: $best_score\n"; # } # else { # $trace{$self}->{string} .= " Winning sense: (none)\n"; # $trace{$self}->{string} .= " Winning score: (none)\n"; # } # } return \%rhash; # if ($maxidx >= 0) { # return $targetsenses[$maxidx]; # } # return $target; }
sub getTrace { my $self = shift; return '' unless $trace{$self}; my $s = $trace{$self}->{string}; $trace{$self}->{string} = ''; return $s; } sub _getSenses { my $self = shift; my $word = shift; my $qd = $wordnet{$self}; my @senses; # first get all forms for each POS if ($word =~ /\#o/) { @senses = undef; } else { my @forms; unless ($wnformat{$self}) { @forms = $qd->validForms ($word); } else { @forms = $word; } if (scalar @forms == 0) { @senses = (); } else { # now get all the senses for each form foreach my $form (@forms) { my @temps = $qd->querySense ($form); push @senses, @temps; } } } return @senses; } 1; __END__