WordNet::Similarity::DepthFinder - methods to find the depth of synsets in


WordNet-Similarity documentation Contained in the WordNet-Similarity distribution.

Index


Code Index:

NAME

Top

WordNet::Similarity::DepthFinder - methods to find the depth of synsets in WordNet taxonomies

SYNOPSIS

Top

 use WordNet::QueryData;
 my $wn = WordNet::QueryData->new;
 defined $wn or die "Construction of WordNet::QueryData failed";

 use WordNet::Similarity::DepthFinder;

 my $obj = WordNet::Similarity::DepthFinder->new ($wn);
 my ($err, $errString) = $obj->getError ();
 $err and die $errString;

 my $wps1 = 'car#n#4';
 my $wps2 = 'oil#n#1';

 my $offset1 = $wn -> offset ($wps1);
 my $offset2= $wn -> offset ($wps2);

 my @roots = $obj->getTaxonomies ($offset1, 'n');
 my $taxonomy_depth = $obj->getTaxonomyDepth ($roots[0], 'n');
 print "The maximum depth of the taxonomy where $wps1 is found is $taxonomy_depth\n";

 my @depths = $obj->getSynsetDepth ($offset1, 'n');
 print "The depth of $offset1 is $depths[0]->[0]\n";

 my @lcsbyic = $obj -> getLCSbyDepth($wps1,$wps2,'n','wps');
 print "$wps1 and $wps2 have LCS $lcsbyic[0]->[0] with Depth $lcsbyic[0]->[1]\n";

 my @lcsbyic = $obj -> getLCSbyDepth($offset1,$offset2,'n','offset');
 print "$offset1 and $offset2 have LCS $lcsbyic[0]->[0] with Depth $lcsbyic[0]->[1]\n";

DESCRIPTION

Top

The following methods are provided by this module:

$obj->initialize ($configfile)

Overrides the initialize method in WordNet::Similarity to look for and process depths files. The initialize method of the superclass is also called.

$obj->getSynsetDepth ($offset, $pos)

Returns the depth(s) of the synset denoted by $offset and $pos. The return value is a list of references to arrays. Each array has the form (depth, root).

$obj->getTaxonomyDepth ($offset, $pos)

Returns the maximum depth of the taxonomy rooted at the synset identified by $offset and $pos. If $offset and $pos does not identify a root of a taxonomy, then undef is returned and an error is raised.

$obj->getTaxonomies ($offset, $pos)

Returns a list of the roots of the taxonomies to which the synset identified by $offset and $pos belongs.

getLCSbyDepth($synset1, $synset2, $pos, $mode)

Given two input synsets, finds the least common subsumer (LCS) of them. If there are multiple candidates for the LCS (due to multiple inheritance in WordNet), the LCS with the greatest depth is chosen (i.e., the candidate whose shortest path to the root is the longest).

Parameters: a blessed reference, two synsets, a part of speech, and a mode. The mode must the either the string 'wps' or 'offset'. If the mode is wps, then the two input synsets must be in word#pos#sense format. If the mode is offset, then the input synsets must be WordNet offsets.

Returns: a list of the form ($lcs, $depth) where $lcs is the LCS (in wps format if mode is 'wps' or an offset if mode is 'offset'. $depth is the depth of the LCS in its taxonomy. Returns undef on error.

$obj->_processSynsetsFile ($filename)

Reads and processes a synsets file as output by wnDepths.pl

$obj->_processTaxonomyFile ($filename)

Reads and processes a taxonomies file as produced by wnDepths.pl

AUTHORS

Top

 Ted Pedersen, University of Minnesota Duluth
 tpederse at d.umn.edu

 Jason Michelizzi, University of Minnesota Duluth
 mich0212 at d.umn.edu

BUGS

Top

None.

To report bugs, e-mail tpederse at d.umn.edu or go to http://groups.yahoo.com/group/wn-similarity/.

SEE ALSO

Top

WordNet::Similarity(3) WordNet::Similarity::wup(3) WordNet::Similarity::lch(3)

COPYRIGHT

Top


WordNet-Similarity documentation Contained in the WordNet-Similarity distribution.
# WordNet::Similarity::DepthFinder version 2.04
# (Last updated $Id: DepthFinder.pm,v 1.20 2008/03/27 06:21:17 sidz1979 Exp $)
#
# Module containing code to find the depths of (noun and verb) synsets in
# the WordNet 'is-a' taxonomies
#
# Copyright (c) 2005,
#
# Ted Pedersen, University of Minnesota Duluth
# tpederse at d.umn.edu
#
# Jason Michelizzi, Univeristy of Minnesota Duluth
# mich0212 at d.umn.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to 
#
# The Free Software Foundation, Inc., 
# 59 Temple Place - Suite 330, 
# Boston, MA  02111-1307, USA.
#
# ------------------------------------------------------------------

package WordNet::Similarity::DepthFinder;

use strict;
use warnings;

use WordNet::Similarity::PathFinder;

our @ISA = qw/WordNet::Similarity::PathFinder/;

our $VERSION = '2.04';

WordNet::Similarity::addConfigOption ("taxonomyDepthsFile", 1, "p", undef);
WordNet::Similarity::addConfigOption ("synsetDepthsFile", 1, "p", undef);

sub initialize
{
    my $self = shift;
    my $class = ref $self || $self;

    my $wn = $self->{wn};

    my $defaultdepths = "synsetdepths.dat";
    my $defaultroots = "treedepths.dat";

    $self->SUPER::initialize (@_);

    my $depthsfile = $self->{synsetDepthsFile};

    unless (defined $depthsfile) {
    DEPTHS_SEARCH:
	foreach (@INC) {
	    my $file = File::Spec->catfile ($_, 'WordNet', $defaultdepths);
	    if (-e $file) {
		if (-r $file) {
		    $depthsfile = $file;
		    last DEPTHS_SEARCH;
		}
		else {
		    # The file not readable--is this an error?
		    # I suppose we shouldn't punish people for having
		    # unreadable files lying around; let's do nothing.
		}
	    }
	}
    }

    unless (defined $depthsfile) {
	$self->{error} = 2;
	$self->{errorString} .= "\nError (${class}::initialize()) - ";
	$self->{errorString} .= "No depths file found.";
	return undef;
    }

    $self->_processSynsetsFile ($depthsfile) or return undef;

    my $rootsfile = $self->{treeDepthsFile};

    unless (defined $rootsfile) {
    TAXONOMY_SEARCH:
	foreach (@INC) {
	    my $file = File::Spec->catfile ($_, 'WordNet', $defaultroots);
	    if (-e $file) {
		if (-r $file) {
		    $rootsfile = $file;
		    last TAXONOMY_SEARCH;
		}
		else {
		    # The file not readable--is this an error?
		    # I suppose we shouldn't punish people for having
		    # unreadable files lying around; let's do nothing.
		}
	    }
	}
    }

    $self->_processTaxonomyFile ($rootsfile) or return undef;

    return 1;
}


sub getSynsetDepth
{
    my $self = shift;
    my $class = ref $self || $self;
    my $offset = shift;
    my $pos = shift;

    my $ref = $self->{depths}->{$pos}->{$offset};
    my @depths = @$ref;


    unless (defined $depths[0]) {
	$self->{errorString} .= "\nWarning (${class}::getSynsetDepth()) - ";
	$self->{errorString} .= "No depth found for '$offset#$pos'.";
	$self->{error} = $self->{error} < 1 ? 1 : $self->{error};
	return undef;
    }

    return @depths;
}


sub getTaxonomyDepth
{
    my $self = shift;
    my $class = ref $self || $self;
    my $synset = shift;
    my $pos = shift;

    my $depth = $self->{taxonomyDepths}->{$pos}->{$synset};

    unless (defined $depth) {
	$self->{error} = $self->{error} < 1 ? 1 : $self->{error};
	$self->{errorString} .= "\nWarning (${class}::getTaxonomyDepth()) - ";
	$self->{errorString} .= "No taxonomy is rooted at $synset#$pos.";
	return undef;
    }

    return $depth;
}

sub getTaxonomies
{
    my $self = shift;
    my $offset = shift;
    my $pos = shift;
    my $class = ref $self || $self;

    my $ref = $self->{depths}->{$pos}->{$offset};
    my @tmp = @$ref;
    my %tmp;
    foreach (@tmp) {
	$tmp{$_->[1]} = 1;
    }
    my @rtn = keys %tmp;
    unless (defined $rtn[0]) {
	$self->{errorString} .= "\nWarning (${class}::getTaxonomies()) - ";
	$self->{errorString} .= "No root information for $offset#$pos.";
	$self->{error} = $self->{error} < 1 ? 1 : $self->{error};
	return undef;
    }
    return @rtn;
}

sub getLCSbyDepth
{
  my $self = shift;
  my $synset1 = shift;
  my $synset2 = shift;
  my $pos = shift;
  my $mode = shift;
  my $class = ref $self || $self;

  my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode);
  unless (defined $paths[0]) {
    # no paths found
    $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
    $self->{errorString} .= "\nWarning (${class}::getLCSbyDepth()) - ";
    $self->{errorString} .= "No path between synsets found.";
    return $self->UNRELATED;
  }

  my $wn = $self->{wn};
  my %depth;           # a hash to hold the depth of each LCS candidate

  # find the depth of each LCS candidate
  foreach (@paths) {
    my $offset = $_->[0];
    if ($mode eq 'wps') {
      if (index ($_->[0], "*Root*") >= $[) {
	$offset = 0;
      }
      else {
	$offset = $wn->offset ($_->[0]);
      }
    }

    my @depths = $self->getSynsetDepth ($offset, $pos);
    my ($depth, $root) = @{$depths[0]};
    unless (defined $depth) {
      # serious internal error -- possible problem with depths file?
      $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
      $self->{errorString} .= "\nWarning (${class}::getLCSbyDepth()) - ";
      $self->{errorString} .= "Undefined depth for $_->[0].  ";
      $self->{errorString} .= "Possible problem with the depths file?";
      return undef;
    }
    $depth{$_->[0]} = [$depth, $root];
  }

  # sort according to depth (descending order)
  my @tmp = sort {$b->[1] <=> $a->[1]} map [$_, @{$depth{$_}}], keys %depth;

  # remove from the array all the subsumers that are not tied for best
  foreach (0..$#tmp) {
    if ($tmp[$_]->[1] == $tmp[0]->[1]) {
      # do nothing
    }
    else {
      # kill the rest of the array and exit the loop
      $#tmp = $_ - 1;
      last;
    }
  }

  unless (defined $tmp[0]) {
    my $wps1 = $synset1;
    my $wps2 = $synset2;
    if ($mode eq 'offset') {
      $wps1 = $synset1 ? $wn->getSense ($synset1, $pos) : "*Root*#$pos#1";
      $wps2 = $synset2 ? $wn->getSynse ($synset2, $pos) : "*Root*#$pos#1";
    }

    $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
    $self->{errorString} .= "\nWarning (${class}::getLCSbyDepth() - ";
    $self->{errorString} .= "No LCS found for $wps1 and $wps2.";

    if ($self->{trace}) {
      $self->{traceString} .= "\nNo LCS found for ";
      $self->printSet ($pos, 'wps', $wps1);
      $self->{traceString} .= ", ";
      $self->printSet ($pos, 'wps', $wps2);
      $self->{traceString} .= ".";
    }
    return undef;
  }

  if ($self->{trace}) {
    $self->{traceString} .= "\nLowest Common Subsumers: ";
    foreach (@tmp) {
      $self->printSet ($pos, $mode, $_->[0]);
      $self->{traceString} .= " (Depth=$_->[1]) ";
    }
  }

  return @tmp;
}



sub _processSynsetsFile
{
    my $self = shift;
    my $file = shift;
    my $class = ref $self || $self;
    my $wnver = $self->{wntools}->hashCode ();

    unless (open FH, '<', $file) {
	$self->{error} = 2;
	$self->{errorString} .= "\nError (${class}::_processSynsetsFile()) - ";
	$self->{errorString} .= "Cannot open $file for reading: $!.";
	return 0;
    }

    my $line = <FH>;
    unless ($line =~ /^wnver::(\S+)$/) {
	$self->{errorString} .= "\nError (${class}::_processSynsetsFile()) - ";
	$self->{errorString} .= "File $file has bad format.";
	$self->{error} = 2;
	return 0;
    }
    unless ($1 eq $wnver) {
	$self->{errorString} .= "\nError (${class}::_processSynsetsFile()) - ";
	$self->{errorString} .= "Bad WordNet hash-code in $file, $1, should be $wnver.";
	$self->{error} = 2;
	return 0;
    }

    # If we are using a root node, then we need to slightly adjust all
    # the synset depths.  Thus, the correction will be 1 if the root node
    # is on and 0 otherwise.
    my $correction = $self->{rootNode} ? 1 : 0;

    while ($line = <FH>) {
	my ($pos, $offset, @depths) = split /\s+/, $line;
	# convert the offset string to a number.  When we make the number
	# into a string again, there won't be any leading zeros.
	$offset = 0 + $offset;

	# We assume the the first depth listed is the smallest.
	# The wnDepths.pl program should guarantee this behavior.
	my @refs;
	foreach (@depths) {
	    my ($depth, $root) = split /:/;
	    # make root a number; see above for why.  If the root node
	    # is on, then all roots are the root node, so adjust for that.
	    $root = $self->{rootNode} ? 0 : $root + 0;
	    $depth += $correction;
	    push @refs, [$depth, $root];
	}
	$self->{depths}->{$pos}->{$offset} = [@refs];
    }

    if ($self->{rootNode}) {
	# set the depth of the root nodes to be one
	$self->{depths}->{n}->{0} = [[1, 0]];
	$self->{depths}->{v}->{0} = [[1, 0]];
    }

    return 1;
}

sub _processTaxonomyFile
{
    my $self = shift;
    my $filename = shift;
    my $class = ref $self || $self;

    unless (open FH, '<', $filename) {
	$self->{errorString} .= "Error (${class}::_processTaxonomyFile()) - ";
	$self->{errorString} .= "Could not open '$filename' for reading: $!.";
	$self->{error} = 2;
	return 0;
    }

    my $line = <FH>;

    unless ($line =~ /^wnver::(\S+)$/) {
	$self->{errorString} .= "Error (${class}::_processTaxonomyFile()) - ";
	$self->{errorString} .= "Bad file format for $filename.";
	$self->{error} = 2;
	return 0;
    }

    while ($line = <FH>) {
	my ($p, $o, $d) = split /\s+/, $line;

	# add 0 to offset to make it a number; see above for why
	$o = $o + 0;

        $self->{taxonomyDepths}->{$p}->{$o} = $d;
    }

    close FH;
    return 1;
}

1;

__END__