| Lingua-Lexicon-IDP documentation | Contained in the Lingua-Lexicon-IDP distribution. |
Lingua::Lexicon::IDP - OOP interface for parsing Internet Dictionary Project files
use Lingua::Lexicon::IDP
use Data::Denter;
my $idp = Lingua::Lexicon::IDP->new("en");
print Indent([ $idp->pt("dog")])."\n";'
# prints:
@
cachorro[Noun]
ca~o[Noun]
"c%e3o[Noun]"
An OOP interface for parsing translation files from the Internet Dictionary Project (IDP).
The package uses a boolean-search for doing lookups and the Memoize package for caching.
Currently, this method doesn't actually accept any parameters since the IDP has only released English-to-Foobar files.
Eventually, there might be a variety of different Foo-to-Bar files, in which case you will be able to specify $lang.
The default language is English.
Returns an object. Woot!
When an object is instantiated, the package automagically populates the symbol table with methods corresponding to the languages for which translation files exist.
Object methods return a list of words, or phrases.
Available method for translating English words are:
Return the language code for the current language.
Return an array ref of language codes for which their are translation files available for $pkg->lang().
1.0
$Date: 2003/02/04 14:03:59 $
Aaron Straup Cope
http://www.ilovelanguages.com/IDP/
Please report all bugs via http://rt.cpan.org
Copyright (c) 2003, Aaron Straup Cope. All Rights Reserved.
This is free software, you may use it and distribute it under the same terms as Perl itself.
| Lingua-Lexicon-IDP documentation | Contained in the Lingua-Lexicon-IDP distribution. |
use strict; package Lingua::Lexicon::IDP; use Carp; use IO::File; use Memoize; use File::Spec::Functions qw (:DEFAULT); use constant MAX_TRIES => 64; use constant LANG_DEFAULT => "en"; use constant LANG_TRANSLATIONS => { "en" => [ "de","es","fr","it","la","pt" ] }; $Lingua::Lexicon::IDP::VERSION = '1.0'; sub new { my $pkg = shift; my $self = {'__lang' => LANG_DEFAULT }; bless $self,$pkg; $self->_init(); return $self; } sub _init { my $self = shift; $self->{'__datadir'} ||= join("/", (grep { -d $_ } map { catdir($_,split("::",__PACKAGE__)) } exists $INC{"blib.pm"} ? grep {/blib/} @INC : @INC)[0], "Data"); foreach my $tr (@{LANG_TRANSLATIONS->{$self->lang()}}) { my $datafile = join("/", $self->{'__datadir'}, join("_",$self->lang(),"$tr.txt")); no strict "refs"; *{join("::",__PACKAGE__,$tr)} = sub { my $self = shift; return $self->_query($datafile,$_[0]); }; } return 1; } sub lang { my $self = shift; return $self->{'__lang'}; } sub translations { my $self = shift; return LANG_TRANSLATIONS->{$self->lang()}; } sub _query { my $self = shift; my $data = shift; my $word = shift; if ((exists($self->{'__fh'})) && ($self->{'__datafile'} ne $data)) { $self->{'__fh'}->close(); delete $self->{'__datafile'}; delete $self->{'__len'}; delete $self->{'__fh'}; } if (! $self->{'__fh'}) { $self->{'__datafile'} = $data; $self->{'__fh'} = IO::File->new($self->{'__datafile'}); if (! $self->{'__fh'}) { carp "Unable to create fh, $!\n"; return undef; } } if (! $self->{'__len'}) { $self->{'__len'} = (stat($self->{'__datafile'}))[7]; } # For reasons I don't understand, I # cant pass \*$self->{'__fh'} without # generating errors.... my $fh = $self->{'__fh'}; return &_do_query(\*$fh,$self->{'__len'},$word); } sub _do_query { my $fh = shift; my $len = shift; my $word = shift; # my $begin = 0; my $end = $len; my $tries = 0; my $first = undef; my $last = undef; my $found = 0; my @translations = (); while (! $found) { if (($begin +1) == $end) { return undef; } # Just because you're paranoid # Don't mean they're not after you if ($tries >= MAX_TRIES) { carp "Tried query ".MAX_TRIES." times without success. Something is probably wrong.\n"; return undef; } my $guess = int(($begin + $end) /2); #print STDERR "[B] $begin [E] $end\n"; #print STDERR "[$tries] Guess is $guess\n"; my $pos = $guess; my $char = ""; my $stop = 0; # First thing is to back up # to the start of the line while (! $stop) { sysseek($fh,$pos,0); sysread ($fh,$char,1); if ($char =~ /\n/) { $stop = 1; $pos = $pos+2; } $pos--; if (! $pos) { $stop = 1; } } # Next, try to see if we can find # any matches at all $first = $pos; $stop = 0; my $line = undef; my $match = 0; while (! $stop) { sysseek($fh,$pos,0); sysread($fh,$char,1); # We've found the word we're # looking for. Make a note of # this so that we can stop # performing this regex(p) and # start collecting the translation. if ($line =~ /$word\t.*/) { $match = 1; $stop = 1; } # We're not sure if we've found # a match but the current line looks # like it could still be the word # we're looking for. elsif ($word =~ /^$line/) { $line .= $char; $pos++; } # This is not the droid, we're looking for. # The only question now is whether to look # forwards or backwards. else { if ([sort ($word,$line)]->[0] eq $word) { $end = $guess; } else { $begin = $guess; } $stop = 1; } } # Did not find anything. # Try again next unless ($match); # Okay, since we're doing a boolean # search we have to back up to find # the first instance of the word. $stop = 0; $line = undef; # So far, we think that the # first instance of $word is here my $first_instance = $first; # print STDERR "FIRST INSTANCE '$first_instance'\n"; # Back up past the newline $pos = $first_instance - 2; # print STDERR "START AT '$pos'\n"; while (! $stop) { sysseek($fh,$pos,0); sysread($fh,$char,1); # print STDERR "[$pos] '$char' '$line'\n"; if ($char =~ /\n/) { # print STDERR "CHECKING '$line'\n"; # Okay, well this line has an entry # for $word so we'll mark it as the # the first entry and keep going. if ($line =~ /$word\t/) { $first_instance = $pos +1; $line = undef; $pos--; } # Different word. Stop. else { $stop = 1; } } else { $line = $char.$line; $pos--; } } # Start recording. # Go to the first instance. $pos = $first_instance; $line = undef; $match = 0; $stop = 0; my $translation = undef; # print STDERR "START LOOKING AT '$pos'\n"; while (! $stop) { sysseek($fh,$pos,0); sysread($fh,$char,1); # print STDERR " [$word][$match][$pos] '$line' '$translation'\n"; # We've found the word we're # looking for and now we're just # reading the translation. if ($match) { # End of the line. # Hello, translation. # Note, that we'll keep going trying # to find additional translations. if ($char =~ /\n/) { push @translations, $translation; $line = undef; $translation = undef; $match = 0; $pos++; } # Munge munge munge else { $translation .= $char; $pos++; } } # We've found the word we're # looking for. Make a note of # this so that we can stop # performing this regex(p) and # start collecting the translation. elsif ($line =~ /$word\t.*/) { $match = 1; } # We're not sure if we've found # a match but the current line looks # like it could still be the word # we're looking for. elsif ($word =~ /^$line/) { $line .= $char; $pos++; } # Stop. else { $found = 1; $stop = 1; } } $tries++; } return @translations; } END { memoize("_do_query"); } return 1; __END__