/usr/local/CPAN/Text-TransMetaphone/Text/TransMetaphone/am.pm
package Text::TransMetaphone::am;
# If either of these next two lines are inside
# the BEGIN block the package will break.
#
use utf8;
use Regexp::Ethiopic::Amharic qw(:forms setForm overload);
BEGIN
{
use strict;
use vars qw( $VERSION $LocaleRange %IMExpected %IMError %plosives );
$VERSION = '0.02';
$LocaleRange = qr/[á-áá -á¾á-á·á-áá -á¼]/;
%plosives = (
k => 'á',
t => 'á ',
ʧ => 'á¨',
s => 'á¸',
p => 'á°',
);
%IMExpected =(
áµ => "s",
á½ => "s'",
á
=> "k'",
á => "q",
áµ => "t",
ᥠ=> "t'",
ὠ=> "ʧ",
á => "ʧ",
á => "n",
á => "k",
á => "z",
ᥠ=> "Ê",
áµ => "p'",
á => "p"
);
%IMError =(
áµ => "s'",
á½ => "s",
á
=> "q",
á => "k'",
áµ => "t'",
ᥠ=> "t",
ὠ=> "ʧ'",
á => "ʧ'",
á => "ɲ",
á => "x",
á => "Ê",
ᥠ=> "z",
áµ => "p",
á => "p'"
);
}
sub trans_metaphone
{
$_ = $_[0];
#
# strip out all but first vowel:
#
s/^[=#á #=]/a/;
s/[=#á #=]//g;
s/([#11#])/setForm($1,$á³áµáµ)."á"/eg;
s/[=#á#=]/h/g;
s/[=#á°#=]/á°/g;
s/[=#á¸#=]/á¸/g;
# s/(.)[=#á¸#=]/s'/g; # compare this to ts in english, it should be a 2nd key
#
# now strip vowels, this simplies later code:
#
s/(\p{InEthiopic})/ ($1 eq 'á') ? $1 : setForm($1,$á³áµáµ)/eg;
tr/áááá½á¥ááááµá
áá/lmrÊbvwjdʤgf/;
my @keys = ( $_ );
my $re = $_;
#
# mixed glyphs: á½ for áµ or á½ is shift stick for áµ
#
if ( $keys[0] =~ /á½/ ) {
$keys[2] = $keys[1] = $keys[0];
$keys[0] =~ s/á½/É/; # caps problem
$keys[1] =~ s/á½/d/; # literal
$keys[2] =~ s/á½/p'/; # mistaken glyph
$re =~ s/á½/([dÉ]|p')/g;
}
#
# mixed glyphs: á for á or á is shift stick for á
#
if ( $keys[0] =~ /á/ ) {
my (@newKeysA, @newKeysB);
for (my $i=0; $i < @keys; $i++) {
$newKeysA[$i] = $newKeysB[$i] = $keys[$i]; # copy old keys
$keys[$i] =~ s/á/ɲ/; # literal
$newKeysA[$i] =~ s/á/n/; # caps problem
$newKeysB[$i] =~ s/á/p/; # mistaken glyph
}
push (@keys,@newKeysA); # add new keys to old keys
push (@keys,@newKeysB); # add new keys to old keys
$re =~ s/á/[nɲp]/g;
}
#
# handle phonological problems
#
if ( $keys[0] =~ /mb/ ) {
my @newKeys;
for (my $i=0; $i < @keys; $i++) {
$newKeys[$i] = $keys[$i]; # copy old keys
$newKeys[$i] =~ s/mb/nb/; # update old keys for primary mapping
}
push (@keys,@newKeys); # add new keys to old keys
$re =~ s/mb/[mn]b/g;
}
#
# try to keep least probable keys last:
#
#
# Handle IM problems
#
while ( $keys[0] =~ /([áµá
ááµá½áááá¥á¥áá½áµá])/ ) {
my $a = $1;
my @newKeys;
for (my $i=0; $i < @keys; $i++) {
$newKeys[$i] = $keys[$i]; # copy old keys
$keys[$i] =~ s/$a/$IMExpected{$a}/; # update old keys for primary mapping
}
for (my $i=0; $i < @newKeys; $i++) {
$newKeys[$i] =~ s/$a/$IMError{$a}/; # update new keys for alternative
}
push (@keys,@newKeys); # add new keys to old keys
# print "$a => $IMExpected{$a} / $IMError{$a}\n";
if ( $plosives{$IMExpected{$a}} || $plosives{$IMError{$a}} ) {
$re =~ s/$a/($IMExpected{$a}|$IMError{$a})/g;
}
else {
$re =~ s/$a/[$IMExpected{$a}$IMError{$a}]/g;
}
}
if ( $#keys ) {
push ( @keys, qr/$re/ );
}
@keys;
}
sub reverse_key
{
$_ = $_[0];
s/([stʧkp])'/$plosives{$1}/g;
tr/hlmrsÊqbvtʧnɲakwjdÉʤzÊgɲfp/áááá¨á°á¸áá á¨á°á¸ááá á¨áá¨á°á¸ááá áááá/;
s/(\p{InEthiopic})/[#$1#]/g;
s/á¸/á°á¸/g;
s/á/áá/g;
$_;
}
#########################################################
# Do not change this, Do not put anything below this.
# File must return "true" value at termination
1;
##########################################################
__END__
=head1 NAME
Text::TransMetaphone::am - Transcribe Amharic words into IPA symbols.
=head1 SYNOPSIS
This module is used by L<Text::TransMetaphone> and need not be used
directly.
=head1 DESCRIPTION
The Text::TransMetaphone::am module implements the TransMetaphone algorithm
for Amharic. The module provides a C<trans_metaphone> function that accepts
an Amharic word as an argument and returns a list of keys transcribed into
IPA symbols under Amharic orthography rules. The last key of the list is
a regular expression that matching all previously returned keys.
A C<reverse_key> function is also provided to convert an IPA symbol key into
a regular expression that would phonological sequence under Amharic orthography.
=head1 STATUS
The Amharic module is the most developed in the TransMetaphone package.
It has awareness of common mispelling in Amharic, perhaps too much, the
module will produce a high number of keys.
=head1 REQUIRES
Regexp::Ethiopic.
=head1 COPYRIGHT
This module is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=head1 BUGS
None presently known.
=head1 AUTHOR
Daniel Yacob, L<dyacob@cpan.org|mailto:dyacob@cpan.org>
=head1 SEE ALSO
L<Text::TransMetaphone>
=cut