| Lingua-RU-NameParse documentation | Contained in the Lingua-RU-NameParse distribution. |
Given a first name and a patronymic, returns the name and patronymic in the nominative case.
Transliterates the string from Cyrillic to Latin.
Lingua::RU::NameParse - Normalize Russian names
use Lingua::RU::NameParse;
my $p = Lingua::RU::NameParse->new();
my $norm = $p->normalize("Карлу Марксу");
Strips the case endings from Russian proper names and normalizes them to the nominative case. At present it only works with a first name followed by a patronymic.
Maciej Ceglowski <maciej@ceglowski.com>
| Lingua-RU-NameParse documentation | Contained in the Lingua-RU-NameParse distribution. |
package Lingua::RU::NameParse;

use 5.008;
use strict;
use warnings;

our $VERSION = '0.02';

# new()
#
# Constructor.  Takes no arguments besides the class name and returns an
# empty hash reference blessed into that class; the object carries no
# state.
sub new {
    my ($class) = @_;
    my $self = {};
    return bless $self, $class;
}
# normalize($name)
#
# Given a first name and a patronymic in an oblique case, returns
# "first-name patronymic" with the case endings rewritten towards the
# nominative.
#
#   $self   - invocant; not used.
#   $testme - the name to normalize.  Only the first two
#             whitespace-separated words are considered; any further
#             words are dropped.
#
# Returns the two rewritten words joined by a single space.  Input that
# contains a single word is returned as that word (no trailing space);
# undefined, empty or whitespace-only input yields the empty string.
#
# The grammatical case is chosen from the ending of the second word.  The
# first matching branch wins, so the order of the elsif chain matters.
#
# NOTE(review): the Cyrillic literals below are reproduced exactly as they
# appear in this copy of the file, which looks mis-encoded (several
# patterns are no-ops as written because both sides show the same
# characters).  Restore them from a clean UTF-8 copy of the module before
# relying on any individual rule.
sub normalize {
    my ( $self, $testme ) = @_;
    use utf8;

    # Treat a missing argument as an empty name instead of emitting
    # "uninitialized value" warnings further down.
    $testme = '' unless defined $testme;

    # Round-trip through pack/unpack 'U*'; per the original author's note
    # this is voodoo to set the string's UTF8 flag.
    $testme = pack 'U*', unpack( 'U*', $testme );

    # Drop leading whitespace so that split cannot produce an empty first
    # field and shift the first name into the patronymic slot.
    $testme =~ s/\A\s+//;

    my ( $w1, $w2 ) = split m/\s+/, $testme;

    # Nothing but whitespace (or nothing at all) was passed in.
    return '' unless defined $w1;

    # The ending rules are keyed on the second word, so they only apply
    # when there is one.
    if ( defined $w2 ) {

        ##################
        # ACCUSATIVE
        ##################
        if ( $w2 =~ /[Ñк]а$/ ) {
            $w2 =~ s/а$//g;
            $w1 =~ s/а$//g;
            $w1 =~ s/Ñ$/й/g;
        }
        elsif ( $w2 =~ /овнÑ\b/ ) {
            $w2 =~ s/Ñ\b/а/;
            $w1 =~ s/ÑÑ\b/ÑÑ/;
            $w1 =~ s/иÑ\b/иÑ/;
        }

        ################
        # DATIVE
        ################
        elsif ( $w2 =~ /не\b/ ) {
            $w2 =~ s/е\b/а/;
            $w1 =~ s/Ñе\b/ÑÑ/;
            $w1 =~ s/е\b/а/;
            $w1 =~ s/ии\b/иÑ/;
        }
        elsif ( $w2 =~ /Ñе\b/ ) {
            $w2 =~ s/е$//;
            $w1 =~ s/ее\b/ей/;    # aleksei
        }

        # The original example comment is mis-encoded in this copy; it
        # appears to read "Konstantinu Levinu" - TODO confirm.
        elsif ( $w2 =~ /[нÑ]Ñ$/ ) {
            $w2 =~ s/Ñ$//;
            $w1 =~ s/Ñ\b//;
            $w1 =~ s/([еи])Ñ\b/$1й/;
        }

        ###################
        # INSTRUMENTAL
        ###################
        # The original example comment is mis-encoded in this copy; it
        # appears to read "Veroi Pavlovnoyu" - TODO confirm.
        elsif ( $w2 =~ /но[Ñй]$/ ) {
            $w2 =~ s/оÑ$/а/;
            $w2 =~ s/ой$/а/;
            $w1 =~ s/ой$/а/;
            $w1 =~ s/еÑ/Ñ/;
            $w1 =~ s/ей/Ñ/;
        }
        elsif ( $w2 =~ /ем$/ ) {
            $w2 =~ s/ем\b//;
            $w1 =~ s/ом\b//;
            $w1 =~ s/еем\b/ей/;
        }
        elsif ( $w2 =~ /Ñм\b/ ) {
            $w2 =~ s/Ñм/Ñй/;
        }

        ###################
        # GENITIVE
        ###################
        elsif ( $w2 =~ /нÑ$/ ) {
            $w2 =~ s/Ñ$/а/g;
            $w1 =~ s/Ñ$/а/g;
            $w1 =~ s/Ñи$/ÑÑ/;
        }
        elsif ( $w2 =~ /ого$/ ) {
            $w2 =~ s/ого\b/ий/g;
            $w1 =~ s/а$//g;
        }
    }

    # Final fix-up applied to the first name regardless of which branch
    # ran (the original author's note for this rule is "pavel").
    $w1 =~ s/вл\b/вел/;    # pavel

    return defined $w2 ? "$w1 $w2" : $w1;
}
# transliterate($in)
#
# Transliterates the string from Cyrillic to Latin.
#
#   $self - invocant; not used.
#   $in   - the string to transliterate.  It is copied on entry, so the
#           caller's variable is not modified.
#
# Returns the transliterated copy.  The multi-letter replacements run
# first, in the order listed; the remaining characters are then mapped one
# for one by the final tr///.
#
# NOTE(review): the Cyrillic literals are reproduced exactly as they appear
# in this copy of the file, which looks mis-encoded (several different
# replacements share the same visible search pattern).  Restore them from a
# clean UTF-8 copy of the module before relying on the mapping.
sub transliterate {
    my ( $self, $in ) = @_;

    # Multi-letter replacements.
    $in =~ s/Ñ/ts/gi;
    $in =~ s/Ñ/sh/gi;
    $in =~ s/Ñ/shch/gi;
    $in =~ s/ж/zh/gi;
    $in =~ s/Ñ/ya/gi;
    $in =~ s/Ñ\b/ya/gi;
    $in =~ s/Ñ/ch/g;
    $in =~ s/Ñ\b/ch/g;
    $in =~ s/Ñ/yu/gi;

    # Single-character mapping for everything that is left.
    $in =~ tr/йÑÐºÐ½Ð³Ð·Ñ ÑÑвапÑолдÑÑмиÑеÑбÐÐУÐÐÐÐÐХФЫÐÐÐÐ ÐÐÐÐСÐÐТ/jukngzhfyvaproldesmite'bBJUKENGZHFYVAPROLDESMIT/;

    return $in;
}

1;
__END__