| Lingua-RU-Sklon documentation | Contained in the Lingua-RU-Sklon distribution. |
Lingua::RU::Sklon - helps declensing russian word
use Lingua::RU::Sklon;
print sklon("Алексеев Алексей Алексеевич"=>'VIN');
print sklon(convert('koi'=>'win', 'юКЕЙЯЕЕБ юКЕЙЯЕИ юКЕЙЯЕЕБХВ' )=>'VIN');
# gives Алексеева Алексея Алексеевича
Lingua::RU::Sklon - specially made to helps declense russian names in any acts or docs you've come through. This, sadly, doesn't help yet at some more complex names such as Московская-Муштак Виктория-Степанида Джульраби оглы. But, in 99.9% cases this module fits. default encoding for this module is win-1251. be sure you install this locale. If not, then please send all names initcapped, this should do the trick either.
usage my $win_text=convert ('koi'=>'win', $koi_text); This lil' helper converts russian text from/to different encodings. available charsets koi, win, iso, dos see Lingua::RU::Charset for more flexible version.
This function gets full name of client, and transforms it into desired declense. Available list of declesnes is: C< I=>1, IMEN=>1, 1=>1, #Именительный R=>2, ROD=>2, 2=>2, #Родительного D=>3, DAT=>3, 3=>3, #Дательным V=>4, VIN=>4, 4=>4, #Винительный T=>5, TVOR=>5, 5=>5, #Творительным P=>6, PRED=>6, 6=>6 #о Предложном >
Yes, it's the hash from above.
Make First Letters of the Every Word Capital.
parses noun (name or second name) of client.
parses last name of client.
Alexey Usanov <alexey_usa@mail.ru>
Lingua::RU::Charset, perllocale
Copyright (c) 2007, Alexey Usanov. All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
| Lingua-RU-Sklon documentation | Contained in the Lingua-RU-Sklon distribution. |
#!/usr/bin/perl -w use strict; use warnings; use POSIX qw(locale_h); use locale; setlocale(LC_CTYPE, "ru_RU.cp1251"); package Lingua::RU::Sklon; BEGIN { use Exporter (); our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); # set the version for version checking $VERSION = 0.01; @ISA = qw(Exporter); @EXPORT = qw(&parse_n &parse_lastname &convert &initcap &sklon); %EXPORT_TAGS = ( ); # your exported package globals go here, # as well as any optionally exported functions @EXPORT_OK = qw(%pads); } use Carp; our @EXPORT_OK; our %pads; # non-exported package globals go here our $dos; our $iso; our $koi; our $win; # initialize package globals, first exported ones %pads=( I=>1, IMEN=>1, 1=>1, #ýòî æåíà R=>2, ROD=>2, 2=>2, #îòñòàëñÿ áåç æåíû D=>3, DAT=>3, 3=>3, #ñòóäåíòó æåíå V=>4, VIN=>4, 4=>4, #ïîðâàë æåíó T=>5, TVOR=>5, 5=>5, #íàçâàëñÿ æåíîé P=>6, PRED=>6, 6=>6 #ïèøó î æåíå ); $dos={'à'=>160,'á'=>161,'â'=>162,'ã'=>163,'ä'=>164,'å'=>165,'¸'=>241,'æ'=>166,'ç'=>167,'è'=>168,'é'=>169,'ê'=>170,'ë'=>171,'ì'=>172,'í'=>173,'î'=>174,'ï'=>175,'ð'=>224,'ñ'=>225,'ò'=>226,'ó'=>227,'ô'=>228,'õ'=>229,'ö'=>230,'÷'=>231,'ø'=>232,'ù'=>233,'ü'=>236,'û'=>235,'ú'=>234,'ý'=>237,'þ'=>238,'ÿ'=>239,'À'=>128,'Á'=>129,'Â'=>130,'Ã'=>131,'Ä'=>132,'Å'=>133,'¨'=>240,'Æ'=>134,'Ç'=>135,'È'=>136,'É'=>137,'Ê'=>138,'Ë'=>139,'Ì'=>140,'Í'=>141,'Î'=>142,'Ï'=>143,'Ð'=>144,'Ñ'=>145,'Ò'=>146,'Ó'=>147,'Ô'=>148,'Õ'=>149,'Ö'=>150,'×'=>151,'Ø'=>152,'Ù'=>153,'Ü'=>156,'Û'=>155,'Ú'=>154,'Ý'=>157,'Þ'=>158,'ß'=>159}; $iso={'à'=>208,'á'=>209,'â'=>210,'ã'=>211,'ä'=>212,'å'=>213,'¸'=>241,'æ'=>214,'ç'=>215,'è'=>216,'é'=>217,'ê'=>218,'ë'=>219,'ì'=>220,'í'=>221,'î'=>222,'ï'=>223,'ð'=>224,'ñ'=>225,'ò'=>226,'ó'=>227,'ô'=>228,'õ'=>229,'ö'=>230,'÷'=>231,'ø'=>232,'ù'=>233,'ü'=>236,'û'=>235,'ú'=>234,'ý'=>237,'þ'=>238,'ÿ'=>239,'À'=>176,'Á'=>177,'Â'=>178,'Ã'=>179,'Ä'=>180,'Å'=>181,'¨'=>161,'Æ'=>182,'Ç'=>183,'È'=>184,'É'=>185,'Ê'=>186,'Ë'=>187,'Ì'=>188,'Í'=>189,'Î'=>190,'Ï'=>191,'Ð'=>192,'Ñ'=>193,'Ò'=>194,'Ó'=>195,'Ô'=>196,'Õ'=>197,'Ö'=>198,'×'=>199,'Ø'=>200,'Ù'=>201,'Ü'=>204,'Û'=>203,'Ú'=>202,'Ý'=>205,'Þ'=>206,'ß'=>207}; $koi={'à'=>193,'á'=>194,'â'=>215,'ã'=>199,'ä'=>196,'å'=>197,'¸'=>163,'æ'=>214,'ç'=>218,'è'=>201,'é'=>202,'ê'=>203,'ë'=>204,'ì'=>205,'í'=>206,'î'=>207,'ï'=>208,'ð'=>210,'ñ'=>211,'ò'=>212,'ó'=>213,'ô'=>198,'õ'=>200,'ö'=>195,'÷'=>222,'ø'=>219,'ù'=>221,'ü'=>216,'û'=>217,'ú'=>223,'ý'=>220,'þ'=>192,'ÿ'=>209,'À'=>225,'Á'=>226,'Â'=>247,'Ã'=>231,'Ä'=>228,'Å'=>229,'¨'=>179,'Æ'=>246,'Ç'=>250,'È'=>233,'É'=>234,'Ê'=>235,'Ë'=>236,'Ì'=>237,'Í'=>238,'Î'=>239,'Ï'=>240,'Ð'=>242,'Ñ'=>243,'Ò'=>244,'Ó'=>245,'Ô'=>230,'Õ'=>232,'Ö'=>227,'×'=>254,'Ø'=>251,'Ù'=>253,'Ü'=>248,'Û'=>249,'Ú'=>255,'Ý'=>252,'Þ'=>224,'ß'=>241}; $win={'à'=>224,'á'=>225,'â'=>226,'ã'=>227,'ä'=>228,'å'=>229,'¸'=>184,'æ'=>230,'ç'=>231,'è'=>232,'é'=>233,'ê'=>234,'ë'=>235,'ì'=>236,'í'=>237,'î'=>238,'ï'=>239,'ð'=>240,'ñ'=>241,'ò'=>242,'ó'=>243,'ô'=>244,'õ'=>245,'ö'=>246,'÷'=>247,'ø'=>248,'ù'=>249,'ü'=>252,'û'=>251,'ú'=>250,'ý'=>253,'þ'=>254,'ÿ'=>255,'À'=>192,'Á'=>193,'Â'=>194,'Ã'=>195,'Ä'=>196,'Å'=>197,'¨'=>168,'Æ'=>198,'Ç'=>199,'È'=>200,'É'=>201,'Ê'=>202,'Ë'=>203,'Ì'=>204,'Í'=>205,'Î'=>206,'Ï'=>207,'Ð'=>208,'Ñ'=>209,'Ò'=>210,'Ó'=>211,'Ô'=>212,'Õ'=>213,'Ö'=>214,'×'=>215,'Ø'=>216,'Ù'=>217,'Ü'=>220,'Û'=>219,'Ú'=>218,'Ý'=>221,'Þ'=>222,'ß'=>223}; END { } # module clean-up code here (global destructor) sub convert { my $src=lc shift; my $tgt=lc shift; my ($src_cp, $tgt_cp); if ($src eq 'dos') { $src_cp=$dos; } elsif ($src eq 'win') { $src_cp=$win; } elsif ($src eq 'iso') { $src_cp=$iso; } elsif ($src eq 'koi') { $src_cp=$koi; } else { croak "Wrong Source encoding: $src"; return "! Wrong Source encoding: $src"; } if ($tgt eq 'dos') { $tgt_cp=$dos; } elsif ($tgt eq 'win') { $tgt_cp=$win; } elsif ($tgt eq 'iso') { $tgt_cp=$iso; } elsif ($tgt eq 'koi') { $tgt_cp=$koi; } else { croak "Wrong tgt encoding: $tgt"; return "! Wrong tgt encoding: $tgt"; } my %src_cp = reverse %{$src_cp}; my $out; my @out; foreach (@_) { my @a=split //; $out=''; foreach (@a) { my $r=chr($tgt_cp->{$src_cp{ord($_)}}); $out.= $r?$r:$_; } push @out,$out; } if (wantarray) { return @out; } else { return join ('',@out); } } sub parse_lastname { my $txt=lc shift; my $wrap=shift; my $last_letter=substr($txt,-2); #print $last_letter; if ($last_letter eq 'èé') { my $h={1=>'èé', 2=>'îãî', 3=>'îìó', 4=>'îãî', 5=>'èì', 6=>'îì' }; return substr($txt,0,-2).($h->{$wrap}||return "!$txt"); } elsif ($last_letter eq 'ûé') { my $h={1=>'ûé', 2=>'îãî', 3=>'îìó', 4=>'îãî', 5=>'ûì', 6=>'îì' }; return substr($txt,0,-2).($h->{$wrap}||return "!$txt"); } elsif ($last_letter eq 'îé') { return $txt; } elsif ($last_letter eq 'àÿ') { my $h={1=>'àÿ', 2=>'óþ', 3=>'îé', 4=>'óþ', 5=>'îé', 6=>'îé' }; return substr($txt,0,-2).($h->{$wrap}||return "!$txt"); } elsif ($last_letter eq 'ÿÿ') { my $h={1=>'ÿÿ', 2=>'þþ', 3=>'åé', 4=>'þþ', 5=>'åé', 6=>'åé' }; return substr($txt,0,-2).($h->{$wrap}||return "!$txt"); } elsif ($last_letter eq 'îê') { my $h={1=>'îê', #ýòî æåíà 2=>'êà', #îòñòàëñÿ áåç æåíû 3=>'êå', #ñòóäåíòó æåíå 4=>'êó', #ïîðâàë æåíó 5=>'êîì', #íàçâàëñÿ æåíîé 6=>'êå' #ïèøó î æåíå }; return substr($txt,0,-2).$h->{$wrap}; } $_=substr($txt,-1); if ($_ eq 'é') { my $h={1=>'é', 2=>'ÿ', 3=>'ÿ', 4=>'þ', 5=>'åì', 6=>'è' }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if ($_ eq 'à') { my $h={1=>'à', #ýòî æåíà 2=>'îé', #îòñòàëñÿ áåç æåíû 3=>'ó', #ïîðâàë æåíó 4=>'îé', #ñòóäåíòó æåíå 5=>'îé', #íàçâàëñÿ æåíîé 6=>'îé' #ïèøó î æåíå }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if ($_ eq 'ÿ') { my $h={1=>'ÿ', #ýòî æåíà 2=>'è', #îòñòàëñÿ áåç æåíû 3=>'þ', #ïîðâàë æåíó 4=>'å', #ñòóäåíòó æåíå 5=>'åé', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if ($_ eq 'ü') { my $h={1=>'ü', #ýòî æåíà 2=>'ÿ', #îòñòàëñÿ áåç æåíû 3=>'ÿ', #ïîðâàë æåíó 4=>'þ', #ñòóäåíòó æåíå 5=>'åì', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if (/[óåõúôûïðîëäæýÿ÷ñìèòüáþ]/) { return $txt; } if (/â/) { my $h={1=>'', #ýòî æåíà 2=>'à', #îòñòàëñÿ áåç æåíû 3=>'e', #ñòóäåíòó æåíå 4=>'ó', #ïîðâàë æåíó 5=>'ûì', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return $txt.$h->{$wrap}; } if (/[öóêåíãøùçõúôûâàïðîëäæýÿ÷ñìèòüáþ]/) { my $h={1=>'', #ýòî æåíà 2=>'à', #îòñòàëñÿ áåç æåíû 3=>'ó', #ïîðâàë æåíó 4=>'e', #ñòóäåíòó æåíå 5=>'îì', #íàçâàëñÿ æåíîé 6=>'îé' #ïèøó î æåíå }; return $txt.$h->{$wrap}; } carp ("Unalbe to sklon: $txt"); return "$txt"; } sub parse_n { my $txt=lc shift; my $wrap=shift; my $last_letter=substr($txt,-2); #print $last_letter; if ($last_letter eq 'îê') { my $h={1=>'îê', #ýòî æåíà 2=>'êà', #îòñòàëñÿ áåç æåíû 3=>'êó', #ïîðâàë æåíó 4=>'êå', #ñòóäåíòó æåíå 5=>'êîì', #íàçâàëñÿ æåíîé 6=>'êå' #ïèøó î æåíå }; return substr($txt,0,-2).$h->{$wrap}; } elsif ($last_letter eq 'åë') { my $h={1=>'åë', #ýòî æåíà 2=>'ëà', #îòñòàëñÿ áåç æåíû 3=>'ëó', #ïîðâàë æåíó 4=>'ëó', #ñòóäåíòó æåíå 5=>'ëîì', #íàçâàëñÿ æåíîé 6=>'ëå' #ïèøó î æåíå }; return substr($txt,0,-2).$h->{$wrap}; } elsif ($last_letter eq 'åâ') { my $h={1=>'åâ', #ýòî æåíà 2=>'üâà', #îòñòàëñÿ áåç æåíû 3=>'üâà', #ïîðâàë æåíó 4=>'üâó', #ñòóäåíòó æåíå 5=>'üâîì', #íàçâàëñÿ æåíîé 6=>'üâå' #ïèøó î æåíå }; return substr($txt,0,-2).$h->{$wrap}; } $_=substr($txt,-1); if ($_ eq 'é') { my $h={1=>'é', 2=>'ÿ', 3=>'þ', 4=>'þ', 5=>'åì', 6=>'å' }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if ($_ eq 'à') { my $h={1=>'à', #ýòî æåíà 2=>'û', #îòñòàëñÿ áåç æåíû 3=>'å', #ñòóäåíòó æåíå 4=>'ó', #ïîðâàë æåíó 5=>'îé', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if ($_ eq 'ÿ') { my $h={1=>'ÿ', #ýòî æåíà 2=>'è', #îòñòàëñÿ áåç æåíû 3=>'å', #ñòóäåíòó æåíå 4=>'þ', #ïîðâàë æåíó 5=>'åé', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if ($_ eq 'ü') { my $h={1=>'ü', #ýòî æåíà 2=>'ÿ', #îòñòàëñÿ áåç æåíû 3=>'þ', #ñòóäåíòó æåíå 4=>'ÿ', #ïîðâàë æåíó 5=>'åì', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return substr($txt,0,-1).($h->{$wrap}||return "!$txt"); } if (/[óåúûýþ]/) { return $txt; } if (/[âòêíãçõôâïðëäìòá]/) { my $h={1=>'', #ýòî æåíà 2=>'à', #îòñòàëñÿ áåç æåíû 3=>'å', #ñòóäåíòó æåíå 4=>'ó', #ïîðâàë æåíó 5=>'îì', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return $txt.$h->{$wrap}; } if (/[øùõæ÷]/) { my $h={1=>'', #ýòî æåíà 2=>'à', #îòñòàëñÿ áåç æåíû 3=>'Ó', #ñòóäåíòó æåíå 4=>'ó', #ïîðâàë æåíó 5=>'åì', #íàçâàëñÿ æåíîé 6=>'å' #ïèøó î æåíå }; return $txt.$h->{$wrap}; } carp ("Unalbe to sklon: $txt"); return "$txt"; } sub sklon { $_=shift; print "$_\n"; /(\w+)\s(\w+)\s(.+)/; my $pad=shift; my $decl=$pads{$pad}; croak "Unknown pad attempting to be set : $pad" unless $decl; return initcap(parse_lastname($1,$decl)." ".parse_n($2,$decl)." ".parse_n($3,$decl)); } sub initcap { $_=shift; my $out; while (/(\w)(\w*)(\W*)/g) { $out.=uc ($1).$2.$3; } return $out; } 1; __END__