Lingua::RU::Sklon - helps declensing russian word


Lingua-RU-Sklon documentation Contained in the Lingua-RU-Sklon distribution.

Index


Code Index:

NAME

Top

Lingua::RU::Sklon - helps declensing russian word

SYNOPSIS

Top

  use Lingua::RU::Sklon;

  print sklon("Алексеев Алексей Алексеевич"=>'VIN');
  print sklon(convert('koi'=>'win', 'юКЕЙЯЕЕБ юКЕЙЯЕИ юКЕЙЯЕЕБХВ' )=>'VIN');
  # gives Алексеева Алексея Алексеевича

DESCRIPTION

Top

  Lingua::RU::Sklon - specially made to helps declense russian names in any acts
  or docs you've come through. This, sadly, doesn't help yet at some more
  complex names such as Московская-Муштак Виктория-Степанида Джульраби оглы.
  But, in 99.9% cases this module fits.

  default encoding for this module is win-1251. be sure you install this locale.
  If not, then please send all names initcapped, this should do the trick either.

convert (FROM=>TO, WHAT)

usage my $win_text=convert ('koi'=>'win', $koi_text); This lil' helper converts russian text from/to different encodings. available charsets koi, win, iso, dos see Lingua::RU::Charset for more flexible version.

sklon (WHAT=>PAD)
 This function gets full name of client, and transforms it into desired
 declense. Available list of declesnes is:

 C<
  I=>1, IMEN=>1, 1=>1, #Именительный
  R=>2, ROD=>2,  2=>2, #Родительного
  D=>3, DAT=>3,  3=>3, #Дательным
  V=>4, VIN=>4,  4=>4, #Винительный
  T=>5, TVOR=>5, 5=>5, #Творительным
  P=>6, PRED=>6, 6=>6  #о Предложном
 >

%pads
 Yes, it's the hash from above.

initcap (NAME)

Make First Letters of the Every Word Capital.

parse_n

parses noun (name or second name) of client.

parse_lastname

parses last name of client.

AUTHOR

Top

Alexey Usanov <alexey_usa@mail.ru>

SEE ALSO

Top

Lingua::RU::Charset, perllocale

COPYRIGHT

Top


Lingua-RU-Sklon documentation Contained in the Lingua-RU-Sklon distribution.

#!/usr/bin/perl -w
use strict;
use warnings;
use POSIX qw(locale_h);
use locale;
setlocale(LC_CTYPE, "ru_RU.cp1251"); 

package Lingua::RU::Sklon; 




BEGIN {
    use Exporter   ();
    our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

    # set the version for version checking
    $VERSION     = 0.01;
    
    @ISA         = qw(Exporter);
    @EXPORT      = qw(&parse_n &parse_lastname &convert &initcap &sklon);
    %EXPORT_TAGS = ( );

    # your exported package globals go here,
    # as well as any optionally exported functions
    @EXPORT_OK   = qw(%pads);


}
use Carp;

our @EXPORT_OK;

our %pads;

# non-exported package globals go here
our $dos;
our $iso;
our $koi;
our $win;

# initialize package globals, first exported ones

%pads=(
  I=>1, IMEN=>1, 1=>1, #ýòî æåíà
  R=>2, ROD=>2,  2=>2, #îòñòàëñÿ áåç æåíû
  D=>3, DAT=>3,  3=>3, #ñòóäåíòó æåíå
  V=>4, VIN=>4,  4=>4, #ïîðâàë æåíó
  T=>5, TVOR=>5, 5=>5, #íàçâàëñÿ æåíîé
  P=>6, PRED=>6, 6=>6  #ïèøó î æåíå
  );


 $dos={'à'=>160,'á'=>161,'â'=>162,'ã'=>163,'ä'=>164,'å'=>165,'¸'=>241,'æ'=>166,'ç'=>167,'è'=>168,'é'=>169,'ê'=>170,'ë'=>171,'ì'=>172,'í'=>173,'î'=>174,'ï'=>175,'ð'=>224,'ñ'=>225,'ò'=>226,'ó'=>227,'ô'=>228,'õ'=>229,'ö'=>230,'÷'=>231,'ø'=>232,'ù'=>233,'ü'=>236,'û'=>235,'ú'=>234,'ý'=>237,'þ'=>238,'ÿ'=>239,'À'=>128,'Á'=>129,'Â'=>130,'Ã'=>131,'Ä'=>132,'Å'=>133,'¨'=>240,'Æ'=>134,'Ç'=>135,'È'=>136,'É'=>137,'Ê'=>138,'Ë'=>139,'Ì'=>140,'Í'=>141,'Î'=>142,'Ï'=>143,'Ð'=>144,'Ñ'=>145,'Ò'=>146,'Ó'=>147,'Ô'=>148,'Õ'=>149,'Ö'=>150,'×'=>151,'Ø'=>152,'Ù'=>153,'Ü'=>156,'Û'=>155,'Ú'=>154,'Ý'=>157,'Þ'=>158,'ß'=>159};
 $iso={'à'=>208,'á'=>209,'â'=>210,'ã'=>211,'ä'=>212,'å'=>213,'¸'=>241,'æ'=>214,'ç'=>215,'è'=>216,'é'=>217,'ê'=>218,'ë'=>219,'ì'=>220,'í'=>221,'î'=>222,'ï'=>223,'ð'=>224,'ñ'=>225,'ò'=>226,'ó'=>227,'ô'=>228,'õ'=>229,'ö'=>230,'÷'=>231,'ø'=>232,'ù'=>233,'ü'=>236,'û'=>235,'ú'=>234,'ý'=>237,'þ'=>238,'ÿ'=>239,'À'=>176,'Á'=>177,'Â'=>178,'Ã'=>179,'Ä'=>180,'Å'=>181,'¨'=>161,'Æ'=>182,'Ç'=>183,'È'=>184,'É'=>185,'Ê'=>186,'Ë'=>187,'Ì'=>188,'Í'=>189,'Î'=>190,'Ï'=>191,'Ð'=>192,'Ñ'=>193,'Ò'=>194,'Ó'=>195,'Ô'=>196,'Õ'=>197,'Ö'=>198,'×'=>199,'Ø'=>200,'Ù'=>201,'Ü'=>204,'Û'=>203,'Ú'=>202,'Ý'=>205,'Þ'=>206,'ß'=>207};
 $koi={'à'=>193,'á'=>194,'â'=>215,'ã'=>199,'ä'=>196,'å'=>197,'¸'=>163,'æ'=>214,'ç'=>218,'è'=>201,'é'=>202,'ê'=>203,'ë'=>204,'ì'=>205,'í'=>206,'î'=>207,'ï'=>208,'ð'=>210,'ñ'=>211,'ò'=>212,'ó'=>213,'ô'=>198,'õ'=>200,'ö'=>195,'÷'=>222,'ø'=>219,'ù'=>221,'ü'=>216,'û'=>217,'ú'=>223,'ý'=>220,'þ'=>192,'ÿ'=>209,'À'=>225,'Á'=>226,'Â'=>247,'Ã'=>231,'Ä'=>228,'Å'=>229,'¨'=>179,'Æ'=>246,'Ç'=>250,'È'=>233,'É'=>234,'Ê'=>235,'Ë'=>236,'Ì'=>237,'Í'=>238,'Î'=>239,'Ï'=>240,'Ð'=>242,'Ñ'=>243,'Ò'=>244,'Ó'=>245,'Ô'=>230,'Õ'=>232,'Ö'=>227,'×'=>254,'Ø'=>251,'Ù'=>253,'Ü'=>248,'Û'=>249,'Ú'=>255,'Ý'=>252,'Þ'=>224,'ß'=>241};
 $win={'à'=>224,'á'=>225,'â'=>226,'ã'=>227,'ä'=>228,'å'=>229,'¸'=>184,'æ'=>230,'ç'=>231,'è'=>232,'é'=>233,'ê'=>234,'ë'=>235,'ì'=>236,'í'=>237,'î'=>238,'ï'=>239,'ð'=>240,'ñ'=>241,'ò'=>242,'ó'=>243,'ô'=>244,'õ'=>245,'ö'=>246,'÷'=>247,'ø'=>248,'ù'=>249,'ü'=>252,'û'=>251,'ú'=>250,'ý'=>253,'þ'=>254,'ÿ'=>255,'À'=>192,'Á'=>193,'Â'=>194,'Ã'=>195,'Ä'=>196,'Å'=>197,'¨'=>168,'Æ'=>198,'Ç'=>199,'È'=>200,'É'=>201,'Ê'=>202,'Ë'=>203,'Ì'=>204,'Í'=>205,'Î'=>206,'Ï'=>207,'Ð'=>208,'Ñ'=>209,'Ò'=>210,'Ó'=>211,'Ô'=>212,'Õ'=>213,'Ö'=>214,'×'=>215,'Ø'=>216,'Ù'=>217,'Ü'=>220,'Û'=>219,'Ú'=>218,'Ý'=>221,'Þ'=>222,'ß'=>223}; 

END { }       # module clean-up code here (global destructor)




sub convert {
  my $src=lc shift;
  my $tgt=lc shift;
  my ($src_cp, $tgt_cp);
  if      ($src eq 'dos') {  $src_cp=$dos;
  } elsif ($src eq 'win') {  $src_cp=$win;
  } elsif ($src eq 'iso') {  $src_cp=$iso;
  } elsif ($src eq 'koi') {  $src_cp=$koi;
  } else {
    croak "Wrong Source encoding: $src"; 
    return "! Wrong Source encoding: $src"; 
  }
  
  if      ($tgt eq 'dos') {  $tgt_cp=$dos;
  } elsif ($tgt eq 'win') {  $tgt_cp=$win;
  } elsif ($tgt eq 'iso') {  $tgt_cp=$iso;
  } elsif ($tgt eq 'koi') {  $tgt_cp=$koi; 
  } else {
    croak "Wrong tgt encoding: $tgt"; 
    return "! Wrong tgt encoding: $tgt";
  }
  
  my %src_cp = reverse %{$src_cp};
  my $out;
  my @out;
  foreach (@_) {
    my @a=split //;
    $out='';
    foreach (@a) {
      my $r=chr($tgt_cp->{$src_cp{ord($_)}});
      $out.= $r?$r:$_;
    }
    push @out,$out;
  }
  if (wantarray) {
    return @out;
  } else {
    return join ('',@out);
  }
}


sub parse_lastname {
  my $txt=lc shift;
  my $wrap=shift;
  my $last_letter=substr($txt,-2);
  
  #print $last_letter;
  if ($last_letter eq 'èé') {
    my $h={1=>'èé',
           2=>'îãî',
           3=>'îìó',
           4=>'îãî',
           5=>'èì',
           6=>'îì'
           };
    return substr($txt,0,-2).($h->{$wrap}||return "!$txt");
  } elsif ($last_letter eq 'ûé') {
    my $h={1=>'ûé',
           2=>'îãî',
           3=>'îìó',
           4=>'îãî',
           5=>'ûì',
           6=>'îì'
           };
    return substr($txt,0,-2).($h->{$wrap}||return "!$txt");
  } elsif ($last_letter eq 'îé') {
    return $txt;
  } elsif ($last_letter eq 'àÿ') {
    my $h={1=>'àÿ',
           2=>'óþ',
           3=>'îé',
           4=>'óþ',
           5=>'îé',
           6=>'îé'
           };
    return substr($txt,0,-2).($h->{$wrap}||return "!$txt");
  } elsif ($last_letter eq 'ÿÿ') {
    my $h={1=>'ÿÿ',
           2=>'þþ',
           3=>'åé',
           4=>'þþ',
           5=>'åé',
           6=>'åé'
           };
    return substr($txt,0,-2).($h->{$wrap}||return "!$txt");
  } elsif ($last_letter eq 'îê') {
  
    my $h={1=>'îê',  #ýòî æåíà
           2=>'êà',  #îòñòàëñÿ áåç æåíû
           3=>'êå',  #ñòóäåíòó æåíå
           4=>'êó',  #ïîðâàë æåíó
           5=>'êîì', #íàçâàëñÿ æåíîé
           6=>'êå'   #ïèøó î æåíå
           };
    return substr($txt,0,-2).$h->{$wrap};
  }
  
  $_=substr($txt,-1);
  
  if ($_ eq 'é') {
  
  my $h={1=>'é',
           2=>'ÿ',
           3=>'ÿ',
           4=>'þ',
           5=>'åì',
           6=>'è'
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if ($_ eq 'à') {
    my $h={1=>'à',  #ýòî æåíà
           2=>'îé',  #îòñòàëñÿ áåç æåíû
           3=>'ó',  #ïîðâàë æåíó
           4=>'îé',  #ñòóäåíòó æåíå
           5=>'îé', #íàçâàëñÿ æåíîé
           6=>'îé'   #ïèøó î æåíå
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if ($_ eq 'ÿ') {
    my $h={1=>'ÿ',  #ýòî æåíà
           2=>'è',  #îòñòàëñÿ áåç æåíû
           3=>'þ',  #ïîðâàë æåíó
           4=>'å',  #ñòóäåíòó æåíå
           5=>'åé', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if ($_ eq 'ü') {
    my $h={1=>'ü',  #ýòî æåíà
           2=>'ÿ',  #îòñòàëñÿ áåç æåíû
           3=>'ÿ',  #ïîðâàë æåíó
           4=>'þ',  #ñòóäåíòó æåíå
           5=>'åì', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if (/[óåõúôûïðîëäæýÿ÷ñìèòüáþ]/) {
    return $txt;
  }
  if (/â/) {
    my $h={1=>'',  #ýòî æåíà
           2=>'à',  #îòñòàëñÿ áåç æåíû
           3=>'e',  #ñòóäåíòó æåíå
           4=>'ó',  #ïîðâàë æåíó
           5=>'ûì', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return $txt.$h->{$wrap};
  }
  
  if (/[öóêåíãøùçõúôûâàïðîëäæýÿ÷ñìèòüáþ]/) {
    my $h={1=>'',  #ýòî æåíà
           2=>'à',  #îòñòàëñÿ áåç æåíû
           3=>'ó',  #ïîðâàë æåíó
           4=>'e',  #ñòóäåíòó æåíå
           5=>'îì', #íàçâàëñÿ æåíîé
           6=>'îé'   #ïèøó î æåíå
           };
    return $txt.$h->{$wrap};
  }
  carp ("Unalbe to sklon: $txt");
  return "$txt";
  
}

sub parse_n {
  my $txt=lc shift;
  my $wrap=shift;
  my $last_letter=substr($txt,-2);
  
  #print $last_letter;
  if ($last_letter eq 'îê') {
    my $h={1=>'îê',  #ýòî æåíà
           2=>'êà',  #îòñòàëñÿ áåç æåíû
           3=>'êó',  #ïîðâàë æåíó
           4=>'êå',  #ñòóäåíòó æåíå
           5=>'êîì', #íàçâàëñÿ æåíîé
           6=>'êå'   #ïèøó î æåíå
           };
    return substr($txt,0,-2).$h->{$wrap};
  } elsif ($last_letter eq 'åë') {
    my $h={1=>'åë',  #ýòî æåíà
           2=>'ëà',  #îòñòàëñÿ áåç æåíû
           3=>'ëó',  #ïîðâàë æåíó
           4=>'ëó',  #ñòóäåíòó æåíå
           5=>'ëîì', #íàçâàëñÿ æåíîé
           6=>'ëå'   #ïèøó î æåíå
           };
    return substr($txt,0,-2).$h->{$wrap};
  } elsif ($last_letter eq 'åâ') {
    my $h={1=>'åâ',  #ýòî æåíà
           2=>'üâà',  #îòñòàëñÿ áåç æåíû
           3=>'üâà',  #ïîðâàë æåíó
           4=>'üâó',  #ñòóäåíòó æåíå
           5=>'üâîì', #íàçâàëñÿ æåíîé
           6=>'üâå'   #ïèøó î æåíå
           };
    return substr($txt,0,-2).$h->{$wrap};
  }
  
  $_=substr($txt,-1);
  
  if ($_ eq 'é') {
    my $h={1=>'é',
           2=>'ÿ',
           3=>'þ',
           4=>'þ',
           5=>'åì',
           6=>'å'
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if ($_ eq 'à') {
    my $h={1=>'à',  #ýòî æåíà
           2=>'û',  #îòñòàëñÿ áåç æåíû
           3=>'å',  #ñòóäåíòó æåíå
           4=>'ó',  #ïîðâàë æåíó
           5=>'îé', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if ($_ eq 'ÿ') {
    my $h={1=>'ÿ',  #ýòî æåíà
           2=>'è',  #îòñòàëñÿ áåç æåíû
           3=>'å',  #ñòóäåíòó æåíå
           4=>'þ',  #ïîðâàë æåíó
           5=>'åé', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if ($_ eq 'ü') {
    my $h={1=>'ü',  #ýòî æåíà
           2=>'ÿ',  #îòñòàëñÿ áåç æåíû
           3=>'þ',  #ñòóäåíòó æåíå
           4=>'ÿ',  #ïîðâàë æåíó
           5=>'åì', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return substr($txt,0,-1).($h->{$wrap}||return "!$txt");
  }
  if (/[óåúûýþ]/) {
    return $txt;
  }
  if (/[âòêíãçõôâïðëäìòá]/) {
    my $h={1=>'',  #ýòî æåíà
           2=>'à',  #îòñòàëñÿ áåç æåíû
           3=>'å',  #ñòóäåíòó æåíå
           4=>'ó',  #ïîðâàë æåíó
           5=>'îì', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return $txt.$h->{$wrap};
  }
  
  if (/[øùõæ÷]/) {
    my $h={1=>'',  #ýòî æåíà
           2=>'à',  #îòñòàëñÿ áåç æåíû
           3=>'Ó',  #ñòóäåíòó æåíå
           4=>'ó',  #ïîðâàë æåíó
           5=>'åì', #íàçâàëñÿ æåíîé
           6=>'å'   #ïèøó î æåíå
           };
    return $txt.$h->{$wrap};
  }
  carp ("Unalbe to sklon: $txt");
  return "$txt";
  
}

sub sklon {
  $_=shift;
  print "$_\n";
  /(\w+)\s(\w+)\s(.+)/;
  my $pad=shift;
  my $decl=$pads{$pad};

  croak "Unknown pad attempting to be set : $pad" unless $decl;
  return initcap(parse_lastname($1,$decl)." ".parse_n($2,$decl)." ".parse_n($3,$decl));
  
}
sub initcap {
  $_=shift;
  my $out;
  while (/(\w)(\w*)(\W*)/g) {
    $out.=uc ($1).$2.$3;
  }
  return $out;
}

1;

__END__