Lingua::RU::NameParse - Normalize Russian names


Lingua-RU-NameParse documentation Contained in the Lingua-RU-NameParse distribution.

Index


Code Index:

Given a first name and a patronymic, returns the name and patronymic in the nominative case.

Transliterates the string from Cyrillic to Latin.

NAME

Top

Lingua::RU::NameParse - Normalize Russian names

SYNOPSIS

Top

  use Lingua::EN::NameParse;

  my $p = Lingua::EN::NameParse->new();

  my $norm = $p->normalize("Карлу Марксу");




DESCRIPTION

Top

Takes case endings off of Russian proper names, and normalizes them to the nominative. For the moment works only with first name + patronymic.

AUTHOR

Top

Maciej Ceglowski <maciej@ceglowski.com>


Lingua-RU-NameParse documentation Contained in the Lingua-RU-NameParse distribution.
package Lingua::RU::NameParse;

use 5.008;
use strict;
use warnings;


our $VERSION = '0.02';


sub new { my $class = shift; 
		  bless {}, $class; }





sub normalize {
	
	
	
	my ( $self, $testme ) = @_;
	
	use utf8;

	# voodo to set the string's UTF8 flag
	$testme = pack 'U*', unpack( 'U*', $testme ); 
 
	my ( $w1, $w2 ) = split m/\s+/, $testme;
	
	
	##################
	# 	 ACCUSATIVE
	##################
	if (  $w2 =~ /[чк]а$/ ) {
		$w2 =~ s/а$//g;
		$w1 =~ s/а$//g;
		$w1 =~ s/я$/й/g;
	}
	
	elsif ( $w2 =~ /овну\b/ ) {
		$w2 =~ s/у\b/а/;
		$w1 =~ s/ью\b/ья/;
		$w1 =~ s/ию\b/ия/;
	}
	
	################
	# 	 DATIVE
	################
	elsif (  $w2 =~ /не\b/ ) {
		$w2 =~ s/е\b/а/;
		$w1 =~ s/ье\b/ья/;
		$w1 =~ s/е\b/а/;
		$w1 =~ s/ии\b/ия/;
	}
	
	elsif (  $w2 =~ /че\b/ ) {
		$w2 =~ s/е$//;
		$w1 =~ s/ее\b/ей/;  # aleksei
	}
	
	# Константину Левину
	elsif (  $w2 =~ /[нч]у$/ ) {
		$w2 =~ s/у$//;
		$w1 =~ s/у\b//;
		$w1 =~ s/([еи])ю\b/$1й/;
	}
	

	###################
	#   INSTRUMENTAL
	###################
	# Верой Павловною
	elsif (  $w2 =~ /но[юй]$/ ) {

		$w2 =~ s/ою$/а/;
		$w2 =~ s/ой$/а/;
		$w1 =~ s/ой$/а/;
		$w1 =~ s/ею/я/;
		$w1 =~ s/ей/я/;
	}
	

	elsif (  $w2 =~ /ем$/ ) {
		$w2 =~ s/ем\b//;
		$w1 =~ s/ом\b//;
		$w1 =~ s/еем\b/ей/;		
	}
	
	elsif ( $w2 =~ /ым\b/ ) {
		$w2 =~ s/ым/ый/;
	}

	###################
	#   GENITIVE
	###################
	elsif (  $w2 =~ /ны$/ ) {
		$w2 =~ s/ы$/а/g;
		$w1 =~ s/ы$/а/g;
		$w1 =~ s/ьи$/ья/;
	}
	
	elsif (  $w2 =~ /ого$/ ) {
		$w2 =~ s/ого\b/ий/g;
		$w1 =~ s/а$//g;
	}
	
	$w1 =~ s/вл\b/вел/; # pavel
	
	return "$w1 $w2";
}


sub transliterate {
	my ( $self, $in ) = @_;
	
 
 	for ( $in ) {
 		s/ц/ts/gi;
		s/ш/sh/gi;
		s/щ/shch/gi;
		s/ж/zh/gi;
		s/я/ya/gi;
		s/я\b/ya/gi;
		s/ч/ch/g;
		s/ч\b/ch/g;
		s/ÑŽ/yu/gi;
 
		tr/йукнгзхфывапролдэсмитеьбБЙУКЕНГЗХФЫВАПРОЛДЭСМИТ/jukngzhfyvaproldesmite'bBJUKENGZHFYVAPROLDESMIT/;
	}
	
	
	return $in;

}

1;
__END__