| Lingua-UK-Translit documentation | Contained in the Lingua-UK-Translit distribution. |
Lingua::UK::Translit - Perl extension for correct transliteration of Ukrainian text in UTF-8 encoding to Latin symbols.
use utf8; use Lingua::UK::Translit; my $ukrainian_text="Україна"; print uk2ascii( $ukrainian_text ), "\n";
Lingua::UK::Translit is a collection of functions for proper transliteration of Ukrainian text in UTF-8 encoding to Latin symbols.
Consists of functions for proper text transliteration. Works only with UTF-8 encoding. Returns all symbols in UTF-8 encoding.
where $ukrainian_text is the text in UTF-8 encoding.
Returns the transliterated text in Latin symbols, encoded as UTF-8. Transliterates only letters of the Ukrainian alphabet and leaves other symbols untouched. Preserves formatting and punctuation.
sub uk2ascii()
perl(1) - Practical Extraction and Report Language Lingua::UK::Jcuken - Conversion between QWERTY and JCUKEN keys in Ukrainian Lingua::RU::Jcuken - Conversion between QWERTY and JCUKEN keys in Russian Lingua::RU::PhTranslit - Writing cyrillic(russian) symbols by ASCII symbols (0x20-0x7f) Lingua::RU::Translit - Converts from Russian "translit" encoding to russian in koi8-r
O. Y. Panchuk, <olex@ucu.edu.ua>
Copyright (C) 2006 by O. Y. Panchuk
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available.
| Lingua-UK-Translit documentation | Contained in the Lingua-UK-Translit distribution. |
package Lingua::UK::Translit;

use 5.006;
use strict;
use warnings;
use utf8;    # the transliteration tables below contain literal Cyrillic keys

require Exporter;

our @ISA         = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw( ) ] );
our @EXPORT_OK   = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT      = qw( &uk2ascii );

our $VERSION = '0.10';

# Default mapping: Ukrainian letter => Latin replacement, used for every
# position in a word unless one of the special cases in uk2ascii() applies.
my %ua2en = (
    'а' => 'a',   'А' => 'A',
    'б' => 'b',   'Б' => 'B',
    'в' => 'v',   'В' => 'V',
    'г' => 'h',   'Г' => 'H',
    'ґ' => 'g',   'Ґ' => 'G',
    'д' => 'd',   'Д' => 'D',
    'е' => 'e',   'Е' => 'E',
    'є' => 'ie',  'Є' => 'Ie',
    'ж' => 'zh',  'Ж' => 'Zh',
    'з' => 'z',   'З' => 'Z',
    'и' => 'y',   'И' => 'Y',
    'і' => 'i',   'І' => 'I',
    'ї' => 'i',   'Ї' => 'I',
    'й' => 'i',   'Й' => 'I',
    'к' => 'k',   'К' => 'K',
    'л' => 'l',   'Л' => 'L',
    'м' => 'm',   'М' => 'M',
    'н' => 'n',   'Н' => 'N',
    'о' => 'o',   'О' => 'O',
    'п' => 'p',   'П' => 'P',
    'р' => 'r',   'Р' => 'R',
    'с' => 's',   'С' => 'S',
    'т' => 't',   'Т' => 'T',
    'у' => 'u',   'У' => 'U',
    'ф' => 'f',   'Ф' => 'F',
    'х' => 'kh',  'Х' => 'Kh',
    'ц' => 'ts',  'Ц' => 'Ts',
    'ч' => 'ch',  'Ч' => 'Ch',
    'ш' => 'sh',  'Ш' => 'Sh',
    'щ' => 'sch', 'Щ' => 'Sch',
    'ь' => '\'',  'Ь' => '\'',
    'ю' => 'iu',  'Ю' => 'Iu',
    'я' => 'ia',  'Я' => 'Ia',
);

# Word-beginning overrides: these letters get a different replacement when
# they are the first character of a word.
# NOTE(review): the key of the 'y'/'Y' pair preceding 'й' was unreadable in
# the damaged source; it is restored as 'ї' from its alphabetical position.
# Confirm both the key and whether 'y' (rather than 'yi') is intended.
my %ua2enwb = (
    'є' => 'ye', 'Є' => 'Ye',
    'ї' => 'y',  'Ї' => 'Y',
    'й' => 'y',  'Й' => 'Y',
    'ю' => 'yu', 'Ю' => 'Yu',
    'я' => 'ya', 'Я' => 'Ya',
);

# uk2ascii($text)
#
# Transliterates the Ukrainian letters in $text to Latin letters and returns
# the resulting string. Characters that are not keys of the tables above are
# copied through unchanged. Returns '' for undefined input.
#
# The text is split on regex word boundaries (\b); within each piece:
#   * a first character listed in %ua2enwb uses its word-beginning form;
#   * 'г'/'Г' directly after 'з'/'З' becomes 'gh'/'Gh' instead of 'h'/'H';
#   * any other known letter uses %ua2en.
#
# NOTE(review): the lookups compare against character literals compiled under
# "use utf8", so $text presumably has to be a decoded character string (as in
# the synopsis, which also uses "use utf8") - confirm for byte-string callers.
sub uk2ascii {
    my $strin = shift;
    return '' unless defined $strin;

    my $strans = '';

    foreach my $word ( split /\b/, $strin ) {
        my @c = split //, $word;

        for my $i ( 0 .. $#c ) {
            my $ch = $c[$i];

            if ( $i == 0 and exists $ua2enwb{$ch} ) {
                $strans .= $ua2enwb{$ch};
            }
            elsif ( $i > 0
                and ( $ch eq 'г' or $ch eq 'Г' )
                and ( $c[ $i - 1 ] eq 'з' or $c[ $i - 1 ] eq 'З' ) )
            {
                # The "$i > 0" guard matters: at index 0, $c[$i - 1] is
                # $c[-1], the LAST letter of the word, which used to turn
                # e.g. "газ" into "ghaz".
                $strans .= ( $ch eq 'г' ) ? 'gh' : 'Gh';
            }
            elsif ( exists $ua2en{$ch} ) {
                $strans .= $ua2en{$ch};
            }
            else {
                # Not a Ukrainian letter: keep digits, punctuation, spaces,
                # Latin text, etc. exactly as they were.
                $strans .= $ch;
            }
        }
    }

    return $strans;
}

1;
__END__