| Lingua-JA-Hepburn-Passport documentation | Contained in the Lingua-JA-Hepburn-Passport distribution. |
Lingua::JA::Hepburn::Passport - Hepburn Romanization using Japanese passport rules
use utf8;
use Lingua::JA::Hepburn::Passport;
my $hepburn = Lingua::JA::Hepburn::Passport->new;
$hepburn->romanize("みやがわ"); # MIYAGAWA
$hepburn->romanize("おおの"); # ONO
$hepburn->romanize("かとう"); # KATO
$hepburn->romanize("ゆうこ"); # YUKO
$hepburn->romanize("なんば"); # NAMBA
$hepburn->romanize("はっちょう"); # HATCHO
# Indicate long vowels by "h"
my $hepburn = Lingua::JA::Hepburn::Passport->new( long_vowels_h => 1 );
$hepburn->romanize("おおの"); # OHNO
$hepburn->romanize("かとう"); # KATOH
Lingua::JA::Hepburn::Passport is a Hiragana/Katakana to Romanization engine using Japanese passport rules.
There are already a couple of Hepburn romanization modules on CPAN (see "SEE ALSO"), but none of them conforms to the conversion rules defined in the Japanese passport regulations. This one does.
$hepburn = Lingua::JA::Hepburn::Passport->new; $hepburn = Lingua::JA::Hepburn::Passport->new( long_vowels_h => 1 );
Creates a new object. Optionally you can set the long_vowels_h parameter to 1, in which case this module tries to add an H to the long vowels OO and OU, as allowed by the Japanese passport rules.
$roman = $hepburn->romanize( $kana );
Romanizes the string $kana using Hepburn romanization. $kana should be either Hiragana or Katakana, given as a Unicode string in Perl (a.k.a. UTF-8 flagged); otherwise it throws an error. The returned $roman is in all upper-case Roman letters.
This module doesn't come with a deromanize method (yet), which would do the Roman to Katakana/Hiragana translation, since I don't think we need it. Other modules on CPAN already do the job quite nicely.
Tatsuhiko Miyagawa <miyagawa@bulknews.net>
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
Code algorithm is based on http://www.d-project.com/hebonconv/
| Lingua-JA-Hepburn-Passport documentation | Contained in the Lingua-JA-Hepburn-Passport distribution. |
package Lingua::JA::Hepburn::Passport;

use strict;
use warnings;
our $VERSION = '0.02';

use utf8;    # this file contains literal kana; keep it saved as UTF-8
use Carp;

# Hiragana -> romaji lookup table.  Keys are one- or two-character Hiragana
# strings, values are the upper-case romanization of that syllable.
# Katakana input is folded to Hiragana by romanize() before lookup, so only
# Hiragana keys are needed here.
our %Map = (
    # Plain syllables.
    'あ' => 'A',  'い' => 'I',   'う' => 'U',   'え' => 'E',  'お' => 'O',
    'か' => 'KA', 'き' => 'KI',  'く' => 'KU',  'け' => 'KE', 'こ' => 'KO',
    'さ' => 'SA', 'し' => 'SHI', 'す' => 'SU',  'せ' => 'SE', 'そ' => 'SO',
    'た' => 'TA', 'ち' => 'CHI', 'つ' => 'TSU', 'て' => 'TE', 'と' => 'TO',
    'な' => 'NA', 'に' => 'NI',  'ぬ' => 'NU',  'ね' => 'NE', 'の' => 'NO',
    'は' => 'HA', 'ひ' => 'HI',  'ふ' => 'FU',  'へ' => 'HE', 'ほ' => 'HO',
    'ま' => 'MA', 'み' => 'MI',  'む' => 'MU',  'め' => 'ME', 'も' => 'MO',
    'や' => 'YA', 'ゆ' => 'YU',  'よ' => 'YO',
    'ら' => 'RA', 'り' => 'RI',  'る' => 'RU',  'れ' => 'RE', 'ろ' => 'RO',
    'わ' => 'WA', 'ゐ' => 'I',   'ゑ' => 'E',   'を' => 'O',
    'ん' => 'N',

    # Small vowels (used only in non-strict mode; strict mode rejects them).
    'ぁ' => 'A', 'ぃ' => 'I', 'ぅ' => 'U', 'ぇ' => 'E', 'ぉ' => 'O',

    # Voiced and semi-voiced syllables.
    'が' => 'GA', 'ぎ' => 'GI', 'ぐ' => 'GU', 'げ' => 'GE', 'ご' => 'GO',
    'ざ' => 'ZA', 'じ' => 'JI', 'ず' => 'ZU', 'ぜ' => 'ZE', 'ぞ' => 'ZO',
    'だ' => 'DA', 'ぢ' => 'JI', 'づ' => 'ZU', 'で' => 'DE', 'ど' => 'DO',
    'ば' => 'BA', 'び' => 'BI', 'ぶ' => 'BU', 'べ' => 'BE', 'ぼ' => 'BO',
    'ぱ' => 'PA', 'ぴ' => 'PI', 'ぷ' => 'PU', 'ぺ' => 'PE', 'ぽ' => 'PO',

    # Two-character (contracted) syllables.
    'きゃ' => 'KYA', 'きゅ' => 'KYU', 'きょ' => 'KYO',
    'しゃ' => 'SHA', 'しゅ' => 'SHU', 'しょ' => 'SHO',
    'ちゃ' => 'CHA', 'ちゅ' => 'CHU', 'ちょ' => 'CHO', 'ちぇ' => 'CHE',
    'にゃ' => 'NYA', 'にゅ' => 'NYU', 'にょ' => 'NYO',
    'ひゃ' => 'HYA', 'ひゅ' => 'HYU', 'ひょ' => 'HYO',
    'みゃ' => 'MYA', 'みゅ' => 'MYU', 'みょ' => 'MYO',
    'りゃ' => 'RYA', 'りゅ' => 'RYU', 'りょ' => 'RYO',
    'ぎゃ' => 'GYA', 'ぎゅ' => 'GYU', 'ぎょ' => 'GYO',
    'じゃ' => 'JA',  'じゅ' => 'JU',  'じょ' => 'JO',
    'びゃ' => 'BYA', 'びゅ' => 'BYU', 'びょ' => 'BYO',
    'ぴゃ' => 'PYA', 'ぴゅ' => 'PYU', 'ぴょ' => 'PYO',
);

# new(%opt)
#
# Constructor.  All key/value pairs are stored verbatim on the object.
# Options read by romanize():
#   long_vowels_h - when true, long OO/OU is written "OH" instead of "O".
#   strict        - when true, croak on input that cannot be romanized
#                   instead of passing it through unchanged.
sub new {
    my ($class, %opt) = @_;
    return bless {%opt}, $class;
}

# _hepburn_for($string, $index)
#
# Looks up the syllable starting at character offset $index.  A
# two-character key is tried first so contracted syllables such as "きゃ"
# win over their first character alone; if that lookup yields nothing, the
# single character is tried.
#
# Returns a hash ref { char => ..., hepburn => ... }.  'char' is the
# substring that was last looked up (undef when $index is past the end of
# the string); 'hepburn' is its %Map value, or undef when there is none.
sub _hepburn_for {
    my ($string, $index) = @_;

    my $length = length $string;
    my ($hepburn, $char);

    if ($index + 1 < $length) {
        $char    = substr $string, $index, 2;
        $hepburn = $Map{$char};
    }
    if (!$hepburn && $index < $length) {
        $char    = substr $string, $index, 1;
        $hepburn = $Map{$char};
    }

    return { char => $char, hepburn => $hepburn };
}

# romanize($string)
#
# Converts a Hiragana/Katakana string to upper-case Hepburn romaji
# following the Japanese passport conventions implemented below.
#
# $string must be a Perl character string with the UTF-8 flag set;
# otherwise this croaks.  With the 'strict' option the method also croaks
# on non-kana input, on the small vowels, and on any character with no
# table entry.  Without 'strict', characters that have no romanization are
# copied to the output unchanged.
#
# Returns the romanized string ('' for an empty input string).
sub romanize {
    my ($self, $string) = @_;

    unless (utf8::is_utf8($string)) {
        croak "romanize(string): should be UTF-8 flagged string";
    }

    # Fold Katakana to Hiragana so that %Map only needs Hiragana keys.
    # The range starts at small ァ so that every small vowel is folded,
    # not just ィ ゥ ェ ォ.
    $string =~ tr/ァ-ン/ぁ-ん/;

    if ($self->{strict}) {
        $string =~ /^\p{Hiragana}*$/
            or croak "romanize(string): should be all Hiragana/Katakana";
    }

    my $output = '';
    my $last_hepburn;
    my $i = 0;

    while ($i < length $string) {
        my $hr = _hepburn_for($string, $i);

        # 1. Moraic nasal: Hepburn writes M instead of N before B, M and P.
        if ($hr->{char} eq 'ん') {
            my $next = _hepburn_for($string, $i + 1);
            $hr->{hepburn}
                = $next->{hepburn} && $next->{hepburn} =~ /^[BMP]/
                ? 'M'
                : 'N';
        }
        # 2. Geminate consonant (small tsu): double the following consonant.
        elsif ($hr->{char} eq 'っ') {
            my $next = _hepburn_for($string, $i + 1);

            # Only before CHI / CHA / CHU / CHO is a T inserted instead of
            # doubling the first letter.
            if ($next->{hepburn}) {
                $hr->{hepburn}
                    = $next->{hepburn} =~ /^CH/
                    ? 'T'
                    : substr($next->{hepburn}, 0, 1);
            }
        }
        # 3. Long-vowel mark: Hepburn does not write long vowels.
        elsif ($hr->{char} eq 'ー') {
            $hr->{hepburn} = '';
        }
        # The Japanese passport table has no entries for the small vowels.
        # They are listed explicitly: the code-point range ぁ-ぉ would also
        # cover the full-size vowels あ い う え that sit between them.
        elsif ($hr->{char} =~ /[ぁぃぅぇぉ]/ && $self->{strict}) {
            croak "$hr->{char} is not allowed";
        }

        if (defined $hr->{hepburn}) {
            if ($last_hepburn) {
                # Inspect the last two letters formed by the previous
                # syllable plus this one to detect a doubled vowel.
                my $h_test = $last_hepburn . $hr->{hepburn};
                if (length $h_test > 2) {
                    $h_test = substr $h_test, -2;
                }

                # 3. Long vowels are not written: drop the second vowel.
                if ($h_test =~ /\A(?:AA|II|UU|EE)\z/) {
                    $hr->{hepburn} = '';
                }
                # When a name contains a long "OU" or "OO", either "O" or
                # "OH" may be chosen.
                elsif ($h_test =~ /\A(?:OO|OU)\z/) {
                    $hr->{hepburn} = $self->{long_vowels_h} ? 'H' : '';
                }
            }
            $output .= $hr->{hepburn};
        }
        else {
            if ($self->{strict}) {
                croak "Can't find hepburn replacement for $hr->{char}";
            }
            # Non-strict: pass unknown characters through untouched.
            $output .= $hr->{char};
        }

        $last_hepburn = $hr->{hepburn};
        $i += length $hr->{char};
    }

    return $output;
}

1;
__END__