Lingua::KO::Romanize::Hangul - Romanization of Korean language


Lingua-KO-Romanize-Hangul documentation Contained in the Lingua-KO-Romanize-Hangul distribution.

Index


Code Index:

NAME

Top

Lingua::KO::Romanize::Hangul - Romanization of Korean language

SYNOPSIS

Top

    use Lingua::KO::Romanize::Hangul;

    my $conv = Lingua::KO::Romanize::Hangul->new();
    my $roman = $conv->char( $hangul );
    printf( "<ruby><rb>%s</rb><rt>%s</rt></ruby>", $hangul, $roman );

    my @array = $conv->string( $string );
    foreach my $pair ( @array ) {
        my( $raw, $ruby ) = @$pair;
        if ( defined $ruby ) {
            printf( "<ruby><rb>%s</rb><rt>%s</rt></ruby>", $raw, $ruby );
        } else {
            print $raw;
        }
    }

DESCRIPTION

Top

Hangul is phonemic characters of the Korean language. This module follows the Revised Romanization of Korean which was released on July 7, 2000 as the official romanization system in South Korea.

$conv = Lingua::KO::Romanize::Hangul->new();

This constructer methods returns a new object.

$roman = $conv->char( $hangul );

This method returns romanized letters of a Hangul character. It returns undef when $hanji is not a valid Hangul character. The argument's encoding must be UTF-8.

$roman = $conv->chars( $string );

This method returns romanized letters of Hangul characters.

@array = $conv->string( $string );

This method returns a array of referenced arrays which are pairs of a Hangul chacater and its romanized letters.

    $array[0]           # first Korean character's pair (array)
    $array[1][0]        # secound Korean character itself
    $array[1][1]        # its romanized letters

UTF-8 FLAG

Top

This module treats utf8 flag transparently.

SEE ALSO

Top

Lingua::JA::Romanize::Japanese for Japanese

Lingua::ZH::Romanize::Pinyin for Chinese

http://www.korean.go.kr/06_new/rule/rule06.jsp

http://www.kawa.net/works/perl/romanize/romanize-e.html

COPYRIGHT AND LICENSE

Top


Lingua-KO-Romanize-Hangul documentation Contained in the Lingua-KO-Romanize-Hangul distribution.

# ----------------------------------------------------------------
package Lingua::KO::Romanize::Hangul;
use strict;
use vars qw( $VERSION );
$VERSION = "0.20";
my $PERL581 = 1 if ( $] >= 5.008001 );

my $INITIAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
    g   kk  n   d   tt  r   m   b   pp  s   ss  -   j   jj
    ch  k   t   p   h
)];
my $PEAK_LETTER = [map {$_ eq '-' ? '' : $_} qw(
    a   ae  ya  yae eo  e   yeo ye  o   wa  wae oe  yo  u
    wo  we  wi  yu  eu  ui  i
)];
my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
    -   g   kk  ks  n   nj  nh  d   r   lg  lm  lb  ls  lt
    lp  lh  m   b   ps  s   ss  ng  j   c   k   t   p   h
)];
# my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
#     -   g   kk  ks  n   nj  nh  d   r   rg  rm  rb  rs  rt
#     rp  rh  m   b   bs  s   ss  ng  j   c   k   t   p   h
# )];

# ----------------------------------------------------------------
sub new {
    my $package = shift;
    my $self = {@_};
    bless $self, $package;
    $self;
}

sub char {
    my $self = shift;
    return $self->_char(@_) unless $PERL581;
    my $char = shift;
    my $utf8 = utf8::is_utf8( $char );
    utf8::encode( $char ) if $utf8;
    $char = $self->_char( $char );
    utf8::decode( $char ) if $utf8;
    $char;
}

sub _char {
    my $self = shift;
    my $char = shift;
    my( $c1, $c2, $c3, $c4 ) = unpack("C*",$char);
    return if ( ! defined $c3 || defined $c4 );
    my $ucs2 = (($c1 & 0x0F)<<12) | (($c2 & 0x3F)<<6) | ($c3 & 0x3F);
    return if ( $ucs2 < 0xAC00 );
    return if ( $ucs2 > 0xD7A3 );
    my $han = $ucs2 - 0xAC00;
    my $init = int( $han / 21 / 28 );
    my $peak = int( $han / 28 ) % 21;
    my $fin  = $han % 28;
    join( "", $INITIAL_LETTER->[$init], $PEAK_LETTER->[$peak], $FINAL_LETTER->[$fin] );
}

sub chars {
    my $self = shift;
    my @array = $self->string( shift );
    join( " ", map {$#$_>0 ? $_->[1] : $_->[0]} @array );
}

sub string {
    my $self = shift;
    return $self->_string(@_) unless $PERL581;
    my $char = shift;
    my $flag = utf8::is_utf8( $char );
    utf8::encode( $char ) if $flag;
    my @array = $self->_string( $char );
    if ( $flag ) {
        foreach my $pair ( @array ) {
            utf8::decode( $pair->[0] ) if defined $pair->[0];
            utf8::decode( $pair->[1] ) if defined $pair->[1];
        }
    }
    @array;
}

#   [UCS-2] AC00-D7A3
#   [UTF-8] EAB080-ED9EA3
#   EA-ED are appeared only as Hangul's first character.

sub _string {
    my $self = shift;
    my $src = shift;
    my $array = [];
    while ( $src =~ /([\xEA-\xED][\x80-\xBF]{2})|([^\xEA-\xED]+)/sg ) {
        if ( defined $1 ) {
            my $pair = [ $1 ];
            my $roman = $self->char( $1 );
            $pair->[1] = $roman if defined $roman;
            push( @$array, $pair );
        } else {
            push( @$array, [ $2 ] );
        }
    }

    for ( my $i = 0 ; $i < $#$array ; $i++ ) {
        next if ( scalar @{ $array->[$i] } < 2 );
        next if ( scalar @{ $array->[ $i + 1 ] } < 2 );
        my $this = $array->[$i]->[1];
        my $next = $array->[ $i + 1 ]->[1];
        my $novowel = 1 unless ( $next =~ /^[aeouiwy]/ );

        if ( $this =~ /(tt|pp|jj)$/ && $novowel ) {
            $array->[$i]->[1] =~ s/(tt|pp|jj)$//;
        }
        elsif ( $this =~ /([^n]g|kk)$/ && $novowel ) {
            $array->[$i]->[1] =~ s/(g|kk)$/k/;
        }
        elsif ( $this =~ /(d|j|ch|s?s)$/ && $novowel ) {
            $array->[$i]->[1] =~ s/(d|j|ch|s?s)$/t/;
        }
        elsif ( $this =~ /(b)$/ && $novowel ) {
            $array->[$i]->[1] =~ s/(b)$/p/;
        }
        elsif ( $this =~ /(r)$/ && $novowel ) {
            $array->[$i]->[1] =~ s/(r)$/l/;
            $array->[$i+1]->[1] =~ s/^r/l/;
        }
    }

    if ( scalar @$array ) {
        my $last = $array->[$#$array];
        my $this = $last->[1];
        if ( $this =~ /(tt|pp|jj)$/ ) {
            $last->[1] =~ s/(tt|pp|jj)$//;
        }
        elsif ( $this =~ /([^n]g|kk)$/ ) {
            $last->[1] =~ s/(g|kk)$/k/;
        }
        elsif ( $this =~ /(d|j|ch|s?s)$/ ) {
            $last->[1] =~ s/(d|j|ch|s?s)$/t/;
        }
        elsif ( $this =~ /(b)$/ ) {
            $last->[1] =~ s/(b)$/p/;
        }
        elsif ( $this =~ /(r)$/ ) {
            $last->[1] =~ s/(r)$/l/;
        }
    }

    @$array;
}

# ----------------------------------------------------------------
;1;