MARC::Charset::Code - represents a MARC-8/UTF-8 mapping


MARC-Charset documentation Contained in the MARC-Charset distribution.

Index


Code Index:

NAME

Top

MARC::Charset::Code - represents a MARC-8/UTF-8 mapping

SYNOPSIS

Top

DESCRIPTION

Top

Each mapping from a MARC-8 value to a UTF-8 value is represented by a MARC::Charset::Code object in a MARC::Charset::Table.

METHODS

Top

new()

The constructor.

name()

A descriptive name for the code point.

marc()

A string representing the MARC-8 bytes codes.

ucs()

A string representing the UCS code point in hex.

charset_code()

The MARC-8 character set code.

is_combining()

Returns true/false to tell if the character is a combining character.

to_string()

A stringified version of the object suitable for pretty printing.

char_value()

Returns the unicode character. Essentially just a helper around ucs().

marc_value()

The string representing the MARC-8 encoding.

charset_name()

Returns the name of the character set, instead of the code.

to_string()

Returns a stringified version of the object.

marc8_hash_code()

Returns a hash code for this Code object for looking up the object using MARC8. First portion is the character set code and the second is the MARC-8 value.

utf8_hash_code()

Returns a hash code for uniquely identifying a Code by it's UCS value.

default_charset_group

Returns 'G0' or 'G1' indicating where the character is typicalling used in the MARC-8 environment.

get_marc8_escape

Returns an escape sequence to move to the Code from another marc-8 character set.

charset_value

Returns the charset value, not the hex sequence.


MARC-Charset documentation Contained in the MARC-Charset distribution.
package MARC::Charset::Code;

use strict;
use warnings;
use base qw(Class::Accessor);
use Carp qw(croak);
use Encode qw(encode_utf8);
use MARC::Charset::Constants qw(:all);

MARC::Charset::Code
    ->mk_accessors(qw(marc ucs name charset is_combining alt));

sub char_value() 
{
    return chr(hex(shift->ucs()));
}

sub marc_value
{
    my $code = shift;
    my $marc = $code->marc();
    return chr(hex($marc)) unless $code->charset_name eq 'CJK';
    return 
        chr(hex(substr($marc,0,2))) .
        chr(hex(substr($marc,2,2))) .
        chr(hex(substr($marc,4,2)));
}


sub charset_name()
{
    return MARC::Charset::Constants::charset_name(shift->charset_value());
}

sub to_string
{
    my $self = shift;
    my $str = 
        $self->name() . ': ' .
        'charset_code=' . $self->charset() . ' ' .
        'marc='         . $self->marc() . ' ' . 
        'ucs='          . $self->ucs() .  ' ';

    $str .= ' combining' if $self->is_combining();
    return $str;
}


sub marc8_hash_code 
{
    my $self = shift;
    return sprintf('%s:%s', $self->charset_value(), $self->marc_value());
}


sub utf8_hash_code
{
    return int(hex(shift->ucs()));
}


sub default_charset_group
{
    my $charset = shift->charset_value();

    return 'G0'
        if $charset eq ASCII_DEFAULT 
            or $charset eq GREEK_SYMBOLS
            or $charset eq SUBSCRIPTS
            or $charset eq SUPERSCRIPTS
            or $charset eq BASIC_LATIN
            or $charset eq BASIC_ARABIC
            or $charset eq BASIC_CYRILLIC
            or $charset eq BASIC_GREEK
            or $charset eq BASIC_HEBREW
            or $charset eq CJK;

    return 'G1';
}


sub get_escape 
{
    my $charset = shift->charset_value();

    return ESCAPE . $charset
        if $charset eq ASCII_DEFAULT 
            or $charset eq GREEK_SYMBOLS
            or $charset eq SUBSCRIPTS
            or $charset eq SUPERSCRIPTS;

    return ESCAPE . SINGLE_G0_A . $charset
        if $charset eq ASCII_DEFAULT
            or $charset eq BASIC_LATIN
            or $charset eq BASIC_ARABIC
            or $charset eq BASIC_CYRILLIC
            or $charset eq BASIC_GREEK
            or $charset eq BASIC_HEBREW;

    return ESCAPE . SINGLE_G1_A . $charset
        if $charset eq EXTENDED_ARABIC
            or $charset eq EXTENDED_LATIN
            or $charset eq EXTENDED_CYRILLIC;

    return ESCAPE . MULTI_G0_A . CJK
        if $charset eq CJK;
}

sub charset_value
{
    return chr(hex(shift->charset()));
}



1;