| MARC-Charset documentation | Contained in the MARC-Charset distribution. |
MARC::Charset::Code - represents a MARC-8/UTF-8 mapping
Each mapping from a MARC-8 value to a UTF-8 value is represented by a MARC::Charset::Code object in a MARC::Charset::Table.
The constructor.
A descriptive name for the code point.
A string representing the MARC-8 byte codes.
A string representing the UCS code point in hex.
The MARC-8 character set code.
Returns true/false to tell if the character is a combining character.
A stringified version of the object suitable for pretty printing.
Returns the Unicode character. Essentially just a helper around ucs().
The string representing the MARC-8 encoding.
Returns the name of the character set, instead of the code.
Returns a stringified version of the object.
Returns a hash code for this Code object for looking up the object using MARC8. First portion is the character set code and the second is the MARC-8 value.
Returns a hash code for uniquely identifying a Code by its UCS value.
Returns 'G0' or 'G1' indicating where the character is typically used in the MARC-8 environment.
Returns an escape sequence to move to the Code from another MARC-8 character set.
Returns the charset value, not the hex sequence.
| MARC-Charset documentation | Contained in the MARC-Charset distribution. |
# Package header for MARC::Charset::Code.
# Inherits from Class::Accessor and asks it (via mk_accessors) to generate the
# marc, ucs, name, charset, is_combining and alt accessors used by the methods
# below. The charset-set constants referenced by those methods are imported
# from MARC::Charset::Constants with the :all tag.
package MARC::Charset::Code; use strict; use warnings; use base qw(Class::Accessor); use Carp qw(croak); use Encode qw(encode_utf8); use MARC::Charset::Constants qw(:all); MARC::Charset::Code ->mk_accessors(qw(marc ucs name charset is_combining alt));
# Returns the character for this code's UCS value.
#
# The ucs() accessor is read as a hexadecimal string, converted to a number
# with hex() and then to a single character with chr().
#
# Declared without a prototype: prototypes are ignored on method calls, and an
# empty one only prevents the sub from being called with an explicit invocant.
sub char_value {
    my $self = shift;
    return chr(hex($self->ucs()));
}
# Returns the MARC-8 encoding of this code as a string of raw characters.
#
# marc() is read as a hex string. For every character set except the one
# named 'CJK' the whole string is treated as one hex value and turned into a
# single character. For 'CJK' the first six hex digits are read as three
# two-digit pairs, each pair becoming one character.
sub marc_value {
    my ($self) = @_;
    my $hex_digits = $self->marc();

    if ($self->charset_name() eq 'CJK') {
        # Offsets 0, 2 and 4 select the three two-digit hex pairs.
        return join q{},
            map { chr(hex(substr($hex_digits, $_, 2))) } (0, 2, 4);
    }

    return chr(hex($hex_digits));
}
# Returns the name of this code's character set rather than its code.
#
# The lookup is delegated to MARC::Charset::Constants::charset_name(), which
# is handed the value returned by charset_value().
#
# Declared without a prototype: prototypes are ignored on method calls, and an
# empty one only prevents the sub from being called with an explicit invocant.
sub charset_name {
    my $self = shift;
    return MARC::Charset::Constants::charset_name($self->charset_value());
}
# Returns a one-line, human-readable description of this code.
#
# The line is made of the name, the charset code, the MARC value and the UCS
# value, each followed by a space; ' combining' is appended when
# is_combining() is true.
sub to_string {
    my ($code) = @_;

    my @pieces = (
        $code->name() . ': ',
        'charset_code=' . $code->charset() . ' ',
        'marc=' . $code->marc() . ' ',
        'ucs=' . $code->ucs() . ' ',
    );
    push @pieces, ' combining' if $code->is_combining();

    return join q{}, @pieces;
}
# Returns the key used to look this code up by its MARC-8 identity.
#
# The key is charset_value() and marc_value() joined by a colon.
sub marc8_hash_code {
    my ($code) = @_;
    my @key_parts = ($code->charset_value(), $code->marc_value());
    return sprintf('%s:%s', @key_parts);
}
# Returns the key used to look this code up by its UCS value.
#
# ucs() is read as a hex string and the result is its integer value.
sub utf8_hash_code {
    my ($code) = @_;
    my $code_point = hex($code->ucs());
    return int($code_point);
}
# Returns 'G0' or 'G1' for this code's character set.
#
# 'G0' is returned when charset_value() equals one of the sets listed in
# @g0_sets below; every other value yields 'G1'.
sub default_charset_group {
    my ($code) = @_;
    my $set = $code->charset_value();

    my @g0_sets = (
        ASCII_DEFAULT,
        GREEK_SYMBOLS,
        SUBSCRIPTS,
        SUPERSCRIPTS,
        BASIC_LATIN,
        BASIC_ARABIC,
        BASIC_CYRILLIC,
        BASIC_GREEK,
        BASIC_HEBREW,
        CJK,
    );

    for my $g0_set (@g0_sets) {
        return 'G0' if $set eq $g0_set;
    }

    return 'G1';
}
# Returns the escape sequence that switches to this code's character set.
#
# The sequence depends on charset_value():
#   * ASCII_DEFAULT, GREEK_SYMBOLS, SUBSCRIPTS, SUPERSCRIPTS:
#       ESCAPE followed by the set value;
#   * BASIC_LATIN, BASIC_ARABIC, BASIC_CYRILLIC, BASIC_GREEK, BASIC_HEBREW:
#       ESCAPE, SINGLE_G0_A, then the set value;
#   * EXTENDED_ARABIC, EXTENDED_LATIN, EXTENDED_CYRILLIC:
#       ESCAPE, SINGLE_G1_A, then the set value;
#   * CJK:
#       ESCAPE, MULTI_G0_A, then CJK.
#
# Any other set value yields an empty string.
sub get_escape {
    my $self    = shift;
    my $charset = $self->charset_value();

    return ESCAPE . $charset
        if $charset eq ASCII_DEFAULT
        or $charset eq GREEK_SYMBOLS
        or $charset eq SUBSCRIPTS
        or $charset eq SUPERSCRIPTS;

    # ASCII_DEFAULT is not tested again here: the rule above has already
    # returned for it, so a second test could never match.
    return ESCAPE . SINGLE_G0_A . $charset
        if $charset eq BASIC_LATIN
        or $charset eq BASIC_ARABIC
        or $charset eq BASIC_CYRILLIC
        or $charset eq BASIC_GREEK
        or $charset eq BASIC_HEBREW;

    return ESCAPE . SINGLE_G1_A . $charset
        if $charset eq EXTENDED_ARABIC
        or $charset eq EXTENDED_LATIN
        or $charset eq EXTENDED_CYRILLIC;

    return ESCAPE . MULTI_G0_A . CJK
        if $charset eq CJK;

    # Explicit result for unrecognised sets. Without it the sub falls off the
    # end and the result is whatever the last evaluated expression left
    # behind (the false value of the final comparison); an empty string keeps
    # that outcome for callers while making it deliberate.
    return q{};
}
# Returns the character set value as a character, not the hex sequence.
#
# charset() is read as a hex string and converted to the character with that
# code.
sub charset_value {
    my ($code) = @_;
    my $hex_digits = $code->charset();
    return chr(hex($hex_digits));
}

1;