/usr/local/CPAN/Regexp-Ethiopic/Regexp/Ethiopic.pm
package Regexp::Ethiopic;
use base qw(Exporter);
use utf8;
BEGIN
{
use strict;
use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS %EthiopicClasses
$ááá $á«áᥠ$á£ááµ $á«á¥á $áááµ $á³áµáµ $á³á¥á
$ááá°_ááá $ááá°_á«áᥠ$ááá°_á£ááµ $ááá°_á«á¥á $ááá°_áááµ);
$VERSION = "0.15";
@EXPORT_OK = qw(%EthiopicClasses &getForm &setForm &subForm &formatForms
$ááá $á«áᥠ$á£ááµ $á«á¥á $áááµ $á³áµáµ $á³á¥á
$ááá°_ááá $ááá°_á«áᥠ$ááá°_á£ááµ $ááá°_á«á¥á $ááá°_áááµ
);
%EXPORT_TAGS = ( forms => [qw(
$ááá $á«áᥠ$á£ááµ $á«á¥á $áááµ $á³áµáµ $á³á¥á
$ááá°_ááá $ááá°_á«áᥠ$ááá°_á£ááµ $ááá°_á«á¥á $ááá°_áááµ)],
utils => [qw(&getForm &setForm &subForm &formatForms &isFamilyOf)]
);
%EthiopicClasses =(
1 => "ááááá á¨á°á¸ááá á¨á°á¸áááá á¨á¸áááá á¨á°á¸áááá á¨á°á¸ááá",
2 => "ááááá¡á©á±á¹ááá¡á©á±á¹áááá¡á©á¹áááá¡á©á±á¹áááá¡á©á±á¹ááá",
3 => "ááááá¢áªá²áºááá¢áªá²áºáááá¢áªáºáááá¢áªá²áºáááá¢áªá²áºááá",
4 => "ááááá£á«á³á»ááá£á«á³á»áááá£á«á»áááá£á«á³á»áááá£á«á³á»ááá",
5 => "ááááá¤á¬á´á¼ááá¤á¬á´á¼áááá¤á¬á¼áááá¤á¬á´á¼áááá¤á¬á´á¼ááá",
6 => "á
áááá¥ááµá½á
áá¥ááµá½á
ááá¥áá½áááá¥ááµá½á
ááá¥ááµá½á
áá",
7 => "ááááá¦á®á¶á¾ááá¦á®á¶á¾áááá¦á®á¾áááá¦á®á¶á¾áááá¦á®á¶á¾ááá",
8 => "áááá°ááá§",
9 => "ááááµá
á",
10 => "áááá²áá",
11 => "áááá§á¯á·á¿ááá§á¯á·á¿áááá³ááá§á·á¿ááá§á¯á·á¿áá",
12 => "áááá´áá",
á => "á-á",
á => "á-á",
á => "á-á",
á => "á-á",
á => "á -á§",
ᨠ=> "á¨-á¯",
á° => "á°-á·",
Ḡ=> "á¸-á¿",
á => "á-áá-á",
á => "á-áá-á",
á => "á -á§",
ᨠ=> "á¨-á¯",
á° => "á°-á·",
Ḡ=> "á¸-á¿",
á => "á-áá-á",
á => "á-á",
á => "á-á",
á => "á -á§",
ᨠ=> "á¨-á®á°á²-áµ",
Ḡ=> "á¸-á¾á-á
",
á => "á-á",
á => "á-á",
á => "á-á",
á => "á -á§",
ᨠ=> "á¨-á®",
á° => "á°-á·",
Ḡ=> "á¸-á¿",
á => "á-á",
á => "á-áá-á",
á => "á-á",
á => "á -á§",
ᨠ=> "á¨-á¯",
á° => "á°-á·",
Ḡ=> "á¸-á¿",
á => "á-á",
á => "á-á",
á => "á-á",
á áá => "á©-á¼"
);
$EthiopicClasses{'ááá'}
= $EthiopicClasses{geez}
= $EthiopicClasses{1}
;
$EthiopicClasses{'á«áá¥'}
= $EthiopicClasses{kaib}
= $EthiopicClasses{2}
;
$EthiopicClasses{'á£ááµ'}
= $EthiopicClasses{salis}
= $EthiopicClasses{3}
;
$EthiopicClasses{'á«á¥á'}
= $EthiopicClasses{rabi}
= $EthiopicClasses{4}
;
$EthiopicClasses{'áááµ'}
= $EthiopicClasses{hamis}
= $EthiopicClasses{5}
;
$EthiopicClasses{'á³áµáµ'}
= $EthiopicClasses{sadis}
= $EthiopicClasses{6}
;
$EthiopicClasses{'á³á¥á'}
= $EthiopicClasses{sabi}
= $EthiopicClasses{7}
;
$EthiopicClasses{'ááá°á¡ááá'}
= $EthiopicClasses{'zemede:geez'}
= $EthiopicClasses{8}
;
$EthiopicClasses{'ááá°á¡á«áá¥'}
= $EthiopicClasses{'zemede:kaib'}
= $EthiopicClasses{9}
;
$EthiopicClasses{'ááá°á¡á£ááµ'}
= $EthiopicClasses{'zemede:salis'}
= $EthiopicClasses{10}
;
$EthiopicClasses{'ááá°á¡á«á¥á'}
= $EthiopicClasses{'zemede:rabi'}
= $EthiopicClasses{11}
;
$EthiopicClasses{'ááá°á¡áááµ'}
= $EthiopicClasses{'zemede:hamis'}
= $EthiopicClasses{12}
;
$EthiopicClasses{'ahaz'}
= $EthiopicClasses{'á áá'}
;
($ááá, $á«áá¥, $á£ááµ, $á«á¥á, $áááµ, $á³áµáµ, $á³á¥á,
$ááá°_ááá, $ááá°_á«áá¥, $ááá°_á£ááµ, $ááá°_á«á¥á, $ááá°_áááµ) = (1 .. 12);
}
sub import
{
my @args = ( shift ); # package
foreach (@_) {
if ( /overload/o ) {
use overload;
overload::constant 'qr' => \&getRe;
}
elsif ( /:forms/o ) {
Regexp::Ethiopic->export_to_level (1, $args[0], ':forms'); # this works too...
}
elsif ( /:utils/o ) {
Regexp::Ethiopic->export_to_level (1, $args[0], ':utils'); # this works too...
}
else {
push (@args, $_);
}
}
if ($#args) {
Regexp::Ethiopic->export_to_level (1, @args); # this works too...
}
}
sub getForm
{
my ($áá) = @_;
my $form = ord($áá)%8 + 1;
if ( $form == 8 || $áá =~ /[áááá³áá]/o ) {
$form = 11;
}
elsif ( $áá =~ /[ááááµá
á]/o ) {
$form = 9;
}
elsif ( $áá =~ /[áááá°áááááá²áááááá´áá]/o ) {
$form += 7;
}
$form;
}
sub setForm
{
my ($áá, $form) = @_;
if ( $áá =~ /[á-áá-áá-áá°-áµá-á
á-á]/o ) {
$áá =~ s/[á-á]/á
/o;
$áá =~ s/[á-á]/á/o;
$áá =~ s/[á-á]/á/o;
$áá =~ s/[á°-áµ]/á¨/o;
$áá =~ s/[á-á
]/á¸/o;
$áá =~ s/[á-á]/á/o;
}
$form = 4 if ( $áá =~ /[áááá³áá]/o );
$form -= 7 if ( $form == 8 || $form == 10 || $form == 12 );
$form = 8 if ( $form == 11 );
$form = 6 if ( $form == 9 );
chr ( ord($áá) - ord($áá)%8 + $form-1 );
}
sub subForm
{
my ($set, $get) = @_;
# e.g. s/([=#á#=])/subForm($1, á)/eg;
setForm ( $set, getForm ( $get ) );
}
sub isFamilyOf
{
my ($a,$b) = @_;
my $gez = setForm($a,1);
my $re = getRe( "[#$gez#]" );
( $b =~ /$re/ );
}
sub formatForms
{
my ($format, $string) = @_;
my @chars = split ( //, $string );
if ( @chars != ($format =~ s/%/%/g) ) {
$format =~ s/\p{Ethiopic}//g;
warn ( "\"$string\" is of different length from $format." );
return;
}
foreach (@chars) {
$format =~ s/%(\d+)/setForm($_, $1)/e;
}
$format;
}
sub handleChars
{
my ($chars,$form) = @_;
return ( $EthiopicClasses{$form} ) if ( $chars eq "all" );
my $re;
$chars =~ s/(\w)(?=\w)/$1,/og;
my @Chars = split ( /,/, $chars );
foreach (@Chars) {
if ( /(\w)-(\w)/o ) {
my ($a,$b) = ($1,$2);
foreach my $char (sort keys %EthiopicClasses) {
next if ( length($char) > 1 );
next unless ( (ord($a) <= ord($char)) && (ord($char) <= ord($b)) );
if ( $form eq "all" ) {
$re .= $EthiopicClasses{$char};
}
else {
$EthiopicClasses{$form} =~ /([$EthiopicClasses{$char}])/;
$re .= $1;
}
}
}
else {
my $geez = setForm( $_, $ááá);
if ( $form eq "all" ) {
$re .= $EthiopicClasses{$geez};
}
else {
$EthiopicClasses{$form} =~ /([$EthiopicClasses{$geez}])/;
$re .= $1;
}
}
}
$re;
}
sub setRange
{
my ($chars,$forms,$not) = @_;
$not ||= $_[3];
my $re;
if ( $forms eq "all" ) {
$re = handleChars ( $chars, $forms );
}
else {
my @Forms = split ( /,/, $forms);
foreach (@Forms) {
if ( /(\d)-(\d)/o ) {
my ($a,$b) = ($1,$2);
foreach my $form ($a..$b) {
$re .= handleChars ( $chars, $form );
}
}
else {
my $form = $_;
$re .= handleChars ( $chars, $form );
}
}
}
($re) ? ($not) ? "[$not$re]" : "[$re]" : "";
}
sub getRe
{
$_ = ($#_) ? $_[1] : $_[0];
s/\[:(\p{Ethiopic}+|\w+):\]/($EthiopicClasses{$1}) ? "[$EthiopicClasses{$1}]" : "[:$1:]"/eog;
s/\[#(\p{Ethiopic}|\d)#\]/($EthiopicClasses{$1}) ? "[$EthiopicClasses{$1}]" : ""/eog;
s/\[#(\^)?([\d,-]+)#\]/setRange("all",$2,$1)/eog;
s/\[#(\^)?([\p{Ethiopic},-]+)#\]/setRange($2,"all",$1)/eog;
# print " IN: $_\n";
#
# for some stupid reason the below doesn't work, so \w
# is used in place of \p{Ethiopic}, dangerous...
#
# test 9 in examples/overload.pl will fail
#
# s/(\p{Ethiopic})\{#([\d,-]+)#\}/setRange($1,$2)/eog;
s/(\w)\{#([\d,-]+)#\}/setRange($1,$2)/eog;
s/\[(\^)?(\p{Ethiopic}+.*?)\]\{(\^)?#([\d,-]+)#\}/setRange($2,$4,$1,$3)/eog;
# print " OUT: $_\n";
$_;
}
#########################################################
# Do not change this, Do not put anything below this.
# File must return "true" value at termination
1;
##########################################################
__END__
=head1 NAME
Regexp::Ethiopic - Regular Expressions Support for Ethiopic Script.
=head1 SYNOPSIS
#
# Overloading Perl REs:
#
use utf8;
use Regexp::Ethiopic qw(:forms overload setForm);
:
s/([#2#])/setForm($1,$á³áµáµ)/eg;
s/([áá¨á á]{#2#})/setForm($1,$á³áµáµ)/eg;
s/([áá¨á á]{#1,3#})/setForm($1,$á³áµáµ)/eg;
s/([áá¨á á]{#1-3,7#})/setForm($1,$á³áµáµ)/eg;
s/([#á#])/subForm('á¸',$1)/eg; # substitute, a 'á¸' for a 'á' in the form found for the 'á'
if ( /[#á#]/ ) {
#
# do something
#
:
}
:
:
#
# Without overloading:
#
use utf8;
require Regexp::Ethiopic;
my $string = "[áá¨á á]{#1-3,7#}";
my $re = Regexp::Ethiopic::getRe ( $string );
s/abc($re)xyz/"abc".Regexp::Ethipic::setForm($1,6)."xyz"/eg;
=head1 DESCRIPTION
The Regexp::Ethiopic module provides POSIX style character class
definitions for working with the Ethiopic syllabary. The character
classes provided by the Regexp::Ethiopic package correspond to inate
properties of the script and are language independent.
The Regexp::Ethiopic package is NOT derived from the Regexp class
and may not be instantiated into an object. Regexp::Ethiopic can
optionally export the utility functions C<getForm>, C<setForm>,
C<subForm> and C<formatForms> (or all with the C<:utils> pragma)
to query or set the form of an Ethiopic character. Tags of variables
in the form names set to form values may be exported under the C<:forms>
pragma.
See the files in the doc/ and examples/ directories that are included
with this package.
=head2 Substituion Utilities
=head3 getForm
A utility function to query the "form" of an Ethiopic syllable. It
will return an integer between 1 and 12 corresponding to the [#\d+#]
classes.
print getForm ( "á " ), "\n"; # prints 1
=head3 setForm
A utility function to set the form number of a syllable. The form
number must be an integer between 1 and 12 corresponding to the [#\d+#]
classes.
s/(.)/setForm($1, 1)/eg;
=head3 subForm
A utility function to set the form number of a syllable based on the
form of another syllable.
s/(\w+)([#á#])/$1.subForm('á¸', $2)/eg;
=head3 formatForms
A utility function somewhat analogous to C<sprintf> for a sequence of
syllables:
print formatForms ( "%1%2%3%4", "á á áá°" ), "\n"; # prints á á¡áá³
=head1 LIMITATIONS
The overloading mechanism only applies to the constant part of the RE. The
following would not be handled by the Regexp::Ethipic package as expected:
use Regexp::Ethiopic 'overload';
my $x = "á¨";
:
:
if ( /[#$x#]/ ) {
:
:
}
The package never gets to see the variable C<$x> to then
perform the RE expansion. The work around is to use the package as per:
use Regexp::Ethiopic 'overload';
my $x = "á¨";
:
:
my $re = Regexp::Ethiopic::getRe ( "[#$x#]" );
if ( /$re/ ) {
:
:
}
This works as expected at the cost of one extra step. The overloading and
functional modes of the Regexp::Ethiopic package may be used together
without conflict.
=head1 REQUIRES
Works perfectly with Perl 5.8.0, may work with Perl 5.6.x but has
not yet been tested.
=head1 BUGS
None presently known.
=head1 AUTHOR
Daniel Yacob, L<dyacob@cpan.org|mailto:dyacob@cpan.org>
=head1 SEE ALSO
Included with this package:
doc/index.html examples/overload.pl
examples/utils.pl examples/asfunction.pl
=cut