| Encode-Arabic documentation | Contained in the Encode-Arabic distribution. |
Encode::Arabic::ArabTeX::ZDMG - ZDMG phonetic transcription of Arabic using the ArabTeX notation
$Revision: 676 $ $Date: 2008-08-14 13:25:26 +0200 (Thu, 14 Aug 2008) $
use Encode::Arabic::ArabTeX::ZDMG; # imports just like 'use Encode' would, plus extended options
while ($line = <>) { # maps the ArabTeX notation for Arabic into the Latin symbols
print encode 'utf8', decode 'zdmg', $line; # 'ZDMG' alias 'ArabTeX-ZDMG'
}
# ArabTeX lower ASCII transliteration <--> Latin phonetic transcription, ZDMG style
$string = decode 'ArabTeX-ZDMG', $octets;
$octets = encode 'ArabTeX-ZDMG', $string;
ArabTeX is an excellent extension to TeX/LaTeX designed for typesetting the right-to-left scripts of the Orient. It comes up with very intuitive and comprehensible lower ASCII transliterations, the expressive power of which is even better than that of the scripts.
Encode::Arabic::ArabTeX::ZDMG implements the rules needed for proper interpretation of the ArabTeX notation of Arabic into the phonetic transcription in the ZDMG style. The conversion ifself is done by Encode::Mapper, and the user interface is built on the Encode::Encoding module.
Relevant guidance is given in Encode::Arabic::ArabTeX, from which this module inherits. The transformation rules are, however, quite different ;)
Encode::Arabic::ArabTeX, Encode::Arabic, Encode::Mapper, Encode::Encoding, Encode
ArabTeX system ftp://ftp.informatik.uni-stuttgart.de/pub/arabtex/arabtex.htm
Klaus Lagally http://www.informatik.uni-stuttgart.de/ifi/bs/people/lagall_e.htm
ArabTeX extensions http://sourceforge.net/projects/encode-arabic/
ArabXeTeX http://tug.ctan.org/info/?id=arabxetex
Encode Arabic: Exercise in Functional Parsinghttp://ufal.mff.cuni.cz/padt/online/2006/06/encode-arabic.html
Otakar Smrz, http://ufal.mff.cuni.cz/~smrz/
eval { 'E<lt>' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E<gt>' }
Perl is also designed to make the easy jobs not that easy ;)
Copyright 2003-2008 by Otakar Smrz
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Encode-Arabic documentation | Contained in the Encode-Arabic distribution. |
# ##################################################################### Otakar Smrz, 2003/08/05 # # Encoding of Arabic: ArabTeX Notation by Klaus Lagally, ZDMG ################################# # $Id: ZDMG.pm 676 2008-08-14 11:25:26Z smrz $ package Encode::Arabic::ArabTeX::ZDMG; use 5.008; use strict; use warnings; use Carp; our $VERSION = do { q $Revision: 676 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Arabic::ArabTeX (); use base 'Encode::Arabic::ArabTeX'; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('ZDMG', 'ArabTeX-ZDMG'); use Encode::Mapper ':others', ':silent', ':join'; our %options; # records of options per package .. global register our %option; # options of the caller package .. used with local sub import { # perform import as if Encode were used one level before this module if (defined $_[1] and $_[1] eq ':xml') { # interfere little with possible Encode's options Encode::Mapper->options ( 'override' => [ # override rules of these LHS .. no other tricks ^^ ( # combinations of '<' and '>' with the other bytes map { my $x = chr $_; "<" . $x, [ "<" . $x, ">" ], # propagate the '>' sign implying .. ">" . $x, [ $x, ">" ], # .. preservation of the bytes } 0x00 .. 0x3B, 0x3D, 0x3F .. 0xFF ), ">>", ">", # stop the whole process .. "<>", "<>", # .. do not even start it "><", [ "<", ">" ], # rather than nested '<' and '>', .. "<<", [ "<<", ">" ], ">\\<", [ "<", ">" ], # .. prefer these escape sequences ">\\\\", [ "\\", ">" ], ">\\>", [ ">", ">" ], ">", ">", # singular symbols may migrate right .. "<", "<", # .. or preserve the rest of the data ] ); splice @_, 1, 1; } if (defined $_[1] and $_[1] eq ':describe') { __PACKAGE__->options($_[1]); splice @_, 1, 1; } require Encode; Encode->export_to_level(1, @_); # here comes the only trick ^^ } sub encoder ($;%) { my ($cls, %opt) = @_; my $encoder = []; $encoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], "\x{0054}", "\\cap t", "\x{0074}", "t", "\x{1E6E}", "\\cap _t", "\x{1E6F}", "_t", "\x{0054}\x{0331}", "\\cap _t", "\x{0074}\x{0331}", "_t", "\x{0044}", "\\cap d", "\x{0064}", "d", "\x{1E0E}", "\\cap _d", "\x{1E0F}", "_d", "\x{0044}\x{0331}", "\\cap _d", "\x{0064}\x{0331}", "_d", "\x{0052}", "\\cap r", "\x{0072}", "r", "\x{005A}", "\\cap z", "\x{007A}", "z", "\x{0053}", "\\cap s", "\x{0073}", "s", "\x{0160}", "\\cap ^s", "\x{0161}", "^s", "\x{0053}\x{030C}", "\\cap ^s", "\x{0073}\x{030C}", "^s", "\x{1E62}", "\\cap .s", "\x{1E63}", ".s", "\x{0053}\x{0323}", "\\cap .s", "\x{0073}\x{0323}", ".s", "\x{1E0C}", "\\cap .d", "\x{1E0D}", ".d", "\x{0044}\x{0323}", "\\cap .d", "\x{0064}\x{0323}", ".d", "\x{1E6C}", "\\cap .t", "\x{1E6D}", ".t", "\x{0054}\x{0323}", "\\cap .t", "\x{0074}\x{0323}", ".t", "\x{1E92}", "\\cap .z", "\x{1E93}", ".z", "\x{005A}\x{0323}", "\\cap .z", "\x{007A}\x{0323}", ".z", "\x{004C}", "\\cap l", "\x{006C}", "l", "\x{004E}", "\\cap n", "\x{006E}", "n", # "\x{0054}", "\\cap T", "\x{0074}", "T", # "\x{004E}", "\\cap N", "\x{006E}", "N", # "\x{0057}", "\\cap W", "\x{0077}", "W", "\x{0041}", "\\cap a", "\x{0061}", "a", "\x{0045}", "\\cap e", "\x{0065}", "e", "\x{0049}", "\\cap i", "\x{0069}", "i", "\x{004F}", "\\cap o", "\x{006F}", "o", "\x{0055}", "\\cap u", "\x{0075}", "u", "\x{0100}", "\\cap A", "\x{0101}", "A", "\x{0041}\x{0304}", "\\cap A", "\x{0061}\x{0304}", "A", "\x{0112}", "\\cap E", "\x{0113}", "E", "\x{0045}\x{0304}", "\\cap E", "\x{0065}\x{0304}", "E", "\x{012A}", "\\cap I", "\x{012B}", "I", "\x{0049}\x{0304}", "\\cap I", "\x{0069}\x{0304}", "I", "\x{014C}", "\\cap O", "\x{014D}", "O", "\x{004F}\x{0304}", "\\cap O", "\x{006F}\x{0304}", "O", "\x{016A}", "\\cap U", "\x{016B}", "U", "\x{0055}\x{0304}", "\\cap U", "\x{0075}\x{0304}", "U", "\x{02BC}", "\"", "\x{02BE}", "'", "\x{02BF}", "`", "\x{0042}", "\\cap b", "\x{0062}", "b", "\x{01E6}", "\\cap ^g", "\x{01E7}", "^g", "\x{0047}\x{030C}", "\\cap ^g", "\x{0067}\x{030C}", "^g", "\x{1E24}", "\\cap .h", "\x{1E25}", ".h", "\x{0048}\x{0323}", "\\cap .h", "\x{0068}\x{0323}", ".h", "\x{1E2A}", "\\cap _h", "\x{1E2B}", "_h", "\x{0048}\x{032E}", "\\cap _h", "\x{0068}\x{032E}", "_h", "\x{0120}", "\\cap .g", "\x{0121}", ".g", "\x{0047}\x{0307}", "\\cap .g", "\x{0067}\x{0307}", ".g", "\x{0046}", "\\cap f", "\x{0066}", "f", "\x{0051}", "\\cap q", "\x{0071}", "q", "\x{004B}", "\\cap k", "\x{006B}", "k", "\x{004D}", "\\cap m", "\x{006D}", "m", "\x{0048}", "\\cap h", "\x{0068}", "h", "\x{0057}", "\\cap w", "\x{0077}", "w", "\x{0059}", "\\cap y", "\x{0079}", "y", "\x{0050}", "\\cap p", "\x{0070}", "p", "\x{0056}", "\\cap v", "\x{0076}", "v", "\x{0047}", "\\cap g", "\x{0067}", "g", "\x{0043}", "\\cap c", "\x{0063}", "c", "\x{010C}", "\\cap ^c", "\x{010D}", "^c", "\x{0043}\x{030C}", "\\cap ^c", "\x{0063}\x{030C}", "^c", "\x{0106}", "\\cap ,c", "\x{0107}", ",c", "\x{0043}\x{0301}", "\\cap ,c", "\x{0063}\x{0301}", ",c", "\x{017D}", "\\cap ^z", "\x{017E}", "^z", "\x{005A}\x{030C}", "\\cap ^z", "\x{007A}\x{030C}", "^z", "\x{00D1}", "\\cap ^n", "\x{00F1}", "^n", "\x{004E}\x{0303}", "\\cap ^n", "\x{006E}\x{0303}", "^n", "\x{004C}\x{0303}", "\\cap ^l", "\x{006C}\x{0303}", "^l", "\x{0052}\x{0307}", "\\cap .r", "\x{0072}\x{0307}", ".r", ); no strict 'refs'; ${ $cls . '::encoder' } = $encoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::encoder' }}; } return ${ $cls . '::encoder' }; } sub decoder ($;$$) { my ($cls, undef, undef) = @_; my $decoder = []; my @sunny = ( [ "t", "\x{0074}" ], [ "_t", "\x{0074}\x{0331}" ], # "\x{1E6F}" [ "d", "\x{0064}" ], [ "_d", "\x{0064}\x{0331}" ], # "\x{1E0F}" [ "r", "\x{0072}" ], [ "z", "\x{007A}" ], [ "s", "\x{0073}" ], [ "^s", "\x{0073}\x{030C}" ], # "\x{0161}" [ ".s", "\x{0073}\x{0323}" ], # "\x{1E63}" [ ".d", "\x{0064}\x{0323}" ], # "\x{1E0D}" [ ".t", "\x{0074}\x{0323}" ], # "\x{1E6D}" [ ".z", "\x{007A}\x{0323}" ], # "\x{1E93}" [ "l", "\x{006C}" ], [ "n", "\x{006E}" ], ); my @extra = ( [ "T", "\x{0074}" ], [ "H", "\x{0068}" ], # "" [ "N", "\x{006E}" ], [ "W", "\x{0077}" ], # "" ); my @vowel = ( [ "a", "\x{0061}" ], [ "_a", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aA", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aY", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aU", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aI", "\x{0061}\x{0304}" ], # "\x{0101}" [ "A", "\x{0061}\x{0304}" ], # "\x{0101}" [ "^A", "\x{0061}\x{0304}" ], # "\x{0101}" [ "e", "\x{0065}" ], [ "E", "\x{0065}\x{0304}" ], # "\x{0113}" [ "i", "\x{0069}" ], [ "_i", "\x{0069}\x{0304}" ], # "\x{012B}" [ "I", "\x{0069}\x{0304}" ], # "\x{012B}" [ "^I", "\x{0069}\x{0304}" ], # "\x{012B}" [ "_I", "\x{0069}" ], [ "o", "\x{006F}" ], [ "O", "\x{006F}\x{0304}" ], # "\x{014D}" [ "u", "\x{0075}" ], [ "_u", "\x{0075}\x{0304}" ], # "\x{016B}" [ "U", "\x{0075}\x{0304}" ], # "\x{016B}" [ "^U", "\x{0075}\x{0304}" ], # "\x{016B}" [ "_U", "\x{0075}" ], [ "Y", "\x{0061}\x{0304}" ], # "\x{0101}" ); my @minor = ( [ "'", "\x{02BE}" ], # "\x{02BC}" [ "`", "\x{02BF}" ], # "\x{02BB}" ); my @empty = ( [ "\"", "", ], # "\x{02BC}" [ "|", "", ], [ "B", "", ], ); my @moony = ( [ "b", "\x{0062}" ], [ "^g", "\x{0067}\x{030C}" ], # "\x{01E7}" [ ".h", "\x{0068}\x{0323}" ], # "\x{1E25}" [ "_h", "\x{0068}\x{032E}" ], # "\x{1E2B}" [ ".g", "\x{0067}\x{0307}" ], # "\x{0121}" [ "f", "\x{0066}" ], [ "q", "\x{0071}" ], [ "k", "\x{006B}" ], [ "m", "\x{006D}" ], [ "h", "\x{0068}" ], [ "w", "\x{0077}" ], [ "y", "\x{0079}" ], [ "p", "\x{0070}" ], [ "v", "\x{0076}" ], [ "g", "\x{0067}" ], [ "c", "\x{0063}" ], [ "^c", "\x{0063}\x{030C}" ], # "\x{010D}" [ ",c", "\x{0063}\x{0301}" ], # "\x{0107}" [ "^z", "\x{007A}\x{030C}" ], # "\x{017E}" [ "^n", "\x{006E}\x{0303}" ], # "\x{00F1}" [ "^l", "\x{006C}\x{0303}" ], [ ".r", "\x{0072}\x{0307}" ], ); $decoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], # definite article assimilation .. non-linguistic ( map { "l-" . $_->[0] x 2, [ '', $_->[0] . "-" . $_->[0] ], } @sunny, @moony ), # initial vowel tying ( map { my $x = $_; map { my $y = $_; map { $x->[0] . $_ . $y, $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x09" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x0A" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x0D" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x20" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" } "-", "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "a", "e", "i", "o", "u" } @vowel ), # silence the silent "WA", [ "", "W" ], "UW", [ "", "U" ], "UA", [ "", "U" ], "NA", [ "", "N" ], "NY", [ "", "N" ], "NU", [ "", "N" ], "N_A", [ "", "N" ], # regular capitalization ( map { $_->[0], $_->[1], "\\cap\x09" . $_->[0], ucfirst $_->[1], "\\cap\x0A" . $_->[0], ucfirst $_->[1], "\\cap\x0D" . $_->[0], ucfirst $_->[1], "\\cap\x20" . $_->[0], ucfirst $_->[1], } @sunny, @moony, @empty, @vowel, @extra ), ( map { $_->[0] . "i", $_->[1] . "i", $_->[0] . "u", $_->[1] . "u", "\\cap\x09" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x0A" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x0D" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x20" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x09" . $_->[0] . "u", ucfirst $_->[1] . "u", "\\cap\x0A" . $_->[0] . "u", ucfirst $_->[1] . "u", "\\cap\x0D" . $_->[0] . "u", ucfirst $_->[1] . "u", "\\cap\x20" . $_->[0] . "u", ucfirst $_->[1] . "u", } @sunny, @moony, @empty ), ( map { my $x = $_; map { $x->[0] . "i" . $_, [ $x->[1], "i" . $_ ], $x->[0] . "u" . $_, [ $x->[1], "u" . $_ ], "\\cap\x09" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x0A" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x0D" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x20" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x09" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], "\\cap\x0A" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], "\\cap\x0D" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], "\\cap\x20" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], } "-", "\x09", "\x0A", "\x0D", "\x20" } @sunny, @moony, @empty ), # initial vowel assimilation ( map { "i" . $_, [ '', "I" ], "u" . $_, [ '', "U" ], "\\cap\x09" . "i" . $_, [ '', "\\cap\x09" . "I" ], "\\cap\x0A" . "i" . $_, [ '', "\\cap\x0A" . "I" ], "\\cap\x0D" . "i" . $_, [ '', "\\cap\x0D" . "I" ], "\\cap\x20" . "i" . $_, [ '', "\\cap\x20" . "I" ], "\\cap\x09" . "u" . $_, [ '', "\\cap\x09" . "U" ], "\\cap\x0A" . "u" . $_, [ '', "\\cap\x0A" . "U" ], "\\cap\x0D" . "u" . $_, [ '', "\\cap\x0D" . "U" ], "\\cap\x20" . "u" . $_, [ '', "\\cap\x20" . "U" ], } "y", "w" # "'" ), # capitalization of minors ( map { $_->[0], $_->[1], $_->[0] . "i", $_->[1] . "i", $_->[0] . "u", $_->[1] . "u", "\\cap\x09" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x0A" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x0D" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x20" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x09" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x0A" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x0D" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x20" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x09" . $_->[0] . "u", $_->[1] . ucfirst "u", "\\cap\x0A" . $_->[0] . "u", $_->[1] . ucfirst "u", "\\cap\x0D" . $_->[0] . "u", $_->[1] . ucfirst "u", "\\cap\x20" . $_->[0] . "u", $_->[1] . ucfirst "u", } @minor ), ( map { my $x = $_; map { $x->[0] . "i" . $_, [ $x->[1], "i" . $_ ], $x->[0] . "u" . $_, [ $x->[1], "u" . $_ ], "\\cap\x09" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x0A" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x0D" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x20" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x09" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], "\\cap\x0A" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], "\\cap\x0D" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], "\\cap\x20" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], } "-", "\x09", "\x0A", "\x0D", "\x20" } @minor ), # white-space collapsing ( map { "\\cap\x09" . $_, [ '', "\\cap " ], "\\cap\x0A" . $_, [ '', "\\cap " ], "\\cap\x0D" . $_, [ '', "\\cap " ], "\\cap\x20" . $_, [ '', "\\cap " ], } "\x09", "\x0A", "\x0D", "\x20" ), ); no strict 'refs'; ${ $cls . '::decoder' } = $decoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::decoder' }}; } return ${ $cls . '::decoder' }; } 1; __END__