# /usr/local/CPAN/MKDoc-XML/MKDoc/XML/Decode/XHTML.pm


package MKDoc::XML::Decode::XHTML;
use warnings;
use strict;


# Portions (c) International Organization for Standardization 1986:
# Permission to copy in any form is granted for use with conforming SGML
# systems and applications as defined in ISO 8879, provided this notice is
# included in all copies.
#
# %ENTITY_2_CHAR maps a named character entity (the bare name, without the
# leading '&' and trailing ';') to the one-character string produced by
# chr() for its decimal code point.  It is declared with 'our', so it is a
# package variable that process() below reads directly.
#
# The entries are grouped by character block; the '*' notes inside each
# group are remarks about individual entities in that group.
our %ENTITY_2_CHAR = (
    
    # Latin1 characters
    'nbsp'     => chr(160),
    'iexcl'    => chr(161),
    'cent'     => chr(162),
    'pound'    => chr(163),
    'curren'   => chr(164),
    'yen'      => chr(165),
    'brvbar'   => chr(166),
    'sect'     => chr(167),
    'uml'      => chr(168),
    'copy'     => chr(169),
    'ordf'     => chr(170),
    'laquo'    => chr(171),
    'not'      => chr(172),
    'shy'      => chr(173),
    'reg'      => chr(174),
    'macr'     => chr(175),
    'deg'      => chr(176),
    'plusmn'   => chr(177),
    'sup2'     => chr(178),
    'sup3'     => chr(179),
    'acute'    => chr(180),
    'micro'    => chr(181),
    'para'     => chr(182),
    'middot'   => chr(183),
    'cedil'    => chr(184),
    'sup1'     => chr(185),
    'ordm'     => chr(186),
    'raquo'    => chr(187),
    'frac14'   => chr(188),
    'frac12'   => chr(189),
    'frac34'   => chr(190),
    'iquest'   => chr(191),
    'Agrave'   => chr(192),
    'Aacute'   => chr(193),
    'Acirc'    => chr(194),
    'Atilde'   => chr(195),
    'Auml'     => chr(196),
    'Aring'    => chr(197),
    'AElig'    => chr(198),
    'Ccedil'   => chr(199),
    'Egrave'   => chr(200),
    'Eacute'   => chr(201),
    'Ecirc'    => chr(202),
    'Euml'     => chr(203),
    'Igrave'   => chr(204),
    'Iacute'   => chr(205),
    'Icirc'    => chr(206),
    'Iuml'     => chr(207),
    'ETH'      => chr(208),
    'Ntilde'   => chr(209),
    'Ograve'   => chr(210),
    'Oacute'   => chr(211),
    'Ocirc'    => chr(212),
    'Otilde'   => chr(213),
    'Ouml'     => chr(214),
    'times'    => chr(215),
    'Oslash'   => chr(216),
    'Ugrave'   => chr(217),
    'Uacute'   => chr(218),
    'Ucirc'    => chr(219),
    'Uuml'     => chr(220),
    'Yacute'   => chr(221),
    'THORN'    => chr(222),
    'szlig'    => chr(223),
    'agrave'   => chr(224),
    'aacute'   => chr(225),
    'acirc'    => chr(226),
    'atilde'   => chr(227),
    'auml'     => chr(228),
    'aring'    => chr(229),
    'aelig'    => chr(230),
    'ccedil'   => chr(231),
    'egrave'   => chr(232),
    'eacute'   => chr(233),
    'ecirc'    => chr(234),
    'euml'     => chr(235),
    'igrave'   => chr(236),
    'iacute'   => chr(237),
    'icirc'    => chr(238),
    'iuml'     => chr(239),
    'eth'      => chr(240),
    'ntilde'   => chr(241),
    'ograve'   => chr(242),
    'oacute'   => chr(243),
    'ocirc'    => chr(244),
    'otilde'   => chr(245),
    'ouml'     => chr(246),
    'divide'   => chr(247),
    'oslash'   => chr(248),
    'ugrave'   => chr(249),
    'uacute'   => chr(250),
    'ucirc'    => chr(251),
    'uuml'     => chr(252),
    'yacute'   => chr(253),
    'thorn'    => chr(254),
    'yuml'     => chr(255),
    
    # C0 Controls and Basic Latin
    # These five entries are commented out, so the table has no mapping for
    # them and a lookup yields undef.
    # NOTE(review): presumably they are left to the XML-level decoder so that
    # markup-significant characters stay escaped -- confirm against callers.
    # 'quot' => chr(34),
    # 'amp' => chr(38),
    # 'apos' => chr(39),
    # 'lt' => chr(60),
    # 'gt' => chr(62),
    
    # Latin Extended-A
    'OElig'    => chr(338),
    'oelig'    => chr(339),
    'Scaron'   => chr(352),
    'scaron'   => chr(353),
    'Yuml'     => chr(376),
    
    # Spacing Modifier Letters
    'circ'     => chr(710),
    'tilde'    => chr(732),
    
    # General Punctuation
    # * lsaquo is proposed but not yet ISO standardized
    # * rsaquo is proposed but not yet ISO standardized
    'ensp'     => chr(8194),
    'emsp'     => chr(8195),
    'thinsp'   => chr(8201),
    'zwnj'     => chr(8204),
    'zwj'      => chr(8205),
    'lrm'      => chr(8206),
    'rlm'      => chr(8207),
    'ndash'    => chr(8211),
    'mdash'    => chr(8212),
    'lsquo'    => chr(8216),
    'rsquo'    => chr(8217),
    'sbquo'    => chr(8218),
    'ldquo'    => chr(8220),
    'rdquo'    => chr(8221),
    'bdquo'    => chr(8222),
    'dagger'   => chr(8224),
    'Dagger'   => chr(8225),
    'permil'   => chr(8240),
    'lsaquo'   => chr(8249),
    'rsaquo'   => chr(8250),
    'euro'     => chr(8364),
    
    # Mathematical, Greek and Symbolic characters for HTML
    # Latin Extended-B
    'fnof'     => chr(402),
    
    # Greek
    # * there is no Sigmaf, and no U+03A2 character either
    #   (hence the gap between 'Rho' => 929 and 'Sigma' => 931 below)
    'Alpha'    => chr(913),
    'Beta'     => chr(914),
    'Gamma'    => chr(915),
    'Delta'    => chr(916),
    'Epsilon'  => chr(917),
    'Zeta'     => chr(918),
    'Eta'      => chr(919),
    'Theta'    => chr(920),
    'Iota'     => chr(921),
    'Kappa'    => chr(922),
    'Lambda'   => chr(923),
    'Mu'       => chr(924),
    'Nu'       => chr(925),
    'Xi'       => chr(926),
    'Omicron'  => chr(927),
    'Pi'       => chr(928),
    'Rho'      => chr(929),
    'Sigma'    => chr(931),
    'Tau'      => chr(932),
    'Upsilon'  => chr(933),
    'Phi'      => chr(934),
    'Chi'      => chr(935),
    'Psi'      => chr(936),
    'Omega'    => chr(937),
    'alpha'    => chr(945),
    'beta'     => chr(946),
    'gamma'    => chr(947),
    'delta'    => chr(948),
    'epsilon'  => chr(949),
    'zeta'     => chr(950),
    'eta'      => chr(951),
    'theta'    => chr(952),
    'iota'     => chr(953),
    'kappa'    => chr(954),
    'lambda'   => chr(955),
    'mu'       => chr(956),
    'nu'       => chr(957),
    'xi'       => chr(958),
    'omicron'  => chr(959),
    'pi'       => chr(960),
    'rho'      => chr(961),
    'sigmaf'   => chr(962),
    'sigma'    => chr(963),
    'tau'      => chr(964),
    'upsilon'  => chr(965),
    'phi'      => chr(966),
    'chi'      => chr(967),
    'psi'      => chr(968),
    'omega'    => chr(969),
    'thetasym' => chr(977),
    'upsih'    => chr(978),
    'piv'      => chr(982),
    
    # General Punctuation
    # * bullet is NOT the same as bullet operator, U+2219
    'bull'     => chr(8226),
    'hellip'   => chr(8230),
    'prime'    => chr(8242),
    'Prime'    => chr(8243),
    'oline'    => chr(8254),
    'frasl'    => chr(8260),
    
    # Letterlike Symbols
    # * alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same glyph could be used to depict both characters
    'weierp'   => chr(8472),
    'image'    => chr(8465),
    'real'     => chr(8476),
    'trade'    => chr(8482),
    'alefsym'  => chr(8501),
    
    # Arrows
    # * Unicode does not say that lArr is the same as the 'is implied by' arrow but also
    # does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests
    # * Unicode does not say rArr is the 'implies' character but does not have another
    # character with this function so ? rArr can be used for 'implies' as ISOtech suggests
    'larr'     => chr(8592),
    'uarr'     => chr(8593),
    'rarr'     => chr(8594),
    'darr'     => chr(8595),
    'harr'     => chr(8596),
    'crarr'    => chr(8629),
    'lArr'     => chr(8656),
    'uArr'     => chr(8657),
    'rArr'     => chr(8658),
    'dArr'     => chr(8659),
    'hArr'     => chr(8660),
    
    # Mathematical Operators
    # * should there be a more memorable name than 'ni'?
    # * prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both
    # * sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both
    # * sim: tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both
    # * note that nsup, 'not a superset of, U+2283' is not covered by the Symbol font encoding and is not included.
    # Should it be, for symmetry? It is in ISOamsn
    # * sdot: dot operator is NOT the same character as U+00B7 middle dot
    'forall'   => chr(8704),
    'part'     => chr(8706),
    'exist'    => chr(8707),
    'empty'    => chr(8709),
    'nabla'    => chr(8711),
    'isin'     => chr(8712),
    'notin'    => chr(8713),
    'ni'       => chr(8715),
    'prod'     => chr(8719),
    'sum'      => chr(8721),
    'minus'    => chr(8722),
    'lowast'   => chr(8727),
    'radic'    => chr(8730),
    'prop'     => chr(8733),
    'infin'    => chr(8734),
    'ang'      => chr(8736),
    'and'      => chr(8743),
    'or'       => chr(8744),
    'cap'      => chr(8745),
    'cup'      => chr(8746),
    'int'      => chr(8747),
    'there4'   => chr(8756),
    'sim'      => chr(8764),
    'cong'     => chr(8773),
    'asymp'    => chr(8776),
    'ne'       => chr(8800),
    'equiv'    => chr(8801),
    'le'       => chr(8804),
    'ge'       => chr(8805),
    'sub'      => chr(8834),
    'sup'      => chr(8835),
    'nsub'     => chr(8836),
    'sube'     => chr(8838),
    'supe'     => chr(8839),
    'oplus'    => chr(8853),
    'otimes'   => chr(8855),
    'perp'     => chr(8869),
    'sdot'     => chr(8901),
    
    # Miscellaneous Technical
    # * lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'
    # * rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'
    'lceil'    => chr(8968),
    'rceil'    => chr(8969),
    'lfloor'   => chr(8970),
    'rfloor'   => chr(8971),
    'lang'     => chr(9001),
    'rang'     => chr(9002),
    
    # Geometric Shapes
    'loz'      => chr(9674),
    
    # Miscellaneous Symbols
    # * black here seems to mean filled as opposed to hollow
    'spades'   => chr(9824),
    'clubs'    => chr(9827),
    'hearts'   => chr(9829),
    'diams'    => chr(9830),
   );


# Look up the character for a named entity.
#
# Intended to be called as a class method with exactly one argument:
#
#     my $char = MKDoc::XML::Decode::XHTML->process ('eacute');
#
#   $class - the invocant; only consumed from the argument list
#   $stuff - entity name without the surrounding '&' and ';'
#
# Returns the character stored in %ENTITY_2_CHAR for that name, or undef
# when the name has no entry (or no name was supplied).  A wrong argument
# count is reported with warn() only; the lookup is still attempted.
sub process
{
    # __PACKAGE__ keeps the message in step with the real package name
    # (it previously named MKDoc::XML::Encode by mistake).
    (@_ == 2) or warn __PACKAGE__ . "::process() should be called with two arguments";
    my $class = shift;
    my $stuff = shift;

    # Guard the lookup so a missing name does not raise a second,
    # unrelated "uninitialized value" warning on top of the one above.
    return defined $stuff ? $ENTITY_2_CHAR{$stuff} : undef;
}


1;