/usr/local/CPAN/BBCode-Parser/BBCode/Util.pm
# $Id: Util.pm 284 2006-12-01 07:51:49Z chronos $
package BBCode::Util;
use base qw(Exporter);
use Carp qw(croak);
use HTML::Entities ();
use POSIX ();
use URI ();
use strict;
use warnings;
our $VERSION = '0.34';
our @EXPORT;
our @EXPORT_OK;
our %EXPORT_TAGS;
sub _export {
my $sym = shift;
$sym =~ s/^(?=\w)/&/;
unshift @_, 'ALL';
while(@_) {
my $tag = shift;
$EXPORT_TAGS{$tag} = [] unless exists $EXPORT_TAGS{$tag};
push @{$EXPORT_TAGS{$tag}}, $sym;
}
}
BEGIN { _export qw(pkgFilename pkg) }
sub pkgFilename($) {
if($_[0] =~ /^((?:\w+::)*\w+)$/) {
local $_ = $1;
s#::#/#g;
s/$/.pm/;
return $_;
}
return undef;
}
my %userTags = (
'BODY' => 'BBCode::Body',
);
BEGIN { _export qw(tagUserDefined tag) }
sub tagUserDefined($) {
my $pkg = shift;
my $file = pkgFilename($pkg);
croak qq(Invalid package name "$pkg") unless defined $file;
require $file;
my $obj = bless {}, $pkg;
croak qq(Package "$pkg" does not inherit from BBCode::Tag) unless UNIVERSAL::isa($obj,'BBCode::Tag');
$userTags{uc($obj->Tag)} = $pkg;
}
BEGIN { _export qw(tagLoadPackage tag) }
sub tagLoadPackage($) {
my($tag,$pkg);
croak qq(Invalid tag name "$_[0]") unless $_[0] =~ m#^/?(_?\w+)$#;
$tag = uc($1);
if(exists $userTags{$tag}) {
$pkg = $userTags{$tag};
} else {
$tag =~ s/^_/x/;
$pkg = "BBCode::Tag::$tag";
}
my $file = pkgFilename($pkg);
require $file;
return $pkg;
}
BEGIN { _export qw(tagExists tag) }
sub tagExists($) {
my $tag = shift;
return 1 if eval {
tagLoadPackage($tag);
1;
};
return 0;
}
BEGIN { _export qw(tagCanonical tag) }
sub tagCanonical($) {
local $_ = shift;
if(ref $_) {
return $_->Tag if UNIVERSAL::isa($_,'BBCode::Tag');
croak qq(Invalid reference);
} else {
return uc($1) if /^(:\w+)$/;
my $pkg = tagLoadPackage($_);
return $pkg->Tag;
}
}
BEGIN { _export qw(tagObject tag) }
sub tagObject($) {
my $tag = shift;
if(ref $tag) {
return $tag if UNIVERSAL::isa($tag,'BBCode::Tag');
croak qq(Invalid reference);
} else {
my $pkg = tagLoadPackage($tag);
return bless {}, $pkg;
}
}
BEGIN { _export qw(tagHierarchy tag) }
sub tagHierarchy($) {
my $tag = tagCanonical(shift);
return $tag if $tag =~ /^:/;
my $pkg = tagLoadPackage($tag);
return ($pkg->Tag, map { ":$_" } ($pkg->Class, 'ALL'));
}
BEGIN { _export qw(quoteQ quote) }
sub quoteQ($) {
local $_ = $_[0];
s/([\\'])/\\$1/g;
return qq('$_');
}
BEGIN { _export qw(quoteQQ quote) }
sub quoteQQ($) {
local $_ = $_[0];
s/([\\"])/\\$1/g;
return qq("$_");
}
BEGIN { _export qw(quoteBS quote) }
sub quoteBS($) {
local $_ = $_[0];
s/([\\\[\]"'=,\s\n])/\\$1/g;
return $_;
}
BEGIN { _export qw(quoteRaw quote) }
sub quoteRaw($) {
local $_ = $_[0];
return undef if /[\\\[\]"'=,\s\n]/;
return $_;
}
BEGIN { _export qw(quote quote) }
sub quote($) {
my @q = sort {
(length($a) <=> length($b)) or ($a cmp $b)
} grep {
defined $_
} (quoteQ $_[0], quoteQQ $_[0], quoteBS $_[0], quoteRaw $_[0]);
return $q[0];
}
BEGIN { _export qw(encodeHTML encode); }
sub encodeHTML($) {
local $_ = $_[0];
if(defined $_) {
# Basic HTML/XML escapes
s/&/&/g;
s/</</g;
s/>/>/g;
s/"/"/g;
# ' is XML-only
s/'/'/g;
}
return $_;
}
BEGIN { _export qw(decodeHTML encode); }
sub decodeHTML($) {
return HTML::Entities::decode($_[0]);
}
BEGIN { _export qw(parseBool parse) }
sub parseBool($) {
local $_ = $_[0];
return undef if not defined $_;
return $_->as_bool() if ref $_ and UNIVERSAL::can($_, 'as_bool');
return 1 if /^(?:
1 |
T | TR | TRU | TRUE |
Y | YE | YES |
ON
)$/ix;
return 0 if /^(?:
0 |
F | FA | FAL | FALS | FALSE |
N | NO |
OFF
)$/ix;
return $_ ? 1 : 0;
}
BEGIN { _export qw(parseInt parse) }
sub parseInt($) {
my $num = shift;
return undef if not defined $num;
$num =~ s/[\s,_]+//g;
$num =~ s/^\+//;
return 0 if $num =~ /^-?$/;
return 0+$1 if $num =~ /^ ( -? \d+ ) $/x;
return undef;
}
BEGIN { _export qw(parseNum parse) }
sub parseNum($);
sub parseNum($) {
my $num = shift;
return undef if not defined $num;
$num =~ s/[\s,_]+//g;
if($num =~ /^ (.*) e (.*) $/ix) {
my($m,$e) = ($1,$2);
$m = parseNum $m;
$e = parseNum $e;
return $m * (10 ** $e) if defined $m and defined $e;
return undef;
}
if($num =~ /^ ([^.]*) \. ([^.]*) $/x) {
my($i,$f) = ($1,$2);
$i = parseInt $i;
return undef unless defined $i;
return undef unless $f =~ /^(\d*)$/;
$num = "$i.$f";
$num =~ s/\.$//;
return 0+$num;
}
return parseInt($num);
}
BEGIN { _export qw(parseEntity parse) }
sub parseEntity($);
sub parseEntity($) {
local $_ = $_[0];
return undef unless defined $_;
s/^&(.*);$/$1/;
s/^#([xob])/0$1/i;
s/^#//;
s/^U\+/0x/;
my $ch;
if(/^ 0x ([0-9A-F]+) $/xi) {
$ch = hex($1);
} elsif(/^ 0o ([0-7]+) $/xi) {
$ch = oct($1);
} elsif(/^ 0b ([01]+) $/xi) {
my $b = ("\0" x 4) . pack("B*", $1);
$ch = unpack "N", substr($b, -4);
} elsif(/^ 0 ([0-7]{3}) $/x) {
$ch = oct($1);
} elsif(/^ (\d+) $/x) {
$ch = 0+$1;
}
return sprintf "#x%X", $ch if defined $ch;
my $decoded = HTML::Entities::decode("&$_;");
return undef if $decoded eq "&$_;";
return $_;
}
BEGIN { _export qw(parseListType parse) }
my %listtype = (
'*' => [ qw(ul) ],
'1' => [ qw(ol decimal) ],
'01' => [ qw(ol decimal-leading-zero) ],
'A' => [ qw(ol upper-latin) ],
'a' => [ qw(ol lower-latin) ],
'I' => [ qw(ol upper-roman) ],
'i' => [ qw(ol lower-roman) ],
"\x{3B1}" => [ qw(ol lower-greek) ],
"\x{5D0}" => [ qw(ol hebrew) ],
"\x{3042}" => [ qw(ol hiragana) ],
"\x{3044}" => [ qw(ol hiragana-iroha) ],
"\x{30A2}" => [ qw(ol katakana) ],
"\x{30A4}" => [ qw(ol katakana-iroha) ],
);
sub parseListType($) {
local $_ = $_[0];
my @ret;
if(defined $_) {
if(/^(disc|circle|square|none)$/i) {
@ret = ('ul', lc $1);
} elsif(/^(
decimal(?:-leading-zero)? |
(?:upper|lower)-(?:roman|latin|alpha) |
lower-greek |
hebrew |
georgian |
armenian |
cjk-ideographic |
(?:hiragana|katakana)(?:-iroha)?
)$/ix) {
@ret = ('ol', lc $1);
} elsif(exists $listtype{$_}) {
@ret = @{$listtype{$_}};
}
}
return @ret;
}
# Conversion factors from CSS units to points
my %conv = (
# Integer conversions within English units
pt => 1,
pc => 12,
in => 72,
# Floating-point conversions from Metric units
mm => 72/25.4,
cm => 72/2.54,
# Somewhat approximate, but the CSS standard is actually rather
# picky about how many pixels a 'pixel' is at different resolutions,
# so this is actually relatively reliable.
px => 0.75,
);
# Emulation of <font size="num">...</font> from HTML 3.2
# See <URL:http://www.w3.org/TR/CSS21/fonts.html#font-size-props>
# Tweaked slightly to be more logical
my @compat = qw(xx-small x-small small medium large x-large xx-large 300%);
BEGIN { _export qw(parseFontSize parse) }
sub parseFontSize($;$$$);
sub parseFontSize($;$$$) {
local $_ = shift;
return undef unless defined $_;
my($base,$lo,$hi) = @_;
$base = 12 if not defined $base;
$lo = 8 if not defined $lo;
$hi = 72 if not defined $hi;
s/\s+/ /g;
s/^\s|\s$//g;
# CSS 2.1 15.7 <absolute-size>
if(/^( (?:xx?-)? (?:large|small) | medium )$/ix) {
return lc $1;
}
# CSS 2.1 15.7 <relative-size>
# Note: Since [FONT] is nestable and not readily computable before HTML
# rendering, this can allow a malicious user to escape the
# admin-defined font size limits
if(/^ ( larger | smaller ) $/ix) {
return lc $1;
}
# CSS 2.1 4.3.2 <length>
if(/^ ( [\s\d._+-]+ ) ( [a-z]+ ) $/ix) {
my($n,$unit) = ($1,lc $2);
$n = parseNum $n;
if(defined $n and $n > 0) {
my $conv;
if(exists $conv{$unit}) {
$conv = $conv{$unit};
} elsif($unit =~ /^em$/i) {
$conv = $base;
} elsif($unit =~ /^ex$/i) {
$conv = $base * 0.5;
} else {
return undef;
}
my $n2 = $n * $conv;
if(defined $lo and $n2 < $lo) {
$n = $lo / $conv;
} elsif(defined $hi and $n2 > $hi) {
$n = $hi / $conv;
}
$n = sprintf "%.3f", $n;
$n =~ s/0+$//;
$n =~ s/\.$//;
return "$n$unit";
} else {
return undef;
}
}
# CSS 2.1 4.3.3 <percentage>
# Note: The same concerns apply as for <relative-size>
if(/^ ( [\s\d._+-]+ ) % $/x) {
my $n = parseNum $1;
if(defined $n and $n > 0) {
$n *= 0.01;
my $n2 = $n * $base;
if(defined $lo and $n2 < $lo) {
$n = $lo / $base;
} elsif(defined $hi and $n2 > $hi) {
$n = $hi / $base;
}
$n *= 100;
$n = sprintf "%.3f", $n;
$n =~ s/0+$//;
$n =~ s/\.$//;
return "$n%";
} else {
return undef;
}
}
# HTML 3.2 <font size="number">
# See <URL:http://www.w3.org/TR/REC-html32#font>
if(/^ (\d+) $/x) {
my $n = 0+$1;
if($n >= 0 and $n < @compat) {
return $compat[$n];
} else {
return parseFontSize("$n pt",$base,$lo,$hi);
}
}
# HTML 3.2 <font size="+number">
if(/^ \+ (\d+) $/x) {
# "+1" is roughly equivalent to CSS 2.1 "larger"
my $n = sprintf "%f%%", 100 * (1.25 ** $1);
return parseFontSize($n,$base,$lo,$hi);
}
# HTML 3.2 <font size="-number">
if(/^ - (\d+) $/x) {
# "-1" is roughly equivalent to CSS 2.1 "smaller"
my $n = sprintf "%f%%", 100 * (0.85 ** $1);
return parseFontSize($n,$base,$lo,$hi);
}
return undef;
}
# Official CSS 2.1 colors are passed through as-is
my %cssColor = map { $_ => 1 } qw(
maroon red orange yellow olive
purple fuchsia white lime green
navy blue aqua teal
black silver gray
);
# Other named colors must map to an official named color or an #RRGGBB color
my %extraColor = (
darkred => 'maroon',
darkblue => 'navy',
);
BEGIN { _export qw(parseColor parse) }
sub parseColor($) {
local $_ = $_[0];
return undef unless defined $_;
s/\s+//g;
$_ = lc $_;
return $1 if /^(\w+)$/ and exists $cssColor{$1};
return $extraColor{$_} if exists $extraColor{$_};
if(s/^#//) {
s/^ ( [0-9a-f]{1,2} ) $/$1$1$1/x;
s/^ ([0-9a-f]) \1 ([0-9a-f]) \2 ([0-9a-f]) \3 $/$1$2$3/x;
return "#$_" if /^ [0-9a-f]{3} $/x;
return "#$_" if /^ [0-9a-f]{6} $/x;
} else {
return $1 if /^( rgb \( (?: \d+ , ){2} \d+ \) )$/x;
return $1 if /^( rgba\( (?: \d+ , ){3} \d+ \) )$/x;
return $1 if /^( rgb \( (?: \d+% , ){2} \d+% \) )$/x;
return $1 if /^( rgba\( (?: \d+% , ){3} \d+% \) )$/x;
}
return undef;
}
sub _url_parse_opaque($) {
local $_ = $_[0];
my @ret = (undef) x 3;
$ret[2] = $1 if s/(#.*)$//;
$ret[0] = lc $1 if s/^([\w+-]+)://;
$ret[1] = $_;
return @ret if wantarray;
return \@ret;
}
sub _url_parse_query($) {
local $_ = $_[0];
my @ret = (undef) x 2;
$ret[1] = $1 if s/(\?.*)$//;
$ret[0] = $_;
return @ret if wantarray;
return \@ret;
}
sub _url_parse_path($) {
local $_ = $_[0];
my @ret = (undef) x 2;
if(s#^//##) {
$ret[0] = $1 if s#^([^/]+)##;
s#^$#/#;
$ret[1] = $_;
} elsif(m#^/#) {
$ret[1] = $_;
} else {
return () if wantarray;
return undef;
}
return @ret if wantarray;
return \@ret;
}
sub _url_parse_server($) {
local $_ = $_[0];
my($userpass,$hostport);
if(/^ ([^@]*) \@ ([^@]*) $/x) {
($userpass,$hostport) = ($1,$2);
} else {
$hostport = $_;
}
my @ret = (undef) x 4;
$_ = $userpass;
if(defined $_) {
if(/^ ([^:]*) : ([^:]*) $/x) {
@ret[0,1] = ($1,$2);
} else {
$ret[0] = $_;
}
}
$_ = $hostport;
if(s/:(\d+)$//) {
$ret[3] = $1;
} elsif(s/:([\w+-]+)$//) {
$ret[3] = getservbyname($1,'tcp');
goto Failure if not defined $ret[3];
} else {
s/:$//;
}
s/\.*$/./;
if(/^ ( (?: [\w-]+ \. )+ ) $/x) {
$ret[2] = $1;
$ret[2] =~ s/\.$//;
}
goto Failure if not defined $ret[2];
return @ret if wantarray;
return \@ret;
Failure:
return () if wantarray;
return undef;
}
my %urltype = (
'http' => 3,
'https' => 3,
'ftp' => 3,
'file' => 2,
'mailto' => 1,
'data' => 0,
'javascript' => 0,
);
sub _url_parse($$) {
my($str,$schemes) = @_;
my($scheme,$opaque,$fragment) = _url_parse_opaque($str);
return undef unless defined $scheme;
return undef unless exists $urltype{$scheme};
if($urltype{$scheme} > 0) {
my($rest,$query) = _url_parse_query($opaque);
if($urltype{$scheme} > 1) {
my($auth,$path) = _url_parse_path($rest);
return undef unless defined $path;
if($urltype{$scheme} > 2) {
return undef unless defined $auth;
my($user,$pass,$host,$port) = _url_parse_server($auth);
return undef unless defined $host;
$auth = '';
if(defined $user) {
$auth .= $user;
$auth .= ':'.$pass if defined $pass;
$auth .= '@';
}
$auth .= $host;
$auth .= ':'.$port if defined $port;
}
$rest = join '', map { defined $_ ? $_ : '' } ('//',$auth,$path);
}
$opaque = join '', map { defined $_ ? $_ : '' } ($rest,$query);
}
$str = $scheme.':'.$opaque.(defined $fragment ? $fragment : '');
my $url = URI->new_abs($str, 'http://sanity.check.example.com/')->canonical;
return undef unless defined $url->scheme;
return undef unless exists $$schemes{$url->scheme};
return undef if $url->as_string =~ /\bsanity\.check\.example\.com\b/i;
return undef if $url->can('userinfo') and defined $url->userinfo;
return undef if $url->can('host') and not defined $url->host;
if($url->scheme eq 'mailto') {
my %unsafe = $url->headers;
my %safe;
foreach my $key (keys %unsafe) {
if($key =~ /^(?:to|cc|bcc)$/i) {
my @to = split /,/, $unsafe{$key};
$key = lc $key;
foreach(@to) {
if(/^ ( [\w.+-]+ \@ (?: \w[\w-]*(?<=\w) \. )+ [a-z]{2,6} ) $/xi) {
if(exists $safe{$key}) {
$safe{$key} .= ",$1";
} else {
$safe{$key} = $1;
}
}
}
next;
}
if($key =~ /^subject$/i) {
if($unsafe{$key} =~ /^ ( [\x20-\x7E]+ ) $/x) {
$safe{subject} = $1;
}
next;
}
}
return undef unless exists $safe{to};
$url->headers(%safe);
}
return $url;
}
BEGIN { _export qw(parseURL parse) }
my %schemes = map { $_ => 1 } qw(http https ftp mailto data);
sub parseURL($) {
foreach('%', 'http://%', 'mailto:%') {
my $str = $_;
$str =~ s/%/$_[0]/g;
my $url = _url_parse($str, \%schemes);
return $url if defined $url;
}
return undef;
}
BEGIN { _export qw(parseMailURL parse) }
my %mail_schemes = (mailto => 1);
sub parseMailURL($) {
foreach('%', 'mailto:%') {
my $str = $_;
$str =~ s/%/$_[0]/g;
my $url = _url_parse($str, \%mail_schemes);
return $url if defined $url;
}
return undef;
}
BEGIN { _export qw(multilineText text) }
sub multilineText {
if(defined wantarray) {
my $str = join "", @_;
return $str unless wantarray;
return split /(?<=\n)/, $str;
}
}
BEGIN { _export qw(textURL text) }
sub textURL($) {
my $url = shift;
$url = parseURL($url) if not ref $url;
return undef if not defined $url;
if($url->scheme eq 'mailto') {
return $url->to;
}
if($url->scheme eq 'http' or $url->scheme eq 'https') {
if(not defined $url->query or $url->query eq '') {
if($url->path eq '' or $url->path eq '/') {
return $url->host;
}
return $url->host.$url->path;
}
}
if($url->scheme eq 'ftp') {
return $url->path.' on '.$url->host.' (FTP)';
}
if($url->scheme eq 'data') {
my $m = $url->media_type;
if(defined $m) {
$m =~ s/;.*$//;
return "Inline data ($m)";
}
return "Inline data";
}
return $url->as_string;
}
BEGIN { _export qw(textALT text) }
sub textALT($) {
my $url = shift;
$url = parseURL($url) if not ref $url;
return undef if not defined $url;
if($url->scheme eq 'data') {
return "[Inline data]";
}
my $path = $url->path;
$path =~ s#^.*/##;
return "[$path]";
}
sub _b10_len($) {
my $n = shift;
if($n > 0) {
return 1+POSIX::floor(log($n)/log(10));
}
if($n < 0) {
return 2+POSIX::floor(log(-$n)/log(10));
}
return 1;
}
sub _max {
my $max;
while(@_) {
my $val = shift;
$max = $val if defined $val and (not defined $max or $val > $max);
}
return $max;
}
BEGIN { _export qw(createListSequence) }
sub createListSequence($;$$) {
my($type,$start,$total) = @_;
my @list = parseListType($type);
$start = 1 unless defined $start;
if(@list and $list[0] eq 'ol') {
my $type = (@list > 1) ? $list[1] : 'decimal';
if(0) {
# Disabled until the generators can be split into separate packages
if($type =~ /^(upper|lower)-(alpha|latin|roman|greek)$/i) {
my $func = 'textOrder'.ucfirst(lc($2));
my $uc = $1 =~ /^upper$/i;
$func =~ s/Latin$/Alpha/;
{
no strict 'refs';
$func = \&{$func};
}
if($uc) {
return sub { $func->($start++).'.' };
} else {
return sub { lc $func->($start++).'.' };
}
}
if($type =~ /^(hiragana|katakana)(?:-(iroha))?$/i) {
my $func = 'textOrder'.ucfirst(lc($1)).(defined $2 ? uc($2) : '');
{
no strict 'refs';
$func = \&{$func};
}
return sub { $func->($start++).'.' };
}
if($type =~ /^cjk-ideographic$/i) {
return sub { textOrderCJK($start++).'.' };
}
if($type =~ /^hebrew$/i) {
return sub { textOrderHebrew($start++).'.' };
}
if($type =~ /^georgian$/i) {
return sub { textOrderGeorgian($start++).'.' };
}
if($type =~ /^armenian$/i) {
return sub { textOrderArmenian($start++).'.' };
}
}
if($type =~ /^decimal-leading-zero$/i) {
if(defined $total) {
my $end = $total + $start - 1;
my $len = _max 3, 1+_b10_len(abs $start), 1+_b10_len(abs $end);
my $fmt = sprintf '%% 0%dd.', $len;
return sub { sprintf($fmt,$start++) };
} else {
return sub { sprintf("% 03d.", $start++) };
}
}
if(defined $total) {
my $end = $total + $start - 1;
my $len = _max _b10_len $start, _b10_len $end;
my $fmt = sprintf '%%%dd.', $len;
return sub { sprintf($fmt,$start++) };
} else {
return sub { sprintf("%d.",$start++) };
}
}
return sub { '*' };
}
BEGIN {
push @EXPORT_OK, @{$EXPORT_TAGS{ALL}} if exists $EXPORT_TAGS{ALL};
push @EXPORT, @{$EXPORT_TAGS{DEFAULT}} if exists $EXPORT_TAGS{DEFAULT};
}
1;