# XML::RSS::Parser - A liberal object-oriented parser for RSS
package XML::RSS::Parser;
use strict;
use XML::Elemental;
use base qw( Class::ErrorHandler );
use vars qw( $VERSION );
$VERSION = 4.0;
# Well-known namespace prefixes: prefix => namespace URI.
# This table seeds the two lookup hashes used by prefix() and namespace();
# more entries can be added at runtime with register_ns_prefix().
my %xpath_prefix = (
    admin           => 'http://webns.net/mvcb/',
    ag              => 'http://purl.org/rss/1.0/modules/aggregation/',
    annotate        => 'http://purl.org/rss/1.0/modules/annotate/',
    atom            => 'http://www.w3.org/2005/Atom',
    audio           => 'http://media.tangent.org/rss/1.0/',
    cc              => 'http://web.resource.org/cc/',
    company         => 'http://purl.org/rss/1.0/modules/company',
    content         => 'http://purl.org/rss/1.0/modules/content/',
    cp              => 'http://my.theinfo.org/changed/1.0/rss/',
    dc              => 'http://purl.org/dc/elements/1.1/',
    dcterms         => 'http://purl.org/dc/terms/',
    email           => 'http://purl.org/rss/1.0/modules/email/',
    ev              => 'http://purl.org/rss/1.0/modules/event/',
    feedburner      => 'http://rssnamespace.org/feedburner/ext/1.0',
    foaf            => 'http://xmlns.com/foaf/0.1/',
    image           => 'http://purl.org/rss/1.0/modules/image/',
    itunes          => 'http://www.itunes.com/DTDs/Podcast-1.0.dtd',
    l               => 'http://purl.org/rss/1.0/modules/link/',
    openSearch      => 'http://a9.com/-/spec/opensearchrss/1.0/',
    rdf             => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    rdfs            => 'http://www.w3.org/2000/01/rdf-schema#',
    'ref'           => 'http://purl.org/rss/1.0/modules/reference/',
    reqv            => 'http://purl.org/rss/1.0/modules/richequiv/',
    rss091          => 'http://purl.org/rss/1.0/modules/rss091#',
    search          => 'http://purl.org/rss/1.0/modules/search/',
    slash           => 'http://purl.org/rss/1.0/modules/slash/',
    ss              => 'http://purl.org/rss/1.0/modules/servicestatus/',
    str             => 'http://hacks.benhammersley.com/rss/streaming/',
    'sub'           => 'http://purl.org/rss/1.0/modules/subscription/',
    sy              => 'http://purl.org/rss/1.0/modules/syndication/',
    tapi            => 'http://api.technorati.com/dtd/tapi-001.xml#',
    taxo            => 'http://purl.org/rss/1.0/modules/taxonomy/',
    thr             => 'http://purl.org/rss/1.0/modules/threading/',
    trackback       => 'http://madskills.com/public/xml/rss/module/trackback/',
    wiki            => 'http://purl.org/rss/1.0/modules/wiki/',
    xhtml           => 'http://www.w3.org/1999/xhtml',

    # The reserved "xml" prefix is bound to this exact URI; it has no
    # trailing slash.
    xml             => 'http://www.w3.org/XML/1998/namespace',
    creativeCommons => 'http://backend.userland.com/creativeCommonsRssModule',
);

# Reverse lookup: namespace URI => prefix.
my %xpath_ns = reverse %xpath_prefix;

# The XML namespace used to be listed here with a trailing slash. Keep that
# spelling resolvable so prefix() lookups that relied on it still answer "xml".
$xpath_ns{'http://www.w3.org/XML/1998/namespace/'} = 'xml';
# Constructor.
#
# Builds an empty blessed hash and stores in its __parser slot whatever
# XML::Elemental->parser returns when handed the three class names below.
# NOTE(review): presumably XML::Elemental instantiates these classes for the
# document, element and character-data nodes of the tree -- confirm against
# its documentation.
#
# Returns the new parser object.
sub new {
    my ($class) = @_;

    my %node_classes = (
        Document   => 'XML::RSS::Parser::Feed',
        Element    => 'XML::RSS::Parser::Element',
        Characters => 'XML::RSS::Parser::Characters',
    );

    my $self = bless {}, $class;
    $self->{__parser} = XML::Elemental->parser( \%node_classes );
    return $self;
}
# Register (or overwrite) a prefix <-> namespace pair in both lookup tables.
#
#   $prefix - short prefix, e.g. "dc"
#   $ns     - namespace URI the prefix stands for
#
# The invocant is ignored: the tables are file-level, so a registration is
# visible to every parser object. Returns $prefix (the value of the final
# assignment).
sub register_ns_prefix {
    my ( undef, $prefix, $ns ) = @_;

    $xpath_prefix{$prefix} = $ns;
    return $xpath_ns{$ns} = $prefix;
}
# Public parsing entry points. Each one forwards the invocant and all of its
# arguments to _parse(), together with the name of the method that _parse()
# will call on the underlying parser stored in $self->{__parser}.
sub parse {
    return _parse( 'parse', @_ );
}

sub parse_file {
    return _parse( 'parse_file', @_ );
}

sub parse_string {
    return _parse( 'parse_string', @_ );
}

sub parse_uri {
    return _parse( 'parse_uri', @_ );
}
# Shared implementation behind parse(), parse_file(), parse_string() and
# parse_uri().
#
#   $method - name of the method to call on the wrapped parser
#   $parser - the XML::RSS::Parser object (holds the wrapped parser in
#             its __parser slot)
#   @_      - remaining arguments, passed through to that method untouched
#
# If the wrapped parser dies, the exception text is handed to
# $parser->error and whatever that returns is returned. Otherwise the
# parsed document is passed to rss_normalize and its result is returned.
sub _parse {
    my $method = shift;
    my $parser = shift;

    # Trap exceptions from the underlying parser; they are reported through
    # the error handler rather than propagated.
    my $tree = eval { $parser->{__parser}->$method(@_) };
    return $parser->error($@) if $@;

    return $parser->rss_normalize($tree);
}
#--- utils
# Look up the registered prefix for a namespace URI.
# Returns undef when the namespace is not in the table. The invocant is
# ignored.
sub prefix {
    my ( undef, $ns ) = @_;
    return $xpath_ns{$ns};
}
# Look up the namespace URI registered for a prefix.
# Returns undef when the prefix is not in the table. The invocant is
# ignored.
sub namespace {
    my ( undef, $prefix ) = @_;
    return $xpath_prefix{$prefix};
}
# Build a namespace-qualified name of the form "{namespace}local-name".
#
#   $name - local element or attribute name
#   $ns   - namespace URI; any false value (undef, '', 0) is treated as the
#           empty namespace, giving "{}name"
#
# The invocant is ignored. Returns the qualified name as a string.
sub ns_qualify {
    my ( undef, $name, $ns ) = @_;

    $ns = '' unless $ns;
    return '{' . $ns . '}' . $name;
}
# Since different RSS formats have slightly different tag hierarchies,
# we make some alterations after parsing to bring them all into line.
#
# After normalization the channel element is the sole child of the document,
# and every other child of the original root element has been appended to
# the channel's contents. The original root element is then detached.
#
#   $doc - parsed document; must provide find_rss_namespace and contents
#
# Returns $doc on success. If the document has no root element, or the root
# has no channel child in the RSS namespace, the tree is left untouched and
# the problem is reported through $self->error (the same mechanism _parse
# uses for parser exceptions); the result of that call is returned.
sub rss_normalize {
    my ( $self, $doc ) = @_;

    # An undefined namespace is treated as the empty namespace: "{}channel".
    my $ns = $doc->find_rss_namespace;
    $ns = '' unless defined $ns;
    my $channel_name = '{' . $ns . '}channel';

    my $top_level = $doc->contents || [];
    my $root      = $top_level->[0];
    return $self->error('Document has no root element')
        unless defined $root;

    my @siblings;
    my $channel;
    for my $node ( @{ $root->contents || [] } ) {

        # Not every node has a name (character data, for instance), hence
        # the can() check before comparing.
        if ( $node->can('name') && $node->name eq $channel_name ) {
            $node->parent($doc);
            $channel = $node;
            $doc->contents( [$node] );
        }
        else {
            push @siblings, $node;
        }
    }

    # Without a channel there is nothing to hang the siblings on. Nothing
    # has been modified at this point, so just report the failure.
    return $self->error("No $channel_name element found in document")
        unless defined $channel;

    # Move everything that sat beside the channel underneath it.
    $_->parent($channel) for @siblings;
    $channel->contents( [ @{ $channel->contents || [] }, @siblings ] );

    # The old root is no longer part of the tree.
    $root->parent(undef);
    $root->contents(undef);

    return $doc;
}
1;
__END__