XML::RSS::Parser - A liberal object-oriented parser for RSS


XML-RSS-Parser documentation Contained in the XML-RSS-Parser distribution.

Index


Code Index:


XML-RSS-Parser documentation Contained in the XML-RSS-Parser distribution.

# Front end for parsing RSS feeds. The XML parsing itself is delegated to an
# XML::Elemental parser that new() creates and stores on the object.
package XML::RSS::Parser;
use strict;

use XML::Elemental;
# Base class; presumably the source of the error() method that _parse()
# calls on failure -- confirm against Class::ErrorHandler.
use base qw( Class::ErrorHandler );

use vars qw( $VERSION );
# NOTE: this is a numeric literal, so it stringifies as "4" rather than "4.0".
$VERSION = 4.0;

# Well-known namespace prefixes and the namespace URIs they stand for.
# The table is file-level, so it is shared by every parser object;
# register_ns_prefix() adds to it at runtime and namespace() reads from it.
my %xpath_prefix = (
    admin      => "http://webns.net/mvcb/",
    ag         => "http://purl.org/rss/1.0/modules/aggregation/",
    annotate   => "http://purl.org/rss/1.0/modules/annotate/",
    atom       => "http://www.w3.org/2005/Atom",
    audio      => "http://media.tangent.org/rss/1.0/",
    cc         => "http://web.resource.org/cc/",
    company    => "http://purl.org/rss/1.0/modules/company",
    content    => "http://purl.org/rss/1.0/modules/content/",
    cp         => "http://my.theinfo.org/changed/1.0/rss/",
    dc         => "http://purl.org/dc/elements/1.1/",
    dcterms    => "http://purl.org/dc/terms/",
    email      => "http://purl.org/rss/1.0/modules/email/",
    ev         => "http://purl.org/rss/1.0/modules/event/",
    feedburner => "http://rssnamespace.org/feedburner/ext/1.0",
    foaf       => "http://xmlns.com/foaf/0.1/",
    image      => "http://purl.org/rss/1.0/modules/image/",
    itunes     => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
    l          => "http://purl.org/rss/1.0/modules/link/",
    openSearch => "http://a9.com/-/spec/opensearchrss/1.0/",
    rdf        => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs       => "http://www.w3.org/2000/01/rdf-schema#",
    'ref'      => "http://purl.org/rss/1.0/modules/reference/",
    reqv       => "http://purl.org/rss/1.0/modules/richequiv/",
    rss091     => "http://purl.org/rss/1.0/modules/rss091#",
    search     => "http://purl.org/rss/1.0/modules/search/",
    slash      => "http://purl.org/rss/1.0/modules/slash/",
    ss         => "http://purl.org/rss/1.0/modules/servicestatus/",
    str        => "http://hacks.benhammersley.com/rss/streaming/",
    'sub'      => "http://purl.org/rss/1.0/modules/subscription/",
    sy         => "http://purl.org/rss/1.0/modules/syndication/",
    tapi       => "http://api.technorati.com/dtd/tapi-001.xml#",
    taxo       => "http://purl.org/rss/1.0/modules/taxonomy/",
    thr        => "http://purl.org/rss/1.0/modules/threading/",
    trackback  => "http://madskills.com/public/xml/rss/module/trackback/",
    wiki       => "http://purl.org/rss/1.0/modules/wiki/",
    xhtml      => "http://www.w3.org/1999/xhtml",

    # The reserved "xml" prefix is bound to this exact namespace name, which
    # has no trailing slash; with a slash the entry can never match the URI
    # of attributes such as xml:lang or xml:base.
    xml        => "http://www.w3.org/XML/1998/namespace",

    creativeCommons => "http://backend.userland.com/creativeCommonsRssModule"
);

# Inverse table (namespace URI => prefix), read by prefix().
my %xpath_ns = reverse %xpath_prefix;

# Constructor. Takes no arguments besides the class name.
# Creates the underlying parser by handing XML::Elemental->parser a mapping
# of Document/Element/Characters to this distribution's node class names,
# and keeps it in the {__parser} slot. Returns the new object.
sub new {
    my ($class) = @_;
    my %node_class_for = (
        Document   => 'XML::RSS::Parser::Feed',
        Element    => 'XML::RSS::Parser::Element',
        Characters => 'XML::RSS::Parser::Characters',
    );
    my $self = bless {}, $class;
    $self->{__parser} = XML::Elemental->parser( \%node_class_for );
    return $self;
}

# Registers (or overrides) a prefix for a namespace URI in both lookup
# tables: prefix => URI and URI => prefix. The tables are file-level, so the
# registration is visible to every parser object, not just the invocant.
# Returns the prefix.
sub register_ns_prefix {
    my ( $invocant, $prefix, $uri ) = @_;
    $xpath_prefix{$prefix} = $uri;
    $xpath_ns{$uri}        = $prefix;
    return $prefix;
}

# Public parsing entry points. Each one forwards its invocant and arguments
# to _parse(), prefixed with the name of the parser method to invoke.
sub parse        { return _parse( parse        => @_ ) }
sub parse_file   { return _parse( parse_file   => @_ ) }
sub parse_string { return _parse( parse_string => @_ ) }
sub parse_uri    { return _parse( parse_uri    => @_ ) }

# Shared implementation behind parse(), parse_file(), parse_string() and
# parse_uri().
#
#   $meth - name of the method to call on the underlying parser
#   $e    - the parser object (keeps the underlying parser in {__parser})
#   @_    - remaining arguments, passed to the parser method untouched
#
# Returns the result of rss_normalize() on the parsed document. On failure
# the message is recorded through error() and the value of that call is
# returned instead.
sub _parse {
    my $meth = shift;
    my $e    = shift;
    my $doc;

    # End the eval block with a true value and test the eval's own result
    # instead of inspecting $@ afterwards: $@ can be reset before it is read
    # (for instance by a destructor that runs its own eval), which would let
    # a failed parse go unnoticed.
    my $ok = eval { $doc = $e->{__parser}->$meth(@_); 1 };
    return $e->error( $@ || "Unknown error in $meth" ) unless $ok;

    # A parser that returns nothing without dying would otherwise crash in
    # rss_normalize() with an unhelpful "undefined value" message.
    return $e->error("$meth did not produce a document") unless defined $doc;

    return $e->rss_normalize($doc);
}

#--- utils

# Looks up the prefix registered for a namespace URI. Called as a method:
# the invocant is ignored and the URI is the first real argument.
# Yields undef when the URI has no registered prefix.
sub prefix {
    my ( $invocant, $uri ) = @_;
    return $xpath_ns{$uri};
}
# Looks up the namespace URI registered for a prefix. Called as a method:
# the invocant is ignored and the prefix is the first real argument.
# Yields undef when the prefix is not registered.
sub namespace {
    my ( $invocant, $prefix ) = @_;
    return $xpath_prefix{$prefix};
}

# Builds a namespace-qualified name of the form "{namespace-uri}localname".
# The invocant is ignored. A missing (or otherwise false) namespace yields
# an empty "{}" part.
sub ns_qualify {
    my ( $invocant, $name, $ns ) = @_;
    my $uri = $ns ? $ns : '';
    return '{' . $uri . '}' . $name;
}

# Different RSS formats use slightly different tag hierarchies, so after
# parsing we make some alterations to bring them all into line: the channel
# element becomes the only child of the document, and every other child of
# the original root element is appended to the channel's contents.
#
#   $doc - parsed document object; must provide find_rss_namespace() and
#          contents()
#
# Returns $doc on success. When the document has no root element or no
# channel element, the problem is recorded through error() and the value of
# that call is returned, rather than dying on an undefined value.
sub rss_normalize {
    my ( $self, $doc ) = @_;

    my $ns           = $doc->find_rss_namespace;
    my $channel_name = "{$ns}channel";

    my $root = ( $doc->contents || [] )->[0];
    return $self->error('Document has no root element') unless $root;

    # Walk the root's children: the channel is promoted to be the document's
    # only child, everything else is collected to be moved under the channel.
    # Nodes without a name() method (presumably character data -- confirm
    # against the node classes) are treated like any other non-channel node.
    my $channel;
    my @siblings;
    for my $node ( @{ $root->contents || [] } ) {
        if ( $node->can('name') && $node->name eq $channel_name ) {
            $node->parent($doc);
            $channel = $node;
            $doc->contents( [$node] );
        }
        else {
            push @siblings, $node;
        }
    }

    # Nothing has been modified at this point if no channel was seen, so the
    # document is still intact when we give up.
    return $self->error('No channel element found') unless $channel;

    $_->parent($channel) for @siblings;
    $channel->contents( [ @{ $channel->contents || [] }, @siblings ] );

    # Detach the old root so it no longer points at the document or at the
    # nodes that were moved out of it.
    $root->parent(undef);
    $root->contents(undef);

    return $doc;
}

1;

__END__