Bio::Phylo::Parsers::Tolweb - Parser used by Bio::Phylo::IO, no serviceable parts inside


Bio-Phylo documentation Contained in the Bio-Phylo distribution.

Index


Code Index:

NAME

Top

Bio::Phylo::Parsers::Tolweb - Parser used by Bio::Phylo::IO, no serviceable parts inside

DESCRIPTION

Top

This module parses Tree of Life data. It is called by the Bio::Phylo::IO facade; don't call it directly. In addition to parsing from files, handles or strings (which are specified by the -file, -handle and -string arguments), this parser can also parse XML directly from a URL (-url => $tolweb_output), provided you have LWP installed.

SEE ALSO

Top

Bio::Phylo::IO

The ToL web parser is called by the Bio::Phylo::IO object. Look there to learn how to parse Tree of Life data (or any other data Bio::Phylo supports).

Bio::Phylo::Manual

Also see the manual: Bio::Phylo::Manual and http://rutgervos.blogspot.com.

http://tolweb.org

For more information about the Tree of Life xml format, visit http://tolweb.org/tree/home.pages/downloadtree.html

CITATION

Top

If you use Bio::Phylo in published research, please cite it:

Rutger A Vos, Jason Caravas, Klaas Hartmann, Mark A Jensen and Chase Miller, 2011. Bio::Phylo - phyloinformatic analysis using Perl. BMC Bioinformatics 12:63. http://dx.doi.org/10.1186/1471-2105-12-63

REVISION

Top

 $Id: Tolweb.pm 1660 2011-04-02 18:29:40Z rvos $


Bio-Phylo documentation Contained in the Bio-Phylo distribution.
package Bio::Phylo::Parsers::Tolweb;
use strict;
use base 'Bio::Phylo::Parsers::Abstract';
use Bio::Phylo::Util::Exceptions 'throw';
use Bio::Phylo::Util::CONSTANT qw'looks_like_instance :namespaces';
use Bio::Phylo::Util::Dependency 'XML::Twig';

# podinherit_insert_token

# this is the constructor that gets called by Bio::Phylo::IO,
# here we create the object instance that will process the file/string
sub _init {
    my $self = shift;
    $self->_logger->debug("initializing $self");

    # this is the actual parser object, which needs to hold a reference
    # to the XML::Twig object and to the tree
    $self->{'_tree'}      = undef;
    $self->{'_node_of'}   = {};
    $self->{'_parent_of'} = {};

    # here we put the two together, i.e. create the actual XML::Twig object
    # with its handlers, and create a reference to it in the parser object
    $self->{'_twig'} = XML::Twig->new(
        'TwigHandlers' => { 'NODE' => sub { &_handle_node( $self, @_ ) }, } );
    return $self;
}

sub _parse {
    my $self = shift;
    $self->_init;
    $self->_logger->debug("going to parse xml");
    $self->{'_tree'} =
      $self->_factory->create_tree->insert( $self->_factory->create_node );
    $self->{'_twig'}->parse( $self->_string );
    $self->_logger->debug("done parsing xml");

    # now we build the tree structure
    my $root;
    for my $node_id ( keys %{ $self->{'_node_of'} } ) {
        if ( defined( my $parent_id = $self->{'_parent_of'}->{$node_id} ) ) {
            my $child  = $self->{'_node_of'}->{$node_id};
            my $parent = $self->{'_node_of'}->{$parent_id};
            $child->set_parent($parent);
        }
        else {
            $root = $self->{'_node_of'}->{$node_id};
        }
    }
    $root->set_parent( $self->{'_tree'}->get_root );
    $self->{'_tree'}->get_root->add_meta(
        $self->_factory->create_meta(
            '-triple' => { 'tba:id' => $root->get_generic('ANCESTORWITHPAGE') }
        )
    );
    $self->_logger->debug("done building tree");

    # we're done, now grab the tree from its field
    my $tree = $self->{'_tree'};
    return $self->_factory->create_forest->insert($tree);
}

sub _handle_node {
    my ( $self, $twig, $node_elt ) = @_;
    $self->_logger->debug("handling node $node_elt");
    my $node_obj = $self->_factory->create_node;
    my $id       = $node_elt->att('ID');
    $node_obj->set_generic( 'id' => $id );
    $self->{'_node_of'}->{$id} = $node_obj;
    if ( my $parent = $node_elt->parent->parent ) {
        $self->{'_parent_of'}->{$id} = $parent->att('ID');
        $self->_logger->debug("found parent node");
    }
    $self->{'_tree'}->insert($node_obj);
    for my $child_elt ( $node_elt->children ) {
        if ( $child_elt->tag eq 'NODES' or $child_elt->tag eq 'OTHERNAMES' ) {
            next;
        }
        elsif ( $child_elt->tag eq 'NAME' ) {
            if ( my $name = $child_elt->text ) {
                $name =~ m/[ ()]/
                  ? $node_obj->set_name( "'" . $name . "'" )
                  : $node_obj->set_name($name);
            }
        }
        elsif ( $child_elt->tag eq 'DESCRIPTION' ) {
            if ( my $desc = $child_elt->text ) {
                $node_obj->set_namespaces( 'dc' => _NS_DC_ );
                $node_obj->add_meta(
                    $self->_factory->create_meta(
                        '-triple' => { 'dc:description' => $desc }
                    )
                );
            }
        }
        elsif ( my $text = $child_elt->text ) {
            $node_obj->set_namespaces( 'tbe' => _NS_TWE_ );
            $node_obj->add_meta(
                $self->_factory->create_meta(
                    '-triple' => { 'tbe:' . lc( $child_elt->tag ) => $text }
                )
            );
        }
    }
    for my $att_name ( $node_elt->att_names ) {
        $node_obj->set_namespaces( 'tba' => _NS_TWA_ );
        if ( $att_name eq 'ANCESTORWITHPAGE' ) {
            $node_obj->set_generic(
                'ANCESTORWITHPAGE' => $node_elt->att($att_name) );
        }
        if ( defined $node_elt->att($att_name) ) {
            $node_obj->add_meta(
                $self->_factory->create_meta(
                    '-triple' =>
                      { 'tba:' . lc($att_name) => $node_elt->att($att_name) }
                )
            );
        }
    }
    $twig->purge;
}
1;