| Bio-Phylo documentation | Contained in the Bio-Phylo distribution. |
Bio::Phylo::Parsers::Phyloxml - Parser used by Bio::Phylo::IO, no serviceable parts inside
This module parses phyloxml data. It is called by the Bio::Phylo::IO facade, don't call it directly. In addition to parsing from files, handles or strings (which are specified by the -file, -handle and -string arguments) this parser can also parse xml directly from a url (-url => $phylows_output), provided you have LWP installed.
The phyloxml parser is called by the Bio::Phylo::IO object. Look there to learn how to parse phyloxml (or any other data Bio::Phylo supports).
Also see the manual: Bio::Phylo::Manual and http://rutgervos.blogspot.com.
For more information about the phyloxml data standard, visit http://www.phyloxml.org
If you use Bio::Phylo in published research, please cite it:
Rutger A Vos, Jason Caravas, Klaas Hartmann, Mark A Jensen and Chase Miller, 2011. Bio::Phylo - phyloinformatic analysis using Perl. BMC Bioinformatics 12:63. http://dx.doi.org/10.1186/1471-2105-12-63
$Id: Phyloxml.pm 1660 2011-04-02 18:29:40Z rvos $
| Bio-Phylo documentation | Contained in the Bio-Phylo distribution. |
package Bio::Phylo::Parsers::Phyloxml; use strict; use base 'Bio::Phylo::Parsers::Abstract'; use Bio::Phylo::Util::Exceptions 'throw'; use Bio::Phylo::Util::Dependency 'XML::Twig'; use Bio::Phylo::Util::CONSTANT qw'looks_like_instance'; use Bio::Phylo::NeXML::Writable; use Bio::Phylo::Factory;
# podinherit_insert_token
# The factory object, to instantiate Bio::Phylo objects #my $factory = Bio::Phylo::Factory->new; # For semantic annotations Bio::Phylo::NeXML::Writable->set_namespaces( 'px' => 'http://www.phyloxml.org/1.10/terms#' ); # I factored the logging methods in Bio::Phylo (debug, info, # warning, error, fatal) out of the inheritance tree and put # them in a separate logging object. # my $logger = Bio::Phylo::Util::Logger->new; # helper method to add parser reading position to log messages sub _pos { my $self = shift; my $t = $self->{'_twig'}; join ':', ( $t->current_line, $t->current_column, $t->current_byte ); } # nice 'n' generic: we provide an element and a class, # from the class we instantiate a new object, we set # the element id in the generic slot of the object. # If the element has a label, use that as name, # otherwise use id. Additional constructor args can # be specified using named arguments, e.g. -type => 'dna' sub _obj_from_elt { my ( $self, $elt, $class, %args ) = @_; # factory object handles instantiation (and class loading) # see Bio::Phylo::Factory my $method = "create_$class"; my $obj = $self->_factory->$method(%args); # description if ( my ($desc_elt) = $elt->children('description') ) { $obj->set_desc( $desc_elt->text ); } # id_source if ( my $id_source = $elt->att('id_source') ) { $obj->add_meta( $self->_factory->create_meta( '-triple' => { 'px:id_source' => $id_source } ) ); } # name my $tag = $elt->tag; my ($name_elt) = ( $elt->children('name') ); if ( defined $name_elt ) { my $id = $name_elt->text; $obj->set_name($id); $self->_logger->debug( $self->_pos . " processed <$tag id=\"$id\"/>" ); } else { $self->_logger->debug( $self->_pos . " processed <$tag/>" ); } return $obj; } # here we create the object instance that will process the file/string sub _init { my $self = shift; # this is the actual parser object, which needs to hold a reference # to the XML::Twig object, to a hash of processed blocks (for fast lookup by id) # and an array of ids (to preserve processing order) $self->{'_taxon_in_taxa'} = {}; $self->{'_proj'} = $self->_factory->create_project; $self->{'_blocks'} = [ $self->{'_proj'} ]; # here we put the two together, i.e. create the actual XML::Twig object # with its handlers, and create a reference to it in the parser object $self->{'_twig'} = XML::Twig->new( # These handlers are called when the subtree is fully loaded, which # means we can traverse it 'TwigHandlers' => { 'phylogeny' => sub { &_handle_phylogeny( @_, $self ) }, }, # These handlers are called when the element opens, that is the # subtree hasn't been loaded yet - but the attributes have been, # so we can read in the namespaces here. 'StartTagHandlers' => { '_all_' => sub { my ( $twig, $elt ) = @_; for my $att_name ( $elt->att_names ) { if ( $att_name =~ /^xmlns:(.+)$/ ) { my $prefix = $1; my $ns = $elt->att($att_name); Bio::Phylo::NeXML::Writable->set_namespaces( $prefix => $ns ); } } } }, ); return $self; } sub _handle_phylogeny { my ( $twig, $phylogeny_elt, $self ) = @_; my $forest; my $tree = _obj_from_elt( $self, $phylogeny_elt, 'tree' ); unless ( $forest = $self->_project->get_forests->[0] ) { $forest = $self->_factory->create_forest; $self->_project->insert($forest); } $forest->insert($tree); $tree->set_as_unrooted( $phylogeny_elt->att('rooted') ne 'true' ); for ( $phylogeny_elt->children('clade') ) { $self->_process_clade( $twig, $_, $tree ); } } sub _process_clade { my ( $self, $twig, $clade_elt, $tree, $parent ) = @_; my $node = _obj_from_elt( $self, $clade_elt, 'node' ); $node->set_parent($parent) if $parent; $tree->insert($node); # branch length $self->_process_branch_length( $clade_elt, $node ); # support values, e.g. bootstrap, posterior $self->_handle_confidence( $_, $node ) for $clade_elt->children('confidence'); # taxonomy, e.g. identifiers, GUIDs, ranks, names $self->_process_taxonomy( $_, $node ) for $clade_elt->children('taxonomy'); # events, e.g. duplications, speciations $self->_process_events( $_, $node ) for $clade_elt->children('events'); for ( $clade_elt->children('clade') ) { $self->_process_clade( $twig, $_, $tree, $node ); } } sub _process_sequence { my ( $self, $seq_elt, $node ) = @_; my ( $taxon, $taxa ) = $self->_fetch_taxon_and_taxa($node); my $matrix; unless ( $matrix = $self->get_matrices->[0] ) { $matrix = $self->_factory->create_matrix( '-taxa' => $taxa ); $self->_project->insert($matrix); } my $datum = $self->_obj_from_elt( $seq_elt, 'datum', '-taxon' => $taxon ); $matrix->insert($datum); } sub _process_branch_length { my ( $self, $clade_elt, $node ) = @_; if ( my ($bl_elt) = ( $clade_elt->children('branch_length') ) ) { $node->set_branch_length( $bl_elt->text ); } if ( my $length = $clade_elt->att('branch_length') ) { $node->set_branch_length($length); } } sub _fetch_taxon_and_taxa { my ( $self, $node ) = @_; # fetch or instantiate taxon object my ( $taxon, $taxa ); unless ( $taxon = $node->get_taxon ) { unless ( $taxa = $self->_project->get_taxa->[0] ) { $self->_project->insert( $taxa = $self->_factory->create_taxa ); $self->_project->get_forests->[0]->set_taxa($taxa); } $taxon = $self->_factory->create_taxon; $taxa->insert($taxon); $node->set_taxon($taxon); } return $taxon, $taxa; } sub _process_taxonomy { my ( $self, $taxonomy_elt, $node ) = @_; # fetch or instantiate taxon object my ($taxon) = $self->_fetch_taxon_and_taxa($node); # handle taxonomy annotations $self->_process_taxonomy_annotations( $_, $taxon ) for $taxonomy_elt->children; } sub _process_taxonomy_annotations { my ( $self, $elt, $taxon ) = @_; my ( $text, $tag ) = ( $elt->text, $elt->tag ); if ( my $provider = $elt->att('provider') ) { $taxon->add_meta( $self->_factory->create_meta( '-triple' => { "px:${tag}" => $self->_factory->create_meta( '-triple' => { "px:${provider}" => $elt->text } ) } ) ); } else { $taxon->add_meta( $self->_factory->create_meta( '-triple' => { "px:${tag}" => $text } ) ); } } sub _handle_confidence { my ( $self, $confidence_elt, $node ) = @_; $node->add_meta( $self->_factory->create_meta( '-triple' => { 'px:confidence' => $self->_factory->create_meta( '-triple' => { 'px:' . $confidence_elt->att('type') => $confidence_elt->text } ) } ) ); } sub _process_events { my ( $self, $events_elt, $node ) = @_; my @events; for ( $events_elt->children ) { push @events, $self->_factory->create_meta( '-triple' => { 'px:' . $_->tag => $_->text } ); } $node->add_meta( $self->_factory->create_meta( '-triple' => { 'px:events' => \@events } ) ); } # this method will be called by Bio::Phylo::IO, indirectly, through # _from_handle if the parse function is called with the -file => $filename # argument, or through _from_string if called with the -string => $string # argument sub _parse { my $self = shift; $self->_logger->debug("going to parse xml"); $self->_init; my %opt = @_; # XML::Twig doesn't care if we parse from a handle or a string $self->{'_twig'}->parse( $self->_string ); # we're done, now order the blocks my $ordered_blocks = $self->{'_blocks'}; # prepare the requested return... my $temp_project = shift( @{$ordered_blocks} ); return @{ $temp_project->get_taxa }, @{ $temp_project->get_forests }, @{ $temp_project->get_matrices }; } sub DESTROY { 1 } 1;