/usr/local/CPAN/Bio-Das/Bio/Das/Request/Features.pm
package Bio::Das::Request::Features;
# $Id: Features.pm,v 1.16 2010/06/16 21:28:41 lstein Exp $
# this module issues and parses the types command, with arguments -dsn, -segment, -categories, -enumerate
use strict;
use Bio::Das::Type;
use Bio::Das::Feature;
use Bio::Das::Segment;
use Bio::Das::Request;
use Bio::Das::Util 'rearrange';
use vars '@ISA';
@ISA = 'Bio::Das::Request';
sub new {
my $pack = shift;
my ($dsn,$segments,$types,$categories,$feature_id,$group_id,$das,$fcallback,$scallback)
= rearrange([
['dsn','dsns'],
['segment','segments'],
['type','types'],
['category','categories'],
'feature_id',
'group_id',
'das',
['callback','feature_callback'],
'segment_callback',
],@_);
my $self = $pack->SUPER::new(
-dsn => $dsn,
-callback => $fcallback,
-args => {
segment => $segments,
category => $categories,
type => $types,
feature_id => $feature_id,
group_id => $group_id,
}
);
$self->{segment_callback} = $scallback if $scallback;
$self->das($das) if defined $das;
$self;
}
sub command { 'features' }
sub das {
my $self = shift;
my $d = $self->{das};
$self->{das} = shift if @_;
$d;
}
sub segment_callback { shift->{segment_callback} }
sub t_DASGFF {
my $self = shift;
my $attrs = shift;
if ($attrs) {
$self->clear_results;
}
delete $self->{tmp};
}
sub t_GFF {
# nothing to do here -- probably should check version
}
sub t_SEGMENT {
my $self = shift;
my $attrs = shift;
if ($attrs) { # segment section is starting
$self->{tmp}{current_segment} = Bio::Das::Segment->new($attrs->{id},$attrs->{start},
$attrs->{stop},$attrs->{version},
$self->das,$self->dsn
);
$self->{tmp}{current_feature} = undef;
$self->{tmp}{features} = [];
}
else { # reached the end of the segment, so push result
$self->finish_segment();
}
}
sub finish_segment {
my $self = shift;
$self->infer_parents_from_groups($self->{tmp}{features});
my $features = $self->build_object_hierarchy($self->{tmp}{features});
if ($self->segment_callback) {
eval {$self->segment_callback->($self->{tmp}{current_segment}=>$features)};
warn $@ if $@;
} else {
$self->add_object($self->{tmp}{current_segment},$features);
}
delete $self->{tmp}{current_segment};
delete $self->{tmp}{features};
}
# for features that have a <group> but no parent or parts,
# create inferred parents
sub infer_parents_from_groups {
my $self = shift;
my $f = shift;
my (%inferred_parents,%group_types);
for my $feature (@$f) {
my $group = $feature->group or next;
next if $feature->parent_id;
next if $feature->child_ids > 0;
$group = "group_$group"; # avoid collisions
unless ($inferred_parents{$group}) {
my $p = $inferred_parents{$group} = Bio::Das::Feature->new(
-segment => $feature->segment,
-id => $group,
-start => $feature->start,
-stop => $feature->stop
);
$p->orientation($feature->orientation);
$p->category('group');
my $gt = $feature->group_type || $feature->type;
my $type = $group_types{$gt}
||= Bio::Das::Type->new($gt,$gt,'group');
$p->type($type);
$p->link($feature->link);
$p->label($feature->label);
}
my $p = $inferred_parents{$group};
$p->start($feature->start) if $feature->start < $p->start;
$p->stop($feature->stop) if $feature->stop > $p->stop;
$feature->parent_id($group);
$p->add_child_id($feature->id);
}
push @$f,values %inferred_parents;
}
# this builds up hierarchical objects using their parent/child relationships
sub build_object_hierarchy {
my $self = shift;
my $f = shift;
my %id_to_feature = map {$_->id => $_} @$f;
my @top_level;
for my $feature (@$f) {
my $parent_id = $feature->parent_id;
if (defined $parent_id
&& (my $parent = $id_to_feature{$parent_id})) {
$parent->add_subfeature($feature);
} else {
push @top_level,$feature;
}
}
return \@top_level;
}
sub cleanup {
my $self = shift;
# this fixes a problem in the UCSC server
$self->finish_segment if $self->{tmp}{current_segment};
}
sub add_object {
my $self = shift;
push @{$self->{results}},@_;
}
# do nothing
sub t_UNKNOWNSEGMENT { }
sub t_ERRORSEGMENT { }
sub t_FEATURE {
my $self = shift;
my $attrs = shift;
if ($attrs) { # start of tag
my $feature = $self->{tmp}{current_feature} = Bio::Das::Feature->new($self->{tmp}{current_segment},
$attrs->{id}
);
$feature->label($attrs->{label}) if exists $attrs->{label};
$self->{tmp}{type} = undef;
}
else {
# feature is ending. This would be the place to do group aggregation
my $feature = $self->{tmp}{current_feature};
my $cft = $feature->type;
if (!$cft->complete) {
# fix up broken das servers that don't set a method
# the id and method will be set to the same value
$cft->id($cft->method) if $cft->method && !$cft->id;
$cft->method($cft->id) if $cft->id && !$cft->method;
}
if (my $callback = $self->callback) {
$callback->($feature);
} else {
push @{$self->{tmp}{features}},$feature;
}
}
}
sub t_TYPE {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
my $cft = $self->{tmp}{type} ||= Bio::Das::Type->new();
if ($attrs) { # tag starts
$cft->id($attrs->{id});
$cft->category($attrs->{category}) if $attrs->{category};
$cft->reference(1) if $attrs->{reference} && $attrs->{reference} eq 'yes';
$cft->has_subparts(1) if $attrs->{subparts} && $attrs->{subparts} eq 'yes';
$cft->has_superparts(1) if $attrs->{superparts} && $attrs->{superparts} eq 'yes';
} else {
# possibly add a label
if (my $label = $self->char_data) {
$cft->label($label);
}
my $type = $self->_cache_types($cft);
$feature->type($type);
}
}
sub t_METHOD {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
my $cft = $self->{tmp}{type} ||= Bio::Das::Type->new();
if ($attrs) { # tag starts
$cft->method($attrs->{id});
}
else { # tag ends
# possibly add a label
if (my $label = $self->char_data) {
$cft->method_label($label);
}
if ($cft->complete) {
my $type = $self->_cache_types($cft);
$feature->type($type);
}
}
}
sub t_PARENT {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->parent_id($attrs->{id}) if $attrs;
}
sub t_PART {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->add_child_id($attrs->{id}) if $attrs;
}
sub t_START {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->start($self->char_data) unless $attrs;
}
sub t_END {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->stop($self->char_data) unless $attrs;
}
sub t_SCORE {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->score($self->char_data) unless $attrs;
}
sub t_ORIENTATION {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->orientation($self->char_data) unless $attrs;
}
sub t_PHASE {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
$feature->phase($self->char_data) unless $attrs;
}
sub t_GROUP {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
if($attrs) {
$feature->group_label( $attrs->{label} );
$feature->group_type( $attrs->{type} );
$feature->group( $attrs->{id} );
}
}
sub t_LINK {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
if($attrs) {
$feature->link( $attrs->{href} );
} else {
$feature->link_label( $self->char_data );
}
}
sub t_NOTE {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
if ($attrs) {
$self->{tmp}{note_tag} = $attrs->{tag} if exists $attrs->{tag};
} else {
$feature->add_note($self->{tmp}{note_tag},$self->char_data);
}
}
sub t_TARGET {
my $self = shift;
my $attrs = shift;
my $feature = $self->{tmp}{current_feature} or return;
if($attrs){
$feature->target($attrs->{id},$attrs->{start},$attrs->{stop});
} else {
$feature->target_label($self->char_data());
}
}
sub _cache_types {
my $self = shift;
my $type = shift;
my $key = $type->_key;
return $self->{cached_types}{$key} ||= $type;
}
# override for segmentation behavior
sub results {
my $self = shift;
my %r = $self->SUPER::results or return;
# in array context, return the list of types
return map { @{$_} } values %r if wantarray;
# otherwise return ref to a hash
return \%r;
}
1;