| GBrowse documentation | Contained in the GBrowse distribution. |
Given a detail segment, return the whole seq_id that contains it
Given a detail segment and the current settings, return the region centered on the segment. The whole segment can be passed if desired -- this will avoid a redundant lookup.
| GBrowse documentation | Contained in the GBrowse distribution. |
package Bio::Graphics::Browser2::Region; # provide method for fetching and manipulating the current # region or regions. use strict; use warnings; use Bio::Graphics::Browser2::Shellwords; use constant DEBUG=>0; sub new { my $self = shift; my $args = shift; my ($source,$db,$state,$searchopts) = @{$args}{'source','db','state','searchopts'}; $searchopts ||= 'default'; return bless { source => $source, db => $db, state => $state, searchopts => $self->parse_searchopts($searchopts), },ref($self) || $self; } sub source { shift->{source} } sub state { shift->{state} } sub db { shift->{db} } sub searchopts { shift->{searchopts} } sub parse_searchopts { my $self = shift; my $optstring = shift; my @default = qw(exact wildcard stem fulltext heuristic); my %all = map {$_=>1} qw(exact wildcard stem fulltext heuristic autocomplete); my %opts; my @tokens = split /[\s,]+/,lc $optstring; @tokens = ('default') unless @tokens; for my $t (@tokens) { my ($sign,$token) = $t =~ /([+-]?)(\w+)/; if ($token eq 'all') { $opts{$_}++ foreach keys %all; next; } if ($token eq 'none') { %opts = (); next; } if ($token eq 'default') { $opts{$_}++ foreach @default; next; } next unless $all{$token}; $opts{$token}++ if $sign eq '+' or !$sign; delete $opts{$token} if $sign eq '-'; } return \%opts; } sub feature_count { my $self = shift; my $features = $self->features; return 0 unless $features; return scalar @$features; } # get/set list of features that we are working on sub features { my $self = shift; my $d = $self->{features}; $self->{features} = shift if @_; return $d; } # lazy retrieval of current segment(s) -- may transform features if needed sub segments { my $self = shift; $self->{segments} ||= $self->features2segments($self->features) if $self->features; return $self->{segments} || []; } # lazy retrieval of first segment sub seg { my $self = shift; unless (exists $self->{segment}) { my $segments = $self->segments; $self->{segment} = $segments->[0] if @$segments; if (my $seg = $self->{segment}) { my $state = $self->state; $state->{ref} = $seg->seq_id; $state->{start} = $seg->start; $state->{stop} = $seg->end; } } return $self->{segment}; } # lazy retrieval of first whole segment sub whole_seg { my $self = shift; unless (exists $self->{whole_segment}) { my $whole_seg = $self->get_whole_segment($self->seg); $self->{whole_segment} = $whole_seg; my $state = $self->state; $state->{seg_min} = $whole_seg->start; $state->{seg_max} = $whole_seg->end; } return $self->{whole_segment}; } sub set_features_by_region { my $self = shift; my ($ref,$start,$stop) = @_; my $divider = $self->source->unit_divider; my $features = $self->lookup_features($ref,$start/$divider,$stop/$divider); $self->features($features); return $features; } # For backward compatibility, you can call search_features() either # with a scalar string, in which case it will be treated as a search # string to be parsed and search (for example "chrI:1000.2000") or # call with a hashref containing the arguments to be passed to # the db adaptor's features() method. sub search_features { my $self = shift; my $args = shift; my $db = $self->db; my $state = $self->state; $args ||= { }; unless (%$args) { return unless $state->{name}; $args->{-search_term} = $state->{name}; } warn "SEARCHING FOR ",join ' ',%$args," in $db" if DEBUG; my $features = $self->search_db($args); warn "FOUND @$features " if $features && DEBUG; $self->features($features); return $features; } sub features2segments { my $self = shift; my $features = shift; my $refclass = $self->source->global_setting('reference class') || ''; my $db = $self->db; my %seenit; my @segments = map { my $version = $_->can('version') ? $_->version : undef; $db->segment(-class => $refclass, -seq_id => $_->seq_id, -name => $_->seq_id, # to avoid breakage due to sloppy API -start => $_->start||0, -end => $_->end ||0, -stop => $_->end ||0, # to avoid breakage due to sloppy API -absolute => 1, defined $version ? (-version => $version) : ()) } grep { !$seenit{$_->seq_id,$_->start,$_->end}++} @$features; return \@segments; } sub get_whole_segment { my $self = shift; my $segment = shift; my $factory = $self->source->open_database(); # the segment class has been deprecated, but we still must support it my $class = eval {$segment->seq_id->class} || eval{$factory->refclass}; my ($whole_segment) = $factory->segment(-class=>$class, -name=>$segment->seq_id); $whole_segment ||= $segment; # just paranoia $whole_segment; } sub search_db { my $self = shift; my $args = shift; my ($features); if (my $name = $args->{-search_term}) { $name =~ tr/a-zA-Z0-9|.'"_*?: ;+-\/\#\[\]//cd; # remove rude/naughty characters my ($ref,$start,$stop,$class,$id) = $self->parse_feature_name($name); $features = $self->lookup_features($ref,$start,$stop,$class,$name,$id); } elsif ($args->{-name} && $args->{-name}=~/^id:(.+)/) { $features = $self->lookup_features(undef,undef,undef,undef,undef,$1); } else { my @features = $self->db->features(%$args); $features = \@features; } return wantarray ? @$features : $features; } sub lookup_features { my $self = shift; my ($name,$start,$stop,$class,$literal_name,$id) = @_; my $source = $self->source; my $refclass = $source->global_setting('reference class') || 'Sequence'; my $db = $self->db; my $divisor = $source->unit_divider; $start *= $divisor if defined $start; $stop *= $divisor if defined $stop; # automatic classes to try my @classes = $class ? ($class) : (split /\s+/,$source->global_setting('automatic classes')||''); if (defined $id && $db->can('get_feature_by_id')) { # this overrides everything else my $f = $db->get_feature_by_id($id); return $f ? [$f] : []; } my $features = []; my $searchopts = $self->searchopts; SEARCHING: { warn "searchopts = ",join ',',%$searchopts if DEBUG; unless (%$searchopts) { warn "segment(-name => $name,-start=>$start,-end=>$stop)" if DEBUG; my @f = $db->segment(-name => $name,-start=>$start,-end=>$stop); $features = \@f; last SEARCHING; } for my $n ([$name,$start,$stop], [$literal_name,undef,undef]) { my ($name_to_try,$start_to_try,$stop_to_try) = @$n; $name_to_try =~ s/([*?])/\\$1/g unless $searchopts->{wildcard}; if ($searchopts->{exact}) { # first try the non-heuristic search for my $class ($class,$refclass,@classes) { $features = $self->_feature_get($db, $name_to_try,$class, $start_to_try,$stop_to_try); last SEARCHING if @$features; } } # heuristic fetch. Try various abbreviations and wildcards my @sloppy_names = (); if ($searchopts->{heuristic}) { my $seqid_prefix = $source->seqid_prefix; if ($name_to_try =~ /^([\dIVXA-F]+)$/) { my %seenit; my $id = $1; foreach (qw(CHROMOSOME_ Chr chr),$seqid_prefix) { next unless $_; next if $seenit{$_}++; my $n = "${_}${id}"; push @sloppy_names,$n; } } # try to remove the chr CHROMOSOME_I if ((my $chr = $name_to_try) =~ s/^(chromosome_?|chr)//i) { push @sloppy_names,$chr; } if ($seqid_prefix && (my $chr = $name_to_try) =~ s/^$seqid_prefix//) { push @sloppy_names,$chr; } } if ($searchopts->{stem}) { push @sloppy_names,"$name_to_try*" if length $name_to_try >= 3 and $name_to_try !~ /\*$/; } for my $n (@sloppy_names) { for my $c ('',@classes) { $features = $self->_feature_get($db,$n,$c,$start_to_try,$stop_to_try); last SEARCHING if @$features; } } } } if (!@$features && $searchopts->{fulltext}) { warn "try a keyword search for $literal_name" if DEBUG; $features = $self->_feature_keyword_search($literal_name); } if ($class) { my $regex = quotemeta($class); my @f; foreach (@$features) { my $c = eval {$_->class}; push @f,$_ if ($c && $c =~ /^$regex/i) or $_->primary_tag =~ /^$regex/i; } $features = \@f; } return $features; } sub _feature_get { my $self = shift; my ($db,$name,$class,$start,$stop) = @_; my $refclass = $self->source->global_setting('reference class') || 'Sequence'; my @argv = (-name => $name); push @argv,(-class => $class) if defined $class; push @argv,(-start => $start) if defined $start; push @argv,(-end => $stop) if defined $stop; my @features; @features = grep {$_->length} $db->get_feature_by_name(@argv) #misnomer -- should be get_features_by_name! if !defined($start) && !defined($stop); warn "get_feature_by_name(@argv) => @features" if DEBUG; @features = grep {$_->length} $db->get_features_by_alias(@argv) if !@features && !defined($start) && !defined($stop) && $db->can('get_features_by_alias'); warn "get_features_by_alias(@argv) => @features" if DEBUG; @features = grep {$_->length} $db->segment(@argv) if !@features && $name !~ /[*?]/; return [] unless @features; warn "segment => @features" if DEBUG; # Deal with multiple hits. Winnow down to just those that # were mentioned in the config file. $class ||= ''; # to get rid of uninit variable warnings my $types = $self->source->_all_types($db); my @filtered = grep { my $type = $_->primary_tag; my $method = eval {$_->method} || ''; my $fclass = eval {$_->class} || ''; $type eq 'Segment' # ugly stuff accomodates loss of "class" concept in GFF3 || $type eq 'region' || $types->{$type} || $types->{$method} || !$fclass || $fclass eq $refclass || $fclass eq $class; } @features; # remove duplicate features -- same name, source, method and position warn "before duplicate removal features = @features" if DEBUG; my %seenit; @features = grep { my $key = eval{$_->gff_string} || join (':',$_->type,$_->seq_id,$_->start,$_->end); !$seenit{$key}++; } @features; warn "after duplicate removal features = @features" if DEBUG; return \@features; } sub _feature_keyword_search { my $self = shift; my $searchterm = shift; my $source = $self->source; # if they wanted something specific, don't give them non-specific results. return if $searchterm =~ /^[\w._-]+:/; # Need to untaint the searchterm. We are very lenient about # what is accepted here because we wil be quote-metaing it later. $searchterm =~ /([\w .,~!@\#$%^&*()-+=<>?:;\/]+)/; $searchterm = $1; my $db = $self->db; my $max_keywords = $source->global_setting('max keyword results'); my @matches; if ($db->can('search_attributes')) { my @attribute_names = shellwords ($source->global_setting('search attributes')||''); @attribute_names = ('Note') unless @attribute_names; @matches = $db->search_attributes($searchterm,\@attribute_names,$max_keywords); } elsif ($db->can('search_notes')) { @matches = $db->search_notes($searchterm,$max_keywords); } my @results; for my $r (@matches) { my ($name,$description,$score,$type,$id) = @$r; my ($seg) = $db->segment($name) or next; push @results,Bio::Graphics::Feature->new(-name => $name, -class => eval{$name->class} || undef, -desc => $description, -score => $score, -ref => $seg->abs_ref, -start => $seg->abs_start, -end => $seg->abs_end, -type => $type || 'feature', -primary_id => $id || undef, -factory=> $db); } return \@results; } sub parse_feature_name { my $self = shift; my $name = shift; if ($name =~ /^id:(.+)/) { return (undef,undef,undef,undef,$1); } my ($class,$ref,$start,$stop); if (my @a = $self->is_chromosome_region($name)) { ($ref,$start,$stop) = @a; } elsif ($name =~ /^(\w+):([^:]+)$/) { $class = $1; $ref = $2; } else { $ref = $name; } return ($ref,$start,$stop,$class); } sub is_chromosome_region { my $self = shift; my $name = shift; if ( ($name !~ /\.\./ and $name =~ /([\w._\/-]+):\s*(-?[-e\d.]+)\s*,\s*(-?[-e\d.]+)\s*$/) or $name =~ /([\w._\/-]+):\s*(-?[-e\d,.]+?)\s*(?:-|\.\.)\s*(-?[-e\d,.]+)\s*$/) { my $ref = $1; my $start = $2; my $stop = $3; $start =~ s/,//g; # get rid of commas $stop =~ s/,//g; return ($ref,$start,$stop); } return; }
sub whole_segment { my $self = shift; my $segment = shift || $self->seg; return unless $segment; my $db = $segment->factory; my $class = eval {$segment->seq_id->class} || eval{$db->refclass} || 'Sequence'; my ($whole) = $db->segment(-class=>$class, -name=>$segment->seq_id); return $whole; }
sub region_segment { my $self = shift; my $segment = shift || $self->seg; my $settings = shift; my $whole = shift; $whole ||= $self->whole_segment($segment) or return; my $regionview_length = $settings->{region_size}||0; my $detail_start = $segment->start; my $detail_end = $segment->end; my $whole_start = $whole->start; my $whole_end = $whole->end; # region can't be smaller than detail if ($detail_end - $detail_start + 1 > $regionview_length) { $regionview_length = 3*($detail_end - $detail_start + 1); } my $midpoint = ($detail_end + $detail_start) / 2; my $regionview_start = int($midpoint - $regionview_length/2 + 1); my $regionview_end = int($midpoint + $regionview_length/2); if ($regionview_length > $whole->length) { $regionview_length = $whole->length; } if ($regionview_start < $whole_start) { $regionview_start = 1; $regionview_end = $regionview_length; } if ($regionview_end > $whole_end) { $regionview_start = $whole_end - $regionview_length + 1; $regionview_end = $whole_end; } my $db = eval {$segment->factory}; my $class = eval {$segment->seq_id->class} || eval{$db->refclass}; my ($region_segment) = $db ? $db->segment(-class => $class, -name => $segment->seq_id, -seq_id=>$segment->seq_id, -start => $regionview_start, -end => $regionview_end) :Bio::Graphics::Feature->new(-name => $segment->seq_id, -seq_id => $segment->seq_id, -start => $regionview_start, -end => $regionview_end); return $region_segment; } 1;