| Bio-Prospect documentation | Contained in the Bio-Prospect distribution. |
Bio::Prospect::Align -- Package for overlaying multiple Prospect alignments
$Id: Align.pm,v 1.15 2003/11/18 19:45:45 rkh Exp $
use Bio::Prospect::Options;
use Bio::Prospect::LocalClient;
use Bio::Prospect::Align;
use Bio::SeqIO;
my $in = new Bio::SeqIO( -format=> 'Fasta', '-file' => $ARGV[0] );
my $po = new Bio::Prospect::Options( seq=>1, svm=>1, global_local=>1,
templates=>[qw(1bgc 1alu 1rcb 1eera)] );
my $pf = new Bio::Prospect::LocalClient( {options=>$po} );
while ( my $s = $in->next_seq() ) {
my @threads = $pf->thread( $s );
my $pa = new Bio::Prospect::Align( -debug=>0,-threads => \@threads );
print $pa->getAlignment(-format=>'html');
}
Bio::Prospect::Align represents an alignment of one or more Prospect structural alignments.
Name: new()
Purpose: return Bio::Prospect::Align object
Arguments:
-threads => [ Bio::Prospect::Thread objects ],
Returns: Bio::Prospect::Align object
Name: get_alignment()
Purpose: return the sequence alignment of the Bio::Prospect::Thread objects
Arguments:
-format => one of: 'clustalw', 'bl2seq','clustalw',
'emboss','fasta','mase','mega','meme','msf','nexus',
'pfam','phylip','prodom','psi','selex','stockholm'
(default: clustalw)
-show_ss => output secondard structure (default: off)
-show_seq => output target sequence (default: on)
Returns: scalar containing the alignment
Name: get_threads() Purpose: return the Bio::Prospect::Thread object list associated with this object Arguments: none Returns: list of Bio::Prospect::Thread objects
The following functions are documented for developers' benefit. THESE SHOULD NOT BE CALLED OUTSIDE OF THIS MODULE. YOU'VE BEEN WARNED.
Name: _align()
Purpose: private method that does the alignment work - called by new().
Builds a clustalw alignment internally. use get_alignment() to
retrieve the alignment in other formats.
Arguments:
-show_ss => 0 | 1 output secondard structure (default: off)
-show_seq => 0 | 1 output target sequence (default: on)
Returns: nothing
@@banner@@
| Bio-Prospect documentation | Contained in the Bio-Prospect distribution. |
# $Id: Align.pm,v 1.15 2003/11/18 19:45:45 rkh Exp $ # @@banner@@
package Bio::Prospect::Align; use vars qw( $VERSION ); $VERSION = sprintf( "%d.%02d", q$Revision: 1.15 $ =~ /(\d+)\.(\d+)/ ); use strict; use fields qw( debug threads alignment ); use Carp qw(cluck); use IO::Scalar; use Bio::AlignIO; use Bio::Prospect::Init;
#------------------------------------------------------------------------------- # new() #-------------------------------------------------------------------------------
sub new { my $self = fields::new(shift); # parse the arguments into a parameter hash my %param = @_; @param{ map { lc $_ } keys %param } = values %param; # lowercase keys # -threads is a required argument, return undef # if not supplied if ( $param{'-threads'} ) { $self->{'threads'} = $param{'-threads'}; } else { return undef; } if (not defined $ENV{MVIEW_APP}) { $ENV{MVIEW_APP} = $Bio::Prospect::Init::MVIEW_APP; if (not -x $ENV{MVIEW_APP}) { throw Bio::Prospect::Exception ( "MVIEW_APP not set", "MVIEW_APP isn't set and $ENV{MVIEW_APP} doesn't exist", "MVIEW_APP can be set in the Bio::Prospect::Init class or as an environment variable" ); } } return $self; } #------------------------------------------------------------------------------- # get_alignment() #-------------------------------------------------------------------------------
sub get_alignment { my $self = shift; my $retval = ''; # parse the arguments into a parameter hash my %param = @_; @param{ map { lc $_ } keys %param } = values %param; # lowercase keys # the show_ss and show_seq flags are only valid for html and clustalw output # (Bio::AlignIO.pm has some issues when I try them) if ( $param{'-show_ss'} && ( $param{'-format'} !~ m/clustalw|html/i ) ) { print STDERR "Bio::Prospect::Align.pm WARNING: get_alignment() show_ss flag is only valid for " . "-format => clustalw | html - ignoring\n"; $param{ '-show_ss' } = 0; } if ( !$param{'-show_seq'} && ( $param{'-format'} !~ m/clustalw|html/i ) ) { print STDERR "Bio::Prospect::Align.pm WARNING: get_alignment() show_seq flag is only valid for " . "-format => clustalw | html - ignoring\n"; $param{ '-show_seq' } = 1; } # # get the clustalw alignment if we have not already done so. # if ( ! defined $self->{'alignment'} ) { $self->_align( %param ); } # # default is clustalw because the alignment is internally stored # in clustalw format. utilize Bio::SimpleAln object for other # format tyoes if ( ! defined $param{'-format'} || $param{'-format'} eq 'clustalw' ) { $retval = $self->{'alignment'}; } elsif ( $param{'-format'} =~ m/html/i ) { my @args; #push(@args,'-css on'); # sigh... won't work to embed in another html push(@args,'-html head'); push(@args,'-ruler on -width 60'); push(@args,'-coloring consensus -threshold 80 -consensus on -con_coloring any'); $retval = `echo \"$self->{'alignment'}\" | $Bio::Prospect::Init::MVIEW_APP -in clustalw -alncolor '#FFFFFF' @args`; } elsif ( $param{'-format'} =~ m/bl2seq|clustalw|emboss|fasta|mase|mega|meme|msf|nexus|pfam|phylip|prodom|psi|selex|stockholm/i ) { my $in_fh = new IO::Scalar; my $out_fh = new IO::Scalar; $in_fh->open(\$self->{'alignment'}); $out_fh->open(\$retval); my $in = Bio::AlignIO->new(-fh => $in_fh , '-format' => 'clustalw'); my $out = Bio::AlignIO->new(-fh => $out_fh, '-format' => $param{'-format'} ); while ( my $aln = $in->next_aln() ) { $out->write_aln($aln); } $in_fh->close(); $out_fh->close(); } else { die( "Bio::Prospect::Align.pm ERROR: get_alignment() format ($param{'-format'}) " . "not recognized" ); } # [rkh] strip the surrounding table tags $retval =~ s%</PRE>\n<TABLE BORDER=0.+<TR><TD>\n<PRE>%\n%; $retval =~ s%\n</TD></TR></TABLE>\n%%; $retval =~ s%^%<!-- BEGIN Bio::Prospect::Align output -->\n%; $retval =~ s%$%\n<!-- END Bio::Prospect::Align output -->\n%; $retval =~ s/^ consensus\/[198].+\n//gm; return( $retval ); } #------------------------------------------------------------------------------- # get_threads() #-------------------------------------------------------------------------------
sub get_threads { my $self = shift; return( @{$self->{'threads'}}); } #------------------------------------------------------------------------------- # DEPRECATED METHODS - generally replaced by other methods, will be removed in # subsequent releases. #------------------------------------------------------------------------------- sub getAlignment { my $self = shift; cluck("getAlignment is deprecated on Oct-22-2003: use get_alignment instead\n"); return( $self->get_alignment(@_)); } sub getThreads { my $self = shift; cluck("getThreads is deprecated on Oct-22-2003: use get_threads instead\n"); return( $self->get_threads ); } #------------------------------------------------------------------------------- # INTERNAL METHODS: not intended for use outside this module #-------------------------------------------------------------------------------
#------------------------------------------------------------------------------- # _align() #-------------------------------------------------------------------------------
sub _align { my $self = shift; # # parse the arguments into a parameter hash # my %param = @_; @param{ map { lc $_ } keys %param } = values %param; # lowercase keys my @threads = @{$self->{'threads'}}; # # define defaults # $param{ '-show_ss' } = 0 if ! defined $param{ '-show_ss' }; $param{ '-show_seq' } = 1 if ! defined $param{ '-show_seq' }; # alignment algorithm: # 1. the ungapped query sequence is the universal coordinate system # 2. gaps inserted into the query sequence as a result of # an alignment to a template, must be reflected in the # alignments of the templates my %query_gap; # store the number of gaps inserted by a given alignment and residue number my %max_gap; # store the largest gap in all the alignments prior to each residue my @template; my @ss; # store an ungapped alignment my @ungapped_query = grep ! /-/, split '', $threads[0]->qseq_align(); # iterate through each alignment and count the gaps inserted prior # to each residue and which template alignment inserted that gap. my $res_num = 0; for (my $i=0;$i<=$#threads;$i++) { # store target sequence $template[$i] = [ split '', $threads[$i]->tseq_align() ]; # store the secondary structure information (this is based on the # gapped template sequence) $ss[$i] = [ split '', $threads[$i]->tss() ]; $res_num = 0; my @query = split //,$threads[$i]->qseq_align(); for( my $j=0; $j<=$#query; $j++ ) { if ( $query[$j] eq '-' ) { # found gap $query_gap{$res_num}{$i}++; } else { # if no gap, then increment the residue number $res_num++; } } } # sanity check on query_res_count using the ungapped query aligment # build consensus query sequence by applying the maximium number of gaps # prior to each residue my $consensus_str = ''; for (my $res_num=0; $res_num<=$#ungapped_query; $res_num++) { if ( defined $query_gap{$res_num} ) { foreach my $t ( sort { $query_gap{$res_num}{$b} <=> $query_gap{$res_num}{$a} } keys %{$query_gap{$res_num}} ) { $max_gap{$res_num} = $query_gap{$res_num}{$t}; $consensus_str .= ( '-'x$max_gap{$res_num} ); last; } } $consensus_str .= $ungapped_query[$res_num]; } # Iterate through each template alignment. Fix the template alignments by # inserting the difference between the maximium number of gap inserts for the # corresponding query residue and the gap inserts for this template alignment. for (my $i=0;$i<=$#threads;$i++) { my $res_num = 0; my $gaps_inserted=0; my @query = split //,$threads[$i]->qseq_align(); for( my $j=0; $j<=$#query; $j++ ) { if ( $query[$j] ne '-' ) { my $gap_length = ( defined $query_gap{$res_num}{$i} ) ? $max_gap{$res_num} - $query_gap{$res_num}{$i} : $max_gap{$res_num}; if ( defined $gap_length && $gap_length > 0 ) { # account for the fact that we have already added some gaps into template and ss my $ins_pos = $j+$gaps_inserted; print STDERR "insert $gap_length at the $res_num th residue into the $i sequence at the $ins_pos th position\n" if $ENV{'DEBUG'}; print STDERR "template: " . join('',@{$template[$i]}),"\n" if $ENV{'DEBUG'}; for ( my $k=0; $k<$gap_length; $k++ ) { splice(@{$ss[$i]}, $ins_pos, 0, '-' ); splice(@{$template[$i]}, $ins_pos, 0, '-' ); $gaps_inserted++; } print STDERR "template: " . join('',@{$template[$i]}),"\n" if $ENV{'DEBUG'}; } $res_num++; } } } my @consensus = split //,$consensus_str; print STDERR "consensus: " . join('',@consensus) . "\n" if $ENV{'DEBUG'}; # sanity check for(my $i=0;$i<=$#template;$i++) { if ( scalar(@{$template[$i]} != $#consensus+1 )) { warn("Bio::Prospect::Align.pm ERROR: template length(" . scalar(@{$template[$i]}) .") != query length (" . ($#consensus+1) . ")\n"); } } # build clustalw alignment my $offset = 60; my $align = "CLUSTAL W(1.81) multiple sequence alignment\n\n\n"; for (my $start=0; $start<=$#consensus; $start+=($offset)) { my $end = ( $start + $offset - 1 ) < $#consensus ? $start + $offset - 1: $#consensus; $align .= sprintf("%-22s %s\n","QUERY",join('',@consensus[$start..$end])); for (my $i=0; $i<=$#template; $i++) { $align .= sprintf("%-22s %s\n",$threads[$i]->tname(),join('',@{$template[$i]}[$start..$end])) if $param{'-show_seq'}; $align .= sprintf("%-22s %s\n",'#ss-' . $threads[$i]->tname(),join('',@{$ss[$i]}[$start..$end])) if $param{'-show_ss'}; } $align .= "\n"; } $self->{'alignment'} = $align; return; } 1;