| Bio-Prospect documentation | Contained in the Bio-Prospect distribution. |
Bio::Prospect::LocalClient -- execute Prospect locally $Id: LocalClient.pm,v 1.32 2003/11/18 19:45:45 rkh Exp $
my $in = new Bio::SeqIO( -format=> 'Fasta', '-file' => $ARGV[0] );
my $po = new Bio::Prospect::Options( seq=>1, svm=>1, global_local=>1,
templates=>['1alu', '1bgc','1eera']);
my $pf = new Bio::Prospect::LocalClient( {options=>$po );
while ( my $s = $in->next_seq() ) {
my @threads = $pf->thread( $s );
}
Bio::Prospect::LocalClient is runs Prospect locally. It is intended to be used to facilitate high-throughput protein sequence threading and as the server-side component of Bio::Prospect::SoapClient, with which it is API compatible.
Name: new() Purpose: constructor Arguments: hash reference with following key/value pairs options => Bio::Prospect::Options object (required) Returns: Bio::Prospect::LocalClient
Name: thread() Purpose: return a list of Thread objects Arguments: scalar sequence or Bio::PrimarySeqI-derived object Returns: list of Bio::Prospect::Thread objects
Name: thread_summary() Purpose: return a list of ThreadSummary objects Arguments: Bio::Seq object Returns: list of rospect2::ThreadSummary objects
Name: xml() Purpose: return xml string Arguments: Bio::Seq object Returns: string
The following functions are documented for developers' benefit. THESE SHOULD NOT BE CALLED OUTSIDE OF THIS MODULE. YOU'VE BEEN WARNED.
Name: _get_svm_scores() Purpose: return a hash of svm scores from a prospect sort file Arguments: sort filename Returns: hash
Thread one sequence in the Bio::Seq object or the scalar string. The xml ouptut filename is returned. Threading results are cached by sequence for the lifetime of the LocalClient object. See also ::thread.
Threads the fasta-formatted sequence in filename which is passed
directly to prospect. The name of a temporary file which contains the raw
xml output is returned. This method will work with multiple sequences in
filename, but other routines in this module will not understand
multi-query xml output reliably. Most callers should use thread()
instead.
Name: _hasSvmScore() Purpose: check whether the prospect xml file already contains a svmScore tag Arguments: prospect xml file Returns: 1 (has svm score) or 0 (no svm score)
Name: _output_svm_score() Purpose: output the svm score in the propsect output file Arguments: prospect xml file, prospect sort file Returns: prospect xml file with svm score
Name: _sort1() Purpose: run sortProspect on threading file Arguments: prospect xml file Returns: filename of sortProspect results
Prepares temporary files based on options (e.g., writes a temporary `tfile') and generates an array of command line options in @{$self->{commandline}}. Args 1 and 2 are input and output respectively and MUST be sprintf'd before use. See thread_1_file().
Bio::Prospect::Options, Bio::Prospect::File, Bio::Prospect::Client, Bio::Prospect::SoapClient, Bio::Prospect::Thread, Bio::Prospect::ThreadSummary
http://www.bioinformaticssolutions.com/
| Bio-Prospect documentation | Contained in the Bio-Prospect distribution. |
package Bio::Prospect::LocalClient; use base Bio::Prospect::Client; use warnings; use strict; use File::Temp qw( tempfile tempdir ); use Carp qw(cluck); use IO::File; use Bio::Prospect::Exceptions; use Bio::Prospect::utilities; use Bio::Prospect::ThreadSummary; use Bio::Prospect::Init; use Digest::MD5; use vars qw( $VERSION ); $VERSION = sprintf( "%d.%02d", q$Revision: 1.32 $ =~ /(\d+)\.(\d+)/ ); #------------------------------------------------------------------------------- # new() #-------------------------------------------------------------------------------
sub new(;%) { my $self = shift->SUPER::new(@_); $self->_setenv(); $self->_prepare_options(); $self->{'xmlCacheName'} = 'xmlCache'; # name of xml file cache $self->{'sortCacheName'} = 'sortCache'; # name of sort file cache return $self; } #------------------------------------------------------------------------------- # thread() #-------------------------------------------------------------------------------
sub thread($$) { my ($self,$s) = @_; if ( not defined $s or (ref $s and not $s->isa('Bio::PrimarySeqI')) ) { throw Bio::Prospect::BadUsage( "Bio::Prospect::LocalClient::thread() requires one Bio::PrimarySeqI subclass or " . "scalar sequence argument" ); } my $seq = ref $s ? $s->seq() : $s; my $xfn = $self->_thread_to_file( $seq ); my $pf = new Bio::Prospect::File; $pf->open( "<$xfn" ) || throw Bio::Prospect::RuntimeError("$xfn: $!\n"); $self->{'threads'} = []; while( my $t = $pf->next_thread() ) { push @{$self->{'threads'}}, $t; } return( @{$self->{'threads'}} ); } #------------------------------------------------------------------------------- # thread_summary() #-------------------------------------------------------------------------------
sub thread_summary($$) { my ($self,$s) = @_; my @summary; foreach my $t ( $self->thread($s) ) { push @summary, new Bio::Prospect::ThreadSummary( $t ); } return( @summary ); } #------------------------------------------------------------------------------- # xml() #-------------------------------------------------------------------------------
sub xml($$) { my ($self,$s) = @_; my $xfn = $self->_thread_to_file( $s ); my $in = new IO::File "<$xfn" or throw Bio::Prospect::RuntimeError( "can't open $xfn for reading"); my $xml=''; while(<$in>){ $xml .= $_; } return( $xml ); } #------------------------------------------------------------------------------- # DEPRECATED METHODS - will be removed in subsequent releases. #------------------------------------------------------------------------------- sub score_summary($$) { cluck("This function is deprecated on Oct-23-2003:\n"); my ($self,$s) = @_; my $xfn = $self->thread_to_file( $s ); return Bio::Prospect::utilities::score_summary( $xfn ); } sub thread_to_file($$) { cluck("This function is deprecated on Oct-23-2003:\n"); return _thread_to_file($_[0],$_[1]); } #------------------------------------------------------------------------------- # INTERNAL METHODS: not intended for use outside this module #-------------------------------------------------------------------------------
#------------------------------------------------------------------------------- # _get_svm_scores() #-------------------------------------------------------------------------------
sub _get_svm_scores($$) { my ($self,$fn) = @_; my %retval; my $in = new IO::File $fn || throw Bio::Prospect::RuntimeError( "can't open $fn for reading" ); my @fld; while(<$in>) { next if m/^:Protein/; @fld = split /\s+/; $retval{$fld[0]} = $fld[3]; } if ( scalar (keys %retval) == 0 ) { throw Bio::Prospect::RuntimeError ( 'Sort file is empty', "The sort file for this sequence is empty. sortProspect likely failed!", "Execute sortProspect on the command-line and check output messages. sortProspect " . "can fail because of erroneous characters in the output xml file (e.g. null character)." ); } close($in); return %retval; } sub _thread_to_file($$) { my ($self,$s) = @_; my $xfn; my $seq = ref $s ? $s->seq() : $s; # check the cache for a cached file cooresponding to this sequence. # if available then return it rather than running prospect my $cached = $self->_get_cache_file( Digest::MD5::md5_hex( $seq ), $self->{'xmlCacheName'} ); if ( defined $cached and -e $cached ) { warn("retrieved cache threading info $cached\n") if $ENV{DEBUG}; return $cached; } my $ifn = $self->_write_seqfile( $seq ); $xfn = $self->_thread1( $ifn ); unlink( $ifn ); # new version of prospect outputs svm score when threading. no # longer need to run sortProspect in this case. for backwards # compatibility, check the xml file for svmScore tag. If # not present, then run sortProspect. if ( $self->_hasSvmScore( $xfn ) ) { print(STDERR "xml file ($xfn) already contains svm scores - skip sortProspect step\n") if $ENV{DEBUG}; $self->_put_cache_file( Digest::MD5::md5_hex( $seq ), $self->{'xmlCacheName'}, $xfn ); return $xfn; } else { print(STDERR "xml file ($xfn) doesn't contain svm scores - run sortProspect\n") if $ENV{DEBUG}; # run sortProspect so as to get svm score my $sfn = $self->_sort1( $xfn ); # insert svm score into the prospect output my $ffn = $self->_output_svm_score( $xfn, $sfn ); unlink( $xfn ); unlink( $sfn ); # cache the prospect output filename $self->_put_cache_file( Digest::MD5::md5_hex( $seq ), $self->{'xmlCacheName'}, $ffn ); return $ffn; }
} sub _thread1($$) { my ($self,$ifn) = @_; my $xfn = "$ifn.xml"; my @cl = @{$self->{commandline}}; $cl[1] = sprintf($cl[1],$ifn); $cl[2] = sprintf($cl[2],$xfn); print(STDERR "about to @cl\n") if $ENV{DEBUG}; if ( eval { system("@cl") } ) { my $s = $?; if ($s & 127) { $s &= 127; my $sn = Bio::Prospect::utilities::signame($s); throw Bio::Prospect::RuntimeError ( 'failed to execute Prospect', "received signal $s ($sn)" ); } $s >>= 8; throw Bio::Prospect::RuntimeError ( 'failed to execute Prospect', "system(@cl) exited with status $s", 'check your prospect installation manually' ); } my $fh = new IO::File; $fh->open("<$xfn") || throw Bio::Prospect::Exception("Prospect failed", "prospect completed but didn't create an output file"); while(<$fh>) { # ugh-prospect sometimes barfs if (m/<scoreInfo>/) { # and completes with status 0 $fh->close(); return $xfn; } # (e.g., large sequences) } throw Bio::Prospect::Exception("Prospect failed", "prospect completed but the output wasn't valid", "prospect may fail if the sequence is " ."too large or there's not enough memory. Try " ."running the sequence manually."); return undef;
} #------------------------------------------------------------------------------- # _hasSvmScore() #-------------------------------------------------------------------------------
sub _hasSvmScore { my ($self,$xmlFile) = @_; my $in = new IO::File "$xmlFile" or throw Bio::Prospect::RuntimeError("can't open $xmlFile for reading"); my $retval = 0; while(<$in>) { if ( m/svmScore/ ) { $retval = 1; last; } } $in->close(); return $retval; } #------------------------------------------------------------------------------- # _output_svm_score() #-------------------------------------------------------------------------------
sub _output_svm_score { my ($self,$xmlFile,$sortFile) = @_; my %svm = $self->_get_svm_scores( $sortFile ); my $outFile = "$xmlFile.svm"; my $in = new IO::File "$xmlFile" or throw Bio::Prospect::RuntimeError("can't open $xmlFile for reading"); my $out = new IO::File ">$outFile" or throw Bio::Prospect::RuntimeError("can't open $outFile for reading"); local $/ = '</threading>'; while(<$in>) { next if ! m/threading/; # make sure that we have valid prospect thread m#template="(\w+)"#; my $t = $1; if ( ! defined $svm{$t} or $svm{$t} eq '') { throw Bio::Prospect::RuntimeError ( 'Unable to retrieve svm sort', "no svm score for template=$t" ); } s#(<rawScore>.*?</rawScore>)#$1\n<svmScore>$svm{$t}</svmScore>#g; print $out $_; } close($in); return( $outFile ); } #------------------------------------------------------------------------------- # _sort1() #-------------------------------------------------------------------------------
sub _sort1($$) { my ($self,$xfn) = @_; my $sfn = "$xfn.sort"; my $cmd = "sortProspect $xfn 2>/dev/null 1>$sfn"; print(STDERR "about to $cmd\n") if $ENV{DEBUG}; if ( eval { system("$cmd") } ) { my $s = $?; if ($s & 127) { $s &= 127; my $sn = Bio::Prospect::utilities::signame($s); throw Bio::Prospect::RuntimeError ( 'failed to execute Prospect', "received signal $s ($sn)" ); } $s >>= 8; throw Bio::Prospect::RuntimeError ( 'failed to execute Prospect', "system($cmd) exited with status $s", 'check your prospect installation manually' ); } # sanity checks on the sort output?? return $sfn; } sub _setenv { if (not -d $Bio::Prospect::Init::PROSPECT_PATH ) { throw Bio::Prospect::Exception ( "PROSPECT_PATH is not set correctly", "PROSPECT_PATH ($Bio::Prospect::Init::PROSPECT_PATH}) is not a valid directory", "Check your prospect installation and set PROSPECT_PATH in Bio::Prospect::Init or as an environment variable" ); } else { $ENV{'PROSPECT_PATH'} = $Bio::Prospect::Init::PROSPECT_PATH; } if (not -d $Bio::Prospect::Init::PDB_PATH) { throw Bio::Prospect::Exception ( "PDB_PATH is not set correctly", "PDB_PATH ($Bio::Prospect::Init::PDB_PATH) is not a valid directory", "Check your prospect installation and set PDB_PATH in Bio::Prospect::Init or as an environment variable" ); } else { $ENV{'PDB_PATH'} = $Bio::Prospect::Init::PDB_PATH; } } sub _prepare_options($$) { my $self = shift; my $opts = $self->{options}; (ref $opts eq 'Bio::Prospect::Options') || throw Bio::Prospect::BadUsage('Bio::Prospect::Options argument is missing'); my @cl = ( "$Bio::Prospect::Init::PROSPECT_PATH/bin/prospect" ); if (exists $opts->{phd}) { throw Exception::NotYetSupported ( "phd threading isn't implemented" ); } elsif (exists $opts->{ssp}) { throw Exception::NotYetSupported ( "ssp threading isn't implemented" ); } elsif (exists $opts->{seq}) { push( @cl, '-seqfile %s' ); } else { throw Bio::Prospect::BadUsage("Bio::Prospect::Options doesn't specify input type"); } push(@cl, '-o %s'); push(@cl, '-ncpus '.($opts->{ncpus}||2) ); push(@cl, '-freqfile',$opts->{freqfile} ) if ( exists $opts->{freqfile} ); push(@cl, '-reliab') if $opts->{zscore}; push(@cl, '-3d') if $opts->{'3d'}; push(@cl, $opts->{global_local} ? '-global_local' : '-global'); # template set selection # ONE of -scop, -tfile, -templates (array), or -fssp (default) if ($opts->{scop}) { push(@cl, '-scop') } elsif (exists $opts->{tfile}) { push(@cl, '-tfile', $opts->{tfile}) } elsif (exists $opts->{templates}) { my ($fh,$fn) = $self->_tempfile('lst'); $fh->print(join("\n",@{$opts->{templates}}),"\n"); $fh->close(); push(@cl, '-tfile', $fn); } else { push(@cl, '-fssp'); } push(@cl, '2> /dev/null' ) unless (defined $ENV{DEBUG} and $ENV{DEBUG}>5); push(@cl, '1>&2'); @{$self->{commandline}} = @cl; return @cl;
} sub _write_seqfile($$) { my ($self,$seq) = @_; throw Exception ('seq undefined') unless defined $seq; my ($fh,$fn) = $self->_tempfile('fa'); $seq =~ s/\s//g; my $len = length($seq); $seq =~ s/.{60}/$&\n/g; # wrap at 60 cols $fh->print( ">LocalClient /len=$len\n$seq\n"); $fh->close(); return $fn; }
1;