InSilicoSpectro::InSilico::RetentionTimer::Petritis - InSilicoSpectro::InSilico::RetentionTimer::Petritis documentation


InSilicoSpectro documentation Contained in the InSilicoSpectro distribution.

Index


Code Index:

NAME

Top

InSilicoSpectro::InSilico::RetentionTimer::Petritis Prediction of peptide retention time by neural network training

SYNOPSIS

Top

  # creates a retention time predictor
  my $rt = InSilicoSpectro::InSilico::RetentionTimer::Petritis->new;

  # trains the predictor
  $rt->learn( data=>{expseqs=>['ELGFQG','HPGDFGADAQAAMSK','LSSPATLNSR','RFIK'],
              exptimes=>[1314,1194,1152,1500]},mode=>'verbose',
	      maxepoch=>100, sqrerror=>1e-3,mode=>'verbose',
	      nnet=>{learningrate=>0.05},layers=>[{nodes=>20},{nodes=>2},{nodes=>1}] );

  # predicts retention time for a peptide
  $rt->predict( peptide=>'ACFGDMKWVTFISLLRPLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE' );

  # saves the network
  $rt->write_xml(confile=>'nnet01.xml');

  # retrieves a previously saved network
  $rt->read_xml(confile=>'nnet00.xml');

  # assigns a calibrator to the predictor
  $ec=InSilicoSpectro::InSilico::ExpCalibrator->new( fitting=>'spline' );

  # fits the calibrator from expermiental values
  $rt->calibrate( data=>{calseqs=>['ELGFQG','HPGDFGADAQAAMSK','LSSPATLNSR','RFIK'],
                 caltimes=>[1314,1194,1152,1500]},calibrator=>$ec );

  # save current calibrator
  $rt->write_cal( calfile=>$file );

  # retrieve previously saved calibrator
  $rt->read_cal ( calfile=>$file );




DESCRIPTION

Top

Predicts HPLC retention time for peptides

METHODS

Top

my $rt=InSilicoSpectro::InSilico::RetentionTimer::Petritis->new(%h )

%h contains a hash

$rt->learn( data=>{expseqs=>\@seqs,exptimes=>\@times}, mode=>'verbose',maxepoch=>100, sqrerror=>1e-3,mode=>'verbose', nnet=>{learningrate=>0.05},layers=>[{nodes=>20},{nodes=>2},{nodes=>1}] ); );

Trains the network from experimental data given in the arrays (@seqs,@times).

maxepoch, sqrerror : train the network until sse < sqrerror or maxepoch
nnet=>{%h} : hash with options for method AI::NNFlex::Backprop->new( %h )
layers=>[{%h1},{%h2},{%h3}] : hashes with options for the 3 layers as defined by method AI::NNFlex::Backprop->add_layer( %hi )
mode=>'silent'|'verbose'

Method used for fitting

$rt->predict(peptide=>$str)

Predicts retention time for the peptide

$rt->predictor(peptide=>$str)

Same as predict() but without experimental fitting

$rt->calibrate( data=>{calseqs=>\@str,caltimes=>\@val},fitting=>$str );

Trains the predictor with experimental data and the chosen fitting method

fitting=>'linear'|'spline'

Method used for fitting

$rc->filter( filter=>$pc,error=>$str )

Filter experimental data in $rc->{data} by a cutting threshold of relative prediction error of $pc (in %).

error=>'relative'|'absolute'

Type of error for filtering.

$rt->writexml( confile=>$file )

Saves network into a file

$rt->readxml( confile=>$file )

Retrieves a previously saved network

$rt->write_cal( calfile=>$file );

Save current calibrator.

$rt->read_cal ( calfile=>$file );

Retrieve a previously saved calibrator.

$rt->set($name, $val)

Set an instance paramter.

$rt->get($name)

Get an instance parameter.

EXAMPLES

Top

see InSilicoSpectro/t/InSilico/testPetritis.pl script

SEE ALSO

Top

InSilicoSpectro::InSilico::RetentionTimer

InSilicoSpectro::InSilico::ExpCalibrator

Petritis K, Kangas LJ, Ferguson PL, Anderson GA, Pasa-Tolic L, Lipton MS, Auberry KJ, Strittmatter EF, Shen Y, Zhao R, Smith RD. "Use of artificial neural networks for the accurate prediction of peptide liquid chromatography elution times in proteome analyses". Anal Chem. 2003; 75(5):1039-48.

COPYRIGHT

Top

AUTHORS

Top

Pablo Carbonell, Alexandre Masselot, www.genebio.com


InSilicoSpectro documentation Contained in the InSilicoSpectro distribution.

use strict;

package InSilicoSpectro::InSilico::RetentionTimer::Petritis;
require Exporter;
use Carp;

use InSilicoSpectro::InSilico::RetentionTimer;
use InSilicoSpectro::InSilico::ExpCalibrator;

use File::Basename;

use AI::NNFlex::Dataset;
use AI::NNFlex::Backprop;

use XML::Dumper;

our (@ISA, @EXPORT, @EXPORT_OK);
@ISA = qw(Exporter InSilicoSpectro::InSilico::RetentionTimer);
@EXPORT = qw();
@EXPORT_OK = ();

sub new{
  my ($pkg,%h)=@_;# pkg: name of the module; h: hash with the rest of parameters
  my $rt=$pkg->SUPER::new;# Create a reference

# Setting of properties
  $rt->set('confile','-');# default file

  $rt->set('maxepoch',1000);
  $rt->set('sqrerror',1e-3);
  $rt->set('mode','silent');
  $rt->set('network',{});
  $rt->set('nnet',{});
  $rt->set('layers',[{},{},{}]);

  $rt->set('nnet0',{randomconnections=>0,randomweights=>1,learningrate=>.1,
		  debug=>[],bias=>1,momentum=>0.6});
  my %layerdef=(persistentactivation=>0,
		decay=>0.0,
		randomactivation=>0,
		threshold=>0.0,);
  $rt->set('layers0',[{%layerdef,nodes=>20,activationfunction=>"linear",},
		    {%layerdef,nodes=>2,activationfunction=>"sigmoid",},
		    {%layerdef,nodes=>1,activationfunction=>"sigmoid",}]);

  foreach (keys %h){ $rt->set($_, $h{$_}) }

  return $rt;
}

# -------------------------------   predictor

sub predict {
  my ($this,%h)=@_;
  foreach (keys %h){ $this->set($_, $h{$_}) }

  return $this->SUPER::predict($this->predictor);

}

sub predictor {


  my ($this,%h)=@_;
  foreach (keys %h){ $this->set($_, $h{$_}) }

  my $dataset = AI::NNFlex::Dataset->new;
  $dataset->add([[count_aa($this->{peptide})],[0]]);
  return(${${$dataset->run($this->{network})}[0]}[0]/$this->{knorm});
}

# -------------------------------   learn

sub learn {

  my ($this,%h)=@_;
  foreach (keys %h){ $this->set($_, $h{$_}) }

  my ($network,$dataset,$knorm);
  my ($watcher,$sqrerror)=(0,10);

  my @expdata=@{$this->{data}{expseqs}};# Sequences
  my @prdata=@{$this->{data}{exptimes}};# Retention times
  my @aas=split '','ACDEFGHIKLMNPQRSTVWY';

  $network = AI::NNFlex::Backprop->new(%{$this->{nnet0}},%{$this->{nnet}});
  for (0..2) {
    $network->add_layer(%{$this->{layers0}[$_]},%{$this->{layers}[$_]});
  }
  $network->init();
  $this->{network}=$network;
  $dataset = AI::NNFlex::Dataset->new;

  $this->{knorm}=normdata(\@prdata);# Normalize scale of times

  # Add data points
  for (my $k=0;$k<(scalar @expdata);$k++) {
    $dataset->add([[count_aa($expdata[$k])],[$this->{knorm}*$prdata[$k]]]);
  }

  while (($sqrerror>$this->{sqrerror}) and ($watcher<$this->{maxepoch}))
      {
        $sqrerror = $dataset->learn($network);# Learning
	$watcher++;
	print "Epoch: ",$watcher," SSE: ",$sqrerror,"\n" if ($this->{mode} eq 'verbose');
      }
}

sub normdata {
  my $tmax=1e-12;
  foreach (@{$_[0]}) { $tmax=$_ if ($_ > $tmax) }
  return(1/$tmax);
}

# -------------------------------   calibrate

sub calibrate {

  my ($this,%h)=@_;
  foreach (keys %h) { $this->set($_, $h{$_}) }

  my (@prdata);
  my $k;

  for ($k=0;$k<scalar(@{$this->{data}{calseqs}});$k++) {
     push(@prdata,$this->predictor(peptide=>${$this->{data}{calseqs}}[$k]));# Assign an index to each prediction
  }
  $this->set('data',{prdata=>\@prdata});
  $this->SUPER::calibrate;

}

# -------------------------------   Filter data

sub filter {

  my ($this,%h)=@_;
  foreach (keys %h){ $this->set($_, $h{$_}) }

  my @error;

  for(my $m=0;$m<=$#{$this->{data}{expseqs}};$m++) {
     push(@error,(abs($this->predict(peptide=>${$this->{data}{expseqs}}[$m])
		      -${$this->{data}{exptimes}}[$m])));
   }

 return $this->SUPER::filter(\@error);
}


# -------------------------------   write / read xml file with nnet data

sub write_xml {

  my ($this,%h)=@_;
  foreach (keys %h){ $this->set($_, $h{$_}) }

  my $dump = new XML::Dumper;
  $dump->pl2xml( [$this->{network},$this->{knorm}],$this->{confile} );

}

sub read_xml_str {

 my  ($this,$str)=@_;

  my $dump = new XML::Dumper;
  ($this->{network},$this->{knorm}) = @{$dump->xml2pl( $str )};
}


sub read_xml {

 my  ($this,%h)=@_;
 foreach (keys %h){ $this->set($_, $h{$_}) }

  my $dump = new XML::Dumper;
  ($this->{network},$this->{knorm}) = @{$dump->xml2pl( $this->{confile} )};
}

# -------------------------------   misc
return 1;