MassSpec::ViewSpectrum::RealVsHypPeptide - View a real mass spectrum on the same graph as a hypothetical spectrum generated by fragmenting a peptide in silico


MassSpec-ViewSpectrum-RealVsHypPeptide documentation Contained in the MassSpec-ViewSpectrum-RealVsHypPeptide distribution.

Index


Code Index:

NAME

Top

MassSpec::ViewSpectrum::RealVsHypPeptide - View a real mass spectrum on the same graph as a hypothetical spectrum generated by fragmenting a peptide in silico

SYNOPSIS

Top

  use MassSpec::ViewSpectrum::RealVsHypPeptide;
  open GRAPHIC, ">mygraphic.png" or die "Unable to open output file\n";
  binmode GRAPHIC;

  my @masses = (78.1,81.1,81.7,85.4,86.8,88.8,89.4,97.6,99.0,99.4,108.7,112.1,129.1,
    130.1,132.1,147.7,157.1,158.1,159.1,169.1,171.1,175.1,187.1,229.1,246.2,258.1,
    266.0,327.2,328.2,345.2,415.2,426.2,432.2,531.2,559.3,623.4,639.3,643.3,644.4,
    645.0,647.5,686.4,687.4,689.4);
  my @intensities = (8.7,7.7,7.3,10.5,7.7,7.3,8.4,8.0,9.1,9.1,7.3,29.0,12.6,7.3,8.0,
    7.7,11.9,9.8,10.1,7.3,10.5,131.0,9.4,50.3,22.7,44.7,16.8,30.4,18.2,53.1,25.5,
    15.7,7.7,14.0,46.8,38.4,7.3,11.5,8.7,7.3,8.7,7.3,24.8,194.2);
  my $peptide = "RTSVAR";

  my $vs = MassSpec::ViewSpectrum::RealVsHypPeptide->new($peptide, \@masses,\@intensities);
  $vs->set(yaxismultiplier => 1.8); # a sample tweak to adjust the output
  $vs->set(title => "BSA-689 -- " . $peptide);

  my $output = $vs->plot();
  print GRAPHIC $output;
  close GRAPHIC;

DESCRIPTION

Top

MassSpec::ViewSpectrum::RealVsHypPeptide - View a real mass spectrum on the same graph as a hypothetical spectrum generated by fragmenting a peptide in silico. The in silico fragmention is performed by generating all of the possible peptides which contain either the amino-terminal or carboxyl-terminal amino acids.

Negative peak intensity values are permitted; this permits the drawing of "pseudospectra" which, for example, illustrate peaks present in one spectrum but missing in another. Note that these negative peaks have no true intensities, but in some cases we assign different heights to illustrate the differences among different hypothetical peaks. In addition, pseudocoloring of both positive and negative peaks is performed to illustrate what type of ion that peak represents. In some cases these ions are labelled explicitly, although in practice it is best to minimize this labelling to avoid excessive clutter.

The real spectrum appears on the positive y axis with known peaks, while the negative y axis reflects:

Intersection

Peaks appearing in both the experimental and hypothetical spectra.

H-E

Peaks appearing the the hypothetical but not experimental spectra, i.e. peaks which failed to be fragmented and/or captured in the mass spec. apparatus.

E-H

Peaks appearing the the experimental but not hypothetical spectra; in some cases these peaks can be used to discredit the hypothetical spectrum by pointing out important peaks that the fragmentation of the peptide fails to account for.

OPTIONS

In addition to the options inherited from MassSpec::ViewSpectrum, the following options are available:

HminusEpeakheight

The negative heights associated with peaks appearing in the hypothetical spectrum but not in the experimental spectrum.

otherPeakHeights

The heights of other negative peaks.

yScalingFactor

A fudge factor.

yScalingOffset

Another fudge factor, used to make negative labels appear without taking up too much display real estate and without being obscured.

tolerance

How close an experimental and hypothetical peak's m/z value (x axis value) must be to be treated as the same peak.

TO DO

Top

Greater configurability with regard to which types of ion peak labels are displayed, when/whether the associated peptides are displayed as part of the label.

AUTHORS

Top

Jonathan Epstein, <Jonathan_Epstein@nih.gov>
Matthew Olson, <olsonmat@mail.nih.gov@mail.nih.gov>
Xiongfong Chen, <xchen@helix.nih.gov@mail.nih.gov>

COPYRIGHT AND LICENSE

Top


MassSpec-ViewSpectrum-RealVsHypPeptide documentation Contained in the MassSpec-ViewSpectrum-RealVsHypPeptide distribution.

package MassSpec::ViewSpectrum::RealVsHypPeptide;

use 5.006;
use strict;
use warnings;

use MassSpec::ViewSpectrum;

my $haveCUtilities;
if (eval 'require MassSpec::CUtilities') {
	import MassSpec::CUtilities;
	$haveCUtilities = 1;
} else {
	$haveCUtilities = 0;
}


our @ISA = qw(MassSpec::ViewSpectrum);

our $VERSION = '0.02';

my ($add_aa_A,$add_aa_R,$add_aa_N,$add_aa_D,$add_aa_C,$add_aa_E,$add_aa_Q,$add_aa_G,$add_aa_H,$add_aa_IL,$add_aa_K,$add_aa_M,$add_aa_F,$add_aa_P,$add_aa_S,$add_aa_T,$add_aa_W,$add_aa_Y,$add_aa_V);
my @add_aa_vals;



# Preloaded methods go here.

sub new (\$\@\@;\@\%\%) # (peptide, masses, intensities, [optional] annotations, annotations_matching, colormap)
{
	my $type = shift;
	my $peptide = shift;
	
	my $self = $type->SUPER::new(@_);

	
	init_add_aa ();

	$self->{peptide} = $peptide;	
	$self->{tolerance} = 0.1;

	my $i;
	my $maxintensity;

	for ($i = 0; $i <= $#{$self->{masses}}; $i++) {
		my $intensity = $self->{intensities}[$i];

		$maxintensity = $intensity unless defined $maxintensity and $maxintensity > $intensity;
	}



	$self->{HminusEpeakheight} = -0.1 * $maxintensity * $self->{yaxismultiplier};
   	$self->{otherPeakHeights} = -0.2 * $maxintensity * $self->{yaxismultiplier};
   	$self->{yScalingFactor} = (0.1/20.0);
   	$self->{yScalingOffset} = 3;



	return $self;
}


sub set {
        my ($self, $key, $value) = @_;

        $self->{$key} = $value;
}



sub plot
{
	my $self = shift;


	my ($EminusHRef,$HminusERef,$intersectionRef, $massintenRef);

	my $pep = $self->{peptide};

	my ($aRef_theoretical_masses,$aRef_annotations) = generate_theoretical_spectrum ($pep);

	# compare the experimental (E) and hypothetical (H) spectra, and compute 
	# their intersection as well as which peaks appear in one spectrum but
	# not the other
     ($EminusHRef,$HminusERef,$intersectionRef, $massintenRef) = $self->FindDiffs($aRef_theoretical_masses);

	my %inter = %$intersectionRef;
        my @masses;
        my @intensities;
        my @annotations;
        my $mass;
        my $annot;
        my $maxNegativeStrLen = 0;

	my ($HminusEpeakheight,$otherPeakHeights,$yScalingFactor,$yScalingOffset);

	$HminusEpeakheight = $self->{HminusEpeakheight};
   	$otherPeakHeights = $self->{otherPeakHeights};
   	$yScalingFactor = $self->{yScalingFactor};
	$yScalingOffset = $self->{yScalingOffset};

        foreach $mass (@$EminusHRef) {
                push (@masses,$mass);
                push (@intensities,$$massintenRef{$mass});
                push (@annotations,'');
	  }
	
	  
        foreach $mass (@$HminusERef) {
                push (@masses,$mass);
                push (@intensities,$HminusEpeakheight);
                $annot = $$aRef_annotations{$mass};
		    $maxNegativeStrLen = length $annot if length $annot > $maxNegativeStrLen;
                push (@annotations,$annot);

        }

        # plot the intersection twice; once above the axis and once below
        foreach $mass (keys %inter) {
                push (@masses,$mass);
                push (@intensities,$$massintenRef{$mass});
                $annot = $$aRef_annotations{$inter{$mass}};
                push (@annotations,$annot);
                push (@masses,$mass);
                push (@intensities,$otherPeakHeights);
                push (@annotations,'@' . $annot);
        }


	$self->{masses} = \@masses;
	$self->{intensities} = \@intensities;
	$self->{annotations} = \@annotations;
	$self->{extranegativeheight} = $yScalingFactor * ($maxNegativeStrLen - $yScalingOffset);

	$self->SUPER::plot();
}

# TODO(JAE):place local subroutines here, by convention using a leading underscore for
# these local subroutine names


my %add_aa_hsh;

sub init_add_aa {
        my @add_aa_syms = ("A","R","N","D","C","E","Q","G","H","X","K","M","F","P","S","T","W","Y","V");
        my $i = 0;
        my $sym;

($add_aa_A,$add_aa_R,$add_aa_N,$add_aa_D,$add_aa_C,$add_aa_E)=(71.03711,156.10111,114.04293,115.02694,103.00919,129.04259);
($add_aa_Q,$add_aa_G,$add_aa_H,$add_aa_IL,$add_aa_K,$add_aa_M)=(128.05858,57.02146,137.05891,113.08406,128.09496,131.04049);
($add_aa_F,$add_aa_P,$add_aa_S,$add_aa_T,$add_aa_W,$add_aa_Y,$add_aa_V)=(147.06841,97.05276,87.03203,101.04768,186.07931,163.06333,99.06841);
@add_aa_vals = ($add_aa_A,$add_aa_R,$add_aa_N,$add_aa_D,$add_aa_C,$add_aa_E,$add_aa_Q,$add_aa_G,$add_aa_H,$add_aa_IL,$add_aa_K,$add_aa_M,$add_aa_F,$add_aa_P,$add_aa_S,$add_aa_T,$add_aa_W,$add_aa_Y,$add_aa_V);

        foreach $sym (@add_aa_syms) {
                $add_aa_hsh{$sym} = $add_aa_vals[$i++];
        }
}

sub computePeptideMass      {
        my $comp = shift (@_);
	return MassSpec::CUtilities::computePeptideMass($comp) if ($haveCUtilities);
				
        my $sum = 0;
        do {
                $sum = $sum + $add_aa_hsh{chop($comp)};
        }until $comp le 0;
        return $sum;
}




sub generate_theoretical_spectrum	{
	
	my ($sequence) = @_;
	my $parent_comp = aasort ( remove_brackets ($sequence) );	#generate the parent composition which is used for determining compositions of complementary ions
	my (@family,%annotations,@masses,%ion,%conserved_labels);	#essentially the @family is for y ions and the @complement_family is for b ions, although this can be made more sophisticated easily if both ions go in the same direction
	my $length_sequence = length ($sequence);	#for the for loop
	my $chop_incrementer;	#for the for loop
	
	$ion{'y'} = 19.01839;
	$ion{'y-H2O'} = 1.007825;
	$ion{'b'} = 1.007825;
	$ion{'a'} = -26.98709;	
	#plus the proton to make a b ion, then minus CO = 27.9949
	
	$conserved_labels{'y'} = 1;
	$conserved_labels{'b'} = 1;
	$family[0] = ''; #JAE


	for ($chop_incrementer = 0;$chop_incrementer < $length_sequence;$chop_incrementer++)	{
		my $chop = chop ($sequence);	#get a 1 character piece of sequence
		unless ($chop eq "")	{
			if ( $chop eq "\)" )	{	#if this is the beginning of a missed fragmentation
				my $mf = "";
				$chop = chop ($sequence);
				do {
					$mf .= $chop;
					$chop = chop ($sequence);	
				}until $chop eq "\(";
				my $family = aasort($mf.$family[0]);
				my $complement_family = aasort (compdiff ($family,$parent_comp));
				unshift (@family, $family);
				make_ion_mass ($family,'y',\%annotations,\@masses,\%ion,\%conserved_labels);
				make_ion_mass ($family,'y-H2O',\%annotations,\@masses,\%ion,\%conserved_labels);
				if ($complement_family eq "")	{
					make_ion_mass ($parent_comp,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
					make_ion_mass ($parent_comp,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
				}else{
					make_ion_mass ($complement_family,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
					make_ion_mass ($complement_family,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
				}
			}else{
				my $family = aasort ($chop.$family[0]);
				my $complement_family = aasort ( compdiff ($family,$parent_comp) );
				unshift (@family, $family);
				make_ion_mass ($family,'y',\%annotations,\@masses,\%ion,\%conserved_labels);
				make_ion_mass ($family,'y-H2O',\%annotations,\@masses,\%ion,\%conserved_labels);
				if ($complement_family eq "")	{
					make_ion_mass ($parent_comp,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
					make_ion_mass ($parent_comp,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
				}else{
					make_ion_mass ($complement_family,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
					make_ion_mass ($complement_family,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
				}
			}
		}
	}
#	return (\@masses,\%annotations,\%conserved_labels);
	return (\@masses,\%annotations);
}
my $debug;
sub make_ion_mass	{
	my ($composition,$ion,$aRef_annotations,$aRef_masses,$hRef_ion,$aRef_conserved_labels) = @_;
	

	my $mass = computePeptideMass ($composition) + $$hRef_ion{$ion};
	my $annotation = $ion." \(".$composition."\)";


	$annotation = '@' . $annotation unless $$aRef_conserved_labels{$ion};

	#push (@$aRef_annotations,$annotation);
	$$aRef_annotations{$mass} = $annotation;

	push (@$aRef_masses,$mass);
	

}

sub remove_brackets	{
	local $_;
	$_ = shift @_;
	tr/()//d;
	return $_;
}

sub aasort	{
	return join ( "", sort ( split //,$_[0] ) );
}

sub compdiff	{
#returns the compositional difference between 2 amino acid compositions - the second needs to be longer than the first
	my ($aa_one,$aa_two) = @_;
	my ($first,$second);

	if (length ($aa_one) > length ($aa_two))	{
		$first = $aa_two;
		$second = $aa_one;
	}else{
		$first = $aa_one;
		$second = $aa_two;
	}

	my $diff = "";
	do{
		my ($faa,$saa) = (chop $first,chop $second);
		unless ($faa eq $saa)	{
			do	{
				$diff = $saa.$diff;
				$saa = chop $second;
			}until $saa eq $faa;
		}
	}until $first le "";
	$diff = $second.$diff if $second gt "";
	return $diff;
}

sub FindDiffs {
	  my $self = shift;
	  my($hyp_peptide) = @_;

	my @MassData = @{$self->{masses}};
	my @inten = @{$self->{intensities}};
	my $mass_tolerance = $self->{tolerance};

	  my($EminusHRef,$HminusERef,$interRef);
        my $exp_mass;
        my $hyp_mass;
        my $EminusH;
        my $HminusE;
        my(@EminusH,@HminusE,%intersection);

        my %hyp; # hypothetical peaks associated with $peptide
        my %exp; # experimental peaks (filtered masses)
	my %massinten; # experimental intensities

        my $count = 0;
	foreach $exp_mass(@MassData) {
                $exp{$exp_mass} = -1;
		    $massinten{$exp_mass} = $inten[$count++];

        }

	my @s = extend_spectrum(\@MassData,$mass_tolerance,0);
		
        foreach $hyp_mass (@$hyp_peptide) {
		my $mid;
                $hyp{$hyp_mass} = -1;
		if ($haveCUtilities) {
			$mid = MassSpec::CUtilities::binarySearchSpectrum($hyp_mass,\@s);
		} else {
			my $low = 0;
			my $high = @s - 1;

			while ($low <= $high) {
				$mid = int(($low+$high)/2);
				if ($hyp_mass < $s[$mid]) {
					$high = $mid - 1;
				} else {
					$low = $mid + 1;
				}
			}
			$mid++ if ($hyp_mass > $s[$mid]);
		}
		#
		# s[$mid] is now the lowest value in s just greater than $hyp_mass
		#
		#
		# Suppose our spectrum consisted of the peaks 347.42, 639.31, 722.37 and 831.93
		# and our error margin is 0.05 daltons.  Then the spectrum provided by
		# extend_spectrum() would look like:
		# s[0] = artificial negative value
		# s[1] = 347.37
		# s[2] = 347.47
		# s[3] = 639.26
		# s[4] = 639.36
		# s[5] = 722.32
		# s[6] = 722.42
		# s[7] = 831.88
		# s[8] = 831.98
		# s[9] = artificial huge value
		#
		# Suppose the input $mass is 639.27.  Then after the binary search, $mid
		# will be 4.  It turns out that $mid is always even for masses within
		# the error bands and always odd for values which fall outside the error bands.
		#
		unless ($mid & 1) { # unless ($mid is odd)
			my $index = $mid/2-1;
			my $peak = $MassData[$index];
			my $max_intensity = $inten[$index];
			$index--;
			while ($index >= 0 && abs($MassData[$index] - $hyp_mass) < $mass_tolerance) {
				if ($max_intensity < $inten[$index]) {
					$peak = $MassData[$index];
					$max_intensity = $inten[$index];
				}
				$index--;
			}
			$hyp{$hyp_mass} = 1;
			$exp{$peak} = $hyp_mass;
		}
	}



        foreach $hyp_mass (sort {$a <=> $b} (keys %hyp)) {
		if ($hyp{$hyp_mass} < 0) {
			push (@HminusE, $hyp_mass);
			
		}
        }

        # note that the hypothetical mass is logged in the intersection,
        # since that is what we want to plot
        foreach $exp_mass (sort {$a <=> $b} (keys %exp)) {
                if ($exp{$exp_mass} < 0) {
                        push (@EminusH, $exp_mass)
                } else { # note the associated experimental mass
                        $hyp_mass = $exp{$exp_mass};
                        $intersection{$exp_mass} = $hyp_mass;
                }
        }

        $EminusHRef = \@EminusH;
        $HminusERef = \@HminusE;
        $interRef = \%intersection;

        return($EminusHRef, $HminusERef, $interRef, \%massinten);
}


#
# Extend a spectrum to make it suitable for subsequent binary searching
#
sub extend_spectrum {
	my($aRef_rms,$err,$backwards_compatability) = @_;
	my $i;
	my @result = ();
	my $lastlow;
	my $lasthigh = -1;

	# place artificial low & high values to simplify subsequent binary search
	push (@result,-9999);

	for ($i = 0; $i < @{$aRef_rms}; $i++) {
		my $low = $aRef_rms->[$i] - $err;
		my $high = $aRef_rms->[$i] + $err;
		if ($backwards_compatability) {
			$low = sprintf("%.2f",$low);
			if ($high =~ /(\d+\.\d\d)/) {
				$high = $1;
			}
			$high = sprintf("%.2f",$high) + 0.009999;
		}
		if ($low < $lasthigh) { # if (two adjacent bands overlap)
			my $middle = ($lastlow + $high) / 2;
			if ($backwards_compatability) {
				$middle = sprintf("%.2f",$middle);
			}
			pop @result;
			push (@result,$middle-0.000001); # replace high-value of last band
			$low = $middle;
		}
		push (@result,$low);
		push (@result,$high);
		$lastlow = $low;
		$lasthigh = $high;
	}
	push (@result,999999999999);

	return @result;
}


1;
__END__
# Below is stub documentation for your module. You'd better edit it!