| GO-TermFinder documentation | Contained in the GO-TermFinder distribution. |
GO::TermFinderReport::Text - prints results of GO::TermFinder as a text report
The print() method of this Perl module receives a reference to the array that is the return value from the findTerms method of GO::TermFinder, the number of genes that were used to generate the terms, and the number of genes that were said to be in the genome. It will then generate a text report that summarizes those results. Optionally, filehandle and p-value cutoff arguments may also be passed in. It will return the number of hypotheses that had corrected p-values as good as or better than the passed in cutoff.
use GO::TermFinder;
use GO::TermFinderReport::Text;
.
.
.
my @pvalues = $termFinder->findTerms(genes=>\@genes);
my $report = GO::TermFinderReport::Text->new();
open (OUT, ">report.text");
my $numHypotheses = $report->print(pvalues => \@pvalues,
aspect => $aspect,
numGenes => scalar(@genes),
totalNum => $totalNum,
cutoff => 0.01,
fh => \*OUT);
close OUT;
This is the constructor.
Usage:
my $report = GO::TermFinderReport::Text->new();
A GO::TermFinderReport::Text object is returned.
This method prints out the text report of the passed in hypotheses. The report is ordered in ascending order of p-value (i.e. most significant first). If the FDR was calculated, the FDR will also be printed. It returns the number of hypotheses that had corrected p-values as good or better than the passed in cutoff.
Usage:
my $numHypotheses = $report->print(pvalues => \@pvalues,
numGenes => scalar(@genes),
totalNum => $totalNum,
cutoff => 0.01,
fh => \*OUT,
table => 0 );
Required arguments:
pvalues : A reference to the array returned by the findTerms() method of GO::TermFinder
numGenes : The number of genes that were in the list passed to the findTerms method
totalNum : The total number of genes that were indicated to be in the genome for finding terms.
Optional arguments:
fh : A reference to a file handle to which the table should be printed. Defaults to standard out.
cutoff : The p-value cutoff, above which p-values and associated information will not be printed. Default is no cutoff.
table : 0 for standard output, 1 for tab delimited table. Default is 0
Gavin Sherlock
sherlock@genome.stanford.edu
| GO-TermFinder documentation | Contained in the GO-TermFinder distribution. |
package GO::TermFinderReport::Text;

# Prints the hypotheses returned by the findTerms() method of GO::TermFinder,
# either as a human-readable text report or as a tab-delimited table.

use strict;
use warnings;
use diagnostics;

our $VERSION = 0.10;

######################################################################################
sub new {
######################################################################################
# Constructor.
#
# Usage:
#
#     my $report = GO::TermFinderReport::Text->new();
#
# Returns a GO::TermFinderReport::Text object.

    my ($class) = @_;

    # Accept an existing instance as the invocant too, so that $obj->new()
    # blesses into the object's class rather than into its stringified ref.
    $class = ref($class) || $class;

    return bless {}, $class;

}

######################################################################################
sub print {
######################################################################################
# Prints a report of the passed in hypotheses, in the order in which they
# appear in the pvalues array, skipping any hypothesis whose corrected
# p-value is above the cutoff.
#
# Required arguments:
#
#     pvalues  : reference to an array of hash references.  Each hash is read
#                for the keys NODE (an object with goid() and term() methods),
#                CORRECTED_PVALUE, PVALUE, NUM_ANNOTATIONS,
#                TOTAL_NUM_ANNOTATIONS and ANNOTATED_GENES (a hash reference
#                whose values are printed), and optionally FDR_RATE and
#                EXPECTED_FALSE_POSITIVES.
#     numGenes : the size of the gene list, printed alongside each hypothesis.
#     totalNum : the population size, printed alongside each hypothesis.
#
# Optional arguments:
#
#     fh     : file handle to print to.  Defaults to STDOUT.
#     cutoff : hypotheses with a corrected p-value above this are skipped.
#              Defaults to 1.  An explicit 0 is honoured.
#     table  : true for a tab-delimited table (with a header line), false for
#              the multi-line text report.  Defaults to 0.
#
# Returns the number of hypotheses that were printed.
#
# Dies if any of the required arguments is missing.

    my ($self, %args) = @_;

    if (!exists $args{'pvalues'}) {
        die "You must supply a pvalues argument to the print method.";
    }

    if (!exists $args{'numGenes'}) {
        die "You must supply a numGenes argument to the print method.";
    }

    if (!exists $args{'totalNum'}) {
        die "You must supply a totalNum argument to the print method.";
    }

    my $pvalues  = $args{'pvalues'};
    my $numGenes = $args{'numGenes'};
    my $totalNum = $args{'totalNum'};
    my $fh       = $args{'fh'}    || \*STDOUT;
    my $table    = $args{'table'} || 0;

    # A plain "|| 1" would silently turn an explicit cutoff of 0 into "no
    # cutoff", so only fall back to the default when nothing was supplied.
    my $cutoff = (defined $args{'cutoff'} && $args{'cutoff'} ne '')
               ? $args{'cutoff'}
               : 1;

    my @header = ("GOID",
                  "TERM",
                  "CORRECTED_PVALUE",
                  "UNCORRECTED_PVALUE",
                  "NUM_LIST_ANNOTATIONS",
                  "LIST_SIZE",
                  "TOTAL_NUM_ANNOTATIONS",
                  "POPULATION_SIZE",
                  "FDR_RATE",
                  "EXPECTED_FALSE_POSITIVES",
                  "ANNOTATED_GENES");

    print {$fh} join("\t", @header), "\n" if ($table);

    my $numHypotheses = scalar @{$pvalues};
    my $numPrinted    = 0;

    foreach my $pvalue (@{$pvalues}) {

        # skip if above cutoff

        next if ($pvalue->{CORRECTED_PVALUE} > $cutoff);

        $numPrinted++;

        my $node = $pvalue->{NODE};

        # NOTE: p-values are written unformatted, exactly as supplied in the
        # pvalues array.

        # Hash order differs from run to run, so sort the gene names to keep
        # the report reproducible.

        my $genes = join(", ", sort values %{ $pvalue->{ANNOTATED_GENES} });

        # deal with FDR - the columns stay empty when it was not calculated

        my $hasFdr = exists $pvalue->{FDR_RATE};

        my ($fdr, $falsePositives) = ('', '');

        if ($hasFdr) {

            $fdr            = sprintf("%.2f%%", $pvalue->{FDR_RATE} * 100);
            $falsePositives = sprintf("%.2f", $pvalue->{EXPECTED_FALSE_POSITIVES});

        }

        if ($table) {

            print {$fh} join("\t", ($node->goid,
                                    $node->term,
                                    $pvalue->{CORRECTED_PVALUE},
                                    $pvalue->{PVALUE},
                                    $pvalue->{NUM_ANNOTATIONS},
                                    $numGenes,
                                    $pvalue->{TOTAL_NUM_ANNOTATIONS},
                                    $totalNum,
                                    $fdr,
                                    $falsePositives,
                                    $genes)), "\n";

            next;

        }

        print {$fh} "-- $numPrinted of $numHypotheses --\n",
                    "GOID\t", $node->goid, "\n",
                    "TERM\t", $node->term, "\n",
                    "CORRECTED P-VALUE\t", $pvalue->{CORRECTED_PVALUE}, "\n",
                    "UNCORRECTED P-VALUE\t", $pvalue->{PVALUE}, "\n";

        if ($hasFdr) {

            print {$fh} "FDR_RATE\t", $fdr, "\n",
                        "EXPECTED_FALSE_POSITIVES\t", $falsePositives, "\n";

        }

        print {$fh} "NUM_ANNOTATIONS\t",
                    $pvalue->{NUM_ANNOTATIONS},
                    " of $numGenes in the list, vs ",
                    $pvalue->{TOTAL_NUM_ANNOTATIONS},
                    " of $totalNum in the genome\n",
                    "The genes annotated to this node are:\n",
                    $genes, "\n",
                    "\n";

    }

    return $numPrinted;

}

1; # to keep Perl happy