GO::Metadata::Panther - Species info for data used by Panther Clusters


go-perl documentation Contained in the go-perl distribution.

Index


Code Index:

NAME

Top

GO::Metadata::Panther - Species info for data used by Panther Clusters

SYNOPSIS

Top

 use GO::Metadata::Panther qw/@species/;

 for my $species (@species) {
  # do something
 }




Or

 use GO::Metadata::Panther;
 my $s = GO::Metadata::Panther->code('YEAST');

DESCRIPTION

Top

Accesses information related to species in the Panther seq2pthr.gz file. This file can be fetched from: ftp://ftp.pantherdb.org/genome/pthr7.0/

Each item in the exportable @species array contains a hash reference for each species. The items in that hash are:

code

A scalar or the UniProt species code.

ncbi_taxa_id

A scalar reference of NCBI taxa ids that items in the GO database match. This should only be one id, but sometimes it's useful to scan multiple.

For a complete list of every UniProt species matched to a NCBI taxa http://www.uniprot.org/docs/speclist

Constructors

The constructors scans @species for the requested data and returns the object that matches the data. Otherwise it returns a false false.

my $s = GO::Metadata::Panther->code(unicode_species_code)

Return an object filled with the species reference from the UniProtKB species code.

my $s = GO::Metadata::Panther->ncbi(ncbi_taxa_id)

Greate an object from the ncbi_taxa_id.

Function

Functions that can be used outside of the OO interface.

GO::Metadata::Panther::codes()

Returns a list of all UniProt species codes in @species.

GO::Metadata::Panther::valid_codes(unicode_species_code)

Send it a list of panther Unicode codes, returns true if they are all present in @species. Othewise returns false.

OO Function

$s->ncbi_ids()

Returns the list of NCBI taxa identifiers associated with the UniProt species code. In a perfect word this will only every return one value. In any case, the first value will be the actual numeric identifier associated.

AUTHOR

Sven Heinicke <sven@genomics.princeton.edu</gt>


go-perl documentation Contained in the go-perl distribution.
package GO::Metadata::Panther;
use strict;
use warnings;
use Exporter;
use base qw/Exporter/;
use Memoize;
use List::Util qw/sum first/;
use Data::Dumper;

our @EXPORT_OK = qw/@species/;

# These need to be in the order you wish to view them on the AmiGO
# dist png.
#
# For reference genomes, the first number is the ncbi_taxa_id list
# needs to be the reference in AmiGO::Aid::ReferenceGenome


our @species =
  (
   #
   # A
   #

   { # Anopheles gambiae
    code => 'ANOGA',
    ncbi_taxa_id =>  [ 7165 ],
    prefer => [ 'Gene' ],
   },

   { # Arabidopsis thaliana
    code => 'ARATH',
    ncbi_taxa_id => [ 3702 ],
   },

   { # Aquifex aeolicus
    code => 'AQUAE',
    ncbi_taxa_id => [ 63363 ],
   },

   { # Ashbya gossypii ATCC 10895
    code => 'ASHGO',
    ncbi_taxa_id => [ 33169 ],
   },

   #
   # B
   #

   { # Bacillus subtilis,
    code => 'BACSU',
    ncbi_taxa_id => [ 1423 ],
   },

   { # Bacteroides thetaiotaomicron
    code => 'BACTN',
    ncbi_taxa_id => [ 818 ],
   },

   { # Bos taurus
    code => 'BOVIN',
    ncbi_taxa_id => [ 9913 ],
   },

   { # Bradyrhizobium japonicum
    code => 'BRAJA',
    # matches the two UniProtKB items in GO
    ncbi_taxa_id => [ 375 ],
   },

   #
   # C
   #

   { # Caenorhabditis briggsae
    code => 'CAEBR',
    ncbi_taxa_id => [ 6238 ],
   },

   { # Caenorhabditis elegans
    code => 'CAEEL',
    ncbi_taxa_id => [ 6239 ],
    prefer => [ 'WB' ],
   },

   { # Canis lupus familiaris
    code => 'CANFA',
    ncbi_taxa_id => [ 9615 ],
    prefer => [ 'UniProtKB' ],
   },

   { # Chlamydia trachomatis
    code => 'CHLTA',
    ncbi_taxa_id => [ 315277 ],

   },

   { # Chlamydomonas reinhardtii
    code => 'CHLRE',
    ncbi_taxa_id => [ 3055 ],
    # found 11 of them
   },

   {
    code => 'CHLAA',
    ncbi_taxa_id => [ 324602 ],
   },

   { # Ciona intestinalis
    code => 'CIOIN',
    ncbi_taxa_id => [ 7719 ],
   },

   #
   # D
   #

   { # Danio rerio
    code => 'DANRE',
    ncbi_taxa_id => [ 7955 ],
    prefer => [ 'ZFIN' ],
   },

   { # Deinococcus radiodurans
    code => 'DEIRA',
    ncbi_taxa_id => [ 1299 ],
   },

   { # Dictyostelium discoideum
    code => 'DICDI',
    ncbi_taxa_id => [ 44689 ],
   },

   { # Drosophila melanogaster
    code => 'DROME',
    ncbi_taxa_id => [ 7227 ],
    prefer => [ 'FB' ],
   },

   #
   # E
   #

   { # Emericella nidulans
    code => 'EMENI',
    ncbi_taxa_id => [ 162425 ],
   },

   { # Entamoeba histolytica
    code => 'ENTHI',
    ncbi_taxa_id => [ 5759 ]
   },

   { #  Escherichia coli
    code => 'ECOLI',
    ncbi_taxa_id => [ 83333, 511145 ],
    prefer => [ 'EcoCyc' ],
   },

   #
   # G
   #

   { # Gallus gallus
    code => 'CHICK',
    ncbi_taxa_id => [ 9031 ],
    prefer => [ 'UniProtKB' ],
   },

   { # Geobacter sulfurreducens
    code => 'GEOSL',
    ncbi_taxa_id => [ 35554 ],
   },

   { # Gloeobacter violaceus
    code => 'GLOVI',
    ncbi_taxa_id => [ 33072, 251221 ],
    # 251221 is only here to match GLOVI|ENTREZ:2601616|UniProtKB
    prefer => [ 'UniProtKB' ],
   },

   #
   # H
   #

   { # Homo sapiens
    code => 'HUMAN',
    ncbi_taxa_id => [ 9606 ],
    prefer => [ 'ENSEMBL', 'UniProtKB' ],
   },

   #
   # L
   #

   { # Leishmania major
    code => 'LEIMA',
    ncbi_taxa_id => [ 5664, 347515 ],
   },

   { # Leptospira interrogans
    code => 'LEPIN',
    # only gets one
    ncbi_taxa_id => [ 173 ],
   },

   #
   # M
   #

   { # Macaca mulatta
    code => 'MACMU',
    ncbi_taxa_id => [ 9544 ],
   },

   { # Methanosarcina acetivorans
    code => 'METAC',
    ncbi_taxa_id => [ 2214 ],
   },

   { # Monodelphis domestica
    code => 'MONDO',
    ncbi_taxa_id => [ 13616 ],
   },

   { # Mus musculus
    code => 'MOUSE',
    ncbi_taxa_id => [ 10090 ],
    prefer => [ 'MGI' ],
   },

   #
   # N
   #

   { # Neurospora crassa
    code => 'NEUCR',
    ncbi_taxa_id => [ 5141 ],
   },

   #
   # O
   #

   { # Ornithorhynchus anatinus
    code => 'ORNAN',
    ncbi_taxa_id => [ 9258 ],
   },

   { # Oryza sativa
    code => 'ORYSJ',
    ncbi_taxa_id => [ 39947 ],
   },

   #
   # P
   #

   { # Pan troglodytes
    code => 'PANTR',
    ncbi_taxa_id => [ 9598 ],
   },

   { # Plasmodium yoelii
    code => 'PLAYO',
    ncbi_taxa_id => [ 73239 ],
   },

   { # Pseudomonas aeruginosa
    code => 'PSEA7',
    ncbi_taxa_id => [ 381754 ],
   },

   #
   # R
   #

   { # Rattus norvegicus
    code => 'RAT',
    ncbi_taxa_id => [ 10116 ],
    prefer => [ 'RGD', 'UniProtKB' ],
   },

   #
   # S
   #

   { # Saccharomyces cerevisiae
    code => 'YEAST',
    ncbi_taxa_id => [ 4932 ],
   },

   { # Schizosaccharomyces pombe
    code => 'SCHPO',
    ncbi_taxa_id => [ 4896 ],
   },

   { # Streptomyces coelicolor
    code => 'STRCO',
    ncbi_taxa_id => [ 1902 ],
   },

   { # Strongylocentrotus purpuratus
    code => 'STRPU',
    ncbi_taxa_id => [ 7668 ],
   },

   { # Sulfolobus solfataricus
    code => 'SULSO',
    ncbi_taxa_id => [ 2287 ],
   },

   #
   # T
   #

   { # Takifugu rubripes
    code => 'FUGRU', # UniProt calls this: TAKRU
    ncbi_taxa_id => [ 31033 ],
   },

   { # Tetrahymena thermophila
    code => 'TETTH',
    ncbi_taxa_id => [ 5911, 312017 ],
   },

   { # Thermotoga maritima
    code => 'THEMA',
    ncbi_taxa_id => [ 2336 ],
   },

   #
   # X
   #

   { # Xenopus', '(Silurana) tropicalis
    code => 'XENTR',
    ncbi_taxa_id => [ 8364 ],
   },

);



memoize('code');
sub code{
    my $class = shift;
    my $code = shift;

    for my $species (@species) {
	if ($species->{code} eq $code) {
	    return bless $species, $class;
	}
    }
    return undef;
}

sub ncbi{
    my $class = shift;
    my $ncbi = shift;

    for my $species (@species) {
	if (first {
	    $ncbi == $_;
	} @{ $species->{ncbi_taxa_id} }) {
	    return bless $species, $class;
	}
    }
    return undef
}

sub codes{
    return map { $_->{code} } @species;
}

sub valid_codes{
    return scalar(@_) == sum(map {
	__PACKAGE__->code($_) ? 1 : 0;
    } @_);
}

sub ncbi_ids{
    my $s = shift;
    return @{ $s->{ncbi_taxa_id} };
}

1;