# /usr/local/CPAN/Bio-ConnectDots/Bio/ConnectDots/ConnectorSet/Uniprot.pm
package Bio::ConnectDots::ConnectorSet::Uniprot;
use strict;
use vars qw(@ISA);
use Bio::ConnectDots::ConnectorSet;
@ISA = qw(Bio::ConnectDots::ConnectorSet);
# Parse the next UniProt/Swiss-Prot flat-file entry from $self->input_fh and
# record the fields of interest through $self->put_dot( $label, $value ).
#
# Line types handled and the dot label each is stored under:
#   ID -> SwissProtEntryName      AC -> SwissProt (first accession on the line)
#   DE -> Protein_Desc            GN -> Gene_Name, Alias_Symbol
#   OS -> Organism                OX -> NCBI_taxID
#   RX -> MEDLINE, PubMed         KW -> Keywords
#   CC -> Function, Catalytic_Activity, Subunit, Tissue
#   DR -> EMBL_mRNA_protein, PIR, InterPro, pfam, TIGRFAMs
#
# DE, KW and the CC topics may span several lines; their continuation lines
# are joined with single spaces into one value.
#
# Returns 1 when a '//' terminator is reached and $self->have_dots is true
# (a '//' seen while have_dots is false is skipped), or undef when the input
# is exhausted.
sub parse_entry {
    my ($self) = @_;
    my $input_fh = $self->input_fh;

    # CC topics that are stored, mapped to their dot label.
    my %label_for_topic = (
        'FUNCTION'           => 'Function',
        'CATALYTIC ACTIVITY' => 'Catalytic_Activity',
        'SUBUNIT'            => 'Subunit',
        'TISSUE SPECIFICITY' => 'Tissue',
    );

    # DR databases that are stored, keyed by lower-cased abbreviation so that
    # the flat-file spelling 'Pfam' is recognised as well as 'pfam'.
    my %label_for_db = (
        'embl'     => 'EMBL_mRNA_protein',
        'pir'      => 'PIR',
        'interpro' => 'InterPro',
        'pfam'     => 'pfam',
        'tigrfams' => 'TIGRFAMs',
    );

    my $topic_alternation = join '|',
        map { quotemeta($_) } sort keys %label_for_topic;
    my $cc_topic_line = qr/^CC.*-!- ($topic_alternation):\s*(.*)/;

    # A CC continuation line is any CC line that neither opens a new topic
    # ('-!-') nor belongs to the dashed banner that follows the comments.
    my $cc_continuation = qr/^CC(?!\s+(?:-!-|-{3}))\s+(.*)$/;
    my $de_line         = qr/^DE\s+(.*)$/;
    my $kw_line         = qr/^KW\s+(.*)$/;

    # Holds the single line read past the end of a multi-line field, so the
    # main loop can dispatch it like any other line regardless of its type.
    my @pushback;

    # Store a dot only when a value was actually extracted.
    my $put = sub {
        my ( $label, $value ) = @_;
        $self->put_dot( $label, $value )
            if defined $value && length $value;
        return;
    };

    # Append the captured text of every following line that matches
    # $continuation_re to $text; the first non-matching line is pushed back.
    my $collect = sub {
        my ( $text, $continuation_re ) = @_;
        while ( defined( my $next = <$input_fh> ) ) {
            chomp $next;
            if ( my ($more) = $next =~ $continuation_re ) {
                $text .= " $more";
            }
            else {
                push @pushback, $next;
                last;
            }
        }
        return $text;
    };

    LINE:
    while (1) {
        my $line = @pushback ? shift @pushback : scalar <$input_fh>;
        last LINE unless defined $line;
        chomp $line;

        if ( $line =~ m{^//} ) {

            # End of entry; an entry that produced no dots is skipped.
            next LINE unless $self->have_dots;
            return 1;
        }
        elsif ( $line =~ /^ID/ ) {
            my ( undef, $entry_name ) = split /\s+/, $line;
            $put->( 'SwissProtEntryName', $entry_name );
        }
        elsif ( $line =~ /^AC/ ) {

            # Only the first accession on the line is stored.
            my ( undef, $accession ) = split /\s+/, $line;
            $accession =~ s/;\z// if defined $accession;
            $put->( 'SwissProt', $accession );
        }
        elsif ( $line =~ $de_line ) {
            my $first = $1;
            $put->( 'Protein_Desc', $collect->( $first, $de_line ) );
        }
        elsif ( $line =~ /^OS\s+(.*)$/ ) {
            my $organism = $1;

            # Strip only a terminating period, never a real character.
            $organism =~ s/\.\z//;
            $put->( 'Organism', $organism );
        }
        elsif ( $line =~ /^OX/ ) {

            # NOTE(review): the stored value keeps its 'NCBI_TaxID=' prefix,
            # as it always has -- confirm whether consumers expect the bare
            # number instead.
            my ( undef, $tax_id ) = split /\s+/, $line;
            $tax_id =~ s/;\z// if defined $tax_id;
            $put->( 'NCBI_taxID', $tax_id );
        }
        elsif ( $line =~ /^RX/ ) {
            while ( $line =~ /\b(MEDLINE|PubMed)=([^;\s]+)/g ) {
                $put->( $1, $2 );
            }
        }
        elsif ( $line =~ $cc_topic_line ) {
            my ( $topic, $first ) = ( $1, $2 );
            $put->(
                $label_for_topic{$topic},
                $collect->( $first, $cc_continuation )
            );
        }
        elsif ( $line =~ /^DR/ ) {
            my ( undef, $db, $id ) = split /\s+/, $line;
            next LINE unless defined $db && defined $id;
            $db =~ s/;\z//;
            my $label = $label_for_db{ lc $db } or next LINE;
            $id =~ s/[;.]\z//;
            $put->( $label, $id );
        }
        elsif ( $line =~ $kw_line ) {
            my $first = $1;
            $put->( 'Keywords', $collect->( $first, $kw_line ) );
        }
        elsif ( $line =~ /^GN/ ) {

            # Name and Synonyms are looked up independently so a line that
            # carries only one of them still yields that value.
            my ($gene_name) = $line =~ /\bName=([^;]+);/;
            my ($alias)     = $line =~ /\bSynonyms=([^;]+);/;
            $put->( 'Gene_Name',    $gene_name );
            $put->( 'Alias_Symbol', $alias );
        }
    }
    return undef;
}
1;