GO::Handlers::obo_text - GO::Handlers::obo_text documentation
Index
Code Index:
NAME

SYNOPSIS

use GO::Handlers::obo_text
DESCRIPTION

PUBLIC METHODS -

# $Id: obo_text.pm,v 1.16 2008/01/22 23:54:45 cmungall Exp $
#
# This GO module is maintained by Chris Mungall <cjm@fruitfly.org>
#
# see also - http://www.geneontology.org
# - http://www.godatabase.org/dev
#
# You may distribute this module under the same terms as perl itself
# makes objects from parser events
package GO::Handlers::obo_text;
use Data::Stag qw(:all);
use GO::Parsers::ParserEventNames;
use base qw(GO::Handlers::base);
use strict qw(vars refs);
sub s_obo {
my $self = shift;
#$self->SUPER::s_obo(@_);
return;
}
sub e_header {
my $self = shift;
my $hdr = shift;
my $fmt = stag_get($hdr,'format-version');
$self->tag("format-version"=>
(stag_sget($hdr,'format-version') || '1.2'));
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
localtime(time);
$self->tag('date'=>sprintf("%02d:%02d:%04d %02d:%02d",
$mday,$mon+1,$year+1900,$hour,$min));
$self->tag('autogenerated-by'=>$0);
foreach (stag_tnodes($hdr)) {
$self->tag(stag_name($_), _obo_escape($_->data));
}
my @sts = stag_get($hdr,'synonymtypedef');
foreach (@sts) {
my $scope = stag_sget($_,'scope');
$self->tag(synonymtypedef => sprintf("%s \"%s\" %s",
stag_sget($_,ID),
stag_sget($_,NAME) || '',
($scope ? uc($scope) : '')));
}
my @ssdefs = stag_get($hdr,'subsetdef');
foreach (@ssdefs) {
$self->tag(subsetdef => sprintf("%s \"%s\"",
stag_sget($_,ID),
stag_sget($_,NAME)));
}
$self->{__emitted_header} = 1;
$self->print("\n");
return;
}
sub e_typedef {
my $self = shift;
my $t = shift;
$self->stanza('Typedef', $t);
}
sub e_term {
my $self = shift;
my $t = shift;
if (!$self->{__emitted_header}) {
$self->e_header(stag_new(HEADER,[]));
}
$self->stanza('Term', $t);
}
sub e_annotation {
my $self = shift;
my $t = shift;
if (!$self->{__emitted_header}) {
$self->e_header(stag_new(HEADER,[]));
}
$self->stanza('Annotation', $t);
}
sub e_instance {
my $self = shift;
my $t = shift;
if (!$self->{__emitted_header}) {
$self->e_header(stag_new(HEADER,[]));
}
$self->stanza('Instance', $t);
}
sub stanza {
my $self = shift;
my $stanza = shift;
my $t = shift;
$self->print("[$stanza]\n");
my @BOOLEAN_TAGS =
(
IS_ANONYMOUS,
IS_OBSOLETE,
IS_CYCLIC,
IS_TRANSITIVE,
IS_SYMMETRIC,
IS_ANTI_SYMMETRIC,
IS_REFLEXIVE,
IS_METADATA_TAG,
);
my @TAGS =
(ID,
NAME,
ALT_ID,
NAMESPACE,
DEF,
COMMENT,
SUBSET,
IS_A ,
RELATIONSHIP,
UNION_OF,
INTERSECTION_OF,
SYNONYM,
PROPERTY_VALUE,
XREF_ANALOG,
XREF_UNKNOWN,
'object',
@BOOLEAN_TAGS,
);
my %IS_BOOLEAN = map { ($_=>1) } @BOOLEAN_TAGS;
my @IGNORE = qw(is_root);
foreach my $tag (@IGNORE) {
stag_unset($t, $tag);
}
foreach my $tag (@TAGS) {
my @vals = stag_get($t, $tag);
next unless @vals;
if ($tag eq DEF) {
my $def = shift @vals;
my $defstr = $def->get_defstr;
my $qn = stag_sget($t, "$tag/@");
$self->tag(def => _obo_escape($defstr), [$def->get_dbxref], $qn);
}
elsif ($tag eq RELATIONSHIP) {
$self->tag(relationship => sprintf("%s %s",
$_->sget_type,
$_->sget_to),
undef,
$_->sget('@'))
foreach @vals;
}
elsif ($tag eq INTERSECTION_OF) {
$self->tag(intersection_of => sprintf("%s %s",
$_->sget_type,
$_->sget_to),
undef,
$_->sget('@'))
foreach @vals;
}
elsif ($tag eq UNION_OF) {
$self->tag(union_of => sprintf("%s %s",
$_->sget_type,
$_->sget_to),
undef,
$_->sget('@'))
foreach @vals;
}
elsif ($tag eq SYNONYM) {
foreach my $syn (@vals) {
my $type = $syn->sget('@/synonym_type');
my $scope = $syn->sget('@/scope');
my @vals = (quote($syn->sget_synonym_text));
push(@vals,uc($scope)) if $scope;
push(@vals,$type) if $type;
$self->tag($tag,
join(' ',@vals),
[$syn->get_dbxref]);
}
}
elsif ($tag eq XREF_ANALOG) {
$self->tag('xref', dbxref($_),undef,$_->sget('@'))
foreach @vals;
}
elsif ($tag eq PROPERTY_VALUE) {
foreach (@vals) {
my $dt = $_->sget_datatype;
if ($dt) {
$self->tag('property_value' => sprintf("%s %s %s",
$_->sget_type,
quote($_->sget_value),
$dt));
}
else {
$self->tag('property_value' => sprintf("%s %s",
$_->sget_type,
$_->sget_to));
}
}
}
elsif ($tag eq 'object') {
# experimental: obof1.3
$self->tag('object' => $self->obo_id(@vals));
}
elsif ($IS_BOOLEAN{$tag}) {
$self->tag($tag, $vals[0] ? "true" : "false");
}
else {
foreach (@vals) {
if (ref($_)) {
$self->tag($tag, $_->sget('.'),undef,$_->sget('@'))
}
else {
$self->tag($tag, _obo_escape($_));
}
}
}
stag_unset($t, $tag);
}
my @tnodes = stag_tnodes($t);
$self->tag($_->name, _obo_escape($_->data))
foreach @tnodes;
my @ntnodes = stag_ntnodes($t);
if (@ntnodes) {
print STDERR $_->xml foreach @ntnodes;
$self->throw( "unknown elements");
}
$self->print("\n");
}
sub obo_id {
my $self = shift;
my $v = shift;
if (ref($v)) {
my $isect = $v->sget_intersection;
if ($isect) {
my @links = $isect->get_link;
my @genus = grep {!$_->get_type} @links;
my @diffs = grep {$_->get_type} @links;
my $s =
join('^',
(map {$self->obo_id($_->sget_to)} @genus),
(map {
sprintf("%s(%s)",$_->sget_type,$self->obo_id($_->sget_to))
} @diffs));
return $s;
}
else {
}
}
else {
return $v;
}
}
sub tag {
my $self = shift;
my ($t, $v, $xrefsr, $qualsr) = @_;
my @xrefs = @{$xrefsr || []};
return unless defined $v;
if ($t eq DEF) {
$v=quote($v);
}
my $xrefl = '';
if ($xrefsr) {
$xrefl =
' ['.join(', ',
map {
dbxref($_);
} @xrefs).']';
}
my $ql = '';
if ($qualsr) {
my %qh = stag_pairs($qualsr);
$ql = ' {'.join(
', ',
map {
"$_=".quote($qh{$_})
} keys %qh
).'}';
}
$self->printf("%s: %s$xrefl$ql\n", $t, $v);
return;
}
sub _obo_escape {
my $s=shift;
$s =~ s/\\/\\\\/;
$s =~ s/([\{\}])/\\$1/g;
$s;
}
sub dbxref {
my $x = shift;
if (ref($x)) {
my $xref = $x->sget_dbname . ':' . $x->sget_acc;
my $name = $x->sget_name;
if (defined($name)) {
$name =~ s/\"/\\\"/g;
$xref." \"$name\"";
}
else {
$xref;
}
}
else {
$x;
}
}
sub safe {
my $word = shift;
$word =~ s/ /_/g;
$word =~ s/\-/_/g;
$word =~ s/\'/prime/g;
$word =~ tr/a-zA-Z0-9_//cd;
$word =~ s/^([0-9])/_$1/;
$word;
}
sub quote {
my $word = shift;
#$word =~ s/,/\\,/g; ## no longer required
$word =~ s/\"/\\\"/g;
"\"$word\"";
}
# -- EXPERIMENTAL CODE --
# obo format for gene_assocs
# we are hardcoding aspects here; this is OK, only for
# gene_assoc file which is GO specific
our %ASPECT_IDX =
(F => 'has_activity',
P => 'involved_in',
C => 'localised_to'
);
sub e_prod {
my $self = shift;
my $prod = shift;
my $proddb = $self->up_to('dbset')->get_proddb;
my $acc = $prod->get_prodacc;
my $id = "$proddb:$acc";
my $type = $prod->get_prodtype || 'gene_product';
$self->print("!! ***************************** \n");
$self->print("!! Gene Product: $id \n");
$self->print("!! ***************************** \n");
$self->print("[$type]\n");
$self->tag(id=>$id);
$self->tag(dbname=>$proddb);
$self->tag(acc=>$acc);
$self->tag(symbol=>$prod->sget_prodsymbol);
$self->tag(name=>$prod->sget_prodname);
$self->tag(synonym=>$_) foreach $prod->sget_prodsyn;
$self->tag(has_taxon=>'NCBI:'.$prod->sget_prodtaxa);
$self->print("\n");
my @assocs = $prod->get_assoc;
foreach my $assoc (@assocs) {
my $termacc = $assoc->get_termacc;
my $aspect = $assoc->get_aspect;
my $ns = $ASPECT_IDX{$aspect};
$self->print("[gene_product_annotation]\n");
$self->tag(involves_gene_product=>$id);
$self->tag($ns=>$termacc);
$self->tag($_=>'true') foreach $assoc->get_qualifier;
$self->tag(date=>$assoc->sget_assocdate);
$self->tag(source_db=>$assoc->sget_source_db);
my @evs = $assoc->get_evidence;
foreach my $ev (@evs) {
$self->tag(has_evidence=>$ev->sget_evcode, $ev->get_ref);
$self->tag(with=>$_) foreach $ev->get_with;
}
$self->print("\n");
}
$self->print("!! //\n\n");
}
sub dbxrefstr {
}
1;