/usr/local/CPAN/Bio-ConnectDots/Bio/ConnectDots/DotTable.pm
package Bio::ConnectDots::DotTable;
use vars qw(@ISA @AUTO_ATTRIBUTES @OTHER_ATTRIBUTES %SYNONYMS %DEFAULTS);
use strict;
#use lib "/users/ywang/temp";
use Bio::ConnectDots::Connector;
use Bio::ConnectDots::Dot;
use Bio::ConnectDots::DotQuery;
use Bio::ConnectDots::DotQuery::InnerCt;
use Bio::ConnectDots::DotQuery::InnerCs;
use Bio::ConnectDots::DotQuery::OuterCt;
use Bio::ConnectDots::DotQuery::OuterCs;
use Class::AutoClass;
use HTML::Entities;
@ISA = qw(Class::AutoClass); # AutoClass must be first!!
@AUTO_ATTRIBUTES=qw(db db_id connectdots name outputs alias2info preview preview_limit);
@OTHER_ATTRIBUTES=qw();
%SYNONYMS=();
%DEFAULTS=(query_type=>'inner',input_type=>'ConnectorTable');
Class::AutoClass::declare(__PACKAGE__);
sub _init_self {
my($self,$class,$args)=@_;
return unless $class eq __PACKAGE__; # to prevent subclasses from re-running this
$self->alias2info || $self->alias2info({});
my $cd=$self->connectdots;
$self->throw("Required parameter -name missing") unless $self->name;
$self->throw("Required parameter -connectdots missing") unless $cd;
my($drop,$create,$query)=$args->get_args(qw(drop create query));
Bio::ConnectDots::DB::DotTable->drop($self) if $drop || $create;
$self->preview($args->get_args('preview'));
$self->preview_limit(500);
my $saved=Bio::ConnectDots::DB::DotTable->get($self,$cd);
if ($saved) { # copy relevant attributes from db object to self
$self->throw("DotTable ".$self->name." already exists") if $query;
$self->db_id($saved->db_id);
$self->alias2info($saved->alias2info);
}
$self->query($query) if $query;
}
sub dotsets {
my($self)=@_;
my @dotsets;
foreach my $alias (keys %{$self->{alias2info}}) {
push @dotsets, $self->{alias2info}->{$alias}->{dotset};
}
wantarray? @dotsets: \@dotsets;
}
sub put {
my($self)=@_;
Bio::ConnectDots::DB::DotTable->put($self);
}
sub query {
my($self,$args)=@_;
if ($self->db_id) {
$self->throw("Connectortable ".$self->name." already exists. Use -create to overwrite")
unless $args->create;
Bio::ConnectDots::DB::DotTable->drop($self);
}
my $query_type=$args->query_type || $self->DEFAULTS_ARGS->query_type;
my $input_type=$args->input_type || $self->DEFAULTS_ARGS->input_type;
$self->throw("Unrecognized query type: $query_type") unless $query_type=~/inner|full|outer/i;
$self->throw("Unrecognized input type: $input_type") unless $input_type=~/table|set/i;
$args->set_args(-dottable=>$self);
# create correct query object for query_type and input_type
my $query;
if ($query_type=~/inner/i && $input_type=~/table/i) {
$query=new Bio::ConnectDots::DotQuery::InnerCt($args);
} elsif ($query_type=~/inner/i && $input_type=~/set/i) {
$query=new Bio::ConnectDots::DotQuery::InnerCs($args);
} elsif ($query_type=~/full|outer/i && $input_type=~/table/i) {
$query=new Bio::ConnectDots::DotQuery::OuterCt($args);
} elsif ($query_type=~/full|outer/i && $input_type=~/set/i) {
$query=new Bio::ConnectDots::DotQuery::OuterCs($args);
}
$query->execute;
$self->outputs($query->outputs);
$self->put;
# copy to outfile
my $outfile_name = $args->get_args('outfile');
$self->output_file($outfile_name) if $outfile_name;
# check for collapse
my $collapse = $args->get_args('collapse');
my $delimiter = $args->get_args('collapse_seperator');
$self->collapse($collapse,$delimiter) if $collapse;
# check for XML output
my $xml_file = $args->get_args('xml_file');
my $xml_root = $args->get_args('xml_root');
if($xml_file && $xml_root) { # collapse xml
$self->output_xml($xml_file,$xml_root);
} elsif ($xml_file) { # by row xml
$self->xml_rows($xml_file);
}
}
### outputs the table to a flat file
sub output_file {
my ($self, $filename) = @_;
my $tablename = $self->name;
$self->{db}->do_sql("COPY $tablename TO '$filename'");
}
### collapses all rows into one on the given identifier
sub collapse {
my ($self, $centric, $delimiter) = @_;
my $db = $self->{db};
my $dbh = $self->{db}->dbh();
my $name = $self->name;
$delimiter = ',' if !$delimiter;
my $centricIdx = -1;
my $outlists = []; # lists on column of identifiers
# get the column names for the table and find column number for centric
my @columns;
my $i=0;
foreach my $output (@{$self->outputs}) {
push @columns, $output->{output_name};
$centricIdx = $i if $output->{output_name} eq $centric;
$i++;
}
$self->throw ("Unknown column in collapse option: $centric") if $centricIdx == -1; # centric column not found.
my $tmp_name = '__'. $name .'_temp';
$db->do_sql("DROP TABLE $tmp_name") if $db->table_exist($tmp_name);
$db->do_sql("SELECT * INTO $tmp_name FROM $name LIMIT 1"); # create temp table with identical columns
$db->do_sql("TRUNCATE TABLE $tmp_name");
my $iterator = $dbh->prepare("SELECT * FROM $name ORDER BY $centric");
$iterator->execute();
my $currentID;
while (my @cols = $iterator->fetchrow_array()) {
next unless @cols;
if($cols[$centricIdx] ne $currentID && defined($currentID)) { # clear out lists and insert row
$self->_collapse_insert($outlists,$tmp_name,$centricIdx,$delimiter);
$outlists = [];
}
# add columns to lists
for(my $c=0; $c<@cols; $c++) { # push identifiers onto their columns
$outlists->[$c]->{$cols[$c]} = 1;
}
$currentID = $cols[$centricIdx];
}
$self->_collapse_insert($outlists,$tmp_name,$centricIdx,$delimiter); # insert last case
$db->do_sql("DROP TABLE $name");
$db->do_sql("ALTER TABLE $tmp_name RENAME TO $name");
}
# form lists into an insert statement and insert it into tmp_table
sub _collapse_insert {
my ($self,$outlists, $tmp_name,$centricIdx,$delimiter) = @_;
my $db = $self->{db};
return unless $outlists->[0];
my $sql = "INSERT INTO $tmp_name VALUES(";
my $i=0;
foreach my $val_list (@$outlists) {
if($i == $centricIdx) {
my ($id) = keys %{$val_list};
$sql .= "'". $id ."',";
}
else {
my $addstr;
foreach my $val (keys %{$val_list}) {
$addstr .= $val . $delimiter unless $val eq '';
}
if($addstr) { # cleanup extra delimiter
$addstr = substr($addstr,0,length($addstr)-length($delimiter));
$sql .= "'". $addstr ."',";
}
else {
$sql .= "'". "',";
}
}
$i++;
}
chop($sql); # remove extra comma
$sql .= ")";
$db->do_sql($sql);
}
sub output_xml {
my ($self, $xml_file, $xml_root) = @_;
$self->throw("You must define -xml_file to output data to XML.") if !$xml_file;
$self->throw("You must define -xml_root to output data to XML.") if !$xml_root;
open(OUT, ">$xml_file") or $self->throw("Can not open output xml_file: $xml_file");
my $db = $self->{db};
my $dbh = $self->{db}->dbh();
my $name = $self->name;
my $rootIdx = -1;
# get the column names for the table and find column number for centric
my @columns;
my @internal_tags;
my $i=0;
foreach my $output (@{$self->outputs}) {
push @columns, $output->{output_name};
if ($output->{output_name} eq $xml_root) {
$rootIdx = $i;
} else {
push @internal_tags, $output->{output_name}
}
$i++;
}
$self->throw ("Unknown column as XML output root: $xml_root") if $rootIdx == -1; # root column not found.
# create the DTD
my $DTD = "<!DOCTYPE DotTable [";
$DTD .= "<!ELEMENT DotTable ($xml_root*)>";
$DTD .= "<!ATTLIST DotTable name CDATA #REQUIRED>";
$DTD .= "<!ELEMENT $xml_root (". join('*,',@internal_tags) ."*)>";
$DTD .= "<!ATTLIST $xml_root id CDATA #REQUIRED>";
foreach my $tagname (@internal_tags) {
$DTD .= "<!ELEMENT $tagname (#PCDATA)>";
}
$DTD .= "]>\n";
print OUT $DTD;
print OUT "<DotTable name='$name'>\n";
# iterate over the ids and output XML
my $sql = "SELECT $xml_root,". join(',',@internal_tags) ." FROM $name ORDER BY $xml_root";
my $iterator = $dbh->prepare($sql);
$iterator->execute();
my $currentID;
my $outcols;
while (my @cols = $iterator->fetchrow_array()) {
if($cols[0] ne $currentID && defined($currentID)) { # close out tags and start new tag
my $entry = _create_xml_entry($outcols,\@internal_tags,$xml_root,$currentID);
print OUT $entry;
$outcols = [];
}
# save data by column for this id
for(my $i=1; $i<@cols; $i++) {
$outcols->[$i-1]->{$cols[$i]} = 1;
}
$currentID = $cols[0];
}
my $entry = _create_xml_entry($outcols,\@internal_tags,$xml_root,$currentID);
print OUT $entry;
print OUT "</DotTable>";
close(OUT);
}
# returns an xml entry based off the structure of the passed in ...
sub _create_xml_entry {
my ($outcols, $internal_tags, $xml_root, $keyid) = @_;
return unless $outcols && defined($keyid);
$keyid = _encode($keyid);
my $entry;
$entry = "<$xml_root id='$keyid'>";
for(my $c=0; $c<@$outcols; $c++) {
my $hash = $outcols->[$c];
my $tag = $internal_tags->[$c];
foreach my $data (keys %$hash) {
$data = _encode($data);
$entry .= "<$tag>$data</$tag>" if $data;
}
}
$entry .= "</$xml_root>\n";
return $entry;
}
# exports xml by row
sub xml_rows {
my ($self, $xml_file) = @_;
$self->throw("You must define -xmlrows_file to output data to XML.") if !$xml_file;
open(OUT, ">$xml_file") or $self->throw("Can not open output xml_file: $xml_file");
my $db = $self->{db};
my $dbh = $self->{db}->dbh();
my $name = $self->name;
# get the column names for the table and find column number for centric
my @columns;
foreach my $output (@{$self->outputs}) {
push @columns, $output->{output_name};
}
# create the DTD
my $DTD = "<!DOCTYPE DotTable [";
$DTD .= "<!ELEMENT DotTable (row*)>";
$DTD .= "<!ATTLIST DotTable name CDATA #REQUIRED>";
$DTD .= "<!ELEMENT row (". join('*,',@columns) ."*)>";
$DTD .= "<!ATTLIST row line CDATA #REQUIRED>";
foreach my $tagname (@columns) {
$DTD .= "<!ELEMENT $tagname (#PCDATA)>";
}
$DTD .= "]>\n";
print OUT $DTD;
print OUT "<DotTable name='$name'>\n";
my $sql = "SELECT ". join(',',@columns) ." FROM $name";
my $iterator = $dbh->prepare($sql);
$iterator->execute();
my $linenum=1;
while (my @cols = $iterator->fetchrow_array()) {
my $entry = "<row line='$linenum'>";
for (my $i=0; $i<@cols; $i++) {
my $tag = $columns[$i];
my $data = _encode($cols[$i]);
$entry .= "<$tag>$data</$tag>" if defined($cols[$i]);
}
$entry .= "</row>\n";
print OUT $entry;
$linenum++;
}
print OUT "</DotTable>\n";
close(OUT);
}
sub _encode() {
my ($string) = @_;
$string = encode_entities($string);
$string =~ s/\'/'/g;
return $string;
}
1;