/usr/local/CPAN/XML-RDB/XML/RDB/MakeTables.pm
#####
#
# $Id: MakeTables.pm,v 1.2 2003/04/19 04:17:48 trostler Exp $
#
# COPYRIGHT AND LICENSE
# Copyright (c) 2001, 2003, Juniper Networks, Inc.
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# 3. The name of the copyright owner may not be used to
# endorse or promote products derived from this software without specific
# prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
#####
package XML::RDB::MakeTables;
use vars qw($VERSION);
$VERSION = '1.2';
#####
#
# This script will take an XML document & build a set of RDB tables
# that can store it & output the table defs to STDOUT
# See 'pop_tables.pl' for a script that will then take the XML & populate
# these tables with actual values
# See 'unpop_tables.pl' for a script that will read the DB & convert back
# to XML
#
####
# We use DOM to parse the entire XML doc into memory
# use XML::DOM; XML::RDB loads it
use strict;
# For generic schema def
use DBIx::DBSchema;
sub new {
# my ($class, $rdb, $xmlfile, $outfile) = @_;
my ($class, $rdb, $doc, $head, $outfile) = @_;
# set up FH
my $fh = new IO::File;
if ($outfile) {
$fh->open("> $outfile") || die "$!";
} else {
$fh->fdopen(fileno(STDOUT), 'w') || die "$!";
}
# my $doc = new XML::DOM::Parser->parsefile($xmlfile) || die "$!";
# my $head = $doc->getDocumentElement;
my $self = bless {
rdb => $rdb,
doc => $doc,
head => $head,
one_to_n => $rdb->find_one_to_n_relationships($head),
tables => {}, # where our tree is built
headers => $rdb->{_HEADERS} || 0,
selects => $rdb->{_SELECTS} || 0,
outfile => $outfile,
fh => $fh,
}, $class;
$self;
}
sub go {
my($self) = @_;
$self->_headers if ($self->{headers});
# Create the table defs in memory
$self->_make_tables($self->{head});
$self->_add_in_1_to_n_cols;
# Create DB-generic sequence tables for DBIx::Sequence
# XML root and primary key, used in unpopulating this table set.
$self->_make_work_tables;
# Dump them
$self->_dump_dbschema_tables;
# Select statemetents for viewing data
$self->_dump_select_statements if ($self->{selects});
$self->{fh}->close;
}
sub _headers {
my $self = shift;
my @t = localtime();
my $datetime = sprintf("%4d-%02d-%02d %02d:%02d:%02d",
$t[5] +1900,$t[4] +1,$t[3],$t[2],$t[1],$t[0]);
$self->_p( <<"HEADER"
-- DSN : $self->{rdb}->{DSN}
--
-- XML::RDB SQL Generation
-- XML file : $self->{rdb}->{_XMLFILE}
-- SQL file : $self->{outfile}
-- date : $datetime
--
-- TABLE_PREFIX : $self->{rdb}->{TABLE_PREFIX}
-- PK_NAME : $self->{rdb}->{PK_NAME}
-- FK_NAME : $self->{rdb}->{FK_NAME}
-- TEXT_WIDTH : $self->{rdb}->{TEXT_WIDTH}
------- ONE to MANY ------
------------------------------
HEADER
);
# This will print out those relationships (preceded by a '--'!)
# so we can check 'em out
$self->_p($self->{rdb}->dump_otn(\%{$self->{one_to_n}}, '--'));
$self->_p( "\n" .'-- Gerated Tables'. "\n"
.'---------------------------------'. "\n");
return $self;
}
sub _dump_select_statements {
my $self = shift;
my $buff;
my @one2one;
my $PK = $self->{rdb}->{PK_NAME};
my $FK = $self->{rdb}->{FK_NAME};
my $meta = \%{$self->{tables}{__meta__}};
$self->_p( "\n" .'-- Flattened views of related tables'. "\n"
.'------------------------------------'. "\n")
if ($self->{headers});
foreach my $one (map { $self->{rdb}->mtn($_) }
(sort(keys(%{$self->{one_to_n}})))) {
# select columns
$buff = "-- SELECT \n";
my @ns = (map {$self->{rdb}->mtn($_) }
(sort(keys(%{$self->{one_to_n}{$meta->{$one}}}))));
foreach my $t ($one, @ns) {
foreach my $f (sort(keys(%{$self->{tables}{$t}{cols}}))) {
#[ table.field, lookup_table ] into ref_array
if ( $f =~ /^(\w+)_$PK$/ ) {
push @one2one, ["$t.$f", $1];
}
# $one.field & $n's.field from column list
elsif ( $f !~ /^($PK|\w+_$FK)$/ ) {
$buff .= "-- $t.$f ,\n";
}
}
}
# lookup_table.field column list
foreach my $t (@one2one) {
map { $buff .= '-- '. $t->[1] .".$_ ,\n" }
sort(grep (!/^($PK|\w+_$FK|\w+_$PK)$/,
(keys(%{$self->{tables}{$t->[1]}{cols}}))));
}
$buff =~ s/,$//;
# from
my $tables = "-- FROM \n-- ". ('(' x (scalar(@ns) + scalar(@one2one))) ."$one \n";
my $space = (' ' x (scalar(@ns) + scalar(@one2one)));
# one -> many are inner joins
foreach my $t (@ns) {
my $inner = '-- '. $space ."INNER JOIN $t ON ";
map { $tables .= "$inner $one.$PK = $t.$_ ) \n" }
sort(grep (/^${one}_$FK$/,
(keys(%{$self->{tables}{$t}{cols}}))));
}
# lookup tables are left joins
foreach my $t (@one2one) {
my $left = '-- '. $space ." LEFT JOIN $t->[1] ON ";
$tables .= "$left ". $t->[0] .' = '. $t->[1] .'.'. $PK ." ) \n";
}
# rap it, print it
@one2one = ();
$tables .= "-- LIMIT 500;\n\n";
$self->_p($buff,$tables);
}
return $self;
}
sub _make_tables {
my($self, $head) = @_;
my(@stack, $sub_table, $text_field);
my $rdb = $self->{rdb};
TOP_TBLS:
my $nodes = [ $head->getChildNodes ];
($sub_table, $text_field) = ();
TOPLESS_TBLS:
# Blow thru each child node of this node
while ( scalar(@{$nodes}) ) {
my $sub_node = shift(@{$nodes});
# Skip these
next if ($sub_node->getNodeType == XML::DOM::TEXT_NODE);
next if ($sub_node->getNodeType == XML::DOM::COMMENT_NODE);
# if (sub node doesn't have attributes) & (it only has 1 child node
# that's a next node || it has no sub nodes)
# NOTE: This is the EXACT SAME 'if' statement as in 'PopulateTables.pm'
# THEY MUST MATCH or carnage will ensue.
if (($sub_node->getAttributes && !$sub_node->getAttributes->getLength) &&
(!$sub_node->getChildNodes
|| ($#{$sub_node->getChildNodes} == 1
&& $sub_node->getChildNodes->[0]->getNodeType == XML::DOM::TEXT_NODE))) {
# Plain text - just a regular column in table
push @{$text_field}, $sub_node->getNodeName;
}
else {
# Figure out what kind of relationship this element has to
# this sub-table - either 1:1 or 1:N
if (!$self->{one_to_n}->{$head->getNodeName}{$sub_node->getNodeName}) {
# Foreign key references in this table (1:1 relationship)
push @{$sub_table}, $sub_node->getNodeName;
}
# The FKs in 1:N relationship tables will get dumped at the end
# We'll need to make tables for these guys
push(@stack, [$head, $nodes, $sub_table, $text_field]);
$head = $sub_node;
goto TOP_TBLS;
}
}
# NOTE : Replaced the recursive sub loop with goto's & lifo @stack,
# then combined make_tables & make_table. For about 500 xml nodes
# we where penilized for about 2000 calls on each routine.
# Now 1 call is made, replacing 4000 calls.
# Makes it a little harder to read, but not much.
# -------------------------------------------------------------
# We've got all the info we need - fill out our data structure
##
# This function fills out or data structure that describes a table
##
# sub _make_table
# Takes a pro-spective table name
# array refs of sub tables & text field names
# and an XML::DOM::NamedNodeMap of XML attributes
# my($self, $o_table_name, $sub_table, $leaf, $attr_ref) = @_;
# DB-ize table name
my $table_name = $rdb->mtn($head->getNodeName);
# Keep original element name
$self->{tables}->{__meta__}{$table_name} = $head->getNodeName;
# Do foreign keys first - they're integers
foreach (@$sub_table) {
$self->{tables}->{$table_name}{cols}{$rdb->mtn($_)."_".$rdb->{PK_NAME}}{type} = "integer";
}
# Now do 'leaf/real' fields - text columns
foreach (@$text_field) {
next if ($self->{one_to_n}->{$table_name}{$_});
# Create column name & stash original name for this field
my $field = XML::RDB::normalize($_);
my $col_name = "${table_name}_${field}_value";
$self->{tables}->{__meta__}{$col_name} = $_;
$self->{tables}->{$table_name}{cols}{$col_name}{type} = $rdb->{TEXT_COLUMN};
}
# Now do attributes - these are just text columns
my $attr_ref = $head->getAttributes;
if ($attr_ref) {
for(my $i = 0 ; $i < $attr_ref->getLength ; $i++) {
my $attr = $attr_ref->item($i);
my $name = XML::RDB::normalize($attr->getName);
$_ = "${table_name}_${name}_attribute";
# Stash original name for this column
$self->{tables}->{__meta__}{$_} = $attr->getName;
$self->{tables}->{$table_name}{cols}{$_}{type} = $rdb->{TEXT_COLUMN};
}
}
# Need this for stuff like <element>Howdy!</element>
# unfortunately this also picks up <element/> so
# it adds some extra cruft but those cols just won't get
# populated...
if (!@$text_field && !@$sub_table) {
$self->{tables}->{$table_name}{cols}{"${table_name}_value"}{type} = $rdb->{TEXT_COLUMN};
}
if (scalar(@stack) > 0) {
($head, $nodes, $sub_table, $text_field) = @{pop(@stack)};
goto TOPLESS_TBLS;
}
}
# generic-ized dump of tables
# It's up to DBIx::DBSchema to provide us w/the generic
# table defs - currently MySQL & PostgreSQL are supported
# for sure w/very generic SQL for everything else
# So 'hopefully' it'll 'just work'! (yeah right)
sub _dump_dbschema_tables {
my($self) = @_;
my %tables = %{$self->{tables}};
my $schema = new DBIx::DBSchema;
# Generic PK column
my $pk_id = new DBIx::DBSchema::Column({
name => $self->{rdb}->{PK_NAME},
type => 'integer', # Use DBIx::Sequence to handle PKs
null => 'NOT NULL'
});
foreach my $table_name (keys %tables) {
# Skip the meta-info stuff
next if ($table_name eq '__meta__');
# The table
my (@columns,$table);
foreach my $col (keys %{$tables{$table_name}{cols}}) {
push @columns, new DBIx::DBSchema::Column({
name => $col,
type => $tables{$table_name}{cols}{$col}{type},
null => !$tables{$table_name}{cols}{$col}{not_null}
});
}
if ((exists($tables{$table_name}{no_id})) and ($tables{$table_name}{no_id} == 1)) {
$table = new DBIx::DBSchema::Table({ name => $table_name,
columns => \@columns });
}
else {
push @columns, $pk_id;
$table = new DBIx::DBSchema::Table({ name => $table_name,
primary_key => $self->{rdb}->{PK_NAME},
columns => \@columns });
}
$schema->addtable($table) if ($table);
}
#
# Now create table with table names & attributes mapped to
# their 'real' names
my %values;
foreach my $thing (sort keys %{$tables{__meta__}}) {
next if ($thing eq 'cols');
$values{$thing} = $tables{__meta__}{$thing};
}
# local($") = ", ";
# Dump out tables real purty like
local($") = '';
my @sql = map { chomp ; $_.=";\n\n" } $schema->sql($self->{rdb}->{DBH});
$self->_p("@sql\n");
# Dump real element names mappings
$self->_p( "\n" .'-- Real XML element names mapping'. "\n"
.'---------------------------------'. "\n")
if ($self->{headers});
local($") = ",";
map { $self->_p("INSERT INTO ", $self->{rdb}->{REAL_ELEMENT_NAME_TABLE}," VALUES ('$_','$values{$_}');\n"); } keys %values;
# Dump 1:N table relationship names
$self->_p("\n" .'-- 1:N table relationship names'. "\n"
.'-------------------------------'. "\n")
if ($self->{headers});
foreach my $one (keys %{$self->{one_to_n}}) {
foreach my $many (keys %{$self->{one_to_n}->{$one}}) {
my $table_name = "'" . $self->{rdb}->mtn($one) . "'";
my $link = "'" . XML::RDB::normalize($many) . "'";
$self->_p("INSERT INTO ", $self->{rdb}->{LINK_TABLE_NAMES_TABLE},
" VALUES ($table_name,$link);\n");
}
}
}
sub _make_work_tables
{
my ($self) = @_;
# DBIx::Sequence needs these tables
# Used for DB-generic sequence values
#
# dbix_sequence_state:
# | dataset | varchar(50) |
# | state_id | int(11) |
#
# dbix_sequence_release:
# | dataset | varchar(50) |
# | released_id | int(11) |
# my $t1 = "dbix_sequence_state";
# my $t2 = "dbix_sequence_release";
#
# $self->{tables}->{$t1}{cols}{'dataset'}{type} = $self->{rdb}->{TEXT_COLUMN};
# $self->{tables}->{$t1}{cols}{'state_id'}{type} = "integer";
# $self->{tables}->{$t1}{no_id} = 1;
#
# $self->{tables}->{$t2}{cols}{'dataset'}{type} = $self->{rdb}->{TEXT_COLUMN};
# $self->{tables}->{$t2}{cols}{'released_id'}{type} = "integer";
# $self->{tables}->{$t1}{no_id} = 1;
# Table added to hold primary key and root table for dumping the XML
$self->{tables}->{$self->{rdb}->{ROOT_TABLE_N_PK_TABLE}}{cols}{'root'}{type} = $self->{rdb}->{TEXT_COLUMN};
$self->{tables}->{$self->{rdb}->{ROOT_TABLE_N_PK_TABLE}}{cols}{'pk'}{type} = "integer";
$self->{tables}->{$self->{rdb}->{ROOT_TABLE_N_PK_TABLE}}{cols}{'root'}{not_null} = 1;
$self->{tables}->{$self->{rdb}->{ROOT_TABLE_N_PK_TABLE}}{cols}{'pk'}{not_null} = 1;
$self->{tables}->{$self->{rdb}->{ROOT_TABLE_N_PK_TABLE}}{no_id} = 1;
$self->{tables}->{$self->{rdb}->{REAL_ELEMENT_NAME_TABLE}}{cols}{'db_name'}{type} = $self->{rdb}->{TEXT_COLUMN};
$self->{tables}->{$self->{rdb}->{REAL_ELEMENT_NAME_TABLE}}{cols}{'xml_name'}{type} = $self->{rdb}->{TEXT_COLUMN};
$self->{tables}->{$self->{rdb}->{REAL_ELEMENT_NAME_TABLE}}{cols}{'db_name'}{not_null} = 1;
$self->{tables}->{$self->{rdb}->{REAL_ELEMENT_NAME_TABLE}}{cols}{'xml_name'}{not_null} = 1;
$self->{tables}->{$self->{rdb}->{REAL_ELEMENT_NAME_TABLE}}{no_id} = 1;
$self->{tables}->{$self->{rdb}->{LINK_TABLE_NAMES_TABLE}}{cols}{'one_table'}{type} = $self->{rdb}->{TEXT_COLUMN};
$self->{tables}->{$self->{rdb}->{LINK_TABLE_NAMES_TABLE}}{cols}{'many_table'}{type} = $self->{rdb}->{TEXT_COLUMN};
$self->{tables}->{$self->{rdb}->{LINK_TABLE_NAMES_TABLE}}{cols}{'one_table'}{not_null} = 1;
$self->{tables}->{$self->{rdb}->{LINK_TABLE_NAMES_TABLE}}{cols}{'many_table'}{not_null} = 1;
$self->{tables}->{$self->{rdb}->{LINK_TABLE_NAMES_TABLE}}{no_id} = 1;
return $self;
}
#
# So the game here is to add in the PK of one column
# as an FK in the many column
#
sub _add_in_1_to_n_cols {
my ($self) = @_;
foreach my $one (keys %{$self->{one_to_n}}) {
foreach my $many (keys %{$self->{one_to_n}->{$one}}) {
my $many_table = $self->{rdb}->mtn($many);
my $col_to_add = $self->{rdb}->mtn($one . "_" . $self->{rdb}->{FK_NAME});
$self->{tables}->{$many_table}{cols}{$col_to_add}{type} = 'integer';
# MySQL allows this,
# SQLite3 does not.
# Postgresql does not.
# and I do not, well maybe add a constraint after load.
# $self->{tables}->{$many_table}{cols}{$col_to_add}{not_null} = 1;
}
}
}
sub _p {
my($self) = shift;
local($") = "";
my $fh = $self->{fh};
print $fh "@_";
}
1;