/usr/local/CPAN/DBD-Amazon/SQL/Amazon/Parser.pm


#
#   Copyright (c) 2005, Presicient Corp., USA
#
# Permission is granted to use this software according to the terms of the
# Artistic License, as specified in the Perl README file,
# with the exception that commercial redistribution, either 
# electronic or via physical media, as either a standalone package, 
# or incorporated into a third party product, requires prior 
# written approval of the author.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Presicient Corp. reserves the right to provide support for this software
# to individual sites under a separate (possibly fee-based)
# agreement.
#
#	History:
#
#		2005-Jan-27		D. Arnold
#			Coded.
#
package SQL::Amazon::Parser;

use Exporter;
use SQL::Parser;
use Data::Dumper 'Dumper';
use Clone qw(clone);
use DBI;

BEGIN {

our @ISA = qw(Exporter SQL::Parser);

use constant SQL_TREE_OP => 0;
use constant SQL_TREE_ARG1 => 1;
use constant SQL_TREE_ARG2 => 2;
use constant SQL_TREE_NEG => 3;
use constant SQL_TREE_TABLES => 4;

use constant SQL_TREE_TYPE => 0;
use constant SQL_TREE_VALUE => 1;

use constant SQL_PRED_CONJOIN => 0;
use constant SQL_PRED_TABLES => 1;

our @EXPORT    = ();		   
our @EXPORT_OK = ();

our %EXPORT_TAGS = (
	pred_node_codes => [
	qw/SQL_TREE_OP SQL_TREE_ARG1 SQL_TREE_ARG2 SQL_TREE_NEG/
	]
);

Exporter::export_tags(keys %EXPORT_TAGS);

};

use SQL::Amazon::StorageEngine;
use SQL::Amazon::ReqFactory;

use strict;

our $VERSION = '0.10';
my %neg_ops = (
'<', '>=',
'>', '<=',
'=', '<>',
'<>', '=',
'<=', '>',
'>=', '<'
);
my %transpose_ops = (
'<', '>',
'>', '<',
'=', '=',
'<>', '<>',
'<=', '>=',
'>=', '<='
);

sub new {
	my ($class, $flags) = @_;
	
	my $obj = $class->SUPER::new('Amazon', $flags);
	return undef unless $obj;
	$obj->LOAD('LOAD SQL::Amazon::Functions');
	return $obj;
}
sub get_in {
	my ($obj, $str) = @_;

    my $in_inside_parens = 0;

	my $strpos = 0;
	my $replpos = 0;
    while ($str =~ /\G(.+?)\b(NOT\s+)?IN \((.+)$/igcs ) {
        my ($col, $contents);
        my $front = $1;
        my $back  = $3;
        my $not = $2 ? 1 : 0;
        $strpos = $-[3];
        $replpos = $-[1];
		my $pos = ($front=~/^.+\b(AND|NOT|OR)\b(.+)$/igcs) ? $-[2] : 0;
		pos($front) = $pos; 
		$in_inside_parens += ($1 eq '(') ? 1 : -1
			while ($front=~/\G.*?([\(\)])/gcs);

		$obj->{struct}{errstr} = "Unmatched right parentheses during IN processing!",
		return undef
			if ($in_inside_parens < 0);
		pos($front) = $pos;
		$in_inside_parens--,
		$pos = $+[0]
			while ($in_inside_parens && ($front=~/\G.*?\(/gcs));
		$col = substr($front, $pos);
		$replpos += $pos;
		my $funcstr = ($not ? ' AMZN_NOT_IN_ANY (' : ' AMZN_IN_ANY (') . 
			$col . ', ';

		substr($str, $replpos, $strpos - $replpos) = $funcstr;
		pos($str) = $replpos + length($funcstr);
    }

	$str =~ s/^\s+//;
	$str =~ s/\s+$//;
	$str =~ s/\(\s+/(/;
	$str =~ s/\s+\)/)/;

	return $str;
}
sub transform_syntax {
	my ($obj, $str) = @_;

	my $repl;
   	while ($str =~/\bMATCHES(\s+(ANY|ALL|TEXT))?\s*\(/i ) {
		$repl = $2 ? 'AMZN_MATCH_' . uc $2 . '(' : 'AMZN_MATCH_ANY(';
		$str=~s/\bMATCHES(\s+(ANY|ALL|TEXT))?\s*\(/$repl/i;
	}
	$str=~s/\bPOWER_SEARCH(\s*\()/AMZN_POWER_SEARCH$1/g;
    return $str;
}
sub arrayify {
	my $tree = shift;

	return (defined($tree) && ($tree ne '')) ?
		((ref $tree ne 'HASH') || (! $tree->{op})) ?
		clone($tree) :
		[ $tree->{op},
			arrayify($tree->{arg1}),
			arrayify($tree->{arg2}),
			$tree->{neg} ] :
		undef;
}

sub hashify {
	my $tree = shift;
	
	return (ref $tree eq 'ARRAY') ?
		{
			op => $tree->[SQL_TREE_OP],
			arg1 => $tree->[SQL_TREE_ARG1],
			arg2 => $tree->[SQL_TREE_ARG2],
			neg => $tree->[SQL_TREE_NEG],
		} : $tree;
}

sub decomment {
	my $sql = shift;
	my $out = '';
	my $spos = 0;
	while ($sql=~/\G.*?(['"]|\/\*|--)/gcs) {
		if ($1 eq "'") {
			return ''
				unless ($sql=~/\G.*?'/gcs);
		}
		elsif ($1 eq '"') {
			return ''
				unless ($sql=~/\G.*?"/gcs);
		}
		elsif ($1 eq '/*') {
			$out .= substr($sql, $spos, $-[1] - $spos) . ' ';
			return ''
				unless ($sql=~/\G.*?\*\//gcs);
			$spos = pos($sql);
		}
		elsif ($1 eq '--') {
			$out .= substr($sql, $spos, $-[1] - $spos);
			return $out
				unless ($sql=~/\G.*?([\r\n])/gcs);
			$spos = pos($sql) - 1;
		}
	}
	$out .= substr($sql, $spos);
	return $out;
}

sub parse {
	my ($obj, $sql) = @_;

	DBI->trace_msg("[SQL::Amazon::Parser::parse] Parsing query\n$sql", 3)
		if $ENV{DBD_AMZN_DEBUG};
	$sql = decomment($sql);
	return undef
		unless $obj->SUPER::parse($sql);
	my $predary = $obj->{struct}{where_clause} ?
		dnf_flatten(
			dnf_recurse(
				dnf_negate(
					arrayify($obj->{struct}{where_clause}))), []) :
		[ ];
	$obj->{struct}{amzn_predicate} = $predary,
	$obj->{struct}{amzn_requests} = [],
	return $obj
		if ($obj->{struct}{table_names} &&
			($#{$obj->{struct}{table_names}} == 0) &&
			(uc $obj->{struct}{table_names}[0] eq 'SYSSCHEMA'));

	my $cachecnt = 0;
	$cachecnt += (/^CACHED/i) ? 1 : 0
		foreach (@{$obj->{struct}{table_names}});
	$obj->{struct}{amzn_predicate} = $predary,
	$obj->{struct}{amzn_requests} = [],
	return $obj
		if ($cachecnt == scalar @{$obj->{struct}{table_names}});
	$cachecnt = 0;
	my @amznreqs = ();
	my @finalpreds = ();
	my $reqobj;
	my $single_table = $obj->{struct}{table_names}[0];
	foreach my $pred (@$predary) {
		my ($table, $reqclass);
		my $requests = [];
		$pred->[SQL_PRED_TABLES] = { $single_table => 1}
			unless ($pred->[SQL_PRED_TABLES] &&
				keys %{$pred->[SQL_PRED_TABLES]});

		my $cached = 1;

		foreach (keys %{$pred->[SQL_PRED_TABLES]}) {
			($table, $reqclass) = 
				SQL::Amazon::StorageEngine::has_table($_);
			$obj->{struct}{errstr} = "Unknown table $_.",
			return undef
				unless $table;

			next if /^CACHED/i;

			$cached = undef;
			next unless $reqclass;
			push @$requests, [ $reqclass, $table ]
				unless ($table=~/^CACHED/i);
		}

		if ($cached) {
			$cachecnt++;
			push @finalpreds, $pred
				if $pred;
			next;
		}

		$pred->[SQL_PRED_TABLES] = 
			SQL::Amazon::ReqFactory->cleanup_requests($requests);
		$obj->{struct}{errstr} = SQL::Amazon::ReqFactory->errstr,
		return undef
			unless (scalar @{$pred->[SQL_PRED_TABLES]});
		$obj->{struct}{errstr} = 
			'Invalid predicate: insufficient qualifiers to issue service request.',
		return undef
			unless scalar @$requests;
		foreach (@$requests) {
			($pred, $reqobj) =
				SQL::Amazon::ReqFactory->create_request(
					$_->[0], $_->[1], $pred, $obj);
			$obj->{struct}{errstr} = SQL::Amazon::ReqFactory->errstr,
			return undef
				unless (defined($pred) || defined($reqobj));

			push @finalpreds, $pred
				if $pred;
			push @amznreqs, $reqobj
				if $reqobj;
		}
	}

	$obj->{struct}{errstr} = 
		'Invalid predicate: insufficient qualifiers to issue service request.',
	return undef
		unless (scalar @amznreqs || 
			($cachecnt == scalar @$predary));
	$obj->{struct}{amzn_predicate} = \@finalpreds;
	$obj->{struct}{amzn_requests} = \@amznreqs;
	return $obj;
}

sub negate_node {
	my $node = shift;
	$node->[SQL_TREE_OP] = $neg_ops{$node->[SQL_TREE_OP]},
	delete $node->[SQL_TREE_NEG],
	return $node
		if $neg_ops{$node->[SQL_TREE_OP]};
	$node->[SQL_TREE_NEG] = (! $node->[SQL_TREE_NEG]);
	return $node;
}
sub dnf_negate {
	my $node = shift;
	
	if ($node->[SQL_TREE_NEG]) {
		if (($node->[SQL_TREE_OP] eq 'AND') || 
			($node->[SQL_TREE_OP] eq 'OR')) {
			$node->[SQL_TREE_OP] = ($node->[SQL_TREE_OP] eq 'AND') ? 'OR' : 'AND';
			negate_node($node->[SQL_TREE_ARG1]);
			negate_node($node->[SQL_TREE_ARG2]);
		}
		else {
			negate_node($node);
		}
	}
	dnf_negate($node->[SQL_TREE_ARG1]),
	dnf_negate($node->[SQL_TREE_ARG2])
		if (($node->[SQL_TREE_OP] eq 'AND') || 
			($node->[SQL_TREE_OP] eq 'OR'));
	$node;
}
sub dnf_find_tables {
	my ($node, $tables) = @_;

	return undef 
		unless ((ref $node eq 'HASH') &&
			($node->{type} ne 'null'));

	if ($node->{type} eq 'column') {
		$tables->{uc $1} = 1
			if ($node->{value}=~/^([A-Z]\w*)\..+$/i);
		return $tables;
	}
	elsif ($node->{value} eq 'multiple values') {
	}
	return undef;
}
sub dnf_recurse {
	my ($node, $optimize) = shift;
	return $node
		if ($optimize && 
			($node->[SQL_TREE_ARG1][SQL_TREE_OP] ne 'OR') && 
			($node->[SQL_TREE_ARG2][SQL_TREE_OP] ne 'OR'));
	if (($node->[SQL_TREE_OP] ne 'OR') && 
		($node->[SQL_TREE_OP] ne 'AND')) {
		return $node
			if $node->[SQL_TREE_TABLES];

		my $tables = {};
		if (dnf_find_tables($node->[SQL_TREE_ARG1], $tables)) {
			$node->[SQL_TREE_TABLES] = $tables
				unless ($node->[SQL_TREE_ARG2] &&
					dnf_find_tables($node->[SQL_TREE_ARG2], $tables));
			return $node;
		}

		$node->[SQL_TREE_TABLES] = $tables
			if ($node->[SQL_TREE_ARG2] &&
				dnf_find_tables($node->[SQL_TREE_ARG2], $tables));
		return $node;
	}
	dnf_recurse($node->[SQL_TREE_ARG1], $optimize);
	dnf_recurse($node->[SQL_TREE_ARG2], $optimize);
	my ($temp, $newnode);
	if ($node->[SQL_TREE_OP] eq 'AND') {

		if ($node->[SQL_TREE_ARG1][SQL_TREE_OP] eq 'OR') {
			$temp = $node->[SQL_TREE_ARG1][SQL_TREE_ARG2];
			$node->[SQL_TREE_ARG1][SQL_TREE_ARG2] = clone($node->[SQL_TREE_ARG2]);
			$node->[SQL_TREE_ARG1][SQL_TREE_OP] = 'AND';
			$newnode = [ 'AND', $temp, $node->[SQL_TREE_ARG2] ];
			$node->[SQL_TREE_OP] = 'OR';
			$node->[SQL_TREE_ARG2] = $newnode;
			dnf_recurse($node->[SQL_TREE_ARG1], 1);
			dnf_recurse($node->[SQL_TREE_ARG2], 1);
		}
		elsif ($node->[SQL_TREE_ARG2][SQL_TREE_OP] eq 'OR') {
			$temp = $node->[SQL_TREE_ARG2][SQL_TREE_ARG2];
			$node->[SQL_TREE_ARG2][SQL_TREE_ARG2] = clone($node->[SQL_TREE_ARG1]);
			$node->[SQL_TREE_ARG2][SQL_TREE_OP] = 'AND';
			$newnode = [ 'AND', $node->[SQL_TREE_ARG1], $temp ];
			$node->[SQL_TREE_OP] = 'OR';
			$node->[SQL_TREE_ARG1] = $newnode;
			dnf_recurse($node->[SQL_TREE_ARG1], 1);
			dnf_recurse($node->[SQL_TREE_ARG2], 1);
		}
	}
	return $node;
}
sub dnf_flatten {
	my ($tree, $dnfary) = @_;
	dnf_flatten($tree->[SQL_TREE_ARG1], $dnfary),
	dnf_flatten($tree->[SQL_TREE_ARG2], $dnfary),
	$tree->[SQL_TREE_ARG1] = undef,
	$tree->[SQL_TREE_ARG2] = undef,
	return $dnfary
		if ($tree->[SQL_TREE_OP] eq 'OR');
	my $conjoins = [];
	my $tables = {};
	dnf_flatten_ANDs($tree, $conjoins, $tables);
	push(@$dnfary, [ $conjoins, $tables ]);
	return $dnfary;
}
sub dnf_flatten_ANDs {
	my ($tree, $conjoins, $tables) = @_;
	dnf_flatten_ANDs($tree->[SQL_TREE_ARG1], $conjoins, $tables),
	dnf_flatten_ANDs($tree->[SQL_TREE_ARG2], $conjoins, $tables),
	$tree->[SQL_TREE_ARG1] = undef,
	$tree->[SQL_TREE_ARG2] = undef,
	return $conjoins
		if ($tree->[SQL_TREE_OP] eq 'AND');
	my $t;
	$t = $tree->[SQL_TREE_ARG1],
	$tree->[SQL_TREE_ARG1] = $tree->[SQL_TREE_ARG2],
	$tree->[SQL_TREE_ARG2] = $t,
	$tree->[SQL_TREE_OP] = $transpose_ops{$tree->[SQL_TREE_OP]}
		if ($transpose_ops{$tree->[SQL_TREE_OP]} &&
			((ref $tree->[SQL_TREE_ARG1] ne 'HASH') || 
				($tree->[SQL_TREE_ARG1]{type} ne 'column')) &&
			(ref $tree->[SQL_TREE_ARG2] eq 'HASH') &&
			($tree->[SQL_TREE_ARG2]{type} eq 'column'));

	$tables->{$_} = 1,
	$tree->[SQL_TREE_TABLES] = undef	
		foreach (keys %{$tree->[SQL_TREE_TABLES]});
	push(@$conjoins, $tree);
	return $conjoins;
}
sub dnf_test {
	my $tree = shift;
	print print_node($tree), "\n";
	dnf_negate($tree);
	dnf_recurse($tree);
	print print_node($tree), "\n";

	return $tree;
}

sub print_node {
	my $tree = shift;
	
	return (($tree->[SQL_TREE_OP] eq 'AND') || ($tree->[SQL_TREE_OP] eq 'OR')) ?
		'(' . $tree->[SQL_TREE_ARG1]->print_node .  ') ' .
			$tree->[SQL_TREE_OP] . ' (' . 
			$tree->[SQL_TREE_ARG2]->print_node . ')' :
		'(' . $tree->[SQL_TREE_ARG1] . ' ' . $tree->[SQL_TREE_OP] . ' ' . 
			$tree->[SQL_TREE_ARG2] . ')';
}


1;