/usr/local/CPAN/Catalog/Catalog/tools/hook_fulcrum.pm


#
#   Copyright (C) 1997, 1998
#   	Free Software Foundation, Inc.
#
#   This program is free software; you can redistribute it and/or modify it
#   under the terms of the GNU General Public License as published by the
#   Free Software Foundation; either version 2, or (at your option) any
#   later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
#
package Catalog::tools::hook_fulcrum;
use vars qw(@ISA $MAXCHAR $MAXFIELD);
use strict;

#
# Maximum length of a varchar type
#
$MAXCHAR = 32767;
#
# Maximum length of a field name
#
$MAXFIELD = 18;

use Catalog::tools::fulcrum;
use Catalog::tools::cgi;
use Catalog::tools::tools;

@ISA = qw(Catalog::tools::fulcrum);

sub initialize {
    my($self) = @_;

    $self->Catalog::tools::fulcrum::initialize();

    my($config) = config_load("hook_fulcrum.conf");
    error("missing hook_fulcrum.conf") if(!defined($config));
    %$self = ( %$self , %$config );
    $config = config_load("ecila.conf");
    error("missing ecila.conf") if(!defined($config));
    %$self = ( %$self , %$config );
}

sub mysql {
    my($self, $mysql) = @_;
    $self->{'mysql'} = $mysql;
}

sub hook_delete {
    my($self, $table, $primary_values) = @_;
    my($mysql) = $self->{'mysql'};

    my($spec) = $self->{$mysql->{'base'}};
    return if(!exists($spec->{'tables'}->{$table}));

    my($fulcrum_table) = $spec->{'params'}->{'table'};

    my($count) = 0;
    my($chunksize) = $self->{'chunksize'};
    my($chunk);
    while(@$primary_values) {
	my(@chunk) = splice(@$primary_values, $count, $chunksize);
	$count += scalar(@chunk);
	my($list) = join(',', @chunk);
	$self->exec("delete from $fulcrum_table where r_$table in ($list)");
    }
}

#
# Since fulcrum reindexes everything each time we change
# a field in a row, don't bother to make an update. Just 
# delete and insert again.
#
sub hook_update {
    my($self, $table, $primary_values) = @_;
    my($mysql) = $self->{'mysql'};

    my($spec) = $self->{$mysql->{'base'}};
    my($fulcrum_table) = $spec->{'params'}->{'table'};
    return if(!exists($spec->{'tables'}->{$table}));

    my($count) = 0;
    my($chunksize) = $self->{'chunksize'};
    my($chunk);
    while(@$primary_values) {
	my(@chunk) = splice(@$primary_values, $count, $chunksize);
	$count += scalar(@chunk);
	my($list) = join(',', @chunk);
	$self->exec("delete from $fulcrum_table where r_$table in ($list)");
	my($primary_value);
	foreach $primary_value (@chunk) {
	    $self->hook_insert($table, $primary_value);
	}
    }
}

sub hook_insert_prepare {
    my($self, $table, $primary_value, $fields_limit) = @_;
    my($mysql) = $self->{'mysql'};

    #
    # General information
    #
    my($spec) = $self->{$mysql->{'base'}};
    return if(!exists($spec->{'tables'}->{$table}));
    my($info) = $mysql->info_table($table);
    my($primary_key) = $info->{'_primary_'};
    my($spec_params) = $spec->{'tables'}->{$table}->{'params'};
    #
    # Extract row
    #
    my($where) = $spec_params->{'where'};
    if(defined($where)) {
	$where = "and ( $where )";
    } else {
	$where = '';
    }
    my($sql) = "select $primary_key from $table where $primary_key = $primary_value $where";
    my($row) = $mysql->exec_select_one($sql);
    #
    # If the row does not match the constraint, do nothing
    #
    dbg($sql, "hook_fulcrum");
    return if(!defined($row));
    #
    # Build the list of fields with appropriate format conversion
    #
    my($spec_fields) = $spec->{'tables'}->{$table}->{'fields'};
    my(@fields);
    my($field, $spec_field);
    while(($field, $spec_field) = each(%$spec_fields)) {
	next if(defined($fields_limit) && !exists($fields_limit->{$field}));
	my($type) = $info->{$field}->{'type'};
	if($type eq 'time' || $type eq 'date') {
	    push(@fields, "date_format($field, 'Y-m-d') as $field");
	} elsif($type eq 'set' && defined($info->{$field}->{'dict'})) {
	    #
	    # Do nothing, will be added magically by sexec_select
	    #
	} else {
	    push(@fields, "$field");
	}
    }
    error("unexpected empty field list for $table") if(!@fields);
    #
    # Extract data
    #
    my($fields) = join(',', @fields);
    $row = $mysql->sexec_select_one($table, "select $primary_key,$fields from $table where $table.$primary_key = $primary_value $where");

    return $row;
}

sub hook_insert {
    my($self, $table, $primary_value) = @_;
    my($mysql) = $self->{'mysql'};

    my($spec) = $self->{$mysql->{'base'}};
    return if(!exists($spec->{'tables'}->{$table}));
    my($info) = $mysql->info_table($table);
    my($primary_key) = $info->{'_primary_'};
    my($params) = $spec->{'params'};

    my($row) = $self->hook_insert_prepare($table, $primary_value);
    return if(!defined($row));
    #
    # Build an insert for fulcrum from Mysql values
    #
    my(%insert);
    $insert{"r_$table"} = $primary_value;
    my($spec_fields) = $spec->{'tables'}->{$table}->{'fields'};
    my($field, $spec_field);
    while(($field, $spec_field) = each(%$spec_fields)) {
	my($fulcrum_field) = $spec_field->{'field'};
	$insert{$fulcrum_field} = $row->{$field} if(defined($row->{$field}));
    }
    my($spec_params) = $spec->{'tables'}->{$table}->{'params'};
    if(exists($spec_params->{'merge'})) {
	my($merge) = $spec_params->{'merge'};
	my($other_table) = $merge->{'table'};
	if(!defined($other_table)) {
	    error("missing table name for merge for $table");
	}
	my($other_fields) = $merge->{'fields'};
	if(!defined($other_fields)) {
	    error("missing fields for merge for $table");
	}
	my(%fields) = map { $_ => 1 } split(',', $other_fields);
	my($relation) = $mysql->{'relations'}->{$table};
	error("missing relation for $table") if(!defined($relation));
	$relation = $relation->{$other_table};
	error("missing relation from $table to $other_table") if(!defined($relation));
	my($row) = $mysql->exec_select_one("select $relation->{'key'} from $table where $primary_key = $primary_value");
	my($other_primary_value) = $row->{$relation->{'key'}};
	error("missing primary key value (field $relation->{'key'}) in record from $table for relation to $other_table thru field $relation->{'field'}") if(!defined($other_primary_value));
	$row = $self->hook_insert_prepare($other_table, $other_primary_value, \%fields);
	my($spec_fields) = $spec->{'tables'}->{$other_table}->{'fields'};
	my($field);
	foreach $field (split(',', $other_fields)) {
	    my($spec_field) = $spec_fields->{$field};
	    my($fulcrum_field) = $spec_field->{'field'};
	    error("$field is not mapped for $other_table when merging for $table") if(!defined($fulcrum_field));
	    $insert{$fulcrum_field} = $row->{$field} if(defined($row->{$field}));
	    dbg("added $row->{$field} for $primary_value", "hook_fulcrum");
	}
    }
    my($ft_cid) = $self->insert($params->{'table'},
				%insert);
    #
    # Fill hookid field in the Mysql base with the newly inserted ft_cid,
    # use exec to prevent activating the hook.
    #
    $mysql->exec("update $table set hookid = $ft_cid where $primary_key = $primary_value");
}

sub hook_select {
    my($self, $relevance, $where, $order, $sql, $index, $length) = @_;

    my($rows_id, $rows_total) = $self->select($sql, $index, $length);

    return (undef, 0) if(!@$rows_id);

    my($mysql) = $self->{'mysql'};

    my($spec) = $self->{$mysql->{'base'}};
    my($query_params) = $spec->{'query'}->{'params'};

    my(@ids) = map { $_->{'FT_CID'} } @$rows_id;

    $where = '' if(!defined($where));
    if($where !~ /^\s*$/o) {
	$where = "and ( $where )";
    }
    my($ids) = join(',', @ids);
    $sql = "select $relevance,$query_params->{'extract'} from $query_params->{'table'} where ft_cid in ($ids) $where $order";
    dbg("hook_select: $sql", "hook_fulcrum");
    my($rows) = $self->exec_select($sql);

    #
    # Relevance factor get fucked up by ft_cid in ($ids) :-(
    #
#    my($nrows) = scalar(@ids);
#    my($i);
#    for($i = 0; $i < $nrows; $i++) {
#	$rows->[$i]->{'REL'} = $rows_id->[$i]->{'REL'};
#    }

    return ($rows, $rows_total);
}

sub schema_build {
    my($self) = @_;
    my($mysql) = $self->{'mysql'};
    
    my($spec) = $self->{$mysql->{'base'}};

    my($dir) = $self->{'fulsearch'};
    if(! -d $dir) {
	mkdir($dir, 0777) or error("cannot create directory $dir : $!");
    }
    #
    # Compute schema
    #
    my($definitions) = '';
    my(%fulcrum_fields);
    my($serial) = $self->{'serial'};
    my($table, $spec_table);
    while(($table, $spec_table) = each(%{$spec->{'tables'}})) {

	my($info_table) = $mysql->info_table($table);
	#
	# Special handling for the primary key
	#
	error("$table has no primary key") if(!defined($info_table->{'_primary_'}));
	$definitions .= "\tr_$table\tINTEGER\t$serial, -- PRIMARY KEY for $table\n";
	$serial++;
	$fulcrum_fields{$info_table->{'_primary_'}} = '';
	error("$table must have a hookid field of type int") if(!defined($info_table->{'hookid'}) || $info_table->{'hookid'}->{'type'} ne 'int');
	#
	# Handle all other fields according to specifications
	#
	my($field, $spec_field);
	while(($field, $spec_field) = each(%{$spec_table->{'fields'}})) {
	    my($fulcrum_field) = $spec_field->{'field'};
	    error("$fulcrum_field appear twice") if(exists($fulcrum_fields{$fulcrum_field}));
	    $fulcrum_fields{$fulcrum_field} = '';
	    error("missing fulcrum field name for table $table, field $field") if(!defined($spec_field->{'field'}));
	    my($info_field) = $info_table->{$field};
	    error("fulcrum field name must not be longer than $MAXFIELD characters") if(length($fulcrum_field) > $MAXFIELD);
	    error("field $field not found in table $table") if(!defined($info_field));
	    $definitions .= "\t$fulcrum_field\t";
	    my($type) = $info_field->{'type'};
	    my($size) = $info_field->{'size'};
	    if(($type eq 'set' || $type eq 'enum')) {
		if(exists($info_field->{'dict'})) {
		    my($dict_table) = $info_field->{'dict'}->{'table'};
		    my($dict_label) = $info_field->{'dict'}->{'field'};
		    my($dict_info) = $mysql->info_table($dict_table);
		    $type = $dict_info->{$dict_label}->{'type'};
		    $size = $dict_info->{$dict_label}->{'size'};
		    if(exists($spec_field->{'factor'})) {
			$size *= $spec_field->{'factor'};
		    }
		    dbg("mute $field to $type($size) because dict $dict_table $dict_label", "hook_fulcrum");
		} else {
		    $type = 'char';
		}
	    }
	    if($type eq 'char') {
		error("in table $table, $field char($size) longer than $MAXCHAR") if($size > $MAXCHAR);
		my($type) = $size < 32 ? 'CHAR' : 'VARCHAR';
		$definitions .= "${type}($size)";
	    } elsif($type eq 'int') {
		$definitions .= "INTEGER";
	    } elsif($type eq 'time' || $type eq 'date') {
		$definitions .= "DATE";
	    } 
	    $definitions .= "\t$serial, -- $table $field\n";
	    $serial++;
	}
    }

    #
    # Get the defined schema and insert modifications
    #
    my($schema) = readfile("$self->{'eciladb'}/create.fte");
    $schema =~ s/-+\s+additional\s+hook_fulcrum\s+fields\s+-+/$definitions/is;
    $schema =~ s/_BASE_/$spec->{'params'}->{'table'}/g;
    $schema =~ s/_FULCRUM_DIR_/$self->{'fulcrumdir'}/g;
    $schema =~ s/_TMP_DIR_/$self->{'fulsearch'}/g;
    $schema =~ s/PERIODIC//g;

    #
    # Build & save the database
    #
    writefile("$self->{'fulsearch'}/$spec->{'params'}->{'table'}.fte", $schema);
    $self->exec($schema);
}

sub now {
    my($self) = @_;
    my($mysql) = $self->{'mysql'};

    return "DATE '" . $mysql->Catalog::db::date(time()) . "'";
}

sub query2sql {
    my($self, $params) = @_;
    my($mysql) = $self->{'mysql'};

    my($spec) = $self->{$mysql->{'base'}};
    my($query_params) = $spec->{'query'}->{'params'};
    my($sql);
    my($stop) = $self->{'stop'};
    my($questions);
    if(defined($params->{'text'}) && $params->{'text'} !~ /^\s*$/o) {
	$_ = unaccent_8859($params->{'text'});
	s/(\w)\'(\w)/$1QoT$2/g;
	s/[\'\"]/./g;
	s/QoT/\'\'/g;
	s/^\s*//;
	s/\s*$//;
	s/\s+/ /;
	my($text) = $_;
	my(@words) = split(' ', $text);
	my(@full_words) = grep(!$stop->{$_}, split(' ', $text));
	my($groups) = $spec->{'query'}->{'groups'};
	my($divide_or) = $query_params->{'divide_or'};
	my($now) = $self->now();
	my($name);
	my(@tmp);
	foreach $name (sort { $a <=> $b } keys(%$groups)) {
	    my(@questions);
	    my($group) = $groups->{$name};
	    #
	    # Prepare constraints
	    #
	    my($constraint) = '';
	    my(@constraints);
	    if(exists($group->{'constraint'})) {
		my($re) = "^" . $group->{'constraint'} . "\$";
		my($weight) = $query_params->{'constraint_weight'} || 100;
		my($tag);
		foreach $tag (grep(/^field_/, keys(%$params))) {
		    next if($params->{$tag} =~ /^\s*$/o);
		    my($field) = $tag =~ /^field_(.*)/o;
		    my($value) = $self->quote($params->{$tag});
		    #
		    # If the field name is valid for this group, apply 
		    # constraint.
		    #
		    if($field =~ /$re/) {
			$constraint .= " and $field contains '$value' weight $weight";
		    } else {
			#
			# If the field name is not valid for this group,
			# keep the value associated with it. At least
			# one field of this group will be forced to contain
			# this value.
			#
			push(@constraints, $value);
		    }
		}
		$constraint =~ s/^ and //;
		@constraints = sortu(@constraints) if(defined(@constraints));
	    }

	    my($weight) = $group->{'weight'};
	    my($weight_or);
	    if(defined($divide_or)) {
		$weight_or = $group->{'weight'} / $divide_or;
	    } else {
		$weight_or = 3;
	    }
	    my($field);
	    foreach $field (split(',', $group->{'fields'})) {
		my($sql);
		if(!defined($params->{'expand'}) || $params->{'expand'} =~ /^\s*$/) {
		    if(@words == 1) {
			$sql .= " $field contains '@words' weight $weight";
		    } else {
			$sql .= " $field contains '@words' weight $weight or ";
			$sql .= " $field contains ('" . join("' weight 0 & '", @full_words) . "' weight $weight) or ";
			$sql .= " $field contains ('" . join("' weight $weight_or | '", @full_words) . "' weight $weight_or) ";
		    }
		} elsif($params->{'expand'} eq 'and') {
		    $sql .= " $field contains ('" . join("' weight 0 & '", @full_words) . "' weight $weight) ";
		} elsif($params->{'expand'} eq 'or') {
		    $sql .= " $field contains ('" . join("' weight $weight_or | '", @full_words) . "' weight $weight_or) ";
		} elsif($params->{'expand'} eq 'phrase') {
		    $sql .= " $field contains '@words' weight $weight ";
		} else {
		    error("unknown expand directive $params->{'expand'}");
		}
		push(@questions, $sql);
	    }
	    my($tmp) = join(' or ', @questions);
	    if(@constraints) {
		my(@questions);
		foreach $field (split(',', $group->{'fields'})) {
		    my($sql) = " $field contains ('" . join("' weight 0 | '", @constraints) . "' weight 0) ";
		    push(@questions, $sql);
		}
		$constraint = join(' or ', @questions);
		$tmp = " ( ( $tmp ) and ( $constraint ) ) ";
	    }
	    if(exists($group->{'where'})) {
		$group->{'where'} =~ s/now\(\)/$now/g;
		$tmp = " ( ( $tmp ) and ( $group->{'where'} ) ) ";
	    }
	    if($constraint ne '') {
		$tmp = " ( ( $tmp ) and ( $constraint )) ";
	    }
	    push(@tmp, $tmp);
	}
	$questions = join(' or ', @tmp);
    } else {
	#
	# No query : only apply constraints
	#
	my(@questions);

	my($tag);
	foreach $tag (grep(/^field_/, keys(%$params))) {
	    next if($params->{$tag} =~ /^\s*$/o);
	    my($field) = $tag =~ /^field_(.*)/o;
	    my($sql);
	    my($value) = $self->quote($params->{$tag});
	    $sql = " $field like '$value' ";
	    push(@questions, $sql);
	}

	if(!defined($params->{'expand'}) ||
	   $params->{'expand'} =~ /^\s*$/ ||
	   $params->{'expand'} eq 'or') {
	    $questions .= join(' or ', @questions);
	} elsif($params->{'expand'} eq 'and' ||
		$params->{'expand'} eq 'phrase') {
	    $questions .= join(' and ', @questions);
	}
    }

    my($where) = '';
    if(defined($questions)) {
	$where = "where $questions";
    }
	   

    my($order) = '';
    if(defined($query_params->{'order'})) {
	$order = "order by $query_params->{'order'}";
    }

    my($flexion);
    if(!defined($params->{'flexion'}) || $params->{'flexion'} eq 'none') {
	$flexion = "SET TERM_GENERATOR ''";
    } elsif($params->{'flexion'} eq 'french') {
	$flexion = "SET TERM_GENERATOR 'word!ftelp/lang=french/inflect'";
    } else {
	error("unknown flexion mode $params->{'flexion'}");
    }
    dbg("flexion : $flexion", "hook_fulcrum");
    $self->exec($flexion);
    
    $sql = "select $query_params->{'relevance'},ft_cid from $spec->{'query'}->{'params'}->{'table'} $where $order";

#    dbg("query = $sql", "hook_fulcrum");
    return ($sql, $query_params->{'relevance'}, $questions, $order);
}

1;
# Local Variables: ***
# mode: perl ***
# End: ***