/usr/local/CPAN/Catalog/Catalog/external.pm


#
#   Copyright (C) 1997, 1998
#   	Free Software Foundation, Inc.
#
#   This program is free software; you can redistribute it and/or modify it
#   under the terms of the GNU General Public License as published by the
#   Free Software Foundation; either version 2, or (at your option) any
#   later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
#
# $Header: /cvsroot/Catalog/Catalog/lib/Catalog/external.pm,v 1.6 1999/07/02 12:11:48 loic Exp $
#
package Catalog::external;

use strict;

use XML::DOM;
use XML::Parser;
use Unicode::String;
use Unicode::Map8;
use Catalog::tools::tools;
use MD5;

sub new {
    my($class, %args) = @_;

    my($self) = {};
    bless($self, $class);

    return $self;
}

sub load {
    my($self, $catalog, $name, $file) = @_;

    $self->{'catalog'} = $catalog;
    $self->{'name'} = $name;

    #
    # Load charset conversion map
    #
    my($map) = Unicode::Map8->new($catalog->{'encoding'});
    error("$catalog->{'encoding'} is not known to Map8") if(!defined($map));
    $self->{'map'} = $map;

    #
    # Open file for reading extracts
    #
    no strict 'refs';
    $self->{'FILE'} = $file;
    open($self->{'FILE'}, "<$file") or error("cannot open $file for reading : $!");
    #
    # Simple parser to find bounds of extracts to read
    # handle() is called by Start.
    #
    delete($self->{'start'});
    my($parser) = XML::Parser->new(Handlers => {
	'Start' => sub { $self->Start(@_); },
	'End' => sub { $self->End(@_); },
	'XMLDecl' => sub { $self->XMLDecl(@_); }
    }
				   );
    $self->{'DOM'} = XML::DOM::Parser->new();

    eval { $parser->parsefile($file); };
    my($error) = $@;
    close($self->{'FILE'});
    error($error) if($error);
}

sub unload {
    my($self, $catalog, $name, $file) = @_;
    my($catalog_row) = $catalog->cinfo()->{$name};
    error("catalog $name does not exists") if(!defined($catalog_row));

    open(FILE, ">$file") or error("cannot open $file for writing : $!");
    my($schema) = $catalog->db()->schema('catalog_schema', 'catalog_unload');
    $schema =~ s/NAME/$name/g;
    $catalog->db()->exec($schema);
    eval {
	$self->unload_head($catalog, $catalog_row);
	$self->unload_body($catalog, $catalog_row);
	$self->unload_symlinks($catalog, $catalog_row);
	$self->unload_auth($catalog, $catalog_row);
	$self->unload_extra($catalog, $catalog_row);
	$self->unload_tail($catalog, $catalog_row);
    };
    my($error) = $@;
    close(FILE);
    $catalog->db()->exec("drop table catalog_unload_$name");
    error($error) if($error);
}

sub unload_head {
    my($self, $catalog, $catalog_row) = @_;

    print FILE "<?xml version=\"1.0\" encoding=\"$catalog->{'encoding'}\" ?>\n";
    print FILE <<EOF;
<RDF xmlns:rdf="http://www.w3.org/TR/1999/REC-rdf-syntax-19990222#"
     xmlns="http://www.ecila.fr/">
EOF

    print FILE "\n";
    my($schema) = $catalog->db()->table_schema($catalog_row->{'tablename'});
    print FILE <<EOF;
 <Table>
  <![CDATA[
$schema
  ]]>
 </Table>
EOF

    print FILE "\n";
    print FILE " <Catalog>\n";
    my($key, $value);
    while(($key, $value) = each(%$catalog_row)) {
	next if($key eq 'rowid' || $key eq 'root');
	next if(!$value);
	$value = $self->escape($value);
	print FILE "  <$key>$value</$key>\n";
    }
    print FILE " </Catalog>\n";
}

sub unload_extra { }

sub unload_body {
    my($self, $catalog, $catalog_row) = @_;

    my($name) = $catalog_row->{'name'};
    my($table) = $catalog_row->{'tablename'};
    my($primary_key) = $catalog->db()->info_table($table)->{'_primary_'};
    my($category_table) = "catalog_category_$name";
    my($category2category_table) = "catalog_category2category_$name";
    my($entry2category_table) = "catalog_entry2category_$name";
    my($unload_table) = "catalog_unload_$name";

    $catalog->db()->exec("insert into catalog_unload_$name select $primary_key from $table");

    my($func) = sub {
	my($id, $name, $pathname, $path) = @_;

	print FILE "\n";

	print FILE " <Category>\n";
	my($category_row) = $catalog->db()->exec_select_one("select * from $category_table where rowid = $id");
	my($parent) = $catalog->db()->exec_select_one("select up from $category2category_table where down = $id and (info is null or not find_in_set('symlink', info))")->{'up'};
	$category_row->{'parent'} = $parent;
	$category_row->{'name'} = $pathname;
	delete($category_row->{'count'});
	$self->unload_record($category_row);
	print FILE " </Category>\n";

	$self->unload_body_entries($table, $catalog, $entry2category_table, $unload_table, $primary_key, $id);

	$catalog->gauge();

	return 1;
    };

    $self->unload_body_entries($table, $catalog, $entry2category_table, $unload_table, $primary_key, $catalog_row->{'root'});
    $catalog->walk_categories($name, $func);
}

sub unload_body_entries {
    my($self, $table, $catalog, $entry2category_table, $unload_table, $primary_key, $id) = @_;

    my($entry2category_rows) = $catalog->db()->exec_select("select row,category from $entry2category_table where category = $id");
    #
    # Stop if category is empty
    #
    return if(!defined($entry2category_rows));

    my($entry2category_row);
    foreach $entry2category_row (@$entry2category_rows) {
	print FILE " <Link>\n";
	$self->unload_record($entry2category_row);
	print FILE " </Link>\n";
    }

    #
    # Select all records that have not been seen already and that 
    # are linked to this category.
    #
    my($table_rows) = $catalog->db()->exec_select("select a.* from $table as a, $entry2category_table as b, $unload_table as c where b.category = $id and b.row = a.$primary_key and c.rowid = a.$primary_key");
    my(@primaries);
    my($table_row);
    foreach $table_row (@$table_rows) {
	print FILE " <Record table=\"$table\">\n";
	$self->unload_record($table_row);
	print FILE " </Record>\n";
	push(@primaries, $table_row->{$primary_key});
    }
    #
    # Delete from unload_table the rowids matching records already 
    # written to file.
    #
    if(@primaries) {
	$catalog->db()->exec("delete from $unload_table where rowid in ( " . join(',', @primaries) . " )");
    }
}

sub unload_symlinks {
    my($self, $catalog, $catalog_row) = @_;

    my($name) = $catalog_row->{'name'};
    my($sql) = "select up,down from catalog_category2category_$name where find_in_set('symlink', info)";
    my($rows) = $catalog->db()->exec_select($sql);
    my($row);
    foreach $row (@$rows) {
	print FILE " <Symlink>\n";
	print FILE "  <up>$row->{'up'}</up>\n";
	print FILE "  <down>$row->{'down'}</down>\n";
	print FILE " </Symlink>\n";
    }
}

sub unload_auth {
    my($self, $catalog, $catalog_row) = @_;

    my($name) = $catalog_row->{'name'};
    my($sql) = "select a.login,b.categorypointer from catalog_auth as a,catalog_auth_properties as b where a.rowid = b.auth";
    my($rows) = $catalog->db()->exec_select($sql);
    my($row);
    foreach $row (@$rows) {
	print FILE " <Auth>\n";
	print FILE "  <login>$row->{'login'}</login>\n";
	print FILE "  <category>$row->{'categorypointer'}</category>\n";
	print FILE " </Auth>\n";
    }
}

sub unload_record {
    my($self, $record) = @_;

    my($key, $value);
    while(($key, $value) = each(%$record)) {
	next if(!$value);
	$value = $self->escape($value);
	print FILE "  <$key>$value</$key>\n";
    }
}

sub unload_tail {
    my($self, $catalog, $catalog_row) = @_;

    print FILE "\n";
    print FILE " <Sync/>\n";
    print FILE "</RDF>\n";
}

#
# Handlers for extract location parser
#
sub XMLDecl {
    my($self, $expat, $version, $encoding, $standalone) = @_;

    if($encoding) {
	$self->{'encoding'} = $encoding;
    }
}

sub Start {
    my($self, $expat, $element, @attlist) = @_;

    if($expat->depth() == 1) {
	$self->extractor($expat);
    }
}

sub End {
    my($self, $expat, $element) = @_;

    if($expat->depth() == 0) {
	$self->extractor($expat);
    }
}

sub extractor {
    my($self, $expat) = @_;

    my($start) = $self->{'start'};
    my($end) = $expat->current_byte();
    if(defined($start)) {
	no strict 'refs';
	sysseek($self->{'FILE'}, $start, 0);
	my($buffer);
	sysread($self->{'FILE'}, $buffer, $end - $start);
	
	eval {
	    $self->handle($buffer);
	};
	if($@) {
	    my($error) = $@;
	    my($line) = $expat->current_line();
	    warn("$self->{'FILE'}: line $line: $error");
	}
    }
    $self->{'start'} = $end;
}

#
# Convert extract to DOM structure and call appropriate function
#
sub handle {
    my($self, $buffer) = @_;
    my($dom) = $self->{'DOM'};
    my(@encoding) = ();
    if(exists($self->{'encoding'})) {
	@encoding = ( 'ProtocolEncoding' => $self->{'encoding'} );
    }
    
    my($doc) = $dom->parse($buffer, @encoding);
    my($element) = $doc->getElementsByTagName("*");
    my($name) = $element->getNodeName();

    $self->${name}($element);

    $doc->dispose();
}

#
# Handlers for each top level tag
#

sub Table {
    my($self, $element) = @_;

    my($schema) = $self->unescape($element->getFirstChild()->getData());
    my($table) = $schema =~ /create\s+table\s+([a-z_]+)/io;
    error("Table is not a create table instruction") if(!$table);

    my($catalog) = $self->{'catalog'};
    
    $catalog->db()->exec("drop table $table") if($catalog->db()->info_table($table));
    $catalog->db()->exec($schema);
}

sub Link {
    my($self, $element) = @_;

    my($record) = $self->torecord($element);

    my($catalog) = $self->{'catalog'};
    my($name) = $self->{'name'};

    $catalog->db()->insert("catalog_entry2category_$name",
		     %$record);
}

sub Catalog {
    my($self, $element) = @_;

    my($record) = $self->torecord($element);

    my($catalog) = $self->{'catalog'};
    my($name) = $self->{'name'};

    if(defined($name)) {
	$record->{'name'} = $name;
    } else {
	$self->{'name'} = $name = $record->{'name'};
	if(!defined($name)) {
	    error("catalog has no name");
	}
    }

    $catalog->cdestroy_api($name);
    $catalog->cbuild_api(%$record);

    $self->{'tablename'} = $record->{'tablename'};
}

sub Category {
    my($self, $element) = @_;

    my($parent);
    my(%record);
    my($node);
    foreach $node ($element->getElementsByTagName("*")) {
	my($child) = $node->getFirstChild();
	next if(!defined($child));
	my($field) = $node->getNodeName();
	my($value) = $self->unescape($child->getData());
	if($field eq 'name') {
	    $value =~ s:.*/::;
	}
	if($field eq 'parent') {
	    $parent = $value;
	} else {
	    $record{$field} = $value;
	}
    }

    my($catalog) = $self->{'catalog'};
    my($name) = $self->{'name'};
    my($rowid) = $catalog->db()->insert("catalog_category_$name",
					%record);
    $catalog->db()->insert("catalog_category2category_$name",
			   'up' => $parent,
			   'down' => $rowid);

    $catalog->gauge();
}

sub Symlink {
    my($self, $element) = @_;

    my($record) = $self->torecord($element);

    my($catalog) = $self->{'catalog'};
    my($name) = $self->{'name'};
    $record->{'down'} = $self->resolv_path($record->{'down'});

    eval {
	$catalog->db()->insert("catalog_category2category_$name",
			       'info' => 'symlink',
			       %$record);
    };
}

sub Auth {
    my($self, $element) = @_;

    my($record) = $self->torecord($element);

    my($catalog) = $self->{'catalog'};
    my($name) = $self->{'name'};
    
    my($auth);
    my($row) = $catalog->db()->exec_select_one("select rowid from catalog_auth where login = '$record->{'login'}'");
    if(defined($row)) {
	$auth = $row->{'rowid'};
    } else {
	$auth = $catalog->db()->insert("catalog_auth",
				       'login' => $record->{'login'});
    }

    $catalog->db()->insert("catalog_auth_properties",
			   'auth' => $auth,
			   'catalogname' => $name,
			   'categorypointer' => $record->{'category'});
}

sub Record {
    my($self, $element) = @_;

    my($record) = $self->torecord($element);

    my($table) = $element->getAttribute("table");
    error("missing table name") if(!$table);

    my($catalog) = $self->{'catalog'};

    $catalog->db()->insert($table,
			   %$record);
}

sub Sync {
    my($self, $element) = @_;

    my($catalog) = $self->{'catalog'};
    my($name) = $self->{'name'};

    #
    # Rebuild computed data
    #
#    warn("Sync: start");
    $catalog->db()->exec("drop table catalog_path_$name");
#    warn("Sync: rebuild catalog_path_$name");
    $catalog->pathcheck($name);
#    warn("Sync: restore category counts");
    $catalog->category_count_api($name);
#    warn("Sync: done");
}

#
# If path is a char string instead of an numerical id, convert it
#
sub resolv_path {
    my($self, $path) = @_;

    if($path =~ m|^/|o) {
	my($catalog) = $self->{'catalog'};
	my($name) = $self->{'name'};
        $path .= "/" if($path !~ m|/$|o);
	my($md5) = MD5->hexhash($catalog->path2url($path));
	my($row) = $catalog->db()->exec_select_one("select id from catalog_path_$name where md5 = '$md5'");
	if(!$row) {
	    dbg("skip $path : not found in catalog_path_$name", "normal");
	    return;
	}
	$path = $row->{'id'};
    }

    return $path;
}

sub torecord {
    my($self, $element) = @_;

    my(%record);
    my($node);
    foreach $node ($element->getElementsByTagName("*")) {
	my($field) = $node->getNodeName();
	my($value) = $self->unescape($node->getFirstChild()->getData());
	$record{$field} = $value;
    }

    return \%record;
}

sub unescape {
    my($self, $string) = @_;

    #
    # Convert utf8 -> utf16
    #
    my($ustr) = Unicode::String->new();
    $ustr->utf8($string);
    my($u16) = $ustr->utf16();
    #
    # Map to 8bit charset defined by $catalog->{'encoding'}
    #
    my($map) = $self->{'map'};
    $string = $map->to8($u16);

    return Catalog::tools::cgi::myunescapeHTML($string);
}

sub escape {
    my($self, $string) = @_;
    
    return Catalog::tools::cgi::myescapeHTML($string);
}

1;
# Local Variables: ***
# mode: perl ***
# End: ***