/usr/local/CPAN/WAIT/WAIT/Wais.pm
# -*- Mode: Perl -*-
# $Basename: Wais.pm $
# $Revision: 1.5 $
# Author : Ulrich Pfeifer
# Created On : Mon Sep 16 11:08:04 1996
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Sat Apr 15 15:51:49 2000
# Language : CPerl
#
# (C) Copyright 1997-2000, Ulrich Pfeifer
#
package WAIT::Wais;
require WAIT::Query::Wais;
require WAIT::Database;
use Fcntl;
use strict;
use vars qw(%DB %TB);
my %FORMATTER;
BEGIN { # check for available formatters
%FORMATTER = qw(text WAIT::Format::Base);
for my $inc (@INC) {
if (-d "$inc/WAIT/Format") {
for my $format ( <$inc/WAIT/Format/*.pm>) {
my ($name) = ($format =~ /(\w+)\.pm$/);
my $module = "WAIT::Format::$name";
$name = lc $name;
$FORMATTER{$name} = $module;
}
}
}
}
sub _database {
my $path = shift;
my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:);
return $DB{"$dir/$dn"} if exists $DB{"$dir/$dn"};
$DB{"$dir/$dn"} = WAIT::Database->open(name => $dn, directory => $dir,
mode => O_RDONLY);
return $DB{"$dir/$dn"};
}
sub _table {
my $path = shift;
return $TB{$path} if exists $TB{$path};
my $db = _database($path);
my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:);
$TB{$path} = $db->table(name => $tn);
$TB{$path};
}
sub Search {
my (@requests) = @_;
my $request;
my $result = new WAIT::Wais::Result;
for $request (@requests) {
my $query = $request->{'query'};
my $database = $request->{'database'};
my $tag = $request->{'tag'} || $request->{'database'};
my ($dir, $dn, $tn) = ($database =~ m:(.*)/([^/]+)/([^/]+)$:);
my $tb = _table($database);
unless (defined $tb) {
$result->add(Tag => $tag, Error => 'Could not open database');
return $result;
}
my $wquery;
eval {$wquery = WAIT::Query::Wais::query($tb, $query)};
if ($@ ne '') {
$result->add(Tag => $tag, Error => $@);
return $result;
}
my %po = $wquery->execute();
$result->add(Tag => $tag, Database => $database,
Table => $tb, Postings => \%po)
}
$result;
}
sub Retrieve {
my %parm = @_;
my $result = new WAIT::Wais::Result;
my $tb = _table($parm{database});
unless (defined $tb) {
$result->add(Tag => 'document', Error => 'Could not open database');
return $result;
}
my $did = ref($parm{docid})?$parm{docid}->did:$parm{docid};
my %rec = $tb->fetch($did);
# another CPAN hack
if ($rec{docid} =~ m(^data/)) {
$rec{docid} = $tb->dir . '/' . $rec{docid};
}
my $text = $tb->fetch_extern($rec{docid});
my @txt;
$tb->open;
if ($parm{query}) {
@txt = WAIT::Query::Wais::query($tb,$parm{query})->hilight($text);
} else {
@txt = $tb->layout->tag($text);
}
if ($parm{lines}) {
@txt = filter($parm{lines}, @txt);
}
my $type = lc $parm{type};
my $module = (exists $FORMATTER{$type})?$FORMATTER{$type}:$FORMATTER{text};
my $path = $module;
$path =~ s(::)(/)g;
require "$path.pm";
my $format = new $module;
$text = $format->as_string(\@txt, sub {$tb->fetch($did)});
$result->add(Tag => 'document', Text => $text);
}
sub filter {
my $filter = shift;
my @result;
my @context;
my $lines = 0;
my $clines = 0;
my $elipsis = 0;
while (@_) {
my %tag = %{shift @_};
my $txt = shift @_;
for (split /(\n)/, $txt) {
if ($_ eq "\n") {
if (exists $tag{_qt}) {
#die "Weird!";
push @result, {_i=>1}, "[WEIRD]";
} elsif ($lines) {
push @result, {}, $_;
$lines--;
} else {
push @context, {}, $_;
$clines++;
}
} else {
if (exists $tag{_qt}) {
push @result, {_i=>1}, "\n[ $elipsis lines ]\n" if $elipsis;
push @result, @context, {%tag}, $_;
delete $tag{_qt};
@context = (); $clines = 0; $elipsis=0;
$lines = $filter+1;
} elsif ($lines) {
push @result, \%tag, $_;
} else {
push @context, \%tag, $_;
}
}
if ($clines>$filter) {
my (%tag, $txt);
while ($clines>$filter) {
%tag = %{shift @context};
$txt = shift @context;
if ($txt =~ /\n/) {
$clines--;
$elipsis++;
}
}
}
}
}
@result;
}
package WAIT::Wais::Result;
sub new {
my $type = shift;
my %par = @_;
my $self = {'header' => [], 'diagnostics' => [], 'text' => ''};
bless $self, $type;
}
sub _header {
my ($database, $did, $score) = @_;
my $types;
my $tb = WAIT::Wais::_table($database);
my %rec = $tb->fetch($did);
my $lines = $rec{'lines'} || 0;
my $length = $rec{'size'} || 0;
unless ($length) {
($length) = ($rec{docid} =~ /(\d+)$/)
}
unless ($rec{docid} =~ m(^/)) {
$rec{docid} = $tb->dir . '/' . $rec{docid};
}
my $headline = $rec{headline} || '';
if (exists $rec{types}) {
$types = [split ',', $rec{types}]
} else {
$types = [keys %FORMATTER];
}
[$score, $lines, $length, $headline, $types,
WAIT::Wais::Docid->new('wait',$database, $did)];
}
sub add {
my $self = shift;
my %parm = @_;
my $tag = $parm{Tag};
my $docid;
if ($parm{Postings}) {
my @result;
my @left = @{$self->{'header'}};
my @right;
for (keys %{$parm{Postings}}) {
push @right, _header($parm{Database}, $_, $parm{Postings}->{$_})
}
while (($#left >= $[) or ($#right >= $[)) {
if ($#left < $[) {
for (@right) {
push @result, [$tag, @{$_}];
}
last;
}
if ($#right < $[) {
push @result, @left;
last;
}
if ($left[0]->[1] > $right[0]->[0]) {
push @result, shift @left;
} else {
push @result, [$tag, @{shift @right}];
}
}
$self->{'header'} = \@result;
}
if ($parm{Errors}) {
my %diag = %{$parm{Errors}};
for (keys %diag) {
push(@{$self->{'diagnostics'}}, [$tag, $_, $diag{$_}]);
}
}
if ($parm{Text}) {
$self->{'text'} .= $parm{Text};
}
$self;
}
sub diagnostics {
my $self = shift;
@{$self->{'diagnostics'}};
}
sub header {
my $self = shift;
@{$self->{'header'}};
}
sub text {
my $self = shift;
$self->{'text'};
}
package WAIT::Wais::Docid;
sub new {
my $type = shift;
my ($server, $database, $dodid) = @_;
my $self = join ';', $server, $database, $dodid;
bless \$self, $type;
}
sub did {
($_[0]->split)[2];
}
sub split {
my $self = shift;
split /;/, $$self;
}
1;