/usr/local/CPAN/WAIT/WAIT/Wais.pm


#                              -*- Mode: Perl -*- 
# $Basename: Wais.pm $
# $Revision: 1.5 $
# Author          : Ulrich Pfeifer
# Created On      : Mon Sep 16 11:08:04 1996
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Sat Apr 15 15:51:49 2000
# Language        : CPerl
# 
# (C) Copyright 1997-2000, Ulrich Pfeifer
# 

package WAIT::Wais;

require WAIT::Query::Wais;
require WAIT::Database;
use Fcntl;
use strict;
use vars qw(%DB %TB);

my %FORMATTER;

BEGIN { # check for available formatters
  %FORMATTER = qw(text WAIT::Format::Base);
  for my $inc (@INC) {
    if (-d "$inc/WAIT/Format") {
      for my $format ( <$inc/WAIT/Format/*.pm>) {
        my ($name) = ($format =~ /(\w+)\.pm$/);
        my $module = "WAIT::Format::$name";
        $name = lc $name;
        $FORMATTER{$name} = $module;
      }
    }
  }
}


sub _database {
  my $path = shift;
  my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:);

  return $DB{"$dir/$dn"} if exists $DB{"$dir/$dn"};
  $DB{"$dir/$dn"} = WAIT::Database->open(name => $dn, directory => $dir,
                                         mode => O_RDONLY);
  return $DB{"$dir/$dn"};
}

sub _table {
  my $path = shift;

  return $TB{$path} if exists $TB{$path};
  my $db = _database($path);
  my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:);
  $TB{$path} = $db->table(name => $tn);
  $TB{$path};
}

sub Search {
  my (@requests) = @_;
  my $request;
  my $result    = new WAIT::Wais::Result;
  for $request (@requests) {
    my $query     = $request->{'query'};
    my $database  = $request->{'database'};
    my $tag       = $request->{'tag'}  || $request->{'database'};
    my ($dir, $dn, $tn) = ($database =~ m:(.*)/([^/]+)/([^/]+)$:);
    my $tb        = _table($database);
    unless (defined $tb) {
      $result->add(Tag => $tag, Error => 'Could not open database');
      return $result;
    }
    my $wquery;
    eval {$wquery = WAIT::Query::Wais::query($tb, $query)};
    if ($@ ne '') {
      $result->add(Tag => $tag, Error => $@);
      return $result;
    }
    my %po        = $wquery->execute();
    $result->add(Tag => $tag,  Database => $database,
                 Table => $tb, Postings => \%po)
  }
  $result;
}

sub Retrieve {
  my %parm = @_;
  my $result = new WAIT::Wais::Result;
  my $tb = _table($parm{database});

  unless (defined $tb) {
    $result->add(Tag => 'document', Error => 'Could not open database');
    return $result;
  }

  my $did   = ref($parm{docid})?$parm{docid}->did:$parm{docid};

  my %rec   = $tb->fetch($did);

  # another CPAN hack
  if ($rec{docid} =~ m(^data/)) {
    $rec{docid} = $tb->dir . '/' . $rec{docid};
  }

  my $text = $tb->fetch_extern($rec{docid});

  my @txt;
  $tb->open;
  if ($parm{query}) {
    @txt = WAIT::Query::Wais::query($tb,$parm{query})->hilight($text);
  } else {
    @txt = $tb->layout->tag($text);
  }

  if ($parm{lines}) {
    @txt = filter($parm{lines}, @txt);
  }

  my $type = lc $parm{type};

  my $module = (exists $FORMATTER{$type})?$FORMATTER{$type}:$FORMATTER{text};
  my $path   = $module;
  $path =~ s(::)(/)g;

  require "$path.pm";
  my $format = new $module;
  $text = $format->as_string(\@txt, sub {$tb->fetch($did)});
  $result->add(Tag => 'document', Text => $text);
}

sub filter {
  my $filter = shift;
  my @result;
  my @context;
  my $lines   = 0;
  my $clines  = 0;
  my $elipsis = 0;

  while (@_) {
    my %tag = %{shift @_};
    my $txt =  shift @_;

    for (split /(\n)/, $txt) {
      if ($_ eq "\n") {
        if (exists $tag{_qt}) {
          #die "Weird!";
          push @result, {_i=>1}, "[WEIRD]";
        } elsif ($lines) {
          push @result, {}, $_;
          $lines--;
        } else {
          push @context, {}, $_;
          $clines++;
        }
      } else {
        if (exists $tag{_qt}) {
          push @result, {_i=>1}, "\n[ $elipsis lines ]\n" if $elipsis;
          push @result, @context, {%tag}, $_;
          delete $tag{_qt};
          @context = (); $clines = 0; $elipsis=0;
          $lines = $filter+1;
        } elsif ($lines) {
          push @result, \%tag, $_;
        } else {
          push @context, \%tag, $_;
        }
      }
      if ($clines>$filter) {
        my (%tag, $txt);
        while ($clines>$filter) {
          %tag = %{shift @context};
          $txt =  shift @context;
          if ($txt =~ /\n/) {
            $clines--;
            $elipsis++;
          }
        }
      }
    }
  }
  @result;
}

package WAIT::Wais::Result;

sub new {
  my $type = shift;
  my %par  = @_;
  my $self = {'header' => [], 'diagnostics' => [], 'text' => ''};

  bless $self, $type;
}

sub _header {
  my ($database, $did, $score) = @_;
  my $types;
  my $tb = WAIT::Wais::_table($database);
  my %rec = $tb->fetch($did);
  my $lines    = $rec{'lines'} || 0;
  my $length   = $rec{'size'} || 0;
  unless ($length) {
    ($length) = ($rec{docid} =~ /(\d+)$/)
  }
  unless ($rec{docid} =~ m(^/)) {
    $rec{docid} = $tb->dir . '/' . $rec{docid};
  }
  my $headline = $rec{headline} || '';
  if (exists $rec{types}) {
    $types = [split ',', $rec{types}]
  } else {
    $types = [keys %FORMATTER];
  }

  [$score, $lines, $length, $headline, $types,
   WAIT::Wais::Docid->new('wait',$database, $did)];
}

sub add {
  my $self = shift;
  my %parm = @_;
  my $tag  = $parm{Tag};
  my $docid;

  if ($parm{Postings}) {
    my @result;
    my @left  = @{$self->{'header'}};
    my @right;
    for (keys %{$parm{Postings}}) {
      push @right, _header($parm{Database}, $_, $parm{Postings}->{$_})
    }
    while (($#left >= $[) or ($#right >= $[)) {
      if ($#left < $[) {
        for (@right) {
          push @result, [$tag, @{$_}];
        }
        last;
      }
      if ($#right < $[) {
        push @result, @left;
        last;
      }
      if ($left[0]->[1] > $right[0]->[0]) {
        push @result, shift @left;
      } else {
        push @result, [$tag, @{shift @right}];
      }
    }
    $self->{'header'} = \@result;
  }
  if ($parm{Errors}) {
    my %diag = %{$parm{Errors}};
    for (keys %diag) {
      push(@{$self->{'diagnostics'}}, [$tag, $_, $diag{$_}]);
    }
  }
  if ($parm{Text}) {
    $self->{'text'} .= $parm{Text};
  }

  $self;
}


sub diagnostics {
  my $self = shift;

  @{$self->{'diagnostics'}};
}

sub header {
  my $self = shift;

  @{$self->{'header'}};
}

sub text {
  my $self = shift;

  $self->{'text'};
}

package WAIT::Wais::Docid;

sub new {
  my $type = shift;
  my ($server, $database, $dodid) = @_;
  my $self = join ';', $server, $database, $dodid;
  bless \$self, $type;
}

sub did {
  ($_[0]->split)[2];
}

sub split {
  my $self = shift;

  split /;/, $$self;
}

1;