WWW::Search::PLweb - class for searching PLS PLweb search engine


WWW-Search-Backends documentation Contained in the WWW-Search-Backends distribution.

Index


Code Index:

NAME

Top

WWW::Search::PLweb - class for searching PLS PLweb search engine

SYNOPSIS

Top

    require WWW::Search;
    $search = new WWW::Search('PLweb');

DESCRIPTION

Top

This class is a PLweb specialization of WWW::Search. It queries and interprets searches based on PLweb, by PLS http://www.pls.com/.

This class exports no public interface; all interaction should be done through WWW::Search objects.

This software assumes that you're using the default output format for a PLweb search. It should look like this:

  VAL DOCUMENT DB SIZE
  ------------------------------------------------------------------
  388 Document1 foo 1122 ...

OPTIONS

Top

This search supports standard WWW::Search arguments

search_url

The PLweb URL to search. On unix this usually looks like http://somehost/cgi-bin/iopcode.pl

search_args

The arguments used for the search engine, separate them by &.

SEE ALSO

Top

To make new back-ends, see WWW::Search,

AUTHOR

Top

WWW::Search::PLweb is written by Paul Lindner, <lindner@itu.int>

BUGS

Top

Things not supported: $result->raw()

COPYRIGHT

Top


WWW-Search-Backends documentation Contained in the WWW-Search-Backends distribution.

#!/usr/local/bin/perl

#
# PLweb.pm
# by Paul Lindner <lindner@itu.int>
# Copyright (C) 1997 by the UN Administrative Committee on Coordination (ACC)
#

#'

package WWW::Search::PLweb;

use strict;
use warnings;

use base 'WWW::Search';

our
$VERSION = do { my @r = ( q$Revision: 1.4 $ =~ /\d+/g ); sprintf "%d." . "%03d" x $#r, @r };

use Carp ();
require WWW::SearchResult;

my($debug) = 0;

#private
sub native_setup_search {
    my($self, $native_query, $native_opt) = @_;
    my($native_url);
    my($default_native_url) = 
	"http://www.un.org/plweb-cgi/iopcode.pl?platmode=unix&operation=query&dbgroup=un&account=_free_user_&waittime=30 seconds&dbname=gopher:webdepts:webnews&query=%s";
    
    if (defined($native_opt)) {
	$debug = 1 if ($native_opt->{'search_debug'});

	if ($self->{'search_url'} && $native_opt->{'search_args'}) {
	    $native_url = $native_opt->{'search_url'} . "?" . $native_opt->{'search_args'};
	}
    } 

    $native_url = $default_native_url if (!$native_url);

    ## Change behaviour depending on 'search_how'
    my $how = $self->{'search_how'};
    if (defined $how) {
	if ($how =~ /any/) {
	    ## change separator to or, or add it..
	    $native_url =~ s/separator=[^&]+/separator=or/ig;
	    if ($native_url !~ /separator=/) {
		$native_url .= "&separator=or";
	    }
	    
	} elsif ($how =~ /all/) {
	    ## change separator to and, or add it..
	    $native_url =~ s/separator=[^&]+/separator=and/ig;
	    if ($native_url !~ /separator=/) {
		$native_url .= "&separator=and";
	    }
	} elsif ($how =~ /phrase/) {
	    ## change separator to adj, or add it..
	    $native_url =~ s/separator=[^&]+/separator=adj/ig;
	    if ($native_url !~ /separator=/) {
		$native_url .= "&separator=adj";
	    }
	} elsif ($how =~ /boolean/) {
	    ## Get rid of the separator
	    $native_url =~ s/separator=[^&]+//ig;
	}	    
    }
    $native_url =~ s/%s/$native_query/g; # Substitute search terms...

    $self->user_agent();
    $self->{_next_to_retrieve} = 0;
    $self->{_base_url} = $self->{_next_url} = $native_url;
}


# private
sub native_retrieve_some
{
    my ($self) = @_;
    my ($hit)  = ();
    my ($hits_found) = 0;

    # fast exit if already done
    return undef if (!defined($self->{_next_url}));

    # get some
    print  $self->{_next_url} . "\n" if ($debug);
    my $method = $self->{'search_method'};
    $method = 'POST' unless $method;

    my($response) = $self->http_request($method, $self->{_next_url}); 

    if (!$response->is_success) {
	print "Some problem\n" if ($debug);
	return undef;
    };

    print "Got something...\n" if ($debug);
    # parse the output
    use HTML::TreeBuilder;


    my $score = 800;
    my $results = $response->content();
    #print "$results" if ($debug);
    if (!$results) {
	return(undef);
    }
    
    my (@lines) = $self->split_lines($results);
    my ($docs_found);
    my ($score_ratio) = 0;

    while ($#lines > -1)  {
	$_ = shift(@lines);
	s,\s+, ,g;
	if (m,(\d+) documents found,) {
	    $docs_found = $1;
	} elsif (m,^\s*(\d+) <A HREF=\"([^\"]+)\">(.*)</A>.*\s(\d+).*$,) {
	    if (($1 > 0) && ($score_ratio == 0)) {
		if ($1 > 900) {
		    $score_ratio = 1000/$1;
		} else {
		    $score_ratio = (1000/$1) * .9;
		}
	    }	    	    
	    #print "Make abs, " . $self->{_next_url} . "\n  for $2\n";
	    my($linkobj) = new URI::URL $2, $self->{_next_url};
	    my($hit) = new WWW::SearchResult;
	    $hit->add_url($linkobj->abs->as_string);
	    $hit->title($3);
	    $hit->size($4);
	    #print "Found a link $1, $2, $3, $4\n";
	    $hit->score($1 * $score_ratio);
	    push(@{$self->{cache}}, $hit);
	}
    }
    $self->approximate_result_count($docs_found);
    $self->{_next_url} = undef;
    return($docs_found);

    my($h) = new HTML::TreeBuilder;
    $h->parse($results);

    for (@{ $h->extract_links(qw(a)) }) {
	my($link, $linkelem) = @$_;
	
      if (($linkelem->parent->starttag() =~ /<P>/) &&
	  ($linkelem->parent->endtag()   =~ m,</P>,)) {

	    my($linkobj)       = new URI::URL $link, $self->{_next_url};

	    $hits_found++;

	    my($hit) = new WWW::SearchResult;
	    $hit->add_url($linkobj->abs->as_string());
	    $hit->title(join(' ',@{$linkelem->content}));
	    $hit->score($score);
	    $hit->normalized_score($score);

	    push(@{$self->{cache}}, $hit);
		
	    #$srchitem{'origin'} = $self->{'myurl'};
	    #$srchitem{'index'}  = $self->{'index'};

	    $score = int ($score * .95);
	}
    }
    $self->approximate_result_count($hits_found);
    $self->{_next_url} = undef;
    return($hits_found);
}


1;

__END__