Search::Estraier - pure perl module to use Hyper Estraier search engine


Search-Estraier documentation Contained in the Search-Estraier distribution.

Index


Code Index:

NAME

Top

Search::Estraier - pure perl module to use Hyper Estraier search engine

SYNOPSIS

Top

Simple indexer

	use Search::Estraier;

	# create and configure node
	my $node = new Search::Estraier::Node(
		url => 'http://localhost:1978/node/test',
		user => 'admin',
		passwd => 'admin',
		create => 1,
		label => 'Label for node',
		croak_on_error => 1,
	);

	# create document
	my $doc = new Search::Estraier::Document;

	# add attributes
	$doc->add_attr('@uri', "http://estraier.gov/example.txt");
	$doc->add_attr('@title', "Over the Rainbow");

	# add body text to document
	$doc->add_text("Somewhere over the rainbow.  Way up high.");
	$doc->add_text("There's a land that I heard of once in a lullaby.");

	die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });

Simple searcher

	use Search::Estraier;

	# create and configure node
	my $node = new Search::Estraier::Node(
		url => 'http://localhost:1978/node/test',
		user => 'admin',
		passwd => 'admin',
		croak_on_error => 1,
	);

	# create condition
	my $cond = new Search::Estraier::Condition;

	# set search phrase
	$cond->set_phrase("rainbow AND lullaby");

	my $nres = $node->search($cond, 0);

	if (defined($nres)) {
		print "Got ", $nres->hits, " results\n";

		# for each document in results
		for my $i ( 0 ... $nres->doc_num - 1 ) {
			# get result document
			my $rdoc = $nres->get_doc($i);
			# display attribte
			print "URI: ", $rdoc->attr('@uri'),"\n";
			print "Title: ", $rdoc->attr('@title'),"\n";
			print $rdoc->snippet,"\n";
		}
	} else {
		die "error: ", $node->status,"\n";
	}

DESCRIPTION

Top

This module is implementation of node API of Hyper Estraier. Since it's perl-only module with dependencies only on standard perl modules, it will run on all platforms on which perl runs. It doesn't require compilation or Hyper Estraier development files on target machine.

It is implemented as multiple packages which closly resamble Ruby implementation. It also includes methods to manage nodes.

There are few examples in scripts directory of this distribution.

Inheritable common methods

Top

This methods should really move somewhere else.

_s

Remove multiple whitespaces from string, as well as whitespaces at beginning or end

 my $text = $self->_s(" this  is a text  ");
 $text = 'this is a text';

Search::Estraier::Document

Top

This class implements Document which is single item in Hyper Estraier.

It's is collection of:

attributes

'key' => 'value' pairs which can later be used for filtering of results

You can add common filters to attrindex in estmaster's _conf file for better performance. See attrindex in Hyper Estraier P2P Guide.

vectors

also 'key' => 'value' pairs

display text

Text which will be used to create searchable corpus of your index and included in snippet output.

hidden text

Text which will be searchable, but will not be included in snippet.

new

Create new document, empty or from draft.

  my $doc = new Search::HyperEstraier::Document;
  my $doc2 = new Search::HyperEstraier::Document( $draft );

add_attr

Add an attribute.

  $doc->add_attr( name => 'value' );

Delete attribute using

  $doc->add_attr( name => undef );

add_text

Add a sentence of text.

  $doc->add_text('this is example text to display');

add_hidden_text

Add a hidden sentence.

  $doc->add_hidden_text('this is example text just for search');

add_vectors

Add a vectors

  $doc->add_vector(
  	'vector_name' => 42,
	'another' => 12345,
  );

set_score

Set the substitute score

  $doc->set_score(12345);

score

Get the substitute score

id

Get the ID number of document. If the object has never been registred, -1 is returned.

  print $doc->id;

attr_names

Returns array with attribute names from document object.

  my @attrs = $doc->attr_names;

attr

Returns value of an attribute.

  my $value = $doc->attr( 'attribute' );

texts

Returns array with text sentences.

  my @texts = $doc->texts;

cat_texts

Return whole text as single scalar.

 my $text = $doc->cat_texts;

dump_draft

Dump draft data from document object.

  print $doc->dump_draft;

delete

Empty document object

  $doc->delete;

This function is addition to original Ruby API, and since it was included in C wrappers it's here as a convinience. Document objects which go out of scope will be destroyed automatically.

Search::Estraier::Condition

Top

new

  my $cond = new Search::HyperEstraier::Condition;

set_phrase

  $cond->set_phrase('search phrase');

add_attr

  $cond->add_attr('@URI STRINC /~dpavlin/');

set_order

  $cond->set_order('@mdate NUMD');

set_max

  $cond->set_max(42);

set_options

  $cond->set_options( 'SURE' );

  $cond->set_options( qw/AGITO NOIDF SIMPLE/ );

Possible options are:

SURE

check every N-gram

USUAL

check every second N-gram

FAST

check every third N-gram

AGITO

check every fourth N-gram

NOIDF

don't perform TF-IDF tuning

SIMPLE

use simplified query phrase

Skipping N-grams will speed up search, but reduce accuracy. Every call to set_options will reset previous options;

This option changed in version 0.04 of this module. It's backwards compatibile.

phrase

Return search phrase.

  print $cond->phrase;

order

Return search result order.

  print $cond->order;

attrs

Return search result attrs.

  my @cond_attrs = $cond->attrs;

max

Return maximum number of results.

  print $cond->max;

-1 is returned for unitialized value, 0 is unlimited.

options

Return options for this condition.

  print $cond->options;

Options are returned in numerical form.

set_skip

Set number of skipped documents from beginning of results

  $cond->set_skip(42);

Similar to offset in RDBMS.

skip

Return skip for this condition.

  print $cond->skip;

set_distinct

  $cond->set_distinct('@author');

distinct

Return distinct attribute

  print $cond->distinct;

set_mask

Filter out some links when searching.

Argument array of link numbers, starting with 0 (current node).

  $cond->set_mask(qw/0 1 4/);

Search::Estraier::ResultDocument

Top

new

  my $rdoc = new Search::HyperEstraier::ResultDocument(
  	uri => 'http://localhost/document/uri/42',
	attrs => {
		foo => 1,
		bar => 2,
	},
	snippet => 'this is a text of snippet'
	keywords => 'this\tare\tkeywords'
  );

uri

Return URI of result document

  print $rdoc->uri;

attr_names

Returns array with attribute names from result document object.

  my @attrs = $rdoc->attr_names;

attr

Returns value of an attribute.

  my $value = $rdoc->attr( 'attribute' );

snippet

Return snippet from result document

  print $rdoc->snippet;

keywords

Return keywords from result document

  print $rdoc->keywords;

Search::Estraier::NodeResult

Top

new

  my $res = new Search::HyperEstraier::NodeResult(
  	docs => @array_of_rdocs,
	hits => %hash_with_hints,
  );

doc_num

Return number of documents

  print $res->doc_num;

This will return real number of documents (limited by max). If you want to get total number of hits, see hits.

get_doc

Return single document

  my $doc = $res->get_doc( 42 );

Returns undef if document doesn't exist.

hint

Return specific hint from results.

  print $res->hint( 'VERSION' );

Possible hints are: VERSION, NODE, HIT, HINT#n, DOCNUM, WORDNUM, TIME, LINK#n, VIEW.

hints

More perlish version of hint. This one returns hash.

  my %hints = $res->hints;

hits

Syntaxtic sugar for total number of hits for this query

  print $res->hits;

It's same as

  print $res->hint('HIT');

but shorter.

Search::Estraier::Node

Top

new

  my $node = new Search::HyperEstraier::Node;

or optionally with url as parametar

  my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );

or in more verbose form

  my $node = new Search::HyperEstraier::Node(
  	url => 'http://localhost:1978/node/test',
	user => 'admin',
	passwd => 'admin'
	create => 1,
	label => 'optional node label',
	debug => 1,
	croak_on_error => 1
  );

with following arguments:

url

URL to node

user

specify username for node server authentication

passwd

password for authentication

create

create node if it doesn't exists

label

optional label for new node if create is used

debug

dumps a lot of debugging output

croak_on_error

very helpful during development. It will croak on all errors instead of silently returning -1 (which is convention of Hyper Estraier API in other languages).

set_url

Specify URL to node server

  $node->set_url('http://localhost:1978');

set_proxy

Specify proxy server to connect to node server

  $node->set_proxy('proxy.example.com', 8080);

set_timeout

Specify timeout of connection in seconds

  $node->set_timeout( 15 );

set_auth

Specify name and password for authentication to node server.

  $node->set_auth('clint','eastwood');

status

Return status code of last request.

  print $node->status;

-1 means connection failure.

put_doc

Add a document

  $node->put_doc( $document_draft ) or die "can't add document";

Return true on success or false on failure.

out_doc

Remove a document

  $node->out_doc( document_id ) or "can't remove document";

Return true on success or false on failture.

out_doc_by_uri

Remove a registrated document using it's uri

  $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";

Return true on success or false on failture.

edit_doc

Edit attributes of a document

  $node->edit_doc( $document_draft ) or die "can't edit document";

Return true on success or false on failture.

get_doc

Retreive document

  my $doc = $node->get_doc( document_id ) or die "can't get document";

Return true on success or false on failture.

get_doc_by_uri

Retreive document

  my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";

Return true on success or false on failture.

get_doc_attr

Retrieve the value of an atribute from object

  my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
  	die "can't get document attribute";

get_doc_attr_by_uri

Retrieve the value of an atribute from object

  my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
  	die "can't get document attribute";

etch_doc

Exctract document keywords

  my $keywords = $node->etch_doc( document_id ) or die "can't etch document";

etch_doc_by_uri

Retreive document

  my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";

Return true on success or false on failture.

uri_to_id

Get ID of document specified by URI

  my $id = $node->uri_to_id( 'file:///document/uri/42' );

This method won't croak, even if using croak_on_error.

_fetch_doc

Private function used for implementing of get_doc, get_doc_by_uri, etch_doc, etch_doc_by_uri.

 # this will decode received draft into Search::Estraier::Document object
 my $doc = $node->_fetch_doc( id => 42 );
 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );

 # to extract keywords, add etch
 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );

 # to get document attrubute add attr
 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );

 # more general form which allows implementation of
 # uri_to_id
 my $id = $node->_fetch_doc(
 	uri => 'file:///document/uri/42',
	path => '/uri_to_id',
	chomp_resbody => 1
 );

name

  my $node_name = $node->name;

label

  my $node_label = $node->label;

doc_num

  my $documents_in_node = $node->doc_num;

word_num

  my $words_in_node = $node->word_num;

size

  my $node_size = $node->size;

Search documents which match condition

  my $nres = $node->search( $cond, $depth );

$cond is Search::Estraier::Condition object, while <$depth> specifies depth for meta search.

Function results Search::Estraier::NodeResult object.

cond_to_query

Return URI encoded string generated from Search::Estraier::Condition

  my $args = $node->cond_to_query( $cond, $depth );

shuttle_url

This is method which uses LWP::UserAgent to communicate with Hyper Estraier node master.

  my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );

$resheads and $resbody booleans controll if response headers and/or response body will be saved within object.

set_snippet_width

Set width of snippets in results

  $node->set_snippet_width( $wwidth, $hwidth, $awidth );

$wwidth specifies whole width of snippet. It's 480 by default. If it's 0 snippet is not sent with results. If it is negative, whole document text is sent instead of snippet.

$hwidth specified width of strings from beginning of string. Default value is 96. Negative or zero value keep previous value.

$awidth specifies width of strings around each highlighted word. It's 96 by default. If negative of zero value is provided previous value is kept unchanged.

set_user

Manage users of node

  $node->set_user( 'name', $mode );

$mode can be one of:

0

delete account

1

set administrative right for user

2

set user account as guest

Return true on success, otherwise false.

admins

 my @admins = @{ $node->admins };

Return array of users with admin rights on node

guests

 my @guests = @{ $node->guests };

Return array of users with guest rights on node

cacheusage

Return cache usage for a node

  my $cache = $node->cacheusage;

master

Set actions on Hyper Estraier node master (estmaster process)

  $node->master(
  	action => 'sync'
  );

All available actions are documented in http://hyperestraier.sourceforge.net/nguide-en.html#protocol

PRIVATE METHODS

Top

You could call those directly, but you don't have to. I hope.

_set_info

Set information for node

  $node->_set_info;

_clear_info

Clear information for node

  $node->_clear_info;

On next call to name, label, doc_num, word_num or size node info will be fetch again from Hyper Estraier.

EXPORT

Top

Nothing.

SEE ALSO

Top

http://hyperestraier.sourceforge.net/

Hyper Estraier Ruby interface on which this module is based.

Hyper Estraier now also has pure-perl binding included in distribution. It's a faster way to access databases directly if you are not running estmaster P2P server.

AUTHOR

Top

Dobrica Pavlinusic, <dpavlin@rot13.org>

Robert Klep <robert@klep.name> contributed refactored search code

COPYRIGHT AND LICENSE

Top


Search-Estraier documentation Contained in the Search-Estraier distribution.
package Search::Estraier;

use 5.008;
use strict;
use warnings;

our $VERSION = '0.09';

sub _s {
	my $text = $_[1];
	return unless defined($text);
	$text =~ s/\s\s+/ /gs;
	$text =~ s/^\s+//;
	$text =~ s/\s+$//;
	return $text;
}

package Search::Estraier::Document;

use Carp qw/croak confess/;

use Search::Estraier;
our @ISA = qw/Search::Estraier/;

sub new {
	my $class = shift;
	my $self = {};
	bless($self, $class);

	$self->{id} = -1;

	my $draft = shift;

	if ($draft) {
		my $in_text = 0;
		foreach my $line (split(/\n/, $draft)) {

			if ($in_text) {
				if ($line =~ /^\t/) {
					push @{ $self->{htexts} }, substr($line, 1);
				} else {
					push @{ $self->{dtexts} }, $line;
				}
				next;
			}

			if ($line =~ m/^%VECTOR\t(.+)$/) {
				my @fields = split(/\t/, $1);
				if ($#fields % 2 == 1) {
					$self->{kwords} = { @fields };
				} else {
					warn "can't decode $line\n";
				}
				next;
			} elsif ($line =~ m/^%SCORE\t(.+)$/) {
			    $self->{score} = $1;
			    next;
			} elsif ($line =~ m/^%/) {
				# What is this? comment?
				#warn "$line\n";
				next;
			} elsif ($line =~ m/^$/) {
				$in_text = 1;
				next;
			} elsif ($line =~ m/^(.+)=(.*)$/) {
				$self->{attrs}->{ $1 } = $2;
				next;
			}

			warn "draft ignored: '$line'\n";
		}
	}

	$self ? return $self : return undef;
}


sub add_attr {
	my $self = shift;
	my $attrs = {@_};

	while (my ($name, $value) = each %{ $attrs }) {
		if (! defined($value)) {
			delete( $self->{attrs}->{ $self->_s($name) } );
		} else {
			$self->{attrs}->{ $self->_s($name) } = $self->_s($value);
		}
	}

	return 1;
}


sub add_text {
	my $self = shift;
	my $text = shift;
	return unless defined($text);

	push @{ $self->{dtexts} }, $self->_s($text);
}


sub add_hidden_text {
	my $self = shift;
	my $text = shift;
	return unless defined($text);

	push @{ $self->{htexts} }, $self->_s($text);
}

sub add_vectors {
	my $self = shift;
	return unless (@_);

	# this is ugly, but works
	die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);

	$self->{kwords} = {@_};
}

sub set_score {
    my $self = shift;
    my $score = shift;
    return unless (defined($score));
    $self->{score} = $score;
}

sub score {
    my $self = shift;
    return -1 unless (defined($self->{score}));
    return $self->{score};
}

sub id {
	my $self = shift;
	return $self->{id};
}


sub attr_names {
	my $self = shift;
	return unless ($self->{attrs});
	#croak "attr_names return array, not scalar" if (! wantarray);
	return sort keys %{ $self->{attrs} };
}


sub attr {
	my $self = shift;
	my $name = shift;
	return unless (defined($name) && $self->{attrs});
	return $self->{attrs}->{ $name };
}


sub texts {
	my $self = shift;
	#confess "texts return array, not scalar" if (! wantarray);
	return @{ $self->{dtexts} } if ($self->{dtexts});
}


sub cat_texts {
	my $self = shift;
	return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
}


sub dump_draft {
	my $self = shift;
	my $draft;

	foreach my $attr_name (sort keys %{ $self->{attrs} }) {
		next unless defined(my $v = $self->{attrs}->{$attr_name});
		$draft .= $attr_name . '=' . $v . "\n";
	}

	if ($self->{kwords}) {
		$draft .= '%VECTOR';
		while (my ($key, $value) = each %{ $self->{kwords} }) {
			$draft .= "\t$key\t$value";
		}
		$draft .= "\n";
	}

	if (defined($self->{score}) && $self->{score} >= 0) {
	    $draft .= "%SCORE\t" . $self->{score} . "\n";
	}

	$draft .= "\n";

	$draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
	$draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});

	return $draft;
}


sub delete {
	my $self = shift;

	foreach my $data (qw/attrs dtexts stexts kwords/) {
		delete($self->{$data});
	}

	$self->{id} = -1;

	return 1;
}



package Search::Estraier::Condition;

use Carp qw/carp confess croak/;

use Search::Estraier;
our @ISA = qw/Search::Estraier/;

sub new {
	my $class = shift;
	my $self = {};
	bless($self, $class);

	$self->{max} = -1;
	$self->{options} = 0;

	$self ? return $self : return undef;
}


sub set_phrase {
	my $self = shift;
	$self->{phrase} = $self->_s( shift );
}


sub add_attr {
	my $self = shift;
	my $attr = shift || return;
	push @{ $self->{attrs} }, $self->_s( $attr );
}


sub set_order {
	my $self = shift;
	$self->{order} = shift;
}


sub set_max {
	my $self = shift;
	my $max = shift;
	croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
	$self->{max} = $max;
}


my $options = {
	SURE => 1 << 0,
	USUAL => 1 << 1,
	FAST => 1 << 2,
	AGITO => 1 << 3,
	NOIDF => 1 << 4,
	SIMPLE => 1 << 10,
};

sub set_options {
	my $self = shift;
	my $opt = 0;
	foreach my $option (@_) {
		my $mask;
		unless ($mask = $options->{$option}) {
			if ($option eq '1') {
				next;
			} else {
				croak "unknown option $option";
			}
		}
		$opt += $mask;
	}
	$self->{options} = $opt;
}


sub phrase {
	my $self = shift;
	return $self->{phrase};
}


sub order {
	my $self = shift;
	return $self->{order};
}


sub attrs {
	my $self = shift;
	#croak "attrs return array, not scalar" if (! wantarray);
	return @{ $self->{attrs} } if ($self->{attrs});
}


sub max {
	my $self = shift;
	return $self->{max};
}


sub options {
	my $self = shift;
	return $self->{options};
}


sub set_skip {
	my $self = shift;
	$self->{skip} = shift;
}

sub skip {
	my $self = shift;
	return $self->{skip};
}


sub set_distinct {
	my $self = shift;
	$self->{distinct} = shift;
}

sub distinct {
	my $self = shift;
	return $self->{distinct};
}

sub set_mask {
	my $self = shift;
	return unless (@_);
	$self->{mask} = \@_;
}


package Search::Estraier::ResultDocument;

use Carp qw/croak/;

#use Search::Estraier;
#our @ISA = qw/Search::Estraier/;

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);

	croak "missing uri for ResultDocument" unless defined($self->{uri});

	$self ? return $self : return undef;
}


sub uri {
	my $self = shift;
	return $self->{uri};
}


sub attr_names {
	my $self = shift;
	croak "attr_names return array, not scalar" if (! wantarray);
	return sort keys %{ $self->{attrs} };
}


sub attr {
	my $self = shift;
	my $name = shift || return;
	return $self->{attrs}->{ $name };
}


sub snippet {
	my $self = shift;
	return $self->{snippet};
}


sub keywords {
	my $self = shift;
	return $self->{keywords};
}


package Search::Estraier::NodeResult;

use Carp qw/croak/;

#use Search::Estraier;
#our @ISA = qw/Search::Estraier/;

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);

	foreach my $f (qw/docs hints/) {
		croak "missing $f for ResultDocument" unless defined($self->{$f});
	}

	$self ? return $self : return undef;
}


sub doc_num {
	my $self = shift;
	return $#{$self->{docs}} + 1;
}


sub get_doc {
	my $self = shift;
	my $num = shift;
	croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
	return undef if ($num < 0 || $num > $self->{docs});
	return $self->{docs}->[$num];
}


sub hint {
	my $self = shift;
	my $key = shift || return;
	return $self->{hints}->{$key};
}

sub hints {
	my $self = shift;
	return $self->{hints};
}

sub hits {
	my $self = shift;
	return $self->{hints}->{'HIT'} || 0;
}

package Search::Estraier::Node;

use Carp qw/carp croak confess/;
use URI;
use MIME::Base64;
use IO::Socket::INET;
use URI::Escape qw/uri_escape/;

sub new {
	my $class = shift;
	my $self = {
		pxport => -1,
		timeout => 0,	# this used to be -1
		wwidth => 480,
		hwidth => 96,
		awidth => 96,
		status => -1,
	};

	bless($self, $class);

	if ($#_ == 0) {
		$self->{url} = shift;
	} else {
		%$self = ( %$self, @_ );

		$self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});

		warn "## Node debug on\n" if ($self->{debug});
	}

	$self->{inform} = {
		dnum => -1,
		wnum => -1,
		size => -1.0,
	};

	if ($self->{create}) {
		if (! eval { $self->name } || $@) {
			my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
			croak "can't find node name in '$self->{url}'" unless ($name);
			my $label = $self->{label} || $name;
			$self->master(
				action => 'nodeadd',
				name => $name,
				label => $label,
			) || croak "can't create node $name ($label)";
		}
	}

	$self ? return $self : return undef;
}


sub set_url {
	my $self = shift;
	$self->{url} = shift;
}


sub set_proxy {
	my $self = shift;
	my ($host,$port) = @_;
	croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
	$self->{pxhost} = $host;
	$self->{pxport} = $port;
}


sub set_timeout {
	my $self = shift;
	my $sec = shift;
	croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
	$self->{timeout} = $sec;
}


sub set_auth {
	my $self = shift;
	my ($login,$passwd) = @_;
	my $basic_auth = encode_base64( "$login:$passwd" );
	chomp($basic_auth);
	$self->{auth} = $basic_auth;
}


sub status {
	my $self = shift;
	return $self->{status};
}


sub put_doc {
	my $self = shift;
	my $doc = shift || return;
	return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
	if ($self->shuttle_url( $self->{url} . '/put_doc',
		'text/x-estraier-draft',
		$doc->dump_draft,
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


sub out_doc {
	my $self = shift;
	my $id = shift || return;
	return unless ($self->{url});
	croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
	if ($self->shuttle_url( $self->{url} . '/out_doc',
		'application/x-www-form-urlencoded',
		"id=$id",
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


sub out_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return unless ($self->{url});
	if ($self->shuttle_url( $self->{url} . '/out_doc',
		'application/x-www-form-urlencoded',
		"uri=" . uri_escape($uri),
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


sub edit_doc {
	my $self = shift;
	my $doc = shift || return;
	return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
	if ($self->shuttle_url( $self->{url} . '/edit_doc',
		'text/x-estraier-draft',
		$doc->dump_draft,
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


sub get_doc {
	my $self = shift;
	my $id = shift || return;
	return $self->_fetch_doc( id => $id );
}


sub get_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri );
}


sub get_doc_attr {
	my $self = shift;
	my ($id,$name) = @_;
	return unless ($id && $name);
	return $self->_fetch_doc( id => $id, attr => $name );
}


sub get_doc_attr_by_uri {
	my $self = shift;
	my ($uri,$name) = @_;
	return unless ($uri && $name);
	return $self->_fetch_doc( uri => $uri, attr => $name );
}


sub etch_doc {
	my $self = shift;
	my $id = shift || return;
	return $self->_fetch_doc( id => $id, etch => 1 );
}

sub etch_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri, etch => 1 );
}


sub uri_to_id {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
}


sub _fetch_doc {
	my $self = shift;
	my $a = {@_};
	return unless ( ($a->{id} || $a->{uri}) && $self->{url} );

	my ($arg, $resbody);

	my $path = $a->{path} || '/get_doc';
	$path = '/etch_doc' if ($a->{etch});

	if ($a->{id}) {
		croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
		$arg = 'id=' . $a->{id};
	} elsif ($a->{uri}) {
		$arg = 'uri=' . uri_escape($a->{uri});
	} else {
		confess "unhandled argument. Need id or uri.";
	}

	if ($a->{attr}) {
		$path = '/get_doc_attr';
		$arg .= '&attr=' . uri_escape($a->{attr});
		$a->{chomp_resbody} = 1;
	}

	my $rv = $self->shuttle_url( $self->{url} . $path,
		'application/x-www-form-urlencoded',
		$arg,
		\$resbody,
		$a->{croak_on_error},
	);

	return if ($rv != 200);

	if ($a->{etch}) {
		$self->{kwords} = {};
		return +{} unless ($resbody);
		foreach my $l (split(/\n/, $resbody)) {
			my ($k,$v) = split(/\t/, $l, 2);
			$self->{kwords}->{$k} = $v if ($v);
		}
		return $self->{kwords};
	} elsif ($a->{chomp_resbody}) {
		return unless (defined($resbody));
		chomp($resbody);
		return $resbody;
	} else {
		return new Search::Estraier::Document($resbody);
	}
}


sub name {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{name};
}


sub label {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{label});
	return $self->{inform}->{label};
}


sub doc_num {
	my $self = shift;
	$self->_set_info if ($self->{inform}->{dnum} < 0);
	return $self->{inform}->{dnum};
}


sub word_num {
	my $self = shift;
	$self->_set_info if ($self->{inform}->{wnum} < 0);
	return $self->{inform}->{wnum};
}


sub size {
	my $self = shift;
	$self->_set_info if ($self->{inform}->{size} < 0);
	return $self->{inform}->{size};
}


sub search {
	my $self = shift;
	my ($cond, $depth) = @_;
	return unless ($cond && defined($depth) && $self->{url});
	croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
	croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);

	my $resbody;

	my $rv = $self->shuttle_url( $self->{url} . '/search',
		'application/x-www-form-urlencoded',
		$self->cond_to_query( $cond, $depth ),
		\$resbody,
	);
	return if ($rv != 200);

	my @records 	= split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
	my $hintsText	= splice @records, 0, 2; # starts with empty record
	my $hints		= { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };

	# process records
	my $docs = [];
	foreach my $record (@records)
	{
		# split into keys and snippets
		my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;

		# create document hash
		my $doc				= { $keys =~ m/^(.*?)=(.*?)$/gsm };
		$doc->{'@keywords'}	= $doc->{keywords};
		($doc->{keywords})	= $keys =~ m/^%VECTOR\t(.*?)$/gm;
		$doc->{snippet}		= $snippet;

		push @$docs, new Search::Estraier::ResultDocument(
			attrs 		=> $doc,
			uri 		=> $doc->{'@uri'},
			snippet 	=> $snippet,
			keywords 	=> $doc->{'keywords'},
		);
	}

	return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
}


sub cond_to_query {
	my $self = shift;

	my $cond = shift || return;
	croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
	my $depth = shift;

	my @args;

	if (my $phrase = $cond->phrase) {
		push @args, 'phrase=' . uri_escape($phrase);
	}

	if (my @attrs = $cond->attrs) {
		for my $i ( 0 .. $#attrs ) {
			push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
		}
	}

	if (my $order = $cond->order) {
		push @args, 'order=' . uri_escape($order);
	}
		
	if (my $max = $cond->max) {
		push @args, 'max=' . $max;
	} else {
		push @args, 'max=' . (1 << 30);
	}

	if (my $options = $cond->options) {
		push @args, 'options=' . $options;
	}

	push @args, 'depth=' . $depth if ($depth);
	push @args, 'wwidth=' . $self->{wwidth};
	push @args, 'hwidth=' . $self->{hwidth};
	push @args, 'awidth=' . $self->{awidth};
	push @args, 'skip=' . $cond->{skip} if ($cond->{skip});

	if (my $distinct = $cond->distinct) {
		push @args, 'distinct=' . uri_escape($distinct);
	}

	if ($cond->{mask}) {
		my $mask = 0;
		map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };

		push @args, 'mask=' . $mask if ($mask);
	}

	return join('&', @args);
}


use LWP::UserAgent;

sub shuttle_url {
	my $self = shift;

	my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;

	$croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);

	$self->{status} = -1;

	warn "## $url\n" if ($self->{debug});

	$url = new URI($url);
	if (
			!$url || !$url->scheme || !$url->scheme eq 'http' ||
			!$url->host || !$url->port || $url->port < 1
		) {
		carp "can't parse $url\n";
		return -1;
	}

	my $ua = LWP::UserAgent->new;
	$ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );

	my $req;
	if ($reqbody) {
		$req = HTTP::Request->new(POST => $url);
	} else {
		$req = HTTP::Request->new(GET => $url);
	}

	$req->headers->header( 'Host' => $url->host . ":" . $url->port );
	$req->headers->header( 'Connection', 'close' );
	$req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
	$req->content_type( $content_type );

	warn $req->headers->as_string,"\n" if ($self->{debug});

	if ($reqbody) {
		warn "$reqbody\n" if ($self->{debug});
		$req->content( $reqbody );
	}

	my $res = $ua->request($req) || croak "can't make request to $url: $!";

	warn "## response status: ",$res->status_line,"\n" if ($self->{debug});

	($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);

	if (! $res->is_success) {
		if ($croak_on_error) {
			croak("can't get $url: ",$res->status_line);
		} else {
			return -1;
		}
	}

	$$resbody .= $res->content;

	warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});

	return $self->{status};
}


sub set_snippet_width {
	my $self = shift;

	my ($wwidth, $hwidth, $awidth) = @_;
	$self->{wwidth} = $wwidth;
	$self->{hwidth} = $hwidth if ($hwidth >= 0);
	$self->{awidth} = $awidth if ($awidth >= 0);
}


sub set_user {
	my $self = shift;
	my ($name, $mode) = @_;

	return unless ($self->{url});
	croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);

	$self->shuttle_url( $self->{url} . '/_set_user',
		'application/x-www-form-urlencoded',
		'name=' . uri_escape($name) . '&mode=' . $mode,
		undef
	) == 200;
}


sub set_link {
	my $self = shift;
	my ($url, $label, $credit) = @_;

	return unless ($self->{url});
	croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);

	my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
	$reqbody .= '&credit=' . $credit if ($credit > 0);

	if ($self->shuttle_url( $self->{url} . '/_set_link',
		'application/x-www-form-urlencoded',
		$reqbody,
		undef
	) == 200) {
		# refresh node info after adding link
		$self->_clear_info;
		return 1;
	}
	return undef;
}

sub admins {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{admins};
}

sub guests {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{guests};
}

sub links {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{links};
}

sub cacheusage {
	my $self = shift;

	return unless ($self->{url});

	my $resbody;
	my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
		'text/plain',
		undef,
		\$resbody,
	);

	return if ($rv != 200 || !$resbody);

	return $resbody;
}

my $estmaster_rest = {
	shutdown => {
		status => 202,
	},
	sync => {
		status => 202,
	},
	backup => {
		status => 202,
	},
	userlist => {
		status => 200,
		returns => [ qw/name passwd flags fname misc/ ],
	},
	useradd => {
		required => [ qw/name passwd flags/ ],
		optional => [ qw/fname misc/ ],
		status => 200,
	},
	userdel => {
		required => [ qw/name/ ],
		status => 200,
	},
	nodelist => {
		status => 200,
		returns => [ qw/name label doc_num word_num size/ ],
	},
	nodeadd => {
		required => [ qw/name/ ],
		optional => [ qw/label/ ],
		status => 200,
	},
	nodedel => {
		required => [ qw/name/ ],
		status => 200,
	},
	nodeclr => {
		required => [ qw/name/ ],
		status => 200,
	},
	nodertt => {
		status => 200,	
	},
};

sub master {
	my $self = shift;

	my $args = {@_};

	# have action?
	my $action = $args->{action} || croak "need action, available: ",
		join(", ",keys %{ $estmaster_rest });

	# check if action is valid
	my $rest = $estmaster_rest->{$action};
	croak "action '$action' is not supported, available actions: ",
		join(", ",keys %{ $estmaster_rest }) unless ($rest);

	croak "BUG: action '$action' needs return status" unless ($rest->{status});

	my @args;

	if ($rest->{required} || $rest->{optional}) {

		map {
			croak "need parametar '$_' for action '$action'" unless ($args->{$_});
			push @args, $_ . '=' . uri_escape( $args->{$_} );
		} ( @{ $rest->{required} } );

		map {
			push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
		} ( @{ $rest->{optional} } );

	}

	my $uri = new URI( $self->{url} );

	my $resbody;

	my $status = $self->shuttle_url(
		'http://' . $uri->host_port . '/master?action=' . $action ,
		'application/x-www-form-urlencoded',
		join('&', @args),
		\$resbody,
		1,
	) or confess "shuttle_url failed";

	if ($status == $rest->{status}) {

		# refresh node info after sync
		$self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);

		if ($rest->{returns} && wantarray) {

			my @results;
			my $fields = $#{$rest->{returns}};

			foreach my $line ( split(/[\r\n]/,$resbody) ) {
				my @e = split(/\t/, $line, $fields + 1);
				my $row;
				foreach my $i ( 0 .. $fields) {
					$row->{ $rest->{returns}->[$i] } = $e[ $i ];
				}
				push @results, $row;
			}

			return @results;

		} elsif ($resbody) {
			chomp $resbody;
			return $resbody;
		} else {
			return 0E0;
		}
	}

	carp "expected status $rest->{status}, but got $status";
	return undef;
}

sub _set_info {
	my $self = shift;

	$self->{status} = -1;
	return unless ($self->{url});

	my $resbody;
	my $rv = $self->shuttle_url( $self->{url} . '/inform',
		'text/plain',
		undef,
		\$resbody,
	);

	return if ($rv != 200 || !$resbody);

	my @lines = split(/[\r\n]/,$resbody);

	$self->_clear_info;

	( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
		$self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);

	return $resbody unless (@lines);

	shift @lines;

	while(my $admin = shift @lines) {
		push @{$self->{inform}->{admins}}, $admin;
	}

	while(my $guest = shift @lines) {
		push @{$self->{inform}->{guests}}, $guest;
	}

	while(my $link = shift @lines) {
		push @{$self->{inform}->{links}}, $link;
	}

	return $resbody;

}

sub _clear_info {
	my $self = shift;
	$self->{inform} = {
		dnum => -1,
		wnum => -1,
		size => -1.0,
	};
}

###

1;