WebService::Lucene - Module to interface with the Lucene indexing webservice


WebService-Lucene documentation Contained in the WebService-Lucene distribution.

Index


Code Index:

NAME

Top

WebService::Lucene - Module to interface with the Lucene indexing webservice

SYNOPSIS

Top

    # Connect to the web service
    $ws = WebService::Lucene->new( $url );

    # Create an index
    $index = $ws->create_index( $name );

    # Get a particular index
    $index = $ws->get_index( $name );

    # Index a document
    $document = $index->add_document( $document );

    # Get a document
    $document = $index->get_document( $id );

    # Delete the document
    $document->delete;

    # Search an index
    $results = $index->search( $query );

    # Get documents from search
    @documents = $results->documents;

    # Delete an index
    $index->delete;

DESCRIPTION

Top

This module is a Perl API into the Lucene indexing web service: http://lucene-ws.net/

METHODS

Top

new( $url )

This method will connect to the Lucene Web Service located at $url.

    my $ws = WebService::Lucene->new( 'http://localhost:8080/lucene/' );

base_url( [$url] )

Accessor for the base url of the service.

get_index( $name )

Returns a WebService::Lucene::Index object for $name.

indexes( )

Alias for indices

indices( )

Returns an array of WebService::Lucene::Index objects.

properties( )

Returns a hash reference of the properties for the service, fetching them from the server on first use.

_fetch_service_properties( )

Grabs the service.properties document and sends the contents to _parse_service_properties.

_parse_service_properties( $xml )

Parses the XML and populates the object's properties.

_fetch_service_document( )

Connects to the service url and passes the contents on to _parse_service_document.

_parse_service_document( $xml )

Parses the Atom Publishing Protocol introspection document and populates the service's indices.

title( )

Returns the title of the service, fetching the service document first if it has not been loaded yet.

_fetch_content( $url )

Shortcut for fetching the content at $url.

create_index( $name )

Creates the index on the server and returns the WebService::Lucene::Index object.

delete_index( $name )

Deletes an index.

update( )

Updates the service.properties document.

_properties_as_entry( )

Generates an XML::Atom::Entry suitable for updating the service.properties document.

search( $indices, $query, [$params] )

Searches one or more indices for $query. Returns a WebService::Lucene::Results object.

    my $results = $ws->search( [ 'index1', 'index2' ], 'foo' );

facets( $indices, [$params] )

Gets facets for one or more indices. Returns a WebService::Lucene::Results object.

    my $results = $ws->facets( [ 'index1', 'index2' ] );

SEE ALSO

Top

* XML::Atom::Client
* WWW::OpenSearch
* http://lucene-ws.net/

AUTHORS

Top

Brian Cassidy <bricas@cpan.org>

Adam Paynter <adapay@cpan.org>

COPYRIGHT AND LICENSE

Top


WebService-Lucene documentation Contained in the WebService-Lucene distribution.
package WebService::Lucene;

use strict;
use warnings;

use base qw( WebService::Lucene::Client Class::Accessor::Fast );

use URI;
use Carp qw( croak );
use WebService::Lucene::Index;
use WebService::Lucene::XOXOParser;
use XML::LibXML;
use Scalar::Util ();

our $VERSION = '0.10';

# Generate simple get/set accessors for the object's state. As used in this
# file:
#   base_url            - service URL stored by new()
#   indices_ref         - hash reference of index objects keyed by index name
#   properties_ref      - hash reference filled by _parse_service_properties()
#   title_info          - title text stored by _parse_service_document()
#   service_doc_fetched - set to 1 once _fetch_service_document() has run
# NOTE(review): mk_accessors is presumably supplied by Class::Accessor::Fast
# from the base list above -- confirm.
__PACKAGE__->mk_accessors(
    qw(
        base_url indices_ref properties_ref title_info
        service_doc_fetched
        )
);

# Constructor: build a client for the Lucene web service located at $url.
#
#   $url - location of the service, either a plain string or a URI object
#
# Returns the new object with base_url set and an empty index cache.
# Croaks when $url is false (missing or empty).
sub new {
    my ( $class, $url ) = @_;

    croak( "No URL specified" ) unless $url;

    if ( !ref $url ) {

        # force exactly one trailing slash, then promote to a URI object
        $url =~ s{/?$}{/};
        $url = URI->new( $url );
    }
    elsif (Scalar::Util::blessed( $url )
        && $url->can( 'path' )
        && $url->can( 'clone' ) )
    {

        # URI objects need the same trailing slash as strings: get_index()
        # and update() resolve relative names with URI->new_abs, which drops
        # the last path segment of a base that does not end in '/'.
        my $path = $url->path;
        if ( $path !~ m{/\z} ) {

            # work on a copy so the caller's object is left untouched
            $url = $url->clone;
            $url->path( $path . '/' );
        }
    }

    my $self = $class->SUPER::new;
    $self->base_url( $url );
    $self->indices_ref( {} );

    return $self;
}

# Look up, or lazily build, the index object for $name.
#
#   $name - an index name, an already-blessed object (handed straight back),
#           or an array reference of names/objects, which is collapsed into
#           one comma-separated multi-index name
#
# Objects are cached in indices_ref under their name, so asking for the same
# name twice yields the same object.
sub get_index {
    my ( $self, $name ) = @_;
    my $cache = $self->indices_ref;

    # an object was supplied; there is nothing to resolve
    return $name if Scalar::Util::blessed( $name );

    # flatten a list of names and/or objects into a single cache key
    if ( ref $name ) {
        my @parts;
        for my $item ( @$name ) {
            push @parts,
                Scalar::Util::blessed( $item ) ? $item->name : $item;
        }
        $name = join( ',', @parts );
    }

    return $cache->{ $name } if $cache->{ $name };

    # the relative path has to end in a slash before it is resolved
    ( my $path = $name ) =~ s{/?$}{/};
    my $location = URI->new_abs( $path, $self->base_url );

    return $cache->{ $name } = WebService::Lucene::Index->new( $location );
}

*indexes = \&indices;

# Return the index objects known to the service (also callable as indexes).
# The service document is fetched first when that has not happened yet.
# In scalar context the number of indices is returned.
sub indices {
    my ( $self ) = @_;

    $self->_fetch_service_document unless $self->service_doc_fetched;

    my $cache = $self->indices_ref;

    # multi-index entries live under comma-joined names; leave those out
    my @found;
    for my $name ( keys %$cache ) {
        next if $name =~ /,/;
        push @found, $cache->{ $name };
    }

    return @found;
}

# Return the service properties as stored in properties_ref, loading them
# through _fetch_service_properties the first time they are requested.
sub properties {
    my ( $self ) = @_;

    $self->_fetch_service_properties unless $self->properties_ref;

    return $self->properties_ref;
}

# Retrieve the service.properties entry, located relative to base_url, and
# hand the body of its content to _parse_service_properties.
sub _fetch_service_properties {
    my $self = shift;

    my $location = URI->new_abs( 'service.properties', $self->base_url );
    my $entry    = $self->getEntry( $location );

    $self->_parse_service_properties( $entry->content->body );
}

# Turn the XOXO markup in $xml into a name => value hash and store a
# reference to it in properties_ref.
#
#   $xml - markup handed to WebService::Lucene::XOXOParser->parse
sub _parse_service_properties {
    my $self = shift;
    my $xml  = shift;

    my %value_of;
    for my $property ( WebService::Lucene::XOXOParser->parse( $xml ) ) {
        $value_of{ $property->{ name } } = $property->{ value };
    }

    $self->properties_ref( \%value_of );
}

# Download the document found at base_url, pass it to
# _parse_service_document and flag the service document as fetched.
sub _fetch_service_document {
    my $self = shift;

    my ( $xml ) = $self->_fetch_content( $self->base_url );
    $self->_parse_service_document( $xml );

    $self->service_doc_fetched( 1 );
}

# Parse the Atom Publishing Protocol service document in $xml, store the
# workspace title in title_info and register each collection as an index.
#
#   $xml - the service document as a string
#
# A collection is keyed by the last path segment of its href. Names already
# present in indices_ref are left alone, so existing index objects survive a
# re-parse. Croaks when the document has no workspace element; exceptions
# raised by the XML parser are not caught here.
sub _parse_service_document {
    my ( $self, $xml ) = @_;

    my $parser  = XML::LibXML->new;
    my $doc     = $parser->parse_string( $xml );
    my $indices = $self->indices_ref;

    my ( $workspace )
        = $doc->documentElement->getChildrenByTagName( 'workspace' );

    # fail with a readable message instead of a method call on undef
    croak( "Service document contains no workspace element" )
        unless $workspace;

    # a workspace without a title simply leaves title_info untouched
    my ( $title ) = $workspace->getElementsByLocalName( 'title' );
    $self->title_info( $title->textContent ) if $title;

    for my $collection ( $workspace->getChildrenByTagName( 'collection' ) ) {

        # collections without an href cannot be addressed; skip them
        my $href = $collection->getAttributeNode( 'href' ) or next;
        my $url = $href->value;

        # the index name is the last path segment of the collection URL
        my ( $name ) = $url =~ m{/([^/]+)/?$};
        next unless defined $name;

        next if exists $indices->{ $name };
        $indices->{ $name } = WebService::Lucene::Index->new( $url );
    }

    return;
}

# Return the service title held in title_info, fetching the service document
# first when that has not been done yet.
sub title {
    my $self = shift;

    $self->_fetch_service_document unless $self->service_doc_fetched;

    return $self->title_info;
}

# Fetch $url with the object's user agent and return the response content.
#
#   $url - location to GET
#
# Croaks with the HTTP status line when the request is not successful, so a
# failed request is reported as such instead of its error body being handed
# to the XML parser.
# NOTE(review): assumes $self->{ua} returns an HTTP::Response-style object
# (is_success / status_line / content) -- confirm against the client class.
sub _fetch_content {
    my ( $self, $url ) = @_;

    my $response = $self->{ ua }->get( $url );

    croak( "Unable to fetch $url: " . $response->status_line )
        unless $response->is_success;

    return $response->content;
}

# Resolve $name through get_index and call create on the resulting index,
# returning whatever that call returns.
sub create_index {
    my $self = shift;
    my ( $name ) = @_;

    return $self->get_index( $name )->create;
}

# Resolve $name through get_index and call delete on the resulting index,
# returning whatever that call returns.
# NOTE(review): the object stays in the indices_ref cache afterwards, so
# indices() may still list it -- confirm whether that is intended.
sub delete_index {
    my $self = shift;
    my ( $name ) = @_;

    return $self->get_index( $name )->delete;
}

# Send the current properties, serialised by _properties_as_entry, to the
# service.properties location relative to base_url via updateEntry.
sub update {
    my $self = shift;

    my $location = URI->new_abs( 'service.properties', $self->base_url );

    return $self->updateEntry( $location, $self->_properties_as_entry );
}

# Build an XML::Atom::Entry titled 'service.properties' whose xhtml content
# is the XOXO serialisation of the hash in properties_ref.
# NOTE(review): properties_ref is read directly, without the lazy load that
# properties() performs -- confirm callers load the properties first.
sub _properties_as_entry {
    my $self = shift;

    my $entry = XML::Atom::Entry->new;
    $entry->title( 'service.properties' );

    # one { name, value } record per property
    my $props = $self->properties_ref;
    my @properties;
    for my $key ( keys %$props ) {
        push @properties, { name => $key, value => $props->{ $key } };
    }

    my $xml = WebService::Lucene::XOXOParser->construct( @properties );
    $entry->content( $xml );
    $entry->content->type( 'xhtml' );

    return $entry;
}

# Run a search against one or more indices.
#
#   $name - anything get_index accepts (name, object or array reference)
#   @rest - passed unchanged to the index's search method
#
# Returns whatever the index's search method returns.
sub search {
    my $self        = shift;
    my $name        = shift;
    my @search_args = @_;

    my $index = $self->get_index( $name );

    return $index->search( @search_args );
}

# Request facets for one or more indices.
#
#   $name - anything get_index accepts (name, object or array reference)
#   @rest - passed unchanged to the index's facets method
#
# Returns whatever the index's facets method returns.
sub facets {
    my $self       = shift;
    my $name       = shift;
    my @facet_args = @_;

    my $index = $self->get_index( $name );

    return $index->facets( @facet_args );
}

1;