Plucene::SearchEngine::Index::Base - The definitely indexer base class


Plucene-SearchEngine documentation Contained in the Plucene-SearchEngine distribution.

Index


Code Index:

NAME

Top

Plucene::SearchEngine::Index::Base - The definitely indexer base class

DESCRIPTION

Top

This module is the base class from which both frontend and backend indiexing modules should inherit. It makes it easier for modules to create Plucene::Document objects through the intermediary of a nested hash.

METHODS

Top

register_handler

    __PACKAGE__->register_handler($ext, $mime_type, $ext2, ...);

This registers the module to handle each given extension or MIME type. Base works out whether a parameter is a file extension or a MIME type.

handler_for

    $self->handler_for($filename, $mime_type)

This finds the relevant handler which has been registered for the givern mime type or file name extension.

new

This creates a new backend object, which knows about the handler, type and indexed date for the data.

add_data

    $self->add_data($field, $type, $data);

This adds data to a backend object. A backend object represents a Plucene::Document, a hash which will later be turned into a Plucene::Document object.

The $field element should be the field name that's stored in Plucene. The $type should be one of the methods that Plucene::Document::Field can cope with - Keyword, Text, UnIndexed, UnStored - or Date, which takes a Time::Piece object as its $data.

document

This turns the backend's hash into a Plucene::Document.


Plucene-SearchEngine documentation Contained in the Plucene-SearchEngine distribution.
package Plucene::SearchEngine::Index::Base;
use Plucene::Document;
use Plucene::Document::DateSerializer;
use Plucene::Document::Field;
use Time::Piece;
use UNIVERSAL::moniker;
use strict;

use constant DEFAULT_HANDLER => "Plucene::SearchEngine::Index::Text";
{
    my %mime_handlers;
    my %extension_handlers;
    sub register_handler {
        my ($package, @specs) = @_;
        for my $spec (@specs) { 
            if ($spec =~ m{/}) {
                $mime_handlers{$spec} = $package;
            } else {
                $extension_handlers{$spec} = $package;
            }
        }
    }
    sub handler_for {
        my ($self, $filename, $mime) = @_;
        if (exists $mime_handlers{$mime}) { return $mime_handlers{$mime} }
        for my $spec (keys %extension_handlers) {
            if ($filename =~ /$spec$/) { return $extension_handlers{$spec} }
        }
        return DEFAULT_HANDLER;
    }
}

sub new { 
    my ($handler) = @_;
    my $self = bless {}, $handler;
    $self->add_data("handler", "Keyword", $handler);
    $self->add_data("type", "Keyword", $handler->moniker);
    $self->add_data("indexed", "Date", Time::Piece->new());
    $self;
}

sub add_data {
    my ($self, $field, $type, $data) = @_;
    $self->{$field}{type} = $type;
    push @{$self->{$field}{data}}, $data;
}


sub document {
    my $self = shift;
    my $doc = Plucene::Document->new;
    my $text;
    for my $field_name (keys %{$self}) {
        next if $field_name eq "text";
        my $field = $self->{$field_name};
        my $type = $field->{type};
        warn "No type for field $field_name!" unless $type;
        if ($field->{type} eq "Date") {
            $type = "Keyword";
            for (@{$field->{data}}) { $_ = freeze_date($_) }
        }
        for (@{$field->{data}}) {
            $text .= " ". $_;
            $doc->add(Plucene::Document::Field->$type( $field_name => $_));
        }
    }
    $text .= " ". join " ", @{$self->{text}{data}||[]};
    $doc->add(Plucene::Document::Field->UnStored(text => $text));
    return $doc;
}

1;