Plucene::SearchEngine::Index::File - File reader for filesystem files


Plucene-SearchEngine documentation Contained in the Plucene-SearchEngine distribution.

Index


Code Index:

NAME

Top

Plucene::SearchEngine::Index::File - File reader for filesystem files

DESCRIPTION

Top

This frontend module takes a filesystem file, extracts its metadata and passes the file on to a backend. The frontend registers the following Plucene fields:

mimetype

The MIME type of the file.

filename

The basename of the file's filename.

id

The URL of the file (file://...)

modified

A Plucene date field representing the last-modified date of the file.

METHODS

    Plucene::SearchEngine::Index::File->examine($filename [, $encoding])

This examines a file on the filesystem for the above metadata before handing it to a backend. If an encoding is given, the text will be flagged as originally being in that encoding and then converted to UTF-8.


Plucene-SearchEngine documentation Contained in the Plucene-SearchEngine distribution.

package Plucene::SearchEngine::Index::File;
use strict;
use base "Plucene::SearchEngine::Index::Base";
use Carp;
use File::MMagic;
use File::Spec::Functions qw(rel2abs);
use File::Basename;
use Time::Piece;
use File::stat;
# Single File::MMagic instance created at load time; examine() uses it to
# determine the MIME type of each file it is given.
my $magic = File::MMagic->new();

# examine($class, $filename [, $encoding])
#
# Class method. Inspects a file on disk, records its metadata on an object
# obtained from $class->handler_for($filename, $mime)->new(), and then asks
# that object to gather data from the file.
#
# Fields registered via add_data():
#   mimetype (Text)    - MIME type reported by File::MMagic.
#   filename (Text)    - basename of $filename.
#   id       (Keyword) - "file://" followed by the absolute path.
#   modified (Date)    - Time::Piece built from the file's mtime.
#   encoding (Text)    - $encoding, only when a true value was passed.
#
# Returns:
#   - nothing (bare return) when $filename is undefined, is not readable,
#     or cannot be stat'ed;
#   - in list context, the list returned by gather_data_from_file() when it
#     holds more than one element, otherwise the handler object;
#   - in any other context, the handler object, after carping a
#     deprecation warning.
sub examine {
    my ($class, $filename, $encoding) = @_;
    return unless defined($filename) && -r $filename;

    # Stat once, up front, and bail out if it fails: the file can vanish
    # between the readability test and this call, and File::stat's stat()
    # then returns nothing, so chaining ->mtime onto it would die.
    my $st = stat($filename) or return;

    my $mime = $magic->checktype_filename($filename);
    my $self = $class->handler_for($filename, $mime)->new();

    $self->add_data("mimetype", "Text",    $mime);
    $self->add_data("filename", "Text",    basename($filename));
    $self->add_data("id",       "Keyword", "file://" . rel2abs($filename));
    $self->add_data("modified", "Date",    Time::Piece->new($st->mtime));
    $self->add_data("encoding", "Text",    $encoding) if $encoding;

    my @docs = $self->gather_data_from_file($filename);

    if (wantarray) {
        # Only a multi-element result replaces the handler object itself.
        return @docs > 1 ? @docs : $self;
    }

    # wantarray is false for both scalar and void context; either way the
    # caller gets the deprecation notice and the handler object.
    carp "Using ->examine in scalar context is deprecated";
    return $self;
}

1;