Plucene::Analysis::StopAnalyzer - the stop-word analyzer


Plucene documentation Contained in the Plucene distribution.

Index


Code Index:

NAME

Top

Plucene::Analysis::StopAnalyzer - the stop-word analyzer

SYNOPSIS

Top

	my Plucene::Analysis::StopFilter $sf 
		= Plucene::Analysis::StopAnalyzer->tokenstream(@args);

DESCRIPTION

Top

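Filters LowerCaseTokenizer with StopFilter: input is split into lower-cased tokens, and tokens found in the built-in list of common English stop words are passed to StopFilter as its stop list.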

METHODS

Top

tokenstream

	my Plucene::Analysis::StopFilter $sf 
		= Plucene::Analysis::StopAnalyzer->tokenstream(@args);

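Filters LowerCaseTokenizer with StopFilter. The arguments are passed on to the LowerCaseTokenizer constructor, and the returned StopFilter uses this module's built-in stop-word list.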


Plucene documentation Contained in the Plucene distribution.
package Plucene::Analysis::StopAnalyzer;

use strict;
use warnings;

use Plucene::Analysis::LowerCaseTokenizer;
use Plucene::Analysis::StopFilter;
use base 'Plucene::Analysis::Analyzer';

# Stop list that tokenstream() hands to the StopFilter: common English
# function words, plus the single letters "s" and "t".
my @stopwords = qw(
	a     and  are   as    at   be   but  by
	for   if   in    into  is   it   no   not
	of    on   or    s     such t    that the
	their then there these they this to   was
	will  with
);

# Build the token stream for this analyzer.
#
# Every argument after the invocant is forwarded to
# Plucene::Analysis::LowerCaseTokenizer->new. The tokenizer it yields becomes
# the "input" of a Plucene::Analysis::StopFilter whose "stoplist" is a
# reference to this file's @stopwords array. Returns whatever
# Plucene::Analysis::StopFilter->new returns.
sub tokenstream {
	my ($self, @tokenizer_args) = @_;

	# The tokenizer is constructed inside the list so that the constructor
	# is called in list context.
	my %filter_args = (
		input    => Plucene::Analysis::LowerCaseTokenizer->new(@tokenizer_args),
		stoplist => \@stopwords,
	);

	return Plucene::Analysis::StopFilter->new(\%filter_args);
}

1;