| Lingua-JA-Summarize-Extract documentation | Contained in the Lingua-JA-Summarize-Extract distribution. |
Lingua::JA::Summarize::Extract::Plugin::Parser::Trim - a simple word parser
use strict;
use warnings;
use utf8;
use Lingua::JA::Summarize::Extract;
# Text to summarize. Declared once: a second `my $text` in the same scope
# would mask the first and warn under `use warnings`.
my $text = '日本語の文章を適当に書く。';
# Ask for the Parser::Trim plugin by name via the `plugins` option.
my $summary = Lingua::JA::Summarize::Extract->extract($text, { plugins => [ 'Parser::Trim' ] });
print "$summary";
Text is split into terms according to character type. You can change the minimum length a run of characters must have to count as a term.
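latin_size: minimum length of a run of Latin characters (default 3)
kana_size: minimum length of a run of katakana characters (default 3)
han_size: minimum length of a run of kanji (Han) characters (default 2)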
Kazuhiro Osawa <ko@yappo.ne.jp>
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Lingua-JA-Summarize-Extract documentation | Contained in the Lingua-JA-Summarize-Extract distribution. |
package Lingua::JA::Summarize::Extract::Plugin::Parser::Trim;

use strict;
use base qw( Lingua::JA::Summarize::Extract::Plugin );

# Registers han_size, kana_size and latin_size through mk_accessors, which is
# not defined in this file (presumably inherited from the base plugin class).
# parse() below reads each of them as a method on $self.
__PACKAGE__->mk_accessors(qw/ han_size kana_size latin_size /);

# parse
#
# Scans $self->text for runs of same-script characters and counts how many
# times each distinct run occurs.
#
# A run is a sequence of Katakana, Han or Latin characters at least
# kana_size, han_size or latin_size characters long respectively. When an
# accessor returns a false value (undef, 0 or the empty string) the built-in
# minimum is used instead: 2 for Han, 3 for Katakana and 3 for Latin.
#
# Returns a hash reference mapping each matched run to its occurrence count.
sub parse {
    my $self = shift;

    # `||` (not `//`) on purpose: a size of 0 also falls back to the default.
    my $han_size   = $self->han_size   || 2;
    my $kana_size  = $self->kana_size  || 3;
    my $latin_size = $self->latin_size || 3;

    my $text = $self->text;

    # In list context m//g returns every capture of every match, so each
    # element here is one whole run.
    my %count_of;
    for my $term ($text =~ /(\p{Katakana}{$kana_size,}|\p{Han}{$han_size,}|\p{Latin}{$latin_size,})/g) {
        $count_of{$term}++;
    }

    return \%count_of;
}

1;
__END__