WWW::Mooos::Scraper::Util - WWW::Mooos::Scraper util module


WWW-Mooos-Scraper documentation, contained in the WWW-Mooos-Scraper distribution.

Index


Code Index:

NAME

Top

WWW::Mooos::Scraper::Util - WWW::Mooos::Scraper util module

VERSION

Top

0.02

DESCRIPTION

Top

WWW::Mooos::Scraper util module

METHOD

Top

_entry_time2datetime

_get_entry_type

_get_mooos_page_url

_h2z

_strip

_uri

_utf8_encode

SEE ALSO

Top

DateTime, Encode, Encode::JP::H2Z, Exporter, URI

AUTHOR

Top

Akira Horimoto

COPYRIGHT AND LICENSE

Top


WWW-Mooos-Scraper documentation, contained in the WWW-Mooos-Scraper distribution.
package WWW::Mooos::Scraper::Util;

use strict;
use warnings;
use DateTime;
use Encode;
use Encode::JP::H2Z;
use Exporter;
use URI;

# Inherit from Exporter so that `use WWW::Mooos::Scraper::Util qw(...)` can
# import the helper functions listed below.
our @ISA         = qw(Exporter);
# Functions exported only on request; no @EXPORT list is set in this file,
# so callers must name the helpers they want (or use the ":all" tag).
our @EXPORT_OK   = qw(
                    _entry_time2datetime
                    _get_entry_type
                    _get_mooos_page_url
                    _h2z
                    _strip
                    _uri
                    _utf8_encode
                    );

# ":all" is a shorthand tag covering every name in @EXPORT_OK.
our %EXPORT_TAGS = ( all => \@EXPORT_OK );
# Module version (matches the VERSION section of the documentation).
our $VERSION     = 0.02;

# Convert a relative "... ago" entry time into an absolute DateTime object.
#
# Arguments:
#   $entry_time - relative age text such as "1 hours 3 min ago",
#                 "4 days ago" or "24 min ago".  The pattern is not anchored
#                 at the start, so any text before the age part is ignored.
#   $timestamp  - epoch seconds the age is measured from; any false value
#                 (undef, 0, "") falls back to the current time.
#   $timezone   - optional; when true it is passed to set_time_zone() on the
#                 resulting object.
#
# Returns the DateTime object, or nothing (undef / empty list) when
# $entry_time is undefined.  Text without a recognisable age yields the
# reference time itself.
sub _entry_time2datetime {

    my($entry_time, $timestamp, $timezone) = @_;
    return if !defined $entry_time;
    $timestamp ||= time;

    # Start every component at zero: when the text does not end in "ago" the
    # pattern below does not match at all, and undef must not reach the
    # arithmetic (it would raise "uninitialized value" warnings).
    my($day, $hour, $min) = (0, 0, 0);

    # e.g.1  1 hours 3 min ago
    # e.g.2  4 days ago
    # e.g.3  24 min ago
    # The plural "s" is optional so that "1 day ago" / "1 hour ago" are
    # counted instead of silently collapsing to zero.
    if($entry_time =~ m/
            (?: (\d+) \s+ days?  )? \s?
            (?: (\d+) \s+ hours? )? \s?
            (?: (\d+) \s+ min    )? \s?
            ago $
        /x){
        # A component that is absent from the text leaves its capture undef.
        $day  = $1 || 0;
        $hour = $2 || 0;
        $min  = $3 || 0;
    }

    # Elapsed time in seconds, subtracted from the reference timestamp.
    my $pass_time = (86_400 * $day) + (3_600 * $hour) + (60 * $min);
    my $dt = DateTime->from_epoch(epoch => $timestamp - $pass_time);
    $dt->set_time_zone($timezone) if $timezone;
    return $dt;
}

# Classify an entry by its type text.
#
# Returns "positive" when the text contains the substring "positive",
# "negative" for any other defined value, and nothing (undef / empty list)
# when the argument is undefined.
sub _get_entry_type {
    my ($type_text) = @_;

    defined $type_text or return;

    if ($type_text =~ /positive/) {
        return "positive";
    }
    return "negative";
}

# Point an existing URI object at a page path.
#
# Arguments:
#   $mooos_page_url - page path; a single leading "." is removed
#                     (e.g. "./page/2" becomes "/page/2").
#   $uri            - object providing a path() setter (presumably a URI
#                     instance); it is modified in place.
#
# Returns the same $uri object after its path has been replaced, or nothing
# (undef / empty list) when $mooos_page_url is undefined.
sub _get_mooos_page_url {
    my ($page_path, $target) = @_;

    return unless defined $page_path;

    # Drop the leading dot of a "./"-style relative link.
    $page_path =~ s#^\.##;

    $target->path($page_path);
    return $target;
}

# Run Encode::JP::H2Z::h2z over a UTF-8 encoded string.
#
# The text is converted from "utf8" to "euc-jp", h2z() is applied in place
# to the EUC-JP data, and the result is converted back to "utf8".
#
# Returns the converted string, or nothing (undef / empty list) when the
# argument is undefined.
sub _h2z {
    my ($text) = @_;

    defined $text or return;

    # The conversion is done on an EUC-JP copy, then transcoded back.
    Encode::from_to($text, "utf8", "euc-jp");
    Encode::JP::H2Z::h2z(\$text);
    Encode::from_to($text, "euc-jp", "utf8");

    return $text;
}

# Trim leading and trailing whitespace from a string.
#
# Returns the trimmed copy, or nothing (undef / empty list) when the
# argument is undefined.  The caller's variable is not modified.
sub _strip {
    my ($text) = @_;

    return unless defined $text;

    # Alias $_ to the local copy so both substitutions apply to it.
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }

    return $text;
}

# Wrap a value in a URI object.
#
# Returns URI->new($uri) for any defined argument, or nothing
# (undef / empty list) when the argument is undefined.
sub _uri {
    my ($source) = @_;

    return defined $source ? URI->new($source) : ();
}

# Make sure a string is returned as UTF-8 encoded bytes.
#
# A string carrying Perl's internal UTF-8 flag is passed through
# Encode::encode_utf8; a string without the flag is returned unchanged.
# Returns nothing (undef / empty list) when the argument is undefined.
sub _utf8_encode {
    my ($text) = @_;

    defined $text or return;

    return Encode::is_utf8($text)
         ? Encode::encode_utf8($text)
         : $text;
}


1;

__END__