WWW::Mixi::Scraper::Mech - WWW::Mixi::Scraper::Mech documentation


WWW-Mixi-Scraper documentation Contained in the WWW-Mixi-Scraper distribution.

Index


Code Index:

NAME

Top

WWW::Mixi::Scraper::Mech

SYNOPSIS

Top

    use WWW::Mixi::Scraper::Mech;
    my $mech = WWW::Mixi::Scraper::Mech->new;
       $mech->login( 'foo@bar.com' => 'password' );

       $mech->may_have_errors('Cannot login');

    my $html = $mech->get_content('/new_friend_diary.pl');

    $mech->logout;

DESCRIPTION

Top

Mainly used internally.

METHODS

Top

new

Creates an object. An optional hash of options is passed on to WWW::Mechanize, except for 'email', 'password' and 'next_url', which are kept and used to log in.

get

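Fetches the given URI. If the response contains the mixi login form, this logs in and then follows the "0;url=..." meta refresh target, if there is one. Returns true if the last request was successful.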

content

returns (hopefully decoded) content.

get_content

As the name suggests, this does both 'get' and 'content'. If you pass an additional encoding (which must be one that Encode understands), the content is returned encoded in that encoding.

login

Tries to log in to mixi. As of this writing, password obfuscation and SSL login are not implemented.

logout

tries to log out from mixi.

may_have_errors

Dies with an error message and the HTTP status line if the last request was not successful (this may change).

uri

shortcut for {mech}->uri.

SEE ALSO

Top

WWW::Mechanize

AUTHOR

Top

Kenichi Ishigaki, <ishigaki at cpan.org>

COPYRIGHT AND LICENSE

Top


WWW-Mixi-Scraper documentation Contained in the WWW-Mixi-Scraper distribution.

package WWW::Mixi::Scraper::Mech;

use strict;
use warnings;
use Encode;
use WWW::Mechanize 1.50;
use WWW::Mixi::Scraper::Utils qw( _uri );
use Time::HiRes qw( sleep );

# Constructor.
#
# Takes a hash of options. 'email', 'password' and 'next_url' are removed
# from it and kept as the login form data (together with sticky => 'on');
# every remaining option is handed to WWW::Mechanize->new. 'agent' and
# 'cookie_jar' receive defaults when the caller did not supply a true value.
#
# Returns the new object, which holds the mechanize instance under {mech}
# and the login form data under {login}.
sub new {
  my ($class, %options) = @_;

  my $email    = delete $options{email};
  my $password = delete $options{password};
  my $next_url = delete $options{next_url};

  # $WWW::Mixi::Scraper::VERSION is only defined once the parent module has
  # been loaded. Guard the interpolation so that using this class on its own
  # neither warns about an uninitialized value nor produces an agent string
  # that ends in a bare slash.
  my $version = $WWW::Mixi::Scraper::VERSION;
  $options{agent} ||= defined $version
    ? "WWW-Mixi-Scraper/$version"
    : 'WWW-Mixi-Scraper';
  $options{cookie_jar} ||= {};

  my $mech = WWW::Mechanize->new( %options );

  my $self = bless {
    mech  => $mech,
    login => {
      email    => $email,
      password => $password,
      next_url => $next_url,
      sticky   => 'on',
    },
  }, $class;

  return $self;
}

# Posts the login form to mixi.
#
# Parameters (both optional):
#   $email, $password - credentials to log in with. When given, they replace
#                       the ones stored by new() and are remembered, so that
#                       a later automatic re-login from get() uses them too.
#                       When omitted, the stored credentials are used.
#
# Dies (via may_have_errors) with 'Login failed' and the status line when the
# request was not successful. Emits a notice with warn() on success.
sub login {
  my ($self, $email, $password) = @_;

  # The documented usage is $mech->login( $email => $password ); honour the
  # arguments instead of silently ignoring them.
  $self->{login}{email}    = $email    if defined $email;
  $self->{login}{password} = $password if defined $password;

  sleep(1.0); # intentional delay not to access too frequently

  $self->{mech}->post( 'http://mixi.jp/login.pl' => $self->{login} );

  $self->may_have_errors('Login failed');

  warn "logged in to mixi";
}

# Requests the logout page and dies with 'Failed to logout' (plus the
# status line) when that request was not successful.
sub logout {
  my ($self) = @_;

  $self->get('/logout.pl');

  return $self->may_have_errors('Failed to logout');
}

# Checks the outcome of the last request made through {mech}.
# Returns the (true) success value when it succeeded; otherwise passes the
# given arguments (normally an error message) on to _error.
sub may_have_errors {
  my ($self, @error_args) = @_;

  my $succeeded = $self->{mech}->success;
  return $succeeded if $succeeded;

  $self->_error(@error_args);
}

# Dies with "<message>: <status line of the last response>".
# A missing (or false) message is replaced by 'Mech error'.
sub _error {
  my $self    = shift;
  my $message = shift(@_) || 'Mech error';

  my $status_line = $self->{mech}->res->status_line;

  die "$message: $status_line";
}

# Fetches $uri through {mech} and returns the success flag of the last
# request made.
#
# Anything that is not exactly a 'URI' object is first run through _uri().
# When the fetched page contains the login form, the requested path and query
# are stored as next_url, login() is called, and the "0;url=..." meta refresh
# target of the resulting page (if any) is fetched.
sub get {
  my ($self, $uri) = @_;

  my $mech = $self->{mech};

  unless ( ref $uri eq 'URI' ) {
    $uri = _uri($uri);
  }

  sleep(1.0); # deliberate pause so that mixi is not accessed too frequently

  $mech->get($uri);

  # login form detection adapted from Plagger::Plugin::CustomFeed::Mixi
  my $got_login_form = $self->content =~ /action="\/?login\.pl/;

  if ( $got_login_form ) {
    # path_query rather than path, so that the query string survives
    $self->{login}->{next_url} = $uri->path_query;
    $self->login;

    # the page after login points to the destination via a meta refresh
    if ( $self->content =~ /"0;url=(.*?)"/ ) {
      $mech->get($1);
    }
  }

  return $mech->success;
}

# Returns whatever {mech}->content gives for the last response.
sub content {
  my ($self) = @_;

  my $mech = $self->{mech};

  return $mech->content;
}

# Fetches $uri and returns its content, or undef when the fetch did not
# succeed. When both the content and $encoding are true, the content is
# returned encoded into $encoding by Encode's encode().
sub get_content {
  my ($self, $uri, $encoding) = @_;

  my $body;
  if ( $self->get($uri) ) {
    $body = $self->content;
  }

  # nothing to encode, or no encoding requested: hand back what we have
  return $body unless $body && $encoding;

  my $encoded = encode( $encoding => $body );
  return $encoded;
}

# Shortcut for {mech}->uri.
sub uri {
  my ($self) = @_;

  my $mech = $self->{mech};

  return $mech->uri;
}

1;

__END__