WWW::Mixi::Scraper::Plugin::ViewDiary - WWW::Mixi::Scraper::Plugin::ViewDiary documentation


WWW-Mixi-Scraper documentation Contained in the WWW-Mixi-Scraper distribution.

Index


Code Index:

NAME

Top

WWW::Mixi::Scraper::Plugin::ViewDiary

DESCRIPTION

Top

This is almost equivalent to WWW::Mixi->parse_view_diary().

METHOD

Top

scrape

Returns a hash reference such as:

  {
    subject => 'title of the entry',
    time => 'yyyy-mm-dd hh:mm',
    description => 'entry body',
    images => [
      {
        link => 'show_diary_picture.pl?img_src=http://img1.mixi.jp/photo/xx/xx.jpg',
        thumb_link => 'http://img1.mixi.jp/photo/xx/xx.jpg',
      },
    ],
    comments => [
      {
        name => 'commenter',
        link => 'http://mixi.jp/show_friend.pl?id=xxxx',
        time => 'yyyy-mm-dd hh:mm',
        description => 'comment body',
      }
    ]
  }

AUTHOR

Top

Kenichi Ishigaki, <ishigaki at cpan.org>

COPYRIGHT AND LICENSE

Top


WWW-Mixi-Scraper documentation Contained in the WWW-Mixi-Scraper distribution.

package WWW::Mixi::Scraper::Plugin::ViewDiary;

use strict;
use warnings;
use WWW::Mixi::Scraper::Plugin;

# Declare the validation rules for this plugin: both "id" and "owner_id"
# are paired with the "is_number" rule.
# NOTE(review): `validator` is imported from WWW::Mixi::Scraper::Plugin;
# presumably these are the request parameters accepted for view_diary.pl --
# confirm against the base plugin before relying on it.
validator {qw(
  id        is_number
  owner_id  is_number
)};

# Scrape a "view diary" page.
#
# Arguments:
#   $self - the plugin object (provides html_or_text and post_process)
#   $html - the page HTML as a string
#
# Returns a hash reference with the entry's time, subject, description,
# images and link, plus a "comments" key holding the post-processed list
# of comments (each with time, link, name and description).
sub scrape {
  my ($self, $html) = @_;

  my %scraper;

  # One photo cell: the anchor's onClick attribute and the thumbnail's src.
  $scraper{images} = scraper {
    process 'a',
      link => '@onClick';
    process 'a>img',
      thumb_link => '@src';
    result qw( link thumb_link );
  };

  # The diary entry itself.  The text of the span nested inside the title's
  # <dt> is captured separately as "string" so that it can be stripped from
  # the end of the subject afterwards.
  $scraper{diary} = scraper {
    process 'div.viewDiaryBox>div.listDiaryTitle>dl>dd',
      time => 'TEXT';
    process 'div.viewDiaryBox>div.listDiaryTitle>dl>dt',
      subject => 'TEXT';
    process 'div.viewDiaryBox>div.listDiaryTitle>dl>dt>span',
      string => 'TEXT';
    process 'div#diary_body',
      description => $self->html_or_text;
    process 'div.diaryPhoto>table>tr>td',
      'images[]' => $scraper{images};
    process 'div.diaryPaging01>div.diaryPagingLeft>a',
      prev_link => '@href';
    result qw( time subject description images prev_link string );
  };

  my $stash = $scraper{diary}->scrape(\$html);

  # The entry's link is rebuilt from the paging link captured as prev_link:
  # neighbor_diary becomes view_diary and everything from "&direction=prev"
  # onwards is dropped.  When no paging anchor matched, the link stays
  # undef; skip the rewrites so they do not emit "uninitialized" warnings.
  $stash->{link} = delete $stash->{prev_link};
  if ( defined $stash->{link} ) {
    $stash->{link} =~ s/neighbor_diary/view_diary/;
    $stash->{link} =~ s/&direction=prev.*//;
  }
  $stash = $self->post_process($stash)->[0];

  # Strip the span text from the end of the subject.  The text comes from
  # the scraped page, so it must be matched literally (\Q...\E): regex
  # metacharacters in it would otherwise break or mis-match the pattern.
  my $string = delete $stash->{string} || '';
  $stash->{subject} =~ s/\Q$string\E$//;

  # A single comment: timestamp, commenter link and name, and the body.
  $scraper{comments} = scraper {
    process 'dl.comment>dt>span.date',
      time => 'TEXT';
    process 'dl.comment>dt>a',
      link => '@href',
      name => 'TEXT';
    process 'dl.comment>dd',
      description => $self->html_or_text;
    result qw( time link name description );
  };

  # Every list item of the comment list area yields one comment.
  $scraper{list} = scraper {
    process 'div.commentListArea>ul>li',
      'comments[]' => $scraper{comments};
    result qw( comments );
  };

  $stash->{comments} = $self->post_process($scraper{list}->scrape(\$html));

  return $stash;
}

1;

__END__