| WWW-Mixi-Scraper documentation | Contained in the WWW-Mixi-Scraper distribution. |
WWW::Mixi::Scraper::Plugin::ViewEvent
This is almost equivalent to WWW::Mixi->parse_view_event().
The scrape() method returns a hash reference such as:
{
subject => 'title of the event',
link => 'http://mixi.jp/view_event.pl?id=xxx',
time => 'yyyy-mm-dd hh:mm',
date => 'yyyy-mm-dd',
deadline => 'sometime soon',
location => 'somewhere',
description => 'event description',
name => 'who plans',
name_link => 'http://mixi.jp/show_friend.pl?id=xxx',
list => {
count => '8人',
link => 'http://mixi.jp/list_event_member.pl?id=xxx&comm_id=xxx',
subject => '参加者一覧を見る',
},
comments => [
{
subject => 1,
name => 'commenter',
name_link => 'http://mixi.jp/show_friend.pl?id=xxxx',
link => 'http://mixi.jp/view_event.pl?id=xxxx#1',
time => 'yyyy-mm-dd hh:mm',
description => 'comment body',
}
]
}
Kenichi Ishigaki, <ishigaki at cpan.org>
Copyright (C) 2007 by Kenichi Ishigaki.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| WWW-Mixi-Scraper documentation | Contained in the WWW-Mixi-Scraper distribution. |
package WWW::Mixi::Scraper::Plugin::ViewEvent;

use strict;
use warnings;
use WWW::Mixi::Scraper::Plugin;
use WWW::Mixi::Scraper::Utils qw( _uri _datetime );
use utf8;

# Declares the query parameters for this plugin and the check applied to
# each one (parameter name followed by checker name).
validator {qw(
  id      is_number
  comm_id is_number
  page    is_number_or_all
)};

# scrape($html)
#
# Extracts the event topic and its comments from the HTML of a
# view_event.pl page.
#
# Parameters:
#   $self - the plugin object; $self->{uri} is used as the event link and
#           $self->html_or_text / $self->post_process are called on it.
#   $html - the page source as a string.
#
# Returns a hash reference for the event (subject, time, name, name_link,
# description, images, infos, plus date/deadline/location/list when the
# matching labelled rows are present, link, and comments => [ ... ]).
sub scrape {
  my ($self, $html) = @_;

  my %scraper;

  # An attached image cell: the anchor's onClick value and the thumbnail src.
  $scraper{images} = scraper {
    process 'a',
      link => '@onClick';
    process 'a>img',
      thumb_link => '@src';
    result qw( link thumb_link );
  };

  # One labelled row of the event info box: <dt> is the label, <dd> the
  # value, and an optional anchor inside <dd> supplies link/subject.
  $scraper{infos} = scraper {
    process 'dt',
      name => 'TEXT';
    process 'dd',
      string => 'TEXT';
    process 'dd>a',
      link    => '@href',
      subject => 'TEXT';
    result qw( name string link subject );
  };

  # The event topic itself (title, author, body, images, info rows).
  $scraper{topic} = scraper {
    process 'dl.bbsList01>dt>span.date',
      'time' => 'TEXT';
    process 'dl.bbsList01>dt[class="bbsTitle clearfix"]>span.titleSpan',
      'subject' => 'TEXT';
    process 'dd.bbsContent>dl>dt>a',
      'name'      => 'TEXT',
      'name_link' => '@href';
    process 'dd.bbsContent>dl>dd',
      'description' => $self->html_or_text;
    process 'div.communityPhoto>table>tr>td',
      'images[]' => $scraper{images};
    process 'dl.bbsList01>dd.bbsInfo>dl',
      'infos[]' => $scraper{infos};
    result qw( time subject name name_link images infos description );
  };

  # The body of a single comment (author, text, images).
  $scraper{comment_body} = scraper {
    process 'dl.commentContent01>dt>a',
      'name_link' => '@href',
      'name'      => 'TEXT';
    process 'dl.commentContent01>dd',
      'description' => $self->html_or_text;
    process 'dl.commentContent01>dd>table>tr>td',
      'images[]' => $scraper{images};
    result qw( name_link name description images );
  };

  # The comment list: dates, sender ids and bodies are collected as three
  # parallel arrays and stitched together further below.
  $scraper{comment} = scraper {
    process 'dl.commentList01>dt>span.date',
      'dates[]' => 'TEXT';
    process 'dl.commentList01>dt>span.senderId',
      'sender_ids[]' => 'TEXT';
    process 'dl.commentList01>dd',
      'comments[]' => $scraper{comment_body};
    result qw( dates comments sender_ids );
  };

  # Only the first element of what post_process returns is used.
  my $stash = $self->post_process($scraper{topic}->scrape(\$html))->[0];

  # Map the labelled info rows onto named keys. The labels are Japanese
  # text as it appears on the page; they must be real characters (not
  # mis-decoded bytes) because this file is under "use utf8".
  foreach my $item (@{ $stash->{infos} || [] }) {
    my $label = $item->{name};

    # Skip rows for which no <dt> text was captured, to avoid comparing
    # undef with eq.
    next unless defined $label;

    if ( $label eq '開催日時' ) {        # event date and time
      $stash->{date} = $item->{string};
    }
    elsif ( $label eq '募集期限' ) {     # application deadline
      $stash->{deadline} = $item->{string};
    }
    elsif ( $label eq '開催場所' ) {     # venue
      $stash->{location} = $item->{string};
    }
    elsif ( $label eq '参加者' ) {       # participants
      $stash->{list}->{count}   = $item->{string};
      $stash->{list}->{link}    = _uri( $item->{link} );
      $stash->{list}->{subject} = $item->{subject};
    }
  }

  # XXX: this fails when you test with local files.
  # However, this link cannot be extracted from the html,
  # at least as of writing this. ugh.
  $stash->{link} = $self->{uri};

  my $stash_c = $self->post_process($scraper{comment}->scrape(\$html))->[0];

  my @dates      = @{ $stash_c->{dates}      || [] };
  my @sender_ids = @{ $stash_c->{sender_ids} || [] };
  my @comments   = @{ $stash_c->{comments}   || [] };

  foreach my $comment ( @comments ) {
    # The n-th date and sender id belong to the n-th comment body.
    $comment->{time}    = _datetime( shift @dates );
    $comment->{subject} = shift @sender_ids;

    # incompatible with WWW::Mixi to let comment links
    # look more 'permanent' to make plagger/rss readers happier
    $comment->{name_link} = _uri( $comment->{name_link} );
    $comment->{link}      = $stash->{link}
      ? _uri( $stash->{link} . '#' . $comment->{subject} )
      : undef;

    # A missing images list is treated as empty.
    foreach my $image ( @{ $comment->{images} || [] } ) {
      $image->{link}       = _uri( $image->{link} );
      $image->{thumb_link} = _uri( $image->{thumb_link} );
    }
  }

  $stash->{comments} = \@comments;

  return $stash;
}

1;

__END__