WWW::BookBot::Chinese::Novel::ShuKu - Bot to fetch from http://www.shuku.net


WWW-BookBot documentation Contained in the WWW-BookBot distribution.

Index


Code Index:

NAME

Top

WWW::BookBot::Chinese::Novel::ShuKu - Bot to fetch from http://www.shuku.net

SYNOPSIS

Top

  use WWW::BookBot::Chinese::Novel::ShuKu;
  my $bot=WWW::BookBot::Chinese::Novel::ShuKu->new({work_dir=>'/output'});
  $bot->go_catalog({desc=>'畅销', cat1=>0, cat2=>1, pageno=>0});

  bookbot --bot=shuku 畅销 0 1 0
  bookbot --bot=shuku --desc=畅销 --cat1=0 --cat2=1 --pageno=0

  bookbot --bot=shuku 畅销 0 1
  bookbot --bot=shuku --desc=畅销 --cat1=0 --cat2=1

ABSTRACT

Top

Bot to fetch from http://www.shuku.net

DESCRIPTION

Top

Bot to fetch from http://www.shuku.net

desc

  Description information for what to fetch.

cat1

  畅销书籍 http://www.shuku.net/dblx/html/0/1-2-0.html
  cat1 -> http://www.shuku.net/dblx/html/[0]/1-2-0.html

cat2

  畅销书籍 http://www.shuku.net/dblx/html/0/1-2-0.html
  cat2 -> http://www.shuku.net/dblx/html/0/[1]-2-0.html

pageno

  pageno=0	first page
  pageno=1	second page
  ...
  If no pageno is given on the bookbot command line, all pages will be fetched.

EXPORT

None by default.

BUGS, REQUESTS, COMMENTS

Top

Please report any requests, suggestions or bugs via http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-BookBot

AUTHOR

Top

Qing-Jie Zhou <qjzhou@hotmail.com>

SEE ALSO

Top

WWW::BookBot, bookbot


WWW-BookBot documentation Contained in the WWW-BookBot distribution.

package WWW::BookBot::Chinese::Novel::ShuKu;

use 5.008;
use strict;
use warnings;
no warnings qw(uninitialized);
use base qw(WWW::BookBot::Chinese);
our $VERSION='1.02';

# Extend the inherited default settings with the values this bot overrides.
#
# Calls SUPER::default_settings on the invocant, then sets three keys on the
# hash reference it returns:
#   text_paragraph_type    => 'crandspace'
#   get_delay_second       => 2
#   get_delay_second_rand  => 2
# (presumably a fetch delay and its random jitter, in seconds -- confirm
# against the parent class).
#
# Returns the same reference that the parent method returned.
sub default_settings {
	my $invocant = shift;
	my $self = $invocant->SUPER::default_settings;
	@{$self}{qw(text_paragraph_type get_delay_second get_delay_second_rand)}
		= ('crandspace', 2, 2);
	return $self;
}
# Register the message templates specific to this bot.
#
# Fetches the message hash reference from SUPER::msg_init and sets two
# entries on it:
#   CatalogURL  - template of the catalog page URL
#   CatalogInfo - template of the progress line for a catalog page
#
# Both templates are deliberately single-quoted, so the $pargs->{...}
# placeholders are stored literally (presumably expanded later by the parent
# class -- confirm there).
#
# Returns the message hash reference, mirroring how this method itself
# consumes the parent's return value. Without the explicit return the sub
# would yield the last assigned string, so a further subclass chaining
# through SUPER::msg_init would not get the hash back.
sub msg_init {
	my $self = shift;
	my $msg=$self->SUPER::msg_init;
	$msg->{CatalogURL}='http://www.shuku.net:8082/dblx/html/$pargs->{cat1}/$pargs->{cat2}-2-$pargs->{pageno}.html';
	$msg->{CatalogInfo}='==>$pargs->{desc}µÚ$pargs->{pageno}Ò³£º';
	return $msg;
}

# Short alias of this bot (the SYNOPSIS uses it as `bookbot --bot=shuku`).
sub get_alias {
	return 'shuku';
}
# Option specifications accepted by this bot, in positional order:
# desc (string), cat1, cat2 and pageno (integers).
# Kept as a plain list literal so the value in scalar context is unchanged.
sub argv_default {
	return (
		'desc=s',
		'cat1=i',
		'cat2=i',
		'pageno=i',
	);
}
# Fill in default arguments and fetch the requested catalog page(s).
#
# Arguments:
#   $self  - the bot object; go_catalog and {catalog_max_pages} are used
#   $pargs - hash reference of parsed arguments, modified in place
#
# Missing cat1/cat2/desc entries receive defaults. When pageno is defined,
# only that page is passed to go_catalog. Otherwise pages are fetched
# starting at 0 until go_catalog returns 0 or catalog_max_pages is reached.
sub argv_process {
	my ($self, $pargs)=@_;

	# Defaults apply only to undefined entries, so an explicit 0 is kept.
	$pargs->{cat1}=0 unless defined $pargs->{cat1};
	$pargs->{cat2}=1 unless defined $pargs->{cat2};
	$pargs->{desc}='³©Ïú' unless defined $pargs->{desc};

	if( defined $pargs->{pageno} ){
		# A specific page was requested: fetch just that one.
		$self->go_catalog($pargs);
	}else{
		# No page given: walk the pages, keeping the counter in $pargs
		# because go_catalog reads it from there.
		$pargs->{pageno}=0;
		while( $pargs->{pageno}<$self->{catalog_max_pages} ){
			last if $self->go_catalog($pargs)==0;
			$pargs->{pageno}++;
		}
	}
}
# Rewrite every "net:8080" in the URL to "net:8082".
#
# The second argument is edited in place through the @_ alias, so the
# caller's variable changes. Returns the result of the substitution.
sub get_url_verify {
	return $_[1]=~s{net:8080}{net:8082}g;
}
# Regex source (with trailing newline) matching one book link on a catalog
# page. Capture groups, in order: the ID query value, the URL query value,
# and the link text.
sub getpattern_catalog_get_bookargs_data {
	my $pattern = <<'PATTERN';
<a href="http://www\.shuku\.net/cgi-bin/dblx/\.libs/lt-displaybook\?ID=([^<>]*?)&URL=([^<>]*?)">([^<>]*?)</a>
PATTERN
	return $pattern;
}
# Store the fields of one catalog match into the argument hash.
#
# Arguments:
#   $self      - the bot object; parse_titleen is called on it
#   $pargs     - hash reference receiving the id, url and title entries
#   $id, $url  - stored unchanged
#   $raw_title - passed through parse_titleen before being stored
#
# Returns 'Skip' when the parsed title ends with the suffix matched below
# (presumably GBK-encoded Chinese text -- confirm), otherwise 'OK'.
sub catalog_get_bookargs {
	my ($self, $pargs, $id, $url, $raw_title) = @_;
	@{$pargs}{qw(id url)} = ($id, $url);
	$pargs->{title} = $self->parse_titleen($raw_title);
	return $pargs->{title}=~/×÷Æ·¼¯$/ ? 'Skip' : 'OK';
}
# Regex source (with trailing newline) matching either "<h" followed by a
# digit or "<hr" with a percentage width attribute.
sub getpattern_TOC_exists_data {
	my $pattern = <<'PATTERN';
<h(?:\d|r width="\d+%")
PATTERN
	return $pattern;
}
# Regex source (with trailing newline): a zero-width lookahead for "<h"
# followed by a digit.
sub getpattern_TOC_head_data {
	my $pattern = <<'PATTERN';
(?=<h\d)
PATTERN
	return $pattern;
}
# Regex source (with trailing newline) with three alternatives: a link
# caption, a closing </table> tag, or a site-name string. The non-ASCII
# alternatives are presumably GBK-encoded page text -- confirm before
# re-encoding this file.
sub getpattern_TOC_end_data {
	my $pattern = <<'PATTERN';
(?:>·¢±íÆÀÂÛ</a>|</table>|Òà·²¹«ÒæÍ¼Êé¹Ý)
PATTERN
	return $pattern;
}
# Regex source (with trailing newline): a zero-width lookahead for "<pre".
sub getpattern_chapter_head_data {
	my $pattern = <<'PATTERN';
(?=<pre)
PATTERN
	return $pattern;
}
# Regex source (with trailing newline) with two alternatives: a closing
# </pre> tag or a site-name string. The non-ASCII alternative is presumably
# GBK-encoded page text -- confirm before re-encoding this file.
sub getpattern_chapter_end_data {
	my $pattern = <<'PATTERN';
(?:</pre>|Òà·²¹«ÒæÍ¼Êé¹Ý)
PATTERN
	return $pattern;
}
# Mark reserved paragraphs before paragraph parsing.
#
# Every <td height="20" colspan="2">...</td> cell in the second argument is
# replaced by the literal marker $BOOKBOTRETURN$ followed by the cell's
# content. The text is edited in place through the @_ alias.
# Returns the result of the substitution.
sub parse_paragraph_begin {
	return $_[1]=~s{<td height="20" colspan="2">(.*?)</td>}{\$BOOKBOTRETURN\$$1}sg;
}
# Remove the reserved-paragraph markers after paragraph parsing.
#
# Every literal $BOOKBOTRETURN$ marker in the second argument is deleted,
# together with one newline directly before it when present. The text is
# edited in place through the @_ alias.
# Returns the result of the substitution.
sub parse_paragraph_end {
	return $_[1]=~s{\n?\$BOOKBOTRETURN\$}{}sg;
}

1;
__END__