WWW::BookBot::Chinese::Agriculture::Cast - Bot to fetch from http://www.cast.net.cn


WWW-BookBot documentation Contained in the WWW-BookBot distribution.

Index


Code Index:

NAME

Top

WWW::BookBot::Chinese::Agriculture::Cast - Bot to fetch from http://www.cast.net.cn

SYNOPSIS

Top

  use WWW::BookBot::Chinese::Agriculture::Cast;
  my $bot=WWW::BookBot::Chinese::Agriculture::Cast->new({work_dir=>'/output'});
  $bot->go_catalog({pageno=>1});

  bookbot --bot=agr_cast
  bookbot --bot=agr_cast --pageno=1

ABSTRACT

Top

Bot to fetch from http://www.cast.net.cn

DESCRIPTION

Top

Bot to fetch from http://www.cast.net.cn

pageno

  pageno=1	first page
  pageno=2	second page
  ...
  If no pageno is given on the bookbot command line, all pages are fetched.

EXPORT

None by default.

BUGS, REQUESTS, COMMENTS

Top

Please report any requests, suggestions or bugs via http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-BookBot

AUTHOR

Top

Qing-Jie Zhou <qjzhou@hotmail.com>

SEE ALSO

Top

WWW::BookBot, bookbot


WWW-BookBot documentation Contained in the WWW-BookBot distribution.

package WWW::BookBot::Chinese::Agriculture::Cast;

use 5.008;
use strict;
use warnings;
no warnings qw(uninitialized);
use base qw(WWW::BookBot::Chinese);
our $VERSION='1.02';

sub default_settings {
	my $self = shift->SUPER::default_settings;
	$self->{text_paragraph_type}='brbr_or_brandspace';
	$self->{book_has_chapters}=0;
	$self->{get_trunk_size}=2500;
	$self->{get_trunk_fresh_size}=250;
	$self;
}
sub msg_init {
	my $self = shift;
	my $msg=$self->SUPER::msg_init;
	$msg->{CatalogURL}='http://www.cast.net.cn/yaowen/yaowen.asp?page=$pargs->{pageno}';
	$msg->{CatalogInfo}='==>¿Æ¼¼ÒªÎŵÚ$pargs->{pageno}Ò³£º';
}

sub get_alias {
	'agr_cast';
}
sub argv_default {
	qw(pageno=i);
}
sub argv_process {
	my ($self, $pargs)=@_;
	if( defined($pargs->{pageno}) ){
		$self->go_catalog($pargs);
	}else{
		$self->argv_process_all($pargs);
	}
}
sub argv_process_all {
	my ($self, $pargs)=@_;
	for($pargs->{pageno}=1; $pargs->{pageno}<=$self->{catalog_max_pages}; $pargs->{pageno}++) {
		last if $self->go_catalog($pargs)==0;
	}
}
sub getpattern_catalog_get_bookargs_data {
	<<'DATA';
£«</span><a href="#" onClick="MM_openBrWindow\('yao-text.asp\?id=(\d+)[^\)]+\)">([^<>]*)</a>[^<>\(]*\([^\)]+\)</TD>[^<>]*<TD>[^<>]*<font color=gray>([^<>]*)</font></TD>
DATA
}
sub catalog_get_bookargs {
	my $self = shift;
	my @a=@_;
	my $pargs=$a[0];
	$pargs->{id}=$a[1];
	$pargs->{url}='http://www.cast.net.cn/yaowen/yao-text.asp?id='.$pargs->{id};
	$pargs->{title}=$self->parse_titleen($a[2]);
	$pargs->{date}=$self->parse_titleen($a[3]);
	'OK';
}
sub getpattern_catalog_get_next_data {
	<<'DATA';
>ÏÂÒ»Ò³<
DATA
}
sub getpattern_chapter_head_data {
	<<'DATA';
align="left"></div>
DATA
}
sub getpattern_chapter_end_data {
	<<'DATA';
<br></TD>
DATA
}
sub result_time {
	my ($self, $pargs) = @_;
	if($pargs->{date}=~/^(\d+)Äê(\d+)ÔÂ(\d+)ÈÕ$/) {
		return $self->string2time("$1-$2-$3");
	}else{
		return $pargs->{last_modified};
	}
}

1;
__END__