/usr/local/CPAN/Vimana/Vimana/VimOnline/ScriptPage.pm
package Vimana::VimOnline::ScriptPage;
use warnings;
use strict;
use URI;
use LWP::Simple qw();
use HTML::Entities;
use utf8;
# use Lingua::ZH::Wrap qw(wrap $columns $overflow);
use Text::Wrap qw(wrap $columns $huge);
$columns = 72; # Change columns
$huge = 'overflow';
# $overflow = 1; # Chinese char may occupy 76th col
sub fetch {
my ( $class, $id ) = @_;
my $uri = page_uri($id);
my $html = LWP::Simple::get($uri);
unless( $html ) {
die "Can't retrieve vim.org content.\n";
}
return $class->parse($html);
}
sub page_uri {
my $id = shift;
my $uri = URI->new("http://www.vim.org/scripts/script.php");
$uri->query_form( script_id => $id );
$uri;
}
sub find_urls {
my $content = shift;
my @urls = ();
while ( my ( $url ) = ($content =~ m{([htf]tps?://\S.*)}g ) ) {
push @urls , $url;
}
return @urls;
}
my $base_uri = 'http://www.vim.org';
sub display {
my ( $class, $info ) = @_;
if ($^O eq 'MSWin32') {
eval { require "Win32/API.pm" };
unless ($@) {
Win32::API->Import('kernel32', 'UINT GetACP()');
my $cp = "cp".GetACP();
binmode STDOUT, ":encoding($cp)";
}
}
print <<INFO;
@{[ $info->{title} ]}
TYPE: @{[ $info->{type} ]}
VERSION: @{[ $info->{version} ]}
VIM VERSION: @{[ $info->{vimver} ]}
CREATE DATE: @{[ $info->{date} ]}
AUTHOR NAME: @{[ $info->{author_name} ]}
AUTHOR PROFILE: @{[ $info->{author_url} ]}
DESCRIPTION:
@{ [ wrap( ' ', ' ', $info->{description} ) ] }
INSTALL DETAILS:
@{ [ wrap( ' ', ' ', $info->{install_details} ) ] }
FILENAME: @{ [ $info->{filename} ] }
DOWNLOAD: @{ [ $info->{download} ] }
INFO
}
#
# vimonline website sucks , i can't found any elemetn class or to scraper by
# Web::Scraper.
#
# so.. it's very dirty
sub parse {
my ( $class , $content ) = @_;
use Encode qw(decode);
$content = decode('utf-8' , $content );
# map { $info{$_} = decode( 'iso-8859-1' , $info{$_} ) } keys %info;
my %info = ();
( $info{title} ) =
$content =~ m{<title>(.*?)\s:\svim online</title>}gsi;
( $info{author_url} , $info{author_name} ) =
$content =~ m{<tr><td class="prompt">created by</td></tr>\s*<tr><td><a href="(.*?)">(.*?)</a></td></tr>}gsi;
( $info{type} ) =
$content =~ m{<tr><td class="prompt">script type</td></tr>\s*<tr><td>(.*?)</td></tr>}gsi;
( $info{description} ) =
$content =~ m{<tr><td class="prompt">description</td></tr>
.*?<tr><td>(.*?)</td></tr>}gsi;
( $info{install_details} ) =
$content =~ m{<tr><td class="prompt">install details</td></tr>.*?
<tr><td>(.*?)</td></tr>}gsi;
( $info{download} , $info{filename} , $info{version} , $info{date} , $info{vimver} , $info{author_url} , $info{author_name} ) =
$content =~ m{\s*<td class="rowodd" valign="top" nowrap><a href="(.*?)">(.*?)</a></td>
\s*<td class="rowodd" valign="top" nowrap><b>(.*?)</b></td>
\s*<td class="rowodd" valign="top" nowrap><i>(.*?)</i></td>
\s*<td class="rowodd" valign="top" nowrap>(.*?)</td>
\s*<td class="rowodd" valign="top"><i><a href="(.*?)">(.*?)</a></i></td>}gsi;
map {
$info{$_} =~ s{<br/?>}{\n}g;
$info{$_} =~ s{</?.+?>}{}g;
$info{$_} =~ s{\s*$}{}g;
$info{$_} =~ s{^\s*}{}g;
$info{$_} =~ s{ }{ }g; # windows don't have 0xA0.
$info{$_} =~ s{\r}{}g;
} keys %info;
map { $info{$_} = decode_entities( $info{$_} ) } keys %info;
$info{author_url} = $base_uri . $info{author_url};
$info{download} = $base_uri . '/scripts/' . $info{download};
return \%info;
}
1;