WWW::Google::PageRank - Query google pagerank of page


WWW-Google-PageRank documentation Contained in the WWW-Google-PageRank distribution.

Index


Code Index:

NAME

Top

WWW::Google::PageRank - Query google pagerank of page

SYNOPSIS

Top

 use WWW::Google::PageRank;
 my $pr = WWW::Google::PageRank->new;
 print scalar($pr->get('http://www.yahoo.com/')), "\n";

DESCRIPTION

Top

WWW::Google::PageRank is a class implementing an interface for querying the Google pagerank of a page.

To use it, create a WWW::Google::PageRank object and call its get() method to query the page rank of a URL.

It uses LWP::UserAgent to make requests to Google.

CONSTRUCTOR METHOD

Top

$gpr = WWW::Google::PageRank->new(%options);

This method constructs a new WWW::Google::PageRank object and returns it. Key/value pair arguments may be provided to set up the initial state. The following options correspond to attribute methods described below:

   KEY                     DEFAULT
   -----------             --------------------
   agent                   "Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)"
   proxy                   undef
   timeout                 undef
   env_proxy               undef
   host                    "toolbarqueries.google.com"

agent specifies the 'User-Agent' header sent when querying Google. If the proxy option is passed in, requests will be made through the specified proxy. host is the host which serves requests from Googlebar.

If the env_proxy option is passed in with a TRUE value, then proxy settings are read from environment variables (see LWP::UserAgent::env_proxy)

QUERY METHOD

Top

$pr = $gpr->get('http://www.yahoo.com');

Queries Google for the pagerank of the specified URL and returns it. If the query is successful, an integer value from 0 to 10 is returned. If the query fails for some reason (Google unreachable, URL does not begin with 'http://' or 'https://', undefined URL passed), it returns undef.

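In list context this function returns a list of two elements, where the first is the result as in scalar context and the second is the HTTP::Response object (returned by LWP::UserAgent::get). This can be useful for debugging purposes and for querying failure details. If the URL is rejected before any request is made (undefined, or not beginning with 'http://' or 'https://'), an empty list is returned instead.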

BUGS

Top

If you find any, please report ;)

AUTHOR

Top

Yuri Karaban <tech@askold.net>.

The checksum algorithm is taken from the Mozilla module pagerankstatus (http://pagerankstatus.mozdev.org) by Stephane Queraud <squeraud@toteme.com>.

The algorithm was modified (15-09-2004) to follow the new checksum algorithm used in googlebar.

COPYRIGHT

Top


WWW-Google-PageRank documentation Contained in the WWW-Google-PageRank distribution.

package WWW::Google::PageRank;

# -*- perl -*-

use strict;
use warnings;

# Declares the package global $VERSION so it can be assigned under strict.
use vars qw($VERSION);

use LWP::UserAgent;  # HTTP client used by new() and get()
use URI::Escape;     # supplies uri_escape(), used by get() to build the query

$VERSION = '0.16';

# Constructor.
#
# Accepts key/value options:
#   agent     - User-Agent header value (a GoogleToolbar string by default)
#   env_proxy - when true, proxy settings are loaded from the environment
#   proxy     - when true, used as the proxy for the 'http' scheme
#   timeout   - when true, handed to the user agent as its timeout
#   host      - host that is queried ('toolbarqueries.google.com' by default)
#
# Returns the blessed object, or nothing if no user agent could be created.
sub new {
  my ($class, %opt) = @_;

  my $agent = $opt{agent}
    || 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)';
  my $ua = LWP::UserAgent->new(agent => $agent)
    or return;

  $ua->env_proxy                  if $opt{env_proxy};
  $ua->proxy('http', $opt{proxy}) if $opt{proxy};
  $ua->timeout($opt{timeout})     if $opt{timeout};

  my $self = {
    ua   => $ua,
    host => $opt{host} || 'toolbarqueries.google.com',
  };
  return bless($self, $class);
}

# Queries the configured host for the pagerank of $url.
#
# Scalar context: returns the rank captured from the response, or undef when
# the request fails or the response carries no rank.
# List context: returns (rank-or-undef, response object).
# When $url is undefined or does not start with http:// or https://
# (case-insensitive), returns nothing at all without sending a request.
sub get {
  my ($self, $url) = @_;

  return if !defined $url;
  return if $url !~ m[^https?://]i;

  # The checksum is computed over the same 'info:' string that is queried.
  my $checksum = '6' . _compute_ch_new('info:' . $url);
  my $base     = 'http://' . $self->{host} . '/search?client=navclient-auto&ch=';
  my $query    = $base . $checksum .
    '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url);

  my $resp = $self->{ua}->get($query);

  # Stays undef unless the request succeeded and a rank line was found.
  my $rank;
  if ($resp->is_success && $resp->content =~ /Rank_\d+:\d+:(\d+)/) {
    $rank = $1;
  }

  return ($rank, $resp) if wantarray;
  return $rank;
}

# Newer checksum variant built on top of _compute_ch().
#
# The plain checksum of $url is reduced to a seed, the seed is expanded into
# twenty 32-bit words (seed minus 9*i for i = 0..19, wrapping via _wsub),
# the words are packed little-endian, and the checksum of that buffer is
# returned.
sub _compute_ch_new {
  my $url = shift;

  my $seed = _compute_ch($url);
  $seed = (($seed % 0x0d) & 7) | (($seed / 7) << 2);

  my @words;
  for my $i (0 .. 19) {
    my $word = $seed;
    _wsub($word, $i * 9);
    push @words, $word;
  }

  return _compute_ch(pack("V20", @words));
}

# Computes a 32-bit checksum of the given string.
#
# The string is unpacked into byte values and consumed 12 at a time: each
# group is folded, as three little-endian 32-bit words, into three running
# registers which are then scrambled with _mix(). Afterwards the total length
# and the remaining (fewer than 12) bytes are folded in, followed by one last
# _mix(). Every addition wraps modulo 2**32 via _wadd().
#
# NOTE(review): the structure resembles Bob Jenkins' "lookup2" hash with a
# custom initial value for the third register -- confirm before relying on it.
#
# Returns the third register, a non-negative integer below 2**32.
sub _compute_ch {
  my $url = shift;

  my @bytes = unpack("C*", $url);
  my ($x, $y, $z) = (0x9e3779b9, 0x9e3779b9, 0xe6359a60);

  my $pos  = 0;
  my $left = scalar @bytes;

  # Full 12-byte groups.
  for (; $left >= 12; $pos += 12, $left -= 12) {
    my @g = @bytes[$pos .. $pos + 11];

    _wadd($x, $g[0] | ($g[1] << 8) | ($g[2]  << 16) | ($g[3]  << 24));
    _wadd($y, $g[4] | ($g[5] << 8) | ($g[6]  << 16) | ($g[7]  << 24));
    _wadd($z, $g[8] | ($g[9] << 8) | ($g[10] << 16) | ($g[11] << 24));

    _mix($x, $y, $z);
  }

  # The total length goes into the third register ...
  _wadd($z, scalar @bytes);

  # ... so trailing bytes 8..10 are shifted one byte higher than a plain
  # little-endian layout; bytes 0..3 and 4..7 fill the first two registers.
  # Bytes are folded from the highest index down.
  for my $i (reverse 0 .. $left - 1) {
    my $byte = $bytes[$pos + $i];
    if ($i >= 8) {
      _wadd($z, $byte << (8 * ($i - 7)));
    }
    elsif ($i >= 4) {
      _wadd($y, $byte << (8 * ($i - 4)));
    }
    else {
      _wadd($x, $byte << (8 * $i));
    }
  }

  _mix($x, $y, $z);

  return $z; # already reduced modulo 2**32, hence never negative
}

# Scrambles three 32-bit values in place.
#
# The caller's variables are updated through @_ aliasing. Three rounds are
# run; in each round every register has the other two subtracted from it
# (wrapping via _wsub) and is then XORed with a shifted copy of a neighbour.
# Left shifts are reduced modulo 2**32 so the values stay within 32 bits.
sub _mix {
  my ($x, $y, $z) = @_;

  # Per round: right shift for the first register, left shift for the
  # second, right shift for the third.
  my @rounds = (
    [13,  8, 13],
    [12, 16,  5],
    [ 3, 10, 15],
  );

  for my $round (@rounds) {
    my ($r1, $l2, $r3) = @$round;

    _wsub($x, $y);
    _wsub($x, $z);
    $x ^= $z >> $r1;

    _wsub($y, $z);
    _wsub($y, $x);
    $y ^= ($x << $l2) % 4294967296;

    _wsub($z, $x);
    _wsub($z, $y);
    $z ^= $y >> $r3;
  }

  @_[0 .. $#_] = ($x, $y, $z);
}

# Wrapping addition: overwrites the first argument (through the @_ alias)
# with (first + second) modulo 2**32 and returns the new value.
sub _wadd {
  my $sum = $_[0] + $_[1];
  return $_[0] = int($sum % 4294967296);
}
# Wrapping subtraction: overwrites the first argument (through the @_ alias)
# with (first - second) modulo 2**32 and returns the new value.
sub _wsub {
  my $diff = $_[0] - $_[1];
  return $_[0] = int($diff % 4294967296);
}

1;


__END__