| WWW-Scraper-Gmail documentation | Contained in the WWW-Scraper-Gmail distribution. |
WWW::Scraper::Gmail - Perl extension for loging in and reading Gmail Mailbox information.
use WWW::Scraper::Gmail; A simple scraper for gmail.
Logs into email through https, does some stuff and gets back a list of inbox items. Uses ~/.gmailrc for now for username and password. The format is as follows [gmail] username=<username> password=<password>
you'd do well to chmod it 700. Doesn't do error checking for log in problems.
None by default.
Erik F. Kastner, <lt>kastner@gmail.com<gt>
Copyright (C) 2004 by Erik F. Kastner
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.3 or, at your option, any later version of Perl 5 you may have available.
| WWW-Scraper-Gmail documentation | Contained in the WWW-Scraper-Gmail distribution. |
package WWW::Scraper::Gmail; use 5.005003; use strict; use warnings; require Exporter; require LWP; require Crypt::SSLeay; use LWP::UserAgent; use Env qw{HOME}; use Carp; #use Data::Dumper; use HTML::Entities; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use WWW::Scraper::Gmail ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( ); our $VERSION = '0.09'; # Preloaded methods go here. # my ($next_url); #my ($js_ver); my ($gm_l_cookie, $sid); my ($url, $url2, $url3, $url_init, $urlx, $ua, $req, $res); my ($cookie, $dump, $inbox, $head, $zx); my ($gmail_at); my $num = 0; my ($username, $password); my $logged_in = 0; my $pid = "$ENV{HOME}/.gmailpid"; my $gmailrc = "$ENV{HOME}/.gmailrc"; $url = "https://www.google.com/accounts/ServiceLoginBoxAuth"; $url2 = "https://www.google.com/accounts/CheckCookie?service=mail&chtml=LoginDoneHtml"; #$urlx = "http://gmail.google.com/gmail?search=inbox&view=tl&start=0&init=1&zx=$zx"; $url3 = "http://gmail.google.com/gmail?search=inbox&view=tl&start=0"; $url_init = "http://gmail.google.com/gmail?search=inbox&view=tl&start=0&init=1"; sub setUP { unlink "$gmailrc"; $username = shift @_; $password = shift @_; } sub getUP { open(GMAILRC, "$gmailrc") or die("Can't Open $gmailrc \nFormat:\n[gmail]\nusername=<username>\npassword=<password>\n"); while (<GMAILRC>) { $username = $1 if (/username=(.*)/); $password = $1 if (/password=(.*)/);; } close(GMAILRC); return(0) if(!$username or !$password); return(1); } sub login { $ua = LWP::UserAgent->new(); #its a GOOSE $ua->agent("User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7) Gecko/20040608"); $head = HTTP::Headers->new(); #, Referer => $ref); if(open(GMAILPID, "$pid")) { my $first = <GMAILPID>; if ($first - time() > 50000) { #cookie is expired unlink($pid); last(); } $cookie = <GMAILPID>; $gmail_at = <GMAILPID>; $zx = <GMAILPID>; #print "cookie = $cookie\ngmail_at = $gmail_at\nzx=$zx\n"; $head = HTTP::Headers->new(Cookie => $cookie); close(GMAILPID); chomp($cookie, $gmail_at, $zx); return(0); } getUP() if (!$username); #--------------------------------------------------------- $req = HTTP::Request->new(GET => "http://gmail.google.com/"); $res = $ua->request($req); #Ok, all the cookies are blanked out at this point #--------------------------------------------------------- $req = HTTP::Request->new(GET => "https://www.google.com/accounts/ServiceLoginBox?service=mail&continue=https%3A%2F%2Fgmail.google.com%2Fgmail"); $res = $ua->request($req); #--------------------------------------------------------- $req = HTTP::Request->new(GET => "https://www.google.com/accounts/ServiceLoginBox?service=mail&continue=https%3A%2F%2Fgmail.google.com%2Fgmail"); $res = $ua->request($req); #--------------------------------------------------------- $head->push_header(Referer => "https://www.google.com/accounts/ServiceLoginBox?service=mail&continue=https%3A%2F%2Fgmail.google.com%2Fgmail"); $head->push_header(Cookie => "Session=en_US; en_US;"); $req = HTTP::Request->new(POST => "https://www.google.com/accounts/ServiceLoginBoxAuth", $head); $req->content_type("application/x-www-form-urlencoded"); $req->content("continue=https://gmail.google.com/gmail&service=mail&Email=$username&Passwd=$password&PersistentCookie=yes&null=Sign+in"); $res = $ua->request($req); my $dump = $res->as_string(); while ($dump =~ m!^Set-Cookie: (SID[^;]*).*!mgs) { #just get the SID $sid .= "$1"; } if ($dump =~ m!(CheckCookie.+?)"!mgs) { #have to get the url to go to next $next_url = "https://www.google.com/accounts/$1"; } $head->referer("https://www.google.com/accounts/ServiceLoginBoxAuth"); $head->remove_header("Cookie"); #Cookie: Session=en_US; en_US; SID=AdVxxBlym6yJx-cWNmCvV2EgDllBCk8R-B7MB_0fqeZ-2vYrVgFMwXiMJiJucsluXNY7SDuK4p7bGDGIcqqa0mg= #Cookie: Session=en_US; en_US; SID=AZ9jS5SGlgV7gX_clo063pjTE1R6OSonFHI2iPJcJGvR2vYrVgFMwXiMJiJucsluXINZah0H-npo0on6buw3QuM=; $head->push_header(Cookie => "Session=en_US; en_US; $sid"); $req = HTTP::Request->new(GET => $next_url, $head); $next_url = ""; $res = $ua->simple_request($req); $dump = $res->as_string(); #do the redirect ourselves! while ($dump =~ m!Location: (.+?)Serv!gis) { $next_url = $1; chomp($next_url); } #--------------------------------------------------------- $head->remove_header("Cookie"); $head->remove_header("Referer"); $gm_l_cookie = "GMAIL_LOGIN=T" . (time() - 2) . "435/" . (time()-1) . "221/" . time() . "264"; $head->push_header(Cookie => "$gm_l_cookie; $sid"); $req = HTTP::Request->new(GET => $next_url, $head); $next_url = ""; $res = $ua->simple_request($req); $cookie = ""; $dump = $res->as_string(); while ($dump =~ m!^Set-Cookie: ([^;]*)!mgs) { $cookie .= "$1; "; if ($1 =~ /SID=(.*)/) { $sid = $1; } if ($1 =~ /AT=(.*)/) { $gmail_at = $1; } } $cookie .= "jscookietest=valid"; if ($dump =~ m!src=(/gmail\?view=page&name=js&ver=(.+?)) f!mgis) { $next_url = $1; #$js_ver = $2; $zx = $2; } #print "got $cookie ---- $next_url - and $js_ver\n\n\n"; #print $res->as_string(), "\n"; exit(); #var fs_time=(new Date()).getTime();var testcookie = 'jscookietest=valid'; #document.cookie = testcookie; #if (document.cookie.indexOf(testcookie) == -1) {top.location = '/gmail/html/nocookies.html';} #document.cookie = testcookie + ';expires=' + new Date(0).toGMTString(); #var agt = navigator.userAgent.toLowerCase(); #if (agt.indexOf('msie')!= -1 && document.all) {var control = (agt.indexOf('msie 5') != -1) ? 'Microsoft.XMLHTTP' : 'Msxml2.XMLHTTP';try {new ActiveXObject(control);} catch (e) {top.location = '/gmail/html/noactivex.html';}} #name=main src=/gmail/html/loading.html frameborder=0 noresize scrolling=no><frame name=js src=/gmail?view=page&name=js #&ver=8d26317a8120ce2c frameborder=0 noresize></frameset> #--------------------------------------------------------- $head->remove_header("Cookie"); $head->push_header(Cookie => "$gm_l_cookie; $cookie"); $req = HTTP::Request->new(GET => "https://gmail.google.com/$next_url", $head); $res = $ua->simple_request($req); #--------------------------------------------------------- my $url3 = "https://gmail.google.com/gmail?search=inbox&view=tl&start=0"; #$req = HTTP::Request->new(GET => "https://gmail.google.com/gmail", $head); $req = HTTP::Request->new(GET=>$url3, $head); $res = $ua->simple_request($req); #--------------------------------------------------------- # # # if (open(GMAILPID, "> $pid")) { #Save the cookie to a file so that we don't have to go through it all each time print GMAILPID time(), "\n"; print GMAILPID $cookie, "\n"; print GMAILPID $gmail_at, "\n"; print GMAILPID $zx, "\n"; #print GMAILPID $zx, "\n"; close(GMAILPID); } $logged_in = 1; } sub doGmailAt { #must be logged in to do the gmail at.. login() unless $logged_in; $req = HTTP::Request->new(GET => $url_init, $head); $res = $ua->request($req); $dump = $res->as_string(); #more cookies #$zx = $1 if ($dump =~ m!ver=([A-Za-z0-9]*)!); while ($dump =~ m!^Set-Cookie: (GMAIL([^;]*)).*!mgs) { $cookie .= $1 . ";"; if ($1 =~ /GMAIL_AT=(.*)/) { $gmail_at = $1; } } $head = HTTP::Headers->new(Cookie => $cookie); #print "cookie = $cookie\ngmail_at = $gmail_at\nzx=$zx\n"; } sub countMail { login(); $req = HTTP::Request->new(GET => $url3, $head); $res = $ua->request($req); my $num = 0; if ($res->is_success()) { $inbox = $res->content(); $inbox =~ m!(D\(\[\"t\".*])!mgis; $inbox = $1; return(0) if (!$inbox); $inbox =~ s!\\!!ig; $inbox =~ s!</?b>!!ig; while ($inbox =~ m!\[".+?",([01]),[01],"(.+?)","<span id='_user_(.+?)'>.+?",".+?","(.+?)","(.+?)".+?\]!mgis) { $num++ if ($1); #my ($from, $subject, $new) = ($2, (($3 =~ /raquo/) ? $4 : $3), (($1 == 1) ? " NEW!!! " : "")); } } return $num; } sub outputMail { login(); my $delim = ($ARGV[0]) ? $ARGV[0] : ";;"; my $ret; $req = HTTP::Request->new(GET => $url3, $head); $res = $ua->request($req); if ($res->is_success()) { $inbox = $res->content(); $inbox =~ m!(D\(\[\"t\".*])!mgis; $inbox = $1; return("") if (!$inbox); $inbox =~ s!\\!!ig; #$inbox =~ s!</?b>!!ig; # D(["t",["fe4f9c5b8c5bf74",1,0,"\<b\>1:49pm\</b\>","\<span id=\'_user_yonnage@gmail.com\'\>John\</span\>, \<span id=\'_user_kastner@gmail.com\'\>me\</span\>, \<span id=\'_user_yonnage@gmail.com\'\>\<b\>John\</b\>\</span\> (36)","\<b\>»\</b\> ","\<b\>nutritional intake\</b\>","On Wed, 11 Aug 2004 16:10:29 -0400, Erik Kastner <kastner@gmail.com> wrote: > Good …",[] # ,"","fe4f9c5b8c5bf74",0] # ,["fe4676e88eff196",0,0,"Aug 9","\<span id=\'_user_davidwboswell@yahoo.com\'\>David Boswell\</span\>","\<b\>»\</b\> ","delicious on mozdev","erik, ok, you're all set up with http://delicious.mozdev.org/ i apologize for the delay in …",[] # ,"","fe4676e88eff196",0] # ] # ); # D(["t",["fe4fc45c88db08f",1,0,"<b>2:32pm</b>","<span id='_user_yonnage@gmail.com'>John</span>, <span id='_user_kastner@gmail.com'>me</span>, <span id='_user_yonnage@gmail.com'><b>John</b></span> (39)","<b>»</b> ","<b>nutritional intake</b>","this is the reply with more, "bitch" On Wed, 11 Aug 2004 17:15:15 -0400, Erik Kastner …",[] # ,"","fe4fc45c88db08f",0] # ,["fe4fc293a18f00a",1,0,"<b>2:30pm</b>","<span id='_user_efk@winelibrary.com'><b>Erik F. Kastner</b></span>","<b>»</b> ","<b>Yoy</b>","yoyo",[] # ,"","fe4fc293a18f00a",0] # ,["fe4676e88eff196",0,0,"Aug 9","<span id='_user_davidwboswell@yahoo.com'>David Boswell</span>","<b>»</b> ","delicious on mozdev","erik, ok, you're all set up with http://delicious.mozdev.org/ i apologize for the delay in …",[] # ,"","fe4676e88eff196",0] while ($inbox =~ m!\[".+?",([01]),[01],"(?:<b>)?(.+?)(?:</b>)?","(.+?)",".+?","(?:<b>)?(.+?)(?:</b>)?","(.+?)".+?\]!mgis) { $num++; my ($time, $from, $subject, $new, $blurb) = ($2, $3, $4, ($1 == 1) ? "new!" : "", $5); if ($from =~ m!<span.+?><b>(.+?)</b>!) { $from = $1; } else { $from =~ s!<span.+?>(.+?)</spa.*!$1!g; } #<span id='_user_yonnage@gmail.com'>John</span>, <span id='_user_kastner@gmail.com'>me</span>, <span id='_user_yonnage@gmail.com'><b>John</b></span> (39) #print "Looking at $4\n\n"; #my ($from, $subject, $new) = ($2, (($3 =~ /raquo/) ? $4 : $3), (($1 == 1) ? " NEW!!! " : "")); my $rec = {}; if ($1) { $ret .= "$from$delim$subject$delim$time$delim$blurb\n"; } } #print "$num total messages in inbox\n"; return $ret; } else { warn $res->content(); warn $res->status_line, "\n"; return(""); } } sub fetchMail { login(); my @msgs; $req = HTTP::Request->new(GET => $url3, $head); $res = $ua->request($req); if ($res->is_success()) { $inbox = $res->content(); $inbox =~ m!(D\(\[\"t\".*])!mgis; $inbox = $1; return(0) if (!$inbox); $inbox =~ s!\\!!ig; $inbox =~ s!</?b>!!ig; while ($inbox =~ m!\[".+?",([01]),[01],"(.+?)","<span id='_user_(.+?)'>.+?",".+?","(.+?)","(.+?)".+?\]!mgis) { $num++; #my ($from, $subject, $new) = ($2, (($3 =~ /raquo/) ? $4 : $3), (($1 == 1) ? " NEW!!! " : "")); my ($time, $from, $subject, $new, $blurb) = ($2, $3, $4, ($1 == 1) ? "new!" : "", $5); my $rec = {}; $rec = { from => $from, subject => $subject, date => $time, blurb => $blurb, new => $1 }; push @msgs, $rec; #print "Thread Started by $from, Subject $subject @ $time $new\n\t$blurb\n"; } #print "$num total messages in inbox\n"; return @msgs; } else { warn $res->content(); warn $res->status_line, "\n"; return(0); } } sub setPrefs { my ($arg) = @_; login(); doGmailAt(); $arg->{"MaxPer"} = 100 unless defined $arg->{MaxPer}; $arg->{"Signature"} = "" unless defined $arg->{Signature}; $arg->{"Signature"} = HTML::Entities::encode_entities_numeric($arg->{"Signature"}); #print Dumper $arg; my $url_pref=" http://gmail.google.com/gmail?search=inbox&view=tl&start=0&act=prefs&at=$gmail_at&p_bx_hs=1&p_ix_nt=$arg->{MaxPer}&p_bx_sc=1&p_sx_sg=$arg->{Signature}&zx=$zx"; #print "Going for $url_pref\n"; #$head = HTTP::Headers->new(Cookie => $cookie); #, Referer => $ref); $req = HTTP::Request->new(GET=>$url_pref, $head); $res = $ua->request($req); return ($res->as_string() =~ /saved/); } 1; __END__ # Below is stub documentation for your module. You'd better edit it!