Mail::Abuse::Reader::GoogleGroups - Reads Mail::Abuse::Report from NANAS via Google Groups


Mail-Abuse documentation Contained in the Mail-Abuse distribution.

Index


Code Index:

NAME

Top

Mail::Abuse::Reader::GoogleGroups - Reads Mail::Abuse::Report from NANAS via Google Groups

SYNOPSIS

Top

  use Mail::Abuse::Report;
  use Mail::Abuse::Reader::GoogleGroups;
  my $r = new Mail::Abuse::Reader::GoogleGroups;
  my $report = new Mail::Abuse::Report (reader => $r);

DESCRIPTION

Top

THIS CODE SHOULD NOT BE USED IN PRODUCTION YET. SEE bin/google-gather INSTEAD, FOR AN ALTERNATE MECHANISM OF FETCHING REPORTS FROM NANAS VIA GOOGLE GROUPS.

This module uses the news.admin.net-abuse.sightings archive kindly provided by the Google(tm) Groups service to feed public complaints into the Mail::Abuse package.

The general idea is that a search is going to be performed and each result will be fed into the Mail::Abuse engine. The parameters of the process can be configured by the following keys in the configuration file.

google groups search

Regexp to search for in the text of each article.

google groups max messages

Maximum number of messages to read in a single run. Defaults to 50.

google groups oldest message

Controls the maximum age of a message. Messages that are older (accorging to its Date: header) will be skipped and not counted. This defaults to five days.

google groups server

The name of the Google(tm) Groups server to use. Defaults to groups.google.com.

google groups proxy

The proxy server to use.

debug google groups

When set to a true value, causes debug information to be sent to STDERR.

The following methods are implemented within this class.

read($report)

Populates the text of the given $report using the ->text method. Must return true if succesful or false otherwise.

EXPORT

None by default.

HISTORY

Top

0.01

Original version; created by h2xs 1.2 with options

  -ACOXcfkn
	Mail::Abuse
	-v
	0.01

LICENSE AND WARRANTY

Top

This code and all accompanying software comes with NO WARRANTY. You use it at your own risk.

This code and all accompanying software can be used freely under the same terms as Perl itself.

AUTHOR

Top

Luis E. Muņoz <luismunoz@cpan.org>

SEE ALSO

Top

perl(1).


Mail-Abuse documentation Contained in the Mail-Abuse distribution.
package Mail::Abuse::Reader::GoogleGroups;

require 5.005_62;

use Carp;
use strict;
use warnings;
use Date::Manip;
use WWW::Google::Groups;

use base 'Mail::Abuse::Reader';

				# The code below should be in a single line
our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf " %d."."%03d" x $#r, @r };

				# This contraption is required to include
				# $VERSION in the BLURB.

BEGIN { $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf " %d."."%03d" x $#r, @r } };

use constant GROUP 	=> 'news.admin.net-abuse.sightings';
use constant BLURB 	=> <<EOF;

The message below has been submitted to the &{GROUP} news group
and fetched using Mail::Abuse::Reader::GoogleGroups $VERSION

EOF
    ;
    use constant QUERY	=> 'google groups search';

    use constant MAX	=> 'google groups max messages';

    use constant OLDEST	=> 'google groups oldest message';

    use constant SERVER	=> 'google groups server';

    use constant PROXY	=> 'google groups proxy';

    use constant DEBUG	=> 'debug google groups';

sub read
{
    my $self	= shift;
    my $rep	= shift;

    my $config	= $rep->config;

    unless ($config->{&QUERY})
    {
	carp "Not enough config info for GoogleGroups reader";
	return;
    }

    my $search = qr/$config->{&QUERY}/;

    unless ($self->agent)
    {
	$self->agent( new WWW::Google::Groups
		      (
		       server => $config->{&SERVER} || 'groups.google.com',
		       $config->{&PROXY} ? (proxy => $config->{&PROXY}) : (),
		       )
		      );
	
	$self->count(0);
	$self->group($self->agent->select_group(&GROUP));
#	$self->group($self->agent->search(query => "group:" . 
#					  &GROUP . " " . $config->{&QUERY},
#					  limit => 100));

    }

    unless ($self->before)
    {
	my $date_before;

	Date_Init('TZ=UTC');

	eval 
	{
	    if (ref $config->{&OLDEST} eq 'ARRAY')
	    {
		$date_before =  ParseDate(join(' ', @{$config->{&OLDEST}}));
	    }
	    else {
		$date_before = ParseDate($rep->config->{&OLDEST} 
					 || "5 days ago");
	    }
	};

	warn "M::A::R::GoogleGroups parser said $@"
	    if $@ and $config->{&DEBUG};
	die "M::A::R::GoogleGroups: Cannot parse '" . &OLDEST . "\n"
	    unless $date_before;
	$self->before(UnixDate($date_before, '%s'));
	die "M::A::R::GoogleGroups: Times before the epoch are not supported" 
	    if $self->before < 0;
	warn "M::A::R::GoogleGroups: Ignoring posts older than ", 
	$self->before, "\n" if $config->{&DEBUG};
    }

    while ($self->thread($self->group->next_thread), $self->thread)
    {
	warn "M::A::R::GoogleGroups: Mining thread '", 
	$self->thread->title, "'\n" 
	    if $config->{&DEBUG};

	my $mid = undef;

	while ($self->article($self->thread->next_article), $self->article)
	{
	    if ($mid and $mid eq $self->article->header('Message-Id'))
	    {
		warn "M::A::R::GoogleGroups: Looks like we pulled the "
		    . "same message twice\n"
			if $config->{&DEBUG};
		last;
	    }

	    $mid = $self->article->header('Message-Id');

	    return unless $self->count < ($config->{&MAX} || 50);

	    my $date = $self->article->header('Date');
	    my $article_date = undef;

	    warn "M::A::R::GoogleGroups: Fetched article ", 
	    $self->article->header('Message-Id'), "/$date\n"
		if $config->{&DEBUG};

	    eval 
	    {
		$article_date = UnixDate(ParseDate($date), '%s');
	    };
				# Only if parsing succeeded, will we filter
				# this article
	    unless ($@)
	    {
		if ($article_date < $self->before)
		{
		    warn "M::A::R::GoogleGroups filtered article ",
		    "($date/$article_date)\n" 
			if $config->{&DEBUG};
		    last;
		}
	    }

	    if ($self->article->body =~ m/$search/i)
	    {
		$self->count($self->count + 1);
		my $buffer = 'From: ' . $self->article->header('From')	. "\n";
		$buffer .= 'Id: ' . $self->article->header('Message-ID'). "\n";
		$buffer .= 'Date: ' . $self->article->header('Date')	. "\n";
		$buffer .= $self->article->header('Subject')	. "\n\n";
		$buffer .= &BLURB;
		$buffer .= $self->article->body;
		$rep->text(\$buffer);
		return 1;
	    }
	}
	warn "M::A::R::GoogleGroups: Thread exhausted. Going to the next one\n"
	    if $config->{&DEBUG};
    }

    warn "M::A::R::GoogleGroups: Group exhausted.\n"
	if $config->{&DEBUG};

    return;
}

__END__