News::Scan::Article - collect information about news articles


News-Scan documentation Contained in the News-Scan distribution.

Index


Code Index:

NAME

Top

News::Scan::Article - collect information about news articles

SYNOPSIS

Top

    use News::Scan::Article;

    my $art = News::Scan::Article->new( ARG, [ OPTIONS, ] SCAN );

DESCRIPTION

Top

This module provides a derived class of Mail::Internet whose objects are suitable for digesting Usenet news articles.

CONSTRUCTOR

Top

new ( ARG, [ OPTIONS, ] SCAN )

The ARG and OPTIONS parameters are identical to those required by Mail::Internet, except ARG is required. See Mail::Internet. The SCAN parameter should be a News::Scan object. See News::Scan.

If the article falls into the period of interest for SCAN, the object is returned, else undef.

METHODS

Top

group ( [ SCAN-OBJ ] )

When SCAN-OBJ is present, sets the object's group to SCAN-OBJ and returns the previous group; otherwise returns the object's current group.

author

Returns the article's author represented as a Mail::Address object.

message_id

Returns the article's Message-ID.

subject

Returns the article's subject.

newsgroups

Returns the list of newsgroups this article was posted to.

size

Returns the size of this article in bytes.

header_size

Returns the size of this article's header in bytes.

header_lines

Returns the number of lines consumed in this article by headers.

body_size

Returns the size of this article's body in bytes.

body_lines

Returns the number of lines consumed in this article by the body.

orig_size

Returns the size of this article's original content in bytes. See "QuoteRE" in News::Scan.

orig_lines

Returns the number of lines consumed in this article by original content. Keep in mind that original content is a subset of the body.

sig_size

Returns the size of this article's signature in bytes.

sig_lines

Returns the number of lines consumed in this article by the signature.

SEE ALSO

Top

News::Scan, Mail::Internet, Mail::Address

AUTHOR

Top

Greg Bacon <gbacon@cs.uah.edu>

COPYRIGHT

Top


News-Scan documentation Contained in the News-Scan distribution.

## Article class for News::Scan: a Mail::Internet subclass that records
## size and line statistics for a single Usenet article.
package News::Scan::Article;

use strict;
## $VERSION and @ISA are package globals; declare them so they satisfy
## 'use strict'.
use vars qw( $VERSION @ISA );

## Mail::Internet is the base class (see @ISA below).
use Mail::Internet;
## Mail::Address->parse is used by author().
use Mail::Address;
## Date::Parse supplies str2time(), used by in_period().
use Date::Parse;

$VERSION = '0.51';
## Inherit from Mail::Internet.
@ISA = qw( Mail::Internet );

## Constructor.
##
## The last argument is the scan/group object; every argument before it is
## passed through unchanged to the parent class constructor.  The new
## article is attached to that group and measured, and is then returned
## only when it falls inside the group's period of interest.  Otherwise
## undef is returned.
sub new {
    my ($class, @parent_args) = @_;
    my $scan = pop @parent_args;

    my $article = $class->SUPER::new(@parent_args);
    bless $article, $class;

    ## The group must be attached first: calculate_sizes consults it.
    $article->group($scan);
    $article->calculate_sizes;

    return $article if $article->in_period($scan->period);

    return undef;
}

## in_period ( DAYS )
##
## Decide whether this article is recent enough to be of interest.
##
## DAYS is the length of the period of interest, in days, counted back
## from the time the program started ($^T).
##
## Returns 0 when the article has no usable Date header, when that header
## cannot be parsed, or when the article is older than the cutoff.
## Otherwise the article's timestamp is reported to the group through its
## earliest() and latest() methods and 1 is returned.
sub in_period {
    my ($self, $days) = @_;
    my $period = ($days || 0) * 60 * 60 * 24;

    my $date = $self->head->get('Date');
    return 0 unless defined $date and $date;
    chomp $date;

    ## str2time yields undef for a date it cannot parse.  Reject such
    ## articles explicitly rather than comparing undef numerically and
    ## possibly handing an undefined timestamp to the group.
    my $time = str2time($date);
    return 0 unless defined $time;

    return 0 if $time < ($^T - $period);

    my $group = $self->group;
    $group->earliest($time);
    $group->latest($time);

    return 1;
}

## group ( [ SCAN-OBJ ] )
##
## Combined accessor for the article's group.  Called without an
## argument it returns the current group.  Called with one it stores the
## new group and returns the one that was stored before.
sub group {
    my ($article, @replacement) = @_;
    my $slot = 'news_scan_article_group';

    return $article->{$slot} unless @replacement;

    my $previous = $article->{$slot};
    $article->{$slot} = $replacement[0];

    return $previous;
}

## calculate_sizes
##
## Measure the article and cache the results on the object for the size
## and line accessors.  Four regions are measured:
##
##   header    - every header line
##   signature - the lines after the last "-- " cutline, if there is one
##   body      - the body lines before the cutline
##   original  - the body lines that do not match the group's quote_re
##
## The total size is header + one separator byte + body + cutline +
## signature.  The cutline is counted in the total but not in the
## signature size.
##
## Every statistic is assigned on each call, so the values are always
## defined and calling the method again does not accumulate counts.
sub calculate_sizes {
    my $self = shift;

    ## header
    my @header      = @{ $self->head->header };
    my $header_size = _sum_lengths(@header);

    $self->{'news_scan_article_header_size'}  = $header_size;
    $self->{'news_scan_article_header_lines'} = scalar @header;

    ## signature (if present): search backwards for the cutline so that
    ## the last one in the article wins
    my @body = @{ $self->body };
    my @signature;
    my $cutline_size = 0;

    for my $i (reverse 0 .. $#body) {
        next unless $body[$i] =~ /^-- $/;

        @signature    = splice @body, $i;
        $cutline_size = length( shift @signature );  ## toss cutline
        last;
    }

    my $sig_size = _sum_lengths(@signature);
    $self->{'news_scan_article_sig_lines'} = scalar @signature;
    $self->{'news_scan_article_sig_size'}  = $sig_size;

    ## body (what is left once the signature has been removed)
    my $body_size = _sum_lengths(@body);
    $self->{'news_scan_article_body_size'}  = $body_size;
    $self->{'news_scan_article_body_lines'} = scalar @body;

    ## total: the extra byte is the header/body separator
    $self->{'news_scan_article_size'} =
        $header_size + 1 + $body_size + $cutline_size + $sig_size;

    ## original content; stays 0 without a group or a quote pattern
    my ($orig_size, $orig_lines) = (0, 0);

    my $group    = $self->group;
    my $quote_re = $group ? $group->quote_re : undef;

    if ($quote_re) {
        ## Deliberately no /o here: each group may carry its own pattern,
        ## and /o would freeze the first one seen for the whole process.
        my @orig = grep { !/$quote_re/ } @body;

        $orig_size  = _sum_lengths(@orig);
        $orig_lines = scalar @orig;
    }

    $self->{'news_scan_article_orig_size'}  = $orig_size;
    $self->{'news_scan_article_orig_lines'} = $orig_lines;
}

## Return the combined length of all the strings passed in.
sub _sum_lengths {
    my $sum = 0;
    $sum += length $_ for @_;
    return $sum;
}

## author
##
## Returns the article's author as a Mail::Address object, taken from the
## first non-empty header among Reply-To, From and Sender.  If the group
## has an alias for the (lower-cased) address, the address inside the
## returned object is replaced by that alias.
##
## Returns nothing (undef in scalar context, the empty list in list
## context) when the article has no head or no parseable author address.
sub author {
    my $self = shift;

    my $hd = $self->head || return;

    my $from = $hd->get('Reply-To')
            || $hd->get('From')
            || $hd->get('Sender')
            || "";
    chomp $from;

    ## Check the parse result before calling methods on it: an article
    ## without a usable sender must yield "no author", not a fatal error.
    my $addr = ( Mail::Address->parse($from) )[0];
    return unless defined $addr and ref $addr;

    my $address = $addr->address;
    my $group   = $self->group;
    my $aliases = $group ? $group->aliases : undef;

    if (defined $address and $aliases and exists $aliases->{lc $address}) {
        ## XXX: Danger, Will Robinson!  Broken Encapsulation Alert!!!
        ## This writes straight into the Mail::Address object's
        ## internal address slot.
        $addr->[1] = $aliases->{lc $address};
    }

    return $addr;
}

## message_id
##
## Returns the value of the article's Message-ID header with its trailing
## newline removed.
sub message_id {
    my ($article) = @_;

    chomp( my $id = $article->head->get('Message-ID') );

    return $id;
}

## subject
##
## Returns the value of the article's Subject header with its trailing
## newline removed.
sub subject {
    my ($article) = @_;

    chomp( my $text = $article->head->get('Subject') );

    return $text;
}

## newsgroups
##
## Returns the entries of the article's Newsgroups header.  Surrounding
## whitespace is trimmed and the value is split on commas, together with
## any whitespace around them.  A missing header is treated as an empty
## string.
sub newsgroups {
    my ($article) = @_;

    my $list = $article->head->get('Newsgroups') || '';

    ## trim both ends in place
    for ($list) {
        s/^\s+//;
        s/\s+$//;
    }

    return split /\s*,+\s*/, $list;
}

## Read-only accessors for the statistics cached by calculate_sizes.

## Sizes.
sub size {
    my ($article) = @_;
    return $article->{'news_scan_article_size'};
}

sub header_size {
    my ($article) = @_;
    return $article->{'news_scan_article_header_size'};
}

sub body_size {
    my ($article) = @_;
    return $article->{'news_scan_article_body_size'};
}

sub orig_size {
    my ($article) = @_;
    return $article->{'news_scan_article_orig_size'};
}

sub sig_size {
    my ($article) = @_;
    return $article->{'news_scan_article_sig_size'};
}

## Line counts.
sub header_lines {
    my ($article) = @_;
    return $article->{'news_scan_article_header_lines'};
}

sub body_lines {
    my ($article) = @_;
    return $article->{'news_scan_article_body_lines'};
}

sub orig_lines {
    my ($article) = @_;
    return $article->{'news_scan_article_orig_lines'};
}

sub sig_lines {
    my ($article) = @_;
    return $article->{'news_scan_article_sig_lines'};
}

1;

__END__