| News-Scan documentation | Contained in the News-Scan distribution. |
News::Scan::Article - collect information about news articles
use News::Scan::Article;
my $art = News::Scan::Article->new( ARG, [ OPTIONS, ] SCAN );
This module provides a derived class of Mail::Internet whose objects
are suitable for digesting Usenet news articles.
The ARG and OPTIONS parameters are identical to those required by
Mail::Internet, except ARG is required. See Mail::Internet.
The SCAN parameter should be a News::Scan object. See News::Scan.
If the article falls into the period of interest for SCAN, the object
is returned, else undef.
Sets or returns an object's group depending on whether SCAN-OBJ is
present.
Returns the article's author represented as a Mail::Address object.
Returns the article's Message-ID.
Returns the article's subject.
Returns the list of newsgroups this article was posted to.
Returns the size of this article in bytes.
Returns the size of this article's header in bytes.
Returns the number of lines consumed in this article by headers.
Returns the size of this article's body in bytes.
Returns the number of lines consumed in this article by the body.
Returns the size of this article's original content in bytes. See "QuoteRE" in News::Scan.
Returns the number of lines consumed in this article by original content. Keep in mind that original content is a subset of the body.
Returns the size of this article's signature in bytes.
Returns the number of lines consumed in this article by the signature.
Greg Bacon <gbacon@cs.uah.edu>
Copyright (c) 1997 Greg Bacon. All Rights Reserved. This library is free software. You may distribute and/or modify it under the same terms as Perl itself.
| News-Scan documentation | Contained in the News-Scan distribution. |
package News::Scan::Article;

## News::Scan::Article - a Mail::Internet subclass that records size and
## line-count statistics (header, body, original content, signature) for a
## single Usenet article and ties the article to the scan object it
## belongs to.

use strict;
use vars qw( $VERSION @ISA );

use Mail::Internet;
use Mail::Address;
use Date::Parse;

$VERSION = '0.51';
@ISA     = qw( Mail::Internet );

## new( ARG, [ OPTIONS, ] SCAN )
##
## Builds the article through Mail::Internet's constructor (everything but
## the last argument is passed through untouched), attaches SCAN as the
## article's group and computes the size statistics.
##
## Returns the object when the article's Date header falls inside SCAN's
## period, otherwise undef.
sub new {
    my $class = shift;
    my $scan  = pop;    ## last argument is always the scan object

    my $self = $class->SUPER::new(@_);
    return undef unless ref $self;    ## nothing to bless if the parent failed
    bless $self, $class;

    $self->group($scan);
    $self->calculate_sizes;

    return $self if $self->in_period($scan->period);

    ## Explicit undef (not a bare return) is kept on purpose: it is the
    ## documented result and what existing callers receive in list context.
    return undef;
}

## in_period( DAYS )
##
## Returns 1 when the article's Date header is no older than DAYS days
## before the program's start time ($^T), 0 otherwise.  Articles with a
## missing, empty or unparseable Date header are treated as out of period.
##
## Side effect: for an in-period article the timestamp is passed to the
## group's earliest() and latest() methods.
sub in_period {
    my $self   = shift;
    my $days   = shift;
    my $period = $days * 60 * 60 * 24;    ## days -> seconds

    my $date = $self->head->get('Date');
    return 0 unless (defined $date and $date);
    chomp $date;

    my $time = str2time($date);

    ## str2time yields undef for dates it cannot parse; never let that
    ## reach the numeric comparison or the group's earliest/latest.
    return 0 unless defined $time;

    return 0 if $time < ($^T - $period);

    $self->group->earliest($time);
    $self->group->latest($time);

    return 1;
}

## group( [ SCAN-OBJ ] )
##
## With an argument: stores it as the article's group and returns the
## previously stored value.  Without: returns the current group.
sub group {
    my $self = shift;
    my $slot = 'news_scan_article_group';

    return $self->{$slot} unless @_;

    my $previous = $self->{$slot};
    $self->{$slot} = shift;
    return $previous;
}

## _byte_count( LINES )
##
## Private helper: total length of all strings in LINES.
sub _byte_count {
    my $bytes = 0;
    my $line;
    foreach $line (@_) {
        $bytes += length $line;
    }
    return $bytes;
}

## calculate_sizes()
##
## Fills in every news_scan_article_* size and line-count slot from the
## article's current header and body.  Every slot is assigned on every
## call, so the accessors never return undef afterwards and calling this
## method again recomputes rather than accumulates.
sub calculate_sizes {
    my $self = shift;

    ## header
    my @header      = @{ $self->head->header };
    my $header_size = _byte_count(@header);

    $self->{'news_scan_article_header_size'}  = $header_size;
    $self->{'news_scan_article_header_lines'} = scalar @header;

    ## signature (if present): everything after the LAST "-- " cutline.
    ## Work on a copy so the article's real body is left untouched.
    my @body = @{ $self->body || [] };
    my $cut;
    my $i;
    for ($i = $#body; $i >= 0; $i--) {
        if ($body[$i] =~ /^-- $/) {
            $cut = $i;
            last;
        }
    }

    my @signature = ();
    if (defined $cut) {
        @signature = splice @body, $cut;
        shift @signature;    ## toss cutline
    }

    my $sig_size = _byte_count(@signature);
    $self->{'news_scan_article_sig_lines'} = scalar @signature;
    $self->{'news_scan_article_sig_size'}  = $sig_size;

    ## body (signature and cutline already removed)
    my $body_size = _byte_count(@body);
    $self->{'news_scan_article_body_size'}  = $body_size;
    $self->{'news_scan_article_body_lines'} = scalar @body;

    ## total: header + one byte for the header/body separator + body + sig.
    ## NOTE(review): the cutline itself is not counted in any figure; this
    ## matches the original arithmetic and is preserved deliberately.
    $self->{'news_scan_article_size'} =
        $header_size + 1 + $sig_size + $body_size;

    ## original content: body lines that do not match the group's quote
    ## pattern.  Defaults to zero when there is no group or no pattern.
    my $orig_size  = 0;
    my $orig_lines = 0;

    my $group    = $self->group;
    my $quote_re = $group ? $group->quote_re : undef;

    if ($quote_re) {
        ## No /o here: the pattern belongs to the scan object and may
        ## differ between articles handled by the same process.
        my @orig = grep { !/$quote_re/ } @body;
        $orig_size  = _byte_count(@orig);
        $orig_lines = scalar @orig;
    }

    $self->{'news_scan_article_orig_size'}  = $orig_size;
    $self->{'news_scan_article_orig_lines'} = $orig_lines;
}

## author()
##
## Returns the article's author as a Mail::Address object, taken from the
## first non-empty header among Reply-To, From and Sender.  Returns
## nothing when the article has no head or no parseable address.
##
## When the group provides an alias table containing the lower-cased
## address, the address is rewritten to the alias target.
sub author {
    my $self = shift;

    my $hd = $self->head || return;

    my $from = $hd->get('Reply-To')
            || $hd->get('From')
            || $hd->get('Sender')
            || "";
    chomp $from;

    my $addr = ( Mail::Address->parse($from) )[0];

    ## Check before touching the object: parse() yields nothing for an
    ## empty or unusable header.
    return unless (defined $addr and ref $addr);

    my $group = $self->group;
    if ($group) {
        my $aliases = $group->aliases;
        my $address = $addr->address;
        my $key     = lc(defined $address ? $address : '');

        if ($aliases and exists $aliases->{$key}) {
            ## XXX: Danger, Will Robinson!  Broken Encapsulation Alert!!!
            ## Writes the address slot of the Mail::Address object
            ## directly, exactly as the original code did.
            $addr->[1] = $aliases->{$key};
        }
    }

    return $addr;
}

## message_id()
##
## Returns the Message-ID header without its trailing newline, or undef
## when the header is absent.
sub message_id {
    my $self = shift;

    my $hdr = $self->head->get('Message-ID');
    chomp $hdr if defined $hdr;
    return $hdr;
}

## subject()
##
## Returns the Subject header without its trailing newline, or undef when
## the header is absent.
sub subject {
    my $self = shift;

    my $hdr = $self->head->get('Subject');
    chomp $hdr if defined $hdr;
    return $hdr;
}

## newsgroups()
##
## Returns the list of newsgroups named in the Newsgroups header, split on
## commas with surrounding whitespace removed.
sub newsgroups {
    my $self = shift;

    my $hdr = $self->head->get('Newsgroups') || '';
    $hdr =~ s/^\s+//;
    $hdr =~ s/\s+$//;

    return split /\s*,+\s*/, $hdr;
}

## Read-only accessors for the figures stored by calculate_sizes().
sub size         { $_[0]->{'news_scan_article_size'} }

sub header_size  { $_[0]->{'news_scan_article_header_size'} }
sub body_size    { $_[0]->{'news_scan_article_body_size'} }
sub orig_size    { $_[0]->{'news_scan_article_orig_size'} }
sub sig_size     { $_[0]->{'news_scan_article_sig_size'} }

sub header_lines { $_[0]->{'news_scan_article_header_lines'} }
sub body_lines   { $_[0]->{'news_scan_article_body_lines'} }
sub orig_lines   { $_[0]->{'news_scan_article_orig_lines'} }
sub sig_lines    { $_[0]->{'news_scan_article_sig_lines'} }

1;

__END__