Email::Fingerprint::Cache - Cache observed email fingerprints


Email-Fingerprint documentation Contained in the Email-Fingerprint distribution.

Index


Code Index:

NAME

Top

Email::Fingerprint::Cache - Cache observed email fingerprints

VERSION

Top

Version 0.01

SYNOPSIS

Top

    use Email::Fingerprint::Cache;

    my %fingerprints;           # To access cache contents

    # Create a cache
    my $cache     =  new Email::Fingerprint::Cache(
        backend   => "AnyDBM",
        hash      => \%fingerprints,
        file      => $file,             # Created if doesn't exist
        ttl       => 3600 * 24 * 7,     # Purge records after one week
    );

    # Prepare it for use
    $cache->lock or die "Couldn't lock: $!";    # Waits for lock
    $cache->open or die "Couldn't open: $!";

    # Work with fingerprints
    for my (@message_fingerprints) {

        if ($fingerprints{$_}) {
            print "Fingerprint found: $_\n";
            next;
        }

        my $now = time;
        $fingerprints{$_} = $now;

        print "Fingerprint added: $_\n";
    }

    # Get rid of old records
    $cache->purge;

    # Print a listing of all fingerprints
    $cache->dump;

    # Finish up
    $cache->close;
    $cache->unlock;

ATTRIBUTES

Top

METHODS

Top

new

    my $fingerprint =  new Email::Fingerprint::Cache(
        file        => $file,   # Default: .maildups
        backend     => "AnyDBM",  # Default: "AnyDBM"
        ttl         => $sec,    # Default: 3600*24*7
        hash        => $ref,    # Optional
    );

Returns a new Email::Fingerprint::Cache. The cache must still be opened before it can be used.

BUILD

Internal helper method; never called directly by users.

set_file

  $file = $cache->set_file( 'foo' ) or die "Failed to set filename";
  # now $file eq 'foo.db' or 'foo.dir', etc., depending on the backend;
  # it is almost certainly NOT 'foo'.

Sets the file to be used for the cache. Returns the actual filename on success; false on failure.

The actual filename will probably differ from the 'foo', because the backend will usually add an extension or otherwise munge it.

set_file has no effect while the cache file is locked or open!

get_backend

Returns the backend object for this cache.

dump

    # Be a good citizen
    $cache->lock;
    $cache->open;

    $cache->dump;

    # Be a good neighbor
    $cache->close;
    $cache->unlock;

Dump a human-readable version of the contents of the cache. Data is printed in timestamp order.

The cache must first be opened, and should first be locked.

open

    $cache->open or die;

Open the cache file, and tie it to a hash. This is delegated to the backend.

close

  $cache->close;

Close the cache file and untie the hash.

lock

  $cache->lock or die;                  # returns immediately
  $cache->lock( block => 1 ) or die;    # Waits for a lock
  $cache->lock( %opts ) or die;         # Backend-specific options

Lock the DB file to guarantee exclusive access.

unlock

  $cache->unlock or warn "Unlock failed";

Unlock the DB file.

purge

    $cache->purge;                  # Use default TTL
    $cache->purge( ttl => 3600 );   # Everything older than 1 hour

Purge the cache of old entries. This reduces the risk of false positives from things like reused message IDs, but increases the risk of false negatives.

The default is one week. Dedicated spam-fighters might prefer to use a longer TTL.

DESTROY

Clean up the module. If the hash is still tied, we warn the user and call close() on $self.

DEMOLISH

Internal helper method, never called directly by user.

_delegate

Delegate the specified method to the backend. Internal method.

AUTHOR

Top

Len Budney, <lbudney at pobox.com>

BUGS

Top

The dump() method assumes that Perl's time() function returns seconds since the UNIX epoch, 00:00:00 UTC, January 1, 1970. The module will work on architectures with non-standard epochs, but the automated tests will fail.

Please report any bugs or feature requests to bug-email-fingerprint at rt.cpan.org, or through the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Email-Fingerprint. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

SUPPORT

Top

You can find documentation for this module with the perldoc command.

    perldoc Email::Fingerprint::Cache

You can also look for information at:

* AnnoCPAN: Annotated CPAN documentation

http://annocpan.org/dist/Email-Fingerprint

* CPAN Ratings

http://cpanratings.perl.org/d/Email-Fingerprint

* RT: CPAN's request tracker

http://rt.cpan.org/NoAuth/Bugs.html?Dist=Email-Fingerprint

* Search CPAN

http://search.cpan.org/dist/Email-Fingerprint

ACKNOWLEDGEMENTS

Top

Email::Fingerprint::Cache is based on caching code in the eliminate_dups script by Peter Samuel and available at http://www.qmail.org/.

COPYRIGHT & LICENSE

Top


Email-Fingerprint documentation Contained in the Email-Fingerprint distribution.
package Email::Fingerprint::Cache;
use Class::Std;

use warnings;
use strict;

use Carp qw( croak cluck );
use Scalar::Util qw( reftype blessed );

our $VERSION = '0.01';

my %hash    :ATTR( :get<hash> )                  = ();
my %ttl     :ATTR( :name<ttl> :default(604800) ) = ();
my %backend :ATTR( :init_arg<backend> :default('AnyDBM') ) = ();

sub BUILD {
    my ( $self, $ident, $args ) = @_;

    # Default hash is a fresh-n-tasty anonymous hash
    $hash{$ident} = defined $args->{hash} ? $args->{hash} : {};

    # Backend will also need access to the hash
    $args->{hash} = $hash{$ident};

    # Default backend is AnyDBM
    my $backend = defined $args->{backend} ? $args->{backend} : 'AnyDBM';

    # Default cache file
    $args->{file} ||= '.maildups';

    # Try accessing package as a subclass of Email::Fingerprint::Cache
    my $package = __PACKAGE__ . "::" . $backend;
    eval "use $package";                                    ## no critic

    # Try accessing package using the given name exactly. If this fails,
    # we try constructing a backend anyway, in case the module is already
    # imported--or, e.g., defined in the script file itself.
    if ($@) {
        $package = $backend;
        eval "use $package";                                ## no critic
    }

    undef $backend;

    # Perhaps the correct module was loaded by our caller;
    # try instantiating the backend even if the above steps
    # all failed.
    eval {
        $backend =  $package->new({
            file => $args->{file},
            hash => $args->{hash},
        });
    };

    # It's a fatal error if the backend doesn't load
    croak "Can't load backend module" if $@ or not $backend;

    $backend{$ident} = $backend;
}

sub set_file {
    my ($self, $file) = @_;

    # Validation
    return if $self->get_backend->is_locked;
    return if $self->get_backend->is_open;

    # OK, there's no harm in changing the file attribute
    $self->get_backend->set_file($file);

    1; 
}

sub get_backend :PRIVATE() {
    my $self = shift;
    return $backend{ident $self};
}

sub dump {
    my $self = shift;
    my $hash = $self->get_hash;

    for my $key ( sort { $hash->{$a} <=> $hash->{$b} } keys %$hash )
    {
        my $value = $hash->{$key};
        print "$value\t", scalar gmtime $value, "\t$key\n";
    }
}

sub open {
    my $self = shift;

    return $self->_delegate( "open", @_ );
}

sub close {
    my $self = shift;

    return $self->_delegate( "close", @_ );
}

sub lock {
    my $self = shift;

    return $self->_delegate( "lock", @_ );
}

sub unlock {
    my $self = shift;
    return $self->_delegate( "unlock", @_ );
}

sub purge {
    my $self = shift;
    my %opts = @_;

    my $hash = $self->get_hash;
    my $ttl  = defined $opts{ttl} ? $opts{ttl} : $self->get_ttl;
    my $now  = time;

    for my $key ( keys %$hash )
    {
        my $timestamp = $hash->{$key} || 0; # Also clobbers bad data like undef
        delete $hash->{$key} if ($now - $timestamp) > $ttl;
    }

    1;
}

sub DEMOLISH {
    my $self   = shift;

    my $backend = $self->get_backend;

    # Failing to close() the cache is bad: data won't be
    # committed to disk.
    if ( $backend and $backend->is_open )
    {
        cluck "Cache DESTROY()ed before it was close()ed";
        $self->close;
    }

    # Failure to unlock() is rude, but we don't say anything.
    $self->unlock;
}

sub _delegate :PRIVATE() {
    my ($self, $method, @args) = @_;

    my $backend = $self->get_backend;
    return unless $backend;

    return $backend->$method(@args);
}

1;