| Email-Fingerprint documentation | Contained in the Email-Fingerprint distribution. |
Email::Fingerprint::Cache - Cache observed email fingerprints
Version 0.01
use Email::Fingerprint::Cache;
my %fingerprints; # To access cache contents
# Create a cache
my $cache = new Email::Fingerprint::Cache(
backend => "AnyDBM",
hash => \%fingerprints,
file => $file, # Created if doesn't exist
ttl => 3600 * 24 * 7, # Purge records after one week
);
# Prepare it for use
$cache->lock or die "Couldn't lock: $!"; # Waits for lock
$cache->open or die "Couldn't open: $!";
# Work with fingerprints
for my (@message_fingerprints) {
if ($fingerprints{$_}) {
print "Fingerprint found: $_\n";
next;
}
my $now = time;
$fingerprints{$_} = $now;
print "Fingerprint added: $_\n";
}
# Get rid of old records
$cache->purge;
# Print a listing of all fingerprints
$cache->dump;
# Finish up
$cache->close;
$cache->unlock;
my $fingerprint = new Email::Fingerprint::Cache(
file => $file, # Default: .maildups
backend => "AnyDBM", # Default: "AnyDBM"
ttl => $sec, # Default: 3600*24*7
hash => $ref, # Optional
);
Returns a new Email::Fingerprint::Cache. The cache must still be opened before it can be used.
Internal helper method; never called directly by users.
$file = $cache->set_file( 'foo' ) or die "Failed to set filename"; # now $file eq 'foo.db' or 'foo.dir', etc., depending on the backend; # it is almost certainly NOT 'foo'.
Sets the file to be used for the cache. Returns the actual filename on success; false on failure.
The actual filename will probably differ from the 'foo', because the backend will usually add an extension or otherwise munge it.
set_file has no effect while the cache file is locked or open!
Returns the backend object for this cache.
# Be a good citizen
$cache->lock;
$cache->open;
$cache->dump;
# Be a good neighbor
$cache->close;
$cache->unlock;
Dump a human-readable version of the contents of the cache. Data is printed in timestamp order.
The cache must first be opened, and should first be locked.
$cache->open or die;
Open the cache file, and tie it to a hash. This is delegated to the backend.
$cache->close;
Close the cache file and untie the hash.
$cache->lock or die; # returns immediately $cache->lock( block => 1 ) or die; # Waits for a lock $cache->lock( %opts ) or die; # Backend-specific options
Lock the DB file to guarantee exclusive access.
$cache->unlock or warn "Unlock failed";
Unlock the DB file.
$cache->purge; # Use default TTL
$cache->purge( ttl => 3600 ); # Everything older than 1 hour
Purge the cache of old entries. This reduces the risk of false positives from things like reused message IDs, but increases the risk of false negatives.
The default is one week. Dedicated spam-fighters might prefer to use a longer TTL.
Clean up the module. If the hash is still tied, we warn the user and call
close() on $self.
Internal helper method, never called directly by user.
Delegate the specified method to the backend. Internal method.
Len Budney, <lbudney at pobox.com>
The dump() method assumes that Perl's time() function returns
seconds since the UNIX epoch, 00:00:00 UTC, January 1, 1970. The
module will work on architectures with non-standard epochs, but the
automated tests will fail.
Please report any bugs or feature requests to
bug-email-fingerprint at rt.cpan.org, or through the web interface at
http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Email-Fingerprint.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.
You can find documentation for this module with the perldoc command.
perldoc Email::Fingerprint::Cache
You can also look for information at:
Email::Fingerprint::Cache is based on caching code in the
eliminate_dups script by Peter Samuel and available at
http://www.qmail.org/.
Copyright 2006-2011 Len Budney, all rights reserved.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Email-Fingerprint documentation | Contained in the Email-Fingerprint distribution. |
package Email::Fingerprint::Cache; use Class::Std; use warnings; use strict; use Carp qw( croak cluck ); use Scalar::Util qw( reftype blessed );
our $VERSION = '0.01';
my %hash :ATTR( :get<hash> ) = (); my %ttl :ATTR( :name<ttl> :default(604800) ) = (); my %backend :ATTR( :init_arg<backend> :default('AnyDBM') ) = ();
sub BUILD { my ( $self, $ident, $args ) = @_; # Default hash is a fresh-n-tasty anonymous hash $hash{$ident} = defined $args->{hash} ? $args->{hash} : {}; # Backend will also need access to the hash $args->{hash} = $hash{$ident}; # Default backend is AnyDBM my $backend = defined $args->{backend} ? $args->{backend} : 'AnyDBM'; # Default cache file $args->{file} ||= '.maildups'; # Try accessing package as a subclass of Email::Fingerprint::Cache my $package = __PACKAGE__ . "::" . $backend; eval "use $package"; ## no critic # Try accessing package using the given name exactly. If this fails, # we try constructing a backend anyway, in case the module is already # imported--or, e.g., defined in the script file itself. if ($@) { $package = $backend; eval "use $package"; ## no critic } undef $backend; # Perhaps the correct module was loaded by our caller; # try instantiating the backend even if the above steps # all failed. eval { $backend = $package->new({ file => $args->{file}, hash => $args->{hash}, }); }; # It's a fatal error if the backend doesn't load croak "Can't load backend module" if $@ or not $backend; $backend{$ident} = $backend; }
sub set_file { my ($self, $file) = @_; # Validation return if $self->get_backend->is_locked; return if $self->get_backend->is_open; # OK, there's no harm in changing the file attribute $self->get_backend->set_file($file); 1; }
sub get_backend :PRIVATE() { my $self = shift; return $backend{ident $self}; }
sub dump { my $self = shift; my $hash = $self->get_hash; for my $key ( sort { $hash->{$a} <=> $hash->{$b} } keys %$hash ) { my $value = $hash->{$key}; print "$value\t", scalar gmtime $value, "\t$key\n"; } }
sub open { my $self = shift; return $self->_delegate( "open", @_ ); }
sub close { my $self = shift; return $self->_delegate( "close", @_ ); }
sub lock { my $self = shift; return $self->_delegate( "lock", @_ ); }
sub unlock { my $self = shift; return $self->_delegate( "unlock", @_ ); }
sub purge { my $self = shift; my %opts = @_; my $hash = $self->get_hash; my $ttl = defined $opts{ttl} ? $opts{ttl} : $self->get_ttl; my $now = time; for my $key ( keys %$hash ) { my $timestamp = $hash->{$key} || 0; # Also clobbers bad data like undef delete $hash->{$key} if ($now - $timestamp) > $ttl; } 1; }
sub DEMOLISH { my $self = shift; my $backend = $self->get_backend; # Failing to close() the cache is bad: data won't be # committed to disk. if ( $backend and $backend->is_open ) { cluck "Cache DESTROY()ed before it was close()ed"; $self->close; } # Failure to unlock() is rude, but we don't say anything. $self->unlock; }
sub _delegate :PRIVATE() { my ($self, $method, @args) = @_; my $backend = $self->get_backend; return unless $backend; return $backend->$method(@args); }
1;