| Digest-ManberHash documentation | Contained in the Digest-ManberHash distribution. |
Digest::ManberHash - a Perl package to calculate Manber Hashes
use Digest::ManberHash;
$instance = Digest::ManberHash->new($maskbits, $prime, $charcount);
$hash1 = $instance->DoHash($filename1);
$hash2 = $instance->DoHash($filename2);
$similarity = $instance->Compare($hash1, $hash2);
Create an instance with Digest::ManberHash->new($maskbits, $prime, $charcount).
Parameters (all optional; a false or missing value selects the default):
$maskbits: range 1 .. 30, default 11.
$prime: range 3 .. 65537, default 7.
$charcount: range 8 .. 32768, default 64.
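For a detailed description of the algorithm please read Udi Manber, "Finding Similar Files in a Large File System" (USENIX Winter 1994 Technical Conference): http://citeseer.nj.nec.com/manber94finding.html.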
$hash = $instance->DoHash($filename);
This returns an object that holds a hash of the computed hash values.
$similarity = $instance->Compare($hash1, $hash2);
This returns a value in the range 0.0 .. 1.0, depending on how similar the two files are. Help wanted: the calculation could be improved!
| Digest-ManberHash documentation | Contained in the Digest-ManberHash distribution. |
package Digest::ManberHash;
use strict;
use warnings;

require Exporter;
require DynaLoader;

our @ISA = qw(Exporter DynaLoader);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
#
# NOTE(review): the list is kept unchanged for backward compatibility.
# HashFile has no definition in the Perl code of this file -- confirm
# whether it is provided elsewhere or is a stale name (DoHash?).
our @EXPORT = qw(
    HashFile new Compare
);

our $VERSION = '0.7';

# new([$class,] $maskbits, $prime, $charcount)
#
# Creates a hashing instance.  Every parameter is optional; a false or
# missing value selects the default (maskbits 11, prime 7, charcount 64).
#
# Both call styles are accepted:
#   Digest::ManberHash->new($maskbits, $prime, $charcount)    # method
#   Digest::ManberHash::new($maskbits, $prime, $charcount)    # function
# The function form is the one the synopsis shows and the one the default
# export of "new" makes available; treating its first argument as a class
# name would silently shift every parameter by one position.
#
# Returns a blessed hash whose "settings" entry holds whatever Init()
# returned.  Init() is not defined in the Perl part of this module;
# presumably it comes from the XS library loaded by bootstrap below.
sub new {
    my @args = @_;

    # An invocant is present when there are more arguments than parameters,
    # or when the first argument is a reference or looks like a package name
    # (the real parameters are plain integers).
    my $class        = __PACKAGE__;
    my $has_invocant = @args > 3
        || ( defined $args[0]
             && ( ref $args[0]
                  || $args[0] =~ /\A[A-Za-z_]\w*(?:::\w+)*\z/ ) );

    if ($has_invocant) {
        my $invocant = shift @args;

        # Only bless into the invocant's class when it really derives from
        # this package; anything else keeps the historical behaviour of
        # blessing into this package.
        $class = ref($invocant) || $invocant
            if defined $invocant && UNIVERSAL::isa($invocant, __PACKAGE__);
    }

    my ($maskbits, $prime, $charcount) = @args;
    $prime     ||= 7;
    $maskbits  ||= 11;
    $charcount ||= 64;

    # Note the argument order expected by Init(): prime first.
    my $settings = Init($prime, $maskbits, $charcount);

    return bless { "settings" => $settings }, $class;
}

# $hash = $instance->DoHash($filename)
#
# Passes the instance settings, the file name and an empty hash to
# ManberHash() (not defined in the Perl part of this module; presumably
# XS), which is expected to fill the hash.
#
# Returns a blessed hash with the entries
#   "data" - reference to the hash filled by ManberHash()
#   "base" - the instance that produced it (checked by Compare)
#
# Side effect: $instance->{"max"} is updated with the largest value seen
# so far for every key.
sub DoHash {
    my ($self, $filename) = @_;

    my %data;
    ManberHash($self->{"settings"}, $filename, \%data);

    while (my ($key, $value) = each %data) {
        # A key not seen before counts as 0, as it did when the undefined
        # entry was compared numerically.
        my $current_max = $self->{"max"}{$key} || 0;
        $self->{"max"}{$key} = $value if $current_max < $value;
    }

    return bless { "data" => \%data, "base" => $self }, __PACKAGE__;
}

# $similarity = $instance->Compare($hash1, $hash2)
#
# Compares two objects returned by DoHash() of this very instance.
#
# Returns 0 when neither object contains any key, otherwise
# 1 / (1 + sum of squared per-key differences); a key missing on one side
# counts as 0.  Identical data therefore gives 1.
#
# Dies when either argument was not produced by $instance.
sub Compare {
    my ($self, $file1, $file2) = @_;

    for my $hash ($file1, $file2) {
        die "Digest::ManberHash::Compare: argument was not created by DoHash of this instance"
            unless ref $hash
                && defined $hash->{"base"}
                && $hash->{"base"} eq $self;
    }

    my $data1 = $file1->{"data"};
    my $data2 = $file2->{"data"};

    # Union of the keys of both data sets.
    my %seen = map { $_ => 1 } (keys %$data1, keys %$data2);

    my $sum_sq = 0;
    my $count  = 0;
    for my $key (keys %seen) {
        my $diff = ($data1->{$key} || 0) - ($data2->{$key} || 0);
        $sum_sq += $diff * $diff;
        $count++;
    }

    return 0 if !$count;

    # Alternative formulas that were left commented out in the original:
    #   1 - 6*$sum_sq/($count*$count*$count - $count);
    #   1 - sqrt($sum_sq)/$count;
    return 1 / (1.0 + $sum_sq);
}

# Load the compiled part of the module.
__PACKAGE__->bootstrap($VERSION);

# Preloaded methods go here.

# Autoload methods go after __END__, and are processed by the autosplit program.

1;
__END__
#