| Alvis-Bags documentation | Contained in the Alvis-Bags distribution. |
Alvis::URLs - URL and hash standardisation utilities.
$Alvis::URLs::keepfrag = 0; # set to keep fragment, default removes $cleanurl = &Alvis::URLs::CleanURL($url); $Alvis::URLs::nocase = 0; # set to convert everything to lowercase $Alvis::URLs::noclean = 0; # set to disable use of URI cleaning $standardurl = &Alvis::URLs::StandardURL($url);
Provides an MD5 hashing interface, as well as simple standards for URL cleaning based on the URI library.
$myhash = &easyhash32($text);
Return 32-bit unsigned part of the MD5 hash as an integer.
$myhash = &easyhash64char($text);
Return 64-bit unsigned part of an MD5 hash as a 16 character string in hexadecimal.
$cleanurl = &Alvis::URLs::CleanURL($url);
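Use the URI library to clean up the format of the URL: the fragment is removed (unless $Alvis::URLs::keepfrag is set) and the URL is put into canonical form.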
$standardurl = &Alvis::URLs::StandardURL($url);
Standardise the format of the URL, including cleaning above if switches dictate.
URI(3).
Wray Buntine, <wray.buntine@hiit.fi>
Copyright (C) 2006 by Wray Buntine
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.4 or, at your option, any later version of Perl 5 you may have available.
| Alvis-Bags documentation | Contained in the Alvis-Bags distribution. |
package Alvis::URLs;

$Alvis::URLs::VERSION = '0.3';

######################  CONFIGURATION  #####################

############ END CONFIGURATION ######################

use Digest::MD5 qw(md5_hex);
use strict;
use warnings;
use POSIX;     # no longer used by easyhash32; kept so the package's imports are unchanged
use Encode;
use URI;

# encoding pragmas follow any includes like "use"
#
# The "encoding" pragma croaks at import time on Perl 5.26 and later, which
# made this module impossible to load there.  It is therefore only imported
# on the older Perls where it still works; nothing in this file depends on it.
BEGIN {
    if ( $] < 5.026 ) {
        require encoding;
        encoding->import('utf8');
    }
}
use open ':utf8';

# Private helper: hex MD5 digest (32 lowercase hex characters) of a value.
#
# Character strings (UTF-8 flag on) are hashed as their UTF-8 octets; byte
# strings are hashed as they are.  utf8::encode on a flagged string yields
# the same octets the former Encode::_utf8_off call exposed, without relying
# on an Encode internal.  An undefined value hashes like the empty string.
sub _md5_hex_of {
    my ($string) = @_;
    $string = '' unless defined $string;
    utf8::encode($string) if utf8::is_utf8($string);
    return md5_hex($string);
}

# easyhash32($text)
#
# Returns the first 32 bits of the MD5 hash of $text as one unsigned integer.
#
# hex() is used rather than POSIX::strtol because strtol returns two values
# in list context and is limited to the range of a C long, which clamps
# hashes of 0x80000000 and above where long is 32 bits wide.
sub easyhash32 {
    my $string = shift;
    return hex( substr( _md5_hex_of($string), 0, 8 ) );
}

# easyhash64char($text)
#
# Returns the first 64 bits of the MD5 hash of $text as a 16 character
# lowercase hexadecimal string.
sub easyhash64char {
    my $string = shift;
    return substr( _md5_hex_of($string), 0, 16 );
}

#  URL switches
$Alvis::URLs::nocase   = 0;    # true: StandardURL lowercases the whole URL
$Alvis::URLs::noclean  = 0;    # true: StandardURL skips CleanURL
$Alvis::URLs::keepfrag = 0;    # true: CleanURL keeps the #fragment

# CleanURL($url)
#
# Parses $url with URI, drops the fragment unless $Alvis::URLs::keepfrag is
# true, and returns the result of URI's canonical() method (a URI object
# that stringifies to the cleaned URL).
#
# Returns undef when $url is false (undef, '' or '0').  The explicit undef
# is kept on purpose so list-context callers still receive one element.
#
# Declared without the former empty prototype so that it can also be called
# as CleanURL($url), not only with the prototype-bypassing &CleanURL($url).
sub CleanURL {
    my ($url) = @_;
    if ( !$url ) {
        return undef;
    }
    my $uri = URI->new($url);
    if ( !$Alvis::URLs::keepfrag ) {
        $uri->fragment(undef);
    }
    return $uri->canonical;
}

# StandardURL($url)
#
# Standardises $url according to the package switches: lowercases the whole
# URL when $Alvis::URLs::nocase is true, then passes it through CleanURL
# unless $Alvis::URLs::noclean is true.  Returns whatever the last applied
# step produced (see CleanURL for its return value).
sub StandardURL {
    my $inu = shift;
    if ( $Alvis::URLs::nocase ) {
        # lc(undef) is ''; spelled out to keep that result without a warning
        $inu = lc( defined $inu ? $inu : '' );
    }
    # Boolean test, consistent with the other switches: any true value of
    # noclean disables cleaning (the old "== 0" ignored non-numeric values).
    if ( !$Alvis::URLs::noclean ) {
        $inu = CleanURL($inu);
    }
    return $inu;
}

1;

__END__