Alvis::URLs - URL and hash standardisation utilities.


Alvis-Bags documentation Contained in the Alvis-Bags distribution.

Index


Code Index:

NAME

Top

Alvis::URLs - URL and hash standardisation utilities.

SYNOPSIS

Top

 $Alvis::URLs::keepfrag = 0; # set to 1 to keep the URL fragment; by default it is removed
 $cleanurl = &Alvis::URLs::CleanURL($url);
 $Alvis::URLs::nocase = 0;   # set to 1 to convert the whole URL to lowercase
 $Alvis::URLs::noclean = 0;  # set to 1 to skip cleaning with the URI library
 $standardurl = &Alvis::URLs::StandardURL($url);

DESCRIPTION

Top

Provides an MD5 hashing interface, as well as simple standards for URL cleaning based on the URI library.

METHODS

Top

easyhash32()

   $myhash = &easyhash32($text);

Return the first 32 bits of the MD5 hash of $text as an unsigned integer.

easyhash64char()

   $myhash = &easyhash64char($text);

Return the first 64 bits of the MD5 hash of $text as a 16-character hexadecimal string.

CleanURL()

 $cleanurl = &Alvis::URLs::CleanURL($url);

Use the URI library to clean up the format of the URL. The fragment is removed unless $Alvis::URLs::keepfrag is set.

StandardURL()

 $standardurl = &Alvis::URLs::StandardURL($url);

Standardise the format of the URL: convert it to lowercase if $Alvis::URLs::nocase is set, then apply CleanURL() (described above) unless $Alvis::URLs::noclean is set.

SEE ALSO

Top

URI(3).

AUTHOR

Top

Wray Buntine, <wray.buntine@hiit.fi>

COPYRIGHT AND LICENSE

Top


Alvis-Bags documentation Contained in the Alvis-Bags distribution.

package Alvis::URLs;

#  Module version.
$Alvis::URLs::VERSION = '0.3';

###################### CONFIGURATION #####################


############ END CONFIGURATION ######################

#  md5_hex() backs both easyhash routines; POSIX supplies strtol();
#  Encode supplies _utf8_off(); URI is used by CleanURL().
use Digest::MD5 qw(md5_hex);
use strict;
use POSIX;
use Encode;
use URI;

# encoding pragmas follow any includes like "use"
# NOTE(review): the 'encoding' pragma is deprecated in newer Perl releases
# and may refuse to load there -- confirm against the Perl versions this
# module has to support before relying on it.
use encoding 'utf8';
use open ':utf8';
#  Return the first 32 bits of the MD5 digest of a string as an
#  unsigned integer.
#
#    $_[0] - the string to hash
#
#  Always returns exactly one number, in the range 0 .. 0xFFFFFFFF,
#  in both scalar and list context.
sub easyhash32
{
  my $string = shift;
  #  Clear the UTF-8 flag on our private copy so that md5_hex() is
  #  handed the string's underlying octets.
  Encode::_utf8_off($string);
  my $dig = md5_hex($string);
  #  hex() instead of POSIX::strtol(): strtol() returns a second value
  #  (the number of unparsed characters) in list context, which leaked
  #  out to list-context callers, and its result is bounded by the C
  #  long type, which can be too narrow for values above 0x7FFFFFFF.
  return hex(substr($dig,0,8));
}

#  Return the leading 64 bits of the MD5 digest of a string, as text.
#
#    $_[0] - the string to hash
#
#  The result is the first 16 characters of the hexadecimal digest
#  produced by md5_hex().
sub easyhash64char
{
  my ($text) = @_;
  #  Work on the octets of our own copy: clearing the UTF-8 flag lets
  #  md5_hex() see the raw bytes of the string.
  Encode::_utf8_off($text);
  return substr(md5_hex($text), 0, 16);
}

#  URL switches: package globals read at call time by CleanURL() and
#  StandardURL().  All of them start at 0.
#  nocase: when true, StandardURL() lowercases the URL before any cleaning.
$Alvis::URLs::nocase = 0;
#  noclean: StandardURL() passes the URL through CleanURL() only while
#  this compares numerically equal to 0.
$Alvis::URLs::noclean = 0;
#  keepfrag: when true, CleanURL() leaves the URL fragment in place;
#  otherwise the fragment is cleared.
$Alvis::URLs::keepfrag = 0;

#  Clean up the format of a URL using the URI library.
#
#    $_[0] - the URL to clean
#
#  Returns undef when the argument is false (undef, '' or '0').
#  Otherwise returns the result of URI's canonical() for the URL, with
#  the fragment cleared first unless $Alvis::URLs::keepfrag is true.
#
#  The sub deliberately has no prototype: the former empty prototype
#  "()" declared a zero-argument function, so any call written without
#  a leading "&" failed to compile even though one argument is required.
sub CleanURL {
  my ($url) = @_;
  if ( !$url ) {
    #  Explicit undef (not a bare return) so that list-context callers
    #  keep receiving a single undef element, as before.
    return undef;
  }
  my $uri = URI->new($url);
  if ( ! $Alvis::URLs::keepfrag ) {
    $uri->fragment(undef);
  }
  return $uri->canonical;
}

#  Standardise the format of a URL according to the package switches.
#
#    $_[0] - the URL to standardise
#
#  If $Alvis::URLs::nocase is true the URL is lowercased.  Then, while
#  $Alvis::URLs::noclean is numerically 0, the URL is passed through
#  CleanURL().  Returns the resulting value.
#
#  The sub deliberately has no prototype: the former empty prototype
#  "()" declared a zero-argument function, so any call written without
#  a leading "&" failed to compile even though one argument is required.
sub StandardURL {
  my ($inu) = @_;
  if ( $Alvis::URLs::nocase ) {
    $inu = lc($inu);
  }
  if ( $Alvis::URLs::noclean == 0 ) {
    #  The "&" call form ignores any prototype declared on CleanURL.
    $inu = &Alvis::URLs::CleanURL($inu);
  }
  return $inu;
}


1;

__END__