Statistics::SparseVector - Perl5 extension for manipulating sparse bitvectors


Statistics-MaxEntropy documentation Contained in the Statistics-MaxEntropy distribution.

Index


Code Index:

NAME

Top

Statistics::SparseVector - Perl5 extension for manipulating sparse bitvectors

SYNOPSIS

Top

 use Statistics::SparseVector;

 # methods that create new bitvectors
 $vec = Statistics::SparseVector->new($n);
 $vec2 = $vec1->Clone();
 $vec = Statistics::SparseVector->new_Enum($s, $n);
 $vec = Statistics::SparseVector->new_Bin($s, $n);

 # miscellaneous
 $vec2->Substitute_Vector($vec1, $of22, $len2, $off1, $len1);
 $vec->Size();
 $vec->to_Enum();
 $vec->to_Bin();
 $vec->Fill();
 $vec->Empty();
 $vec->increment();
 $n = $vec->Norm();
 @list = $vec->indices();

 # manipulation on the bit level
 $vec->Bit_Off($i);
 $vec->Bit_On($i);
 $vec->bit_flip($i);
 $vec->bit_test($i);

 # overloaded operators
 # increment
 $vec++;
 # stringify
 "$vec"




DESCRIPTION

Top

This module implements sparse bitvectors. Several methods for manipulating bitvectors are implemented. On purpose the naming of the methods is identical to the Bit::Vector package by Stephen Beyer; if you find your vectors to be sparse (have little bits that are on) you can easily switch to a less memory consuming representation.

Creation of bitvectors

new
 $vec = Statistics::BitVector->new($n);

A bitvector of length $n is created. All bits are zero.

Clone
 $clone = $vec->Clone();

A copy of $vec is returned.

new_Enum
 $vec = Statistics::BitVector->new_Enum($enumstring, $n);

A new bitvector of length $n is created from the comma-separated list of in $enumstring.

new_Bin
 $vec = Statistics::BitVector->new_Bin($bitstring, $n);

A new bitvector of length $n is created from bitstring $bitstring.

Vector-wide manipulation of bits

Substitute_Vector
 $vec2->Substitute_Vector($vec1, $off2, $len2, $off1, $len1);

$len2 contiguous bits in target vector $vec2 starting from $off2 are replaced by $len1 contiguous bits from source vector $vec1 starting at bit $off1. If $off2 equals the length of $vec2 the bits from $vec1 are appended. If $len1 is zero the $len2 bits from $vec2 are deleted.

Fill
 $vec->Fill();

All bits of $vec are set to one.

Empty
 $vec->Empty();

All bits of $vec are set to zero.

increment
 $vec->increment(); $vec++;

The integer value of the bitvector is increased by one.

Bit_Off
 $vec->Bit_Off($i);

Bit $i is set to zero.

Bit_On
 $vec->Bit_On($i);

Bit $i is set to one.

bit_flip
 $vec->bit_flip($i);

Bit $i is flipped.

bit_test
 $vec->bit_test($i);

Returns 1 if bit $i is one, 0 otherwise.

Miscellany

Size
 $n = $vec->Size();

Returns the size of the vector.

to_Enum
 $enumstring = $vec->to_Enum();

Returns a comma-separated list of bits that are set.

indices

Returns an array of indices of bits that are set.

to_Bin
 $bitstring = $vec->to_Bin();

Returns a bitstring; bits should be read from right to left.

Norm

Returns the number of set bits.

Overloaded operators

++
 $vec++;

Same as method increment.

Double quotes
 $string = "$vec";

Data::Dumper wants to stringify vectors. Probably because Statistics::SparseVector is an overloaded package it expects double quotes to be overloaded as well.

Remarks about the implementation

VERSION

Top

Version 0.1.

AUTHOR

Top

SEE ALSO

Top

perl(1), Bit::Vector(3), Statistics::MaxEntropy(3), Statistics::ME.wrapper.pl(3), Statistics::Candidates(3).

COPYRIGHT

Top


Statistics-MaxEntropy documentation Contained in the Statistics-MaxEntropy distribution.

package Statistics::SparseVector;

##---------------------------------------------------------------------------##
##  Author:
##      Hugo WL ter Doest       terdoest@cs.utwente.nl
##  Description: module for sparse bitvectors
##               method names equal that of Bit::Vector
##
##---------------------------------------------------------------------------##
##  Copyright (C) 1998 Hugo WL ter Doest terdoest@cs.utwente.nl
##
##  This library is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This library  is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU Library General Public 
##  License along with this program; if not, write to the Free Software
##  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##


use strict;
use vars qw($VERSION 
	    @ISA 
	    @EXPORT 
	    @EXPORT_OK);
use overload
'++'     =>    \&increment,
'='      =>    \&Clone,
'""'     =>    \&stringify;
require Exporter;
require AutoLoader;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	
);
$VERSION = '0.1';


# Preloaded methods go here.

# necessary for overloading ""
# in turn required by Data::Dumper that wants to stringify variables
sub stringify {
    my($self) = @_;

    return($self);
}


# create a new bitvector
# all bits are `off'
sub new {
    my($this, $n) = @_;

    # for calling $self->new($someth):
    my $class = ref($this) || $this;
    my $self = {};
    bless $self, $class;
    $self->{N} = $n;
    return($self);
}


# create a copy of a bitvector and return it
sub Clone {
    my($self) = @_;

    my($new);

    $new = Statistics::SparseVector->new($self->{N});
    bless $new, ref($self);
    for (keys %{$self->{VECTOR}}) {
	$new->{VECTOR}{$_} = $self->{VECTOR}{$_};
    }
    return($new);
}


# is called right before Perl destroys a bitvector
# this happens automatically if a vector has zero references
sub DESTROY {
    my($self) = @_;

}


# creates a new vector from a bitstring
sub new_Bin {
    my($this, $n, $bitvector) = @_;

    my($i, $self);

    $self = $this->new($n);
    $i = 0;
    while ($bitvector) {
	if (chop($bitvector) eq "1") {
	    $self->{VECTOR}{$i} = 1;
	}
	$i++;
    }
    return($self);
}


# we assume a bit is `on' if its value is defined
sub bit_test {
    my($self, $i) = @_;

    return(defined($self->{VECTOR}{$i}));
}


# turning off a bit is making its value undefined
sub Bit_Off {
    my($self, $i) = @_;

    undef $self->{VECTOR}{$i};
}


# turns on a bit
sub Bit_On {
    my($self, $i) = @_;

    $self->{VECTOR}{$i} = 1;
}


# flips a bit, i.e. makes it undef if it is defined,
# and makes it defined if it is undefined
sub bit_flip {
    my($self, $i) = @_;

    if (defined($self->{VECTOR}{$i})) {
	undef $self->{VECTOR}{$i};
    }
    else {
	$self->{VECTOR}{$i} = 1;
    }
}


# increases the integer value of the bitvector by one
sub increment {
    my($self) = @_;

    my($carry, $i);

    $carry = 0;
    $i = 0;
    do {
	if ($self->{VECTOR}{$i}) {
	    undef $self->{VECTOR}{$i};
	    $carry = 1;
	}
	else {
	    $self->{VECTOR}{$i} = 1;
	    $carry = 0;
	}
	$i++;
    } until (($carry == 0) || ($i == $self->{N}));
}


# fills the set
sub Fill {
    my($self) = @_;

    my $i;

    for ($i = 0; $i < $self->{N}; $i++) {
	$self->{VECTOR}{$i} = 1;
    }
}


# clears all bits, empties the set
sub Empty {
    my($self) = @_;
    
    undef $self->{VECTOR};
}


# returns a bitstring
sub to_Bin {
    my($self) = @_;

    my($s, $i);

    $s = "";
    for ($i = 0; $i < $self->{N}; $i++) {
	if (defined($self->{VECTOR}{$i})) {
	    $s = "1$s";
	}
	else {
	    $s = "0$s";
	}
    }
    return($s);
}


# returns a comma-separated list of features that are on
sub to_Enum {
    my($self) = @_;

    return(join(',', keys(%{$self->{VECTOR}})));
}


# expects a comma-separated list of numbers
sub new_Enum {
    my($this, $n, $s) = @_;

    my($self);

    $self = $this->new($n);
    for (split(/,/,$s)) {
	$self->{VECTOR}{$_} = 1
    }
    return($self);
}


# returns the length of the vector
sub Size {
    my($self) = @_;

    return($self->{N});
}


# $len1 >= 0, $len2 >= 0, $vec2->Size() >= $off2 >= 0
# $vec1->Size() >= $off1 >= 0
# $vec1 may be undefined
# $len1 + $off1 < $vec->Size()
sub Interval_Substitute {
    my($vec2, $vec1, $off2, $len2, $off1, $len1) = @_;

    my($i,
       $oldvec2);

    # save  $vec2 in $oldvec2
    $oldvec2 = $vec2->Clone();
    # determine the new length for $vec2
    if ($off2 == $vec2->Size()) {
	# we are appending bits from source $vec1
	$vec2->{N} += $len1;
    }
    else {
	# we are inserting bits from $vec1
	$vec2->{N} += $len1 - $len2;
    }
    # target $vec2 changes only from $off2
    # copy the new bits from the source $vec1
    if (defined($vec1)) { # we have source bits
	for ($i = $off2; $i < $off2 + $len1; $i++) {
	    if (defined($vec1->{VECTOR}{$i - $off2 + $off1})) {
		$vec2->{VECTOR}{$i} = $vec1->{VECTOR}{$i - $off2 + $off1};
	    }
	    else {
		undef $vec2->{VECTOR}{$i};
	    }
	}
    }
    # append the rest of $oldvec2
    # index $i runs for $oldvec2, we correct for $vec2
    for ($i = $off2 + $len2; $i < $oldvec2->{N}; $i++) {
	if (defined($oldvec2->{VECTOR}{$i})) {
	    $vec2->{VECTOR}{$i-$len2+$len1} = $oldvec2->{VECTOR}{$i};
	}
	else {
	    undef $vec2->{VECTOR}{$i-$len2+$len1};
	}
    }
    undef $oldvec2;
}


# returns an array of indices of set bits
sub indices {
    my($self) = @_;

    return(grep($self->{VECTOR}{$_}, keys(%{$self->{VECTOR}})));
}


# the number of bits that are on
sub Norm {
    my($self) = @_;

    my(@a);

    @a = $self->indices();
    return($#a + 1);
}


# Autoload methods go after =cut, and are processed by the autosplit program.

1;

__END__

# Below is the stub of documentation for your module. You better edit it!