Compress::Zlib::Perl - (Partial) Pure perl implementation of Compress::Zlib


Compress-Zlib-Perl documentation Contained in the Compress-Zlib-Perl distribution.

Index


Code Index:

NAME

Top

Compress::Zlib::Perl - (Partial) Pure perl implementation of Compress::Zlib

SYNOPSIS

Top

    use Compress::Zlib::Perl;
    ($i, $status) = inflateInit(-WindowBits => -MAX_WBITS);
    ($out, $status) = $i->inflate($buffer);

DESCRIPTION

Top

This a pure perl implementation of Compress::Zlib's inflate API.

Inflating deflated data

Currently the only thing Compress::Zlib::Perl can do is inflate compressed data. A constructor and 3 methods from Compress::Zlib's interface are replicated:

inflateInit -WindowBits => -MAX_WBITS

Argument list specifies options. Expects that the option -WindowBits is set to a negative value. In scalar context returns an inflater object; in list context returns this object and a status (usually Z_OK)

inflate INPUT

Inflates this section of deflate compressed data stream. In scalar context returns some inflated data; in list context returns this data and an output status. The status is Z_OK if the input stream is not yet finished, Z_STREAM_END if all the input data is consumed and this output is the final output.

inflate modifies the input parameter; at the end of the compressed stream any data beyond its end remains in INPUT. Before the end of stream all input data is consumed during the inflate call.

This implementation of inflate may not be as prompt at returning data as Compress::Zlib's; this implementation currently buffers the last 32768 bytes of output data until the end of the input stream, rather than attempting to return as much data as possible during inflation.

total_in

Returns the total input (compressed) data so far

total_out

Returns the total output (uncompressed) data so far

EXPORT

crc32 BUFFER[, CRC32]

Calculate and return a 32 bit checksum for buffer. CRC32 is suitably initialised if undef is passed in.

Z_OK

Constant for returning normal status

Z_STREAM_END

Constant for returning end of stream

MAX_WBITS

Constant to pass to inflateInit (for compatibility with Compress::Zlib)

TODO

Top

BUGS

Top

SEE ALSO

Top

Compress::Zlib

AUTHOR

Top

Ton Hospel wrote a pure perl gunzip program. Nicholas Clark, <nick@talking.bollo.cx<gt> turned it into a state machine and reworked the decompression core to fit Compress::Zlib's interface.

COPYRIGHT AND LICENSE

Top


Compress-Zlib-Perl documentation Contained in the Compress-Zlib-Perl distribution.

package Compress::Zlib::Perl;

use 5.004;

# use if $] > 5.006, 'warnings';
# use warnings;
use strict;

require Exporter;

use vars qw($VERSION @ISA @EXPORT);
@ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

use constant Z_OK => 0;
use constant Z_STREAM_END => 1;
use constant MAX_WBITS => 16;

@EXPORT = qw(
	     Z_OK Z_STREAM_END MAX_WBITS crc32
);

$VERSION = '0.02';

{
  my @crc32;

  sub _init_crc32 {
    # I'm not sure why Ton wanted to reverse the order of the bits in this
    # constant, rather than using the bit-reversed constant
    # my $p=oct reverse sprintf"%032bb0", 0x04C11DB7;
    # But the only 5.005 friendly way I can find is this:
    my $p
      = unpack "I", pack "b*", scalar reverse unpack "b*", pack "I", 0x04C11DB7;
    @crc32 = map{for my$s(0..7) {$_ = $_>>1 ^ ($_&1 && $p)} $_} 0..255;
  }

  # Calculate gzip header 16 bit CRCs
  sub _crc16 {
    my $crc16 = shift;
    _init_crc32() unless @crc32;
    foreach my $input (@_) {
      # I have no way to test this, as nothing that I can find generates
      # gzip files with the header CRC.
      # Ton's code is this:
      $crc16 = $crc16>>8^$crc32[$crc16&0xff^ord(substr $input,$_,1)]
	for 0..length($input)-1;
      # I believe that the following is functionally equivalent, but should
      # be faster:
      # while ($input =~ /(.)/gs) {
      #   $crc16 = $crc16 >> 8 ^ $crc32[$crc16 & 0xff ^ ord $1];
      # }
      return $crc16;
    }
  }

  # Public interface starts here:

  # Calculate 32 bit CRCs
  sub crc32 {
    _init_crc32() unless @crc32;
    my ($buffer, $crc32) = @_;
    $crc32 ||= 0;
    $crc32 ^= 0xffffffff;
    my $pos = -length $buffer;
    $crc32 = $crc32>>8 ^ $crc32[$crc32&0xff^ord(substr($buffer, $pos++, 1))]
      while $pos;
    $crc32 ^ 0xffffffff;
  }
}

sub inflateInit {
  my %args = @_;
  die "Please specify negative window size"
    unless $args{-WindowBits} && $args{-WindowBits} < 0;
  my $self = bless {isize=>0,
		    osize=>0,
		    result=>"",
		    huffman=>"",
		    type0length=>"",
		    state=>\&stateReadFinal
		   };
  $self->_reset_bits_have;
  wantarray ? ($self, Z_OK) : $self;
}

sub total_in {
  $_[0]->{isize};
}

sub total_out {
  $_[0]->{osize};
}

sub inflate {
  $_[0]->{input} = \$_[1];
  my ($return, $status);
  $_[0]->{izize} += length $_[1];
  if (&{$_[0]->{state}}) {
    # Finished, so flush everything
    $return = length $_[0]->{result};
    $status = Z_STREAM_END;
  } else {
    die length ($_[1]) . " input remaining" if length $_[1];
    $return = length ($_[0]->{result}) - 0x8000;
    $return = 0 if $return < 0;
    $status = Z_OK;
  }
  $_[0]->{izize} -= length $_[1];
  $_[0]->{osize} += $return;
  wantarray ? (substr ($_[0]->{result}, 0, $return, ""), $status)
    : substr ($_[0]->{result}, 0, $return, "");
}

# Public interface ends here

sub _reset_bits_have {
  my $self = shift;
  $self->{val} = $self->{have} = 0;
}


# get arg bits (little endian)
sub _get_bits {
  my ($self, $want) = @_;
  my ($bits_val, $bits_have) = @{$self}{qw(val have)};
  while ($want > $bits_have) {
    # inlined input read
    my $byte = substr ${$_[0]->{input}}, 0, 1, "";
    if (!length $byte) {
      @{$self}{qw(val have)} = ($bits_val, $bits_have);
      return;
    }
    $bits_val |= ord($byte) << $bits_have;
    $bits_have += 8;
  }
  my $result = $bits_val & (1 << $want)-1;
  $bits_val >>= $want;
  $bits_have -= $want;
  @{$self}{qw(val have)} = ($bits_val, $bits_have);
  return $result;
}

# Get one huffman code
sub _get_huffman {
  my ($self, $code) = @_;
  $code = $self->{$code};
  my ($bits_val, $bits_have, $str) = @{$self}{qw(val have huffman)};
  do {
      if (--$bits_have < 0) {
	# inlined input read
	my $byte = substr ${$_[0]->{input}}, 0, 1, "";
	if (!length $byte) {
	  # bits_have is -1, but really should be zero, so fix in save
	  @{$self}{qw(val have huffman)} = ($bits_val, 0, $str);
	  return;
	}
	$bits_val = ord $byte;
	$bits_have = 7;
      }
      $str .= $bits_val & 1;
      $bits_val >>= 1;
    } until exists $code->{$str};
  defined($code->{$str}) || die "Bad code $str";
  @{$self}{qw(val have huffman)} = ($bits_val, $bits_have, "");
  return $code->{$str};
}

# construct huffman code
sub make_huffman {
  my $counts = shift;
  my (%code, @counts);
  push @{$counts[$counts->[$_]]}, $_ for 0..$#$counts;
  my $value = 0;
  my $bits = -1;
  for (@counts) {
    $value *= 2;
    next unless ++$bits && $_;
    # Ton used sprintf"%0${bits}b", $value;
    $code{reverse unpack "b$bits", pack "V", $value++} = $_ for @$_;
  }
  # Close the code to avoid infinite loops (and out of memory)
  $code{reverse unpack "b$bits", pack "V", $value++} = undef for
    $value .. (1 << $bits)-1;
  @code{0, 1} = () unless %code;
  return \%code;
}

# Inflate state machine.
{
  my ($static_lit_code, $static_dist_code, @lit_base, @dist_base);

  my @lit_extra = (-1,
		   0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,
		   3,3,3,3,4,4,4,4,5,5,5,5,0,-2,-2);
  my @dist_extra = (0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,
		    9,9,10,10,11,11,12,12,13,13,-1,-1);
  my @alpha_map = (16, 17, 18, 0, 8, 7, 9, 6, 10,
		   5, 11, 4, 12, 3, 13, 2, 14, 1, 15);
  sub prepare_tables {
    my $length = 3;
    for (@lit_extra) {
      push @lit_base, $length;
      $length += 1 << $_ if $_ >= 0;
    }
    # Exceptional case
    splice(@lit_base, -3, 3, 258);

    my $dist = 1;
    for (@dist_extra) {
      push @dist_base, $dist;
      $dist += 1 << $_ if $_ >= 0;
    }
    splice(@dist_base, -2, 2);
  }

  sub stateReadFinal {
    my $bit = _get_bits($_[0], 1);
    if (!defined $bit) {
      # STALL
      return;
    }
    $_[0]->{final} = $bit;
    goto &{$_[0]->{state} = \&stateReadType};
  }
  sub stateReadType {
    my $type = _get_bits($_[0], 2);
    if (!defined $type) {
      # STALL
      return;
    }
    $_[0]->{type} = $type;
    if ($type) {
      prepare_tables() unless @lit_base;
      if ($type == 1) {
	$_[0]->{lit}  = $static_lit_code  ||=
	  make_huffman([(8)x144,(9)x112, (7)x24, (8)x8]);
	$_[0]->{dist} = $static_dist_code ||=
	  make_huffman([(5)x32]);
	# This is the main inflation loop.
	goto &{$_[0]->{state} = \&stateReadLit};
      } elsif ($type == 2) {
	goto &{$_[0]->{state} = \&stateReadHLit};
      } else {
	die "deflate subtype $type not supported\n";
      }
    }
    goto &{$_[0]->{state} = \&stateReadUncompressedLen};
  }

  sub stateReadUncompressedLen {
    # Not compressed;
    $_[0]->_reset_bits_have;
    # inlined input read
    $_[0]->{type0length}
      .= substr ${$_[0]->{input}}, 0, 4 - length $_[0]->{type0length}, "";
    if (length $_[0]->{type0length} < 4) {
      # STALL
      return;
    }
    my ($len, $nlen) = unpack("vv", $_[0]->{type0length});
    $_[0]->{type0length} = "";
    $len == (~$nlen & 0xffff) ||
      die "$len is not the 1-complement of $nlen";
    $_[0]->{type0left} = $len;
    goto &{$_[0]->{state} = \&stateReadUncompressed};
  }

  sub stateReadUncompressed {
    # inlined input read
    my $got = substr ${$_[0]->{input}}, 0, $_[0]->{type0left}, "";
    $_[0]->{result} .= $got;
    if ($_[0]->{type0left} -= length $got) {
      # Still need more.
      # STALL
      return;
    }
    if ($_[0]->{final}) {
      # Finished.
      return 1;
    }
    # Begin the next block
    goto &{$_[0]->{state} = \&stateReadFinal};
  }

  sub stateReadHLit {
    my $hlit = _get_bits($_[0], 5);
    if (!defined $hlit) {
      # STALL
      return;
    }
    $_[0]->{hlit} = $hlit + 257;
    goto &{$_[0]->{state} = \&stateReadHDist};
  }
  sub stateReadHDist {
    my $hdist = _get_bits($_[0], 5);
    if (!defined $hdist) {
      # STALL
      return;
    }
    $_[0]->{hdist} = $hdist + 1;
    goto &{$_[0]->{state} = \&stateReadHCLen};
  }
  sub stateReadHCLen {
    my $hclen = _get_bits($_[0], 4);
    if (!defined $hclen) {
      # STALL
      return;
    }
    $_[0]->{alphaleft} = $_[0]->{hclen} = $hclen + 4;
    # Determine the code length huffman code
    $_[0]->{alpha_raw} = [(0) x @alpha_map];

    goto &{$_[0]->{state} = \&stateReadAlphaCode};
  }
  sub stateReadAlphaCode {
    my $alpha_code = $_[0]->{alpha_raw};
    while ($_[0]->{alphaleft}) {
      my $code = _get_bits($_[0], 3);
      if (!defined $code) {
	# STALL
	return;
      }
      # my $where = $_[0]->{hclen} - $_[0]->{alphaleft};
      $alpha_code->[$alpha_map[$_[0]->{hclen} - $_[0]->{alphaleft}--]] = $code;
    }
    $_[0]->{alpha} = make_huffman($alpha_code);
    delete $_[0]->{alpha_raw};

    # Get lit/length and distance tables
    $_[0]->{code_len} = [];
    goto &{$_[0]->{state} = \&stateBuildAlphaCode};
  }

  sub stateBuildAlphaCode {
    my $code_len = $_[0]->{code_len};
    while (@$code_len < $_[0]->{hlit}+$_[0]->{hdist}) {
      my $alpha = _get_huffman($_[0], 'alpha');
      if (!defined $alpha) {
	# STALL
	return;
      }
      if ($alpha < 16) {
	push @$code_len, $alpha;
      } elsif ($alpha == 16) {
	goto &{$_[0]->{state} = \&stateReadAlphaCode16};
      } elsif ($alpha == 17) {
	goto &{$_[0]->{state} = \&stateReadAlphaCode17};
      } else {
	goto &{$_[0]->{state} = \&stateReadAlphaCodeOther};
      }
    }
    @$code_len == $_[0]->{hlit}+$_[0]->{hdist} || die "too many codes";
    my @lit_len = splice(@$code_len, 0, $_[0]->{hlit});
    $_[0]->{lit}  = make_huffman(\@lit_len);
    $_[0]->{dist} = make_huffman($code_len);
    delete $_[0]->{code_len};
    goto &{$_[0]->{state} = \&stateReadLit};
  }

  sub stateReadAlphaCode16 {
    my $code_len = $_[0]->{code_len};
    my $bits = _get_bits($_[0], 2);
    if (!defined $bits) {
      # STALL
      return;
    }
    push @$code_len, ($code_len->[-1]) x (3+$bits);
    goto &{$_[0]->{state} = \&stateBuildAlphaCode};
  }

  sub stateReadAlphaCode17 {
    my $code_len = $_[0]->{code_len};
    my $bits = _get_bits($_[0], 3);
    if (!defined $bits) {
      # STALL
      return;
    }
    push @$code_len, (0) x (3+$bits);
    goto &{$_[0]->{state} = \&stateBuildAlphaCode};
  }

  sub stateReadAlphaCodeOther {
    my $code_len = $_[0]->{code_len};
    my $bits = _get_bits($_[0], 7);
    if (!defined $bits) {
      # STALL
      return;
    }
    push @$code_len, (0) x (11+$bits);
    goto &{$_[0]->{state} = \&stateBuildAlphaCode};
  }

  sub stateReadLit {
    while (1) {
      my $lit = _get_huffman($_[0], 'lit');
      if (!defined $lit) {
	# STALL
	return;
      }
    if ($lit >= 256) {
      if ($lit_extra[$lit -= 256] < 0) {
	die "Invalid literal code" if $lit;

	if ($_[0]->{final}) {
	  # Finished.
	  return 1;
	}
	# Begin the next block
	goto &{$_[0]->{state} = \&stateReadFinal};
      }
      $_[0]->{litcode} = $lit;
      # BREAK
      goto &{$_[0]->{state} = \&stateGetLength};
    }

      $_[0]->{result} .= chr $lit;
      # Back to the main inflation loop
      # goto &stateReadLit;
      # ie loop
    }
  }

  sub stateGetLength {
    my $lit = $_[0]->{litcode};
    my $bits = _get_bits($_[0], $lit_extra[$lit]);
    if (!defined $bits) {
      # STALL
      return;
    }
    $_[0]->{length} = $lit_base[$lit] + ($lit_extra[$lit] && $bits);
    goto &{$_[0]->{state} = \&stateGetDCode};
  }

  sub stateGetDCode {
    my $d = _get_huffman($_[0], 'dist');
    if (!defined $d) {
      # STALL
      return;
    }
    $_[0]->{dcode} = $d;
    goto &{$_[0]->{state} = \&stateGetDistDecompress};
  }

  sub stateGetDistDecompress {
    my $d = $_[0]->{dcode};
    die "Invalid distance code" if $d >= 30;
    my $bits = _get_bits($_[0], $dist_extra[$d]);
    if (!defined $bits) {
      # STALL
      return;
    }
    my $dist = $dist_base[$d] + ($dist_extra[$d] && $bits);

    # Go for it
    my $length = $_[0]->{length};
    if ($dist >= $length) {
      my $section = substr ($_[0]->{result}, -$dist, $length);
      $_[0]->{result} .= $section;
    } else {
      my $remaining = $length;
      while ($remaining) {
	my $take
	  = $dist >= $remaining ? $remaining : $dist;
	$_[0]->{result} .= substr($_[0]->{result}, -$dist, $take);
	$remaining -= $take;
      }
    }
    # Back to the main inflation loop
    goto &{$_[0]->{state} = \&stateReadLit};
  }
}

1;
__END__