| Compress-Zlib-Perl documentation | Contained in the Compress-Zlib-Perl distribution. |
Compress::Zlib::Perl - (Partial) Pure perl implementation of Compress::Zlib
use Compress::Zlib::Perl;
($i, $status) = inflateInit(-WindowBits => -MAX_WBITS);
($out, $status) = $i->inflate($buffer);
This a pure perl implementation of Compress::Zlib's inflate API.
Currently the only thing Compress::Zlib::Perl can do is inflate compressed data. A constructor and 3 methods from Compress::Zlib's interface are replicated:
Argument list specifies options. Expects that the option -WindowBits is set
to a negative value. In scalar context returns an inflater object; in list
context returns this object and a status (usually Z_OK)
Inflates this section of deflate compressed data stream. In scalar context
returns some inflated data; in list context returns this data and an output
status. The status is Z_OK if the input stream is not yet finished,
Z_STREAM_END if all the input data is consumed and this output is the
final output.
inflate modifies the input parameter; at the end of the compressed stream
any data beyond its end remains in INPUT. Before the end of stream all
input data is consumed during the inflate call.
This implementation of inflate may not be as prompt at returning data as
Compress::Zlib's; this implementation currently buffers the last 32768 bytes
of output data until the end of the input stream, rather than attempting to
return as much data as possible during inflation.
Returns the total input (compressed) data so far
Returns the total output (uncompressed) data so far
Calculate and return a 32 bit checksum for buffer. CRC32 is suitably
initialised if undef is passed in.
Constant for returning normal status
Constant for returning end of stream
Constant to pass to inflateInit (for compatibility with Compress::Zlib)
die Compress::Zlib
Ton Hospel wrote a pure perl gunzip program. Nicholas Clark, <nick@talking.bollo.cx<gt> turned it into a state machine and reworked the decompression core to fit Compress::Zlib's interface.
Copyright 2004 by Ton Hospel, Nicholas Clark
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Compress-Zlib-Perl documentation | Contained in the Compress-Zlib-Perl distribution. |
package Compress::Zlib::Perl; use 5.004; # use if $] > 5.006, 'warnings'; # use warnings; use strict; require Exporter; use vars qw($VERSION @ISA @EXPORT); @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. use constant Z_OK => 0; use constant Z_STREAM_END => 1; use constant MAX_WBITS => 16; @EXPORT = qw( Z_OK Z_STREAM_END MAX_WBITS crc32 ); $VERSION = '0.02'; { my @crc32; sub _init_crc32 { # I'm not sure why Ton wanted to reverse the order of the bits in this # constant, rather than using the bit-reversed constant # my $p=oct reverse sprintf"%032bb0", 0x04C11DB7; # But the only 5.005 friendly way I can find is this: my $p = unpack "I", pack "b*", scalar reverse unpack "b*", pack "I", 0x04C11DB7; @crc32 = map{for my$s(0..7) {$_ = $_>>1 ^ ($_&1 && $p)} $_} 0..255; } # Calculate gzip header 16 bit CRCs sub _crc16 { my $crc16 = shift; _init_crc32() unless @crc32; foreach my $input (@_) { # I have no way to test this, as nothing that I can find generates # gzip files with the header CRC. # Ton's code is this: $crc16 = $crc16>>8^$crc32[$crc16&0xff^ord(substr $input,$_,1)] for 0..length($input)-1; # I believe that the following is functionally equivalent, but should # be faster: # while ($input =~ /(.)/gs) { # $crc16 = $crc16 >> 8 ^ $crc32[$crc16 & 0xff ^ ord $1]; # } return $crc16; } } # Public interface starts here: # Calculate 32 bit CRCs sub crc32 { _init_crc32() unless @crc32; my ($buffer, $crc32) = @_; $crc32 ||= 0; $crc32 ^= 0xffffffff; my $pos = -length $buffer; $crc32 = $crc32>>8 ^ $crc32[$crc32&0xff^ord(substr($buffer, $pos++, 1))] while $pos; $crc32 ^ 0xffffffff; } } sub inflateInit { my %args = @_; die "Please specify negative window size" unless $args{-WindowBits} && $args{-WindowBits} < 0; my $self = bless {isize=>0, osize=>0, result=>"", huffman=>"", type0length=>"", state=>\&stateReadFinal }; $self->_reset_bits_have; wantarray ? ($self, Z_OK) : $self; } sub total_in { $_[0]->{isize}; } sub total_out { $_[0]->{osize}; } sub inflate { $_[0]->{input} = \$_[1]; my ($return, $status); $_[0]->{izize} += length $_[1]; if (&{$_[0]->{state}}) { # Finished, so flush everything $return = length $_[0]->{result}; $status = Z_STREAM_END; } else { die length ($_[1]) . " input remaining" if length $_[1]; $return = length ($_[0]->{result}) - 0x8000; $return = 0 if $return < 0; $status = Z_OK; } $_[0]->{izize} -= length $_[1]; $_[0]->{osize} += $return; wantarray ? (substr ($_[0]->{result}, 0, $return, ""), $status) : substr ($_[0]->{result}, 0, $return, ""); } # Public interface ends here sub _reset_bits_have { my $self = shift; $self->{val} = $self->{have} = 0; } # get arg bits (little endian) sub _get_bits { my ($self, $want) = @_; my ($bits_val, $bits_have) = @{$self}{qw(val have)}; while ($want > $bits_have) { # inlined input read my $byte = substr ${$_[0]->{input}}, 0, 1, ""; if (!length $byte) { @{$self}{qw(val have)} = ($bits_val, $bits_have); return; } $bits_val |= ord($byte) << $bits_have; $bits_have += 8; } my $result = $bits_val & (1 << $want)-1; $bits_val >>= $want; $bits_have -= $want; @{$self}{qw(val have)} = ($bits_val, $bits_have); return $result; } # Get one huffman code sub _get_huffman { my ($self, $code) = @_; $code = $self->{$code}; my ($bits_val, $bits_have, $str) = @{$self}{qw(val have huffman)}; do { if (--$bits_have < 0) { # inlined input read my $byte = substr ${$_[0]->{input}}, 0, 1, ""; if (!length $byte) { # bits_have is -1, but really should be zero, so fix in save @{$self}{qw(val have huffman)} = ($bits_val, 0, $str); return; } $bits_val = ord $byte; $bits_have = 7; } $str .= $bits_val & 1; $bits_val >>= 1; } until exists $code->{$str}; defined($code->{$str}) || die "Bad code $str"; @{$self}{qw(val have huffman)} = ($bits_val, $bits_have, ""); return $code->{$str}; } # construct huffman code sub make_huffman { my $counts = shift; my (%code, @counts); push @{$counts[$counts->[$_]]}, $_ for 0..$#$counts; my $value = 0; my $bits = -1; for (@counts) { $value *= 2; next unless ++$bits && $_; # Ton used sprintf"%0${bits}b", $value; $code{reverse unpack "b$bits", pack "V", $value++} = $_ for @$_; } # Close the code to avoid infinite loops (and out of memory) $code{reverse unpack "b$bits", pack "V", $value++} = undef for $value .. (1 << $bits)-1; @code{0, 1} = () unless %code; return \%code; } # Inflate state machine. { my ($static_lit_code, $static_dist_code, @lit_base, @dist_base); my @lit_extra = (-1, 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 3,3,3,3,4,4,4,4,5,5,5,5,0,-2,-2); my @dist_extra = (0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8, 9,9,10,10,11,11,12,12,13,13,-1,-1); my @alpha_map = (16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15); sub prepare_tables { my $length = 3; for (@lit_extra) { push @lit_base, $length; $length += 1 << $_ if $_ >= 0; } # Exceptional case splice(@lit_base, -3, 3, 258); my $dist = 1; for (@dist_extra) { push @dist_base, $dist; $dist += 1 << $_ if $_ >= 0; } splice(@dist_base, -2, 2); } sub stateReadFinal { my $bit = _get_bits($_[0], 1); if (!defined $bit) { # STALL return; } $_[0]->{final} = $bit; goto &{$_[0]->{state} = \&stateReadType}; } sub stateReadType { my $type = _get_bits($_[0], 2); if (!defined $type) { # STALL return; } $_[0]->{type} = $type; if ($type) { prepare_tables() unless @lit_base; if ($type == 1) { $_[0]->{lit} = $static_lit_code ||= make_huffman([(8)x144,(9)x112, (7)x24, (8)x8]); $_[0]->{dist} = $static_dist_code ||= make_huffman([(5)x32]); # This is the main inflation loop. goto &{$_[0]->{state} = \&stateReadLit}; } elsif ($type == 2) { goto &{$_[0]->{state} = \&stateReadHLit}; } else { die "deflate subtype $type not supported\n"; } } goto &{$_[0]->{state} = \&stateReadUncompressedLen}; } sub stateReadUncompressedLen { # Not compressed; $_[0]->_reset_bits_have; # inlined input read $_[0]->{type0length} .= substr ${$_[0]->{input}}, 0, 4 - length $_[0]->{type0length}, ""; if (length $_[0]->{type0length} < 4) { # STALL return; } my ($len, $nlen) = unpack("vv", $_[0]->{type0length}); $_[0]->{type0length} = ""; $len == (~$nlen & 0xffff) || die "$len is not the 1-complement of $nlen"; $_[0]->{type0left} = $len; goto &{$_[0]->{state} = \&stateReadUncompressed}; } sub stateReadUncompressed { # inlined input read my $got = substr ${$_[0]->{input}}, 0, $_[0]->{type0left}, ""; $_[0]->{result} .= $got; if ($_[0]->{type0left} -= length $got) { # Still need more. # STALL return; } if ($_[0]->{final}) { # Finished. return 1; } # Begin the next block goto &{$_[0]->{state} = \&stateReadFinal}; } sub stateReadHLit { my $hlit = _get_bits($_[0], 5); if (!defined $hlit) { # STALL return; } $_[0]->{hlit} = $hlit + 257; goto &{$_[0]->{state} = \&stateReadHDist}; } sub stateReadHDist { my $hdist = _get_bits($_[0], 5); if (!defined $hdist) { # STALL return; } $_[0]->{hdist} = $hdist + 1; goto &{$_[0]->{state} = \&stateReadHCLen}; } sub stateReadHCLen { my $hclen = _get_bits($_[0], 4); if (!defined $hclen) { # STALL return; } $_[0]->{alphaleft} = $_[0]->{hclen} = $hclen + 4; # Determine the code length huffman code $_[0]->{alpha_raw} = [(0) x @alpha_map]; goto &{$_[0]->{state} = \&stateReadAlphaCode}; } sub stateReadAlphaCode { my $alpha_code = $_[0]->{alpha_raw}; while ($_[0]->{alphaleft}) { my $code = _get_bits($_[0], 3); if (!defined $code) { # STALL return; } # my $where = $_[0]->{hclen} - $_[0]->{alphaleft}; $alpha_code->[$alpha_map[$_[0]->{hclen} - $_[0]->{alphaleft}--]] = $code; } $_[0]->{alpha} = make_huffman($alpha_code); delete $_[0]->{alpha_raw}; # Get lit/length and distance tables $_[0]->{code_len} = []; goto &{$_[0]->{state} = \&stateBuildAlphaCode}; } sub stateBuildAlphaCode { my $code_len = $_[0]->{code_len}; while (@$code_len < $_[0]->{hlit}+$_[0]->{hdist}) { my $alpha = _get_huffman($_[0], 'alpha'); if (!defined $alpha) { # STALL return; } if ($alpha < 16) { push @$code_len, $alpha; } elsif ($alpha == 16) { goto &{$_[0]->{state} = \&stateReadAlphaCode16}; } elsif ($alpha == 17) { goto &{$_[0]->{state} = \&stateReadAlphaCode17}; } else { goto &{$_[0]->{state} = \&stateReadAlphaCodeOther}; } } @$code_len == $_[0]->{hlit}+$_[0]->{hdist} || die "too many codes"; my @lit_len = splice(@$code_len, 0, $_[0]->{hlit}); $_[0]->{lit} = make_huffman(\@lit_len); $_[0]->{dist} = make_huffman($code_len); delete $_[0]->{code_len}; goto &{$_[0]->{state} = \&stateReadLit}; } sub stateReadAlphaCode16 { my $code_len = $_[0]->{code_len}; my $bits = _get_bits($_[0], 2); if (!defined $bits) { # STALL return; } push @$code_len, ($code_len->[-1]) x (3+$bits); goto &{$_[0]->{state} = \&stateBuildAlphaCode}; } sub stateReadAlphaCode17 { my $code_len = $_[0]->{code_len}; my $bits = _get_bits($_[0], 3); if (!defined $bits) { # STALL return; } push @$code_len, (0) x (3+$bits); goto &{$_[0]->{state} = \&stateBuildAlphaCode}; } sub stateReadAlphaCodeOther { my $code_len = $_[0]->{code_len}; my $bits = _get_bits($_[0], 7); if (!defined $bits) { # STALL return; } push @$code_len, (0) x (11+$bits); goto &{$_[0]->{state} = \&stateBuildAlphaCode}; } sub stateReadLit { while (1) { my $lit = _get_huffman($_[0], 'lit'); if (!defined $lit) { # STALL return; } if ($lit >= 256) { if ($lit_extra[$lit -= 256] < 0) { die "Invalid literal code" if $lit; if ($_[0]->{final}) { # Finished. return 1; } # Begin the next block goto &{$_[0]->{state} = \&stateReadFinal}; } $_[0]->{litcode} = $lit; # BREAK goto &{$_[0]->{state} = \&stateGetLength}; } $_[0]->{result} .= chr $lit; # Back to the main inflation loop # goto &stateReadLit; # ie loop } } sub stateGetLength { my $lit = $_[0]->{litcode}; my $bits = _get_bits($_[0], $lit_extra[$lit]); if (!defined $bits) { # STALL return; } $_[0]->{length} = $lit_base[$lit] + ($lit_extra[$lit] && $bits); goto &{$_[0]->{state} = \&stateGetDCode}; } sub stateGetDCode { my $d = _get_huffman($_[0], 'dist'); if (!defined $d) { # STALL return; } $_[0]->{dcode} = $d; goto &{$_[0]->{state} = \&stateGetDistDecompress}; } sub stateGetDistDecompress { my $d = $_[0]->{dcode}; die "Invalid distance code" if $d >= 30; my $bits = _get_bits($_[0], $dist_extra[$d]); if (!defined $bits) { # STALL return; } my $dist = $dist_base[$d] + ($dist_extra[$d] && $bits); # Go for it my $length = $_[0]->{length}; if ($dist >= $length) { my $section = substr ($_[0]->{result}, -$dist, $length); $_[0]->{result} .= $section; } else { my $remaining = $length; while ($remaining) { my $take = $dist >= $remaining ? $remaining : $dist; $_[0]->{result} .= substr($_[0]->{result}, -$dist, $take); $remaining -= $take; } } # Back to the main inflation loop goto &{$_[0]->{state} = \&stateReadLit}; } } 1; __END__