| Data-TemporaryBag documentation | Contained in the Data-TemporaryBag distribution. |
my $self = shift;
my $fn = $self->[FILENAME];
my $fh = $self->[FILEHANDLE];
if ($fh) {
seek $fh, 0, SEEK_END;
return tell($fh)- $self->[STARTPOS];
} elsif ($fn) {
return (-s $fn) - $self->[STARTPOS];
} else {
return length($self->[BUFFER]);
}
Data::TemporaryBag - Handle long size data using temporary file .
use Data::TemporaryBag;
$data = Data::TemporaryBag->new;
# add long string
$data->add('ABC' x 1000);
# You can use an overridden operator
$data .= 'DEF' x 1000;
...
$substr = $data->substr(2997, 6); # ABCDEF
Data::TemporaryBag module provides a bag object class handling long size data. The short size data are kept on memory. When the data size becomes over $Threshold size, they are saved into a temporary file internally.
Creates a bag object.
Clears $bag.
Adds $data to $bag. You can use an assignment operator '.=' instead.
Extracts a substring out of $bag. It behaves similar to CORE::substr except that it can't be an lvalue.
Creates a clone of $bag.
Gets data of $bag as a string. It is possible that the string is extremely long.
Gets length of data.
Returns if the data in $bag are defined or not.
Returns the file name if $bag is saved in a temporary file.
The threshold of the data size in kilobytes whether saved into file or not. Default is 10.
The maximum number of the opened temporary files. Default is 10.
Copyright 2001 Yasuhiro Sasama (ySas), <ysas@nmt.ne.jp>
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Data-TemporaryBag documentation | Contained in the Data-TemporaryBag distribution. |
package Data::TemporaryBag; use strict; use Fcntl qw/:DEFAULT :seek/; use Carp; use File::Temp 'tempfile'; use overload '""' => \&value, '.=' => \&add, '=' => \&clone, fallback => 1; use constant BUFFER => 0; use constant FILENAME => 1; use constant FILEHANDLE => 2; use constant STARTPOS => 3; use constant RECENTNESS => 4; use constant FINGERPRINT => 4; use constant LENGTH => 5; our ($VERSION, $Threshold, $TempPath, $MaxOpen); $VERSION = '0.09'; $Threshold = 10; # KB #$TempPath = $::ENV{'TEMP'}||$::ENV{'TMP'}||'.'; $TempPath = ''; $MaxOpen = 10; my %OpenFiles; sub new { my $class = shift; my $self = ['']; bless $self, ref($class)||$class; $self->[LENGTH] = 0; $self->add(@_) if @_; $self; } sub clear { my $self = $_[0]; &_clear_buffer; $self->[LENGTH] = 0; } sub _clear_buffer { my $self = shift; my $fn = $self->[FILENAME]; if ($fn) { $self->_close if $self->[FILEHANDLE]; unlink $fn; @{$self}[FILENAME..FINGERPRINT] = (); } $self->[BUFFER] = ''; } sub add { my ($self, $data) = @_; my $buf = \$$self[BUFFER]; $data = '' unless defined $data; $self->[LENGTH] += CORE::length($data); if ($self->[FILENAME]) { my $fh = $self->_open; seek $fh, 0, SEEK_END; print $fh $data; } else { if (CORE::length($data) + CORE::length($$buf) > $Threshold * 1024) { my $fh = $self->_open; seek $fh, 0, SEEK_END; print $fh $$buf, $data; } else { $$buf .= $data; } } $self; } sub substr { my ($self, $pos, $size, $replace) = @_; my $len = $self->[LENGTH]; $pos = $len + $pos if $pos < 0; if (not defined $size or $size+$pos > $len) { $size = $len - $pos; } elsif ($size < 0) { $size = $len + $size; } my $rsize = defined($replace) ? CORE::length($replace) : 0; my $offset = $size - $rsize; my $newlen = $len - $offset; if ($self->[FILENAME]) { my $data; my $fh = $self->_open; my $startpos = $self->[STARTPOS]; return '' if $pos >= $len; seek($fh, $startpos+$pos, SEEK_SET); read($fh, $data, $size); if (defined $replace) { if ($offset == 0) { my $fh = $self->_open; seek($fh, $pos + $startpos, SEEK_SET); print $fh $replace; } elsif ($newlen < $Threshold * 800) { my $data1 = $self->substr(0, $pos); my $data2 = $self->substr($pos + $size); $self->_clear_buffer; $self->[BUFFER] = $data1.$replace.$data2; $self->[LENGTH] = $newlen; } elsif ($pos == 0 and $startpos >= -$offset) { $self->[STARTPOS] += $offset; if ($rsize>0) { seek($fh, $self->[STARTPOS], SEEK_SET); print $fh $replace; } } elsif ($pos+$size == $len) { seek($fh, $startpos+$pos, SEEK_SET); print $fh $replace; truncate($fh, $startpos+$newlen) if $newlen<$len; } elsif ($offset > 0) { my ($data, $pos2); if ($pos < $len - $pos - $size) { seek($fh, $startpos+$pos+$offset, SEEK_SET); print $fh $replace; _blktf_fw($fh, $startpos, $pos, $offset); $self->[STARTPOS] += $offset; } else { seek($fh, $startpos+$pos, SEEK_SET); print $fh $replace; my $start = $startpos+$pos+$size; _blktf_bw($fh, $startpos+$pos+$size, $len-$pos-$size, $offset); truncate($fh, $startpos+$newlen); } } else { my $offset = $rsize-$size; my ($data, $pos2); if ($startpos >= $offset) { _blktf_bw($fh, $startpos, $pos, $offset); seek($fh, $startpos+$pos-$offset, SEEK_SET); print $fh $replace; $self->[STARTPOS] -= $offset; } else { _blktf_fw($fh, $startpos+$pos+$size, $len-$pos-$size, $offset); seek($fh, $startpos+$pos, SEEK_SET); print $fh $replace; } } $self->[LENGTH] = $newlen; } return $data; } else { if (defined $replace) { $self->[LENGTH] = $newlen; substr($self->[BUFFER], $pos, $size, $replace); } else { substr($self->[BUFFER], $pos, $size); } } } sub _blktf_fw { my ($fh, $start, $size, $offset) = @_; my ($pos2, $data); for ($pos2 = $start + $size-1024; $pos2 > $start; $pos2-=1024) { seek($fh, $pos2, SEEK_SET); read($fh, $data, 1024); seek($fh, $pos2+$offset, SEEK_SET); print $fh $data; } seek($fh, $start, SEEK_SET); read($fh, $data, $pos2 - $start+1024); seek($fh, $start+$offset, SEEK_SET); print $fh $data; } sub _blktf_bw { my ($fh, $start, $size, $offset) = @_; my ($pos2, $data); for($pos2 = $start; $pos2 < $start+$size-1024; $pos2+=1024) { seek($fh, $pos2, SEEK_SET); read($fh, $data, 1024); seek($fh, $pos2-$offset, SEEK_SET); print $fh $data; } seek($fh, $pos2, SEEK_SET); read($fh, $data, $start+$size-$pos2); seek($fh, $pos2-$offset, SEEK_SET); print $fh $data; } sub clone { my ($self, $stream)=@_; my $size = $self->[LENGTH]; my $pos = 0; my $new = $self->new; while ($size > $pos) { $new->add($self->substr($pos, 1024)); $pos += 1024; } $new->[LENGTH] = $size; $new; } sub value { my ($self, $stream)=@_; my $size = $self->length; my $pos = 0; my $data = ''; while ($size > $pos) { $data .= $self->substr($pos, 1024); $pos += 1024; } $data; } sub length { shift->[LENGTH];
} sub defined { defined shift->[BUFFER]; } sub _open { my ($self, $mode) = @_; my ($fh, $fn); if (defined ($fh = $self->[FILEHANDLE])) { my $recent = $self->[RECENTNESS]; return $fh if $recent == 1; $self->[RECENTNESS] = 0; while(my (undef, $obj) = each %OpenFiles) { if ($obj->[RECENTNESS] <= $recent) { $obj->[RECENTNESS]++; } } return $fh; } if (defined ($fn = $self->[FILENAME])) { croak "TemporaryBag object seems to be collapsed " if (!-e $fn) or (!-f _); sysopen($fh, $fn, O_RDWR) or croak "TemporaryBag object seems to be collapsed OP"; croak "TemporaryBag object seems to be collapsed " if (-l $fn); binmode $fh; $self->[FILEHANDLE] = $fh; $self->_check_fingerprint or croak "TemporaryBag object seems to be collapsed CH"; } else { ($fh, $fn) = tempfile(); $self->[STARTPOS] = 0; croak "TemporaryBag object seems to be collapsed CR" unless defined $fh; binmode $fh; $self->[FILEHANDLE] = $fh; $self->[FILENAME] = $fn; } while(my (undef, $obj) = each %OpenFiles) { ++$obj->[RECENTNESS]; } if (keys %OpenFiles >= $MaxOpen) { my $to_close; while(my (undef, $obj) = each %OpenFiles) { if ($obj->[RECENTNESS] > $MaxOpen) { $to_close = $obj; last; } } $to_close->_close; } $self->[RECENTNESS] = 1; $OpenFiles{overload::StrVal($self)} = $self; return $fh; } sub _close { my $self = shift; my $recent = $self->[RECENTNESS]; my $fh = $self->[FILEHANDLE]; my $i; delete $OpenFiles{overload::StrVal($self)}; while(my (undef, $obj) = each %OpenFiles) { if (defined $obj and $obj->[RECENTNESS] > $recent) { $obj->[RECENTNESS]--; } } $self->_set_fingerprint; undef $self->[FILEHANDLE]; close $fh or croak "TemporaryBag object seems to be collapsed CL"; } sub is_saved { return shift->[FILENAME]; } sub _set_fingerprint { my $self = shift; my $fingerprint; my $fh = $self->[FILEHANDLE]; seek $fh, 0, SEEK_END; my $range = tell($fh) - $self->[STARTPOS] - 1024; for (1..3) { my $r = int(rand($range))+1024; my $data; seek $fh, -$r, SEEK_END; read($fh, $data, 1024); $fingerprint .= "[$r]".unpack('%32C*',$data); } $self->[FINGERPRINT] = $fingerprint; } sub _check_fingerprint { my $self = shift; my $fh = $self->[FILEHANDLE]; my $fingerprint = $self->[FINGERPRINT]; my $flag = 1; while($fingerprint=~/\[([^]]+)\]([^[]+)/g) { my $pos = $1; my $sum = $2; my $data; seek $fh, -$pos, SEEK_END; read($fh, $data, 1024); $flag &&= (unpack('%32C*',$data) == $sum); } return $flag; } sub DESTROY { my $self = shift; # close $self->[FILEHANDLE] if defined $self->[FILEHANDLE]; $self->_close if defined $self->[FILEHANDLE]; unlink $self->[FILENAME] if defined $self->[FILENAME]; } 1; __END__