| ApacheLog-Parser documentation | Contained in the ApacheLog-Parser distribution. |
ApacheLog::Parser::SkipList - a list of skippable lines
my $skipper = ApacheLog::Parser::SkipList->new;
my %regexps = $skipper->set_config(\%conf);
# you'll typically build %regexps into some condition sub
my $sw = $skipper->new_writer($skipfile);
my $counter = 0;
while(...) {
...
$counter++;
$some_condition and $sw->skip($counter);
}
Later, while reading a file with a prepared skiplist:
my $skipper = ApacheLog::Parser::SkipList->new;
$skipper->set_config(\%conf);
my $sr = $skipper->new_reader($skipfile);
my $skip = $sr->next_skip;
my $counter = 0;
while(my $line = <$fh>) {
if(++$counter == $skip) {
$counter += $sr->skip_lines($fh);
$skip = $sr->next_skip;
next;
}
# then do more expensive stuff
...
}
my $skipper = ApacheLog::Parser::SkipList->new;
my %regexps = $skipper->set_config(\%conf);
my $subref = $skipper->get_matcher;
Merge existing files (adjusting the offsets.)
$skipper->merge($dest, $file, $offset, $file);
my $sw = $skipper->new_writer($skipfile);
my $sr = $skipper->new_reader($skipfile);
Eric Wilhelm @ <ewilhelm at cpan dot org>
http://scratchcomputing.com/
If you found this module on CPAN, please report any bugs or feature requests through the web interface at http://rt.cpan.org. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
If you pulled this development version from my /svn/, please contact me directly.
Copyright (C) 2007 Eric L. Wilhelm, All Rights Reserved.
Absolutely, positively NO WARRANTY, neither express or implied, is offered with this software. You use this software at your own risk. In case of loss, no person or entity owes you anything whatsoever. You have been warned.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| ApacheLog-Parser documentation | Contained in the ApacheLog-Parser distribution. |
package ApacheLog::Parser::SkipList; $VERSION = v0.0.1; use warnings; use strict; use Carp; use Digest::MD5 (); use YAML ();
sub new { my $package = shift; my $class = ref($package) || $package; my $self = {}; bless($self, $class); return($self); } # end subroutine new definition ########################################################################
sub set_config { my $self = shift; my ($conf) = (@_); my $handle = { file => { ext => sub { my $s = join('|', @_); return(qr/\.(?:$s)$/); }, path => sub { my $s = join('|', @_); return(qr/^(?:$s)/); }, }, }; my %reg; foreach my $k (keys(%$conf)) { my $ref = $handle->{$k}; my @ans; foreach my $bit (sort({$b cmp $a} keys(%{$conf->{$k}}))) { $ref->{$bit} or croak("no handler for $k/$bit config"); my $list = $conf->{$k}{$bit}; push(@ans, $ref->{$bit}->(@$list)); } if(@ans) { my $s = join("|", @ans); $reg{$k} = qr/(?:$s)/; } } $self->{config} = Digest::MD5::md5_hex(YAML::Dump($conf)); $self->{regexps} = \%reg; return(%reg); } # end subroutine set_config definition ########################################################################
sub get_matcher { my $self = shift; my %re = %{$self->{regexps}}; my $code = ''; foreach my $type (qw(file)) { $re{$type} or next; $code .= "(\$v->[$type] =~ m#$re{$type}#) and return(1);"; } #die "compiling $code"; my $doskip = eval(" use ApacheLog::Parser qw(:fields); my \$code = sub {my \$v = shift; $code}; no ApacheLog::Parser; \$code"); $@ and die "cannot build doskip sub -- $@"; return($doskip); } # end subroutine get_matcher definition ########################################################################
use constant flag => 2**31; sub merge { my $self = shift; my ($dest, @parts) = @_; my $outfh = $self->_open_write($dest); # just slurp the entire first bit my $first_part = shift(@parts); { my $fh = $self->_open_read($first_part); print $outfh readline($fh); } while(@parts) { my ($offset, $part) = (shift(@parts), shift(@parts)); my $fh = $self->_open_read($part); while(not eof($fh)) { my $v; (read($fh, $v, 4) == 4) or die "gah"; my $n = unpack("N", $v); # if it is flagged, there's another byte if($n & flag) { my $val; (read($fh, $val, 4) == 4) or die "gah"; $n &= ~flag; # de-mangle it $n += $offset; $n |= flag; # re-mangle print $outfh pack('N', $n), $val; } else { $n += $offset; print $outfh pack('N', $n); } } } } # end subroutine merge definition ########################################################################
sub new_writer { my $self = shift; my ($filename) = @_; my $fh = $self->_open_write($filename); my $writer = __PACKAGE__ . '::Writer'; return($writer->new($fh)); } # end subroutine new_writer definition ######################################################################## sub _open_write { my $self = shift; my ($filename) = @_; my $conf_check = $self->{config} or croak("cannot make a writer without a config"); open(my $fh, '>', $filename) or croak("cannot open '$filename' for writing $!"); print $fh $conf_check; return($fh); }
sub new_reader { my $self = shift; my ($filename) = @_; my $fh = $self->_open_read($filename); my $reader = __PACKAGE__ . '::Reader'; return($reader->new($fh)); } # end subroutine new_reader definition ######################################################################## sub _open_read { my $self = shift; my ($filename) = @_; my $conf_check = $self->{config} or croak("cannot make a reader without a config"); open(my $fh, '<', $filename) or croak("cannot open '$filename' for reading $!"); my $verify; my $ok = read($fh, $verify, 32); (($ok||0) == 32) or croak("read error on $filename ", (defined($ok) ? 'eof' : $!)); ($verify eq $conf_check) or croak("the config has changed since this skiplist was created\n", " '$verify' vs '$conf_check'"); return($fh); } { package ApacheLog::Parser::SkipList::Base; sub new { my $package = shift; my ($fh) = @_; my $class = ref($package) || $package; my $self = [$fh, 0, -1]; bless($self, $class); return($self); } } { package ApacheLog::Parser::SkipList::Writer; use Carp; our @ISA = qw(ApacheLog::Parser::SkipList::Base); use constant flag => 2**31; sub skip { my $self = shift; my ($l) = @_; my $fh = $self->[0]; if($l == $self->[2]+1) { # contiguous $self->[2] = $l; } else { # write-out if(my $num = $self->[1]) { if(my $span = $self->[2] - $num) { $num |= flag; print $fh pack('N2', $num, $span); } else { print $fh pack('N', $num); } } $self->[1] = $self->[2] = $l; } } sub DESTROY { close($_[0]->[0]) or croak("close file failed $!"); @{$_[0]} = (); } } { package ApacheLog::Parser::SkipList::Reader; use Carp; our @ISA = qw(ApacheLog::Parser::SkipList::Base); use constant flag => 2**31; # return the next skip value and setup the line counter sub next_skip { my $self = shift; my $fh = $self->[0]; eof($fh) and return(0); my $v; (read($fh, $v, 4) == 4) or die "gah"; my $n = unpack("N", $v); # if it is flagged, there's another byte if($n & flag) { my $val; (read($fh, $val, 4) == 4) or die "gah"; my $more = unpack("N", $val); $self->[1] = $more; $n &= ~flag; # de-mangle it } else { # a single line $self->[1] = 0; } return($self->[2] = $n); } sub skip_lines { my ($self, $fh) = @_; my $n = $self->[1] or return(0); my $q = 0; while(<$fh>) { (++$q >= $n) and return($n); } croak("eof while skipping"); } }
# vi:ts=2:sw=2:et:sta 1;