File::Grep - Find matches to a pattern in a series of files and related


File-Grep documentation Contained in the File-Grep distribution.

Index


Code Index:

NAME

Top

File::Grep - Find matches to a pattern in a series of files and related functions

SYNOPSIS

Top

  use File::Grep qw( fgrep fmap fdo );

  # Void context
  if ( fgrep { /$user/ } "/etc/passwd" ) { do_something(); }

  # Scalar context
  print "The index page was hit ",
	( fgrep { /index\.html/ } glob "/var/log/httpd/access.log.*"),
	" times\n";

  # Array context
  my @matches = fgrep { /index\.html/ } glob "/var/log/httpd/access.log.*";
  print SUMMARY $_ foreach @matches;

  # Mapping
  my @lower = fmap { chomp; lc; } glob "/var/log/httpd/access.log.*";

  # Foreach style..
  my $count;
  fdo { $count++ } @filelist;
  print "Total lines: $count\n";

  # More complex handling
  my @matchcount;
  fdo { my ( $file, $pos, $line ) = @_;
        $matchcount[$file]++ if ( $line =~ /keyword/ );
      } @filelist;




DESCRIPTION

Top

File::Grep mimics the functionality of the grep function in perl, but applying it to files instead of a list. This is similar in nature to the UNIX grep command, but more powerful as the pattern can be any legal perl function.

The main functions provided by this module are:

fgrep BLOCK LIST

Performs a grep operation on the files in LIST, using BLOCK as the criteria for accepting a line or not. Any lines that match will be added to an array that will be returned to the caller. Note that in void context, this function will immediately return true on the first match, false otherwise, and in scalar context, it will only return the number of matches.

When entering BLOCK, the $_ variable will be localized to the current line. In addition, you will be given the position in LIST of the current file, the line number in that file, and the line itself as arguments to this function. While you can change $_ if necessary, only the original value of the line will be added to the returned list. If you need to get the modified value, use fmap (described below).

The LIST can contain either scalars or filehandles (or filehandle-like objects). If the item is a scalar, an attempt will be made to open it and read it as a normal file. Otherwise it will be treated as a filehandle. Any errors resulting from IO may be reported to STDERR by setting the class variable $File::Grep::SILENT to false; otherwise, no error indication is given.

fmap BLOCK LIST

Performs a map operation on the files in LIST, using BLOCK as the mapping function. The results from BLOCK will be appended to the list that is returned at the end of the call.

fdo BLOCK LIST

Performs the equivalent of a foreach operation on the files in LIST, performing BLOCK for each line in each file. This function has no return value. If you need to specialize more than what fgrep or fmap offer, you can use this function.

In addition, if you need additional fine control, you can use the internal function _fgrep_process. This is called just like fgrep/fmap/fdo, as in "_fgrep_process BLOCK LIST", except that you can control when the function 'short circuits' by the return value from BLOCK. If, after processing a line, the BLOCK returns a negative number, the entire process is aborted, closing any open filehandles that were opened by the function. If the return value is 0, the current file is aborted, closed if opened by the function, and the next file is then searched. A positive return value will simply go on to the next line as appropriate.

EXPORT

Top

"fgrep", "fmap", and "fdo" may be exported, but these are not set by default.

AUTHOR

Top

Michael K. Neylon, <mneylon-pm@masemware.com>

SEE ALSO

Top

perl.


File-Grep documentation Contained in the File-Grep distribution.

#!/usr/bin/perl -w

package File::Grep;

use strict;
use Carp;

# Compile-time package setup: declares the Exporter-related package
# variables and fills them in before the rest of the file is compiled.
BEGIN {
  use Exporter   ();
  use vars       qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  # Pull the "major.minor" digits out of the revision keyword string and
  # reformat them as "%d.%02d" (yields "0.02" for the string below).
  $VERSION     = sprintf( "%d.%02d", q( $Revision: 0.02 $ ) =~ /\s(\d+)\.(\d+)/ );
  @ISA         = qw(Exporter);
  # Nothing is exported by default ...
  @EXPORT      = qw();
  # ... and only these three names may be imported on request.
  @EXPORT_OK   = qw( fgrep fmap fdo );
  %EXPORT_TAGS = (  );
}

# Remain silent on bad files, else shoutout.
# Package variable ($File::Grep::SILENT); defaults to true, i.e. quiet.
our $SILENT = 1;

# Internal workhorse behind the public functions: walks every entry of the
# file list and hands each line to a callback.
#
# Arguments:
#   $closure - code ref invoked as $closure->( $index, $lineno, $line ),
#              where $index is the 0-based position of the current entry
#              in the file list, $lineno is the value of $. after the read,
#              and $line is the line exactly as returned by readline.
#   @files   - plain scalars are treated as file names and opened for
#              reading; anything else is assumed to be an already-open
#              filehandle (or filehandle-like object) and is never closed
#              here.
#
# The callback's numeric return value steers the walk:
#   <  0  stop everything (remaining lines and remaining files are skipped)
#   == 0  stop reading the current file and move on to the next one
#   >  0  carry on with the next line
#
# Problems are reported through carp only when $File::Grep::SILENT is false.
# Always returns nothing (empty list / undef).
sub _fgrep_process {
  my ( $closure, @files ) = @_;
  my $abort = 0;

  # Iterate by index so that $i always equals the entry's position in
  # @files, even when an entry is skipped because it cannot be opened.
 FILE:
  foreach my $i ( 0 .. $#files ) {
    my $file     = $files[$i];
    my $openfile = 0;    # true only for handles opened (and so closed) here
    my $fh;

    if ( UNIVERSAL::isa( \$file, "SCALAR" ) ) {
      # A plain scalar: treat it as a file name.  A lexical handle keeps
      # nested or re-entrant calls from clobbering each other, and the
      # three-argument open reads the name literally (no mode characters
      # or pipes are interpreted from the string).
      unless ( open $fh, '<', $file ) {
        carp "Cannot open file '$file' for fgrep: $!"
          unless $File::Grep::SILENT;
        next FILE;    # skip unreadable files whether or not we are silent
      }
      $openfile = 1;
    } else {
      # Otherwise, we will assume it's a legit filehandle.  If something's
      # amiss, we'll catch it at the first read below.
      $fh = $file;
    }

    # The first read is guarded because reading from something that is not
    # a filehandle dies (fix for perl5.8 - thanks to Benjamin Kram).
    my $line;
    eval { $line = <$fh> };
    if ( $@ ) {
      carp "Cannot use file '$file' for fgrep: $@"
        unless $File::Grep::SILENT;
      # NOTE(review): an unusable handle ends the whole walk rather than
      # just this entry; kept as-is to preserve existing behaviour.
      last FILE;
    }

    while ( defined $line ) {
      my $state = $closure->( $i, $., $line );
      if ( $state < 0 ) {
        # Callback asked to shut down the whole process.
        $abort = 1;
        last;
      }
      # Callback asked to shut down just this file.
      last if $state == 0;
      $line = <$fh>;
    }

    close $fh if $openfile;
    last FILE if $abort;
  }
  return;
}

# fgrep BLOCK LIST
#
# Runs BLOCK against every line of every entry in LIST.  BLOCK is called
# with ( index of the entry in LIST, line number, line ) and with $_
# localized to a copy of the line.  What comes back depends on the
# calling context:
#
#   void   - stops at the first line BLOCK accepts (result: 1 if one was
#            found, otherwise 0)
#   scalar - the number of lines BLOCK accepted
#   list   - one hash ref per entry of LIST, in order, with the keys
#              filename => the entry as given in LIST
#              count    => number of accepted lines for that entry
#              matches  => { line number => original line text }
sub fgrep (&@) {
  my ( $coderef, @files ) = @_;
  my $context = wantarray;

  # Void context: bail out of the whole walk on the first hit.
  if ( !defined $context ) {
    my $found = 0;
    _fgrep_process(
      sub {
        my ( $index, $lineno, $text ) = @_;
        local $_ = $text;
        return 1 unless $coderef->( $index, $lineno, $_ );
        $found = 1;
        return -1;    # negative => abort everything
      },
      @files
    );
    return $found;
  }

  # Scalar context: just tally the hits.
  if ( !$context ) {
    my $count = 0;
    _fgrep_process(
      sub {
        my ( $index, $lineno, $text ) = @_;
        local $_ = $text;
        $count++ if $coderef->( $index, $lineno, $_ );
        return 1;
      },
      @files
    );
    return $count;
  }

  # List context: build one record per entry up front, then fill them in.
  my @matches =
    map { +{ filename => $_, count => 0, matches => {} } } @files;
  _fgrep_process(
    sub {
      my ( $index, $lineno, $text ) = @_;
      local $_ = $text;
      if ( $coderef->( $index, $lineno, $_ ) ) {
        $matches[$index]{count}++;
        # Store the untouched line, not whatever BLOCK did to $_.
        $matches[$index]{matches}{$lineno} = $text;
      }
      return 1;
    },
    @files
  );
  return @matches;
}

# fgrep_flat BLOCK LIST
#
# Collects every line, across all entries of LIST, for which BLOCK returns
# true, and returns them as one flat list (in scalar context: how many
# there are).  BLOCK is called with ( index of the entry in LIST, line
# number, line ) and with $_ localized to a copy of the line; the line is
# stored as read, regardless of any change BLOCK makes to $_.
sub fgrep_flat (&@) {
  my ( $coderef, @files ) = @_;
  my @matches;
  my $sub = sub {
    my ( $file, $pos, $line ) = @_;
    local $_ = $line;
    push @matches, $line if $coderef->( $file, $pos, $_ );
    # Always tell _fgrep_process to keep going.  Returning a positive value
    # only on a match would hand back a false value for every non-matching
    # line, which _fgrep_process treats as "abandon this file".
    return 1;
  };
  _fgrep_process( $sub, @files );
  return @matches;
}

# fgrep_into BLOCK ARRAYREF LIST
#
# Appends to @$arrayref every line, across all entries of LIST, for which
# BLOCK returns true, and returns that same array reference.  BLOCK is
# called with ( index of the entry in LIST, line number, line ) and with $_
# localized to a copy of the line; the line is stored as read, regardless
# of any change BLOCK makes to $_.
sub fgrep_into ( &$@ ) {
  my ( $coderef, $arrayref, @files ) = @_;
  my $sub = sub {
    my ( $file, $pos, $line ) = @_;
    local $_ = $line;
    push @$arrayref, $line if $coderef->( $file, $pos, $_ );
    # Always tell _fgrep_process to keep going.  Returning a positive value
    # only on a match would hand back a false value for every non-matching
    # line, which _fgrep_process treats as "abandon this file".
    return 1;
  };
  _fgrep_process( $sub, @files );
  return $arrayref;
}

# fmap BLOCK LIST
#
# Calls BLOCK, in list context, for every line of every entry in LIST and
# returns everything BLOCK produced, in order.  BLOCK receives ( index of
# the entry in LIST, line number, line ) and sees $_ localized to a copy of
# the line.
sub fmap (&@) {
  my ( $mapper, @files ) = @_;
  my @mapped;

  _fgrep_process(
    sub {
      my ( $index, $lineno, $text ) = @_;
      local $_ = $text;
      my @produced = $mapper->( $index, $lineno, $_ );
      push @mapped, @produced;
      return 1;    # never cut the walk short
    },
    @files
  );

  return @mapped;
}

# fdo BLOCK LIST
#
# Calls BLOCK once for every line of every entry in LIST, purely for its
# side effects.  BLOCK receives ( index of the entry in LIST, line number,
# line ) and sees $_ localized to a copy of the line; whatever it returns
# is ignored.  The result is whatever _fgrep_process hands back.
sub fdo (&@) {
  my ( $doer, @files ) = @_;

  return _fgrep_process(
    sub {
      my ( $index, $lineno, $text ) = @_;
      local $_ = $text;
      $doer->( $index, $lineno, $_ );
      return 1;    # always continue with the next line
    },
    @files
  );
}

1;
__END__