# /usr/local/CPAN/PDF-GetImages/PDF/GetImages.pm


package PDF::GetImages;
use strict;
use File::Which 'which';
use Carp;
require Exporter;
use Cwd;
# Package globals (predeclared so they can be used under 'use strict').
# Only pdfimages() is exportable, and only on request.
use vars qw(@EXPORT_OK @ISA $WHICH_CONVERT $WHICH_PDFIMAGES $VERSION $DEBUG $errstr $FORCE_JPG @TRASH);
@ISA = qw(Exporter);
@EXPORT_OK = qw(pdfimages);
# The version is derived from the RCS Revision keyword: the /g match in list
# context yields the two digit groups, which sprintf formats as "major.minor".
$VERSION = sprintf "%d.%02d", q$Revision: 1.17 $ =~ /(\d+)/g;
# When true, pdfimages() converts every extracted image to .jpg.
$FORCE_JPG=0;
# Locate the external tools. '||=' keeps any true value that was assigned
# before this code runs. 'convert' is not checked here, so $WHICH_CONVERT may
# be left undefined; it is only needed by _convert_to_jpg().
$WHICH_CONVERT ||= which('convert');
# 'pdfimages' is mandatory: loading the module croaks when it cannot be found.
$WHICH_PDFIMAGES ||= which('pdfimages')
   or croak( " is pdfimages (xpdf) installed? Cant get which() pdfimages");

# Records an error message in the package variable $errstr.
# A call without a defined argument leaves the stored message untouched.
# Always returns 1, so it can be chained as "... or errstr(...) and return".
sub errstr {
   my ($message) = @_;
   if (defined $message) {
      $errstr = $message;
   }
   return 1;
}

# Writes a diagnostic line to STDERR, prefixed with the package name, but only
# while $DEBUG is true. The arguments are joined the way "@array" interpolates.
# Always returns 1.
sub debug {
   my @parts = @_;
   if ($DEBUG) {
      print STDERR __PACKAGE__ . ", @parts\n";
   }
   return 1;
}



# pdfimages($pdf_path [, $dir_out])
#
# Extracts the images embedded in a PDF by running the external 'pdfimages'
# program and returns an array ref with the absolute paths of the image files
# it wrote (names of the form "<pdf basename>-<digits>.p?m").
#
# Arguments:
#   $pdf_path - path to a file ending in '.pdf' (resolved with Cwd::abs_path).
#   $dir_out  - optional output directory. When it resolves to a directory
#               other than the PDF's own, the PDF is copied there, the images
#               are extracted next to the copy, and the copy is removed again.
#
# Returns:
#   - array ref of image paths (converted to .jpg when $FORCE_JPG is true);
#   - [] with $errstr set, when pdfimages produced no matching output;
#   - nothing (bare return) with $errstr set, when the input path cannot be
#     resolved, is not a plain file, or does not end in '.pdf'.
#
# Croaks on: missing argument, unresolvable or non-directory $dir_out, failed
# copy, non-zero exit of pdfimages, or an unreadable output directory.
sub pdfimages {
   my ($_abs_pdf, $_dir_out) = @_;
   defined $_abs_pdf or croak('missing argument');

   no warnings;
   debug("args: in '$_abs_pdf'");

   carp($_abs_pdf) if $DEBUG;

   my $cwd = Cwd::cwd();

   my $abs_pdf = Cwd::abs_path($_abs_pdf)
      or errstr("can't resolve location of '$_abs_pdf', cwd is $cwd")
      and return;

   -f $abs_pdf or errstr("ERROR: $abs_pdf is NOT on disk.") and return;

   # Split into directory, basename without extension, and extension.
   # A failed match yields an empty list, i.e. 0 in boolean context.
   my ($abs_loc, $filename_only, $ext) = $abs_pdf =~ m{(.+)/([^/]+)(\.pdf)\z}i
      or errstr("$abs_pdf not '.pdf'?")
      and return;
   my $filename = "$filename_only$ext";

   if ($_dir_out) {    # did the user ask for a specific output directory?
      debug("have dir out arg '$_dir_out'.. ");
      my $dir_out = Cwd::abs_path($_dir_out)
         or croak("cant resolve $_dir_out, should be able to, please notify PDF::GetImages AUTHOR");
      debug("have dir out '$_dir_out', resolved to $dir_out");

      if ($dir_out ne $abs_loc) {
         debug("dir out not same as original file loc");
         -d $dir_out or croak("Dir out arg is not a dir $dir_out");

         # pdfimages is run against a temporary copy placed in the output
         # directory; the copy is queued in @TRASH for removal afterwards.
         require File::Copy;
         File::Copy::copy($abs_pdf, "$dir_out/$filename")
            or croak("you specified dir out $dir_out, but we cant copy '$abs_pdf' there, $!");
         $abs_loc = $dir_out;
         $abs_pdf = "$dir_out/$filename";
         push @TRASH, $abs_pdf;
         debug("switched to use pdf copy $abs_pdf");
      }
   }

   my $image_root = "$abs_loc/$filename_only";

   # $cmd is kept for diagnostics only. The program itself is started with
   # the list form of system(), which bypasses the shell, so paths containing
   # quotes, spaces or shell metacharacters are passed through verbatim.
   # The bare name 'pdfimages' is still looked up through PATH, as before.
   my $cmd = "pdfimages '$abs_pdf' '$image_root'";

   debug('cwd is ' . Cwd::cwd() . ", $cmd");

   my $status = system('pdfimages', $abs_pdf, $image_root);

   # Remove the temporary copy whether or not pdfimages succeeded, and empty
   # the queue so that a later call can never unlink paths left over from
   # this one (which could by then be a real user file).
   if (@TRASH) {
      debug("had copied, deleting @TRASH");
      unlink @TRASH;
      @TRASH = ();
   }

   $status == 0
      or croak("bad args for pdfimages [$cmd]");

   opendir(my $dh, $abs_loc)
      or croak("can't open '$abs_loc' dir, $!");
   my @ls = readdir $dh;
   closedir $dh;
   debug("ls is @ls");

   # Only accept files named "<basename>-<number>.p?m". The basename is
   # quoted so regex metacharacters in it are literal, and the pattern is
   # anchored so images belonging to other PDFs in the directory are ignored.
   my $image_re = qr/\A\Q$filename_only\E-\d+(?i:\.p.m)\z/;
   my @pagefiles = map { "$abs_loc/$_" } sort grep { $_ =~ $image_re } @ls;

   unless (@pagefiles) {
      errstr( __PACKAGE__."::pdfimages() says, no output from pdfimages for [$abs_pdf]?\n[abs loc is: $abs_loc]");
      return [];
   }

   if ($PDF::GetImages::FORCE_JPG) {
      debug("FORCE_JPG is on, converting to jpegs..");
      @pagefiles = _convert_all_to_jpg(@pagefiles);
   }

   return \@pagefiles;
}

# Feeds every given image path through _convert_to_jpg() and returns the
# collected results as a list. Each call is made in list context, so a
# conversion that returns nothing contributes no element.
sub _convert_all_to_jpg {
   my @converted;
   for my $source (@_) {
      push @converted, _convert_to_jpg($source);
   }
   return @converted;
}


# _convert_to_jpg($abs_image)
#
# Converts one image file to JPEG with the external 'convert' program
# ($WHICH_CONVERT), deletes the source file and returns the path of the new
# file. The output path is the input path with its final extension (a dot
# followed by 1-5 word characters) replaced by '.jpg'.
#
# Returns nothing, after a warning, when the input has no such extension.
# An input that already ends in '.jpg' is returned unchanged and is neither
# converted nor deleted.
# Croaks when 'convert' is not available or exits with a non-zero status.
sub _convert_to_jpg {
   my $_abs = shift;
   my $_out = $_abs;
   $_out =~ s/\.\w{1,5}$/.jpg/
      or warn("cant match ext on '$_abs'") and return;

   # Source and target are the same path: converting in place and then
   # unlinking the "source" would delete the only copy of the image.
   return $_out if $_out eq $_abs;

   # which('convert') is not verified at load time, so check it here and fail
   # with a message that names the actual problem.
   $WHICH_CONVERT
      or croak("cannot convert '$_abs' to jpg, is convert (ImageMagick) installed?");

   system($WHICH_CONVERT, $_abs, $_out) == 0
      or croak("convert failed for '$_abs' -> '$_out', status $?");
   unlink $_abs;
   debug(" converted to $_out");
   return $_out;
}


1;

# doc moved to lib/PDF/GetImages.pod