CPAN::Mini::Devel


CPAN-Mini-Devel documentation Contained in the CPAN-Mini-Devel distribution.

Index


Code Index:


CPAN-Mini-Devel documentation Contained in the CPAN-Mini-Devel distribution.

package CPAN::Mini::Devel;
use 5.006;
use strict;
use warnings;
our $VERSION = '0.03'; 
$VERSION = eval $VERSION; ## no critic

use Config;
use CPAN::Mini;
use CPAN (); 
use CPAN::Tarzip;
use CPAN::HandleConfig;
use File::Temp 0.20;
use File::Spec;
use File::Path ();
use File::Basename qw/basename/;

our @ISA = 'CPAN::Mini';

#--------------------------------------------------------------------------#
# globals
#--------------------------------------------------------------------------#

my $tmp_dir = File::Temp->newdir( 'CPAN-Mini-Devel-XXXXXXX', 
    DIR => File::Spec->tmpdir,
);

#--------------------------------------------------------------------------#
# Extend index methods to miror find-ls.gz
#--------------------------------------------------------------------------#

my $index_file = 'indices/find-ls.gz';

sub _fixed_mirrors {
    my $self = shift;
    return ($index_file, $self->SUPER::_fixed_mirrors);
}

#--------------------------------------------------------------------------#
# Replace _get_mirror_list to add developer versions
#--------------------------------------------------------------------------#

sub _get_mirror_list {
	my $self  = shift;

    ## CPAN::Mini::Devel addition using find-ls.gz
    my $file_ls =  File::Spec->catfile(
        $self->{scratch},
        qw(indices find-ls.gz)
    );

    my $packages = File::Spec->catfile(
        $self->{scratch},
        qw(modules 02packages.details.txt.gz)
    );
    
    return $self->_parse_module_index( $packages, $file_ls );
}

#--------------------------------------------------------------------------#
# private variables and functions
#--------------------------------------------------------------------------#

my $module_index_re = qr{
        ^\s href="\.\./authors/id/./../    # skip prelude 
        ([^"]+)                     # capture to next dquote mark
        .+? </a>                    # skip to end of hyperlink
        \s+                         # skip spaces
        \S+                         # skip size
        \s+                         # skip spaces
        (\S+)                       # capture day
        \s+                         # skip spaces
        (\S+)                       # capture month 
        \s+                         # skip spaces
        (\S+)                       # capture year
}xms; 

my %months = ( 
    Jan => '01', Feb => '02', Mar => '03', Apr => '04', May => '05',
    Jun => '06', Jul => '07', Aug => '08', Sep => '09', Oct => '10',
    Nov => '11', Dec => '12'
);

# standard regexes
# note on archive suffixes -- .pm.gz shows up in 02packagesf
my %re = (
    perls => qr{(?:
		  		  /(?:emb|syb|bio)?perl-\d 
				| /(?:parrot|ponie|kurila|Perl6-Pugs)-\d 
				| /perl-?5\.004 
				| /perl_mlb\.zip 
        )}xi,
    archive => qr{\.(?:tar\.(?:bz2|gz|Z)|t(?:gz|bz)|(?<!ppm\.)zip|pm.gz)$}i,
    target_dir => qr{
                ^(?:
                        modules/by-module/[^/]+/./../ | 
                        modules/by-module/[^/]+/ | 
                        modules/by-category/[^/]+/[^/]+/./../ | 
                        modules/by-category/[^/]+/[^/]+/ | 
                        authors/id/./../ 
                )
        }x,
    leading_initials => qr{(.)/\1./},
);

# match version and suffix
$re{version_suffix} = qr{([-._]v?[0-9].*)?($re{archive})};

# split into "AUTHOR/Name" and "Version"
$re{split_them} = qr{^(.+?)$re{version_suffix}$};

# matches "AUTHOR/tarball.suffix" or AUTHOR/modules/tarball.suffix
# and not other "AUTHOR/subdir/whatever"

# Just get AUTHOR/tarball.suffix from whatever file name is passed in
sub _get_base_id { 
    my $file = shift;
    my $base_id = $file;
    $base_id =~ s{$re{target_dir}}{};
    return $base_id;
}

sub _base_name {
    my ($base_id) = @_;
    my $base_file = basename $base_id;
    my ($base_name, $base_version) = $base_file =~ $re{split_them};
    return $base_name;
}

#--------------------------------------------------------------------------#
# _parse_module_index
#
# parse index and return array_ref of distributions in reverse date order
#--------------------------------------------------------------------------#-

sub _parse_module_index {
    my ($self, $packages, $file_ls ) = @_;

	# first walk the packages list
    # and build an index

    my (%valid_bases, %valid_distros, %mirror);
    my (%latest, %latest_dev);

    my $gz = Compress::Zlib::gzopen($packages, "rb")
        or die "Cannot open package list: $Compress::Zlib::gzerrno";

    $self->trace( "Scanning 02packages.details ...\n" );
    my $inheader = 1;
    while ($gz->gzreadline($_) > 0) {
        if ($inheader) {
            $inheader = 0 unless /\S/;
            next;
        }

        my ($module, $version, $path) = split;
        next if $self->_filter_module({
                module  => $module,
                version => $version,
                path    => $path,
            });
        
        my $base_id = _get_base_id("authors/id/$path");
        $valid_distros{$base_id}++;
        my $base_name = _base_name( $base_id );
        if ($base_name) {
            $latest{$base_name} = {
                datetime => 0,
                base_id => $base_id
            };
        }
    }

#    use DDS;
#    $self->trace("Distros\n");
#    Dump \%valid_distros;
#    $self->trace("Bases\n");
#    Dump \%valid_bases;

    # next walk the find-ls file
    local *FH;
    tie *FH, 'CPAN::Tarzip', $file_ls;

    $self->trace( "Scanning find-ls ...\n" );
    while ( defined ( my $line = <FH> ) ) {
        my %stat;
        @stat{qw/inode blocks perms links owner group size datetime name linkname/}
            = split q{ }, $line;
        
        unless ($stat{name} && $stat{perms} && $stat{datetime}) {
            $self->trace("Couldn't parse '$line' \n");
            next;
        }
        # skip directories, symlinks and things that aren't a tarball
        next if $stat{perms} eq "l" || substr($stat{perms},0,1) eq "d";
        next unless $stat{name} =~ $re{target_dir};
        next unless $stat{name} =~ $re{archive};

        # skip if not AUTHOR/tarball 
        # skip perls
        my $base_id = _get_base_id($stat{name});
        next unless $base_id; 
        
        next if $base_id =~ $re{perls};

        my $base_name = _base_name( $base_id );

        # if $base_id matches 02packages, then it is the latest version
        # and we definitely want it; also update datetime from the initial
        # assumption of 0
        if ( $valid_distros{$base_id} ) {
            $mirror{$base_id} = $stat{datetime};
            next unless $base_name;
            if ( $stat{datetime} > $latest{$base_name}{datetime} ) {
                $latest{$base_name} = { 
                    datetime => $stat{datetime}, 
                    base_id => $base_id
                };
            }
        }
        # if not in the packages file, we only want it if it resembles 
        # something in the package file and we only the most recent one
        else {
            # skip if couldn't parse out the name without version number
            next unless defined $base_name;

            # skip unless there's a matching base from the packages file
            next unless $latest{$base_name};

            # keep only the latest
            $latest_dev{$base_name} ||= { datetime => 0 };
            if ( $stat{datetime} > $latest_dev{$base_name}{datetime} ) {
                $latest_dev{$base_name} = { 
                    datetime => $stat{datetime}, 
                    base_id => $base_id
                };
            }
        }
    }

    # pick up anything from packages that wasn't found find-ls
    for my $name ( keys %latest ) {
        my $base_id = $latest{$name}{base_id};
        $mirror{$base_id} = $latest{$name}{datetime} unless $mirror{$base_id};
    }
          
    # for dev versions, it must be newer than the latest version of
    # the same base name from the packages file

    for my $name ( keys %latest_dev ) {
        if ( ! $latest{$name} ) {
            $self->trace( "Shouldn't be missing '$name' matching '$latest_dev{$name}{base_id}'\n" );
            next;
        }
        next if $latest{$name}{datetime} > $latest_dev{$name}{datetime};
        $mirror{ $latest_dev{$name}{base_id} } = $latest_dev{$name}{datetime} 
    }

    my $mirror_list =
        [ sort map { s{^(((.).).+)$}{authors/id/$3/$2/$1}; $_ } keys %mirror ];

    return $mirror_list;
}

1; #modules must return true

__END__

#--------------------------------------------------------------------------#
# pod documentation 
#--------------------------------------------------------------------------#