ORDB::CPANMeta::Generator - Generator for the CPAN Meta database


ORDB-CPANMeta-Generator documentation Contained in the ORDB-CPANMeta-Generator distribution.

Index


Code Index:

NAME

Top

ORDB::CPANMeta::Generator - Generator for the CPAN Meta database

DESCRIPTION

Top

This is the module that is used to generate the "CPAN Meta" database.

For more information, and to access this database as a consumer, see the ORDB::CPANMeta module.

The bulk of the work done in this module is actually achieved with:

CPAN::Mini - Fetching the index and dist tarballs

CPAN::Mini::Visit - Expanding and processing the tarballs

Xtract - Preparing the SQLite database for distribution

METHODS

Top

new

The new constructor creates a new processor/generator.

dir

The dir method returns the directory that the SQLite database will be written into.

dsn

The dsn method returns the DBI DSN that is used to connect to the generated database.

run

The run method executes the process that will produce and fill the final database.

SUPPORT

Top

Bugs should be reported via the CPAN bug tracker at

http://rt.cpan.org/NoAuth/ReportBug.html?Queue=ORDB-CPANMeta-Generator

For other issues, contact the author.

AUTHOR

Top

Adam Kennedy <adamk@cpan.org>

COPYRIGHT

Top


ORDB-CPANMeta-Generator documentation Contained in the ORDB-CPANMeta-Generator distribution.
package ORDB::CPANMeta::Generator;

use 5.008005;
use strict;
use Carp                     ();
use File::Spec          3.29 ();
use File::Path          2.07 ();
use File::Remove        1.42 ();
use File::HomeDir       0.86 ();
use File::Basename         0 ();
use Module::CoreList    2.46 ();
use Parse::CPAN::Meta 1.4200 ();
use Params::Util        1.00 ();
use Getopt::Long        2.34 ();
use DBI                1.609 ();
use CPAN::Mini         0.576 ();
use CPAN::Mini::Visit   0.11 ();
use Xtract::Publish     0.12 ();

our $VERSION = '0.11';

use Object::Tiny 1.06 qw{
	minicpan
	sqlite
	publish
	visit
	trace
	delta
	prefer_bin
	warnings
	dbh
};





######################################################################
# Constructor and Accessors

sub new {
	my $self = shift->SUPER::new(@_);

	# Set the default path to the database
	unless ( defined $self->sqlite ) {
		$self->{sqlite} = File::Spec->catdir(
			File::HomeDir->my_data,
			($^O eq 'MSWin32' ? 'Perl' : '.perl'),
			'ORDB-CPANMeta-Generator',
			'metadb.sqlite',
		);
	}

	# Set the default path to the publishing location
	unless ( exists $self->{publish} ) {
		$self->{publish} = 'cpanmeta';
	}

	return $self;
}

sub dir {
	File::Basename::dirname($_[0]->sqlite);
}

sub dsn {
	"DBI:SQLite:" . $_[0]->sqlite
}





######################################################################
# Main Methods

sub run {
	my $self = shift;

	# Normalise
	$self->{prefer_bin} = $self->prefer_bin ? 1 : 0;

	# Create the output directory
	File::Path::make_path($self->dir);
	unless ( -d $self->dir ) {
		Carp::croak("Failed to create '" . $self->dir . "'");
	}

	# Clear the database if it already exists
	unless ( $self->delta ) {
		if ( -f $self->sqlite ) {
			File::Remove::remove($self->sqlite);
		}
		if ( -f $self->sqlite ) {
			Carp::croak("Failed to clear " . $self->sqlite);
		}
	}

	# Update the minicpan if needed
	if ( Params::Util::_HASH($self->minicpan) ) {
		CPAN::Mini->update_mirror(
			trace         => $self->trace,
			no_conn_cache => 1,
			%{$self->minicpan},
		);
		$self->{minicpan} = $self->minicpan->{local};
	}

	# Connect to the database
	my $dbh = DBI->connect($self->dsn);
	unless ( $dbh ) {
		Carp::croak("connect: \$DBI::errstr");
	}

	# Create the tables
	$dbh->do(<<'END_SQL');
CREATE TABLE IF NOT EXISTS meta_distribution (
	release TEXT NOT NULL,
	meta INTEGER,
	meta_name TEXT,
	meta_version TEXT,
	meta_abstract TEXT,
	meta_generated TEXT,
	meta_from TEXT,
	meta_license TEXT
);
END_SQL

	$dbh->do(<<'END_SQL');
CREATE TABLE IF NOT EXISTS meta_dependency (
	release TEXT NOT NULL,
	module TEXT NOT NULL,
	version TEXT NULL,
	phase TEXT NOT NULL,
	core REAL NULL
)
END_SQL

	### NOTE: This does nothing right now but will later.
	# Build the index of seen archives.
	# While building the index, remove entries
	# that are no longer in the minicpan.
	my $ignore = undef;
	if ( $self->delta ) {
		$dbh->begin_work;
		my %seen  = ();
		my $dists = $dbh->selectcol_arrayref(
			'SELECT DISTINCT release FROM meta_distribution'
		);
		foreach my $dist ( @$dists ) {
			my $one  = substr($dist, 0, 1);
			my $two  = substr($dist, 0, 2);
			my $path = File::Spec->catfile(
				$self->minicpan,
				'authors', 'id',
				$one, $two,
				split /\//, $dist,
			);
			if ( -f $path ) {
				# Add to the ignore list
				$seen{"$one/$two/$dist"} = 1;
				next;
			}

			# Clear the release from the database
			$dbh->do(
				'DELETE FROM meta_distribution WHERE release = ?',
				{}, $dist,
			);
		}
		$dbh->do(
			'DELETE FROM meta_dependency WHERE release NOT IN '
			. '( SELECT release FROM meta_distribution )',
		);
		$dbh->commit;

		# NOW we need to start ignoring something
		$ignore = [ sub { $seen{$_[0]} } ];
	}

	# Clear indexes for speed
	$self->drop_indexes( $dbh );

	# Run the visitor to generate the database
	$dbh->begin_work;
	my @meta_dist = ();
	my @meta_deps = ();
	my $visitor   = CPAN::Mini::Visit->new(
		acme       => 1,
		warnings   => $self->warnings,
		minicpan   => $self->minicpan,
		# This does nothing now but will later
		ignore     => $ignore,
		prefer_bin => $self->prefer_bin,
		callback   => sub {
			print STDERR "$_[0]->{dist}\n" if $self->trace;
			my $the  = shift;
			my @deps = ();
			my $dist = {
				release => $the->{dist},
				meta    => 0,
			};
			my $yaml_file = File::Spec->catfile(
				$the->{tempdir}, 'META.yml',
			);
			my $json_file = File::Spec->catfile(
				$the->{tempdir}, 'META.json',
			);
			my @data = ();
			if ( -f $json_file ) {
				@data = eval {
					Parse::CPAN::Meta->load_file($json_file)
				};
			} elsif ( -f $yaml_file ) {
				@data = eval {
					Parse::CPAN::Meta->load_file($yaml_file)
				};
			}
			unless ( $@ ) {
				$dist->{meta}           = 1;
				$dist->{meta_name}      = $data[0]->{name};
				$dist->{meta_version}   = $data[0]->{version};
				$dist->{meta_abstract}  = $data[0]->{abstract};
				$dist->{meta_generated} = $data[0]->{generated_by};
				$dist->{meta_from}      = $data[0]->{version_from};
				$dist->{meta_license}   = $data[0]->{license},

				# Configure-time dependencies
				my $configure = $data[0]->{configure_requires} || {};
				$configure = {
					$configure => 0,
				} unless ref $configure;
				push @deps, map { +{
					release => $the->{dist},
					phase   => 'configure',
					module  => $_,
					version => $configure->{$_},
				} } sort keys %$configure;

				# Build-time dependencies
				my $build = $data[0]->{build_requires} || {};
				$build = {
					$build => 0,
				} unless ref $build;
				push @deps, map { +{
					release => $the->{dist},
					phase   => 'build',
					module  => $_,
					version => $build->{$_},
				} } sort keys %$build;

				# Run-time dependencies
				my $requires = $data[0]->{requires} || {};
				$requires = {
					$requires => 0,
				} unless ref $requires;
				push @deps, map { +{
					release => $the->{dist},
					phase   => 'runtime',
					module  => $_,
					version => $requires->{$_},
				} } sort keys %$requires;
			}
			$dbh->do(
				'INSERT INTO meta_distribution VALUES ( ?, ?, ?, ?, ?, ?, ?, ? )', {},
				$dist->{release},
				$dist->{meta},
				$dist->{meta_name},
				$dist->{meta_version},
				$dist->{meta_abstract},
				$dist->{meta_generated},
				$dist->{meta_from},
				$dist->{meta_license},
			);
			$dbh->do(
				'INSERT INTO meta_dependency VALUES ( ?, ?, ?, ?, ? )', {},
				$_->{release},
				$_->{module},
				$_->{version},
				$_->{phase},
				$_->{module} eq 'perl'
					? $_->{version}
					: scalar Module::CoreList->first_release(
						$_->{module}, $_->{version},
					),
			) foreach @deps;
			unless ( $the->{counter} % 100 ) {
				$dbh->commit;
				$dbh->begin_work;
			}
		},
	);
	$visitor->run;
	$dbh->commit;

	# Generate the indexes
	$self->create_indexes( $dbh );

	# Clean and optimise the database
	$dbh->do('PRAGMA user_version = 10');
	$dbh->do('VACUUM');
	$dbh->do('ANALYZE main');

	# Publish the database to the current directory
	if ( defined $self->publish ) {
		print STDERR "Publishing the generated database...\n" if $self->trace;
		Xtract::Publish->new(
			from   => $self->sqlite,
			sqlite => $self->publish,
			trace  => $self->trace,
			raw    => 0,
			gz     => 1,
			bz2    => 1,
			lz     => 1,
		)->run;
	}

	return 1;
}





######################################################################
# Index Management

use constant INDEX => (
	[ 'meta_distribution', 'release' ],
	[ 'meta_dependency',   'release' ],
	[ 'meta_dependency',   'phase'   ],
	[ 'meta_dependency',   'module'  ],
);

sub drop_indexes {
	my $self = shift;
	my $dbh  = shift;
	foreach my $i ( INDEX ) {
		$dbh->do("DROP INDEX IF EXISTS $i->[0]__$i->[1]");
	}
	return 1;
}

sub create_indexes {
	my $self = shift;
	my $dbh  = shift;
	foreach my $i ( INDEX ) {
		$self->create_index( $dbh, @$i );
	}
	return 1;
}

sub create_index {
	$_[1]->do("CREATE INDEX IF NOT EXISTS $_[2]__$_[3] on $_[2] ( $_[3] )");
}

1;