| Daizu documentation | Contained in the Daizu distribution. |
Daizu::Revision - functions for loading revisions from Subversion
These functions are used to load metadata about revisions (and the file path changes made in them) from the Subversion into the PostgreSQL database.
The following functions are available for export from this module. None of them are exported by default.
Load information about new revisions, up to $desired_revnum.
It starts from the revision after the last one which was loaded, and
is idempotent (so if you try to load the same revision twice there will
be no change).
If $desired_revnum is not defined, loads up to the most recent revision
in the repository.
This can also be called as a method on a Daizu object.
Return a reference to a hash of known branches. The keys are the paths,
and the values are the ID numbers found in the branch table.
Dies if it can't find a branch with the path trunk, because that indicates
a broken database.
Return a list of two values, the ID number and path of the branch
which a file at $path would be in, whether or not it actually exists.
The path should be relative to the root of the repository, for example
trunk/foo.html. It doesn't mater whether $path starts with a forward
slash.
Returns nothing if the path is not in any branch, in which case Daizu CMS will simply ignore it.
Returns a reference to a hash of information about the GUID for the file
in branch $branch_id at $path in revision $revnum, or undef
if there is/was no such file.
The hash will contain the following keys:
GUID ID.
True iff the associated file is a directory.
The GUID URI (usually starting with tag:). This will be the custom
GUID URI if overridden by a daizu:guid property.
True if a daizu:guid property has overridden the automatically
generated GUID URI.
The number of the first revision in which $path was used for this
file in this branch.
The number of the last revision in which $path was used for this
file in this branch, or undef if it is still being used in the most
recently loaded revision.
This software is copyright 2006 Geoff Richards <geoff@laxan.com>. For licensing information see this page:
| Daizu documentation | Contained in the Daizu distribution. |
package Daizu::Revision; use warnings; use strict; use base 'Exporter'; our @EXPORT_OK = qw( load_revision known_branches branch_and_path file_guid ); use SVN::Core; use SVN::Ra; use SVN::Repos; use SVN::Delta; use Carp qw( croak ); use Carp::Assert qw( assert DEBUG ); use Daizu::Util qw( like_escape validate_uri db_datetime db_row_exists db_select db_insert db_update transactionally mint_guid get_subversion_properties );
sub load_revision { my ($cms, $desired_rev) = @_; croak "bad revision number r$desired_rev" if defined $desired_rev && $desired_rev < 1; return transactionally($cms->{db}, \&_load_revision_txn, $cms, $desired_rev); } sub _load_revision_txn { my ($cms, $desired_rev) = @_; my $db = $cms->{db}; my $ra = $cms->{ra}; my $latest_rev = $ra->get_latest_revnum; $desired_rev = $latest_rev unless defined $desired_rev; croak "can't load up to r$desired_rev, latest revision is r$latest_rev" if $desired_rev > $latest_rev; my $last_known_rev = db_select($db, revision => {}, 'max(revnum)'); $last_known_rev ||= 0; assert($last_known_rev <= $latest_rev) if DEBUG; # Return quickly if there's nothing to do. return $last_known_rev if $last_known_rev >= $desired_rev; my $branches = known_branches($db); my $trunk_id = $branches->{trunk}; for my $revnum (($last_known_rev + 1) .. $desired_rev) { my @modified; my (@added, @copied, @deleted); my $date; # Gather information about the changes in this revision. # The database is only updated later, after the callback is finished, # because we might need to get other information from $ra, and that's # not allowed while get_log() is running. $ra->get_log('/', $revnum, $revnum, 0, 1, 1, sub { my ($paths, undef, undef, $rev_date) = @_; $date = $rev_date; while (my ($full_path, $changes) = each %$paths) { my $action = $changes->action; # Modified files don't affect identity, we just need to record # their paths so that we can check for property changes that # might affect the GUID URI. Only changes on trunk affect it. if ($action eq 'M') { next unless $full_path =~ s!^/trunk/!! && $full_path ne ''; push @modified, $full_path; next; } my ($branch_id, $path) = branch_and_path($branches, $full_path); # Ignore files which live outside branches, except when we # see a new branch being created, in which case make a note # of where it is in the 'branch' table. We recognize a new # branch as a copy from an existing branch to a new location # outside any existing branches. if (!defined $branch_id) { my $from = $changes->copyfrom_path; next unless defined $from; my $to = $full_path; $_ =~ s!^/!! for $from, $to; next unless exists $branches->{$from}; $path = ''; $branch_id = db_insert($db, 'branch', path => $to); $branches->{$to} = $branch_id; } if ($action ne 'D') { # add or replace my $from_full_path = $changes->copyfrom_path; if (defined $from_full_path) { croak "Error in revision $revnum: file $full_path copied from root directory" if $from_full_path eq '/'; my ($from_branch_id, $from_path) = branch_and_path($branches, $from_full_path); croak "Error in revision $revnum: file $full_path copied from unknown branch" unless defined $from_branch_id; push @copied, { full_path => $full_path, path => $path, branch_id => $branch_id, from_full_path => $from_full_path, from_path => $from_path, from_branch_id => $from_branch_id, from_rev => $changes->copyfrom_rev, }; } else { push @added, [ $branch_id, $path, $full_path ]; } } if ($action ne 'A') { # delete or replace push @deleted, [ $branch_id, $path ]; } } }); _add_revision($db, $revnum, $date); _revision_guid_modifications($ra, $db, $trunk_id, $revnum, \@modified) if @modified; _revision_guid_path_changes($cms, $ra, $db, $trunk_id, $revnum, \@added, \@copied, \@deleted) if @added || @copied || @deleted; } return $desired_rev; }
sub known_branches { my ($db) = @_; my $sth = $db->prepare('select id, path from branch'); $sth->execute; my %branch; while (my ($id, $path) = $sth->fetchrow_array) { $branch{$path} = $id; } croak "there is no branch called 'trunk' in the database" unless exists $branch{trunk}; return \%branch; }
sub branch_and_path { my ($branches, $path) = @_; $path =~ s/^\///; return if $path eq '/'; # Don't care about root directory # Figure out which branch this path is on. Do this by checking # ever longer prefixes of the path, since that will allow us to # find 'trunk' very quickly. my @path = split '/', $path; my $branch_path = ''; my $branch_id; while (@path) { $branch_path .= '/' unless $branch_path eq ''; $branch_path .= shift @path; next unless exists $branches->{$branch_path}; $branch_id = $branches->{$branch_path}; last; } # Ignore changes to files which aren't in a branch we know about. return unless defined $branch_id; # The file/directory path relative to the branch directory. # Empty string for the top level directory. $path = $path eq $branch_path ? '' : substr $path, length($branch_path) + 1; return ($branch_id, $path); }
sub file_guid { my ($db, $branch_id, $path, $revnum) = @_; return $db->selectrow_hashref(q{ select g.id, g.is_dir, g.uri, g.old_uri, g.custom_uri, p.first_revnum, p.last_revnum from file_guid g inner join file_path p on g.id = p.guid_id where p.branch_id = ? and p.path = ? and p.first_revnum <= ? and (p.last_revnum is null or p.last_revnum >= ?) }, undef, $branch_id, $path, $revnum, $revnum); } sub _add_revision { my ($db, $revnum, $date) = @_; assert(defined $revnum) if DEBUG; $date = db_datetime($date); croak "revision r$revnum has no datetime stamp, or it is invalid" unless defined $date; db_insert($db, 'revision', revnum => $revnum, committed_at => $date, ); } sub _adjust_custom_uri { my ($ra, $db, $path, $revnum, $guid) = @_; assert($path ne '') if DEBUG; my $full_path = "trunk/$path"; my $props = get_subversion_properties($ra, $full_path, $revnum); return unless defined $props; # not present in trunk if (exists $props->{'daizu:guid'}) { my $new_uri = validate_uri($props->{'daizu:guid'}); croak "error in revision $revnum: invalid URI in 'daizu:guid' property on '$full_path'" unless defined $new_uri; $new_uri = $new_uri->canonical; if ($guid->{custom_uri}) { if ($guid->{uri} ne $new_uri) { # There was a custom URI already, but it has been changed. db_update($db, file_guid => $guid->{id}, uri => $new_uri); } } else { # The guid property has been added, so switch from the standard # guid to the custom one. $db->do(q{ update file_guid set custom_uri = true, old_uri = uri, uri = ? where id = ? }, undef, $new_uri, $guid->{id}); } } elsif ($guid->{custom_uri}) { # The guid property has been removed, so switch back to the # original standard GUID. $db->do(q{ update file_guid set uri = old_uri, old_uri = null, custom_uri = false where id = ? }, undef, $guid->{id}); } } sub _revision_guid_modifications { my ($ra, $db, $trunk_id, $revnum, $modified) = @_; for my $path (@$modified) { assert($path ne '') if DEBUG; my $guid = file_guid($db, $trunk_id, $path, $revnum); croak "modified file 'trunk/$path' has no GUID in revision $revnum" unless defined $guid; db_update($db, file_guid => $guid->{id}, last_changed_revnum => $revnum); _adjust_custom_uri($ra, $db, $path, $revnum, $guid); } } sub _revision_guid_path_changes { my ($cms, $ra, $db, $trunk_id, $revnum, $added, $copied, $deleted) = @_; # Record last revnum of deleted paths. for my $del (@$deleted) { my ($branch_id, $path) = @$del; if ($path eq '') { # If the top-level directory is deleted, that means delete # everything on the branch. db_update($db, file_path => { branch_id => $branch_id, last_revnum => undef }, last_revnum => $revnum - 1, ); } else { db_update($db, file_path => { branch_id => $branch_id, path => $path, last_revnum => undef }, last_revnum => $revnum - 1, ); # If it's a directory, mark the demise of all its children. $db->do(q{ update file_path set last_revnum = ? where branch_id = ? and path like ? and last_revnum is null }, undef, $revnum - 1, $branch_id, like_escape($path) . '/%'); } } # Process copies sorted in reverse order, so that subdirectories are # done before their parents. That way, when I process all the paths # within a copied directory I can skip any which have already been # processed separately, because for example the target subdirectory was # copied from somewhere else. my %source_guid; my %added_path; for (sort { $b->{path} cmp $a->{path} } @$copied) { # If it's not the top-level directory, process the copy. my $is_dir = 1; if ($_->{from_path} ne '') { my $guid = file_guid($db, $_->{from_branch_id}, $_->{from_path}, $_->{from_rev}); croak "Error in revision $revnum: file $_->{full_path} copied from source with no GUID ($_->{from_full_path} r$_->{from_rev})" unless defined $guid; push @{$source_guid{$_->{branch_id}}{$guid->{id}}}, $_; undef $added_path{$_->{path}}; $is_dir = $guid->{is_dir}; } # If the path being copied is a directory, then also copy all of its # children from the same source. if ($is_dir) { my $branch_path = db_select($db, branch => $_->{branch_id}, 'path'); my $from_branch_path = db_select($db, branch => $_->{from_branch_id}, 'path'); my $sth = $db->prepare(q{ select path, guid_id from file_path where branch_id = ? and path like ? and first_revnum <= ? and (last_revnum is null or last_revnum >= ?) }); $sth->execute($_->{from_branch_id}, ($_->{from_path} eq '' ? '%' : like_escape($_->{from_path}) . '/%'), $_->{from_rev}, $_->{from_rev}); my $prefix_len = length $_->{from_path}; ++$prefix_len if $prefix_len; # separating / while (my ($from_path, $guid_id) = $sth->fetchrow_array) { my $from_full_path = "$from_branch_path/$from_path"; my $child_path = substr $from_path, $prefix_len; my $path = $_->{path} eq '' ? $child_path : "$_->{path}/$child_path"; next if exists $added_path{$path}; my $full_path = "$branch_path/$path"; push @{$source_guid{$_->{branch_id}}{$guid_id}}, { full_path => $full_path, path => $path, branch_id => $_->{branch_id}, from_full_path => $from_full_path, from_path => $from_path, from_branch_id => $_->{from_branch_id}, from_rev => $_->{from_rev}, }; undef $added_path{$path}; } } } while (my ($branch_id, $guids) = each %source_guid) { while (my ($guid_id, $copies) = each %$guids) { my @copies = sort { $a->{full_path} cmp $b->{full_path} } @$copies; my $guid_already_present = db_row_exists($db, file_path => guid_id => $guid_id, branch_id => $branch_id, last_revnum => undef, ); # If there isn't already a live path in the target branch for this # GUID then one of the copies with it gets to keep it. if (!$guid_already_present) { my $keep = shift @copies; db_insert($db, 'file_path', guid_id => $guid_id, path => $keep->{path}, branch_id => $keep->{branch_id}, first_revnum => $revnum, ); if ($keep->{branch_id} == $trunk_id) { my $guid = file_guid($db, $trunk_id, $keep->{path}, $revnum); _adjust_custom_uri($ra, $db, $keep->{path}, $revnum, $guid); db_update($db, file_guid => $guid->{id}, last_changed_revnum => $revnum); } } # Copies which can't keep their GUID, because it's already live # in the target branch, get treated just like adds without history. for (@copies) { push @$added, [ $_->{branch_id}, $_->{path}, $_->{full_path} ]; } } } for my $add (@$added) { my ($branch_id, $path, $full_path) = @$add; next if $path eq ''; # First mint a new GUID for it. my $is_dir = $ra->check_path($full_path, $revnum) == $SVN::Node::dir; my ($guid_id, $guid_uri) = mint_guid($cms, $is_dir, $path, $revnum); _adjust_custom_uri($ra, $db, $path, $revnum, { uri => $guid_uri, id => $guid_id, is_dir => $is_dir, }); db_insert($db, file_path => guid_id => $guid_id, path => $path, branch_id => $branch_id, first_revnum => $revnum, ); } }
1; # vi:ts=4 sw=4 expandtab