| Mariachi documentation | Contained in the Mariachi distribution. |
Mariachi - all dancing mail archive generator
An AppConfig object containing the current configuration. See mariachi for details of the configurable items.
The current set of messages
The rootset of threaded messages
Used internally by the _bench method
All of these are instance methods, unless stated.
your general class-method constructor
populate messages from input
remove duplicates from messages
some messages have been near mail2news gateways, which means that some
message ids in the references and in-reply-to headers get munged
like so: <$group/$message_id>
fix this in messages
populate rootset with an Email::Thread::Containers created from
messages
order rootset by date
(in)sanity test - check everything in messages is reachable when
walking rootset
run a strand through all messages - wander over threader setting
the Message ->next and ->prev links
wander over rootset reparenting subthreads that are
considered too deep
copy files into the output dir
render thread tree into the directory of output
do all the right steps
This code was written as part of the Siesta project and includes code from:
Richard Clamp <richardc@unixbeard.net>
Simon Wistow <simon@thegestalt.org>
Tom Insam <tom@jerakeen.org>
Mark Fowler <mark@twoshortplanks.com>
More information about the Siesta project can be found online at http://siesta.unixbeard.net/
Copyright 2003 The Siesta Project
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Mariachi documentation | Contained in the Mariachi distribution. |
use strict; package Mariachi; use Email::Thread; use Template; use Time::HiRes qw( gettimeofday tv_interval ); use Storable qw( store retrieve ); use File::Path qw( mkpath ); use File::Copy qw( copy move ); use File::Find::Rule; use File::Basename; use base qw( Class::Accessor::Fast ); our $VERSION = '0.52'; __PACKAGE__->mk_accessors( qw( config messages rootset start_time last_time tt ) );
sub new { my $class = shift; $class->SUPER::new({@_}); } sub _bench { my $self = shift; my $message = shift; my $now = [gettimeofday]; my $start = $self->start_time; my $last = $self->last_time || $now; $start = $self->start_time($now) unless $start; printf "%-50s %.3f elapsed %.3f total\n", $message, tv_interval( $last, $now ), tv_interval( $start, $now ); $self->last_time($now); }
sub load { my $self = shift; my $folder = Mariachi::Folder->new( $self->config->input ) or die "Unable to open ".$self->config->input; $| = 1; my $cache; $cache = $self->config->input.".cache" if $ENV{M_CACHE}; if ($cache && -e $cache && !$self->config->refresh) { print "pulling in $cache\n"; $self->messages( retrieve( $cache ) ); return; } my $count = 0; my @msgs; while (my $msg = $folder->next_message) { push @msgs, $msg; print STDERR "\r$count messages" if ++$count % 100 == 0; } print STDERR "\n"; if ($cache) { print "caching\n"; store( \@msgs, $cache ); } $self->messages( \@msgs ); }
sub dedupe { my $self = shift; my (%seen, @new, $dropped); $dropped = 0; for my $mail (@{ $self->messages }) { my $msgid = $mail->header('message-id'); if ($seen{$msgid}++) { $dropped++; next; } push @new, $mail; } print "dropped $dropped duplicate messages\n"; $self->messages(\@new); }
sub sanitise { my $self = shift; for my $mail (@{ $self->messages }) { for (qw( references in_reply_to )) { my $hdr = $mail->header($_) or next; my $before = $hdr; $hdr =~ s{<[^>]*?/}{<}g or next; #print "$_ $before$_: $hdr"; $mail->header_set($_, $hdr); } } }
# the Fisher-Yates shuffle from perlfaq4 sub _shuffle { my $array = shift; my $i; for ($i = @$array; --$i; ) { my $j = int rand ($i+1); @$array[$i,$j] = @$array[$j,$i]; } } sub thread { my $self = shift; #_shuffle $self->messages; my $threader = Email::Thread->new( @{ $self->messages } ); $threader->thread; $self->rootset( [ grep { $_->topmost } $threader->rootset ] ); }
sub order { my $self = shift; my @rootset = @{ $self->rootset }; $_->order_children( sub { sort { eval { $a->topmost->message->epoch_date } <=> eval { $b->topmost->message->epoch_date } } @_ }) for @rootset; # we actually want the root set to be ordered latest first @rootset = sort { $b->topmost->message->epoch_date <=> $a->topmost->message->epoch_date } @rootset; # And optionally reverse the order @rootset = reverse(@rootset) if $self->config->reverse; $self->rootset( \@rootset ); }
sub sanity { my $self = shift; my %mails = map { $_ => $_ } @{ $self->messages }; my $count; my $check = sub { my $cont = shift or return; my $mail = $cont->message or return; ++$count; #print STDERR "\rverify $count"; delete $mails{ $mail || '' }; }; $_->iterate_down( $check ) for @{ $self->rootset }; undef $check; #print STDERR "\n"; return unless %mails; die "\nDidn't see ".(scalar keys %mails)." messages"; print join "\n", map { my @ancestors; my $x = $_->container; my %seen; my $last; while ($x) { if ($seen{$x}++) { push @ancestors, "$x ancestor loop!\n"; last } my $extra = $x->{id}; $extra .= " one-way" if $last && !grep { $last == $_ } $x->children; push @ancestors, $x." $extra"; $last = $x; $x = $x->parent; } $_->header("message-id"), @ancestors } values %mails; }
sub strand { my $self = shift; my $prev; for my $root (@{ $self->rootset }) { my $sub = sub { my $mail = $_[0]->message or return; $prev->next($mail) if $prev; $mail->prev($prev); $mail->root($root); $prev = $mail; }; $root->iterate_down( $sub ); undef $sub; } }
sub split_deep { my $self = shift; my @toodeep; for my $root (@{ $self->rootset }) { my $sub = sub { my ($cont, $depth) = @_; # only note first entries if ($depth && ($depth % 6 == 0) && $cont->parent->child == $cont) { push @toodeep, $cont; } }; $root->iterate_down( $sub ); undef $sub; } print "splicing threads in ", scalar @toodeep, " places\n"; for (@toodeep) { # the top one needs to be empty, because we're cheating. # to keep references straight, we'll move its content my $top = $_->topmost; my $root = $top->message->root or die "batshit!"; if ($root->message) { my $new = Mail::Thread::Container->new($root->messageid); $root->messageid('dummy'); $new->message($root->message); $root->message(undef); $new->child($root->child); $root->child($new); $root = $new; } $root->add_child( $_ ); } }
sub copy_files { my $self = shift; for my $dir (@{ $self->config->templates }) { my @files = map { s{$dir/?}{}; $_ } find( or => [ find( directory => name => [ qw( CVS .svn ) ], prune => discard => ), find( file => '!name' => [ '*.tt2', '*~', '*.bak' ] ) ], in => $dir ); for (@files) { mkpath dirname $self->config->output . "/$_"; copy( "$dir/$_", $self->config->output . "/$_" ) or die "couldn't copy $dir/$_ $!"; } } }
sub init_tt { my $self = shift; $self->tt( Template->new( INCLUDE_PATH => join(':', reverse @{ $self->config->templates }), RECURSION => 1 ) ); }
sub nthpage { my $self = shift; my $n = shift; my $page = shift; return $page if $n == 1; --$n; $page =~ s/\./_$n./; return $page; } sub generate_pages { my $self = shift; my $template = shift; my $spool = shift; my $again; do { my $file = $spool; $self->tt->process( $template, { @_, mariachi => $self, spool => $spool, # callbacktastic nthpage => sub { $self->nthpage( shift, $spool ) }, again => sub { $again }, file => sub { $file }, set_again => sub { $again = shift; return }, set_file => sub { $file = shift; return }, }, $self->config->output . "/$$.tmp" ) or die $self->tt->error; mkpath dirname $self->config->output . "/$file"; move $self->config->output . "/$$.tmp", $self->config->output . "/$file" or die "$!"; } while $again; }
sub generate_lurker { my $self = shift; return unless $self->config->lurker; my $l = Mariachi::Lurker->new; $self->generate_pages( 'lurker.tt2', 'lurker.html', content => [ map { [ $l->arrange( $_ ) ] } @{ $self->rootset } ], perpage => 10, ); }
sub generate_thread { my $self = shift; $self->generate_pages( 'index.tt2', 'index.html', content => $self->rootset, perpage => 20, ); }
sub generate_date { my $self = shift; my %touched_dates; my %dates; # wander things to find dirty threads, and dates for my $root (@{ $self->rootset }) { my $sub; $sub = sub { my $c = shift or return; if (my $mail = $c->message) { # mark the thread dirty, if the message is new unless (-e $self->config->output."/".$mail->filename && !$self->config->refresh) { # dirty up the date indexes $touched_dates{ $mail->year } = 1; $touched_dates{ $mail->month } = 1; $touched_dates{ $mail->day } = 1; } # add things to the date indexes push @{ $dates{ $mail->year } }, $mail; push @{ $dates{ $mail->month } }, $mail; push @{ $dates{ $mail->day } }, $mail; } }; $root->iterate_down($sub); undef $sub; # since we closed over ourself, we'll have to be specific } for ( keys %touched_dates ) { my @mails = sort { $a->epoch_date <=> $b->epoch_date } @{ $dates{$_} }; my @depth = split m!/!; $self->generate_pages( 'date.tt2', "$_/index.html", archive_date => $_, content => \@mails, base => "../" x @depth, perpage => 20, ); } }
sub generate_bodies { my $self = shift; my %touched_threads; # wander things to find dirty threads for my $root (@{ $self->rootset }) { my $sub; $sub = sub { if (my $mail = eval { $_[0]->message }) { # mark the thread dirty, if the message is new $touched_threads{ $root } = $root unless -e $self->config->output."/".$mail->filename && !$self->config->refresh; } }; $root->iterate_down($sub); undef $sub; # since we closed over ourself, we'll have to be specific } # figure out adjacent dirty threads my @threads = @{ $self->rootset }; for my $i (grep { $touched_threads{ $threads[$_] } } 0..$#threads) { $touched_threads{ $threads[$i-1] } = $threads[$i-1] if $i > 0; $touched_threads{ $threads[$i+1] } = $threads[$i+1] if $i+1 < @threads; } # and then render all the messages in the dirty threads my $count = 0; my $tt = $self->tt; for my $root (values %touched_threads) { my $sub = sub { my $mail = $_[0]->message or return; print STDERR "\rmessage $count" if ++$count % 100 == 0; $tt->process('message.tt2', { base => '../../../', mariachi => $self, thread => $root, message => $mail, container => $_[0], }, $self->config->output . "/" . $mail->filename) or die $tt->error; }; $root->recurse_down( $sub ); undef $sub; } print STDERR "\n"; }
sub perform { my $self = shift; $self->_bench("reticulating splines"); $self->load; $self->_bench("load ".scalar @{ $self->messages }); $self->dedupe; $self->_bench("dedupe"); #$self->sanitise; $self->_bench("sanitise"); $self->thread; $self->_bench("thread"); $self->sanity; $self->_bench("sanity"); $self->order; $self->_bench("order"); $self->sanity; $self->_bench("sanity"); $self->copy_files; $self->_bench("copy files"); $self->init_tt; $self->_bench("tt init"); $self->generate_lurker; $self->_bench("lurker output"); $self->strand; $self->_bench("strand"); $self->split_deep; $self->_bench("deep threads split up"); $self->sanity; $self->_bench("sanity"); $self->order; $self->_bench("order"); $self->generate_thread; $self->_bench("regular thread indexes"); $self->generate_date; $self->_bench("date indexes"); $self->generate_bodies; $self->_bench("messages"); } package Mariachi::Folder; use Mariachi::Message; use Email::Folder; use base 'Email::Folder'; sub bless_message { Mariachi::Message->new($_[1]) } package Mariachi::Lurker; use Mail::Thread::Chronological; use base 'Mail::Thread::Chronological'; sub extract_time { $_[1]->message->epoch_date } 1; __END__