| ponfish documentation | Contained in the ponfish distribution. |
Downloads new groups (or all groups)
Downloads headers and store them.
| ponfish documentation | Contained in the ponfish distribution. |
#!perl package Ponfish::News::Decoder; use strict; require Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); use Ponfish::ArticleJoiner; use Ponfish::News::MyNNTP; use Ponfish::Utilities; use Ponfish::Config; use IO::File; @ISA = qw(Exporter); @EXPORT = qw( ); $VERSION = '0.01'; sub extract_command_info { my $fn = shift; my $data = read_file( $fn ); my @fields = split /\t/, $data; return( $fields[0], $fields[1], [split /\,/, $fields[2]], $fields[3] ); } my $MAX_RETRIES = 3; my $SLEEP_TIME = 10; ### !!! my $ARTICLE_TIMEOUT = 60; ################################################################## sub new { my $type = shift; my $tid = shift; print "New '$type' instance created. (TID: $tid)\n"; return bless { servers => {}, active_group => "", active_server => "", tid => $tid, }, $type; } sub get_tid { my $self = shift; return $self->{tid}; } sub process_file { my $self = shift; my $fn = shift; # $self->msg( "Processing file: $fn\n" ); my $field_sep = FILENAME_FIELD_SEP; my($time,$first_id,$newsgroup,$server) = split /$field_sep/, $fn; my($cmd, $arg1, $arg2, $arg3) = extract_command_info( $fn ); # Note: $arg2 is a list ref. $self->set_active_server( $server ); $self->set_active_group( $newsgroup ); if( $cmd eq "save" ) { # die "save not implemented!"; return $self->cache_articles( $arg2 ); # return $self->save_articles( $arg, $articles ); } if( $cmd eq "decode" or $cmd eq "preview" ) { $self->cache_articles( $arg2 ); return $self->decode_cached_articles( $arg2, $arg1 ); } if( $cmd eq "erase" ) { $self->msg( "Erase command found in file: '$fn'. No download.\n" ); return 1; } if( $cmd eq "sync_articles" ) { return $self->sync_articles( $arg1 ); } if( $cmd eq "purge_unavailable" ) { return $self->purge_unavailable( $arg1 ); } if( $cmd eq "sync_groups" ) { return $self->sync_groups( $arg1, $arg3, $arg2 ); } }
sub sync_groups { my $self = shift; my $server_name = shift; my $groups_fn = shift; my $name = shift; $name = $name->[0]; my $last_time_filename = $groups_fn . ".last_newgroups_time"; $self->msg( "SG: $server_name, $groups_fn, ($name: $last_time_filename)\n" ); my $this_update_time = time; # Save current time... my $new_groups; # To hold the new newsgroups that we'll (maybe) download... my $NNTP = $self->get_or_create_active_server; if( ! $NNTP ) { log "Error connecting to server!"; return 0; } if( -f $last_time_filename ) { # Read the last filetime and get the new groups! my $time = read_file( $last_time_filename ); $self->msg( "Downloading NEW newsgroups from '$server_name' since '$time' (" . scalar(localtime($time)) . ")\n"); $new_groups = $NNTP->newgroups( $time ); } else { # Let's read all the groups! $self->msg( "Downloading ENTIRE newsgroup list from '$server_name'.\n" ); $new_groups = $NNTP->list; #newgroups( time - (3600 * 24 * 60) ); } # Save the data to file: my $OF = create_append_fh( $groups_fn ); for( sort keys %$new_groups ) { print $OF join(" ", $_, @{$new_groups->{$_}}),"\n"; } $OF->close; $OF = ""; overwrite_file( $this_update_time, $last_time_filename ); # Update the last time file... $self->msg( "Downloaded " . scalar( keys %$new_groups ) . " new groups.\n" ); return 0; # Force the command file to not be deleted! return scalar( keys %$new_groups ) || -1; } sub purge_unavailable { my $self = shift; my $ng = shift; my $NNTP = $self->get_or_create_active_server; my($num, $gf,$gl) = $NNTP->group( $ng ); # GroupFirst, GroupLast # Create / setup the ArticleJoiner... my $AJ = Ponfish::ArticleJoiner->new; $AJ->set_storage_path( ARTICLES_DIR, $self->{active_server}, $ng ); # $AJ->set_last_id_file( create_valid_filepath( ARTICLES_DIR, $self->{active_server}, $ng . ".retrieved" ) ); return $AJ->purge_articles( $gf, $ng ); }
sub sync_articles { my $self = shift; my $ng = shift; # First some set up: # Get the article number to start with: my $first = config_retrieve( ARTICLES_DIR, $self->{active_server}, $ng.".retrieved" ) || 0; $first =~ s/\n*//g; $first++; # Now it's the first article to get my $NNTP = $self->get_or_create_active_server; my($num, $gf,$gl) = $NNTP->group( $ng ); # GroupFirst, GroupLast $first = max($gf,$first); # GroupFirst may be larger than our last retrived article $self->msg( "Sync Articles: '$ng': From '$first' to '$gl'...\n" ); return 0 if( $gl == $first ); # No articles to retrive if( $gl < $first ) { warn "For group: '$ng': last < first ($gl < $first)"; return 0; } $first++; # The actual first article to grab... my $AJ = Ponfish::ArticleJoiner->new; $AJ->set_storage_path( ARTICLES_DIR, $self->{active_server}, $ng ); $AJ->set_last_id_file( create_valid_filepath( ARTICLES_DIR, $self->{active_server}, $ng . ".retrieved" ) ); # $NNTP->set_AJ( $AJ ); $AJ->load_prior_incompletes; # Load any prior incompletes $AJ->delete_incompletes_file; # Remove the incompletes files $NNTP->sync_headers_file( $ng, $first, $gl, $AJ ); # This does all of the work! $AJ->flush_all_incompletes; ### $AJ->write_out_old_incompletes( 2_000_000_000 ); $AJ->store_final_retrieved_numb; return 1; } sub set_active_server { my $self = shift; $self->{active_server} = shift || die "No server!"; } sub set_active_group { my $self = shift; $self->{active_group} = shift || die "No newsgroup!"; } sub decode_binary { my $self = shift; my $articles = shift; $self->cache_articles( $articles ); $self->decode_cached_articles( $articles ); } sub get_or_create_active_server { my $self = shift; my $NNTP = ""; my $active_server = $self->{active_server}; if( ! $self->{servers}{$active_server} ) { $self->msg("Opening new news connection to: '$active_server'\n" ); $self->open_news_connection( $active_server ) || die "Can't open connection to server: '$active_server'"; } $NNTP = $self->{servers}{$active_server}; return $NNTP; } sub cache_articles { my $self = shift; my $articles = shift || die "No articles provided!"; my $NNTP = $self->get_or_create_active_server; $NNTP->group( $self->{active_group} ); # if( ( $self->{active_server} ne $server ) or ( $self->{active_group} ne $newsgroup ) ) { # $NNTP->safe_group( $newsgroup ); # $NNTP->group( $newsgroup ); # } for( @$articles ) { $self->msg( "Caching article: $_.\n" ); $self->cache_article( $_ ); } } sub msg { my $self = shift; $| = 1; my $msg = join "", @_; print "(",$self->get_tid,"): ", $msg; } sub destroy { my $self = shift; $self->disconnect; $self->{servers} = undef; } sub open_news_connection { my $self = shift; my $server = shift; my $sleep_time = 30; my $NNTP; for( 0 .. 10 ) { $NNTP = Ponfish::News::MyNNTP->new( $server, Timeout => 60, ) || sleep $sleep_time; $sleep_time += 60; last if( $NNTP ); } my @authinfo = get_authinfo( $server ); if( $authinfo[0] ) { $NNTP->authinfo( @authinfo ); } $self->{servers}{$server} = $NNTP; return $NNTP; } sub disconnect { my $self = shift; # Close connections: foreach my $server ( values %{$self->{servers}} ) { $server->quit; delete $self->{servers}{$server} } $self->{servers} = {}; # Invalidate group: # delete $self->{active_group}; } sub wait_for_connection { my $self = shift; # my $NNTP = $self->{servers}{$self->{active_server}} || die "No NNTP object!"; # Reconnect! my $this_group = $self->{active_group}; while( 1 ) { $self->msg( "Disconnecting...\n" ); $self->disconnect; $self->msg( "Waiting for connection: testing with group( $self->{active_group} ) command...\n" ); my $NNTP = $self->get_or_create_active_server; my $group = $NNTP->group( $self->{active_group} ); if( $group ) { $self->msg( "Group returned: '$group'\n" ); return 1; } $self->msg( "group( $self->{active_group} ) command failed... sleeping...\n" ); sleep $SLEEP_TIME; } } sub cache_article { my $self = shift; my $id = shift; my $article_filename = $id . FILENAME_FIELD_SEP . $self->{active_group}; # If article already cached, don't download again... if( -f $article_filename and -s $article_filename ) { return -s $article_filename; } my $try; for ( 1 .. 3 ) { $try = $_; if( -f $article_filename.".try".$try ) { if( $try == 3 ) { # Tried 3 times already to cache this article... overwrite_file( "download error", $article_filename ); unlink $article_filename.".try".$try; return -666; } else { my $new_try = $try+1; overwrite_file( "try $try...", $article_filename.".try".$new_try ); unlink $article_filename.".try".$try; last; } } } if( $try != 3 ) { $self->msg( "TRY: $try\n" ); } if( $try == 3 ) { # Did not find any previous attempts at download... $try = 1; # First try overwrite_file( "try $try...", $article_filename.".try".$try ); } # Article is not cached, cache it! my $NNTP = $self->{servers}{$self->{active_server}} || die "No NNTP object!"; # $self->safe_group( $self->{current # if( ! $NNTP->group( $self->{active_group} ) ) { # $NNTP-> # } my $adata = undef; for( 1 .. $MAX_RETRIES ) { # eval { # local $SIG{ALRM} = sub { die "alarm" }; # alarm $ARTICLE_TIMEOUT; $adata = $NNTP->article( $id ); if( ! $adata ) { $self->wait_for_connection; } else { last; } } # alarm 0; # }; # if( ! $@ ) { # last; # } # } if( defined $adata ) { overwrite_file( join("", @$adata), $article_filename ); # Remove try file... unlink $article_filename.".try".$try; return 1; } else { return undef; } } sub decode_cached_articles { my $self = shift; my $articles = shift; my $arg = shift; # Create the filenames and decode the file... my $ng = $self->{active_group}; my @files = map { $_ . FILENAME_FIELD_SEP . $ng } @$articles; my $result = system( "uudeview", "-i", "-a", "-d", @files ); my $decoded_filename = $self->get_decoded_filename( $files[0] ); if( ! $decoded_filename ) { $decoded_filename = $self->get_decoded_filename( $files[1] ); # Hack - second arg could be the first one... } print ">>>>$decoded_filename<<<<\n"; # Fix for uudeview's problem with certain characters (&) if( $decoded_filename ) { if( $decoded_filename =~ /[\&\}\{]/ ) { # Need to fix: my $on_disk_filename = $decoded_filename; $on_disk_filename =~ s/[\&\}\{]/_/g; if( -f $on_disk_filename ) { portable_mv( $on_disk_filename, $decoded_filename ); } } } ################################################################## { # If we are saving to a directory, make sure it exists: if ( $arg ) { if ( -f $arg ) { # Can't decode to this directory! warn "Cannot decode to directory '$arg' - file called '$arg' already exists"; $arg = ""; } if ( ! -d $arg ) { ensure_dir_exists( $arg ); if ( ! -d $arg ) { warn "Could not create directory '$arg'!"; $arg = ""; } } } # Then if( $arg ) { if( $decoded_filename ) { # Attempt to move... portable_mv( $decoded_filename, $arg ); } } } ################################################################## # Now it's safe to delete the files... we don't need them anymore... if( ! $result ) { # Everything went ok, delete files... $self->delete_cached_files( @files ); } return 1; #!!! } sub get_decoded_filename { my $self = shift; my $enc_file = shift; my $IF = IO::File->new( $enc_file ); if( ! $IF ) { warn "Could not open file: '$enc_file'!"; return undef; } while( <$IF> ) { if( /begin \d\d\d (.*)/ ) { return $1; } if( /name=\"?(.*?)\"?$/ ) { return $1; } } } sub delete_cached_files { my $self = shift; unlink @_; } 1;