/usr/local/CPAN/GetWeb/GetWeb/Cmd.pm


package GetWeb::Cmd;

use HTML::FormatText;
use HTML::Parse;

use MailBot::Util;
use MailBot::Envelope;
use GetWeb::Util;
use GetWeb::Encoder;
use GetWeb::Chain;
use GetWeb::SURL;
use GetWeb::Fetcher;

use Carp;
use strict;

sub sDie
{
     croak "SYNTAX ERROR: ". shift;
}

sub d
{
    &MailBot::Util::debug(@_);
}

# my %ghSynonym = (
# 		 GET => ["\n\n"],
# 		 SEND => ["GET"],
# 		 WWW => ["GET"],
# 		 SEARCH => ["ALIAS"],
		 
# 		 RGET => ["GET,TO"],
# 		 RWWW => ["WWW,TO"],
# 		 RSEND => ["SEND,TO"],
# 		 RSOURCE => ["SOURCE,TO"],
# 		 RDEEP => ["DEEP,TO"],
# 		 );

# my @gaKeyword = ( qw( SOURCE SPLIT TO ALIAS FOLLOW NOMIME HELP ), "\n\n"
# 		   );

# jfjf add ENCODING (BASE64, UUE, QP, LATIN) command modifier
# jfjf handle accents better
# jfj make aliases configurable
# jfj configure new search-engines on the fly

my %ghAlias = (
	       # HELP =>
	       # ['file://help.html'],
	       APS97 =>
	       ['http://www.uth.tmc.edu/cgi-bin/apstracts/searchaps.pl',
		'indexfile=/usr/local/www/apstracts/1997/aps1997.swish&searchtags=&maxresults=100&submit=Search&keywords='
		],
	       APS96 =>
	       ['http://www.uth.tmc.edu/cgi-bin/apstracts/searchaps.pl',
		'indexfile=/usr/local/www/apstracts/1996/aps1996.swish&searchtags=&maxresults=100&submit=Search&keywords='
		],
	       APS95 =>
	       ['http://www.uth.tmc.edu/cgi-bin/apstracts/searchaps.pl',
		'indexfile=/usr/local/www/apstracts/1995/aps95.swish&searchtags=&maxresults=100&submit=Search&keywords='
		],
	       ALTAVISTA =>
	       ['http://altavista.digital.com/cgi-bin/query',
		'pg=q&what=web&fmt=.&q='],
	       PROMED =>
	       ['http://www.healthnet.org/cgi-bin/webglimpse/usr/www/htdocs/.promed',
	        'errors=0&age=&maxlines=30&query=',
	        { maxfiles => [ qw( 50 10 30 100 1000 ) ] } ]
	       ,
	       YAHOO =>
	       ['http://search.yahoo.com/bin/search',
		'd=y&g=0&s=a&w=s&n=25&p='],
	       GARBAGE_ENGINE =>
	       ['file://test/not_there',
	        'errors=0&age=&maxfiles=50&query=',
	        { maxlines => [ qw( 30 10 50 500 ) ],
	          is_ok => [ qw( yes no ) ],
	          all_right => [ qw( false true ) ] }
		]
	       ,
	       INFOSEEK =>
	       ['http://guide-p.infoseek.com//Titles',
		'sv=IS&lk=lcd&qt=',
		{ col => [ qw( WW NN CT EM NW FQ ) ] } ]
	       );

# jfj implement LINE, SIZE

sub new
{
    my $type = shift;
    my $cwd = shift;

    # my $state = new GetWeb::CmdState();

    my $self = {
	ENCODER => new GetWeb::Encoder(),
	# STATE => $state,
	CANON => "GET",
	CHAIN => new GetWeb::Chain($cwd),
	ENVELOPE => new MailBot::Envelope(),
	MIME => 1,
	CWD => $cwd
    };

    # allowed states:  CMD TO ALIAS HELP SURL FOLLOW CHAIN
    
    bless($self,$type);
}

sub getEncoder
{
    shift -> {ENCODER};
}

sub getEnvelope
{
    shift -> {ENVELOPE};
}

sub keyTwoHELP
{
    my $self = shift;
    my $param = shift;

    my $upperParam = uc $param;
    push(@{$$self{HELP_LIST}},$upperParam);
}

sub keySlurpALIAS
{
    my $self = shift;
    my $alias = shift;
   
    my $upperAlias = uc $alias;

    my $paAlias = $ghAlias{$upperAlias};
    unless (defined $paAlias)
    {
	my $legal = join(', ',keys %ghAlias);
	$legal =~ s/GARBAGE_ENGINE(, )?//;
	&sDie("not a legal search alias: $alias; legal aliases are: $legal\n");
    }
    
    my $retVal = $self -> appendAlias($paAlias);
    while (@_)
    {
	my $param = shift;
	$self -> appendParam($param);
    }
}

sub keyTwoTO
{
    my $self = shift;
    my $param = shift;

    my $envelope = $self -> {ENVELOPE};
    $envelope -> setRecipientList($param);

    my $ui = MailBot::UI::current;
    my $from = $ui -> getFrom;
    $envelope -> setFrom($from);
}

sub keyTwoUSER
{
    my $self = shift;
    $self -> {AUTH_USER} = shift;
}

sub keyTwoPASSWORD
{
    my $self = shift;
    $self -> {AUTH_PASSWORD} = shift;
}

sub keyTwoSPLIT
{
    my $self = shift;
    my $param = shift;

    $$self{ENVELOPE} -> setSplitSize($param);
}

sub keyAppTwoFOLLOW
{
    my $self = shift;
    my $param = shift;

    $self -> follow($param);
}

sub appendAlias
{
    my $self = shift;
    my $paAlias = shift;

    my $surl = new GetWeb::SURL @$paAlias;

    $self -> setSURL($surl);
}

sub setSURL
{
    my $self = shift;
    my $surl = shift;

    $$self{CHAIN} -> setSURL($surl);
}

sub appendParam
{
    my $self = shift;
    my $param = shift;

    my $chain = $$self{CHAIN};
    $chain -> addParam($param);

    0;
}

sub addCanon
{
    my $self = shift;
    my $add = join(' ',@_);

    $self -> {CANON} .= (" " . $add);
}

sub keyOneSOURCE
{
    my $self = shift;
    $$self{ENCODER} -> preferSource;
}

sub keyOneNOMIME
{
    shift -> {ENVELOPE} -> setMIME(0);
}

sub keySlurpHELP
{
    my $self = shift;

    $$self{HELP_LIST} = [ @_ ];
}

sub keyNEW_PAR
{
    return 1 unless shift -> isEmpty();
    0;
}

sub keyOneDEEP
{
    my $self = shift;
    $$self{DEEP} = 1;
}

# jfj change from 'newFetcher' to 'currentFetcher'
sub newFetcher
{
    my $self = shift;
    my $ua = shift;

    my $fetcher = $self -> {currentFetcher};

    if (defined $fetcher)
    {
	$fetcher -> {UA} = $ua;
	$fetcher -> authorizeUser($self -> {AUTH_USER});
	$fetcher -> authorizePassword($self -> {AUTH_PASSWORD});
    }
    else
    {
	$fetcher = new GetWeb::Fetcher($ua);
	$self -> {currentFetcher} = $fetcher;

	$fetcher -> authorizeUser($self -> {AUTH_USER});
	$fetcher -> authorizePassword($self -> {AUTH_PASSWORD});
	
	# jfj hierarchical help
	
	my $chain = $$self{CHAIN};
	
	my $state = $$self{STATE};
	my $urlString;
	my $paHelp = $$self{HELP_LIST};
	if (defined $paHelp)
	{
	    my $ui = MailBot::UI::current;
	    my $file = uc join(' ',@$paHelp);
	    $file =~ /\s/ and &sDie("too many parameters to HELP: $file");
	    $file eq "" and
		$file = $ui -> getServiceParam('help');
	    
	    my $config = MailBot::Config::current;
	    my $pubDir = $config -> getPubDir();
	    my $helpDir = "help";
	    my $helpPath = "$pubDir/$helpDir";
	    if (! -e "$helpPath/$file.html")
	    {
		my @aLegal = <$helpPath/*.html>;
		grep(s/\.html$//,@aLegal);
		grep(s/.+\///,@aLegal);
		my $helpList = "HELP, HELP " . join(', HELP ',@aLegal);
		die "illegal help command: HELP $file.\n  Legal help commands are: $helpList\n";
	    };
	    
	    $urlString = "file://help/$file.html";
	}
	else
	{
	    $urlString = $chain -> getURL($ua);
	}
	
	$$self{ENCODER} -> base($urlString);
	$fetcher -> base($urlString);
	
	&d("full url is $urlString");
	
	my $paFollow = $chain -> getFollowList();
	my $follow;
	foreach $follow (@$paFollow)
	{
	    $fetcher -> follow($follow);
	}
    }

    $fetcher;
}

sub discoveredReq
{
    my $self = shift;

    # jfj add INLINE command

    return undef unless $self -> {DEEP};

    my $paLink = $self -> {currentFetcher} -> getLinks(['a']);

    my $cmdIter = new GetWeb::CmdIter;
    my $link;
    foreach $link (@$paLink)
    {
	# jfj more robust cloning method for commands

	my $cmdText = $self -> asString;
	my $newIter = GetWeb::Parser -> getCmdIter("",[$cmdText]);

	my $cmd = $newIter -> next;
	defined $cmd or die "could not clone $cmdText";
	$newIter -> isEmpty or die "command $cmdText cloned into two commands: first is ", $cmd -> asString, "second is ", $newIter -> next -> asString, "\n";
	
	delete $cmd -> {DEEP};
	$cmd -> setSURL(new GetWeb::SURL($link));
	$cmd -> addCanon("#deep");
	$cmdIter -> pushCmd($cmd);
    }
    $cmdIter;
}

sub follow
{
    my $self = shift;
    my $followToken = shift;
    
    $$self{CHAIN} -> follow($followToken);
}

sub asString
{
    shift -> {CANON};
}

1;