#!/usr/bin/perl -w

use strict;

#my $rmask;
#my $maskseq;

# Read the configuration file.  blast.conf is looked up in the current
# working directory; settings are "key value" lines.
print "Reading blast.conf...\n";
my $conf = `/bin/cat blast.conf`;

# Open the log file in append mode.  The LOG handle is used by the queue
# loop further down, so it stays a package-level (bareword) handle.
my $blastrunlog="./blastrun.log";
open (LOG, '>>', $blastrunlog)
    or die "Could not open $blastrunlog: $!\n";

# What port to run on?  Nothing visible in this script uses the port, so a
# missing entry is tolerated.  The capture is only read after a successful
# match: a failed match leaves $1 untouched, which would otherwise hand us
# a stale value.
my $portin = ($conf =~ m/port (.*)\n/m) ? $1 : undef;
#print "Running on port $portin\n";

# Input dump directory (the job queue).  This one is mandatory: files in it
# are read, processed and then deleted, so guessing a directory is not an
# option.  Note the pattern needs a newline before the key, i.e. the key is
# not recognised on the very first line of blast.conf.
$conf =~ m/\ndumpdir (.*)\n/m
    or die "blast.conf: no 'dumpdir' setting found\n";
my $dumpdir = $1;
#print "Dumping inputs to $dumpdir\n";

# Output dump directory (where the .out result files are written).
$conf =~ m/\noutdir (.*)\n/m
    or die "blast.conf: no 'outdir' setting found\n";
my $outdir = $1;
#print "Dumping outputs to $outdir\n";

# Temp dump directory (query sequences and blast stderr captures).
$conf =~ m/\ntmpdir (.*)\n/m
    or die "blast.conf: no 'tmpdir' setting found\n";
my $tmpdir = $1;
#print "Using temp dir: $tmpdir\n";

# Initial listing of the queue directory.  ls is started without a shell
# (list-form pipe open) so the configured path is never parsed as shell
# syntax.  A failure to list leaves the queue empty, exactly like an empty
# directory would.
my $list = '';
if (open(my $ls, '-|', 'ls', '-1A', '--', $dumpdir)) {
    local $/;                       # slurp the whole listing at once
    my $listing = <$ls>;
    close $ls;
    $list = $listing if defined $listing;
}
else {
    print LOG "Could not list $dumpdir: $!\n";
}
chomp $list;
# Strip a leading directory prefix if ls ever prints one; \Q...\E keeps
# regex metacharacters in the path literal.
$list =~ s/\Q$dumpdir\E\///g;
my @files = split /\n/,$list;
print STDERR "******* $list\n *******";
# ----------------------------------------------------------------------
# Queue loop: process the files in $dumpdir one at a time until the
# directory listing is empty.  For every input file the loop
#   1. parses the job parameters and the query sequence,
#   2. writes the sequence to $tmpdir/<name>.temp,
#   3. runs the requested blast executable against the requested database,
#   4. writes the report to $outdir/<name>.out,
#   5. deletes the input file and optionally mails the report.
# Reads $dumpdir, $outdir, $tmpdir, $list, @files and the LOG handle that
# are set up earlier in the file.
# ----------------------------------------------------------------------

# Return the whole content of a file, or '' when it cannot be read.
my $slurp = sub {
    my ($path) = @_;
    open(my $fh, '<', $path) or return '';
    local $/;
    my $content = <$fh>;
    close $fh;
    return defined $content ? $content : '';
};

# Return the first capture group of $pattern in $text, or '' when the
# pattern does not match.  Reading $1 unconditionally is wrong here: after
# a failed match $1 still holds the capture of an earlier, unrelated match.
my $capture = sub {
    my ($text, $pattern) = @_;
    if ($text =~ $pattern) {
        my $value = $1;
        return $value;
    }
    return '';
};

# Quote one word for /bin/sh so that it is passed through literally.
my $shell_quote = sub {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
};

# Current date as printed by date(1), without the trailing newline.
my $timestamp = sub {
    my $now = `date`;
    chomp $now;
    return $now;
};

# List the queue directory with the given ls options.  ls is started
# without a shell.  Returns the raw listing ('' on failure).
my $list_queue = sub {
    my (@ls_options) = @_;
    my $listing = '';
    if (open(my $ls, '-|', 'ls', @ls_options, '--', "$dumpdir/")) {
        local $/;
        my $output = <$ls>;
        close $ls;
        $listing = $output if defined $output;
    }
    else {
        print LOG "Could not list $dumpdir: $!\n";
    }
    $listing =~ s/\Q$dumpdir\E\///g;
    return $listing;
};

# The only executables a job may request.  Anything else is rejected, so
# the program name can never carry shell syntax.
# (tblastx, blastp and blastx are deliberately not enabled.)
my %executable_for = (
    blastn  => 'blastn',
    tblastn => 'tblastn',
);

while ($list ne "")
{
    my $run = $files[0];
    last unless defined $run;

    # Files named .nfs* stop the whole run.
    if ($run =~ /^\.nfs/) {last;}

    my $input_path = "$dumpdir/$run";

    print LOG "Running $run\n";
    my $date = $timestamp->();
    print LOG "----------------------------------------\n";
    print LOG "[$date] -- Beginning run $run\n";

    my $blastpath = `which blastn`;
    print LOG "blastpath:  $blastpath\n";

    # extract inputs from input file.
    my $inputfile = $slurp->($input_path);

    # email address
    my $email = $capture->($inputfile, qr/email (.*)\n/m);
    $email =~ tr/;!:|//d;
    print LOG "Email $email\n";

    # which blast executable to use
    my $blast_type = $capture->($inputfile, qr/blast_type (.*)\n/m);
    $blast_type =~ tr/;!:|//d;
    my $program = $blast_type;
    print LOG "Using Executable $blast_type\n";

    # which blast database to use
    my $db = $capture->($inputfile, qr/db (.*)\n/m);
    $db =~ tr/;!:|//d;
    print LOG "Using Database $db\n";

    # (the "filter" and "mask" job options are currently disabled)

    # email results?
    my $emailres = $capture->($inputfile, qr/\nemailresults (.*)\n/m);
    $emailres =~ tr/;!:|//d;
    print LOG "Email results is $emailres\n";

    # remote host, IP address and browser are only logged
    my $host = $capture->($inputfile, qr/host (.*)\n/m);
    print LOG "Remote host $host\n";

    my $ip = $capture->($inputfile, qr/ip (.*)\n/m);
    print LOG "Remote IP $ip\n";

    my $browser = $capture->($inputfile, qr/browser (.*)\n/m);
    print LOG "Remote Browser $browser\n";

    # extract sequence: everything between SEQSTART and SEQEND, with DOS
    # carriage returns and leading whitespace removed
    my $seq = $inputfile;
    $seq =~ s/\015//g;
    $seq = $capture->($seq, qr/SEQSTART\n(.*)SEQEND/s);
    $seq =~ s/^\s+//;	#RW added Sep 20,2002

    # make sure the sequence starts with a FASTA header line: a leading
    # ';' is turned into '>', otherwise a placeholder header is added
    if ($seq !~ /^\>/)
    {
        if ($seq =~ /^\;/) {$seq =~ s/\;/>/}
        else {$seq = ">NO_NAME_SPECIFIED\n" . $seq;}
    }

    # derive the working file names from the run name.  Only a trailing
    # ".in" extension is removed; the dot is literal and the rest of the
    # name is left alone.
    my $filename = $run;
    $filename =~ s/\.in\z//;
    my $file       = $filename . ".temp";
    my $query_path = "$tmpdir/$file";
    my $err_path   = "$tmpdir/$filename.err";
    my $out_path   = "$outdir/$filename.out";

    # write the sequence out to a temp file.
    if (open(my $query_fh, '>', $query_path)) {
        print {$query_fh} $seq;
        close $query_fh
            or print LOG "Could not finish writing $query_path: $!\n";
    }
    else {
        print LOG "Could not write $query_path: $!\n";
    }

    # (RepeatMasker pre-processing is currently disabled)

    # make sure nobody tries to sneak in malicious commands.
    my $exec = $executable_for{$program};
    unless (defined $exec) {

        # do this if an invalid blast command was entered.

        print LOG "Invalid BLAST executable, in $run.\n";
        print LOG "Jumping to next.\n";
        unlink($input_path);
        if (-e $input_path) {
            # An input file that cannot be removed would be picked up
            # again and again; stop instead of spinning on it.
            print LOG "Could not delete $input_path: $!  Stopping.\n";
            last;
        }
        $list = $list_queue->('-1A');
        @files = split /\n/,$list;
        next;
    }

    # run blast code here.

    # environment read by the blast binaries (database and matrix paths)
    $ENV{'WUBLASTDB'} = "/nfs/www/docs/fish_lab/frank/cgi-bin/fish/Blast/WZ.seq";
    $ENV{'BLASTDB'} = "/nfs/www/docs/fish_lab/frank/cgi-bin/fish/Blast/WZ.seq";
    $ENV{'WUBLASTMAT'} = "/usr/seshare/Linux/wublast/matrix";
    $ENV{'BLASTMAT'} = "/usr/seshare/Linux/wublast/matrix";

    print "The seq im going to use is:\n";
    print $slurp->($query_path);

    # Build the command line.  The database string comes from the job file
    # (untrusted), so each of its whitespace-separated words is quoted for
    # the shell; it still splits into separate arguments as before, but
    # can no longer introduce shell syntax.  stderr is captured to a file.
    my $execstrg = join(' ',
        $exec,
        (map { $shell_quote->($_) } split(' ', $db)),
        $shell_quote->($query_path),
        '2>' . $shell_quote->($err_path),
    );

    print "execstring $execstrg\n";
    my $blastout = `$execstrg`;
    $blastout = '' unless defined $blastout;

    # collect whatever blast wrote to stderr, then remove that file
    my $blasterr = $slurp->($err_path);
    unlink $err_path;

    my $errout = " ";
    if ($blasterr ne "")
    {
        print LOG "ERRORS GENERATED BY BLAST, RUNNING $file:\n$blasterr\n";
        print "Errors.\n";
        $errout = "BLAST gave the following errors:\n$blasterr\n\n";
    }

    my $blastfini = $errout . $blastout;

    # now write to the output file
    my $output_written = 0;
    if (open(my $out_fh, '>', $out_path)) {
        print {$out_fh} "STARTOUT\n",$blastfini,"ENDOUT";
        if (close $out_fh) {
            $output_written = 1;
        }
        else {
            print LOG "Could not finish writing $out_path: $!\n";
        }
    }
    else {
        print LOG "Could not write $out_path: $!\n";
    }

    $date = $timestamp->();
    print LOG "[$date] -- Completed $run\n";
    print LOG "           Output written to $out_path\n" if $output_written;
    print LOG "           Deleting input file $run\n";

    unlink($input_path);
    if ( -e $input_path) {
        #backup way of deleting the input file
        system('rm', '-rf', '--', $input_path);
    }
    my $input_stuck = -e $input_path;

    # NOTE(review): this refresh sorts with "ls -c" while the initial
    # listing and the invalid-executable branch do not, so the processing
    # order differs between them -- confirm which order is intended.
    $list = $list_queue->('-1Ac');
    @files = split /\n/,$list;

    if ($emailres eq "yes")
    {
        if (open(my $mail, '|-', '/usr/sbin/sendmail', '-t')) {
            print {$mail} <<END_HEADER;
To: $email (Blast User)
From: blastserver\@genome.wustl.edu (BLAST Server)
Subject: BLAST search report

*** Note: *** 
    This is an automatically generated message.  Do not
    reply to it, as it has been sent from an invaild 
    email address and all replies will be returned to their 
    sender.  For technical questions, please email
    webmaster\@genome.wustl.edu for assistance instead.

The following is the result from your BLAST search:

$blastfini


END_HEADER
            close $mail;
        }
        else {
            print LOG "Could not start sendmail for $run: $!\n";
        }
    }

    if ($input_stuck) {
        # The job is done but its input file is still there; going on
        # would run (and mail) the same job again and again.
        print LOG "Could not delete $input_path.  Stopping.\n";
        last;
    }

}

print "Done with current queue.  Exiting...\n";
close LOG;
exit;
