#!/usr/bin/perl
#
# Summarise the hit lists found in a sequence-search report.
#
# For every line starting with "Query= " the line itself is printed.
# For every hit list introduced by a line starting with
# "Sequences producing High-scoring Segment Pairs" the script prints,
# for the first hit and for every later hit whose E-value is at or
# below the cutoff, three fields: the WormID, the gene annotation and
# the E-value.  A dashed separator line closes each hit list.
#
# Usage: perl <this script> <report_file>
#
# NOTE(review): the column layout assumed below (ID in field 0,
# annotation starting at field 3, an all-digit score column, E-value
# second from the end) is taken from the original code; confirm it
# against the actual report format.

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

# Hits other than the first are reported only when their E-value is at
# or below this threshold.
my $E_VALUE_CUTOFF = 1e-10;

# Exactly one file must be given on the command line; otherwise print a
# usage statement and stop.
if (@ARGV != 1) {
    die "\nUsage: perl $0 <report_file>\n\n";
}

# Get the file name and open an input stream on it.  Three-argument
# open with a lexical handle keeps characters in the file name from
# being interpreted as an open mode.
my $file = shift;
open my $in, '<', $file or die "Couldn't open file: $file: $!\n";

while (my $line = <$in>) {

    # First print the sequence name: echo every "Query= " line.
    if ($line =~ /^Query= /) {
        print $line;
    }

    # Find the line that introduces the list of hits.
    if ($line =~ /^Sequences producing High-scoring Segment Pairs/) {

        # The first hit is on the second line after the header.
        $line = <$in>;
        $line = <$in>;

        # The first hit of a list is always printed, whatever its E-value.
        my $is_first_hit = 1;

        # The list ends at the first blank line.  Testing for definedness
        # stops the loop at end of file, and testing for a non-whitespace
        # character also accepts CRLF or space-only separator lines.
        while (defined $line && $line =~ /\S/) {
            my ($summary, $e_value) = summarise_hit($line);

            if ($is_first_hit || passes_cutoff($e_value)) {
                print $summary;
            }
            $is_first_hit = 0;

            $line = <$in>;
        }

        print "---------------";
        print "\n";
        print "\n";
    }
}

close $in;
exit;

# Build the printable summary for one hit line.
#
# Parameters: the raw hit line.
# Returns:    a two-element list: the text block to print (WormID, gene
#             annotation and E-value, followed by an empty line) and the
#             raw E-value token (empty string when the line has fewer
#             than two fields).
sub summarise_hit {
    my ($hit_line) = @_;

    my @fields = split /\s+/, $hit_line;

    my $worm_id = defined $fields[0] ? $fields[0] : '';

    # The annotation starts at field 3 and runs up to, but not including,
    # the first field made up of digits only.  The scan is bounded by the
    # number of fields so that a line without such a field cannot loop
    # forever.
    my @annotation_words;
    for my $index (3 .. $#fields) {
        last if $fields[$index] =~ /^\d+$/;
        push @annotation_words, $fields[$index];
    }

    # The E-value is the second field counted from the end of the line.
    my $e_value = @fields >= 2 ? $fields[-2] : '';

    my $summary = "WormID = " . $worm_id . "\n"
                . "Gene Annotation = " . join(' ', @annotation_words) . "\n"
                . "E-value = " . $e_value . "\n"
                . "\n";

    return ($summary, $e_value);
}

# Decide whether an E-value token is at or below the reporting cutoff.
#
# Parameters: the raw E-value token.
# Returns:    true when the token is numeric and <= $E_VALUE_CUTOFF,
#             false otherwise (including for non-numeric tokens).
sub passes_cutoff {
    my ($e_value) = @_;

    return 0 if !defined $e_value;

    # Accept the abbreviated form "e-100" by reading it as "1e-100".
    if ($e_value =~ /^e[-+]?\d+$/i) {
        $e_value = "1$e_value";
    }

    return 0 if !looks_like_number($e_value);
    return $e_value <= $E_VALUE_CUTOFF ? 1 : 0;
}