#!/usr/bin/perl -w
#
# worm_blast_parse.pl
#
# Reads a BLAST report and prints, for each query, the hits listed in its
# "High-scoring Segment Pairs" summary table whose e-value is below CUTOFF.
# One tab-separated line is printed per reported hit:
#
#     query name <TAB> hit ID <TAB> annotation <TAB> e-value
#
# Each row of the summary table is expected to look like
#
#     ID  CExxxx  WBGenexxxxxxx  annotation words ...  score  e-value  n
#
# NOTE(review): this layout is taken from the comments of the original
# script; confirm against the BLAST flavour that produces the reports.
#
# Usage: ./worm_blast_parse.pl <blast_output_file>

use strict;
use warnings;

# Hits are reported only while their e-value is strictly below this value.
use constant CUTOFF => 1E-10;

# Exactly one file must be given on the command line; otherwise print a
# usage statement and stop.
if (@ARGV != 1) {
    die "\nUsage: ./worm_blast_parse.pl <blast_output_file>\n\n";
}

print "Top worm blast hits for my 2-hybrid data\n\n";

# Get the file name and open a read-only stream to it.  The three-argument
# form keeps characters in the file name from being interpreted as an
# open mode.
my $file = shift;
open my $in, '<', $file or die "Couldn't open file: $file: $!\n";

# Name of the query whose section of the report is currently being read.
my $query_name = '';

LINE:
while (my $line = <$in>) {

    # A "Query=" line starts a new entry; the first word after it is the
    # query sequence name, which is remembered for the hits that follow.
    if ($line =~ /^Query=\s*(\S*)/) {
        $query_name = $1;
        next LINE;
    }

    # Everything else is ignored until the header of the hit table.
    next LINE unless $line =~ /High-scoring Segment Pairs/;

    # The header is followed by one blank line, which is discarded here.
    # If the file ends right after the header there is nothing left to do.
    last LINE unless defined scalar(<$in>);

    # Read table rows until the table ends.  The explicit defined() test
    # stops the loop at end-of-file; without it a report truncated inside
    # the table would be processed forever.
    HIT:
    while (defined(my $hit = <$in>)) {

        # The first blank (or whitespace-led) line ends the table.
        last HIT if $hit =~ /^\s/;

        my @fields = split /\s+/, $hit;

        # A usable row needs at least ID, score, e-value and n; anything
        # shorter cannot be a table row, so treat it as the end of the table.
        last HIT if @fields < 4;

        my $id     = shift @fields;    # first column is the hit ID
        my $evalue = $fields[-2];      # e-value is the second-to-last column

        splice @fields, -3;            # drop score, e-value and n
        splice @fields, 0, 2;          # drop CExxxx and WBGenexxxxxxx

        # Whatever is left is the free-text annotation.
        my $annotation = join ' ', @fields;

        # Some BLAST versions abbreviate 1e-100 as "e-100"; give such values
        # an explicit mantissa for the numeric comparison only.  The text
        # that is printed stays exactly as it appeared in the report.
        my $numeric_evalue = $evalue =~ /^e/i ? "1$evalue" : $evalue;

        # The table is sorted by e-value, so once one row fails the cutoff
        # every later row will fail too and the rest can be skipped.
        last HIT unless $numeric_evalue < CUTOFF;

        print join("\t", $query_name, $id, $annotation, $evalue), "\n";
    }
}

close $in;