#!/usr/bin/perl # # Gurmukh Sahota # 04/11/2008 # # # Wright fisher model using the roulette wheel selection algorithm # use strict; # # set up constants # my $DEFAULT_FITNESS = 1; my $ITERATIONS = 100; my $ALLELES = 2; # flip the constant to change the program my $DOMINANCE = 'DOMINANT'; #my $DOMINANCE = 'RECESSIVE'; ############# #usage stuff# ############# if (@ARGV != 3) { die "\nUsage: perl wright_fisher.pl Pop_size #Generations Fitness_bio5488\n\tNote: default fitness is " . $DEFAULT_FITNESS . "\n\n"; } my $pop_size = shift @ARGV; my $generations = shift @ARGV; my $bio5488_fitness = shift @ARGV; if ($bio5488_fitness == $DEFAULT_FITNESS) { print STDERR "WARNING: Bio5488 Fitness and Default Fitness are the same .. program will not run correctly\n"; } # # Thanks to Brian Muegge for this idea of creating a fitness hash # this would be extensible to codominance, etc., etc. # my %inheritance_h = ( 'DOMINANT' => [ $DEFAULT_FITNESS, $bio5488_fitness, $bio5488_fitness], 'RECESSIVE' => [ $DEFAULT_FITNESS, $DEFAULT_FITNESS, $bio5488_fitness], ); my @fixation_info_a; #iterate the wright fisher multiple times for (my $iter = 0; $iter < $ITERATIONS; $iter++) { my @population; # initialize the population for (my $indiv=0; $indiv <$pop_size; $indiv++) { for (my $allele=0; $allele < $ALLELES; $allele++) { $population[$indiv][$allele] = 0; } } # set the first individual as the heterozygote $population[0][0] = 1; # I should be setting these variables, so the -1 will tell me there is something wrong with that part of the code my $final_generation = -1; my $fixation_sum = -1; # lets talk about ___ baby for (my $gen_count=0; $gen_count < $generations; $gen_count++) { # lets create a fitness array (ie. where we are going to pick our alleles from) my $fitness_array; #iterate through the population for (my $indiv=0; $indiv <$pop_size; $indiv++) { # lets figure out what the sum of our alleles is (remember, we coded them as 0, 1) my $allele_sum = 0; for (my $allele=0; $allele < $ALLELES; $allele++) { $allele_sum += $population[$indiv][$allele]; } # If block will allow me to deal with if it is recessive or bio5488 # we use this to generate a fitness array push @$fitness_array, $inheritance_h{$DOMINANCE}[$allele_sum]; } # in order to use the roulette wheel selection, we need probabilities, so normalize the array my $normalized_fitness_array = normalize_array($fitness_array); # we are going to use a sum to figure out fixation (look in the loop for how) $fixation_sum = 0; # Someone has to get the reference ... # also, lets make it a little faster by forcing perl to allocate the full size of the array at the beginning. my @tng; $#tng = $pop_size-1; # for our new population ... create them for (my $indiv=0; $indiv <$pop_size; $indiv++) { for (my $allele=0; $allele < $ALLELES; $allele++) { # select the individual based on the normalized fitness and select an allele randomly my $r_indiv = roulette_wheel_index($normalized_fitness_array); my $r_allele = int(rand($ALLELES)); # assign the new individual's allele based on the randomly selected one $tng[$indiv][$allele] = $population[$r_indiv][$r_allele]; # again, a cheap coding trick that we have 0, 1 alleles ... it is probably faster than doing the if blocks :) $fixation_sum += $tng[$indiv][$allele]; } } # check for fixation for either allele (ie. all 0's = default allele and all 1's => 2*population_size = bio5488 if ($fixation_sum == 0) { push @{$fixation_info_a[0]}, ($gen_count+1); last; } elsif ($fixation_sum == (2*$pop_size)) { push @{$fixation_info_a[1]}, ($gen_count+1); last; } # set the population to the new population for the next time around @population = @tng; } } # make sure that the array actually exists and then count the fixation time for both of the alleles if (ref($fixation_info_a[0]) eq "ARRAY") { print "Default Allele Fixed: ", (sprintf("%0.4f", (scalar(@{$fixation_info_a[0]}) / $ITERATIONS)) * 100 ), "% of the time\n"; print "Fixation Time required: ", mean(\@{$fixation_info_a[0]}), " Generations\n"; print "\n"; } else { print "Default Allele Fixed: 0% of the time\n"; print "\n"; } if (ref($fixation_info_a[1]) eq "ARRAY") { print "Bio5488 Fixed: ", (sprintf("%0.4f", (scalar(@{$fixation_info_a[1]}) / $ITERATIONS)) * 100 ), "% of the time\n"; print "Fixation Time required: ", mean(\@{$fixation_info_a[1]}), " Generations\n"; } else { print "Bio5488 Fixed: 0% of the time\n"; print "\n"; } sub mean { my $array = shift @_; my $sum=0; foreach my $elem (@$array) { $sum += $elem; } return $sum/scalar(@$array); } # normalize the array selection sub normalize_array { my $array = shift @_; my $sum=0; foreach my $elem (@$array) { $sum += $elem; } my $new_array; for (my $count=0; $count< (@$array); $count++) { $$new_array[$count] = $$array[$count]/$sum; } return $new_array; } # Fitness proportionate selection # http://en.wikipedia.org/wiki/Fitness_proportionate_selection sub roulette_wheel_index { my $norm_array = shift @_; my $cdf = rand 1; my $selected_index = -1; my $sum = 0; for (my $count=0; $count<(@$norm_array); $count++) { $sum += $$norm_array[$count]; $selected_index=$count, last if ($sum > $cdf); } return $selected_index; }