S1 File. The in-house PERL scripts. #!perl use warnings; use strict; use Getopt::Long; my %opts; GetOptions(\%opts, "i=s", "f=s", "p=s", "o=s", "w=i", "s=i"); my $usage = <<"USAGE"; Usage: perl $0 -i -f -p -o -w [window] !!! set '-' for missing genotype !!! Options: -i file input file -f file input fai of fasta -p string 2 population, sort by header of file, eg: A=10,B=15; A is blank, B is treat -o string outprefix -w int slide window, default 50000 USAGE die $usage if(!$opts{i} or !$opts{f} or !$opts{p} or !$opts{o}); $opts{w} ||= 50000; my $s = $opts{w} / 2; my (@id, @nsample, $allsample); my @popu = split /,/, $opts{p}; foreach(@popu) { my ($x, $y) = (split /=/); push @id, $x; $allsample += $y; push @nsample, $y; } die "Just only calculate 2 population!\n" if(@id != 2); my @pid = map {$_ .= "_pi"} @id; my @oid = map {$_ =~ s/_pi//;$_ .= "_omega"} @id; my @tid = map {$_ =~ s/_omega//;$_ .= "_tajiamaD"} @id; my @hid = map {$_ =~ s/_tajiamaD//;$_ .= "_Hp"} @id; my $rid = "$pid[1]/$pid[0]"; open OO, "> $opts{o}.omegapi.txt" or die $!; print OO join("\t", "#chr\tloci", @oid, @pid, $rid, "all_pi", "Fst", @tid, @hid)."\n"; my (@tmp, %chr); open FA, $opts{f} or die $!; while() { chomp; @tmp = split; $chr{$tmp[0]} = $tmp[1]; } close FA; my %hash; open FB, $opts{i} =~ /\.gz$/ ? "gzip -dc $opts{i} |" : $opts{i} or die $!; ; while() { chomp; @tmp = split; push @{$hash{$tmp[0]}}, join("\t", $tmp[1], @tmp[2..$#tmp]); } close FB; foreach(sort {$a cmp $b} keys %chr) { next unless(exists $hash{$_}); my $n = int($chr{$_} / $s); for(my $i = 0; $i <= $n; $i ++) { my $start = $s * $i + 1; my $end = $opts{w} + $s * $i; my $nloci = 0; my @lines; for(my $j = 0; $j < @{$hash{$_}}; $j ++) { my @line = split /\t/, $hash{$_}[$j]; if($line[0] >= $start and $line[0] <= $end) { $nloci ++; push @lines, join("\t", @line[1..$#line]); }elsif($line[0] > $end){ last; } # delete locis of last step if($line[0] >= $start and $line[0] <= (($i+1) * $s)) { shift @{$hash{$_}}; $j -= 1; } } my $rawlen = int(($start + $end) / 2); if($nloci == 0) { my @zero = (0) x 11; print OO join("\t", "$_\t$rawlen", @zero)."\n"; next; } my (@aomega, @bomega, @api, @bpi, @allpi, @avepi, @piratio, @fst, @ahp, @bhp, @atajimad, @btajimad); foreach my $l(@lines) { my @ls = split /\t/, $l; my $REF = shift @ls; my ($aref, $ahet, $amut, $bref, $bhet, $bmut) = (0) x 6; for(my $k = 0; $k < $nsample[0]; $k ++) { if($ls[$k] eq $REF) { $aref ++; }elsif($ls[$k] =~ /[ATCG]/){ $amut ++; }elsif($ls[$k] ne "-"){ $ahet ++; } } for(my $k = $nsample[0]; $k < $allsample; $k ++) { if($ls[$k] eq $REF) { $bref ++; }elsif($ls[$k] =~ /[ATCG]/){ $bmut ++; }elsif($ls[$k] ne "-"){ $bhet ++; } } my $aeffectsample = $aref + $amut + $ahet; my $beffectsample = $bref + $bmut + $bhet; # calculate Omega my $aomega_v = 1 / 1 / &tt($aeffectsample); my $bomega_v = 1 / 1 / &tt($beffectsample); push @aomega, $aomega_v; push @bomega, $bomega_v; # calculate Pi my $api_v = $aeffectsample >= 2 ? (0.5 * ($aref * $ahet + $amut * $ahet) + $aref * $amut) / ($aeffectsample * ($aeffectsample - 1) / 2) : 0; my $bpi_v = $beffectsample >= 2 ? (0.5 * ($bref * $bhet + $bmut * $bhet) + $bref * $bmut) / ($beffectsample * ($beffectsample - 1) / 2) : 0; push @api, $api_v; push @bpi, $bpi_v; my $allpi_v = (0.5 * ($aref * $bhet + $bref * $ahet + $amut * $bhet + $bmut * $ahet) + $aref * $bmut + $bref * $amut) / ($aeffectsample * $beffectsample); push @allpi, $allpi_v; push @avepi, ($api_v + $bpi_v) / 2; #push @piratio, $api_v == 0 ? 0 : $bpi_v / $api_v; # calculate fst #my $fst_v = 1 - ($api_v + $bpi_v) / 2 / $allpi_v; #push @fst, $fst_v; #calculate tajimad push @atajimad, $aeffectsample <= 3 ? 0 : ($api_v - $aomega_v) / &sdd($aeffectsample); push @btajimad, $beffectsample <= 3 ? 0 : ($bpi_v - $bomega_v) / &sdd($beffectsample); # calculate hp my ($amajor, $aminor, $bmajor, $bminor) = (0) x 4; if($aref >= $amut) { $amajor = $aref * 2 + $ahet; $aminor = $amut * 2 + $ahet; }else{ $amajor = $amut * 2 + $ahet; $aminor = $aref * 2 + $ahet; } if($bref >= $bmut) { $bmajor = $bref * 2 + $bhet; $bminor = $bmut * 2 + $bhet; }else{ $bmajor = $bmut * 2 + $bhet; $bminor = $bref * 2 + $bhet; } push @ahp, 2 * $amajor * $aminor / (($amajor + $aminor) ** 2); push @bhp, 2 * $bmajor * $bminor / (($bmajor + $bminor) ** 2); } my $final_fst = &ave(@allpi) != 0 ? 1 - &ave(@avepi) / &ave(@allpi) : 0; if($final_fst >= 1) { $final_fst = "1.0000"; }elsif($final_fst <= 0){ $final_fst = "0.0000"; }else{ $final_fst = sprintf("%.4f", $final_fst); } my $piratio = (&ave(@api))==0 ? 0 : &ave(@bpi)/&ave(@api); my $o = join("\t", "$_\t$rawlen", &ave(@aomega), &ave(@bomega), &ave(@api), &ave(@bpi), $piratio, &ave(@allpi), $final_fst, &ave(@atajimad), &ave(@btajimad), &ave(@ahp), &ave(@bhp)); print OO $o."\n"; } } sub ave{ my (@a) = @_; my ($sum) = (0) x 1; foreach my $i(@a) { $sum += $i; } return sprintf("%.6f", $sum / $opts{w}); } sub sdd{ my ($n) = @_; my ($a1, $a2); for(my $i = 1; $i < $n; $i ++) { $a1 += 1 / $i; $a2 += 1 / ($i * $i); } my $b1 = ($n + 1) / (3 * ($n - 1)); my $b2 = 2 * ($n * $n + $n + 3) / (9 * $n * ($n - 1)); my $c1 = $b1 - 1 / $a1; my $c2 = $b2 - ($n + 2) / ($a1 * $n) + $a2 / ($a1 * $a1); my $e1 = $c1 / $a1; my $e2 = $c2 / ($a1 * $a1 + $a2); my $value = sqrt($e1 * $n + $e2 * $n * ($n - 1)); return $value; } sub tt{ my ($N) = @_; my $sum; for(my $i = $N; $i > 0; $i --) { $sum += 1 / $i; } return $sum; }