the perl script of Tajima's D (backups)
#!/usr/bin/perl use strict; use warnings; =c--------------------------------------- this perl script is edited to compute tajima's D the origin file path is : /ifs5/ST_COMG/USER/yanzengli/tajima/data/ChrB01/ChrB01.bb.snp /ifs5/ST_COMG/USER/yanzengli/tajima/data/ChrB01/ChrB01.bb.tajima /ifs5/ST_COMG/USER/yanzengli/tajima/data/filter.site.position/ChrB01.filter.comm.gz edited by bangemantou (yanzengli@genomics.org.cn) 2012-03-29 pm; =cut die "nUsage: compute tajima's D;ncomands:nperl $0 snp.data tajima.outfile filter.site.data;nn" if (@ARGV != 3); my $snpdb = shift; my $tajima_outfile = shift; my $filter_position = shift; open AA,$snpdb or die $!; open BB,">",$tajima_outfile or die $!; open (CC,"gzip -dc $filter_position|" ) or die $!; my $window = 50000; my $bin = 0.1; my $cout = int ( 1 / $bin ); my $step = $window * $bin; my $sample = 0; my %filter = (); my %pi = (); my %snp = (); while ( <CC> ) { my @tt = split /s+/; my $k = int ( $tt[1] / $step ); $filter{$k} ++; } close CC; my $chr_name; my $chrlen = 0; while ( <AA> ) { chomp; my @tt = split; my @line = @tt; $chr_name = $line[0]; $chrlen = $tt[1]; $sample = ($tt[6]+$tt[7]) / 2; my $k = int ( $tt[1] / $step ); $pi{$k} += pi ( $line[6],$line[7] ); if ( ($line[6]*$line[7]) != 0 ) { $snp{$k}++; } } close AA; print BB " chrnametpositiontsum.pi/filter.sitesttajima's Dtsum.snptfilter.siten"; my $window_num = int ( $chrlen / $step ); #the number of steps for ( my $i=0; $i<=($window_num-$cout); $i++ ) { my $sum_pi = 0; my $sum_snp = 0; my $filter_site = 0; my $end = $i + $cout - 1; for ( my $aa=$i; $aa<=$end; $aa++ ){ if ( !defined($pi{$aa}) ) { $pi{$aa} = 0; } if ( !defined($snp{$aa}) ) { $snp{$aa} = 0; } if ( !defined($filter{$aa}) ) { $filter{$aa} = 0; } $sum_pi += $pi{$aa}; $sum_snp += $snp{$aa}; $filter_site += $filter{$aa}; } my $d = tajima( $sample,$sum_pi,$sum_snp ); my $id = ($i + 1) * $step; my $mean_pi = 0; my $theta = 0; if ( $filter_site ) { $mean_pi = $sum_pi / $filter_site; } print BB "$chr_namet$idt$mean_pit$dt$sum_snpt$filter_siten"; } close BB; sub pi { my $a = $_[0]; my $b = $_[1]; if ( $a == 0 || $b == 0 ) { return 0; } my $pi = ( 2 * $a * $b ) / ( ($a+$b) * ($a+$b-1) ); return $pi; } sub tajima { my $n = $_[0]; my $pi = $_[1]; my $t = $_[2]; my $a1; my $a2; for ( my $i=1; $i<$n; $i++ ){ $a1 += (1 / $i); $a2 += (1 / ($i*$i)); } my $b1 = ($n + 1) / ( 3 * ($n-1) ); my $b2 = 2 * ($n*$n + $n + 3) / (9 * $n * ($n-1)); my $c1 = $b1 - (1 / $a1); my $c2 = $b2 - ($n + 2) / ($a1 * $n) + $a2 / ($a1*$a1); my $e1 = $c1 / $a1; my $e2 = $c2 / ($a1*$a1 + $a2); if ( $t == 0) { return "NA"; next; } my $d = ( $pi - ($t/$a1) ) / sqrt ( $e1 * $t + $e2 * $t * ($t-1) ); return $d; } 说明: 1、? snp.data format chr_name position reference_base ancestral_base major minor major_# minor_# HWE genotypes ChrB01? 945???? A?????? A?????? G?????? A?????? 6?????? 30????? 0.166728??????? GG/0.000003???? GG/0.000198???? AG/1.000000???? GG/0.000000 ChrB01? 946???? G?????? G?????? T?????? G?????? 6?????? 30????? 0.166734??????? TT/0.000003???? TT/0.000394???? GT/1.000000???? TT/0.000000 ChrB01? 1660??? G?????? G?????? A?????? G?????? 5?????? 31????? 0.138889??????? AA/0.000000???? AA/0.000000???? AG/1.000000???? AA/0.000000 this is the standard format of snp. 2、? filter.site.data ChrB01? 190 ChrB01? 191 ChrB01? 192 ChrB01? 193 3、the algorithm is from tajima's paper. [] Fumio Tajima,Statistical Method for Testing the Neutral Mutation Hypothesis by DNA Polymorphism (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |