Perl--寻找两个基因组中的unique序列
发布时间:2020-12-16 00:00:26 所属栏目:大数据 来源:网络整理
导读:原始数据: 基因组的fasta文件,两个基因组之间blast的比对坐标 scaffold1 27 23 ??9022 ?? contig00005 ? ?238996 ??1 解决方法一: #!/usr/bin/perl -w use strict;use warnings;no strict "refs";my $usage = " perl $0 mutation-vs-wild.m8 mutation.fa
原始数据: 基因组的fasta文件,两个基因组之间blast的比对坐标 scaffold1 2723 ??9022 ??contig00005 ? ?238996 ??1 解决方法一: #!/usr/bin/perl -w use strict; use warnings; no strict "refs"; my $usage = " perl $0 mutation-vs-wild.m8 mutation.fa wild.fa n"; unless(@ARGV == 3){ die $usage; } my $aln = shift ; my $mut = shift; my $wild = shift; open ALN,"<",$aln or die "can't open $alnn"; #open MUT,$mut or die "can't open $mutn"; #open WILD,$wild or die "can't open $wildn"; my %mut = build_fa($mut); my %wild = build_fa($wild); coden(%mut); coden(%wild); while(<ALN>){ chomp; my ($scf,$st1,$ed1,$ctg,$st2,$ed2) = split"t",$_; coden_aln($scf,$ed1); coden_aln($ctg,$ed2); } my %want_mut = get_coord(%mut); my %want_wild = get_coord(%wild); get_fa_seq ("mut",%want_mut,%mut); get_fa_seq ("wild",%want_wild,%wild); sub build_fa { my $file = shift @_; my %fa; my $tit; open FA,$file or die "can't open $filen"; while(<FA>){ chomp; if(/>(.*)/){ $tit = $1; }else{ $fa{$tit} .= $_; } } return %fa; } sub coden { my $hash = shift @_; foreach my $key (keys %$hash){ foreach my $i (1 .. length $hash->{$key}){ ${$key}[$i] = 0; } } } sub coden_aln { my ($scf,$st,$ed) = @_; if($st > $ed){ ($st,$ed) = ($ed,$st); } foreach my $i ($st .. $ed){ ${$scf}[$i] =1; } } sub get_coord { my %want; my $hash = shift @_; my $flag; foreach my $key (keys %$hash){ my $start; my $end; if($key =~/scaffold/){ $flag = "mut"; }else{ $flag = "wild"; } foreach my $i (1.. length $hash->{$key}){ if($i == 1 && ${$key}[1] == 0){ $start = 1; }elsif(${$key}[$i] == 0 && ${$key}[$i-1] ==1){ $start = $i; } if($i == length $hash->{$key} && ${$key}[$i]==0 ){ $end = $i; }elsif(${$key}[$i] == 0 && ${$key}[$i+1] == 1){ $end = $i; } if($start && $end && $start <= $end ){ $want{$key}{$start} = $end; } } } open MUT_CORD,">",$flag."_coord"; foreach my $key (keys %want){ foreach my $st (keys %{$want{$key}}){ my $end = $want{$key}{$st}; print MUT_CORD $key,"t",$want{$key}{$st},"n"; } } close MUT_CORD; return %want; } sub get_fa_seq { my ($flag,$want,$fa) = @_; open MUT_FA,$flag."_uniq.fa"; foreach my $key (keys %$fa){ foreach my $cord (keys %{$want->{$key}}){ my $start = $cord; my $end = $want->{$key}{$cord}; my $seq =substr ($fa->{$key},$start-1,$end-$start+1); print MUT_FA ">$key $start->$endn$seqn"; } } close MUT_FA; } (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |