perl novel可变剪接识别(3)
发布时间:2020-12-16 00:15:08 所属栏目:大数据 来源:网络整理
导读:好吧,该到最后的时刻了,识别novel可变剪接的类型,首先要在原理上判断类型的情况,把类型画出来比较好一些: 以上即为大致的分类,不过有两点要说明的:intron retained 1用tophat的结果无法实现,这个看覆盖度即可;另外还有两个分类:intergenic和other
好吧,该到最后的时刻了,识别novel可变剪接的类型,首先要在原理上判断类型的情况,把类型画出来比较好一些: 以上即为大致的分类,不过有两点要说明的:intron retained 1用tophat的结果无法实现,这个看覆盖度即可;另外还有两个分类:intergenic和other,顾名思义基因间区域内的和无法识别的,当然也可以按照其他分类,就不多说了,就以此为例吧: #!/usr/bin/env perl use warnings; use strict; my %hash; open GTF,$ARGV[0] or die $!; while(<GTF>) { chomp; my @tmp = split; my ($a,$b) = (split /-/,$tmp[2])[0,1]; $hash{$tmp[0]}{$tmp[1]}{'pn'} = $tmp[3]; $hash{$tmp[0]}{$tmp[1]}{'rs'} = $a; $hash{$tmp[0]}{$tmp[1]}{'re'} = $b; $hash{$tmp[0]}{$tmp[1]}{'type'} = $tmp[4]; my @a = split /,/,$tmp[5]; my @b = split /,$tmp[6]; @{$hash{$tmp[0]}{$tmp[1]}{'start'}} = @a; @{$hash{$tmp[0]}{$tmp[1]}{'end'}} = @b; } close GTF; open NOVEL,$ARGV[1] or die $!; print "#chrtsplice_starttsplice_endtsplice_strandtsplice_readstsplice_typetgene_idtgene_strandtgene_typen"; while(<NOVEL>) { chomp; my @tmp = split; my $js = $tmp[1] + $tmp[5]; my $je = $tmp[2] - $tmp[6] + 1; my ($chr,$read,$jpn) = @tmp[0,3,4]; my $flag = 0; foreach my $t(keys %{$hash{$chr}}) { if($je < $hash{$chr}{$t}{'rs'} or $js > $hash{$chr}{$t}{'re'}) { next; }else{ $flag = 1; if($js < $hash{$chr}{$t}{'rs'} and $je >= $hash{$chr}{$t}{'rs'}) { if($hash{$chr}{$t}{'pn'} eq '+') { if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtOthert$tt+tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt5UTRt$tt+tCodingn"; } }else{ if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtOthert$tt-tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt3UTRt$tt-tCodingn"; } } last; }elsif($js <= $hash{$chr}{$t}{'re'} and $je > $hash{$chr}{$t}{'re'}){ if($hash{$chr}{$t}{'pn'} eq '+') { if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtOthert$tt+tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt3UTRt$tt+tCodingn"; } }else{ if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtOthert$tt-tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt5UTRt$tt-tCodingn"; } } last; }else{ my ($js_stat,$je_stat) = (0,0); foreach my $i(@{$hash{$chr}{$t}{'end'}}) { $js_stat = 1 if($js eq $i); } foreach my $j(@{$hash{$chr}{$t}{'start'}}) { $je_stat = 1 if($je eq $j); } if($js_stat eq 1 and $je_stat eq 1) { if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtESt$tt$hash{$chr}{$t}{'pn'}tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readtESt$tt$hash{$chr}{$t}{'pn'}tCodingn"; } }elsif($js_stat eq 1){ if($hash{$chr}{$t}{'pn'} eq '+') { if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readt3St$tt+tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt3St$tt+tCodingn"; } }else{ if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readt5St$tt-tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt5St$tt-tCodingn"; } } }elsif($je_stat eq 1){ if($hash{$chr}{$t}{'pn'} eq '+') { if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readt5St$tt+tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt5St$tt+tCodingn"; } }else{ if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readt3St$tt-tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readt3St$tt-tCodingn"; } } }else{ my $f2 = 0; for(my $i = 0; $i < @{$hash{$chr}{$t}{'end'}}; $i ++) { if($js >= $hash{$chr}{$t}{'start'}->[$i] and $je <= $hash{$chr}{$t}{'end'}->[$i]) { $f2 = 1; if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtIRt$tt$hash{$chr}{$t}{'pn'}tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readtIRt$tt$hash{$chr}{$t}{'pn'}tCodingn"; } last; } } if($f2 eq 0) { if($hash{$chr}{$t}{'type'} eq 0) { print "$chrt$jst$jet$jpnt$readtOthert$tt$hash{$chr}{$t}{'pn'}tNonCodingn"; }else{ print "$chrt$jst$jet$jpnt$readtOthert$tt$hash{$chr}{$t}{'pn'}tCodingn"; } } } last; } } } if($flag eq 0) { print "$chrt$jst$jet$jpnt$readtIntergenict.t.t.n"; } } close NOVEL; (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |