加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 大数据 > 正文

perl novel可变剪接识别(3)

发布时间:2020-12-16 00:15:08 所属栏目:大数据 来源:网络整理
导读:好吧,该到最后的时刻了,识别novel可变剪接的类型,首先要在原理上判断类型的情况,把类型画出来比较好一些: 以上即为大致的分类,不过有两点要说明的:intron retained 1用tophat的结果无法实现,这个看覆盖度即可;另外还有两个分类:intergenic和other

好吧,该到最后的时刻了,识别novel可变剪接的类型,首先要在原理上判断类型的情况,把类型画出来比较好一些:

以上即为大致的分类,不过有两点要说明的:intron retained 1用tophat的结果无法实现,这个看覆盖度即可;另外还有两个分类:intergenic和other,顾名思义基因间区域内的和无法识别的,当然也可以按照其他分类,就不多说了,就以此为例吧:

#!/usr/bin/env perl
use warnings;
use strict;

my %hash;
open GTF,$ARGV[0] or die $!;
while(<GTF>)
{
	chomp;
	my @tmp = split;
	my ($a,$b) = (split /-/,$tmp[2])[0,1];
	$hash{$tmp[0]}{$tmp[1]}{'pn'} = $tmp[3];
	$hash{$tmp[0]}{$tmp[1]}{'rs'} = $a;
	$hash{$tmp[0]}{$tmp[1]}{'re'} = $b;
	$hash{$tmp[0]}{$tmp[1]}{'type'} = $tmp[4];
	my @a = split /,/,$tmp[5];
	my @b = split /,$tmp[6];
	@{$hash{$tmp[0]}{$tmp[1]}{'start'}} = @a;
	@{$hash{$tmp[0]}{$tmp[1]}{'end'}} = @b;
}
close GTF;

open NOVEL,$ARGV[1] or die $!;
print "#chrtsplice_starttsplice_endtsplice_strandtsplice_readstsplice_typetgene_idtgene_strandtgene_typen";
while(<NOVEL>)
{
	chomp;
	my @tmp = split;
	my $js = $tmp[1] + $tmp[5];
	my $je = $tmp[2] - $tmp[6] + 1;
	my ($chr,$read,$jpn) = @tmp[0,3,4];
	my $flag = 0;
	foreach my $t(keys %{$hash{$chr}})
	{
		if($je < $hash{$chr}{$t}{'rs'} or $js > $hash{$chr}{$t}{'re'})
		{
			next;
		}else{
			$flag = 1;
			if($js < $hash{$chr}{$t}{'rs'} and $je >= $hash{$chr}{$t}{'rs'})
			{
				if($hash{$chr}{$t}{'pn'} eq '+')
				{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chrt$jst$jet$jpnt$readtOthert$tt+tNonCodingn";
					}else{
						print "$chrt$jst$jet$jpnt$readt5UTRt$tt+tCodingn";
					}
				}else{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chrt$jst$jet$jpnt$readtOthert$tt-tNonCodingn";
					}else{
						print "$chrt$jst$jet$jpnt$readt3UTRt$tt-tCodingn";
					}
				}
				last;
			}elsif($js <= $hash{$chr}{$t}{'re'} and $je > $hash{$chr}{$t}{'re'}){
				if($hash{$chr}{$t}{'pn'} eq '+')
				{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chrt$jst$jet$jpnt$readtOthert$tt+tNonCodingn";
					}else{
						print "$chrt$jst$jet$jpnt$readt3UTRt$tt+tCodingn";
					}
				}else{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chrt$jst$jet$jpnt$readtOthert$tt-tNonCodingn";
					}else{
						print "$chrt$jst$jet$jpnt$readt5UTRt$tt-tCodingn";
					}
				}
				last;
			}else{
				my ($js_stat,$je_stat) = (0,0);
				foreach my $i(@{$hash{$chr}{$t}{'end'}})
				{
					$js_stat = 1 if($js eq $i);
				}
				foreach my $j(@{$hash{$chr}{$t}{'start'}})
				{
					$je_stat = 1 if($je eq $j);
				}
				if($js_stat eq 1 and $je_stat eq 1)
				{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chrt$jst$jet$jpnt$readtESt$tt$hash{$chr}{$t}{'pn'}tNonCodingn";
					}else{
						print "$chrt$jst$jet$jpnt$readtESt$tt$hash{$chr}{$t}{'pn'}tCodingn";
					}
				}elsif($js_stat eq 1){
					if($hash{$chr}{$t}{'pn'} eq '+')
					{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chrt$jst$jet$jpnt$readt3St$tt+tNonCodingn";
						}else{
							print "$chrt$jst$jet$jpnt$readt3St$tt+tCodingn";
						}
					}else{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chrt$jst$jet$jpnt$readt5St$tt-tNonCodingn";
						}else{
							print "$chrt$jst$jet$jpnt$readt5St$tt-tCodingn";
						}
					}
				}elsif($je_stat eq 1){
					if($hash{$chr}{$t}{'pn'} eq '+')
					{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chrt$jst$jet$jpnt$readt5St$tt+tNonCodingn";
						}else{
							print "$chrt$jst$jet$jpnt$readt5St$tt+tCodingn";
						}
					}else{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chrt$jst$jet$jpnt$readt3St$tt-tNonCodingn";
						}else{
							print "$chrt$jst$jet$jpnt$readt3St$tt-tCodingn";
						}
					}
				}else{
					my $f2 = 0;
					for(my $i = 0; $i < @{$hash{$chr}{$t}{'end'}}; $i ++)
					{
						if($js >= $hash{$chr}{$t}{'start'}->[$i] and $je <= $hash{$chr}{$t}{'end'}->[$i])
						{
							$f2 = 1;
							if($hash{$chr}{$t}{'type'} eq 0)
							{
								print "$chrt$jst$jet$jpnt$readtIRt$tt$hash{$chr}{$t}{'pn'}tNonCodingn";
							}else{
								print "$chrt$jst$jet$jpnt$readtIRt$tt$hash{$chr}{$t}{'pn'}tCodingn";
							}
							last;
						}
					}
					if($f2 eq 0)
					{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chrt$jst$jet$jpnt$readtOthert$tt$hash{$chr}{$t}{'pn'}tNonCodingn";
						}else{
							print "$chrt$jst$jet$jpnt$readtOthert$tt$hash{$chr}{$t}{'pn'}tCodingn";
						}
					}
				}
				last;
			}
		}
	}
	if($flag eq 0)
	{
		print "$chrt$jst$jet$jpnt$readtIntergenict.t.t.n";
	}
}
close NOVEL;

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    推荐文章
      热点阅读