use Perl analysis BLAST data
发布时间:2020-12-16 00:28:43 所属栏目:大数据 来源:网络整理
导读:use Perl analysis BLAST data #!/usr/bin/perluse 5.010;use strict;say "Demo : use perl process BLAST's data : search a large sequence,and find the sepcial sequence .";my $REPORT_FILE = "report.txt";my $blast_file = $ARGV[0] || 'todo';# what
use Perl analysis BLAST data #!/usr/bin/perl use 5.010; use strict; say "Demo : use perl process BLAST's data : search a large sequence,and find the sepcial sequence ."; my $REPORT_FILE = "report.txt"; my $blast_file = $ARGV[0] || 'todo'; # what's || ? unless (-e $blast_file){ die "$0 : ERROR : missing file :$blast_file"; }else{ say "get $blast_file okay !"; } my ($query_src,$sbjct_src); open(IN,$blast_file) or die "$0,ERROR : $blast_file : $!"; while(my $line =<IN>){ chomp $line; say "processing line $ : $line."; my @words = split /s+/,$line; if($line =~ /^Query/){ $query_src .= $words[2]; }elsif($line =~ /^Sbjct/){ $sbjct_src .= $words[2]; } } close IN; my @patterns =('gagcc','caat','cat','tcggga','-'); my (%query_counts,%sbjct_counts); foreach my $pattern (@patterns){ while ($query_src =~ /$pattern/g){ $query_counts{$pattern}++; say "find $pattern for query!"; } while ($sbjct_src =~ /$pattern/g){ $sbjct_counts{$pattern}++; say "find $pattern for sbjct!"; } } open (OUT,">$REPORT_FILE") or die "$0: ERROR: Can't write $REPORT_FILE"; print OUT "Sequence Reportn","Run by Yummy on ",scalar localtime,"n","nNOTE: In the following reports,a dash (-) represents n"," missing data in the chromosomal sequence nn","Total length of 'Query' sequence: ",length $query_src," charactersn","Results for 'Query':n"; foreach my $key (sort @patterns){ print OUT "t'$key' seen $query_counts{$key}n"; } print OUT "nTotal length of 'Sbjct' sequence: ",length $sbjct_src,"Results for 'Sbjct':n"; foreach my $key (sort @patterns){ print OUT "t'$key' seen $sbjct_counts{$key}n"; } close OUT; __END__ and BLAST.data (just a demo ) Query: 1165 gagcccaggagttcaagaccagcctgggtaacatgatgaaacc-tcgtctctac 1217 Sbjct: 11895 gagctcaggagtttgagaccagcctggggaacacggtgaaaccctgtctctac 11843 Query: 1170 caggagttcaagaccagcctg 1190 Sbjct: 69962 caggag-ttcaagaccagcctg 69942 Query: 1106 tggtggctcacacctgcaatcccagcact 1134 Sbjct: 77363 tggtggctcacgcctgtaatcccagcact 77335for more information,you can have a reference from 《Developing_Bioinformatics_Computer_Skills》. by Yummy on Tue Feb? 7 23:06:18 2012 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |