远程blast的Perl脚本
发布时间:2020-12-15 23:45:39 所属栏目:大数据 来源:网络整理
导读:之前用blast2go太慢,于是自己写了个Perl脚本进行远程blast,可以将fasta文件拆分后同时运行,最后生成XML文件,可以直接导入blast2go软件,速度比blast2go-basic的快些。 代码块 #!/usr/bin/perl # 黄良博 2015-3-30 # 利用该脚本进行远程blast,生成XML格式
之前用blast2go太慢,于是自己写了个Perl脚本进行远程blast,可以将fasta文件拆分后同时运行,最后生成XML文件,可以直接导入blast2go软件,速度比blast2go-basic的快些。 代码块#!/usr/bin/perl
# 黄良博 2015-3-30
# 利用该脚本进行远程blast,生成XML格式(m7)文件;
# 需要安装XML::SAX模块
use v5.16;
use warnings;
use Bio::Tools::Run::RemoteBlast;
use Getopt::Long;
use File::Basename;
my $basename=basename($0);
my %opt;
GetOptions(%opt,"program:s","database:s","file:s","evalue","v:i","b:i","gi","help");
my $help=<<USAGE;
Please enter parameters:
-help print help information
-program string [ blastp,blastn,blastx,tblastn,tblastx ;default: blastp ]
-database string [ swissprot,nr,nt,etc... ]
-file string [ fasta,contains one or more fasta format sequences ]
-evalue string [ default: '1e-5' ]
-v Integer[ Number of database sequences to show one-line descriptions for (V); default:10 ]
-b Integer[ Number of database sequence to show alignments for (B); default:10 ]
-gi [T/F] [ Show GI in deflines,default:T ]
usage:
perl $basename -p program -d database -f file [-e e-value] [-b 5] [-v 5]
USAGE
if($opt{help} or keys %opt < 3){
say $help;
exit;
}
my $prog = $opt{program};
my $db = $opt{database};
my $file = $opt{file};
my $e_val = $opt{evalue}||'1e-5';
my $description = $opt{v}||10;
my $alignment = $opt{b}||10;
my $gi = $opt{gi}||'T';
my @params = ( '-prog' => $prog,'-data' => $db,'-expect' => $e_val,'-readmethod' => 'xml' );
my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
#$factory->retrieve_parameter('FORMAT_TYPE','XML');
#change a query paramter
#$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
#$Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'PAM30';
#$Bio::Tools::Run::RemoteBlast::HEADER{'GAPCOSTS'} = '9 1';
#$Bio::Tools::Run::RemoteBlast::HEADER{'WORD_SIZE'} = '2';
#Have to request the blast with the right amount of alignments,
#$Bio::Tools::Run::RemoteBlast::HEADER{'ALIGNMENTS'} = '1000';
#$Bio::Tools::Run::RemoteBlast::HEADER{'DESCRIPTIONS'} = '1000';
#change a retrieval parameter
$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'DESCRIPTIONS'} = $description;
$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'NCBI_GI'} = $gi=~/f/i?'no':'yes';
$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'ALIGNMENTS'} = $alignment;
$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'FORMAT_TYPE'} = 'XML';
#remove a parameter
#delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
#$v is just to turn on and off the messages
#$v is just a way to either print out messages or not. So you can toggle whether you want the messages like "waiting ..."
my $v = 1;
my $str = Bio::SeqIO->new(-file=>$file,-format => 'fasta' );
my $num = 1;
while (my $input = $str->next_seq()){
#Blast a sequence against a database:
#Alternatively,you could pass in a file with many
#sequences rather than loop through sequence one at a time
#Remove the loop starting 'while (my $input = $str->next_seq())'
#and swap the two lines below for an example of that.
my $r = $factory->submit_blast($input);
print STDERR "n$num: waiting...n" if( $v > 0 );
$num ++;
while ( my @rids = $factory->each_rid ) {
foreach my $rid ( @rids ) {
my $rc = $factory->retrieve_blast($rid);
if( !ref($rc) ) {
if( $rc < 0 ) {
$factory->remove_rid($rid);
}
print STDERR "." if ( $v > 0 );
sleep 5;
} else {
my $result = $rc->next_result();
#save the output
my $filename = $result->query_name().".xml";
$factory->save_output($filename);
`cat $filename >> blastRemoteResult.xml; rm $filename`;
$factory->remove_rid($rid);
}
}
}
}
(编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |