加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 大数据 > 正文

基因数据处理17之使用scala对BWA运行结果进行各阶段程序时间提取

发布时间:2020-12-14 01:59:20 所属栏目:大数据 来源:网络整理
导读:提取代码: package testimport scala.io.Source import java.io.File._import java.io.PrintWriterobject logPatternBwaAll extends App {val out=new PrintWriter("file/bwaResult/allbwa.txt")val files = (new java.io.File("file/allbwa")).listFiles()



提取代码:

package test
import scala.io.Source  
import java.io.File._
import java.io.PrintWriter
/** Collects per-stage timings from the BWA log files found in `file/allbwa`.
  *
  * For every log file the program extracts the read count reported before each
  * `[main]` section together with the command name and the two numbers that
  * follow it, and appends one CSV row per stage to `file/bwaResult/allbwa.txt`
  * in the form `reads,name,RealTime,CPUTime`.
  */
object logPatternBwaAll {

  /** Command name (`bwa aln` or `bwa samse`) followed by the next two
    * colon-introduced decimal numbers. Group 1 is the command, group 2 the
    * first number (written as RealTime), group 3 the second (written as CPUTime).
    * NOTE(review): presumably these are the `Real time: ...; CPU: ...` values of
    * the BWA summary line -- confirm against an actual log.
    */
  private val timingPattern =
    """(bwa aln|bwa samse)[^:]+:\s*([0-9]*\.[0-9]*)[^:]+:\s*([0-9]*\.[0-9]*)""".r

  /** The "sequences have been processed." line that is immediately followed by
    * a `[main]` line; group 1 is the reported sequence count. Lines followed by
    * anything else are intentionally not matched.
    */
  private val readCountPattern =
    """\[bwa_aln_core\]\s*([0-9]+)\s+sequences have been processed\.\r?\n\[main\]""".r

  /** Returns `(command, firstNumber, secondNumber)` for every stage found in `log`. */
  def extractTimings(log: String): Vector[(String, String, String)] =
    timingPattern.findAllMatchIn(log).map(m => (m.group(1), m.group(2), m.group(3))).toVector

  /** Returns every sequence count that directly precedes a `[main]` line in `log`. */
  def extractReadCounts(log: String): Vector[String] =
    readCountPattern.findAllMatchIn(log).map(_.group(1)).toVector

  /** Pairs the k-th read count with the k-th timing and renders CSV rows. */
  private def pairRows(
      counts: Vector[String],
      timings: Vector[(String, String, String)]
  ): Vector[String] =
    counts.zip(timings).map { case (reads, (stage, real, cpu)) =>
      s"$reads,$stage,$real,$cpu"
    }

  /** Renders the CSV rows (`reads,name,RealTime,CPUTime`) for one log's content. */
  def csvRows(log: String): Vector[String] =
    pairRows(extractReadCounts(log), extractTimings(log))

  /** Reads a whole file into a string and always releases the file handle. */
  private def readAll(file: java.io.File): String = {
    val src = Source.fromFile(file)
    try src.mkString
    finally src.close()
  }

  /** Entry point; `args` is not used. */
  def main(args: Array[String]): Unit = {
    // listFiles() returns null when the directory is missing or unreadable.
    val logFiles = Option(new java.io.File("file/allbwa").listFiles())
      .getOrElse(Array.empty[java.io.File])
      .filter(_.isFile)

    val out = new PrintWriter("file/bwaResult/allbwa.txt")
    try {
      for (ifile <- logFiles) {
        val log = readAll(ifile)
        println("*************")
        val timings = extractTimings(log)
        val counts = extractReadCounts(log)
        println("file:" + ifile + " i: " + timings.length + " j:" + counts.length)
        if (timings.length != counts.length) {
          // Rows are paired by position, so a mismatch means some were dropped.
          System.err.println(
            "warning: " + ifile + " has " + timings.length + " timings but " +
              counts.length + " read counts; unpaired entries are skipped"
          )
        }
        pairRows(counts, timings).foreach(row => out.println(row))
      }
    } finally {
      out.close()
    }
  }
}

文件略

运行结果:

*************
file:file\allbwa\allbwaMcnode4201503152256.txt i: 80 j:80
*************
file:file\allbwa\allbwaMcnode5201503152257.txt i: 48 j:48


文件:

1000000,bwa aln,2857.149,2635.091
1000000,bwa samse,636.521,44.126
100000,314.582,266.236
100000,65.879,8.278
10000,66.274,29.478
10000,57.813,6.455
1000,51.385,5.337
1000,60.005,6.191
100,64.015,3.698
100,57.623,6.245
10,43.264,3.674
10,56.932,6.122
1376701,3439.502,3368.835
1376701,954.696,63.026
1,46.101,4.239
1,105.631,7.134
1000000,2657.866,2596.362
1000000,641.605,44.599
100000,311.628,267.335
100000,80.114,8.553
10000,75.529,29.058
10000,68.253,6.805
1000,35.660,4.992
1000,68.479,6.400
100,68.807,3.467
100,60.773,6.473
10,36.554,3.394
10,58.565,6.227
1376701,3340.227,3291.661
1376701,965.292,64.123
1,50.033,4.259
1,97.036,2705.061,2602.329
1000000,746.309,45.121
100000,337.389,267.026
100000,128.688,8.410
10000,63.781,29.456
10000,63.852,6.370
1000,74.131,5.352
1000,61.345,6.322
100,36.548,3.718
100,59.413,6.290
10,40.851,3.534
10,58.213,6.190
1376701,3415.936,3352.930
1376701,959.164,62.181
1,61.348,4.295
1,69.598,7.006
1000000,2645.401,2596.651
1000000,699.306,45.061
100000,344.852,268.535
100000,80.399,8.396
10000,75.223,29.874
10000,63.489,6.645
1000,39.429,5.417
1000,88.568,6.457
100,38.652,3.785
100,107.520,6.421
10,37.600,3.740
10,61.169,6.344
1376701,3440.678,3392.082
1376701,907.994,63.867
1,77.315,4.379
1,88.482,7.261
1000000,2638.026,2591.768
1000000,677.774,45.411
100000,341.549,267.942
100000,69.476,8.002
10000,71.341,28.999
10000,92.539,6.592
1000,37.764,5.038
1000,85.393,6.309
100,36.931,3.441
100,55.189,6.164
10,36.591,3.376
10,60.511,6.233
1376701,3374.169,3306.679
1376701,904.889,62.845
1,45.566,4.285
1,105.423,7.195
1000000,2594.796,2550.208
1000000,351.171,39.139
100000,287.048,254.777
100000,56.808,7.123
10000,58.718,28.255
10000,53.658,5.820
1000,35.007,4.719
1000,51.336,5.897
100,31.942,2.995
100,49.557,5.772
10,31.886,3.022
10,48.025,5.613
1376701,3311.054,3263.967
1376701,406.588,56.095
1,34.313,3.296
1,48.968,5.889
1000000,2563.812,2518.568
1000000,274.543,39.424
100000,289.985,258.128
100000,54.027,7.268
10000,57.232,28.505
10000,51.642,5.992
1000,34.287,4.819
1000,54.404,5.852
100,31.889,3.098
100,50.533,5.624
10,30.825,3.039
10,48.792,5.689
1376701,3322.055,3278.166
1376701,401.298,54.739
1,33.126,3.158
1,48.648,5.679
1000000,2632.776,2550.853
1000000,274.748,38.248
100000,288.116,255.824
100000,53.583,7.121
10000,55.970,28.218
10000,50.422,5.742
1000,31.810,4.685
1000,49.833,5.626
100,30.145,3.002
100,48.126,5.498
10,29.755,2.876
10,48.692,5.546
1376701,3330.966,3287.796
1376701,399.715,54.858
1,33.618,3.219
1,52.871,5.712

求和代码1:

package test
import scala.io.Source
import scala.collection.mutable.ArrayBuffer
import java.io.PrintWriter
/** Averages the timings in `file/bwaResult/allbwa.txt`.
  *
  * The input is CSV with the columns `reads,name,RealTime,CPUTime`. Two
  * summaries are produced:
  *  - per `(reads, name)` pair: mean RealTime, mean CPUTime and row count,
  *    written to `file/bwaResult/sumallbwaAll.txt`;
  *  - per `reads` value: the mean over all rows multiplied by the number of
  *    distinct names, plus the row count divided by that number; only
  *    `reads,number` is written to `file/bwaResult/sumallbwaTotal.txt`.
  * Every summary row is also echoed to standard output.
  */
object sumbwaall {
  // Lazy so that the output files are only created once main actually runs.
  lazy val out: PrintWriter = new PrintWriter("file/bwaResult/sumallbwaAll.txt")
  lazy val out2: PrintWriter = new PrintWriter("file/bwaResult/sumallbwaTotal.txt")

  /** One input row. */
  final case class Record(reads: String, name: String, realTime: Double, cpuTime: Double)

  /** Mean timings of one `(reads, name)` pair; `count` is the number of rows averaged. */
  final case class PairMean(reads: String, name: String, realTime: Double, cpuTime: Double, count: Int) {
    def toCsv: String = s"$reads,$name,$realTime,$cpuTime,$count"
  }

  /** Combined timings of one `reads` value across all names. */
  final case class ReadsTotal(reads: String, realTime: Double, cpuTime: Double, runs: Int) {
    def toCsv: String = s"$reads,$realTime,$cpuTime,$runs"
  }

  /** Parses one CSV line; reports and drops lines that are too short or non-numeric. */
  private def parseLine(line: String): Option[Record] = {
    val fields = line.split(",")
    if (fields.length < 4) {
      System.err.println("warning: skipping malformed line: " + line)
      None
    } else {
      try Some(Record(fields(0), fields(1), fields(2).toDouble, fields(3).toDouble))
      catch {
        case _: NumberFormatException =>
          System.err.println("warning: skipping non-numeric line: " + line)
          None
      }
    }
  }

  /** Parses all usable lines, ignoring blank ones. */
  def parseRecords(lines: Seq[String]): Vector[Record] =
    lines.map(_.trim).filter(_.nonEmpty).flatMap(l => parseLine(l).toList).toVector

  /** One entry per (reads, name) combination, in order of first appearance.
    * Combinations without any rows are kept and yield NaN means with count 0.
    */
  def pairMeans(records: Seq[Record]): Vector[PairMean] = {
    val readsKeys = records.map(_.reads).distinct.toVector
    val names = records.map(_.name).distinct.toVector
    val grouped = records.groupBy(r => (r.reads, r.name))
    for (reads <- readsKeys; name <- names) yield {
      val rows = grouped.getOrElse((reads, name), Seq.empty[Record])
      val n = rows.length
      PairMean(reads, name, rows.map(_.realTime).sum / n, rows.map(_.cpuTime).sum / n, n)
    }
  }

  /** One entry per reads value, in order of first appearance. */
  def readsTotals(records: Seq[Record]): Vector[ReadsTotal] = {
    val names = records.map(_.name).distinct.toVector
    val grouped = records.groupBy(r => (r.reads, r.name))
    records.map(_.reads).distinct.toVector.map { reads =>
      // Rows are visited name by name so the floating-point sum is accumulated
      // in the same order as a nested loop over names and then over lines.
      val rows = names.flatMap(name => grouped.getOrElse((reads, name), Seq.empty[Record]))
      val n = rows.length
      ReadsTotal(
        reads,
        names.length * rows.map(_.realTime).sum / n,
        names.length * rows.map(_.cpuTime).sum / n,
        n / names.length
      )
    }
  }

  /** Entry point; `args` is not used. */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("file/bwaResult/allbwa.txt")
    val records =
      try parseRecords(source.getLines().toVector)
      finally source.close()

    try {
      out.println("reads,name,RealTime,CPUTime,number")
      for (p <- pairMeans(records)) {
        println(p.toCsv)
        // Write every column announced by the header, not just reads and count.
        out.println(p.toCsv)
      }
      out2.println("reads,number")
      for (t <- readsTotals(records)) {
        println(t.toCsv)
        out2.println(t.reads + "," + t.runs)
      }
    } finally {
      out.close()
      out2.close()
    }
  }
}


运行结果:

1000000,2661.8608749999994,2580.2287499999998,8
1000000,537.747125,42.641124999999995,8
100000,314.393625,263.225375,73.62174999999999,7.893875,8
10000,65.50850000000001,28.980375,62.7085,6.302624999999999,8
1000,42.434124999999995,5.044875,64.92037499999999,6.131749999999999,8
100,42.366125,3.4004999999999996,61.091750000000005,6.060874999999999,8
10,35.91575,3.331875,55.112375,5.995500000000001,8
1376701,3371.823375,3317.7644999999998,737.4544999999999,60.21674999999999,8
1,47.677499999999995,3.8912500000000003,77.08212499999999,6.626250000000001,3199.6079999999997,2622.8698749999994,388.015375,271.11924999999997,128.217,35.283,107.35449999999999,11.176625000000001,103.45787499999999,9.461375,91.028125,9.327375000000002,4109.277875,3377.9812500000003,124.75962499999999,10.517500000000002,8

再次求和(合并 bwa aln 和 bwa samse 两个阶段):

package test
import scala.io.Source
import scala.collection.mutable.ArrayBuffer
import java.io.PrintWriter
/** Sums the timings in `file/bwaResult/allbwa.txt` with the two BWA stages merged.
  *
  * The input is CSV with the columns `reads,name,RealTime,CPUTime`.
  *
  * NOTE(review): the published listing of this program was truncated (the
  * declarations of the distinct keys and of the result arrays, and the output
  * loops, were missing, and the `split` call was cut mid-literal). Those parts
  * have been reconstructed to mirror the `sumbwaall` program; the surviving
  * lines (factor 2 in the per-reads totals, undivided row count) are kept.
  * Confirm against the author's original before relying on the output format.
  */
object sumbwaallTotal {
  // Lazy so that the output files are only created once main actually runs.
  lazy val out: PrintWriter = new PrintWriter("file/bwaResult/sumallbwa.txt")
  lazy val out2: PrintWriter = new PrintWriter("file/bwaResult/sumallbwaTotal.txt")

  /** The per-reads totals multiply the overall mean by this literal factor. */
  private val StagesPerRun = 2

  /** One input row. */
  final case class Record(reads: String, name: String, realTime: Double, cpuTime: Double)

  /** Mean timings of one `(reads, name)` pair; `count` is the number of rows averaged. */
  final case class PairMean(reads: String, name: String, realTime: Double, cpuTime: Double, count: Int) {
    def toCsv: String = s"$reads,$name,$realTime,$cpuTime,$count"
  }

  /** Merged timings of one `reads` value; `count` is the number of rows it covers. */
  final case class ReadsTotal(reads: String, realTime: Double, cpuTime: Double, count: Int) {
    def toCsv: String = s"$reads,$realTime,$cpuTime,$count"
  }

  /** Parses one CSV line; reports and drops lines that are too short or non-numeric. */
  private def parseLine(line: String): Option[Record] = {
    val fields = line.split(",")
    if (fields.length < 4) {
      System.err.println("warning: skipping malformed line: " + line)
      None
    } else {
      try Some(Record(fields(0), fields(1), fields(2).toDouble, fields(3).toDouble))
      catch {
        case _: NumberFormatException =>
          System.err.println("warning: skipping non-numeric line: " + line)
          None
      }
    }
  }

  /** Parses all usable lines, ignoring blank ones. */
  def parseRecords(lines: Seq[String]): Vector[Record] =
    lines.map(_.trim).filter(_.nonEmpty).flatMap(l => parseLine(l).toList).toVector

  /** One entry per (reads, name) combination, in order of first appearance.
    * Combinations without any rows are kept and yield NaN means with count 0.
    */
  def pairMeans(records: Seq[Record]): Vector[PairMean] = {
    val readsKeys = records.map(_.reads).distinct.toVector
    val names = records.map(_.name).distinct.toVector
    val grouped = records.groupBy(r => (r.reads, r.name))
    for (reads <- readsKeys; name <- names) yield {
      val rows = grouped.getOrElse((reads, name), Seq.empty[Record])
      val n = rows.length
      PairMean(reads, name, rows.map(_.realTime).sum / n, rows.map(_.cpuTime).sum / n, n)
    }
  }

  /** One entry per reads value, in order of first appearance. */
  def readsTotals(records: Seq[Record]): Vector[ReadsTotal] = {
    val names = records.map(_.name).distinct.toVector
    val grouped = records.groupBy(r => (r.reads, r.name))
    records.map(_.reads).distinct.toVector.map { reads =>
      // Rows are visited name by name so the floating-point sum is accumulated
      // in the same order as a nested loop over names and then over lines.
      val rows = names.flatMap(name => grouped.getOrElse((reads, name), Seq.empty[Record]))
      val n = rows.length
      ReadsTotal(
        reads,
        StagesPerRun * rows.map(_.realTime).sum / n,
        StagesPerRun * rows.map(_.cpuTime).sum / n,
        n
      )
    }
  }

  /** Entry point; `args` is not used. */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("file/bwaResult/allbwa.txt")
    val records =
      try parseRecords(source.getLines().toVector)
      finally source.close()

    try {
      out.println("reads,name,RealTime,CPUTime,number")
      for (p <- pairMeans(records)) {
        println(p.toCsv)
        out.println(p.toCsv)
      }
      out2.println("reads,number")
      for (t <- readsTotals(records)) {
        println(t.toCsv)
        out2.println(t.reads + "," + t.count)
      }
    } finally {
      out.close()
      out2.close()
    }
  }
}
运行结果:

reads,number
1000000,8

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    推荐文章
      热点阅读