基因数据处理17之使用Scala对BWA运行结果进行各阶段程序时间提取
发布时间:2020-12-14 01:59:20 所属栏目:大数据 来源:网络整理
导读:提取代码: package test import scala.io.Source import java.io.File._ import java.io.PrintWriter object logPatternBwaAll extends App { val out=new PrintWriter("file/bwaResult/allbwa.txt") val files = (new java.io.File("file/allbwa")).listFiles()
提取代码: package test import scala.io.Source import java.io.File._ import java.io.PrintWriter object logPatternBwaAll extends App { val out=new PrintWriter("file/bwaResult/allbwa.txt") val files = (new java.io.File("file/allbwa")).listFiles() for (ifile <- files) { val source = Source.fromFile(ifile).mkString println("*************") val pattern = """(bwa aln|bwa samse)[^:]+:s*([0-9]*.[0-9]*)[^:]+:s*([0-9]*.[0-9]*)""".r val pattern2="""[bwa_aln_core]s*([0-9]+)s+sequences have been processed.n[main]""".r val b1=for(pattern(s1,num1,num2)<-pattern.findAllIn(source)) yield (s1,num2) val b2=for(pattern2(seq)<-pattern2.findAllIn(source)) yield (seq) val b22=b2.toArray val b11=b1.toArray println("file:"+ifile+" i: "+b11.length+" j:"+b22.length); for(k<-0 until b22.length) { // println(b22(k)+","+b11(k)._1+","+b11(k)._2+","+b11(k)._3) out.println(b22(k)+","+b11(k)._3) } // source. } out.close() // for(i<- 1 to 10;j<-1 to 4){ // if (sum==10){ // println("i: "+i+" j:"+j); // }else{ // sum=sum+1 // } // } // val i = Array(1 to 10) // val j=Array(1 to 10) // val k1=for(k<-0 until i.length) yield (i(k),j(k)) // k1.foreach(println) // for (k<-0 until i) } 文件略 运行结果: ************* file:fileallbwaallbwaMcnode4201503152256.txt i: 80 j:80 ************* file:fileallbwaallbwaMcnode5201503152257.txt i: 48 j:48 1000000,bwa aln,2857.149,2635.091 1000000,bwa samse,636.521,44.126 100000,314.582,266.236 100000,65.879,8.278 10000,66.274,29.478 10000,57.813,6.455 1000,51.385,5.337 1000,60.005,6.191 100,64.015,3.698 100,57.623,6.245 10,43.264,3.674 10,56.932,6.122 1376701,3439.502,3368.835 1376701,954.696,63.026 1,46.101,4.239 1,105.631,7.134 1000000,2657.866,2596.362 1000000,641.605,44.599 100000,311.628,267.335 100000,80.114,8.553 10000,75.529,29.058 10000,68.253,6.805 1000,35.660,4.992 1000,68.479,6.400 100,68.807,3.467 100,60.773,6.473 10,36.554,3.394 10,58.565,6.227 1376701,3340.227,3291.661 1376701,965.292,64.123 1,50.033,4.259 1,97.036,2705.061,2602.329 1000000,746.309,45.121 
100000,337.389,267.026 100000,128.688,8.410 10000,63.781,29.456 10000,63.852,6.370 1000,74.131,5.352 1000,61.345,6.322 100,36.548,3.718 100,59.413,6.290 10,40.851,3.534 10,58.213,6.190 1376701,3415.936,3352.930 1376701,959.164,62.181 1,61.348,4.295 1,69.598,7.006 1000000,2645.401,2596.651 1000000,699.306,45.061 100000,344.852,268.535 100000,80.399,8.396 10000,75.223,29.874 10000,63.489,6.645 1000,39.429,5.417 1000,88.568,6.457 100,38.652,3.785 100,107.520,6.421 10,37.600,3.740 10,61.169,6.344 1376701,3440.678,3392.082 1376701,907.994,63.867 1,77.315,4.379 1,88.482,7.261 1000000,2638.026,2591.768 1000000,677.774,45.411 100000,341.549,267.942 100000,69.476,8.002 10000,71.341,28.999 10000,92.539,6.592 1000,37.764,5.038 1000,85.393,6.309 100,36.931,3.441 100,55.189,6.164 10,36.591,3.376 10,60.511,6.233 1376701,3374.169,3306.679 1376701,904.889,62.845 1,45.566,4.285 1,105.423,7.195 1000000,2594.796,2550.208 1000000,351.171,39.139 100000,287.048,254.777 100000,56.808,7.123 10000,58.718,28.255 10000,53.658,5.820 1000,35.007,4.719 1000,51.336,5.897 100,31.942,2.995 100,49.557,5.772 10,31.886,3.022 10,48.025,5.613 1376701,3311.054,3263.967 1376701,406.588,56.095 1,34.313,3.296 1,48.968,5.889 1000000,2563.812,2518.568 1000000,274.543,39.424 100000,289.985,258.128 100000,54.027,7.268 10000,57.232,28.505 10000,51.642,5.992 1000,34.287,4.819 1000,54.404,5.852 100,31.889,3.098 100,50.533,5.624 10,30.825,3.039 10,48.792,5.689 1376701,3322.055,3278.166 1376701,401.298,54.739 1,33.126,3.158 1,48.648,5.679 1000000,2632.776,2550.853 1000000,274.748,38.248 100000,288.116,255.824 100000,53.583,7.121 10000,55.970,28.218 10000,50.422,5.742 1000,31.810,4.685 1000,49.833,5.626 100,30.145,3.002 100,48.126,5.498 10,29.755,2.876 10,48.692,5.546 1376701,3330.966,3287.796 1376701,399.715,54.858 1,33.618,3.219 1,52.871,5.712 求和代码1: package test import scala.io.Source import scala.collection.mutable.ArrayBuffer import java.io.PrintWriter object sumbwaall { val out=new 
PrintWriter("file/bwaResult/sumallbwaAll.txt") val out2=new PrintWriter("file/bwaResult/sumallbwaTotal.txt") def main(args:Array[String]){ val source = Source.fromFile("file/bwaResult/allbwa.txt") // val source = Source.fromFile("file/bwaResult/allbwa.txt") val i=source.getLines.toArray val it0=for(j<-i) yield j.split(",") val isort=it0.sortBy(_(0)).reverse var it1=(for(i<-it0) yield i(0)).distinct var it2=(for(i<-it0) yield i(1)).distinct val size =it1.length*it2.length var array1=Array.ofDim[String](size,5) var array2=Array.ofDim[String](it1.length,4) var sum1=0.0 var sum2=0.0 var arr1=0.0 var arr2=0.0 var sumk=0 var arrk=0 for (i<-0 until it1.length){ for(j<-0 until it2.length){ for(i1<-0 until it0.length){ if((it0(i1)(0)==it1(i)) &&(it0(i1)(1)==it2(j))){ sum1=sum1+it0(i1)(2).toDouble sum2=sum2+it0(i1)(3).toDouble arr1=arr1+it0(i1)(2).toDouble arr2=arr2+it0(i1)(3).toDouble sumk=sumk+1 arrk=arrk+1 } } array1(i*it2.length+j)(0) = it1(i) array1(i*it2.length+j)(1) = it2(j) array1(i*it2.length+j)(2)=(sum1/sumk).toString array1(i*it2.length+j)(3)=(sum2/sumk).toString array1(i*it2.length+j)(4)=sumk.toString sum1=0 sum2=0 sumk=0 } array2(i)(0) = it1(i) array2(i)(1) = (it2.length*arr1/(arrk)).toString array2(i)(2) = (it2.length*arr2/arrk).toString array2(i)(3)=(arrk/it2.length).toString arrk=0 arr1=0 arr2=0 } // println(it1.length+" "+ it2.length+" size:"+ size+" array:"+array1.length); out.println("reads,name,RealTime,CPUTime,number"); for(i<-0 until array1.length) { println(array1(i)(0)+","+array1(i)(1)+","+array1(i)(2)+","+array1(i)(3)+","+array1(i)(4)); out.println(array1(i)(0)+","+array1(i)(4)); } out2.println("reads,number"); for(i<-0 until array2.length) { println(array2(i)(0)+","+array2(i)(1)+","+array2(i)(2)+","+array2(i)(3)); out2.println(array2(i)(0)+","+array2(i)(3)); } out.close() out2.close() } } 1000000,2661.8608749999994,2580.2287499999998,8 1000000,537.747125,42.641124999999995,8 100000,314.393625,263.225375,73.62174999999999,7.893875,8 
10000,65.50850000000001,28.980375,62.7085,6.302624999999999,8 1000,42.434124999999995,5.044875,64.92037499999999,6.131749999999999,8 100,42.366125,3.4004999999999996,61.091750000000005,6.060874999999999,8 10,35.91575,3.331875,55.112375,5.995500000000001,8 1376701,3371.823375,3317.7644999999998,737.4544999999999,60.21674999999999,8 1,47.677499999999995,3.8912500000000003,77.08212499999999,6.626250000000001,3199.6079999999997,2622.8698749999994,388.015375,271.11924999999997,128.217,35.283,107.35449999999999,11.176625000000001,103.45787499999999,9.461375,91.028125,9.327375000000002,4109.277875,3377.9812500000003,124.75962499999999,10.517500000000002,8 再次求和(合并bwa aln &bwa samse): package test import scala.io.Source import scala.collection.mutable.ArrayBuffer import java.io.PrintWriter object sumbwaallTotal { val out=new PrintWriter("file/bwaResult/sumallbwa.txt") val out2=new PrintWriter("file/bwaResult/sumallbwaTotal.txt") def main(args:Array[String]){ val source = Source.fromFile("file/bwaResult/allbwa.txt") val i=source.getLines.toArray val it0=for(j<-i) yield j.split(",4) var sum1=0.0 var sum2=0.0 var arr1=0.0 var arr2=0.0 var sumk=0 var arrk=0 for (i<-0 until it1.length){ for(j<-0 until it2.length){ for(i1<-0 until it0.length){ if((it0(i1)(0)==it1(i)) &&(it0(i1)(1)==it2(j))){ sum1=sum1+it0(i1)(2).toDouble sum2=sum2+it0(i1)(3).toDouble arr1=arr1+it0(i1)(2).toDouble arr2=arr2+it0(i1)(3).toDouble sumk=sumk+1 arrk=arrk+1 } } array1(i*it2.length+j)(0) = it1(i) array1(i*it2.length+j)(1) = it2(j) array1(i*it2.length+j)(2)=(sum1/sumk).toString array1(i*it2.length+j)(3)=(sum2/sumk).toString array1(i*it2.length+j)(4)=sumk.toString sum1=0 sum2=0 sumk=0 } array2(i)(0) = it1(i) array2(i)(1) = (2*arr1/(arrk)).toString array2(i)(2) = (2*arr2/arrk).toString array2(i)(3)=arrk.toString arrk=0 arr1=0 arr2=0 } // println(it1.length+" "+ it2.length+" size:"+ size+" array:"+array1.length); out.println("reads,"+array2(i)(3)); } out.close() out2.close() } }运行结果: reads,number 1000000,8 
(编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯了您的权利,请及时联系站长删除相关内容!