scala – 无法通过Elasticsearch-Hadoop库在多个Spark节点上映射RDD
发布时间:2020-12-16 18:36:49 所属栏目:安全 来源:网络整理
导读:import org.elasticsearch.spark._import org.apache.spark.{SparkConf,SparkContext}import org.apache.spark.serializer._;import com.esotericsoftware.kryo.Kryo;import org.elasticsearch.spark.rdd.EsSpark sc.stop()val conf = new SparkConf()conf.s
导读:import org.elasticsearch.spark._ import org.apache.spark.{SparkConf,SparkContext} import org.apache.spark.serializer._; import com.esotericsoftware.kryo.Kryo; import org.elasticsearch.spark.rdd.EsSpark sc.stop() val conf = new SparkConf() conf.s……
import org.elasticsearch.spark._ import org.apache.spark.{SparkConf,SparkContext} import org.apache.spark.serializer._; import com.esotericsoftware.kryo.Kryo; import org.elasticsearch.spark.rdd.EsSpark sc.stop() val conf = new SparkConf() conf.set("es.index.auto.create","true") conf.set("spark.serializer",classOf[KryoSerializer].getName) conf.set("es.nodes","localhost") val sc = new SparkContext(conf) val getAllQuery = "{\"query\":{\"match_all\":{}}}" val esRDDAll = sc.esRDD("test-index/typeA",getAllQuery) //WORKS esRDDAll.count //WORKS EsSpark.saveToEs(esRDDAll,"output-index/typeB") val esRDDMap = esRDDAll.map(r => r) //FAILS esRDDMap.count //FAILS EsSpark.saveToEs(esRDDMap,"output-index/typeB") 我得到的错误是: WARN TaskSetManager: Lost task 0.0 in stage 4.0 (TID 41,localhost): java.lang.ClassNotFoundException: $line594.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1 at java.net.URLClassLoader.findClass(URLClassLoader.java:381) at java.lang.ClassLoader.loadClass(ClassLoader.java:424) at java.lang.ClassLoader.loadClass(ClassLoader.java:357) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:348) at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:68) at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613) at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000) at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000) at 
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351) at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000) at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924) at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351) at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371) at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76) at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 注意 这只发生在我在Spark中使用主从模式时.在单个节点上它工作正常. 解决方法
我遇到过类似的问题，希望下面的链接可能会有所帮助（原文列出了两个链接，此处仅保留了一个）：
> SPARK-4170 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |