
scala – Unable to map over an RDD on multiple Spark nodes via the elasticsearch-hadoop library

import org.elasticsearch.spark._
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.serializer._
import com.esotericsoftware.kryo.Kryo
import org.elasticsearch.spark.rdd.EsSpark

sc.stop()

val conf = new SparkConf()
conf.set("es.index.auto.create","true")
conf.set("spark.serializer",classOf[KryoSerializer].getName)

conf.set("es.nodes","localhost")
val sc = new SparkContext(conf)
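// Hedged note: in Spark 1.x the shell records its class-server address in
// "spark.repl.class.uri" on the conf of the SparkContext it creates, and
// executors use that URI to fetch classes compiled in the REPL. A SparkConf
// built from scratch does not carry it, so one possible fix is to capture it
// before calling sc.stop() and copy it into the new conf:
//   val replUri = sc.getConf.get("spark.repl.class.uri")
//   conf.set("spark.repl.class.uri", replUri)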

val getAllQuery = """{"query":{"match_all":{}}}"""
val esRDDAll = sc.esRDD("test-index/typeA",getAllQuery)

//WORKS
esRDDAll.count

//WORKS
EsSpark.saveToEs(esRDDAll,"output-index/typeB")

val esRDDMap = esRDDAll.map(r => r)

//FAILS
esRDDMap.count

//FAILS
EsSpark.saveToEs(esRDDMap,"output-index/typeB")

The error I get is:

WARN TaskSetManager: Lost task 0.0 in stage 4.0 (TID 41,localhost): java.lang.ClassNotFoundException: $line594.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1
        at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
        at java.lang.Class.forName0(Native Method)
        at java.lang.Class.forName(Class.java:348)
        at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:68)
        at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
        at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
        at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
        at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
        at org.apache.spark.scheduler.Task.run(Task.scala:89)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

Note

This only happens when I run Spark in master/worker (cluster) mode; on a single node it works fine. Note that the class the executor cannot find ($line594...$anonfun$1) is the anonymous function the shell compiled for map(r => r), so the executors are evidently unable to load classes generated by the REPL.

Solution

I ran into a similar problem, and I hope these two links help (a rough workaround sketch follows them):

> SPARK-4170
> SPARK-2175
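
If the interactive shell can be avoided, the usual workaround is to compile the closure into an application jar and run the job with spark-submit, so that executors load the class from the jar rather than from the REPL. Below is a minimal sketch under that assumption; the object name EsCopyJob is hypothetical, and the node address and index names are copied from the question:

import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._
import org.elasticsearch.spark.rdd.EsSpark

object EsCopyJob {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("EsCopyJob")
      .set("es.index.auto.create", "true")
      .set("es.nodes", "localhost")   // adjust for your cluster
    val sc = new SparkContext(conf)

    val getAllQuery = """{"query":{"match_all":{}}}"""
    val esRDDAll = sc.esRDD("test-index/typeA", getAllQuery)

    // This closure is compiled into the application jar, so every executor
    // can resolve it; no REPL-generated $anonfun classes are involved.
    val esRDDMap = esRDDAll.map(r => r)
    EsSpark.saveToEs(esRDDMap, "output-index/typeB")

    sc.stop()
  }
}

Package the object with sbt package and run it with spark-submit --class EsCopyJob <jar>, where <jar> is the path to the built artifact.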
