Spark App exception running in IntelliJ IDEA - intellij-idea

Can someone help me out? I am running a Spark app in IntelliJ IDEA.
import org.apache.spark.{SparkConf, SparkContext}

object MainDriver {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("Spark Sentiment Analysis").setMaster("local[2]")
    val sc = new SparkContext(conf)

    val posWords = sc.textFile("src/main/resources/Hu_Liu_positive_word_list.txt")
    val negWords = sc.textFile("src/main/resources/Hu_Liu_negative_word_list.txt")
    val nltkStopWords = sc.textFile("src/main/resources/stopwords/english")
    val moreStopWds = sc.parallelize(List("cant", "didnt", "doesnt", "dont", "goes", "isnt", "hes",
      "shes", "thats", "theres", "theyre", "wont", "youll", "youre",
      "youve", "br", "ve", "re", "vs", "dick", "ginger", "hollywood",
      "jack", "jill", "john", "karloff", "kudrow", "orson", "peter", "tcm",
      "tom", "toni", "welles", "william", "wolheim", "nikita"))
    val stopWordsRDD = (nltkStopWords union moreStopWds).filter(_ != "").cache()
    val stopWordsList = sc.broadcast(stopWordsRDD.collect())

    val inTrainUnsup = sc.wholeTextFiles("src/main/resources/reviews/train/unsup")
    val parsedTrainUnsup = inTrainUnsup mapValues (
      _ map {
        case c: Char if Character.isLetterOrDigit(c) => c
        case _ => ' '
      }
        split (" ")
        filter (_.trim() != "")
        filter (_.length() > 1)
        map (_.toLowerCase())
        filter (!stopWordsList.value.contains(_))
      )

    val wordFreqDist = parsedTrainUnsup flatMap {
      case (x, y) => y
    } map (w => (w, 1)) reduceByKey (_ + _)

    val posItems = (posWords map ((_, -1))) join wordFreqDist mapValues { case (x, y) => y }
    val sortedPosItems = posItems map (_.swap) sortByKey (false) map (_.swap) // This is not useful now...
    val negItems = (negWords map ((_, -1))) join wordFreqDist mapValues { case (x, y) => y }
    val sortedNegItems = negItems map (_.swap) sortByKey (false) map (_.swap) // This is not useful now...

    // Get the top 25 hot items
    // implicit val is for top(25), defining sort on the 2nd element
    implicit val pairSortByValue = new Ordering[(String, Int)] {
      override def compare(a: (String, Int), b: (String, Int)) = a._2 compare b._2
    }

    println("Top 25 positive words in unsup dataset")
    posItems.top(25).foreach(println)
    println("Top 25 negative words in unsup dataset")
    negItems.top(25).foreach(println)

    sc.stop()
  }
}
This code runs fine if I use spark-submit.
But it throws exceptions when I run it directly in IntelliJ IDEA (Menu: Run > Run...). After looking into it, something seems to be wrong with val inTrainUnsup = sc.wholeTextFiles("src/main/resources/reviews/train/unsup"), because when I simply do inTrainUnsup.saveAsTextFile("test file"), it throws the same exception.
14/11/11 10:21:07 ERROR executor.Executor: Exception in task 0.0 in stage 1.0 (TID 4)
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
at org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
at java.lang.Thread.run(Thread.java:695)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:155)
... 16 more
Caused by: java.lang.IncompatibleClassChangeError: Found class org.apache.hadoop.mapreduce.TaskAttemptContext, but interface was expected
at org.apache.spark.input.WholeTextFileRecordReader.<init>(WholeTextFileRecordReader.scala:40)
... 21 more
14/11/11 10:21:07 ERROR executor.Executor: Exception in task 1.0 in stage 1.0 (TID 5)
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
at org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
at java.lang.Thread.run(Thread.java:695)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:155)
... 16 more
Caused by: java.lang.IncompatibleClassChangeError: Found class org.apache.hadoop.mapreduce.TaskAttemptContext, but interface was expected
at org.apache.spark.input.WholeTextFileRecordReader.<init>(WholeTextFileRecordReader.scala:40)
... 21 more
14/11/11 10:21:07 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 1.0 (TID 5, localhost): java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
java.lang.Thread.run(Thread.java:695)
14/11/11 10:21:07 ERROR scheduler.TaskSetManager: Task 1 in stage 1.0 failed 1 times; aborting job
14/11/11 10:21:07 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
14/11/11 10:21:07 INFO scheduler.TaskSetManager: Lost task 0.0 in stage 1.0 (TID 4) on executor localhost: java.lang.RuntimeException (java.lang.reflect.InvocationTargetException) [duplicate 1]
14/11/11 10:21:07 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
14/11/11 10:21:07 INFO scheduler.TaskSchedulerImpl: Cancelling stage 1
14/11/11 10:21:07 INFO scheduler.DAGScheduler: Failed to run saveAsTextFile at MainDriver.scala:30
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 1.0 failed 1 times, most recent failure: Lost task 1.0 in stage 1.0 (TID 5, localhost): java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
java.lang.Thread.run(Thread.java:695)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
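The failure that matters in this trace is the IncompatibleClassChangeError on org.apache.hadoop.mapreduce.TaskAttemptContext: that type was a concrete class in Hadoop 1.x and became an interface in Hadoop 2.x, so the error generally means the Spark classes were compiled against one Hadoop major version while the Hadoop jars on the run classpath come from the other. Since spark-submit works, the IDE run configuration is most likely resolving a different Hadoop version than the Spark distribution uses. As a hedged sketch only (artifact names and versions below are illustrative assumptions, not taken from the question), the build definition can pin a single Hadoop line to match the Spark build, e.g. in build.sbt:

// build.sbt sketch (versions are illustrative assumptions)
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "1.1.0",
  // the trace shows Spark expecting the Hadoop 2 interface, so keep exactly one
  // Hadoop 2.x client on the classpath instead of mixing it with Hadoop 1.x jars
  "org.apache.hadoop" % "hadoop-client" % "2.4.0"
)

Inspecting the project's resolved dependencies in IntelliJ (or with mvn dependency:tree for a Maven build) for a stray Hadoop 1.x artifact next to Hadoop 2.x jars is usually enough to confirm this.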

Related

I am getting an error in this code but I can't figure out the cause

val selection = MediaStore.Audio.Media.IS_MUSIC + "!=0"
val projection = arrayOf(MediaStore.Audio.Media._ID, MediaStore.Audio.Media.TITLE, MediaStore.Audio.Media.ALBUM, MediaStore.Audio.Media.ARTIST,
    MediaStore.Audio.Media.DURATION, MediaStore.Audio.Media.DATE_ADDED, MediaStore.Audio.Media.DATA)
val cursor = this.contentResolver.query(MediaStore.Audio.Media.EXTERNAL_CONTENT_URI, projection, selection, null,
    MediaStore.Audio.Media.DATE_ADDED + "DESC", null) // code at line 126

toggle.syncState()
supportActionBar?.setDisplayHomeAsUpEnabled(true)
MusicListMA = getAllAudio()
binding.musicrv.setHasFixedSize(true) // code at line 111

super.onCreate(savedInstanceState)
intializeLayout() // code at line 33
This is the error I get:
FATAL EXCEPTION: main
Process: com.example.hh, PID: 2858
java.lang.RuntimeException: Unable to start activity ComponentInfo{com.example.h3mplayer/com.example.hh.MainActivity}: java.lang.IllegalArgumentException: Invalid token date_addedDESC
at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:3449)
at android.app.ActivityThread.handleLaunchActivity(ActivityThread.java:3601)
at android.app.servertransaction.LaunchActivityItem.execute(LaunchActivityItem.java:85)
at android.app.servertransaction.TransactionExecutor.executeCallbacks(TransactionExecutor.java:135)
at android.app.servertransaction.TransactionExecutor.execute(TransactionExecutor.java:95)
at android.app.ActivityThread$H.handleMessage(ActivityThread.java:2066)
at android.os.Handler.dispatchMessage(Handler.java:106)
at android.os.Looper.loop(Looper.java:223)
at android.app.ActivityThread.main(ActivityThread.java:7656)
at java.lang.reflect.Method.invoke(Native Method)
at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:592)
at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:947)
Caused by: java.lang.IllegalArgumentException: Invalid token date_addedDESC
at android.database.DatabaseUtils.readExceptionFromParcel(DatabaseUtils.java:172)
at android.database.DatabaseUtils.readExceptionFromParcel(DatabaseUtils.java:142)
at android.content.ContentProviderProxy.query(ContentProviderNative.java:472)
at android.content.ContentResolver.query(ContentResolver.java:1183)
at android.content.ContentResolver.query(ContentResolver.java:1115)
at com.example.h3mplayer.MainActivity.getAllAudio(MainActivity.kt:126)
at com.example.h3mplayer.MainActivity.intializeLayout(MainActivity.kt:110)
at com.example.h3mplayer.MainActivity.onCreate(MainActivity.kt:33)
at android.app.Activity.performCreate(Activity.java:7994)
at android.app.Activity.performCreate(Activity.java:7978)
at android.app.Instrumentation.callActivityOnCreate(Instrumentation.java:1309)
at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:3422)
at android.app.ActivityThread.handleLaunchActivity(ActivityThread.java:3601) 
at android.app.servertransaction.LaunchActivityItem.execute(LaunchActivityItem.java:85) 
at android.app.servertransaction.TransactionExecutor.executeCallbacks(TransactionExecutor.java:135) 
at android.app.servertransaction.TransactionExecutor.execute(TransactionExecutor.java:95) 
at android.app.ActivityThread$H.handleMessage(ActivityThread.java:2066) 
at android.os.Handler.dispatchMessage(Handler.java:106) 
at android.os.Looper.loop(Looper.java:223) 
at android.app.ActivityThread.main(ActivityThread.java:7656) 
at java.lang.reflect.Method.invoke(Native Method) 
at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:592) 
at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:947)
Invalid token date_addedDESC means the sort-order string is missing a space between the column name and DESC.
Consider adding a space between date_added and DESC, e.g.
MediaStore.Audio.Media.DATE_ADDED + " DESC"

spark sql request two phases NPE

I am new to Spark (using 2.4.0). I ran into a strange (to me) NPE. The following code throws an NPE:
val ds = "2020-04-01"
spark.sql("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
"where ds = '%s' and db_name = 'db_exports' limit 1".format(ds)).map(table =>
spark.sql("select col_name FROM datainfra.hive_columns " +
"where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string'"
.format(table.getAs[String]("ds"),
table.getAs[String]("db_name"),
table.getAs[String]("table_name"),
table.getAs[String]("type")))
.map(columnNameRow => columnNameRow.getAs[String](0)).collect().mkString("||")
)
But each of the DFs works fine separately:
spark.sql("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
"where ds = '%s' and db_name = 'db_exports' limit 1".format(ds)).show // returns results
spark.sql("select col_name FROM datainfra.hive_columns " +
("where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string' " +
"and col_name != 'ds'")
.format(ds,
"hardcode_db_name",
"hardcode_table_name",
"hardcode_type")).map(columnNameRow => columnNameRow.getAs[String](0)).collect().mkString("||")
How can this be?
Q: I am new to Spark (using 2.4.0). I ran into a strange (to me) NPE. The following code throws an NPE. How can this be?
The spark.sql("sql").map(row => spark.sql("some sql")) pattern has the problem.
In your case it is the cause of the NullPointerException:
val ds = "2020-04-01"
val test1: Dataset[String] = spark.sql("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
"where ds = '%s' and db_name = 'db_exports' limit 1".format(ds))
.map(table =>
spark.sql("select col_name FROM datainfra.hive_columns " +
"where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string'"
.format(table.getAs[String]("ds"),
table.getAs[String]("db_name"),
table.getAs[String]("table_name"),
table.getAs[String]("type")))
.map(columnNameRow => columnNameRow.getAs[String](0)).collect().mkString("||")
)
To prove this I prepared a similar example, please see below. I reproduced the same NullPointerException, so it looks like this pattern is not supported.
package com.examples

import org.apache.log4j.Level
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
  * Created by Ram Ghadiyaram
  */
object RDDOfTupleExample {
  org.apache.log4j.Logger.getLogger("org").setLevel(Level.ERROR)

  def main(args: Array[String]) {
    val spark = SparkSession.builder.
      master("local")
      .appName(this.getClass.getName)
      .getOrCreate()
    import spark.implicits._
    val donuts: DataFrame = Seq(("plain donut", 1.50), ("plain donut", 1.50)
      , ("vanilla donut", 2.0), ("vanilla donut", 2.0)
      , ("glazed donut", 2.50))
      .toDF("Donut_Name", "Price")
    //lets suppose this is your hive table since i dont have hive i simulated with temp table
    donuts.createOrReplaceTempView("mydonuts")
    // }
    val test: Dataset[String] = spark.sql("select \"NCA-15\" as mylabel, count(Donut_Name) as mydonutcount from mydonuts")
      .map(x => spark.sql(s"select ${x.get(0)}, ${x.get(1)} ").collect().mkString(",")) // this is problem
    test.show
  }
}
Result :
[2020-04-11 16:27:45,687] ERROR Exception in task 0.0 in stage 1.0 (TID 1) (org.apache.spark.executor.Executor:91)
java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:143)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:141)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.mapelements_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.deserializetoobject_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[2020-04-11 16:27:45,710] ERROR Task 0 in stage 1.0 failed 1 times; aborting job (org.apache.spark.scheduler.TaskSetManager:70)
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:143)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:141)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.mapelements_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.deserializetoobject_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:365)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3389)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2550)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2550)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2550)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2764)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:254)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:291)
at org.apache.spark.sql.Dataset.show(Dataset.scala:751)
at org.apache.spark.sql.Dataset.show(Dataset.scala:710)
at org.apache.spark.sql.Dataset.show(Dataset.scala:719)
at com.examples.RDDOfTupleExample$.main(RDDOfTupleExample.scala:30)
at com.examples.RDDOfTupleExample.main(RDDOfTupleExample.scala)
Caused by: java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:143)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:141)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.mapelements_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.deserializetoobject_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Conclusion: the aforementioned nested spark.sql pattern does not work (NPE). You have to execute the queries separately or use some other approach.
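As a rough sketch of executing the queries separately (not a tested drop-in; it assumes the datainfra.hive_tables and datainfra.hive_columns tables from the question exist), the outer query can be collected to the driver first, and the inner query issued per row from the driver instead of inside map on the executors:

val ds = "2020-04-01"
// Runs on the driver: collect the small (limit 1) outer result first.
val tables = spark.sql(
  ("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
    "where ds = '%s' and db_name = 'db_exports' limit 1").format(ds)).collect()

// Still on the driver, so calling spark.sql here is fine.
val columnLists = tables.map { table =>
  spark.sql(
    ("select col_name FROM datainfra.hive_columns " +
      "where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string'")
      .format(
        table.getAs[String]("ds"),
        table.getAs[String]("db_name"),
        table.getAs[String]("table_name"),
        table.getAs[String]("type")))
    .collect()
    .map(_.getAs[String](0))
    .mkString("||")
}

An alternative that keeps everything distributed is to express the lookup as a join between the two tables instead of a per-row query.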

JavaFX TableView: Exception in thread "JavaFX Application Thread" java.lang.IndexOutOfBoundsException

I am attempting to populate a JavaFx TableView with some data from a MySQL query run through JDBC. I previously had no problem with populating the table, until I changed the query to select all columns in the table except one of them. The query runs perfectly in MySQL Workbench.
This is the portion of my code responsible for populating the TableView:
ObservableList<ObservableList<String>> data = FXCollections.observableArrayList();
TableView table = new TableView();

// rs is the ResultSet returned by executing the SELECT query that should be filling the table
ResultSetMetaData rsmd = rs.getMetaData();
for (int i = 1; i <= rsmd.getColumnCount(); i++) {
    final int j = i;
    TableColumn col = new TableColumn(rsmd.getColumnLabel(i));
    col.setCellValueFactory(new Callback<CellDataFeatures<ObservableList, String>, ObservableValue<String>>() {
        public ObservableValue<String> call(CellDataFeatures<ObservableList, String> param) {
            // The line below is Main.java:194, the problem source according to the stack trace
            return new SimpleStringProperty(param.getValue().get(j).toString());
        }
    });
    table.getColumns().addAll(col);
}

while (rs.next()) {
    ObservableList<String> row = FXCollections.observableArrayList();
    for (int i = 1; i <= rsmd.getColumnCount(); i++) {
        row.add(rs.getString(i));
    }
    data.add(row);
}
table.setItems(data);
The stack trace indicates that the problem lies at the line indicated in the snippet. And here is the stack trace:
Exception in thread "JavaFX Application Thread"
java.lang.IndexOutOfBoundsException: Index: 25, Size: 25
at java.util.ArrayList.rangeCheck(ArrayList.java:657)
at java.util.ArrayList.get(ArrayList.java:433)
at com.sun.javafx.collections.ObservableListWrapper.get(ObservableListWrapper.java:89)
at socparser.Main$2.call(Main.java:194)
at socparser.Main$2.call(Main.java:1)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:578)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:563)
at javafx.scene.control.TableCell.updateItem(TableCell.java:644)
at javafx.scene.control.TableCell.indexChanged(TableCell.java:468)
at javafx.scene.control.IndexedCell.updateIndex(IndexedCell.java:116)
at com.sun.javafx.scene.control.skin.TableViewSkin.resizeColumnToFitContent(TableViewSkin.java:241)
at com.sun.javafx.scene.control.skin.TableViewSkin.resizeColumnToFitContent(TableViewSkin.java:54)
at com.sun.javafx.scene.control.skin.TableColumnHeader.doColumnAutoSize(TableColumnHeader.java:531)
at com.sun.javafx.scene.control.skin.TableColumnHeader.updateScene(TableColumnHeader.java:474)
at com.sun.javafx.scene.control.skin.TableColumnHeader.handlePropertyChanged(TableColumnHeader.java:314)
at com.sun.javafx.scene.control.skin.TableColumnHeader.lambda$new$49(TableColumnHeader.java:149)
at com.sun.javafx.scene.control.MultiplePropertyChangeListenerHandler$1.changed(MultiplePropertyChangeListenerHandler.java:55)
at javafx.beans.value.WeakChangeListener.changed(WeakChangeListener.java:89)
at com.sun.javafx.binding.ExpressionHelper$SingleChange.fireValueChangedEvent(ExpressionHelper.java:182)
at com.sun.javafx.binding.ExpressionHelper.fireValueChangedEvent(ExpressionHelper.java:81)
at javafx.beans.property.ReadOnlyObjectPropertyBase.fireValueChangedEvent(ReadOnlyObjectPropertyBase.java:74)
at javafx.beans.property.ReadOnlyObjectWrapper.fireValueChangedEvent(ReadOnlyObjectWrapper.java:102)
at javafx.scene.Node$ReadOnlyObjectWrapperManualFire.fireSuperValueChangedEvent(Node.java:831)
at javafx.scene.Node.invalidatedScenes(Node.java:881)
at javafx.scene.Node.setScenes(Node.java:919)
at javafx.scene.Parent$1.onChanged(Parent.java:269)
at com.sun.javafx.collections.TrackableObservableList.lambda$new$29(TrackableObservableList.java:45)
at com.sun.javafx.collections.ListListenerHelper$Generic.fireValueChangedEvent(ListListenerHelper.java:329)
at com.sun.javafx.collections.ListListenerHelper.fireValueChangedEvent(ListListenerHelper.java:73)
at javafx.collections.ObservableListBase.fireChange(ObservableListBase.java:233)
at javafx.collections.ListChangeBuilder.commit(ListChangeBuilder.java:482)
at javafx.collections.ListChangeBuilder.endChange(ListChangeBuilder.java:541)
at javafx.collections.ObservableListBase.endChange(ObservableListBase.java:205)
at javafx.collections.ModifiableObservableListBase.setAll(ModifiableObservableListBase.java:90)
at com.sun.javafx.collections.VetoableListDecorator.setAll(VetoableListDecorator.java:116)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.updateContent(NestedTableColumnHeader.java:487)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.updateTableColumnHeaders(NestedTableColumnHeader.java:317)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.checkState(NestedTableColumnHeader.java:544)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.computePrefHeight(NestedTableColumnHeader.java:427)
at javafx.scene.Parent.prefHeight(Parent.java:929)
at javafx.scene.layout.Region.prefHeight(Region.java:1435)
at com.sun.javafx.scene.control.skin.TableHeaderRow.computePrefHeight(TableHeaderRow.java:331)
at com.sun.javafx.scene.control.skin.TableHeaderRow.computeMinHeight(TableHeaderRow.java:324)
at javafx.scene.Parent.minHeight(Parent.java:957)
at javafx.scene.layout.Region.minHeight(Region.java:1401)
at javafx.scene.control.SkinBase.computeMinHeight(SkinBase.java:254)
at javafx.scene.control.Control.computeMinHeight(Control.java:489)
at javafx.scene.Parent.minHeight(Parent.java:957)
at javafx.scene.layout.Region.minHeight(Region.java:1401)
at javafx.scene.layout.Region.computeChildPrefAreaHeight(Region.java:1762)
at javafx.scene.layout.GridPane.computePrefHeights(GridPane.java:1424)
at javafx.scene.layout.GridPane.layoutChildren(GridPane.java:1690)
at javafx.scene.Parent.layout(Parent.java:1087)
at javafx.scene.Scene.doLayoutPass(Scene.java:552)
at javafx.scene.Scene$ScenePulseListener.pulse(Scene.java:2397)
at com.sun.javafx.tk.Toolkit.lambda$runPulse$29(Toolkit.java:398)
at java.security.AccessController.doPrivileged(Native Method)
at com.sun.javafx.tk.Toolkit.runPulse(Toolkit.java:397)
at com.sun.javafx.tk.Toolkit.firePulse(Toolkit.java:424)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:518)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:498)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulseFromQueue(QuantumToolkit.java:491)
at com.sun.javafx.tk.quantum.QuantumToolkit.lambda$runToolkit$403(QuantumToolkit.java:319)
at com.sun.glass.ui.InvokeLaterDispatcher$Future.run(InvokeLaterDispatcher.java:95)
at com.sun.glass.ui.win.WinApplication._runLoop(Native Method)
at com.sun.glass.ui.win.WinApplication.lambda$null$147(WinApplication.java:177)
at java.lang.Thread.run(Thread.java:748)
Exception in thread "JavaFX Application Thread"
java.lang.IndexOutOfBoundsException: Index: 25, Size: 25
at java.util.ArrayList.rangeCheck(ArrayList.java:657)
at java.util.ArrayList.get(ArrayList.java:433)
at com.sun.javafx.collections.ObservableListWrapper.get(ObservableListWrapper.java:89)
at socparser.Main$2.call(Main.java:194)
at socparser.Main$2.call(Main.java:1)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:578)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:563)
at javafx.scene.control.TableCell.updateItem(TableCell.java:644)
at javafx.scene.control.TableCell.indexChanged(TableCell.java:468)
at javafx.scene.control.IndexedCell.updateIndex(IndexedCell.java:116)
at com.sun.javafx.scene.control.skin.TableRowSkinBase.updateCells(TableRowSkinBase.java:533)
at com.sun.javafx.scene.control.skin.TableRowSkinBase.init(TableRowSkinBase.java:147)
at com.sun.javafx.scene.control.skin.TableRowSkin.<init>(TableRowSkin.java:64)
at javafx.scene.control.TableRow.createDefaultSkin(TableRow.java:212)
at javafx.scene.control.Control.impl_processCSS(Control.java:872)
at javafx.scene.Node.processCSS(Node.java:9056)
at javafx.scene.Node.applyCss(Node.java:9153)
at com.sun.javafx.scene.control.skin.VirtualFlow.setCellIndex(VirtualFlow.java:1964)
at com.sun.javafx.scene.control.skin.VirtualFlow.getCell(VirtualFlow.java:1797)
at com.sun.javafx.scene.control.skin.VirtualFlow.getCellLength(VirtualFlow.java:1879)
at com.sun.javafx.scene.control.skin.VirtualFlow.computeViewportOffset(VirtualFlow.java:2528)
at com.sun.javafx.scene.control.skin.VirtualFlow.layoutChildren(VirtualFlow.java:1189)
at javafx.scene.Parent.layout(Parent.java:1087)
at javafx.scene.Parent.layout(Parent.java:1093)
at javafx.scene.Parent.layout(Parent.java:1093)
at javafx.scene.Scene.doLayoutPass(Scene.java:552)
at javafx.scene.Scene$ScenePulseListener.pulse(Scene.java:2397)
at com.sun.javafx.tk.Toolkit.lambda$runPulse$29(Toolkit.java:398)
at java.security.AccessController.doPrivileged(Native Method)
at com.sun.javafx.tk.Toolkit.runPulse(Toolkit.java:397)
at com.sun.javafx.tk.Toolkit.firePulse(Toolkit.java:424)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:518)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:498)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulseFromQueue(QuantumToolkit.java:491)
at com.sun.javafx.tk.quantum.QuantumToolkit.lambda$runToolkit$403(QuantumToolkit.java:319)
at com.sun.glass.ui.InvokeLaterDispatcher$Future.run(InvokeLaterDispatcher.java:95)
at com.sun.glass.ui.win.WinApplication._runLoop(Native Method)
at com.sun.glass.ui.win.WinApplication.lambda$null$147(WinApplication.java:177)
at java.lang.Thread.run(Thread.java:748)
The row lists are filled starting at index 0, while the JDBC column index i runs from 1 to getColumnCount(), so get(j) reads one position past the end of each row (hence Index: 25, Size: 25).
Please try to either
change SimpleStringProperty(param.getValue().get(j).toString()) to
SimpleStringProperty(param.getValue().get(j-1).toString())
or
final int j = i; to final int j = i - 1;

Simple Hive write not working

Trying to write a simple POC using Apache Beam and Hive:
public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory
            .fromArgs(args)
            .withValidation()
            .as(PVAOptions.class);

    Pipeline p = Pipeline.create(options);
    p
        .apply(TextIO.read().from("src/test/resources/words.txt"))
        .apply(ParDo.of(new PukeHive()))
        .apply(HCatalogIO.write()
                .withBatchSize(100)
                .withConfigProperties(getHiveConfigProperties())
                .withTable(getHiveTable())
        );

    p.run().waitUntilFinish();
}

static class PukeHive extends DoFn<String, HCatRecord> {
    @ProcessElement
    public void processElement(ProcessContext c) throws IOException {
        DefaultHCatRecord rec = new DefaultHCatRecord(1);
        rec.set(0, c.element());
        c.output(rec);
    }
}
This results in the following exception. Debugging reveals that this is because Beam's WritableCoder tries to create a newInstance() of the abstract class HCatRecord.
org.apache.beam.sdk.Pipeline$PipelineExecutionException: java.lang.RuntimeException: org.apache.beam.sdk.coders.CoderException: unable to deserialize record
at org.apache.beam.runners.direct.DirectRunner$DirectPipelineResult.waitUntilFinish (DirectRunner.java:349)
at org.apache.beam.runners.direct.DirectRunner$DirectPipelineResult.waitUntilFinish (DirectRunner.java:319)
at org.apache.beam.runners.direct.DirectRunner.run (DirectRunner.java:210)
at org.apache.beam.runners.direct.DirectRunner.run (DirectRunner.java:66)
at org.apache.beam.sdk.Pipeline.run (Pipeline.java:311)
at org.apache.beam.sdk.Pipeline.run (Pipeline.java:297)
at com.comp.beam.Main.main (Main.java:48)
at sun.reflect.NativeMethodAccessorImpl.invoke0 (Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke (NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke (DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke (Method.java:498)
at org.codehaus.mojo.exec.ExecJavaMojo$1.run (ExecJavaMojo.java:282)
at java.lang.Thread.run (Thread.java:748)
Caused by: java.lang.RuntimeException: org.apache.beam.sdk.coders.CoderException: unable to deserialize record
at org.apache.beam.runners.direct.ImmutabilityCheckingBundleFactory$ImmutabilityEnforcingBundle.add (ImmutabilityCheckingBundleFactory.java:114)
at org.apache.beam.runners.direct.ParDoEvaluator$BundleOutputManager.output (ParDoEvaluator.java:242)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.outputWindowedValue (SimpleDoFnRunner.java:219)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.access$700 (SimpleDoFnRunner.java:69)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner$DoFnProcessContext.output (SimpleDoFnRunner.java:517)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner$DoFnProcessContext.output (SimpleDoFnRunner.java:505)
at com.comp.beam.Main$PukeHive.processElement (Main.java:61)
Caused by: org.apache.beam.sdk.coders.CoderException: unable to deserialize record
at org.apache.beam.sdk.io.hadoop.WritableCoder.decode (WritableCoder.java:92)
at org.apache.beam.sdk.io.hadoop.WritableCoder.decode (WritableCoder.java:54)
at org.apache.beam.sdk.coders.Coder.decode (Coder.java:170)
at org.apache.beam.sdk.util.CoderUtils.decodeFromSafeStream (CoderUtils.java:122)
at org.apache.beam.sdk.util.CoderUtils.decodeFromByteArray (CoderUtils.java:105)
at org.apache.beam.sdk.util.CoderUtils.decodeFromByteArray (CoderUtils.java:99)
at org.apache.beam.sdk.util.CoderUtils.clone (CoderUtils.java:148)
at org.apache.beam.sdk.util.MutationDetectors$CodedValueMutationDetector.<init> (MutationDetectors.java:117)
at org.apache.beam.sdk.util.MutationDetectors.forValueWithCoder (MutationDetectors.java:46)
at org.apache.beam.runners.direct.ImmutabilityCheckingBundleFactory$ImmutabilityEnforcingBundle.add (ImmutabilityCheckingBundleFactory.java:112)
at org.apache.beam.runners.direct.ParDoEvaluator$BundleOutputManager.output (ParDoEvaluator.java:242)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.outputWindowedValue (SimpleDoFnRunner.java:219)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.access$700 (SimpleDoFnRunner.java:69)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner$DoFnProcessContext.output (SimpleDoFnRunner.java:517)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner$DoFnProcessContext.output (SimpleDoFnRunner.java:505)
at com.comp.beam.Main$PukeHive.processElement (Main.java:61)
at com.comp.beam.Main$PukeHive$DoFnInvoker.invokeProcessElement (Unknown Source)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.invokeProcessElement (SimpleDoFnRunner.java:185)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.processElement (SimpleDoFnRunner.java:149)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimplePushbackSideInputDoFnRunner.processElementInReadyWindows (SimplePushbackSideInputDoFnRunner.java:78)
at org.apache.beam.runners.direct.ParDoEvaluator.processElement (ParDoEvaluator.java:189)
at org.apache.beam.runners.direct.DoFnLifecycleManagerRemovingTransformEvaluator.processElement (DoFnLifecycleManagerRemovingTransformEvaluator.java:55)
at org.apache.beam.runners.direct.DirectTransformExecutor.processElements (DirectTransformExecutor.java:161)
at org.apache.beam.runners.direct.DirectTransformExecutor.run (DirectTransformExecutor.java:125)
at java.util.concurrent.Executors$RunnableAdapter.call (Executors.java:511)
at java.util.concurrent.FutureTask.run (FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker (ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run (ThreadPoolExecutor.java:624)
at java.lang.Thread.run (Thread.java:748)
Caused by: java.lang.InstantiationException
at sun.reflect.InstantiationExceptionConstructorAccessorImpl.newInstance (InstantiationExceptionConstructorAccessorImpl.java:48)
at java.lang.reflect.Constructor.newInstance (Constructor.java:423)
at org.apache.beam.sdk.io.hadoop.WritableCoder.decode (WritableCoder.java:85)
at org.apache.beam.sdk.io.hadoop.WritableCoder.decode (WritableCoder.java:54)
at org.apache.beam.sdk.coders.Coder.decode (Coder.java:170)
at org.apache.beam.sdk.util.CoderUtils.decodeFromSafeStream (CoderUtils.java:122)
at org.apache.beam.sdk.util.CoderUtils.decodeFromByteArray (CoderUtils.java:105)
at org.apache.beam.sdk.util.CoderUtils.decodeFromByteArray (CoderUtils.java:99)
at org.apache.beam.sdk.util.CoderUtils.clone (CoderUtils.java:148)
at org.apache.beam.sdk.util.MutationDetectors$CodedValueMutationDetector.<init> (MutationDetectors.java:117)
at org.apache.beam.sdk.util.MutationDetectors.forValueWithCoder (MutationDetectors.java:46)
at org.apache.beam.runners.direct.ImmutabilityCheckingBundleFactory$ImmutabilityEnforcingBundle.add (ImmutabilityCheckingBundleFactory.java:112)
at org.apache.beam.runners.direct.ParDoEvaluator$BundleOutputManager.output (ParDoEvaluator.java:242)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.outputWindowedValue (SimpleDoFnRunner.java:219)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.access$700 (SimpleDoFnRunner.java:69)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner$DoFnProcessContext.output (SimpleDoFnRunner.java:517)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner$DoFnProcessContext.output (SimpleDoFnRunner.java:505)
at com.comp.beam.Main$PukeHive.processElement (Main.java:61)
at com.comp.beam.Main$PukeHive$DoFnInvoker.invokeProcessElement (Unknown Source)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.invokeProcessElement (SimpleDoFnRunner.java:185)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimpleDoFnRunner.processElement (SimpleDoFnRunner.java:149)
at org.apache.beam.repackaged.beam_runners_direct_java.runners.core.SimplePushbackSideInputDoFnRunner.processElementInReadyWindows (SimplePushbackSideInputDoFnRunner.java:78)
at org.apache.beam.runners.direct.ParDoEvaluator.processElement (ParDoEvaluator.java:189)
at org.apache.beam.runners.direct.DoFnLifecycleManagerRemovingTransformEvaluator.processElement (DoFnLifecycleManagerRemovingTransformEvaluator.java:55)
at org.apache.beam.runners.direct.DirectTransformExecutor.processElements (DirectTransformExecutor.java:161)
at org.apache.beam.runners.direct.DirectTransformExecutor.run (DirectTransformExecutor.java:125)
at java.util.concurrent.Executors$RunnableAdapter.call (Executors.java:511)
at java.util.concurrent.FutureTask.run (FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker (ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run (ThreadPoolExecutor.java:624)
at java.lang.Thread.run (Thread.java:748)
How can I feed my data into Hive using Beam?
I believe you need to register the coder for the HCatRecord which would be:
Pipeline p = Pipeline.create(options);
p.getCoderRegistry()
.registerCoderForClass(HCatRecord.class, WritableCoder.of(DefaultHCatRecord.class));
To test this I added the following to the HCatalogIOTest class of the beam project. It uses a different schema but should demonstrate a complete example:
@Test
@NeedsEmptyTestTables
public void testSOKricket() {
    // Register the coder
    defaultPipeline
        .getCoderRegistry()
        .registerCoderForClass(HCatRecord.class, WritableCoder.of(DefaultHCatRecord.class));

    defaultPipeline
        .apply(TextIO.read().from("/tmp/words.txt"))
        .apply(ParDo.of(new PukeHive()))
        .apply(
            HCatalogIO.write()
                .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
                .withDatabase(TEST_DATABASE)
                .withTable(TEST_TABLE)
                .withPartition(new java.util.HashMap<>())
                .withBatchSize(1L));

    defaultPipeline.run();
}

static class PukeHive extends DoFn<String, HCatRecord> {
    @ProcessElement
    public void processElement(ProcessContext c) throws Exception {
        // our test schema is (mycol1 string, mycol2 int)
        DefaultHCatRecord rec = new DefaultHCatRecord(2);
        rec.set(0, c.element());
        rec.set(1, 1);
        c.output(rec);
    }
}

Corda: Adding multiple output states in a single transaction

In our use case we have to add two output states with different contracts to a single transaction. I tried adding multiple states to the transaction with withItems. I call this flow from RPC, but it throws an exception, java.lang.IllegalArgumentException: List has more than one element., while signing the transaction.
This is how I am adding multiple output states:
val outputstate1 = IouState1(dataModel1, me, otherParty)
val outputstate2 = IouState2(dataModel2, me, otherParty)
val cmd1 = Command(IouContract1.Commands.Create(), outputstate1.participants.map { it.owningKey })
val cmd2 = Command(IouContract2.Commands.Initiated(), outputstate2.participants.map { it.owningKey })
val txBuilder = TransactionBuilder(TransactionType.General, notary).withItems(outputstate1, outputstate2, cmd1, cmd2)

progressTracker.currentStep = VERIFYING_TRANSACTION
// Verifying the transaction.
txBuilder.toWireTransaction().toLedgerTransaction(serviceHub).verify()

progressTracker.currentStep = SIGNING_TRANSACTION
// Signing the transaction.
val partSignedTx = serviceHub.signInitialTransaction(txBuilder)

progressTracker.currentStep = GATHERING_SIGS
// Send the state to the counterparty, and receive it back with their signature.
val fullySignedTx = subFlow(CollectSignaturesFlow(partSignedTx, GATHERING_SIGS.childProgressTracker()))

// Finalising the transaction.
progressTracker.currentStep = FINALISING_TRANSACTION
// Notarise and record the transaction in both parties' vaults.
return subFlow(FinalityFlow(fullySignedTx, FINALISING_TRANSACTION.childProgressTracker())).single()
Stack trace:-
net.corda.core.flows.FlowException: java.lang.IllegalArgumentException: List has more than one element.
at net.corda.core.flows.SignTransactionFlow.call(CollectSignaturesFlow.kt:202) ~[corda-core-0.14.0.jar:?]
at net.corda.core.flows.SignTransactionFlow.call(CollectSignaturesFlow.kt:175) ~[corda-core-0.14.0.jar:?]
at net.corda.core.flows.FlowLogic.subFlow(FlowLogic.kt:166) ~[corda-core-0.14.0.jar:?]
at com.dwlabcorda.salescontract.LOCReceiverFlow.call(LOCContractFlow.kt:107) ~[tradefinancecorda-0.1.jar:?]
at com.dwlabcorda.salescontract.LOCReceiverFlow.call(LOCContractFlow.kt:95) ~[tradefinancecorda-0.1.jar:?]
at net.corda.node.services.statemachine.FlowStateMachineImpl.run(FlowStateMachineImpl.kt:101) [corda-node-0.14.0.jar:?]
at net.corda.node.services.statemachine.FlowStateMachineImpl.run(FlowStateMachineImpl.kt:33) [corda-node-0.14.0.jar:?]
at co.paralleluniverse.fibers.Fiber.run1(Fiber.java:1067) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at co.paralleluniverse.fibers.Fiber.exec(Fiber.java:767) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at co.paralleluniverse.fibers.RunnableFiberTask.doExec(RunnableFiberTask.java:100) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at co.paralleluniverse.fibers.RunnableFiberTask.run(RunnableFiberTask.java:91) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.FutureTask.run(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) [?:1.8.0_144]
at net.corda.node.utilities.AffinityExecutor$ServiceAffinityExecutor$1$thread$1.run(AffinityExecutor.kt:69) [corda-node-0.14.0.jar:?]
Caused by: java.lang.IllegalArgumentException: List has more than one element.
at kotlin.collections.CollectionsKt___CollectionsKt.single(_Collections.kt:471) ~[kotlin-stdlib-1.1.1.jar:1.1.1]
at com.dwlabcorda.salescontract.LOCReceiverFlow$call$signTransactionFlow$1.checkTransaction(LOCContractFlow.kt:100) ~[tradefinancecorda-0.1.jar:?]
at net.corda.core.flows.SignTransactionFlow.call(CollectSignaturesFlow.kt:199) ~[corda-core-0.14.0.jar:?]
... 17 more
Probably you used a .single() call somewhere that assumes the transaction has exactly one output state, and it now throws because there are two. The stack trace points at the checkTransaction of the SignTransactionFlow inside LOCReceiverFlow (LOCContractFlow.kt:100), so look for a .single() there (or in one of your contracts) and replace it with logic that handles multiple output states.