Spark SQL query in two phases throws NPE

I am new to Spark (using 2.4.0). I ran into a strange (to me) NullPointerException. The following code throws the NPE:
val ds = "2020-04-01"
spark.sql("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
"where ds = '%s' and db_name = 'db_exports' limit 1".format(ds)).map(table =>
spark.sql("select col_name FROM datainfra.hive_columns " +
"where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string'"
.format(table.getAs[String]("ds"),
table.getAs[String]("db_name"),
table.getAs[String]("table_name"),
table.getAs[String]("type")))
.map(columnNameRow => columnNameRow.getAs[String](0)).collect().mkString("||")
)
But each of the DataFrames works fine on its own:
spark.sql("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
"where ds = '%s' and db_name = 'db_exports' limit 1".format(ds)).show // returns results
spark.sql("select col_name FROM datainfra.hive_columns " +
("where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string' " +
"and col_name != 'ds'")
.format(ds,
"hardcode_db_name",
"hardcode_table_name",
"hardcode_type")).map(columnNameRow => columnNameRow.getAs[String](0)).collect().mkString("||")
How can that be?

Q: I am new to Spark (using 2.4.0). I ran into a strange (to me) NullPointerException. The following code throws the NPE. How can that be?
spark.sql(" sql").map.(spark.sql("some sql ")) pattern has the problem.
In your case is cause of null pointer exception
val ds = "2020-04-01"
val test1: Dataset[String] = spark.sql("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
  "where ds = '%s' and db_name = 'db_exports' limit 1".format(ds))
  .map(table =>
    spark.sql("select col_name FROM datainfra.hive_columns " +
      "where ds = '%s' and db_name = '%s' and table_name = '%s' and table_type = '%s' and col_type = 'string'"
        .format(table.getAs[String]("ds"),
          table.getAs[String]("db_name"),
          table.getAs[String]("table_name"),
          table.getAs[String]("type")))
      .map(columnNameRow => columnNameRow.getAs[String](0)).collect().mkString("||")
  )
To prove this I prepared a similar example, see below. It reproduces the same NullPointerException, so this pattern looks unsupported.
package com.examples

import org.apache.log4j.Level
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
  * Created by Ram Ghadiyaram
  */
object RDDOfTupleExample {
  org.apache.log4j.Logger.getLogger("org").setLevel(Level.ERROR)

  def main(args: Array[String]) {
    val spark = SparkSession.builder
      .master("local")
      .appName(this.getClass.getName)
      .getOrCreate()
    import spark.implicits._

    val donuts: DataFrame = Seq(("plain donut", 1.50), ("plain donut", 1.50)
      , ("vanilla donut", 2.0), ("vanilla donut", 2.0)
      , ("glazed donut", 2.50))
      .toDF("Donut_Name", "Price")
    // let's suppose this is your hive table; since I don't have Hive I simulated it with a temp table
    donuts.createOrReplaceTempView("mydonuts")

    val test: Dataset[String] = spark.sql("select \"NCA-15\" as mylabel, count(Donut_Name) as mydonutcount from mydonuts")
      .map(x => spark.sql(s"select ${x.get(0)}, ${x.get(1)} ").collect().mkString(",")) // this is the problem
    test.show
  }
}
Result:
[2020-04-11 16:27:45,687] ERROR Exception in task 0.0 in stage 1.0 (TID 1) (org.apache.spark.executor.Executor:91)
java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:143)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:141)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.mapelements_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.deserializetoobject_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[2020-04-11 16:27:45,710] ERROR Task 0 in stage 1.0 failed 1 times; aborting job (org.apache.spark.scheduler.TaskSetManager:70)
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:143)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:141)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.mapelements_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.deserializetoobject_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:365)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3389)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2550)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2550)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2550)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2764)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:254)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:291)
at org.apache.spark.sql.Dataset.show(Dataset.scala:751)
at org.apache.spark.sql.Dataset.show(Dataset.scala:710)
at org.apache.spark.sql.Dataset.show(Dataset.scala:719)
at com.examples.RDDOfTupleExample$.main(RDDOfTupleExample.scala:30)
at com.examples.RDDOfTupleExample.main(RDDOfTupleExample.scala)
Caused by: java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:143)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:141)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at com.examples.RDDOfTupleExample$$anonfun$1.apply(RDDOfTupleExample.scala:29)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.mapelements_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.deserializetoobject_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Conclusion: the aforementioned nested spark.sql pattern does not work (NPE). You have to execute the queries separately or use some other way (for example, a join between the two tables).
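Below is a minimal sketch of the "execute separately" option, assuming the same spark session and datainfra.hive_tables / datainfra.hive_columns tables as in the question. The outer query is collected on the driver first, and the inner query is then issued per collected row, so spark.sql is never called inside a map that runs on executors.
val ds = "2020-04-01"

// Outer query: bring the (small) result back to the driver as Array[Row]
val tables = spark.sql(
  ("select ds, db_name, table_name, type FROM datainfra.hive_tables " +
    "where ds = '%s' and db_name = 'db_exports' limit 1").format(ds))
  .collect()

// Inner query: a plain Scala map over the collected rows, executed on the driver
val columnsPerTable: Array[String] = tables.map { table =>
  spark.sql(
    ("select col_name FROM datainfra.hive_columns " +
      "where ds = '%s' and db_name = '%s' and table_name = '%s' " +
      "and table_type = '%s' and col_type = 'string'")
      .format(table.getAs[String]("ds"),
        table.getAs[String]("db_name"),
        table.getAs[String]("table_name"),
        table.getAs[String]("type")))
    .collect()
    .map(_.getString(0))
    .mkString("||")
}
This only makes sense when the outer query returns a handful of rows; for larger inputs a single SQL join between hive_tables and hive_columns expresses the same lookup without driver-side loops.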

Related

JavaFX TableView: Exception in thread "JavaFX Application Thread" java.lang.IndexOutOfBoundsException

I am attempting to populate a JavaFx TableView with some data from a MySQL query run through JDBC. I previously had no problem with populating the table, until I changed the query to select all columns in the table except one of them. The query runs perfectly in MySQL Workbench.
This is the portion of my code responsible for populating the TableView:
ObservableList<ObservableList<String>> data = FXCollections.observableArrayList();
TableView table = new TableView();
// rs is the ResultSet returned by executing the SELECT query that should be filling the table
ResultSetMetaData rsmd = rs.getMetaData();
for (int i = 1; i <= rsmd.getColumnCount(); i++) {
    final int j = i;
    TableColumn col = new TableColumn(rsmd.getColumnLabel(i));
    col.setCellValueFactory(new Callback<CellDataFeatures<ObservableList, String>, ObservableValue<String>>() {
        public ObservableValue<String> call(CellDataFeatures<ObservableList, String> param) {
            // The line below is Main.java:194, the problem source according to the stack trace
            return new SimpleStringProperty(param.getValue().get(j).toString());
        }
    });
    table.getColumns().addAll(col);
}
while (rs.next()) {
    ObservableList<String> row = FXCollections.observableArrayList();
    for (int i = 1; i <= rsmd.getColumnCount(); i++) {
        row.add(rs.getString(i));
    }
    data.add(row);
}
table.setItems(data);
The stack trace indicates that the problem lies at the line indicated in the snippet. And here is the stack trace:
Exception in thread "JavaFX Application Thread"
java.lang.IndexOutOfBoundsException: Index: 25, Size: 25
at java.util.ArrayList.rangeCheck(ArrayList.java:657)
at java.util.ArrayList.get(ArrayList.java:433)
at com.sun.javafx.collections.ObservableListWrapper.get(ObservableListWrapper.java:89)
at socparser.Main$2.call(Main.java:194)
at socparser.Main$2.call(Main.java:1)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:578)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:563)
at javafx.scene.control.TableCell.updateItem(TableCell.java:644)
at javafx.scene.control.TableCell.indexChanged(TableCell.java:468)
at javafx.scene.control.IndexedCell.updateIndex(IndexedCell.java:116)
at com.sun.javafx.scene.control.skin.TableViewSkin.resizeColumnToFitContent(TableViewSkin.java:241)
at com.sun.javafx.scene.control.skin.TableViewSkin.resizeColumnToFitContent(TableViewSkin.java:54)
at com.sun.javafx.scene.control.skin.TableColumnHeader.doColumnAutoSize(TableColumnHeader.java:531)
at com.sun.javafx.scene.control.skin.TableColumnHeader.updateScene(TableColumnHeader.java:474)
at com.sun.javafx.scene.control.skin.TableColumnHeader.handlePropertyChanged(TableColumnHeader.java:314)
at com.sun.javafx.scene.control.skin.TableColumnHeader.lambda$new$49(TableColumnHeader.java:149)
at com.sun.javafx.scene.control.MultiplePropertyChangeListenerHandler$1.changed(MultiplePropertyChangeListenerHandler.java:55)
at javafx.beans.value.WeakChangeListener.changed(WeakChangeListener.java:89)
at com.sun.javafx.binding.ExpressionHelper$SingleChange.fireValueChangedEvent(ExpressionHelper.java:182)
at com.sun.javafx.binding.ExpressionHelper.fireValueChangedEvent(ExpressionHelper.java:81)
at javafx.beans.property.ReadOnlyObjectPropertyBase.fireValueChangedEvent(ReadOnlyObjectPropertyBase.java:74)
at javafx.beans.property.ReadOnlyObjectWrapper.fireValueChangedEvent(ReadOnlyObjectWrapper.java:102)
at javafx.scene.Node$ReadOnlyObjectWrapperManualFire.fireSuperValueChangedEvent(Node.java:831)
at javafx.scene.Node.invalidatedScenes(Node.java:881)
at javafx.scene.Node.setScenes(Node.java:919)
at javafx.scene.Parent$1.onChanged(Parent.java:269)
at com.sun.javafx.collections.TrackableObservableList.lambda$new$29(TrackableObservableList.java:45)
at com.sun.javafx.collections.ListListenerHelper$Generic.fireValueChangedEvent(ListListenerHelper.java:329)
at com.sun.javafx.collections.ListListenerHelper.fireValueChangedEvent(ListListenerHelper.java:73)
at javafx.collections.ObservableListBase.fireChange(ObservableListBase.java:233)
at javafx.collections.ListChangeBuilder.commit(ListChangeBuilder.java:482)
at javafx.collections.ListChangeBuilder.endChange(ListChangeBuilder.java:541)
at javafx.collections.ObservableListBase.endChange(ObservableListBase.java:205)
at javafx.collections.ModifiableObservableListBase.setAll(ModifiableObservableListBase.java:90)
at com.sun.javafx.collections.VetoableListDecorator.setAll(VetoableListDecorator.java:116)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.updateContent(NestedTableColumnHeader.java:487)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.updateTableColumnHeaders(NestedTableColumnHeader.java:317)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.checkState(NestedTableColumnHeader.java:544)
at com.sun.javafx.scene.control.skin.NestedTableColumnHeader.computePrefHeight(NestedTableColumnHeader.java:427)
at javafx.scene.Parent.prefHeight(Parent.java:929)
at javafx.scene.layout.Region.prefHeight(Region.java:1435)
at com.sun.javafx.scene.control.skin.TableHeaderRow.computePrefHeight(TableHeaderRow.java:331)
at com.sun.javafx.scene.control.skin.TableHeaderRow.computeMinHeight(TableHeaderRow.java:324)
at javafx.scene.Parent.minHeight(Parent.java:957)
at javafx.scene.layout.Region.minHeight(Region.java:1401)
at javafx.scene.control.SkinBase.computeMinHeight(SkinBase.java:254)
at javafx.scene.control.Control.computeMinHeight(Control.java:489)
at javafx.scene.Parent.minHeight(Parent.java:957)
at javafx.scene.layout.Region.minHeight(Region.java:1401)
at javafx.scene.layout.Region.computeChildPrefAreaHeight(Region.java:1762)
at javafx.scene.layout.GridPane.computePrefHeights(GridPane.java:1424)
at javafx.scene.layout.GridPane.layoutChildren(GridPane.java:1690)
at javafx.scene.Parent.layout(Parent.java:1087)
at javafx.scene.Scene.doLayoutPass(Scene.java:552)
at javafx.scene.Scene$ScenePulseListener.pulse(Scene.java:2397)
at com.sun.javafx.tk.Toolkit.lambda$runPulse$29(Toolkit.java:398)
at java.security.AccessController.doPrivileged(Native Method)
at com.sun.javafx.tk.Toolkit.runPulse(Toolkit.java:397)
at com.sun.javafx.tk.Toolkit.firePulse(Toolkit.java:424)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:518)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:498)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulseFromQueue(QuantumToolkit.java:491)
at com.sun.javafx.tk.quantum.QuantumToolkit.lambda$runToolkit$403(QuantumToolkit.java:319)
at com.sun.glass.ui.InvokeLaterDispatcher$Future.run(InvokeLaterDispatcher.java:95)
at com.sun.glass.ui.win.WinApplication._runLoop(Native Method)
at com.sun.glass.ui.win.WinApplication.lambda$null$147(WinApplication.java:177)
at java.lang.Thread.run(Thread.java:748)
Exception in thread "JavaFX Application Thread"
java.lang.IndexOutOfBoundsException: Index: 25, Size: 25
at java.util.ArrayList.rangeCheck(ArrayList.java:657)
at java.util.ArrayList.get(ArrayList.java:433)
at com.sun.javafx.collections.ObservableListWrapper.get(ObservableListWrapper.java:89)
at socparser.Main$2.call(Main.java:194)
at socparser.Main$2.call(Main.java:1)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:578)
at javafx.scene.control.TableColumn.getCellObservableValue(TableColumn.java:563)
at javafx.scene.control.TableCell.updateItem(TableCell.java:644)
at javafx.scene.control.TableCell.indexChanged(TableCell.java:468)
at javafx.scene.control.IndexedCell.updateIndex(IndexedCell.java:116)
at com.sun.javafx.scene.control.skin.TableRowSkinBase.updateCells(TableRowSkinBase.java:533)
at com.sun.javafx.scene.control.skin.TableRowSkinBase.init(TableRowSkinBase.java:147)
at com.sun.javafx.scene.control.skin.TableRowSkin.<init>(TableRowSkin.java:64)
at javafx.scene.control.TableRow.createDefaultSkin(TableRow.java:212)
at javafx.scene.control.Control.impl_processCSS(Control.java:872)
at javafx.scene.Node.processCSS(Node.java:9056)
at javafx.scene.Node.applyCss(Node.java:9153)
at com.sun.javafx.scene.control.skin.VirtualFlow.setCellIndex(VirtualFlow.java:1964)
at com.sun.javafx.scene.control.skin.VirtualFlow.getCell(VirtualFlow.java:1797)
at com.sun.javafx.scene.control.skin.VirtualFlow.getCellLength(VirtualFlow.java:1879)
at com.sun.javafx.scene.control.skin.VirtualFlow.computeViewportOffset(VirtualFlow.java:2528)
at com.sun.javafx.scene.control.skin.VirtualFlow.layoutChildren(VirtualFlow.java:1189)
at javafx.scene.Parent.layout(Parent.java:1087)
at javafx.scene.Parent.layout(Parent.java:1093)
at javafx.scene.Parent.layout(Parent.java:1093)
at javafx.scene.Scene.doLayoutPass(Scene.java:552)
at javafx.scene.Scene$ScenePulseListener.pulse(Scene.java:2397)
at com.sun.javafx.tk.Toolkit.lambda$runPulse$29(Toolkit.java:398)
at java.security.AccessController.doPrivileged(Native Method)
at com.sun.javafx.tk.Toolkit.runPulse(Toolkit.java:397)
at com.sun.javafx.tk.Toolkit.firePulse(Toolkit.java:424)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:518)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulse(QuantumToolkit.java:498)
at com.sun.javafx.tk.quantum.QuantumToolkit.pulseFromQueue(QuantumToolkit.java:491)
at com.sun.javafx.tk.quantum.QuantumToolkit.lambda$runToolkit$403(QuantumToolkit.java:319)
at com.sun.glass.ui.InvokeLaterDispatcher$Future.run(InvokeLaterDispatcher.java:95)
at com.sun.glass.ui.win.WinApplication._runLoop(Native Method)
at com.sun.glass.ui.win.WinApplication.lambda$null$147(WinApplication.java:177)
at java.lang.Thread.run(Thread.java:748)
JDBC ResultSet columns are 1-based, but the ObservableList<String> backing each row is filled starting at index 0, so for the last column get(j) reads one past the end of the list (hence Index: 25, Size: 25). Please try to either
change SimpleStringProperty(param.getValue().get(j).toString()) to
SimpleStringProperty(param.getValue().get(j - 1).toString())
or
final int j = i; to final int j = i - 1;

How to Fix "java.lang.NullPointerException at org.apache.spark.sql.execution.datasources.orc.OrcColumnVector.getLong(OrcColumnVector.java:141)"

I'm trying to combine all columns in a dataframe into one column named value.
My code:
val df = sparkSession.sql(sql)
val dfwithValue = df.withColumn("value", df.col("topic"))
dfwithValue.selectExpr("CAST(value AS STRING)").show() // no error

import org.apache.spark.sql.functions._
val cols = df.columns.map({ col =>
  df.col(col)
}).toSeq
val newdf = df.withColumn("value", struct(cols: _*))
newdf.selectExpr("CAST(value AS STRING)").show() // error
When I use the second way, I encounter this error:
Caused by: java.lang.NullPointerException
at org.apache.spark.sql.execution.datasources.orc.OrcColumnVector.getLong(OrcColumnVector.java:141)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Can someone help?
I do not get errors with either approach, but I find the solution overly complicated for the task. Also, this code returns a dataframe whose rows are Row[WrappedArray[String]] rather than Row[String]. Try:
df.map(_.mkString("")).toDF("value").show()
A NullPointerException also comes up when a value is not accessible; check the scope of the variable to fix the issue.
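For completeness, a minimal self-contained sketch of that suggestion; the DataFrame and column names here are hypothetical stand-ins, not the asker's ORC-backed table, and the separator passed to mkString is a free choice (the answer above uses an empty string).
import org.apache.spark.sql.SparkSession

object CombineColumnsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("combine-columns").getOrCreate()
    import spark.implicits._ // provides the String encoder needed by map and toDF

    // Hypothetical stand-in for the asker's dataframe
    val df = Seq(("t1", 1L, "a"), ("t2", 2L, "b")).toDF("topic", "offset", "payload")

    // Concatenate every column of each row into a single string column named "value"
    val combined = df.map(_.mkString("|")).toDF("value")
    combined.show() // the value column now holds e.g. "t1|1|a"

    spark.stop()
  }
}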

Corda: Adding multiple output states in a single transaction

In our use case we have to add two output states with different contracts to a transaction. I tried to add multiple states to the transaction via withItems. I call this flow from RPC, but it throws java.lang.IllegalArgumentException: List has more than one element. while signing the transaction.
This is how I am adding multiple output states:-
val outputstate1 = IouState1(dataModel1, me, otherParty)
val outputstate2 = IouState2(dataModel2, me, otherParty)
val cmd1 = Command(IouContract1.Commands.Create(), state1.participants.map { it.owningKey })
val cmd2 = Command(IouContract2.Commands.Initiated(), state2.participants.map { it.owningKey })
val txBuilder = TransactionBuilder(TransactionType.General, notary).withItems(state1, state2, cmd1, cmd2)
progressTracker.currentStep = VERIFYING_TRANSACTION
// Verifying the transaction.
txBuilder.toWireTransaction().toLedgerTransaction(serviceHub).verify()
progressTracker.currentStep = SIGNING_TRANSACTION
// Signing the transaction.
val partSignedTx = serviceHub.signInitialTransaction(txBuilder)
progressTracker.currentStep = GATHERING_SIGS
// Send the state to the counterparty, and receive it back with their signature.
val fullySignedTx = subFlow(CollectSignaturesFlow(partSignedTx, GATHERING_SIGS.childProgressTracker()))
// Finalising the transaction.
progressTracker.currentStep = FINALISING_TRANSACTION
// Notarise and record the transaction in both parties' vaults.
return subFlow(FinalityFlow(fullySignedTx, FINALISING_TRANSACTION.childProgressTracker())).single()
Stack trace:
net.corda.core.flows.FlowException: java.lang.IllegalArgumentException: List has more than one element.
at net.corda.core.flows.SignTransactionFlow.call(CollectSignaturesFlow.kt:202) ~[corda-core-0.14.0.jar:?]
at net.corda.core.flows.SignTransactionFlow.call(CollectSignaturesFlow.kt:175) ~[corda-core-0.14.0.jar:?]
at net.corda.core.flows.FlowLogic.subFlow(FlowLogic.kt:166) ~[corda-core-0.14.0.jar:?]
at com.dwlabcorda.salescontract.LOCReceiverFlow.call(LOCContractFlow.kt:107) ~[tradefinancecorda-0.1.jar:?]
at com.dwlabcorda.salescontract.LOCReceiverFlow.call(LOCContractFlow.kt:95) ~[tradefinancecorda-0.1.jar:?]
at net.corda.node.services.statemachine.FlowStateMachineImpl.run(FlowStateMachineImpl.kt:101) [corda-node-0.14.0.jar:?]
at net.corda.node.services.statemachine.FlowStateMachineImpl.run(FlowStateMachineImpl.kt:33) [corda-node-0.14.0.jar:?]
at co.paralleluniverse.fibers.Fiber.run1(Fiber.java:1067) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at co.paralleluniverse.fibers.Fiber.exec(Fiber.java:767) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at co.paralleluniverse.fibers.RunnableFiberTask.doExec(RunnableFiberTask.java:100) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at co.paralleluniverse.fibers.RunnableFiberTask.run(RunnableFiberTask.java:91) [quasar-core-0.7.6-jdk8.jar:0.7.6]
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.FutureTask.run(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) [?:1.8.0_144]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) [?:1.8.0_144]
at net.corda.node.utilities.AffinityExecutor$ServiceAffinityExecutor$1$thread$1.run(AffinityExecutor.kt:69) [corda-node-0.14.0.jar:?]
Caused by: java.lang.IllegalArgumentException: List has more than one element.
at kotlin.collections.CollectionsKt___CollectionsKt.single(_Collections.kt:471) ~[kotlin-stdlib-1.1.1.jar:1.1.1]
at com.dwlabcorda.salescontract.LOCReceiverFlow$call$signTransactionFlow$1.checkTransaction(LOCContractFlow.kt:100) ~[tradefinancecorda-0.1.jar:?]
at net.corda.core.flows.SignTransactionFlow.call(CollectSignaturesFlow.kt:199) ~[corda-core-0.14.0.jar:?]
... 17 more
Probably you used a .single() call that is now throwing the exception. The stack trace points at checkTransaction in LOCReceiverFlow (LOCContractFlow.kt:100): the .single() there assumes a list with exactly one element, and now that the transaction carries two output states the list has two, so single() fails with List has more than one element. Check the outputs per state type (or otherwise drop the single-output assumption) instead of calling single() on the whole list.

Version Portable for Hazelcast : Converting Date to Long throws Exception in Predicates

I am trying to use com.hazelcast.nio.serialization.VersionedPortable for serializing a Customer class. It does not support Date serialization by default, so we need to convert the date to a Long:
@Override
public void writePortable(PortableWriter writer) throws IOException {
    if (dob != null) {
        Long dobLong = dob.getTime();
        writer.writeLong(DOB_FIELD, dobLong);
    } else {
        writer.writeLong(DOB_FIELD, -1);
    }
}

@Override
public void readPortable(PortableReader reader) throws IOException {
    if (reader.hasField(DOB_FIELD)) {
        Long dobLong = reader.readLong(DOB_FIELD);
        dob = dobLong == -1 ? null : new Date(dobLong);
    }
}
In CustomerService I have findCustomersByDob, which uses com.hazelcast.query.Predicate:
public Collection<Customer> findCustomersByDob(Date dobStart, Date dobEnd) {
    Predicate dobStartPredicate = Predicates.greaterEqual("dob", dobStart);
    Predicate dobEndPredicate = Predicates.lessThan("dob", dobEnd);
    Predicate andPredicate = Predicates.and(dobStartPredicate, dobEndPredicate);
    return idToCustomerMap.values(andPredicate);
}
At idToCustomerMap.values(andPredicate); I am getting the following exception:
java.lang.IllegalArgumentException: Cannot convert [Tue Jan 01 00:00:00 IST 1980] to long
at com.hazelcast.query.impl.TypeConverters$LongConverter.convert(TypeConverters.java:159)
at com.hazelcast.query.impl.IndexImpl.convert(IndexImpl.java:154)
at com.hazelcast.query.impl.IndexImpl.getSubRecords(IndexImpl.java:148)
at com.hazelcast.query.Predicates$GreaterLessPredicate.filter(Predicates.java:691)
at com.hazelcast.query.Predicates$AndPredicate.filter(Predicates.java:477)
at com.hazelcast.query.impl.IndexService.query(IndexService.java:97)
at com.hazelcast.map.impl.operation.QueryOperation.run(QueryOperation.java:92)
at com.hazelcast.spi.impl.operationservice.impl.OperationRunnerImpl.run(OperationRunnerImpl.java:137)
at com.hazelcast.spi.impl.operationservice.impl.OperationRunnerImpl.run(OperationRunnerImpl.java:309)
at com.hazelcast.spi.impl.operationexecutor.classic.OperationThread.processPacket(OperationThread.java:142)
at com.hazelcast.spi.impl.operationexecutor.classic.OperationThread.process(OperationThread.java:115)
at com.hazelcast.spi.impl.operationexecutor.classic.OperationThread.doRun(OperationThread.java:101)
at com.hazelcast.spi.impl.operationexecutor.classic.OperationThread.run(OperationThread.java:76)
at ------ End remote and begin local stack-trace ------.(Unknown Source)
at com.hazelcast.spi.impl.operationservice.impl.InvocationFuture.resolveApplicationResponse(InvocationFuture.java:384)
at com.hazelcast.spi.impl.operationservice.impl.InvocationFuture.resolveApplicationResponseOrThrowException(InvocationFuture.java:334)
at com.hazelcast.spi.impl.operationservice.impl.InvocationFuture.get(InvocationFuture.java:225)
at com.hazelcast.spi.impl.operationservice.impl.InvocationFuture.get(InvocationFuture.java:204)
at com.hazelcast.map.impl.client.AbstractMapQueryRequest.collectResults(AbstractMapQueryRequest.java:103)
at com.hazelcast.map.impl.client.AbstractMapQueryRequest.invoke(AbstractMapQueryRequest.java:77)
at com.hazelcast.client.impl.client.InvocationClientRequest.process(InvocationClientRequest.java:27)
at com.hazelcast.client.impl.ClientEngineImpl$ClientPacketProcessor.processRequest(ClientEngineImpl.java:463)
at com.hazelcast.client.impl.ClientEngineImpl$ClientPacketProcessor.run(ClientEngineImpl.java:379)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
at com.hazelcast.util.executor.HazelcastManagedThread.executeRun(HazelcastManagedThread.java:76)
at com.hazelcast.util.executor.HazelcastManagedThread.run(HazelcastManagedThread.java:92)
at ------ End remote and begin local stack-trace ------.(Unknown Source)
at com.hazelcast.client.spi.impl.ClientInvocationFuture.resolveResponse(ClientInvocationFuture.java:147)
at com.hazelcast.client.spi.impl.ClientInvocationFuture.get(ClientInvocationFuture.java:114)
at com.hazelcast.client.spi.impl.ClientInvocationFuture.get(ClientInvocationFuture.java:89)
at com.hazelcast.client.spi.ClientProxy.invoke(ClientProxy.java:151)
at com.hazelcast.client.proxy.ClientMapProxy.values(ClientMapProxy.java:837)
at com.foo.hazelcast.client.services.CustomerServiceImpl.findCustomersByDob(CustomerServiceImpl.java:99)
at com.foo.hazelcast.client.services.CustomerServiceTest.searchCustomersByDobRange(CustomerServiceTest.java:104)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
at org.springframework.test.context.junit4.statements.RunBeforeTestMethodCallbacks.evaluate(RunBeforeTestMethodCallbacks.java:75)
at org.springframework.test.context.junit4.statements.RunAfterTestMethodCallbacks.evaluate(RunAfterTestMethodCallbacks.java:86)
at org.springframework.test.context.junit4.statements.SpringRepeat.evaluate(SpringRepeat.java:84)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:252)
at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:94)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.springframework.test.context.junit4.statements.RunBeforeTestClassCallbacks.evaluate(RunBeforeTestClassCallbacks.java:61)
at org.springframework.test.context.junit4.statements.RunAfterTestClassCallbacks.evaluate(RunAfterTestClassCallbacks.java:70)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.run(SpringJUnit4ClassRunner.java:191)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:459)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:675)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:382)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:192)
Although it is clear the exception is because of this special handling of Date, I would like to know the exact reason for the type conversion here. I read that Hazelcast maintains the data in serialized form, so this should not be a problem, right?
Also, how do I overcome this issue?
EDIT:
I fixed this issue by passing date.getTime() in the predicates as well
public Collection<Customer> findCustomersByDob(Date dobStart, Date dobEnd) {
    Predicate dobStartPredicate = Predicates.greaterEqual("dob", dobStart.getTime());
    Predicate dobEndPredicate = Predicates.lessThan("dob", dobEnd.getTime());
    Predicate andPredicate = Predicates.and(dobStartPredicate, dobEndPredicate);
    return idToCustomerMap.values(andPredicate);
}
I am guessing it is because Hazelcast maintains data in serialized form and hence gets confused when it tries to compare the long against the Date in the predicate.
Still, this approach is definitely not clean. Is there a cleaner way to avoid this with VersionedPortable?
@vinodhini-chockalingam, Portable is designed to work across different languages, not just Java, so you can write a value from Node.js and read it from Java with Portable. This is why you cannot write a java.util.Date object to Portable directly; you need to convert it to a supported type.
And since you write the Date as a long, Hazelcast recognizes that field as a long. Only you know that it is a Date field. So when you run a predicate, because the field was written as a long, Hazelcast expects a long value to compare against. That is the reason.

Spark App exception running in IntelliJ IDEA

Can someone help me out?
I am running a Spark app in IntelliJ IDEA.
import org.apache.spark.{SparkConf, SparkContext}

object MainDriver {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("Spark Sentiment Analysis").setMaster("local[2]")
    val sc = new SparkContext(conf)

    val posWords = sc.textFile("src/main/resources/Hu_Liu_positive_word_list.txt")
    val negWords = sc.textFile("src/main/resources/Hu_Liu_negative_word_list.txt")
    val nltkStopWords = sc.textFile("src/main/resources/stopwords/english")
    val moreStopWds = sc.parallelize(List("cant", "didnt", "doesnt", "dont", "goes", "isnt", "hes",
      "shes", "thats", "theres", "theyre", "wont", "youll", "youre",
      "youve", "br", "ve", "re", "vs", "dick", "ginger", "hollywood",
      "jack", "jill", "john", "karloff", "kudrow", "orson", "peter", "tcm",
      "tom", "toni", "welles", "william", "wolheim", "nikita"))
    val stopWordsRDD = (nltkStopWords union moreStopWds).filter(_ != "").cache()
    val stopWordsList = sc.broadcast(stopWordsRDD.collect())

    val inTrainUnsup = sc.wholeTextFiles("src/main/resources/reviews/train/unsup")
    val parsedTrainUnsup = inTrainUnsup mapValues (
      _ map {
        case c: Char if Character.isLetterOrDigit(c) => c
        case _ => ' '
      }
        split (" ")
        filter (_.trim() != "")
        filter (_.length() > 1)
        map (_.toLowerCase())
        filter (!stopWordsList.value.contains(_))
      )

    val wordFreqDist = parsedTrainUnsup flatMap {
      case (x, y) => y
    } map (w => (w, 1)) reduceByKey (_ + _)

    val posItems = (posWords map ((_, -1))) join wordFreqDist mapValues { case (x, y) => y }
    val sortedPosItems = posItems map (_.swap) sortByKey (false) map (_.swap) // This is not useful now...
    val negItems = (negWords map ((_, -1))) join wordFreqDist mapValues { case (x, y) => y }
    val sortedNegItems = negItems map (_.swap) sortByKey (false) map (_.swap) // This is not useful now...

    // Get the top 25 hot items
    // implicit val is for top(25), defining sort on the 2nd element
    implicit val pairSortByValue = new Ordering[(String, Int)] {
      override def compare(a: (String, Int), b: (String, Int)) = a._2 compare b._2
    }

    println("Top 25 positive words in unsup dataset")
    posItems.top(25).foreach(println)
    println("Top 25 negative words in unsup dataset")
    negItems.top(25).foreach(println)

    sc.stop()
  }
}
This code runs well if I use spark-submit, but it throws exceptions when I run it directly in IntelliJ IDEA (Menu: Run > Run...). After looking into it, something seems wrong with val inTrainUnsup = sc.wholeTextFiles("src/main/resources/reviews/train/unsup"), because when I simply do inTrainUnsup.saveAsTextFile("test file") it throws the same exception.
14/11/11 10:21:07 ERROR executor.Executor: Exception in task 0.0 in stage 1.0 (TID 4)
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
at org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
at java.lang.Thread.run(Thread.java:695)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:155)
... 16 more
Caused by: java.lang.IncompatibleClassChangeError: Found class org.apache.hadoop.mapreduce.TaskAttemptContext, but interface was expected
at org.apache.spark.input.WholeTextFileRecordReader.<init>(WholeTextFileRecordReader.scala:40)
... 21 more
14/11/11 10:21:07 ERROR executor.Executor: Exception in task 1.0 in stage 1.0 (TID 5)
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
at org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
at java.lang.Thread.run(Thread.java:695)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:155)
... 16 more
Caused by: java.lang.IncompatibleClassChangeError: Found class org.apache.hadoop.mapreduce.TaskAttemptContext, but interface was expected
at org.apache.spark.input.WholeTextFileRecordReader.<init>(WholeTextFileRecordReader.scala:40)
... 21 more
14/11/11 10:21:07 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 1.0 (TID 5, localhost): java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
java.lang.Thread.run(Thread.java:695)
14/11/11 10:21:07 ERROR scheduler.TaskSetManager: Task 1 in stage 1.0 failed 1 times; aborting job
14/11/11 10:21:07 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
14/11/11 10:21:07 INFO scheduler.TaskSetManager: Lost task 0.0 in stage 1.0 (TID 4) on executor localhost: java.lang.RuntimeException (java.lang.reflect.InvocationTargetException) [duplicate 1]
14/11/11 10:21:07 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
14/11/11 10:21:07 INFO scheduler.TaskSchedulerImpl: Cancelling stage 1
14/11/11 10:21:07 INFO scheduler.DAGScheduler: Failed to run saveAsTextFile at MainDriver.scala:30
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 1.0 failed 1 times, most recent failure: Lost task 1.0 in stage 1.0 (TID 5, localhost): java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.initNextRecordReader(CombineFileRecordReader.java:164)
org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader.<init>(CombineFileRecordReader.java:126)
org.apache.spark.input.WholeTextFileInputFormat.createRecordReader(WholeTextFileInputFormat.scala:44)
org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:115)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:103)
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
java.lang.Thread.run(Thread.java:695)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)