How to resolve Py4JJavaError? - pyspark

I got this error:
"name": "Py4JJavaError",
"message": "An error occurred while calling o1993.collectToPython.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 88.0 failed 1 times, most recent failure: Lost task 4.0 in stage 88.0 (TID 339) (LAPTOP-LT9BMOHO executor driver): org.apache.spark.SparkException: Python worker failed to connect back.\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:189)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:109)\r\n\tat org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:124)\r\n\tat org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:164)\r\n\tat org.apache.spark.sql.execution.python.PandasGroupUtils$.executePython(PandasGroupUtils.scala:44)\r\n\tat org.apache.spark.sql.execution.python.FlatMapGroupsInPandasExec.$anonfun$doExecute$1(FlatMapGroupsInPandasExec.scala:94)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:136)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)\r\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:750)\r\nCaused by: java.net.SocketTimeoutException: Accept timed out\r\n\tat java.net.DualStackPlainSocketImpl.waitForNewConnection(Native Method)\r\n\tat java.net.DualStackPlainSocketImpl.socketAccept(DualStackPlainSocketImpl.java:131)\r\n\tat java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:535)\r\n\tat java.net.PlainSocketImpl.accept(PlainSocketImpl.java:189)\r\n\tat java.net.ServerSocket.implAccept(ServerSocket.java:545)\r\n\tat java.net.ServerSocket.accept(ServerSocket.java:513)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:176)\r\n\t... 
26 more\r\n\nDriver stacktrace:\r\n\tat org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)\r\n\tat scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)\r\n\tat scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)\r\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)\r\n\tat scala.Option.foreach(Option.scala:407)\r\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)\r\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)\r\nCaused by: org.apache.spark.SparkException: Python worker failed to connect back.\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:189)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:109)\r\n\tat org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:124)\r\n\tat org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:164)\r\n\tat org.apache.spark.sql.execution.python.PandasGroupUtils$.executePython(PandasGroupUtils.scala:44)\r\n\tat org.apache.spark.sql.execution.python.FlatMapGroupsInPandasExec.$anonfun$doExecute$1(FlatMapGroupsInPandasExec.scala:94)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:136)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)\r\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:750)\r\nCaused by: java.net.SocketTimeoutException: Accept timed out\r\n\tat java.net.DualStackPlainSocketImpl.waitForNewConnection(Native Method)\r\n\tat java.net.DualStackPlainSocketImpl.socketAccept(DualStackPlainSocketImpl.java:131)\r\n\tat java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:535)\r\n\tat java.net.PlainSocketImpl.accept(PlainSocketImpl.java:189)\r\n\tat java.net.ServerSocket.implAccept(ServerSocket.java:545)\r\n\tat java.net.ServerSocket.accept(ServerSocket.java:513)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:176)\r\n\t... 26 more\r\n",
"stack": "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mPy4JJavaError\u001b[0m Traceback (most recent call last)\nCell \u001b[1;32mIn[16], line 22\u001b[0m\n\u001b[0;32m 20\u001b[0m model_df_validation[i]\u001b[39m=\u001b[39mmodel_data\u001b[39m.\u001b[39mfilter((col(\u001b[39m\"\u001b[39m\u001b[39mWEEK_START_DATE\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m>\u001b[39m\u001b[39m=\u001b[39m data_start_dates[i]) \u001b[39m&\u001b[39m (col(\u001b[39m\"\u001b[39m\u001b[39mWEEK_START_DATE\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m<\u001b[39m testing_start_dates[i]))\n\u001b[0;32m 21\u001b[0m model_df_validation[i]\u001b[39m=\u001b[39mmodel_df_validation[i]\u001b[39m.\u001b[39mwithColumn(\u001b[39m\"\u001b[39m\u001b[39mTRAIN_VALID\u001b[39m\u001b[39m\"\u001b[39m,when(model_df_validation[i]\u001b[39m.\u001b[39mWEEK_START_DATE\u001b[39m<\u001b[39mvalidation_start_dates[i],\u001b[39m\"\u001b[39m\u001b[39mTRAIN\u001b[39m\u001b[39m\"\u001b[39m)\u001b[39m.\u001b[39motherwise(\u001b[39m\"\u001b[39m\u001b[39mVALID\u001b[39m\u001b[39m\"\u001b[39m))\u001b[39m.\u001b[39mcache()\n\u001b[1;32m---> 22\u001b[0m all_errors[i], best_error_mean[i], best_loss_init[i], best_n_estimators_init[i], best_max_depth_init[i], best_learning_rate_init[i], best_subsample_init[i], best_max_features_init[i] \u001b[39m=\u001b[39m run_validation(model_df_validation[i], model_input_cols, model_output_col, loss_values, n_estimators_values, max_depth_values, learning_rate_values, subsample_values, max_features_values)\n\nCell \u001b[1;32mIn[13], line 82\u001b[0m, in \u001b[0;36mrun_validation\u001b[1;34m(model_df, train_model_inputs, model_output_col, loss_values, n_estimators_values, max_depth_values, learning_rate_values, subsample_values, max_features_values)\u001b[0m\n\u001b[0;32m 76\u001b[0m coeff \u001b[39m=\u001b[39m coeff\u001b[39m.\u001b[39mwithColumn(\n\u001b[0;32m 77\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mE\u001b[39m\u001b[39m\"\u001b[39m, col(\u001b[39m\"\u001b[39m\u001b[39mVALID_Y_PREDICTED_UNITS\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m-\u001b[39m col(\u001b[39m\"\u001b[39m\u001b[39mVALID_Y_TRUE_UNITS\u001b[39m\u001b[39m\"\u001b[39m))\u001b[39m.\u001b[39mwithColumn(\n\u001b[0;32m 78\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAE\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mabs\u001b[39m(col(\u001b[39m\"\u001b[39m\u001b[39mE\u001b[39m\u001b[39m\"\u001b[39m)))\u001b[39m.\u001b[39mwithColumn(\n\u001b[0;32m 79\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mSE\u001b[39m\u001b[39m\"\u001b[39m, col(\u001b[39m\"\u001b[39m\u001b[39mE\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m*\u001b[39m col(\u001b[39m\"\u001b[39m\u001b[39mE\u001b[39m\u001b[39m\"\u001b[39m))\u001b[39m.\u001b[39mwithColumn(\n\u001b[0;32m 80\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAPE\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m100\u001b[39m\u001b[39m*\u001b[39m\u001b[39mabs\u001b[39m(col(\u001b[39m\"\u001b[39m\u001b[39mVALID_Y_PREDICTED_UNITS\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m-\u001b[39m col(\u001b[39m\"\u001b[39m\u001b[39mVALID_Y_TRUE_UNITS\u001b[39m\u001b[39m\"\u001b[39m))\u001b[39m/\u001b[39mcol(\u001b[39m\"\u001b[39m\u001b[39mVALID_Y_TRUE_UNITS\u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[0;32m 81\u001b[0m coeff \u001b[39m=\u001b[39m coeff\u001b[39m.\u001b[39mwithColumn(\u001b[39m\"\u001b[39m\u001b[39mused_error\u001b[39m\u001b[39m\"\u001b[39m, 
when(col(\u001b[39m\"\u001b[39m\u001b[39mVALID_Y_TRUE_UNITS\u001b[39m\u001b[39m\"\u001b[39m)\u001b[39m<\u001b[39m\u001b[39m=\u001b[39m\u001b[39m100\u001b[39m, \u001b[39m2\u001b[39m\u001b[39m*\u001b[39mcol(\u001b[39m\"\u001b[39m\u001b[39mAE\u001b[39m\u001b[39m\"\u001b[39m))\u001b[39m.\u001b[39motherwise(col(\u001b[39m\"\u001b[39m\u001b[39mAPE\u001b[39m\u001b[39m\"\u001b[39m))) \u001b[39m# to do: pass relative importance as parameter\u001b[39;00m\n\u001b[1;32m---> 82\u001b[0m error_mean \u001b[39m=\u001b[39m coeff\u001b[39m.\u001b[39;49mfilter(coeff\u001b[39m.\u001b[39;49mused_error \u001b[39m<\u001b[39;49m \u001b[39m300\u001b[39;49m)\u001b[39m.\u001b[39;49mselect(mean(\u001b[39m'\u001b[39;49m\u001b[39mused_error\u001b[39;49m\u001b[39m'\u001b[39;49m))\u001b[39m.\u001b[39;49mcollect() \u001b[39m#to do: pass threshold as a parameter\u001b[39;00m\n\u001b[0;32m 84\u001b[0m all_errors \u001b[39m=\u001b[39m []\n\u001b[0;32m 85\u001b[0m all_errors\u001b[39m.\u001b[39mappend({\u001b[39m\"\u001b[39m\u001b[39mloss\u001b[39m\u001b[39m\"\u001b[39m:loss, \u001b[39m\"\u001b[39m\u001b[39mn_estimators\u001b[39m\u001b[39m\"\u001b[39m:n_estimators, \u001b[39m\"\u001b[39m\u001b[39mmax_depth\u001b[39m\u001b[39m\"\u001b[39m:max_depth, \u001b[39m\"\u001b[39m\u001b[39mlearning_rate\u001b[39m\u001b[39m\"\u001b[39m:learning_rate, \u001b[39m\"\u001b[39m\u001b[39msubsample\u001b[39m\u001b[39m\"\u001b[39m:subsample, \u001b[39m\"\u001b[39m\u001b[39mmax_features\u001b[39m\u001b[39m\"\u001b[39m:max_features, \u001b[39m\"\u001b[39m\u001b[39merror_mean\u001b[39m\u001b[39m\"\u001b[39m: error_mean})\n\nFile \u001b[1;32mC:\\Spark\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\python\\pyspark\\sql\\dataframe.py:817\u001b[0m, in \u001b[0;36mDataFrame.collect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 807\u001b[0m \u001b[39m\"\"\"Returns all the records as a list of :class:`Row`.\u001b[39;00m\n\u001b[0;32m 808\u001b[0m \n\u001b[0;32m 809\u001b[0m \u001b[39m.. 
versionadded:: 1.3.0\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 814\u001b[0m \u001b[39m[Row(age=2, name='Alice'), Row(age=5, name='Bob')]\u001b[39;00m\n\u001b[0;32m 815\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 816\u001b[0m \u001b[39mwith\u001b[39;00m SCCallSiteSync(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sc):\n\u001b[1;32m--> 817\u001b[0m sock_info \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_jdf\u001b[39m.\u001b[39;49mcollectToPython()\n\u001b[0;32m 818\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mlist\u001b[39m(_load_from_socket(sock_info, BatchedSerializer(CPickleSerializer())))\n\nFile \u001b[1;32mC:\\Spark\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\python\\lib\\py4j-0.10.9.5-src.zip\\py4j\\java_gateway.py:1321\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1315\u001b[0m command \u001b[39m=\u001b[39m proto\u001b[39m.\u001b[39mCALL_COMMAND_NAME \u001b[39m+\u001b[39m\\\n\u001b[0;32m 1316\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcommand_header \u001b[39m+\u001b[39m\\\n\u001b[0;32m 1317\u001b[0m args_command \u001b[39m+\u001b[39m\\\n\u001b[0;32m 1318\u001b[0m proto\u001b[39m.\u001b[39mEND_COMMAND_PART\n\u001b[0;32m 1320\u001b[0m answer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgateway_client\u001b[39m.\u001b[39msend_command(command)\n\u001b[1;32m-> 1321\u001b[0m return_value \u001b[39m=\u001b[39m get_return_value(\n\u001b[0;32m 1322\u001b[0m answer, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgateway_client, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtarget_id, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname)\n\u001b[0;32m 1324\u001b[0m \u001b[39mfor\u001b[39;00m temp_arg \u001b[39min\u001b[39;00m temp_args:\n\u001b[0;32m 1325\u001b[0m temp_arg\u001b[39m.\u001b[39m_detach()\n\nFile \u001b[1;32mC:\\Spark\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\python\\pyspark\\sql\\utils.py:190\u001b[0m, in \u001b[0;36mcapture_sql_exception.<locals>.deco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeco\u001b[39m(\u001b[39m*\u001b[39ma: Any, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[0;32m 189\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 190\u001b[0m \u001b[39mreturn\u001b[39;00m f(\u001b[39m*\u001b[39ma, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw)\n\u001b[0;32m 191\u001b[0m \u001b[39mexcept\u001b[39;00m Py4JJavaError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 192\u001b[0m converted \u001b[39m=\u001b[39m convert_exception(e\u001b[39m.\u001b[39mjava_exception)\n\nFile \u001b[1;32mC:\\Spark\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\spark-3.3.1-bin-hadoop3\\python\\lib\\py4j-0.10.9.5-src.zip\\py4j\\protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 324\u001b[0m value \u001b[39m=\u001b[39m OUTPUT_CONVERTER[\u001b[39mtype\u001b[39m](answer[\u001b[39m2\u001b[39m:], gateway_client)\n\u001b[0;32m 325\u001b[0m \u001b[39mif\u001b[39;00m answer[\u001b[39m1\u001b[39m] \u001b[39m==\u001b[39m REFERENCE_TYPE:\n\u001b[1;32m--> 326\u001b[0m \u001b[39mraise\u001b[39;00m Py4JJavaError(\n\u001b[0;32m 327\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAn error occurred while calling 
\u001b[39m\u001b[39m{0}\u001b[39;00m\u001b[39m{1}\u001b[39;00m\u001b[39m{2}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\n\u001b[0;32m 328\u001b[0m \u001b[39mformat\u001b[39m(target_id, \u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m, name), value)\n\u001b[0;32m 329\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 330\u001b[0m \u001b[39mraise\u001b[39;00m Py4JError(\n\u001b[0;32m 331\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAn error occurred while calling \u001b[39m\u001b[39m{0}\u001b[39;00m\u001b[39m{1}\u001b[39;00m\u001b[39m{2}\u001b[39;00m\u001b[39m. Trace:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m{3}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\n\u001b[0;32m 332\u001b[0m \u001b[39mformat\u001b[39m(target_id, \u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m, name, value))\n\n\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling o1993.collectToPython.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 88.0 failed 1 times, most recent failure: Lost task 4.0 in stage 88.0 (TID 339) (LAPTOP-LT9BMOHO executor driver): org.apache.spark.SparkException: Python worker failed to connect back.\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:189)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:109)\r\n\tat org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:124)\r\n\tat org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:164)\r\n\tat org.apache.spark.sql.execution.python.PandasGroupUtils$.executePython(PandasGroupUtils.scala:44)\r\n\tat org.apache.spark.sql.execution.python.FlatMapGroupsInPandasExec.$anonfun$doExecute$1(FlatMapGroupsInPandasExec.scala:94)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:136)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)\r\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:750)\r\nCaused by: java.net.SocketTimeoutException: Accept timed out\r\n\tat 
java.net.DualStackPlainSocketImpl.waitForNewConnection(Native Method)\r\n\tat java.net.DualStackPlainSocketImpl.socketAccept(DualStackPlainSocketImpl.java:131)\r\n\tat java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:535)\r\n\tat java.net.PlainSocketImpl.accept(PlainSocketImpl.java:189)\r\n\tat java.net.ServerSocket.implAccept(ServerSocket.java:545)\r\n\tat java.net.ServerSocket.accept(ServerSocket.java:513)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:176)\r\n\t... 26 more\r\n\nDriver stacktrace:\r\n\tat org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)\r\n\tat scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)\r\n\tat scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)\r\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)\r\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)\r\n\tat scala.Option.foreach(Option.scala:407)\r\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)\r\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)\r\nCaused by: org.apache.spark.SparkException: Python worker failed to connect back.\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:189)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:109)\r\n\tat org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:124)\r\n\tat org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:164)\r\n\tat org.apache.spark.sql.execution.python.PandasGroupUtils$.executePython(PandasGroupUtils.scala:44)\r\n\tat org.apache.spark.sql.execution.python.FlatMapGroupsInPandasExec.$anonfun$doExecute$1(FlatMapGroupsInPandasExec.scala:94)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:329)\r\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:136)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)\r\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:750)\r\nCaused by: java.net.SocketTimeoutException: Accept timed out\r\n\tat java.net.DualStackPlainSocketImpl.waitForNewConnection(Native Method)\r\n\tat java.net.DualStackPlainSocketImpl.socketAccept(DualStackPlainSocketImpl.java:131)\r\n\tat java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:535)\r\n\tat java.net.PlainSocketImpl.accept(PlainSocketImpl.java:189)\r\n\tat java.net.ServerSocket.implAccept(ServerSocket.java:545)\r\n\tat java.net.ServerSocket.accept(ServerSocket.java:513)\r\n\tat org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:176)\r\n\t... 26 more\r\n"
}
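On a local Windows setup like the one in the trace above, "Python worker failed to connect back" together with "Accept timed out" is commonly caused by Spark being unable to launch its Python worker processes with a usable interpreter. A minimal sketch of the usual first thing to try, assuming the interpreter running the notebook is the one the workers should use (the app name below is a placeholder), is to point PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON at that interpreter before the SparkSession is created:

import os
import sys

# Make the driver and the Python workers use the interpreter running this notebook.
# Adjust the path if the workers should use a different environment.
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("py4j-error-debug")  # placeholder app name
    .getOrCreate()
)

If a SparkSession already exists, restart the kernel first so the environment variables are in place before the JVM and the workers are launched.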

Related

AnnotatedConnectException: finishConnect(..) failed: Connection refused: localhost/0:0:0:0:0:0:0:1:53686 error when executing beam pipeline on flink

I'm trying to set up a go-beam-flink-kafka cluster locally. I started the Kafka cluster along with a producer. I then started the expansion service (required for the cross-language features of the Beam Go SDK's kafkaio API) on port 8097. Then I started the Flink job server with the command docker run --platform linux/amd64 --net=host apache/beam_flink1.14_job_server:latest, which starts its own embedded Flink cluster. Then I wrote a simple pipeline to read from Kafka and print the values, as below.
package main

// Import paths assumed from the Beam Go SDK v2 APIs used below.
import (
	"context"
	"flag"
	"time"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/kafkaio"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink"
)

type LogFn struct{}

func (fn *LogFn) ProcessElement(ctx context.Context, elm []byte) {
	log.Infof(ctx, "Ride info: %v", string(elm))
}

// FinishBundle waits a bit so the job server finishes receiving logs.
func (fn *LogFn) FinishBundle() {
	time.Sleep(2 * time.Second)
}

func init() {
	register.DoFn2x0[context.Context, []byte](&LogFn{})
}

func main() {
	flag.Parse()
	beam.Init()

	p, s := beam.NewPipelineWithRoot()
	ctx := context.Background()

	messages := kafkaio.Read(s,
		"localhost:8097", // expansion service address
		"localhost:9092", // Kafka bootstrap server
		[]string{"quickstart-events"},
	)
	vals := beam.DropKey(s, messages)
	beam.ParDo0(s, &LogFn{}, vals)

	if _, err := flink.Execute(ctx, p); err != nil {
		log.Fatalf(ctx, "Failed to execute job: %v", err.Error())
	}
}
Then I start the pipeline with the command go run main.go --endpoint localhost:8099 --environment_type LOOPBACK, but I get this error:
SEVERE: Error during job invocation go0job0101666771397124223000-root-1026080321-9741bbf6_e2058fe0-971e-4b98-b96e-77d2cb52fee0.
org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
at org.apache.flink.runtime.minicluster.MiniClusterJobClient.lambda$getJobExecutionResult$3(MiniClusterJobClient.java:137)
at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.lambda$invokeRpc$1(AkkaInvocationHandler.java:258)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.util.concurrent.FutureUtils.doForward(FutureUtils.java:1389)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$null$1(ClassLoadingUtils.java:93)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$guardCompletionWithContextClassLoader$2(ClassLoadingUtils.java:92)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$1.onComplete(AkkaFutureUtils.java:47)
at akka.dispatch.OnComplete.internal(Future.scala:300)
at akka.dispatch.OnComplete.internal(Future.scala:297)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:224)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:221)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:60)
at org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$DirectExecutionContext.execute(AkkaFutureUtils.java:65)
at scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:68)
at scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1(Promise.scala:284)
at scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1$adapted(Promise.scala:284)
at scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:284)
at akka.pattern.PromiseActorRef.$bang(AskSupport.scala:621)
at akka.pattern.PipeToSupport$PipeableFuture$$anonfun$pipeTo$1.applyOrElse(PipeToSupport.scala:24)
at akka.pattern.PipeToSupport$PipeableFuture$$anonfun$pipeTo$1.applyOrElse(PipeToSupport.scala:23)
at scala.concurrent.Future.$anonfun$andThen$1(Future.scala:532)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:29)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:29)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:60)
at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:63)
at akka.dispatch.BatchingExecutor$BlockableBatch.$anonfun$run$1(BatchingExecutor.scala:100)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
at scala.concurrent.BlockContext$.withBlockContext(BlockContext.scala:81)
at akka.dispatch.BatchingExecutor$BlockableBatch.run(BatchingExecutor.scala:100)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:49)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(ForkJoinExecutorConfigurator.scala:48)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:138)
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:82)
at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:252)
at org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:242)
at org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:233)
at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:684)
at org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:79)
at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:444)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:316)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:314)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:217)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:78)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:163)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)
at scala.PartialFunction.applyOrElse(PartialFunction.scala:123)
at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at akka.actor.Actor.aroundReceive(Actor.scala:537)
at akka.actor.Actor.aroundReceive$(Actor.scala:535)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580)
at akka.actor.ActorCell.invoke(ActorCell.scala:548)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)
at akka.dispatch.Mailbox.run(Mailbox.scala:231)
at akka.dispatch.Mailbox.exec(Mailbox.scala:243)
... 4 more
Caused by: org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException: org.apache.beam.vendor.grpc.v1p48p1.io.grpc.StatusRuntimeException: UNAVAILABLE: io exception
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2050)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache.get(LocalCache.java:3952)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3974)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4958)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LocalLoadingCache.getUnchecked(LocalCache.java:4964)
at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$SimpleStageBundleFactory.<init>(DefaultJobBundleFactory.java:451)
at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$SimpleStageBundleFactory.<init>(DefaultJobBundleFactory.java:436)
at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory.forStage(DefaultJobBundleFactory.java:303)
at org.apache.beam.runners.fnexecution.control.DefaultExecutableStageContext.getStageBundleFactory(DefaultExecutableStageContext.java:38)
at org.apache.beam.runners.fnexecution.control.ReferenceCountingExecutableStageContextFactory$WrappedContext.getStageBundleFactory(ReferenceCountingExecutableStageContextFactory.java:202)
at org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator.open(ExecutableStageDoFnOperator.java:248)
at org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:110)
at org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:711)
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55)
at org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:687)
at org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:654)
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:958)
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:927)
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:766)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:575)
at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.beam.vendor.grpc.v1p48p1.io.grpc.StatusRuntimeException: UNAVAILABLE: io exception
at org.apache.beam.vendor.grpc.v1p48p1.io.grpc.stub.ClientCalls.toStatusRuntimeException(ClientCalls.java:271)
at org.apache.beam.vendor.grpc.v1p48p1.io.grpc.stub.ClientCalls.getUnchecked(ClientCalls.java:252)
at org.apache.beam.vendor.grpc.v1p48p1.io.grpc.stub.ClientCalls.blockingUnaryCall(ClientCalls.java:165)
at org.apache.beam.model.fnexecution.v1.BeamFnExternalWorkerPoolGrpc$BeamFnExternalWorkerPoolBlockingStub.startWorker(BeamFnExternalWorkerPoolGrpc.java:225)
at org.apache.beam.runners.fnexecution.environment.ExternalEnvironmentFactory.createEnvironment(ExternalEnvironmentFactory.java:113)
at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$1.load(DefaultJobBundleFactory.java:252)
at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$1.load(DefaultJobBundleFactory.java:231)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3528)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2277)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2154)
at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2044)
... 20 more
Caused by: org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.AbstractChannel$AnnotatedConnectException: finishConnect(..) failed: Connection refused: localhost/0:0:0:0:0:0:0:1:53686
Caused by: java.net.ConnectException: finishConnect(..) failed: Connection refused
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.unix.Errors.newConnectException0(Errors.java:155)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.unix.Errors.handleConnectErrno(Errors.java:128)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.unix.Socket.finishConnect(Socket.java:321)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.doFinishConnect(AbstractEpollChannel.java:710)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.finishConnect(AbstractEpollChannel.java:687)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.epollOutReady(AbstractEpollChannel.java:567)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:477)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:385)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:995)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at org.apache.beam.vendor.grpc.v1p48p1.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:750)
If anyone has set up this exact stack, can you help resolve this issue?

pyspark foreach/foreachPartition sending HTTP request fails

I use urllib.request to send an HTTP request in foreach/foreachPartition. PySpark throws the following error:
objc[74094]: +[__NSPlaceholderDate initialize] may have been in progress in another thread when fork() was called. We cannot safely call it or ignore it in the fork() child process. Crashing instead. Set a breakpoint on objc_initializeAfterForkError to debug.
20/07/20 19:05:58 ERROR Executor: Exception in task 7.0 in stage 0.0 (TID 7)
org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:536)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:525)
at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)
at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:643)
at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:621)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:456)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:315)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:313)
at org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)
at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:307)
at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:307)
at org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)
at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:294)
at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:288)
at org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)
at org.apache.spark.rdd.RDD.$anonfun$collect$2(RDD.scala:1004)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2133)
This happens when I call rdd.foreach(send_http), with rdd = sc.parallelize(["http://192.168.1.1:5000/index.html"]) and send_http defined as follows:
import urllib.request

def send_http(url):
    req = urllib.request.Request(url)
    resp = urllib.request.urlopen(req)
Can anyone tell me what the problem is? Thanks.
This is #tmylt's answer in the comment above, but I can also confirm that using http.client instead of requests.get does work. I'm sure there's a reason why this is happening, but using Python's http.client is a quick fix.
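As a rough sketch of that workaround, assuming the same host and path as in the question, this is what sending the requests with http.client could look like; using one connection per partition via foreachPartition is a design choice added here, not part of the original answer:

import http.client

from pyspark import SparkContext

sc = SparkContext.getOrCreate()
rdd = sc.parallelize(["/index.html"])  # paths served by http://192.168.1.1:5000, as in the question

def send_http_partition(paths):
    # One HTTPConnection per partition instead of one per element.
    conn = http.client.HTTPConnection("192.168.1.1", 5000, timeout=10)
    try:
        for path in paths:
            conn.request("GET", path)
            resp = conn.getresponse()
            resp.read()  # drain the body so the connection can be reused
    finally:
        conn.close()

rdd.foreachPartition(send_http_partition)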

App created by 'jhipster --skipClient' can't run from Eclipse

java.lang.IllegalStateException: Error processing condition on org.springframework.boot.autoconfigure.context.MessageSourceAutoConfiguration
at org.springframework.boot.autoconfigure.condition.SpringBootCondition.matches(SpringBootCondition.java:64)
at org.springframework.context.annotation.ConditionEvaluator.shouldSkip(ConditionEvaluator.java:108)
at org.springframework.context.annotation.ConfigurationClassBeanDefinitionReader$TrackedConditionEvaluator.shouldSkip(ConfigurationClassBeanDefinitionReader.java:441)
at org.springframework.context.annotation.ConfigurationClassBeanDefinitionReader.loadBeanDefinitionsForConfigurationClass(ConfigurationClassBeanDefinitionReader.java:129)
at org.springframework.context.annotation.ConfigurationClassBeanDefinitionReader.loadBeanDefinitions(ConfigurationClassBeanDefinitionReader.java:118)
at org.springframework.context.annotation.ConfigurationClassPostProcessor.processConfigBeanDefinitions(ConfigurationClassPostProcessor.java:328)
at org.springframework.context.annotation.ConfigurationClassPostProcessor.postProcessBeanDefinitionRegistry(ConfigurationClassPostProcessor.java:233)
at org.springframework.context.support.PostProcessorRegistrationDelegate.invokeBeanDefinitionRegistryPostProcessors(PostProcessorRegistrationDelegate.java:271)
at org.springframework.context.support.PostProcessorRegistrationDelegate.invokeBeanFactoryPostProcessors(PostProcessorRegistrationDelegate.java:91)
at org.springframework.context.support.AbstractApplicationContext.invokeBeanFactoryPostProcessors(AbstractApplicationContext.java:692)
at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:530)
at org.springframework.boot.SpringApplication.refresh(SpringApplication.java:754)
at org.springframework.boot.SpringApplication.refreshContext(SpringApplication.java:386)
at org.springframework.boot.SpringApplication.run(SpringApplication.java:307)
at com.einfochips.demo.TestApp.main(TestApp.java:63)
Caused by: java.lang.IllegalStateException: Failed to introspect Class [org.springframework.security.config.annotation.web.configuration.WebSecurityConfiguration] from ClassLoader [sun.misc.Launcher$AppClassLoader#659e0bfd]

Not able to convert with DynamicFrame.toDF(), getting exception

I am trying to convert my DynamicFrame to a DataFrame in an AWS Glue ETL job, and I am getting the exception below. Please note that my DynamicFrame has DynamicRecords with different columns per record, but my understanding was that DynamicFrame handles those and generates a DataFrame.
'An error occurred while calling o80.showString.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.0 failed 4 times, most recent failure: Lost task 0.3 in stage 10.0 (TID 619, ip-172-31-7-160.ec2.internal, executor 2): java.lang.NullPointerException\n\tat com.amazonaws.services.glue.schema.types.StructType.fieldIndex(StructType.java:53)\n\tat com.amazonaws.services.glue.RecordToRow$$anonfun$convertField$1.apply(RecordToRow.scala:93)\n\tat com.amazonaws.services.glue.RecordToRow$$anonfun$convertField$1.apply(RecordToRow.scala:92)\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\n\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1336)\n\tat com.amazonaws.services.glue.RecordToRow$.convertField(RecordToRow.scala:92)\n\tat com.amazonaws.services.glue.RecordToRow$.apply(RecordToRow.scala:43)\n\tat com.amazonaws.services.glue.DynamicRecord.toRow(DynamicRecord.scala:260)\n\tat com.amazonaws.services.glue.DynamicFrame$$anonfun$toDF$1.apply(DynamicFrame.scala:280)\n\tat com.amazonaws.services.glue.DynamicFrame$$anonfun$toDF$1.apply(DynamicFrame.scala:280)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:409)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:235)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:287)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:108)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)\n\tat scala.Option.foreach(Option.scala:257)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687)\n\tat 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2050)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2069)\n\tat org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:336)\n\tat org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)\n\tat org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2861)\n\tat org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150)\n\tat org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150)\n\tat org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2842)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)\n\tat org.apache.spark.sql.Dataset.withAction(Dataset.scala:2841)\n\tat org.apache.spark.sql.Dataset.head(Dataset.scala:2150)\n\tat org.apache.spark.sql.Dataset.take(Dataset.scala:2363)\n\tat org.apache.spark.sql.Dataset.showString(Dataset.scala:241)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:280)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:214)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: java.lang.NullPointerException\n\tat com.amazonaws.services.glue.schema.types.StructType.fieldIndex(StructType.java:53)\n\tat com.amazonaws.services.glue.RecordToRow$$anonfun$convertField$1.apply(RecordToRow.scala:93)\n\tat com.amazonaws.services.glue.RecordToRow$$anonfun$convertField$1.apply(RecordToRow.scala:92)\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\n\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1336)\n\tat com.amazonaws.services.glue.RecordToRow$.convertField(RecordToRow.scala:92)\n\tat com.amazonaws.services.glue.RecordToRow$.apply(RecordToRow.scala:43)\n\tat com.amazonaws.services.glue.DynamicRecord.toRow(DynamicRecord.scala:260)\n\tat com.amazonaws.services.glue.DynamicFrame$$anonfun$toDF$1.apply(DynamicFrame.scala:280)\n\tat com.amazonaws.services.glue.DynamicFrame$$anonfun$toDF$1.apply(DynamicFrame.scala:280)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:409)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:235)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)\n\tat 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:287)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:108)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\t... 1 more\n')
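One commonly suggested thing to check in this situation (not verified against this exact NullPointerException) is whether fields with mixed types resolve cleanly before calling toDF(). A minimal sketch, with a hypothetical catalog database and table standing in for the job's actual source, might look like:

from pyspark.context import SparkContext
from awsglue.context import GlueContext

glue_context = GlueContext(SparkContext.getOrCreate())

# Hypothetical source; replace with the job's actual DynamicFrame.
dyf = glue_context.create_dynamic_frame.from_catalog(
    database="my_database",
    table_name="my_table",
)
dyf.printSchema()  # inspect which fields came out as choice (mixed) types

# Fold conflicting types into structs instead of leaving unresolved choices,
# then convert to a Spark DataFrame.
resolved = dyf.resolveChoice(choice="make_struct")
df = resolved.toDF()
df.show()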

Map and transform methods with using serializable class on DStream elements

I am writing an app that extracts logs, so I implemented a function (Listing 1) that takes a string as a parameter and extracts valuable information from it (regexes: Listing 2). I wanted this method to be sendable to other workers, so I implemented a serializable class.
I have a problem applying this method to DStreams. Here is my stream-mining solution:
def streamMinner(): Unit = {
  val ssc = new StreamingContext(sc, Seconds(2))
  val logsStream = ssc.textFileStream("logs/")

  // Not works
  val extractLogs = logsStream.map( log => new Matcher().matchLog(log))
  extractLogs.print(1)

  // Works
  // val words = logsStream.transform( rdd => rdd.map( log => matchLog(log)))
  // words.print()

  ssc.start()
  ssc.awaitTermination()
}
The problem is in the line where every element of logsStream is mapped with a new object of the Matcher class (new Matcher().matchLog(log)).
Apache Spark gave me the errors below:
ERROR YarnScheduler: Lost executor 2 on host1: Container marked as failed: container_e743_1499728610705_0043_01_000003 on host: host1. Exit status: 50. Diagnostics: Exception from container-launch.
Container id: container_e743_1499728610705_0043_01_000003
Exit code: 50
Stack trace: ExitCodeException exitCode=50:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:600)
at org.apache.hadoop.util.Shell.run(Shell.java:511)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:783)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:303)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Container exited with a non-zero exit code 50
ERROR YarnScheduler: Lost executor 5 on host2: Container marked as failed: container_e743_1499728610705_0043_01_000006 on host: host2. Exit status: 50. Diagnostics: Exception from container-launch.
Container id: container_e743_1499728610705_0043_01_000006
Exit code: 50
Stack trace: ExitCodeException exitCode=50:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:600)
at org.apache.hadoop.util.Shell.run(Shell.java:511)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:783)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:303)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Container exited with a non-zero exit code 50
...
ERROR YarnScheduler: Lost executor 6 ...
ERROR TaskSetManager: Task 0 in stage 0.0 failed 4 times; aborting job
17/07/11 09:41:09 ERROR JobScheduler: Error running job streaming job 1499758850000 ms.0
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, host1): ExecutorLostFailure (executor 6 exited caused by one of the running tasks) Reason: Container marked as failed: container_e743_1499728610705_0043_01_000007 on host: host1. Exit status: 50. Diagnostics: Exception from container-launch.
Container id: container_e743_1499728610705_0043_01_000007
Exit code: 50
Stack trace: ExitCodeException exitCode=50:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:600)
at org.apache.hadoop.util.Shell.run(Shell.java:511)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:783)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:303)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Container exited with a non-zero exit code 50
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1433)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1421)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1420)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1420)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:801)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:801)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:801)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1642)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1601)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1590)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:622)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1869)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1882)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1335)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:323)
at org.apache.spark.rdd.RDD.take(RDD.scala:1309)
at org.apache.spark.streaming.dstream.DStream$$anonfun$print$2$$anonfun$foreachFunc$5$1.apply(DStream.scala:768)
at org.apache.spark.streaming.dstream.DStream$$anonfun$print$2$$anonfun$foreachFunc$5$1.apply(DStream.scala:767)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:426)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:49)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
at scala.util.Try$.apply(Try.scala:161)
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:227)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:227)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:227)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:226)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
scala> 17/07/11 09:41:11 ERROR TransportResponseHandler: Still have 1 requests outstanding when connection from host3/11.11.11.11:11111 is closed
17/07/11 09:41:11 ERROR YarnScheduler: Lost executor 4 on host3: Slave lost
17/07/11 09:41:12 ERROR TransportClient: Failed to send RPC 7741519719369750843 to host3/11.11.11.11:11111: java.nio.channels.ClosedChannelException
java.nio.channels.ClosedChannelException
17/07/11 09:41:12 ERROR YarnScheduler: Lost executor 1 on host2: Slave lost
17/07/11 09:41:12 ERROR TransportClient: Failed to send RPC 7734757459881277232 to host3//11.11.11.11:11111: java.nio.channels.ClosedChannelException
java.nio.channels.ClosedChannelException
17/07/11 09:41:12 ERROR YarnScheduler: Lost executor 3 on host4: Slave lost
17/07/11 09:41:12 ERROR TransportClient: Failed to send RPC 5816053641531447955 to host3//11.11.11.11:11111: java.nio.channels.ClosedChannelException
java.nio.channels.ClosedChannelException
17/07/11 09:41:12 ERROR YarnScheduler: Lost executor 7 on host2: Slave lost
17/07/11 09:41:13 ERROR TransportClient: Failed to send RPC 8774007142277591342 to host3/11.11.11.11:11111: java.nio.channels.ClosedChannelException
java.nio.channels.ClosedChannelException
17/07/11 09:41:13 ERROR YarnScheduler: Lost executor 8 on host1: Slave lost
17/07/11 09:41:19 ERROR YarnScheduler: Lost executor 1 on host3: Container marked as failed: container_e743_1499728610705_0043_02_000002 on host: host3. Exit status: 50. Diagnostics: Exception from container-launch.
Container id: container_e743_1499728610705_0043_02_000002
Exit code: 50
Stack trace: ExitCodeException exitCode=50:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:600)
at org.apache.hadoop.util.Shell.run(Shell.java:511)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:783)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:303)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Container exited with a non-zero exit code 50
When I comment out these lines:
val extractLogs = logsStream.map( log => new Matcher().matchLog(log))
extractLogs.print(1)
and uncomment these lines:
// val words = logsStream.transform( rdd => rdd.map( log => matchLog(log)))
// words.print()
Everything works fine. My question is: why? I'm afraid that the working solution may not be parallelized on the cluster, because the method matchLog is not serializable. Has anyone had a similar problem, or does anyone know how to deal with it?
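For context on the serializability worry, here is a minimal, illustrative sketch (none of the names below come from the original code) of what Spark has to serialize when a DStream.map closure is shipped to the executors. If the lambda calls an instance method of an enclosing driver-side class, it captures the whole enclosing instance; if it only references a top-level object, nothing else is dragged in.

import org.apache.spark.streaming.dstream.DStream

class StreamingApp {                                   // driver-side class, NOT Serializable
  def matchLog(x: String): String = x.trim             // stand-in for the real parser

  def wire(logs: DStream[String]): Unit = {
    // The lambda calls an instance method, so it captures `this` (the whole StreamingApp);
    // Spark must then serialize StreamingApp to ship the task.
    val parsed = logs.map(log => matchLog(log))
    parsed.print()
  }
}

object LogParser extends Serializable {                // top-level helper: nothing else gets captured
  def matchLog(x: String): String = x.trim
}

class StreamingAppFixed {
  def wire(logs: DStream[String]): Unit = {
    // The lambda only references LogParser, so no driver-side instance is pulled into the closure.
    val parsed = logs.map(log => LogParser.matchLog(log))
    parsed.print()
  }
}

This is only the general closure-capture rule; it does not by itself explain the exit-code-50 failures in the log above.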
Listing 1:
case class logValues2(time_stamp: String, action: String, protocol: String, connection_id: String, src_ip: String, dst_ip: String, src_port: String, dst_port: String, duration: String, bytes: String, user: String) extends Serializable

class Matcher extends Serializable {
  // the *_reg regex vals from Listing 2 must be in scope here (e.g. as members of this class)

  def matchLog(x: String): logValues2 = {
    var dst_ip = " "
    var dst_port = " "
    var time_stamp = time_stamp_reg.findAllIn(x).mkString(",")
    var action = action_reg.findAllIn(x).mkString(",")
    var protocol = protocol_reg.findAllIn(x).mkString(",")
    var connection_id = connection_id_reg.findAllIn(x).mkString(",")
    var ips = ips_reg.findAllIn(x).mkString(" ").split(""" """)
    var src_ip = ips(0)
    if (ips.length > 1) {
      dst_ip = ips(1)
    } else {
      dst_ip = " "
    }
    var ports = ports_reg.findAllIn(x).mkString(" ").split(""" """)
    var src_port = ports(0)
    if (ports.length > 1) {
      dst_port = ports(1)
    } else {
      dst_port = " "
    }
    var duration = duration_reg.findAllIn(x).mkString(",")
    var bytes = bytes_reg.findAllIn(x).mkString(",")
    var user = user_reg.findAllIn(x).mkString(",")
    var logObject = logValues2(time_stamp, action, protocol, connection_id, src_ip, dst_ip, src_port, dst_port, duration, bytes, user)
    return logObject
  }
}
The method above is also implemented separately (not within the Matcher class).
UPDATE:
My regular expressions (Listing 2):
val time_stamp_reg = """^.*?(?=\s\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s%)""".r
val action_reg = """((?<=:\s)\w{4,10}(?=\s\w{2})|(?<=\w\s)(\w{7,9})(?=\s[f]))""".r
val protocol_reg = """(?<=[\w:]\s)(\w+)(?=\s[cr])""".r
val connection_id_reg = """(?<=\w\s)(\d+)(?=\sfor)""".r
val ips_reg = """(?<=[\d\w][:\s])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?=\/\d+|\z| \w)""".r
val ports_reg = """(?<=\d\/)(\d{1,6})(?=\z|[\s(])""".r
val duration_reg = """(?<=duration\s)(\d{1,2}:\d{1,2}:\d{1,2})(?=\s|\z)""".r
val bytes_reg = """(?<=bytes\s)(\d+)(?=\s|\z)""".r
val user_reg = """(?<=\\\\)(\d+)(?=\W)""".r