First, everything ok.
After amost 40h, I receive a lot of error
org.springframework.dao.DataAccessResourceFailureException: Failed to obtain R2DBC Connection; nested exception is io.r2dbc.spi.R2dbcTimeoutException: Connection Acquisition timed out after 5000ms
at org.springframework.r2dbc.connection.ConnectionFactoryUtils.lambda$getConnection$0(ConnectionFactoryUtils.java:88) ~[spring-r2dbc-5.3.6.jar!/:5.3.6]
Suppressed: reactor.core.publisher.FluxOnAssembly$OnAssemblyException:
Error has been observed at the following site(s):
|_ checkpoint ⇢ Handler cn.booslink.controller.DeviceController#login(String, String) [DispatcherHandler]
Stack trace:
at org.springframework.r2dbc.connection.ConnectionFactoryUtils.lambda$getConnection$0(ConnectionFactoryUtils.java:88) ~[spring-r2dbc-5.3.6.jar!/:5.3.6]
at reactor.core.publisher.Mono.lambda$onErrorMap$31(Mono.java:3488) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onError(FluxOnErrorResume.java:94) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onError(FluxOnErrorResume.java:106) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxRetry$RetrySubscriber.onError(FluxRetry.java:94) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onError(FluxOnErrorResume.java:106) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.Operators.error(Operators.java:197) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.MonoError.subscribe(MonoError.java:52) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.Mono.subscribe(Mono.java:4150) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onError(FluxOnErrorResume.java:103) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.SerializedSubscriber.onError(SerializedSubscriber.java:124) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.SerializedSubscriber.onError(SerializedSubscriber.java:124) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxTimeout$TimeoutMainSubscriber.handleTimeout(FluxTimeout.java:294) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxTimeout$TimeoutMainSubscriber.doTimeout(FluxTimeout.java:279) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxTimeout$TimeoutTimeoutSubscriber.onNext(FluxTimeout.java:418) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onNext(FluxOnErrorResume.java:79) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.MonoDelay$MonoDelayRunnable.run(MonoDelay.java:119) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.scheduler.SchedulerTask.call(SchedulerTask.java:68) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.scheduler.SchedulerTask.call(SchedulerTask.java:28) [reactor-core-3.4.5.jar!/:3.4.5]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [na:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_212]
Caused by: io.r2dbc.spi.R2dbcTimeoutException: Connection Acquisition timed out after 5000ms
at io.r2dbc.pool.ConnectionPool.lambda$null$6(ConnectionPool.java:123) ~[r2dbc-pool-0.8.6.RELEASE.jar!/:0.8.6.RELEASE]
at reactor.core.publisher.Mono.lambda$onErrorMap$30(Mono.java:3474) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.Mono.lambda$onErrorResume$32(Mono.java:3564) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onError(FluxOnErrorResume.java:94) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.SerializedSubscriber.onError(SerializedSubscriber.java:124) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.SerializedSubscriber.onError(SerializedSubscriber.java:124) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxTimeout$TimeoutMainSubscriber.handleTimeout(FluxTimeout.java:294) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxTimeout$TimeoutMainSubscriber.doTimeout(FluxTimeout.java:279) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxTimeout$TimeoutTimeoutSubscriber.onNext(FluxTimeout.java:418) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onNext(FluxOnErrorResume.java:79) ~[reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.publisher.MonoDelay$MonoDelayRunnable.run(MonoDelay.java:119) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.scheduler.SchedulerTask.call(SchedulerTask.java:68) [reactor-core-3.4.5.jar!/:3.4.5]
at reactor.core.scheduler.SchedulerTask.call(SchedulerTask.java:28) [reactor-core-3.4.5.jar!/:3.4.5]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [na:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_212]
Caused by: java.util.concurrent.TimeoutException: Did not observe any item or terminal signal within 5000ms in 'Connection Acquisition from [MariadbConnectionFactory{configuration=MariadbConnectionConfiguration{datbase='xx', host='xx', connectTimeout=PT10S, socketTimeout=null, tcpKeepAlive=false, tcpAbortiveClose=false, password=***********, port=3306, prepareCacheSize=250, socket='null', username='x'xxx', allowMultiQueries=false, allowPipelining=true, connectionAttributes=null, sessionVariables=null, sslConfig=SslConfig{sslMode=DISABLED, serverSslCert='null', clientSslCert='null', tlsProtocol=null}, rsaPublicKey='null', cachingRsaPublicKey='null', allowPublicKeyRetrieval=false, isolationLevel=IsolationLevel{sql='REPEATABLE READ'}, useServerPrepStmts=false, autocommit=true, pamOtherPwd=}}]' (and no fallback has been configured)
... 13 common frames omitted
It seems like connection pool can't acquire connection now. Bug of r2dbc-pool? Or mysql limit??
Not easy to reproduce, normally the peak will be at 19:00
Need help
Related
I to use a Databricks job to use the saveAsNewAPIHadoopFile interface to write an HFile.
Inside a local spark test instance it works great, but inside a Databricks job it fails with:
NoClassDefFoundError: Could not initialize class org.apache.hadoop.hbase.io.hfile.HFile.
In details
My current dependencies look like that:
My test notebook has the following steps:
1. Some imports
import java.sql.Timestamp
import java.nio.ByteBuffer
import org.apache.hadoop.hbase.client.{Admin, ConnectionFactory, HTable}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.{HBaseConfiguration, KeyValue, TableName}
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2
import java.util.UUID
import org.apache.hadoop.mapreduce.Job
2. Setup hbase config (copied from HBase cluster)
val hbaseConfig = HBaseConfiguration.create()
hbaseConfig.set("dfs.domain.socket.path", "/var/lib/hadoop-hdfs/dn_socket")
hbaseConfig.set("dfs.support.append", "false")
[...]
hbaseConfig.set("zookeeper.session.timeout", "120000")
hbaseConfig.set("zookeeper.znode.parent", "/hbase-unsecure")
hbaseConfig.set("hbase.mapreduce.hfileoutputformat.table.name", "DeviceData")
3. Save DataSet
mappedData.saveAsNewAPIHadoopFile(
stagingDir,
classOf[ImmutableBytesWritable],
classOf[KeyValue],
classOf[HFileOutputFormat2],
hbaseConfig
)
The error
Caused by: Job aborted due to stage failure.
Caused by: Task failed while writing rows
Caused by: NoClassDefFoundError: Could not initialize class org.apache.hadoop.hbase.io.hfile.HFile
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:109)
at org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsNewAPIHadoopDataset$1(PairRDDFunctions.scala:1077)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:125)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:419)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1075)
at org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsNewAPIHadoopFile$2(PairRDDFunctions.scala:994)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:125)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:419)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:985)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:6)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:90)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:92)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:94)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:96)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:98)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw.<init>(command-4454301442308183:100)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw.<init>(command-4454301442308183:102)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw.<init>(command-4454301442308183:104)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read.<init>(command-4454301442308183:106)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$.<init>(command-4454301442308183:110)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$.<clinit>(command-4454301442308183)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$eval$.$print$lzycompute(<notebook>:7)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$eval$.$print(<notebook>:6)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$eval.$print(<notebook>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:745)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1021)
at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:574)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:41)
at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:37)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:573)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:600)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:570)
at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:219)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:235)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:903)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:856)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:235)
at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$13(DriverLocal.scala:544)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:240)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:235)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:232)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:53)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:279)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:271)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:53)
at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:521)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:689)
at scala.util.Try$.apply(Try.scala:213)
at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:681)
at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:522)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:634)
at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:427)
at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:370)
at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:221)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 3.0 failed 4 times, most recent failure: Lost task 1.3 in stage 3.0 (TID 38) (10.42.240.4 executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.internal.io.SparkHadoopWriter$.executeTask(SparkHadoopWriter.scala:166)
at org.apache.spark.internal.io.SparkHadoopWriter$.$anonfun$write$1(SparkHadoopWriter.scala:92)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:91)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:788)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1643)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:791)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:647)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.hadoop.hbase.io.hfile.HFile
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.getNewWriter(HFileOutputFormat2.java:419)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:321)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:239)
at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.write(SparkHadoopWriter.scala:371)
at org.apache.spark.internal.io.SparkHadoopWriter$.$anonfun$executeTask$1(SparkHadoopWriter.scala:141)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1677)
at org.apache.spark.internal.io.SparkHadoopWriter$.executeTask(SparkHadoopWriter.scala:138)
... 19 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2765)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2712)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2706)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2706)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1255)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1255)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1255)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2973)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2914)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2902)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1028)
at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2446)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2429)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2467)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2499)
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:87)
at org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsNewAPIHadoopDataset$1(PairRDDFunctions.scala:1077)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:125)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:419)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1075)
at org.apache.spark.rdd.PairRDDFunctions.$anonfun$saveAsNewAPIHadoopFile$2(PairRDDFunctions.scala:994)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:125)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:419)
at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:985)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:6)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:90)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:92)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:94)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:96)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw$$iw.<init>(command-4454301442308183:98)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw$$iw.<init>(command-4454301442308183:100)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw$$iw.<init>(command-4454301442308183:102)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$$iw.<init>(command-4454301442308183:104)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read.<init>(command-4454301442308183:106)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$.<init>(command-4454301442308183:110)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$read$.<clinit>(command-4454301442308183)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$eval$.$print$lzycompute(<notebook>:7)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$eval$.$print(<notebook>:6)
at $line3506744ecc8e4b3a8d207e04b18fbe0595.$eval.$print(<notebook>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:745)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1021)
at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:574)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:41)
at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:37)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:573)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:600)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:570)
at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:219)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:235)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:903)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:856)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:235)
at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$13(DriverLocal.scala:544)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:240)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:235)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:232)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:53)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:279)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:271)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:53)
at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:521)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:689)
at scala.util.Try$.apply(Try.scala:213)
at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:681)
at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:522)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:634)
at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:427)
at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:370)
at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:221)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.internal.io.SparkHadoopWriter$.executeTask(SparkHadoopWriter.scala:166)
at org.apache.spark.internal.io.SparkHadoopWriter$.$anonfun$write$1(SparkHadoopWriter.scala:92)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:91)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:788)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1643)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:791)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:647)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.hadoop.hbase.io.hfile.HFile
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.getNewWriter(HFileOutputFormat2.java:419)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:321)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:239)
at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.write(SparkHadoopWriter.scala:371)
at org.apache.spark.internal.io.SparkHadoopWriter$.$anonfun$executeTask$1(SparkHadoopWriter.scala:141)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1677)
at org.apache.spark.internal.io.SparkHadoopWriter$.executeTask(SparkHadoopWriter.scala:138)
at org.apache.spark.internal.io.SparkHadoopWriter$.$anonfun$write$1(SparkHadoopWriter.scala:92)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:91)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:788)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1643)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:791)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:647)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
The question
I tried everything to understand whats the difference between the Databricks job and my local one.
I'm absolut out of ideas.
Update 1 - 2021-08-09
We have no also reduced the installed libs to the minimum needed, to use the default installed Hadoop-Common lib and avoid conflicts.
Still getting this neasty error even on really basic static operations.
Update 2
Based on the compatibility matrix of HBase, it would be good to know with Hadoop version DataBricks cluster is using by default and how to replace them with a compatible one.
The basic problem were some missing dependencies.
The point that I was not able to detect this was, that after cluster restart the concrete error was only showed one time. After this point, any operation on the running cluster only showing the misleading error NoClassDefFoundError. I think this is because at cluster start, HBase libraries are doing some initial stuff. If this fails, it will not produce any useful error message until you restart the cluster.
The concrete error here were two missing libraries org.apache.hbase:hbase-metrics and org.apache.hbase:hbase-metrics-api.
I have one step configurations as below
stepBuilderFactory.get("step2")
.<EventLogWrapper, BaseCscEvent>chunk(100)
.reader(eventLogReader)
.processor(eventLogItemProcessor)
.writer(compositeItemWriter)
.faultTolerant().retryLimit(10).retry(Exception.class)
.stream(dataBaseItemWriter)
.stream(rabbitAmqpItemWriter)
.taskExecutor(taskExecutor())
.build();
My Reader configuration is as below
AmqpItemReader<EventLog> eventLogReader(AmqpTemplate eventLogReaderRabbitTemplate){
AmqpItemReader amqpItemReader = new AmqpItemReader<EventLog>(eventLogReaderRabbitTemplate);
amqpItemReader.setItemType(EventLogWrapper.class);
return amqpItemReader;
}
I am getting one intermittent error as below
ExitMessage org.springframework.retry.RetryException: Non-skippable exception in recoverer while processing; nested exception is java.lang.NullPointerException at org.springframework.batch.core.step.item.FaultTolerantChunkProcessor$2.recover(FaultTolerantChunkProcessor.java:289) at org.springframework.retry.support.RetryTemplate.handleRetryExhausted(RetryTemplate.java:512) at org.springframework.retry.support.RetryTemplate.doExecute(RetryTemplate.java:351) at org.springframework.retry.support.RetryTemplate.execute(RetryTemplate.java:211) at org.springframework.batch.core.step.item.BatchRetryTemplate.execute(BatchRetryTemplate.java:217) at org.springframework.batch.core.step.item.FaultTolerantChunkProcessor.transform(FaultTolerantChunkProcessor.java:298) at org.springframework.batch.core.step.item.SimpleChunkProcessor.process(SimpleChunkProcessor.java:210) at org.springframework.batch.core.step.item.ChunkOrientedTasklet.execute(ChunkOrientedTasklet.java:77) at org.springframework.batch.core.step.tasklet.TaskletStep$ChunkTransactionCallback.doInTransaction(TaskletStep.java:407) at org.springframework.batch.core.step.tasklet.TaskletStep$ChunkTransactionCallback.doInTransaction(TaskletStep.java:331) at org.springframework.transaction.support.TransactionTemplate.execute(TransactionTemplate.java:140) at org.springframework.batch.core.step.tasklet.TaskletStep$2.doInChunkContext(TaskletStep.java:273) at org.springframework.batch.core.scope.context.StepContextRepeatCallback.doInIteration(StepContextRepeatCallback.java:82) at org.springframework.batch.repeat.support.TaskExecutorRepeatTemplate$ExecutingRunnable.run(TaskExecutorRepeatTemplate.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.NullPointerException
What is the reason behind this behavior ?
What should I do to resolve this ?
Thanks for reading!
I am trying to load dataframe in to teradata using JDBC driver, but getting below error.
jdbc version:- 16.10.00.07
cloudera version :- 5.15
invoking spark shell using below command.
spark2-shell --jars terajdbc4.jar,tdgssconfig.jar,teradata-connector-1.4.4.jar
we are able to get schema of the table but not able to select or insert operation for all the columns of the table.
also the code that i am using can be found below.
val insertdf = spark.sql(query)
val prop = new java.util.Properties
prop.setProperty("driver", "com.teradata.jdbc.TeraDriver")
prop.setProperty("user", "xxxxxx")
prop.setProperty("password", "*******")
val url = "jdbc:teradata://10.xxx.130.69/database=stg_database, TMODE=TERA"
val table = "test_table"
insertdf.write.mode("append").jdbc(url, table, prop)
ERROR:-
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2024)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2045)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2064)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:336)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2853)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2153)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2153)
at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2837)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2836)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2153)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2366)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:245)
at org.apache.spark.sql.Dataset.show(Dataset.scala:644)
at org.apache.spark.sql.Dataset.show(Dataset.scala:603)
at org.apache.spark.sql.Dataset.show(Dataset.scala:612)
... 48 elided
Caused by: java.sql.SQLException: [Teradata JDBC Driver] [TeraJDBC 16.10.00.07] [Error 1277] [SQLState 08S01] Login timeout for Connection to 10.130.130.69 Tue Feb 04 02:05:21 EST 2020 socket orig=10.130.130.69 cid=44c91563 sess=0 java.net.SocketTimeoutException: connect timed out at java.net.PlainSocketImpl.socketConnect(Native Method) at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) at java.net.Socket.connect(Socket.java:589) at com.teradata.jdbc.jdbc_4.io.TDNetworkIOIF$ConnectThread.run(TDNetworkIOIF.java:1242)
at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:95)
at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:70)
at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeIoJDBCException(ErrorFactory.java:208)
at com.teradata.jdbc.jdbc_4.util.ErrorAnalyzer.analyzeIoError(ErrorAnalyzer.java:59)
at com.teradata.jdbc.jdbc_4.io.TDNetworkIOIF.createSocketConnection(TDNetworkIOIF.java:163)
at com.teradata.jdbc.jdbc_4.io.TDNetworkIOIF.<init>(TDNetworkIOIF.java:142)
at com.teradata.jdbc.jdbc.GenericTeradataConnection.getIO(GenericTeradataConnection.java:142)
at com.teradata.jdbc.jdbc.GenericLogonController.run(GenericLogonController.java:100)
at com.teradata.jdbc.jdbc_4.TDSession.<init>(TDSession.java:211)
at com.teradata.jdbc.jdk6.JDK6_SQL_Connection.<init>(JDK6_SQL_Connection.java:36)
at com.teradata.jdbc.jdk6.JDK6ConnectionFactory.constructSQLConnection(JDK6ConnectionFactory.java:25)
at com.teradata.jdbc.jdbc.ConnectionFactory.createConnection(ConnectionFactory.java:178)
at com.teradata.jdbc.jdbc.ConnectionFactory.createConnection(ConnectionFactory.java:168)
at com.teradata.jdbc.TeraDriver.doConnect(TeraDriver.java:236)
at com.teradata.jdbc.TeraDriver.connect(TeraDriver.java:162)
at org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper.connect(DriverWrapper.scala:45)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:61)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:52)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:286)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.SocketTimeoutException: connect timed out
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
These are the important parts of the stack trace from the Teradata JDBC Driver:
[Error 1277] [SQLState 08S01] Login timeout for Connection to 10.130.130.69
...
Caused by: java.net.SocketTimeoutException: connect timed out
This error means that the Teradata JDBC Driver was unable to connect to a database at the IP address 10.130.130.69.
Either that is the wrong IP address, or the database is not running (it is down), or the system that you are running your program on doesn't have connectivity to that IP address (prevented by firewall).
I have a springboot application which connects to a postgres DB.Connection pool used is Hikari.
I have a select query running every 5 minutes.Though it gets executed properly most of the times, i encounter a PSQLException intermittently
org.postgresql.util.PSQLException: An I/O error occurred while sending to the backend.
at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:333) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgStatement.executeInternal(PgStatement.java:441) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgStatement.execute(PgStatement.java:365) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgPreparedStatement.executeWithFlags(PgPreparedStatement.java:155) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgPreparedStatement.executeUpdate(PgPreparedStatement.java:132) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgConnection.isValid(PgConnection.java:1364) ~[postgresql-42.2.2.jar!/:42.2.2]
at com.zaxxer.hikari.pool.PoolBase.isConnectionAlive(PoolBase.java:153) [HikariCP-3.2.0.jar!/:na]
at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:179) [HikariCP-3.2.0.jar!/:na]
at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:155) [HikariCP-3.2.0.jar!/:na]
at com.zaxxer.hikari.HikariDataSource.getConnection(HikariDataSource.java:128) [HikariCP-3.2.0.jar!/:na]
at org.springframework.jdbc.datasource.DataSourceUtils.fetchConnection(DataSourceUtils.java:157) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.datasource.DataSourceUtils.doGetConnection(DataSourceUtils.java:115) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.datasource.DataSourceUtils.getConnection(DataSourceUtils.java:78) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.core.JdbcTemplate.execute(JdbcTemplate.java:371) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.core.JdbcTemplate.query(JdbcTemplate.java:452) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.core.JdbcTemplate.query(JdbcTemplate.java:462) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at com.***.***.batch.job.CacheRefreshWatcher.cacheRefreshSchedular(CacheRefreshWatcher.java:38) [classes!/:na]
at sun.reflect.GeneratedMethodAccessor344.invoke(Unknown Source) ~[na:na]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_212]
at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_212]
at org.springframework.scheduling.support.ScheduledMethodRunnable.run(ScheduledMethodRunnable.java:84) [spring-context-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.scheduling.support.DelegatingErrorHandlingRunnable.run(DelegatingErrorHandlingRunnable.java:54) [spring-context-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.scheduling.concurrent.ReschedulingRunnable.run(ReschedulingRunnable.java:93) [spring-context-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_212]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [na:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_212]
Caused by: java.net.SocketException: Connection reset
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:115) ~[na:1.8.0_212]
at java.net.SocketOutputStream.write(SocketOutputStream.java:155) ~[na:1.8.0_212]
at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82) ~[na:1.8.0_212]
at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140) ~[na:1.8.0_212]
at org.postgresql.core.PGStream.flush(PGStream.java:514) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.core.v3.QueryExecutorImpl.sendSync(QueryExecutorImpl.java:1363) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:304) ~[postgresql-42.2.2.jar!/:42.2.2]
... 29 common frames omitted
2019-12-14 07:55:00.015 WARN 1 --- [ scheduling-1] com.zaxxer.hikari.pool.PoolBase : HikariPool-1 - Failed to validate connection org.postgresql.jdbc.PgConnection#48cf96e3 (This connection has been closed.). Possibly consider using a shorter maxLifetime value.
2019-12-14 22:00:00.017 WARN 1 --- [ scheduling-1] org.postgresql.jdbc.PgConnection : Validating connection.
org.postgresql.util.PSQLException: An I/O error occurred while sending to the backend.
at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:333) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgStatement.executeInternal(PgStatement.java:441) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgStatement.execute(PgStatement.java:365) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgPreparedStatement.executeWithFlags(PgPreparedStatement.java:155) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgPreparedStatement.executeUpdate(PgPreparedStatement.java:132) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.jdbc.PgConnection.isValid(PgConnection.java:1364) ~[postgresql-42.2.2.jar!/:42.2.2]
at com.zaxxer.hikari.pool.PoolBase.isConnectionAlive(PoolBase.java:153) [HikariCP-3.2.0.jar!/:na]
at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:179) [HikariCP-3.2.0.jar!/:na]
at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:155) [HikariCP-3.2.0.jar!/:na]
at com.zaxxer.hikari.HikariDataSource.getConnection(HikariDataSource.java:128) [HikariCP-3.2.0.jar!/:na]
at org.springframework.jdbc.datasource.DataSourceUtils.fetchConnection(DataSourceUtils.java:157) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.datasource.DataSourceUtils.doGetConnection(DataSourceUtils.java:115) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.datasource.DataSourceUtils.getConnection(DataSourceUtils.java:78) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.core.JdbcTemplate.execute(JdbcTemplate.java:371) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.core.JdbcTemplate.query(JdbcTemplate.java:452) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.jdbc.core.JdbcTemplate.query(JdbcTemplate.java:462) [spring-jdbc-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at com.adessa.promiseengine.batch.job.CacheRefreshWatcher.cacheRefreshSchedular(CacheRefreshWatcher.java:38) [classes!/:na]
at sun.reflect.GeneratedMethodAccessor344.invoke(Unknown Source) ~[na:na]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_212]
at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_212]
at org.springframework.scheduling.support.ScheduledMethodRunnable.run(ScheduledMethodRunnable.java:84) [spring-context-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.scheduling.support.DelegatingErrorHandlingRunnable.run(DelegatingErrorHandlingRunnable.java:54) [spring-context-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at org.springframework.scheduling.concurrent.ReschedulingRunnable.run(ReschedulingRunnable.java:93) [spring-context-5.1.5.RELEASE.jar!/:5.1.5.RELEASE]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_212]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_212]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [na:1.8.0_212]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [na:1.8.0_212]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_212]
org.postgresql.util.PSQLException: An I/O error occurred while sending to the backend.
Caused by: java.net.SocketException: Connection reset
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:115) ~[na:1.8.0_212]
at java.net.SocketOutputStream.write(SocketOutputStream.java:155) ~[na:1.8.0_212]
at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82) ~[na:1.8.0_212]
at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140) ~[na:1.8.0_212]
at org.postgresql.core.PGStream.flush(PGStream.java:514) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.core.v3.QueryExecutorImpl.sendSync(QueryExecutorImpl.java:1363) ~[postgresql-42.2.2.jar!/:42.2.2]
at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:304) ~[postgresql-42.2.2.jar!/:42.2.2]
... 29 common frames omitted
2019-12-14 22:00:00.018 WARN 1 --- [ scheduling-1] com.zaxxer.hikari.pool.PoolBase : HikariPool-1 - Failed to validate connection org.postgresql.jdbc.PgConnection#3029eb50 (This connection has been closed.). Possibly consider using a shorter maxLifetime value.
Below is my hikari properties
hikari:
minimum-idle: 5
maximum-pool-size: 10
idle-timeout: 600000
max-lifetime: 550000
connection-timeout: 600000
Application is hosted in an on premise server connected to an on prem db, also its hosted in AKS connecting to a Azure Postgres DB.
Error is seen in both deployments.
Can someone help me understand why is this happening and suggest a solution.
I am trying to load a (very big) serialized RDD of objects into the memory of a cluster of ec2 nodes, and then do some extraction on those objects and store the resulting RDD on disk (as object files). Unluckily I get SocketException: Connection reset once and SocketTimeoutException: Read timed out a few times.
Here is the relevant part of my code:
val pairsLocation = args(0)
val pairsRDD = sc.objectFile[Pair](pairLocation)
// taking individual objects out of "Pair" objects (containing two of those simple objects)
val extracted = pairsRDD.filter(myFunc(_._1)).
flatMap(x => List(x._1, x._2)).distinct
val savePath = "s3 URI"
extracted.saveAsObjectFile(savePath)
Here are the details of the errors (warnings) I get:
15/03/12 18:40:27 WARN scheduler.TaskSetManager: Lost task 574.0 in stage 0.0 (TID 574, ip-10-45-14-27.us-west-2.compute.internal):
java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(SocketInputStream.java:196)
at java.net.SocketInputStream.read(SocketInputStream.java:122)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:442)
at sun.security.ssl.InputRecord.readV3Record(InputRecord.java:554)
at sun.security.ssl.InputRecord.read(InputRecord.java:509)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:934)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:891)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:102)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:273)
at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
at org.apache.commons.httpclient.ContentLengthInputStream.read(ContentLengthInputStream.java:170)
at java.io.FilterInputStream.read(FilterInputStream.java:133)
at org.apache.commons.httpclient.AutoCloseInputStream.read(AutoCloseInputStream.java:108)
at org.jets3t.service.io.InterruptableInputStream.read(InterruptableInputStream.java:76)
at org.jets3t.service.impl.rest.httpclient.HttpMethodReleaseInputStream.read(HttpMethodReleaseInputStream.java:136)
at org.apache.hadoop.fs.s3native.NativeS3FileSystem$NativeS3FsInputStream.read(NativeS3FileSystem.java:98)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:235)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:275)
at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
at java.io.DataInputStream.readFully(DataInputStream.java:195)
at org.apache.hadoop.io.DataOutputBuffer$Buffer.write(DataOutputBuffer.java:63)
at org.apache.hadoop.io.DataOutputBuffer.write(DataOutputBuffer.java:101)
at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1988)
at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:2120)
at org.apache.hadoop.mapred.SequenceFileRecordReader.next(SequenceFileRecordReader.java:76)
at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:244)
at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:210)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:202)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:58)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:56)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/03/12 18:42:16 WARN scheduler.TaskSetManager: Lost task 380.0 in stage 0.0 (TID 380, ip-10-47-3-111.us-west-2.compute.internal):
java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:152)
at java.net.SocketInputStream.read(SocketInputStream.java:122)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:442)
at sun.security.ssl.InputRecord.readV3Record(InputRecord.java:554)
at sun.security.ssl.InputRecord.read(InputRecord.java:509)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:934)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:891)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:102)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:273)
at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
at org.apache.commons.httpclient.ContentLengthInputStream.read(ContentLengthInputStream.java:170)
at java.io.FilterInputStream.read(FilterInputStream.java:133)
at org.apache.commons.httpclient.AutoCloseInputStream.read(AutoCloseInputStream.java:108)
at org.jets3t.service.io.InterruptableInputStream.read(InterruptableInputStream.java:76)
at org.jets3t.service.impl.rest.httpclient.HttpMethodReleaseInputStream.read(HttpMethodReleaseInputStream.java:136)
at org.apache.hadoop.fs.s3native.NativeS3FileSystem$NativeS3FsInputStream.read(NativeS3FileSystem.java:98)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:235)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:275)
at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
at java.io.DataInputStream.readFully(DataInputStream.java:195)
at org.apache.hadoop.io.DataOutputBuffer$Buffer.write(DataOutputBuffer.java:63)
at org.apache.hadoop.io.DataOutputBuffer.write(DataOutputBuffer.java:101)
at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1988)
at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:2120)
at org.apache.hadoop.mapred.SequenceFileRecordReader.next(SequenceFileRecordReader.java:76)
at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:244)
at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:210)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:202)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:58)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:56)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)