BsonInvalidOperationException: readStartDocument can only be called when CurrentBSONType is DOCUMENT, not when CurrentBSONType is STRING - mongodb

Data from source system -
abc#abc.com # kafka-avro-console-consumer --bootstrap-server
localhost:9092 --topic TTDF.TCDCPOC_DATA_TYPES --from-beginning
--property print.key=true --property print.value=true --property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
"57508564" {"data":{"SEQNO":{"int":57508564},"TEXT":{"string":"Lorem ipsum dolor sit amet,"},"BIGNUM":{"long":11122233344447},"BINOBJ":{"bytes":"#~¦`¬| DATA IS STORED AS BINARY|>"},"CHAROBJ":{"string":"<text>THIS DATA IS STORED AS CLOB</text>"},"FLOATNUM":{"double":6.62607015E-34},"CHARVAR":{"string":"consectetur adipiscing elit,sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."}},"headers":{"operation":"REFRESH","changeSequence":"","timestamp":"","streamPosition":"","transactionId":"","changeMask":null,"columnMask":null}}
^CProcessed a total of 6 messages
Schema registry -
{
"subject": "TTDF.TCDCPOC_DATA_TYPES-value",
"version": 3,
"id": 12,
"schema": "{"type":"record","name":"DataRecord","fields":[{"name":"data","type":{"type":"record","name":"Data","fields":[{"name":"SEQNO","type":["null","int"],"default":null},{"name":"TEXT","type":["null","string"],"default":null},{"name":"BIGNUM","type":["null","long"],"default":null},{"name":"BINOBJ","type":["null","bytes"],"default":null},{"name":"CHAROBJ","type":["null","string"],"default":null},{"name":"FLOATNUM","type":["null","double"],"default":null},{"name":"CHARVAR","type":["null","string"],"default":null}]}},{"name":"headers","type":{"type":"record","name":"Headers","fields":[{"name":"operation","type":{"type":"enum","name":"operation","symbols":["INSERT","UPDATE","DELETE","REFRESH"]}},{"name":"changeSequence","type":"string"},{"name":"timestamp","type":"string"},{"name":"streamPosition","type":"string"},{"name":"transactionId","type":"string"},{"name":"changeMask","type":["null","bytes"]},{"name":"columnMask","type":["null","bytes"]}]}}]}"
}
Errors -
[2019-02-12 12:28:48,364] ERROR WorkerSinkTask{id=mongo-0} Task threw an uncaught and unrecoverable exception. Task is being killed and will not recover until manually restarted. (org.apache.kafka.connect.runtime.WorkerSinkTask:584)
org.bson.BsonInvalidOperationException: readStartDocument can only be called when CurrentBSONType is DOCUMENT, not when CurrentBSONType is STRING.
at org.bson.AbstractBsonReader.verifyBSONType(AbstractBsonReader.java:690)
at org.bson.AbstractBsonReader.checkPreconditions(AbstractBsonReader.java:722)
at org.bson.AbstractBsonReader.readStartDocument(AbstractBsonReader.java:450)
at org.bson.codecs.BsonDocumentCodec.decode(BsonDocumentCodec.java:81)
at org.bson.BsonDocument.parse(BsonDocument.java:62)
at at.grahsl.kafka.connect.mongodb.converter.JsonRawStringRecordConverter.convert(JsonRawStringRecordConverter.java:32)
at at.grahsl.kafka.connect.mongodb.converter.SinkConverter.convert(SinkConverter.java:44)
at at.grahsl.kafka.connect.mongodb.MongoDbSinkTask.lambda$buildWriteModel$3(MongoDbSinkTask.java:186)
at java.util.ArrayList.forEach(ArrayList.java:1257)
at at.grahsl.kafka.connect.mongodb.MongoDbSinkTask.buildWriteModel(MongoDbSinkTask.java:185)
at at.grahsl.kafka.connect.mongodb.MongoDbSinkTask.processSinkRecords(MongoDbSinkTask.java:122)
at at.grahsl.kafka.connect.mongodb.MongoDbSinkTask.lambda$null$0(MongoDbSinkTask.java:111)
at java.util.ArrayList.forEach(ArrayList.java:1257)
at at.grahsl.kafka.connect.mongodb.MongoDbSinkTask.lambda$put$1(MongoDbSinkTask.java:110)
at java.util.HashMap.forEach(HashMap.java:1289)
at at.grahsl.kafka.connect.mongodb.MongoDbSinkTask.put(MongoDbSinkTask.java:109)
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:564)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:225)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:193)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[2019-02-12 12:28:48,364] ERROR WorkerSinkTask{id=mongo-0} Task threw an uncaught and unrecoverable exception (org.apache.kafka.connect.runtime.WorkerTask:177)
org.apache.kafka.connect.errors.ConnectException: Exiting WorkerSinkTask due to unrecoverable exception.
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:586)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:225)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:193)
Config file -
{
"name": "mongo",
"config": {
"key.converter":"org.apache.kafka.connect.storage.StringConverter",
"internal.key.converter":"org.apache.kafka.connect.storage.StringConverter",
"internal.key.converter.schemas.enable":"false",
"key.converter.schemas.enable": false,
"key.ignore":"true",
"value.converter":"io.confluent.connect.avro.AvroConverter",
"internal.value.converter":"io.confluent.connect.avro.AvroConverter",
"value.converter.schemas.enable": true,
"internal.value.converter.schemas.enable":"true",
"key.converter.schema.registry.url":"http://localhost:8081",
"value.converter.schema.registry.url":"http://localhost:8081",
"connector.class": "at.grahsl.kafka.connect.mongodb.MongoDbSinkConnector",
"topics":"TTDF.TCDCPOC_DATA_TYPES",
"mongodb.connection.uri":"mongodb://xxxx:Password1#xxxx:27017/testdb?authSource=xxx",
"mongodb.collection":"TCDCPOC_DATA_TYPES",
"_comment":"transforms\":\"createKey",
"_comment":"transforms.createKey.type:org.apache.kafka.connect.transforms.Flatten$Value",
"_comment":"transforms.Flatten.delimiter:_",
"_comment":"transforms.createKey.type:io.confluent.connect.transforms.Drop$Key",
"_comment":"transforms.createKey.skip.missing.or.null\":\"true",
"_comment":"transforms.createKey.type\":\"org.apache.kafka.connect.transforms.ValueToKey",
"_comment":"transforms.createKey.fields\":\"data.SEQNO",
"_comment":"transforms.createKey.static.key:test"
}
}

Related

Error deserializing message with Kafka Postgres Sink Connector

It's driving me crazy as I'm trying to sink a Kafka topic into a Postgres table. Here's my setup, and I'm not sure what I'm doing wrong.
This is a typical message from the Kafka topic
{
"flightId": "5cbc7ad25732ab0004c51c45",
"recordedAt": "2022-03-26T18:17:11.356Z",
"device": "iOS",
"platform": "A5",
"vehicleId": "621c12a9b12161009865bc5d"
}
Below is my docker-compose.yaml file
version: '3.7'
services:
  connector:
    image: custom-connector:latest
    environment:
      CONNECT_BOOTSTRAP_SERVERS: ${CONNECT_BOOTSTRAP_SERVERS}
      CONNECT_GROUP_ID: "kafka-connect-group-id"
      CONNECT_CONFIG_STORAGE_TOPIC: "kafka-connect-config"
      CONNECT_OFFSET_STORAGE_TOPIC: "kafka-connect-offsets"
      CONNECT_STATUS_STORAGE_TOPIC: "kafka-connect-status"
      CONNECT_REST_ADVERTISED_HOST_NAME: ${CONNECT_REST_ADVERTISED_HOST_NAME}
      CONNECT_SECURITY_PROTOCOL: ${CONNECT_SECURITY_PROTOCOL}
      CONNECT_SASL_MECHANISM: ${CONNECT_SASL_MECHANISM}
      CONNECT_REST_PORT: 8083
      CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
      CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "3"
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "3"
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "3"
      CONNECT_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM: "https"
      CONNECT_REQUEST_TIMEOUT_MS: "20000"
      CONNECT_RETRY_BACKOFF_MS: "500"
      CONNECT_CONSUMER_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM: "https"
      CONNECT_CONSUMER_SASL_MECHANISM: "PLAIN"
      CONNECT_CONSUMER_REQUEST_TIMEOUT_MS: "20000"
      CONNECT_CONSUMER_RETRY_BACKOFF_MS: "500"
      CONNECT_CONSUMER_SECURITY_PROTOCOL: ${CONNECT_SECURITY_PROTOCOL}
      CONNECT_PRODUCER_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM: "https"
      CONNECT_PRODUCER_SASL_MECHANISM: "PLAIN"
      CONNECT_PRODUCER_REQUEST_TIMEOUT_MS: "20000"
      CONNECT_PRODUCER_RETRY_BACKOFF_MS: "500"
      CONNECT_PRODUCER_SECURITY_PROTOCOL: ${CONNECT_SECURITY_PROTOCOL}
      CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components,/u01/connectors
      CONNECT_SASL_JAAS_CONFIG: ${JAAS_CONFIG}
      CONNECT_CONSUMER_SASL_JAAS_CONFIG: ${JAAS_CONFIG}
      CONNECT_PRODUCER_SASL_JAAS_CONFIG: ${JAAS_CONFIG}
      CONNECT_VALUE_CONVERTER: io.confluent.connect.json.JsonSchemaConverter
      CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
      CONNECT_KEY_IGNORE: 'true'
    ports:
      - "8083:8083"
  schema-registry:
    image: "confluentinc/cp-schema-registry:5.2.1"
    ports:
      - '8081:8081'
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: SASL_SSL://${CONNECT_BOOTSTRAP_SERVERS}
      SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: SASL_SSL
      SCHEMA_REGISTRY_KAFKASTORE_SASL_JAAS_CONFIG: ${JAAS_CONFIG}
      SCHEMA_REGISTRY_KAFKASTORE_SASL_MECHANISM: PLAIN
      SCHEMA_REGISTRY_LOG4J_ROOT_LOGLEVEL: INFO
My connector's config file, sent in a PUT request to Kafka Connect:
{
"name": "test-postgres-sink-connector",
"config": {
"connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
"connection.url": "jdbc:postgresql://******:5432/db",
"connection.user": "******",
"connection.password": "******",
"topics": "test-topic",
"table.name.format": "kafka_sink_test",
"value.converter": "io.confluent.connect.json.JsonSchemaConverter",
"value.converter.schemas.enable": "true",
"value.converter.schema.registry.url": "http://schema-registry:8081",
"key.converter": "org.apache.kafka.connect.storage.StringConverter",
"key.ignore": "true",
"name": "test-postgres-sink-connector"
},
"tasks": [
{
"connector": "test-postgres-sink-connector",
"task": 0
}
],
"type": "sink"
}
From the logs, Kafka Connect is complaining:
ERROR WorkerSinkTask{id=test-postgres-sink-connector-0} Error converting message value in topic 'test-topic' partition 2 at offset 0 and timestamp 1647927842369: Converting byte[] to Kafka Connect data failed due to serialization error of topic test-topic: (org.apache.kafka.connect.runtime.WorkerSinkTask)
org.apache.kafka.connect.errors.DataException: Converting byte[] to Kafka Connect data failed due to serialization error of topic test-topic:
at io.confluent.connect.json.JsonSchemaConverter.toConnectData(JsonSchemaConverter.java:119)
at org.apache.kafka.connect.storage.Converter.toConnectData(Converter.java:87)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertValue(WorkerSinkTask.java:560)
at org.apache.kafka.connect.runtime.WorkerSinkTask.lambda$convertAndTransformRecord$4(WorkerSinkTask.java:516)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndRetry(RetryWithToleranceOperator.java:156)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:190)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:132)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertAndTransformRecord(WorkerSinkTask.java:516)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertMessages(WorkerSinkTask.java:493)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:332)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:234)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:203)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:188)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:243)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.kafka.common.errors.SerializationException: Error deserializing JSON message for id -1
at io.confluent.kafka.serializers.json.AbstractKafkaJsonSchemaDeserializer.deserialize(AbstractKafkaJsonSchemaDeserializer.java:177)
at io.confluent.kafka.serializers.json.AbstractKafkaJsonSchemaDeserializer.deserializeWithSchemaAndVersion(AbstractKafkaJsonSchemaDeserializer.java:235)
at io.confluent.connect.json.JsonSchemaConverter$Deserializer.deserialize(JsonSchemaConverter.java:165)
at io.confluent.connect.json.JsonSchemaConverter.toConnectData(JsonSchemaConverter.java:108)
... 18 more
Caused by: org.apache.kafka.common.errors.SerializationException: Unknown magic byte!
at io.confluent.kafka.serializers.AbstractKafkaSchemaSerDe.getByteBuffer(AbstractKafkaSchemaSerDe.java:250)
at io.confluent.kafka.serializers.json.AbstractKafkaJsonSchemaDeserializer.deserialize(AbstractKafkaJsonSchemaDeserializer.java:112)
... 21 more
[2022-03-26 18:11:31,779] ERROR WorkerSinkTask{id=test-postgres-sink-connector-0} Task threw an uncaught and unrecoverable exception. Task is being killed and will not recover until manually restarted (org.apache.kafka.connect.runtime.WorkerTask)
org.apache.kafka.connect.errors.ConnectException: Tolerance exceeded in error handler
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:206)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:132)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertAndTransformRecord(WorkerSinkTask.java:516)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertMessages(WorkerSinkTask.java:493)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:332)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:234)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:203)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:188)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:243)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.kafka.connect.errors.DataException: Converting byte[] to Kafka Connect data failed due to serialization error of topic test-topic:
at io.confluent.connect.json.JsonSchemaConverter.toConnectData(JsonSchemaConverter.java:119)
at org.apache.kafka.connect.storage.Converter.toConnectData(Converter.java:87)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertValue(WorkerSinkTask.java:560)
at org.apache.kafka.connect.runtime.WorkerSinkTask.lambda$convertAndTransformRecord$4(WorkerSinkTask.java:516)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndRetry(RetryWithToleranceOperator.java:156)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:190)
... 13 more
Caused by: org.apache.kafka.common.errors.SerializationException: Error deserializing JSON message for id -1
at io.confluent.kafka.serializers.json.AbstractKafkaJsonSchemaDeserializer.deserialize(AbstractKafkaJsonSchemaDeserializer.java:177)
at io.confluent.kafka.serializers.json.AbstractKafkaJsonSchemaDeserializer.deserializeWithSchemaAndVersion(AbstractKafkaJsonSchemaDeserializer.java:235)
at io.confluent.connect.json.JsonSchemaConverter$Deserializer.deserialize(JsonSchemaConverter.java:165)
at io.confluent.connect.json.JsonSchemaConverter.toConnectData(JsonSchemaConverter.java:108)
... 18 more
Caused by: org.apache.kafka.common.errors.SerializationException: Unknown magic byte!
at io.confluent.kafka.serializers.AbstractKafkaSchemaSerDe.getByteBuffer(AbstractKafkaSchemaSerDe.java:250)
at io.confluent.kafka.serializers.json.AbstractKafkaJsonSchemaDeserializer.deserialize(AbstractKafkaJsonSchemaDeserializer.java:112)
... 21 more
[2022-03-26 18:11:31,780] INFO Stopping task (io.confluent.connect.jdbc.sink.JdbcSinkTask)
[2022-03-26 18:11:31,781] INFO [Consumer clientId=connector-consumer-test-postgres-sink-connector-0, groupId=test-postgres-sink-connector] Revoke previously assigned partitions test-topic-0, test-topic-1, test-topic-2, test-topic-3, test-topic-4, test-topic-5 (org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)
[2022-03-26 18:11:31,781] INFO [Consumer clientId=test-postgres-sink-connector-0, groupId=test-postgres-sink-connector] Member test-postgres-sink-connector-0-89225797-cac6-41f5-9373-bbd16bc8a1b6 sending LeaveGroup request to coordinator b2-pkc-2396y.us-east-1.aws.confluent.cloud:9092 (id: 2147483645 rack: null) due to the consumer is being closed (org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)
[2022-03-26 18:11:31,783] INFO [Consumer clientId=test-postgres-sink-connector-0, groupId=test-postgres-sink-connector] Resetting generation due to: consumer pro-actively leaving the group (org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)
[2022-03-26 18:11:31,783] INFO [Consumer clientId=connector-test-postgres-sink-connector-0, groupId=connect-test-postgres-sink-connector] Request joining group due to: consumer pro-actively leaving the group (org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)
[2022-03-26 18:11:32,284] INFO Metrics scheduler closed (org.apache.kafka.common.metrics.Metrics)
[2022-03-26 18:11:32,285] INFO Closing reporter org.apache.kafka.common.metrics.JmxReporter (org.apache.kafka.common.metrics.Metrics)
[2022-03-26 18:11:32,286] INFO Metrics reporters closed (org.apache.kafka.common.metrics.Metrics)
[2022-03-26 18:11:32,316] INFO App info kafka.consumer for connector-test-postgres-sink-connector-0 unregistered (org.apache.kafka.common.utils.AppInfoParser)
This is a typical message from the Kafka topic
Your data has no schema, so you cannot use JsonSchemaConverter. Plus, the JDBC Sink requires a schema. JDBC Sink Deep Dive
Since the data has no schema, and specifically was not produced with the JSONSchema serializer against the Confluent Schema Registry, you get the Unknown magic byte! error from that converter. Instead, you'll need to use the regular JsonConverter class (org.apache.kafka.connect.json.JsonConverter, not the io.confluent one). But, as stated, the JDBC Sink needs a schema, so value.converter.schemas.enable must be true, which in turn means every message has to carry a schema/payload envelope.
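As a rough sketch (the struct name below is made up, and the field list simply mirrors the sample message above), the sink config would switch to:
"value.converter": "org.apache.kafka.connect.json.JsonConverter",
"value.converter.schemas.enable": "true"
and the producer would then have to emit the schema/payload envelope that JsonConverter expects, something like:
{
  "schema": {
    "type": "struct",
    "name": "flight_record",
    "optional": false,
    "fields": [
      { "field": "flightId", "type": "string", "optional": false },
      { "field": "recordedAt", "type": "string", "optional": false },
      { "field": "device", "type": "string", "optional": false },
      { "field": "platform", "type": "string", "optional": false },
      { "field": "vehicleId", "type": "string", "optional": false }
    ]
  },
  "payload": {
    "flightId": "5cbc7ad25732ab0004c51c45",
    "recordedAt": "2022-03-26T18:17:11.356Z",
    "device": "iOS",
    "platform": "A5",
    "vehicleId": "621c12a9b12161009865bc5d"
  }
}
The other option is to keep JsonSchemaConverter but actually produce the topic with the Confluent JSONSchema serializer, so the magic byte and schema ID are present in each record.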
More info - Converter Deep Dive

Kafka-Connect HDFS Sink Connector NullPointerException

While trying Kafka Connect for the HDFS sink based on the Confluent article https://docs.confluent.io/current/connect/kafka-connect-hdfs/index.html, we are met with
java.lang.NullPointerException
The following information is attached below:
etc/connect-distributed.properties,
Error stack trace,
Connector REST API.
We have used connect-standalone.properties, connect-distributed.properties and also quickstart-hdfs.properties. Any assistance will be highly appreciated.
Thanks
We are trying various source and sink connectors with many different data sources and sinks.
kafka-avro-console-producer --broker-list localhost:9092 --topic test_hdfs --property value.schema='{"type":"record","name":"myrecord","fields":[{"name":"f1","type":"string"}]}'
{"f1": "value1"}
{"f1": "value2"}
{"f1": "value3"}
kafka-avro-console-consumer --bootstrap-server localhost:9092 --property schema.registry.url=http://localhost:8081 --topic test_hdfs --from-beginning
{"f1":"value1"}
{"f1":"value2"}
{"f1":"value3"}
Connector-configurations
-------------------------
curl -i -X POST -H "Content-Type:application/json" http://localhost:8083/connectors/ -d'{
"name": "hdfs-sink-connect-6",
"config": {
"connector.class": "io.confluent.connect.hdfs.HdfsSinkConnector",
"tasks.max": "1",
"value.converter": "io.confluent.connect.avro.AvroConverter",
"value.converter.schema.registry.url": "http://localhost:8081",
"key.converter.schema.registry.url": "http://localhost:8081",
"key.converter":"io.confluent.connect.avro.AvroConverter",
"key.converter.schemas.enable": "true",
"value.converter.schemas.enable": "true",
"topics": "test_hdfs",
"hdfs.url": "hdfs://localhost:9000",
"flush.size": "3",
"key.ignore":"true"
}
}'
----------------------------------------------------------------------------------------------------------------------------------------------
connect-distributed.properties
----------------------------
key.converter=io.confluent.connect.avro.AvroConverter
value.converter=io.confluent.connect.avro.AvroConverter
name=hdfs-sink
connector.class=io.confluent.connect.hdfs.HdfsSinkConnector
tasks.max=1
topics=test_hdfs
hdfs.url=hdfs://localhost:9000
flush.size=3
hadoop.conf.dir=/usr/local/hadoop/hadoop-2.8.5/etc/hadoop
rotate.interval.ms=100000
format.class=io.confluent.connect.hdfs.avro.AvroFormat
# Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
# it to
key.converter.schemas.enable=true
value.converter.schemas.enable=true
value.converter.schema.registry.url=http://localhost:8081
key.converter.schema.registry.url=http://localhost:8081
-----------------------------------------------------------------------------------------------------------------------------------------------
ErrorStack Trace
----------------
"java.lang.NullPointerException at io.confluent.connect.hdfs.HdfsSinkTask.open(HdfsSinkTask.java:142) at org.apache.kafka.connect.runtime.WorkerSinkTask.openPartitions(WorkerSinkTask.java:586) at org.apache.kafka.connect.runtime.WorkerSinkTask.access$1100(WorkerSinkTask.java:67) at org.apache.kafka.connect.runtime.WorkerSinkTask$HandleRebalance.onPartitionsAssigned(WorkerSinkTask.java:646) at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.onJoinComplete(ConsumerCoordinator.java:292) at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.joinGroupIfNeeded(AbstractCoordinator.java:410) at org.apache.kafka.clients.consumer.internals.AbstractCoordinator.ensureActiveGroup(AbstractCoordinator.java:344) at org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.poll(ConsumerCoordinator.java:342) at org.apache.kafka.clients.consumer.KafkaConsumer.updateAssignmentMetadataIfNeeded(KafkaConsumer.java:1226) at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1191) at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1176) at org.apache.kafka.connect.runtime.WorkerSinkTask.pollConsumer(WorkerSinkTask.java:443) at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:316) at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:224) at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:192) at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175) at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266)at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748)"

kafka-connect : Getting error in distributed configuration for connector sink cassandra

I get a task error for a distributed configuration of a Cassandra sink connector. I was running the command:
curl -s localhost:8083/connectors/cassandraSinkConnector2/status | jq
to get the status
{
"name": "cassandraSinkConnector2",
"connector": {
"state": "RUNNING",
"worker_id": localhost:8083"
},
"tasks": [
{
"id": 0,
"state": "FAILED",
"worker_id": "localhost:8083",
"trace": "org.apache.kafka.common.KafkaException: Failed to construct kafka consumer\n\tat org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:811)\n\tat org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:624)\n\tat org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:605)\n\tat org.apache.kafka.connect.runtime.Worker.buildWorkerTask(Worker.java:505)\n\tat org.apache.kafka.connect.runtime.Worker.startTask(Worker.java:441)\n\tat org.apache.kafka.connect.runtime.distributed.DistributedHerder.startTask(DistributedHerder.java:865)\n\tat org.apache.kafka.connect.runtime.distributed.DistributedHerder.access$1600(DistributedHerder.java:110)\n\tat org.apache.kafka.connect.runtime.distributed.DistributedHerder$13.call(DistributedHerder.java:880)\n\tat org.apache.kafka.connect.runtime.distributed.DistributedHerder$13.call(DistributedHerder.java:876)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: org.apache.kafka.common.KafkaException: io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor ClassNotFoundException exception occurred\n\tat org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:357)\n\tat org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:332)\n\tat org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:319)\n\tat org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:701)\n\t... 12 more\nCaused by: java.lang.ClassNotFoundException: io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor\n\tat java.net.URLClassLoader.findClass(URLClassLoader.java:382)\n\tat java.lang.ClassLoader.loadClass(ClassLoader.java:424)\n\tat org.apache.kafka.connect.runtime.isolation.PluginClassLoader.loadClass(PluginClassLoader.java:104)\n\tat java.lang.ClassLoader.loadClass(ClassLoader.java:357)\n\tat java.lang.Class.forName0(Native Method)\n\tat java.lang.Class.forName(Class.java:348)\n\tat org.apache.kafka.common.utils.Utils.loadClass(Utils.java:338)\n\tat org.apache.kafka.common.utils.Utils.newInstance(Utils.java:327)\n\tat org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:355)\n\t... 15 more\n"
}
],
"type": "sink"
Stack trace:
"trace": "org.apache.kafka.common.KafkaException: Failed to construct kafka consumer
at org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:811)
at org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:624)
at org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:605)
at org.apache.kafka.connect.runtime.Worker.buildWorkerTask(Worker.java:505)
at org.apache.kafka.connect.runtime.Worker.startTask(Worker.java:441)
at org.apache.kafka.connect.runtime.distributed.DistributedHerder.startTask(DistributedHerder.java:865)
at org.apache.kafka.connect.runtime.distributed.DistributedHerder.access$1600(DistributedHerder.java:110)
at org.apache.kafka.connect.runtime.distributed.DistributedHerder$13.call(DistributedHerder.java:880)
at org.apache.kafka.connect.runtime.distributed.DistributedHerder$13.call(DistributedHerder.java:876)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.kafka.common.KafkaException: io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor ClassNotFoundException exception occurred
at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:357)
at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:332)
at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:319)
at org.apache.kafka.clients.consumer.KafkaConsumer.<init>(KafkaConsumer.java:701)
... 12 more
Caused by: java.lang.ClassNotFoundException: io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at org.apache.kafka.connect.runtime.isolation.PluginClassLoader.loadClass(PluginClassLoader.java:104)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.kafka.common.utils.Utils.loadClass(Utils.java:338)
at org.apache.kafka.common.utils.Utils.newInstance(Utils.java:327)
at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstances(AbstractConfig.java:355)
... 15 more
You can find below the configuration of the connector.
{
"name": "cassandraSinkConnector2",
"config": {
"connector.class": "io.confluent.connect.cassandra.CassandraSinkConnector",
"tasks.max": "1",
"topics": "appartenance_de",
"cassandra.contact.points": "localhost",
"cassandra.kcql": "INSERT INTO app_test SELECT * FROM app_de",
"cassandra.port": "9042",
"cassandra.keyspace": "dev_dkks",
"cassandra.username": "superuser",
"cassandra.password": "password",
"cassandra.write.mode": "insert",
"value.converter.schemas.enable": "true",
"value.converter": "io.confluent.connect.avro.AvroConverter",
"value.converter.schema.registry.url": "http://localhost:8081",
"name": "cassandraSinkConnector2"
},
"tasks": [
{
"connector": "cassandraSinkConnector2",
"task": 0
}
],
"type": "sink"
}
New error:
org.apache.kafka.connect.errors.ConnectException: Exiting WorkerSinkTask due to unrecoverable exception.
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:560)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:321)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:224)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:192)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.kafka.connect.errors.DataException: Record with a null key was encountered. This connector requires that records from Kafka contain the keys for the Cassandra table. Please use a transformation like org.apache.kafka.connect.transforms.ValueToKey to create a key with the proper fields.
at io.confluent.connect.cassandra.CassandraSinkTask.put(CassandraSinkTask.java:86)
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:538)
... 10 more
"
The root error is
java.lang.ClassNotFoundException: io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor
The Monitoring Interceptors are part of Confluent Platform. You can either disable their use in your Kafka Connect worker config, or better, make sure that the /usr/share/java/monitoring-interceptors/monitoring-interceptors-5.2.1.jar JAR is available to your Kafka Connect worker.
The new error you're seeing is
org.apache.kafka.connect.errors.DataException:
Record with a null key was encountered. This connector requires that records from Kafka contain the keys for the Cassandra table.
Please use a transformation like org.apache.kafka.connect.transforms.ValueToKey to create a key with the proper fields.
I'd suggest using a Single Message Transform as suggested in the error to correctly key your data. You can see an example of doing this here and the documentation for the transform here.
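As a rough sketch (the field name id below is just a placeholder for whichever message field should become the Cassandra primary key), the connector config would gain something like:
"transforms": "createKey",
"transforms.createKey.type": "org.apache.kafka.connect.transforms.ValueToKey",
"transforms.createKey.fields": "id"
If the connector needs a primitive key rather than a struct, you can additionally chain org.apache.kafka.connect.transforms.ExtractField$Key to pull that single field out of the newly created key.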

Errors/Issues using different converters in kafka-connect to S3

I have been trying to implement the Confluent kafka-connect image to connect to our on-prem S3. We have successfully written to S3 from the box using Boto3, so we know it is not a connection issue.
Depending on which converters I use, they produce different errors.
Here are the environment variables running in the docker container.
CONNECT_CONFIG_STORAGE_TOPIC=__kafka-connect-config
CONNECT_OFFSET_STORAGE_TOPIC=__kafka-connect-offsets
CONNECT_STATUS_STORAGE_TOPIC=__kafka-connect-status
CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR=3
CONNECT_CONFIG_STORAGE_PARTITIONS=1
CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR=3
CONNECT_OFFSET_STORAGE_PARTITIONS=1
CONNECT_STATUS_STORAGE_REPLICATION_FACTOR=3
CONNECT_STATUS_STORAGE_PARTITIONS=1
CONNECT_REST_ADVERTISED_HOST_NAME=hostname
CONNECT_REST_ADVERTIZED_LISTENER=listener
CONNECT_INTERNAL_KEY_CONVERTER=org.apache.kafka.connect.json.JsonConverter
CONNECT_INTERNAL_VALUE_CONVERTER=org.apache.kafka.connect.json.JsonConverter
CONNECT_KEY_CONVERTER=org.apache.kafka.connect.json.JsonConverter
CONNECT_VALUE_CONVERTER=io.confluent.connect.avro.AvroConverter
CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL=http://schema-registry:8081
CONNECT_KEY_CONVERTER_SCHEMAS_ENABLED=false
CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLED=true
CONNECT_REST_ADVERTISED_PORT=8083
CONNECT_REPLICATION_FACTOR=2
CONNECT_GROUP_ID=APP-CONNECT
CONNECT_CONSUMER_BOOTSTRAP_SERVERS=SASL_SSL://server-1.com:9092,SASL_SSL://server-2.com:9092,SASL_SSL://server-3.com:9092
CONNECT_BOOTSTRAP_SERVERS=SASL_SSL://server-1.com:9092,SASL_SSL://server-2.com:9092,SASL_SSL://server-3.com:9092
CONNECT_CONSUMER_SECURITY_PROTOCOL=SASL_SSL
CONNECT_CONSUMER_SASL_JAAS_CONFIG=org.apache.kafka.common.security.plain.PlainLoginModule required username='admin' password='pw';
CONNECT_CONSUMER_SSL_PROTOCOL=SSL
CONNECT_CONSUMER_SSL_TRUSTSTORE_LOCATION=/etc/kafka/secrets/kafka.client.truststore.jks
CONNECT_CONSUMER_SSL_TRUSTSTORE_PASSWORD=password
CONNECT_CONSUMER_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM=
CONNECT_CONSUMER_SASL_MECHANISM=PLAIN
CONNECT_LOG4J_OPTS=-Dlog4j.configuration=file:/etc/kafka_connect/log4j/log4j.properties
CONNECT_OFFSET_FLUSH_INTERVAL_MS=10000
CONNECT_PLUGIN_PATH=/usr/share/java,/usr/share/confluent-hub-components
CONNECT_REST_PORT=8083
CONNECT_SECURITY_PROTOCOL=SASL_SSL
CONNECT_SASL_JAAS_CONFIG=org.apache.kafka.common.security.plain.PlainLoginModule required username='admin' password='pw';
CONNECT_SASL_MECHANISM=PLAIN
CONNECT_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM=
CONNECT_SSL_PROTOCOL=SSL
CONNECT_SSL_TRUSTSTORE_LOCATION=/etc/kafka/secrets/kafka.client.truststore.jks
CONNECT_SSL_TRUSTSTORE_PASSWORD=password
CONNECT_ZOOKEEPER_CONNECT=SASL_SSL://server-1.com:9092,SASL_SSL://server-2.com:9092,SASL_SSL://server-3.com:9092
{
"connector.class": "io.confluent.connect.s3.S3SinkConnector",
"consumer.sasl.jaas.config": "org.apache.kafka.common.security.plain.PlainLoginModule required username='admin' password='pw';",
"flush.size": "1500",
"topics": "inventory",
"tasks.max": "2",
"rotate.interval.ms": "1000",
"consumer.sasl.mechanism": "PLAIN",
"store.url": "http://s3-server:9020",
"format.class": "io.confluent.connect.s3.format.avro.AvroFormat",
"internal.key.converter.schemas.enable": "false",
"internal.value.converter.schemas.enable": "false",
"value.converter": "org.apache.kafka.connect.storage.StringConverter",
"value.converter.schema.registry.url": "http://schema-registry:8081",
"key.converter.schemas.enabled": "false",
"value.converter.schemas.enabled": "true",
"partitioner.class": "io.confluent.connect.storage.partitioner.DefaultPartitioner",
"schema.generator.class": "io.confluent.connect.storage.hive.schema.DefaultSchemaGenerator",
"name": "inventory-2",
"consumer.security.protocol": "SASL_SSL",
"storage.class": "io.confluent.connect.s3.storage.S3Storage",
"s3.bucket.name": "inventory-stage"
}
I get what appears to be a successful startup. However, when I check the bucket, I do not have any objects there. I have confirmed using the kafka-avro-console-consumer that Avro messages do exist in the topic.
[2019-04-11 18:14:52,612] INFO [Consumer clientId=consumer-42, groupId=connect-inventory-2] Resetting offset for partition inventory-0 to offset 9. (org.apache.kafka.clients.consumer.internals.Fetcher)
[2019-04-11 18:14:52,614] INFO Opening record writer for: topics/inventory/partition=2/inventory+2+0000000008.avro (io.confluent.connect.s3.format.avro.AvroRecordWriterProvider)
[2019-04-11 18:14:52,621] INFO [Consumer clientId=consumer-42, groupId=connect-inventory-2] Resetting offset for partition inventory-1 to offset 8. (org.apache.kafka.clients.consumer.internals.Fetcher)
[2019-04-11 18:14:52,621] WARN Property 'rotate.interval.ms' is set to '1000ms' but partitioner is not an instance of 'io.confluent.connect.storage.partitioner.TimeBasedPartitioner'. This property is ignored. (io.confluent.connect.s3.TopicPartitionWriter)
[2019-04-11 18:14:52,621] WARN Property 'rotate.interval.ms' is set to '1000ms' but partitioner is not an instance of 'io.confluent.connect.storage.partitioner.TimeBasedPartitioner'. This property is ignored. (io.confluent.connect.s3.TopicPartitionWriter)
[2019-04-11 18:14:52,626] INFO Opening record writer for: topics/inventory/partition=1/inventory+1+0000000008.avro (io.confluent.connect.s3.format.avro.AvroRecordWriterProvider)
[2019-04-11 18:14:52,645] INFO Opening record writer for: topics/inventory/partition=0/inventory+0+0000000009.avro (io.confluent.connect.s3.format.avro.AvroRecordWriterProvider)
When I change the value converter to the AvroConverter, thinking that the messages are in Avro and will need to be converted before they can be consumed by the connector API, I use this configuration:
{
"connector.class": "io.confluent.connect.s3.S3SinkConnector",
"consumer.sasl.jaas.config": "org.apache.kafka.common.security.plain.PlainLoginModule required username='admin' password='pw';",
"flush.size": "1500",
"topics": "inventory",
"tasks.max": "2",
"rotate.interval.ms": "1000",
"consumer.sasl.mechanism": "PLAIN",
"store.url": "http://s3-server:9020",
"format.class": "io.confluent.connect.s3.format.avro.AvroFormat",
"internal.key.converter.schemas.enable": "false",
"internal.value.converter.schemas.enable": "false",
"value.converter": "io.confluent.connect.avro.AvroConverter",
"value.converter.schema.registry.url": "http://schema-registry:8081",
"key.converter.schemas.enabled": "false",
"value.converter.schemas.enabled": "true",
"partitioner.class": "io.confluent.connect.storage.partitioner.DefaultPartitioner",
"schema.generator.class": "io.confluent.connect.storage.hive.schema.DefaultSchemaGenerator",
"name": "inventory-2",
"consumer.security.protocol": "SASL_SSL",
"storage.class": "io.confluent.connect.s3.storage.S3Storage",
"s3.bucket.name": "inventory-stage"
}
This produces the error below, which indicates that the Avro converter cannot find the schema with ID 41. However, that ID exists in the Schema Registry, as shown after the stack trace.
[2019-04-11 18:26:56,813] ERROR WorkerSinkTask{id=inventory-2-1} Task threw an uncaught and unrecoverable exception (org.apache.kafka.connect.runtime.WorkerTask)
org.apache.kafka.connect.errors.ConnectException: Tolerance exceeded in error handler
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:178)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:104)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertAndTransformRecord(WorkerSinkTask.java:514)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertMessages(WorkerSinkTask.java:491)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:226)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:194)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.kafka.connect.errors.DataException: inventory
at io.confluent.connect.avro.AvroConverter.toConnectData(AvroConverter.java:103)
at org.apache.kafka.connect.runtime.WorkerSinkTask.lambda$convertAndTransformRecord$1(WorkerSinkTask.java:514)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndRetry(RetryWithToleranceOperator.java:128)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:162)
... 13 more
Caused by: org.apache.kafka.common.errors.SerializationException: Error retrieving Avro schema for id 41
Caused by: io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException: Subject not found.; error code: 40401
at io.confluent.kafka.schemaregistry.client.rest.RestService.sendHttpRequest(RestService.java:209)
at io.confluent.kafka.schemaregistry.client.rest.RestService.httpRequest(RestService.java:235)
at io.confluent.kafka.schemaregistry.client.rest.RestService.lookUpSubjectVersion(RestService.java:302)
at io.confluent.kafka.schemaregistry.client.rest.RestService.lookUpSubjectVersion(RestService.java:290)
at io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient.getVersionFromRegistry(CachedSchemaRegistryClient.java:129)
at io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient.getVersion(CachedSchemaRegistryClient.java:230)
at io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer.schemaVersion(AbstractKafkaAvroDeserializer.java:184)
at io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer.deserialize(AbstractKafkaAvroDeserializer.java:153)
at io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer.deserializeWithSchemaAndVersion(AbstractKafkaAvroDeserializer.java:215)
at io.confluent.connect.avro.AvroConverter$Deserializer.deserialize(AvroConverter.java:139)
at io.confluent.connect.avro.AvroConverter.toConnectData(AvroConverter.java:87)
at org.apache.kafka.connect.runtime.WorkerSinkTask.lambda$convertAndTransformRecord$1(WorkerSinkTask.java:514)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndRetry(RetryWithToleranceOperator.java:128)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:162)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:104)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertAndTransformRecord(WorkerSinkTask.java:514)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertMessages(WorkerSinkTask.java:491)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:226)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:194)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[2019-04-11 18:26:56,814] ERROR WorkerSinkTask{id=inventory-2-1} Task is being killed and will not recover until manually restarted (org.apache.kafka.connect.runtime.WorkerTask)
[2019-04-11 18:26:56,815] INFO [Consumer clientId=consumer-44, groupId=connect-inventory-2] Sending LeaveGroup request to coordinator localhost:9092 (id: 2147483644 rack: null) (org.apache.kafka.clients.consumer.internals.AbstractCoordinator)
{
"subject": "inventory-com.company.dcp.event.schema.shotify.SongCreatedEvent",
"version": 1,
"id": 41,
"schema": "{\"type\":\"record\",\"name\":\"SongCreatedEvent\",\"namespace\":\"com.company.dcp.event.schema.shotify\",\"doc\":\"Information about the Song Added event\",\"fields\":[{\"name\":\"eventHeader\",\"type\":{\"type\":\"record\",\"name\":\"EventHeader\",\"namespace\":\"com.company.commons.shotify\",\"fields\":[{\"name\":\"id\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"time\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"type\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"source\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}]}},{\"name\":\"song\",\"type\":{\"type\":\"record\",\"name\":\"Song\",\"namespace\":\"com.company.commons.shotify\",\"fields\":[{\"name\":\"title\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"Title of the Song\"},{\"name\":\"artist\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"The song composer\"},{\"name\":\"duration\",\"type\":\"int\",\"doc\":\"Song Duration in minutes\"},{\"name\":\"bitrate\",\"type\":\"int\",\"doc\":\"Song Bitrate, measured in kilobytes per second\"},{\"name\":\"lyrics\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"Lyrics of the Song\"},{\"name\":\"fileURL\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"Unique file Reference to the song\"}]}}],\"version\":\"2\"}"
}

Kafka Connect sink tasks ignore tolerance limits

I am trying to ignore bad messages in a sink connector with the errors.tolerance: all option. Full connector configuration:
{
"name": "crm_data-sink_pandora",
"config": {
"connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
"tasks.max": 6,
"topics": "crm_account_detail,crm_account_on_competitors,crm_event,crm_event_participation",
"connection.url": "jdbc:postgresql://dburl/service?prepareThreshold=0",
"connection.user": "pandora.app",
"connection.password": "*******",
"dialect.name": "PostgreSqlDatabaseDialect",
"insert.mode": "upsert",
"pk.mode": "record_value",
"pk.fields": "guid",
"table.name.format": "pandora.${topic}",
"errors.tolerance": "all",
"errors.log.enable":true,
"errors.log.include.messages":true,
"errors.deadletterqueue.topic.name":"crm_data_deadletterqueue",
"errors.deadletterqueue.context.headers.enable":true
}
}
Target table DDL:
create table crm_event_participation
(
guid char(36) not null
constraint crm_event_participation_pkey
primary key,
created_on timestamp,
created_by_guid char(36),
modified_on timestamp,
modified_by_guid char(36),
process_listeners integer,
event_guid char(36),
event_response varchar(250),
note varchar(500),
is_from_group boolean,
contact_guid char(36),
target_item integer,
account_guid char(36),
employer_id integer
);
The connector starts successfully, but it fails if an error occurs (e.g. a missing field).
curl -X GET http://kafka-connect:9092/connectors/crm_data-sink_pandora/status:
{
"name": "crm_data-sink_pandora",
"connector": {
"state": "RUNNING",
"worker_id": "192.168.2.254:10900"
},
"tasks": [
{
"state": "FAILED",
"trace":
"org.apache.kafka.connect.errors.ConnectException: Exiting WorkerSinkTask due to unrecoverable exception.
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:586)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:225)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:193)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.kafka.connect.errors.ConnectException: Table \"pandora\".\"crm_event_participation\" is missing fields ([SinkRecordField{schema=Schema{STRING}, name='event_id', isPrimaryKey=false}, SinkRecordField{schema=Schema{STRING}, name='event_response_guid', isPrimaryKey=false}]) and auto-evolution is disabled
at io.confluent.connect.jdbc.sink.DbStructure.amendIfNecessary(DbStructure.java:140)
at io.confluent.connect.jdbc.sink.DbStructure.createOrAmendIfNecessary(DbStructure.java:73)
at io.confluent.connect.jdbc.sink.BufferedRecords.add(BufferedRecords.java:84)
at io.confluent.connect.jdbc.sink.JdbcDbWriter.write(JdbcDbWriter.java:65)
at io.confluent.connect.jdbc.sink.JdbcSinkTask.put(JdbcSinkTask.java:73)
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:564)
... 10 more",
"id": 0,
"worker_id": "192.168.2.254:10900"
}
...
]
}
Log with exception:
[2019-03-29 16:59:30,924] INFO Unable to find fields [SinkRecordField{schema=Schema{INT32}, name='process_listners', isPrimaryKey=false}] among column names [employer_id, modified_on, modified_by_guid, contact_guid, target_item, guid, created_on, process_listeners, event_guid, created_by_guid, is_from_group, account_guid, event_response, note] (io.confluent.connect.jdbc.sink.DbStructure)
[2019-03-29 16:59:30,924] ERROR WorkerSinkTask{id=crm_data-sink_pandora-1} Task threw an uncaught and unrecoverable exception. Task is being killed and will not recover until manually restarted. (org.apache.kafka.connect.runtime.WorkerSinkTask)
org.apache.kafka.connect.errors.ConnectException: Table "pandora"."crm_event_participation" is missing fields ([SinkRecordField{schema=Schema{INT32}, name='process_listners', isPrimaryKey=false}]) and auto-evolution is disabled
at io.confluent.connect.jdbc.sink.DbStructure.amendIfNecessary(DbStructure.java:140)
at io.confluent.connect.jdbc.sink.DbStructure.createOrAmendIfNecessary(DbStructure.java:73)
at io.confluent.connect.jdbc.sink.BufferedRecords.add(BufferedRecords.java:84)
at io.confluent.connect.jdbc.sink.JdbcDbWriter.write(JdbcDbWriter.java:65)
at io.confluent.connect.jdbc.sink.JdbcSinkTask.put(JdbcSinkTask.java:73)
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:564)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:225)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:193)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:175)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:219)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Please explain what could be wrong in the connector configuration. I am using Kafka 2.0.0 and JdbcSinkConnector 5.1.0.
In your Kafka message you have a field process_listners. A column with that name is not present in your table.
I think you have a typo: your table column is process_listeners, not process_listners.
The errors.tolerance property applies only to errors that occur while converting (and transforming) messages, not to exceptions thrown from the connector's put() call, which is why the task still fails here.
You can read more about errors.tolerance here: kafka connect - jdbc sink sql exception
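As a side note, and only as a sketch: the exception also says "auto-evolution is disabled". If a missing column were intentional rather than a typo, the JDBC sink can add it itself when auto-evolution is enabled in the connector config, for example:
"auto.create": "false",
"auto.evolve": "true"
For the typo case, though, the real fix is to correct the field name at the producer (or the column name in the table); with the misspelled field, auto-evolution would just create a second, misspelled column.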