JSON parsing using Play JSON with different fields - Scala

I have the JSON below and I'm parsing it using play-json. Somehow the "datafeeds/schema/fields" node is not getting parsed properly.
I have created standard Reads to parse this JSON, but the "datafeeds" node does not parse correctly because the "format" node (under datafeeds/schema/fields) is sometimes a String and sometimes a JsObject, and the same goes for the "type" node.
If I treat "schema" as a JsObject, the whole JSON parses correctly, but it seems I then have to process the schema separately.
My JSON looks like this:
{
"entities": [
{
"name": "customers",
"number_of_buckets": 5,
"entity_column_name": "customer_id",
"entity_column_type": "integer"
},
{
"name": "accounts",
"number_of_buckets": 7,
"entity_column_name": "account_id",
"entity_column_type": "string"
},
{
"name": "products",
"number_of_buckets": 1,
"entity_column_name": "product_id",
"entity_column_type": "integer"
}
],
"datafeeds": [
{
"name": "customer_demographics",
"version": "1",
"delimiter": "|",
"filename_re_pattern": ".*(customer_demographics_v1_[0-9]{8}\\.psv)$",
"frequency": {
"days": 1
},
"from": "2015-07-01",
"drop_threshold": {
"rows": null,
"percentage": 0.05
},
"dry_run": false,
"header": true,
"text_qualifier": null,
"landing_path": "landing",
"schema": {
"fields": [
{
"time_key": true,
"format": "yyyy-MM-dd",
"metadata": {},
"name": "record_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"format": "yyyy-MM-dd",
"metadata": {},
"name": "extract_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"entity_type": "customers",
"metadata": {},
"name": "customer_id",
"nullable": false,
"primary_key": true,
"type": "integer"
},
{
"metadata": {},
"name": "year_of_birth",
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "month_of_birth",
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "postcode",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "state",
"nullable": true,
"type": "string"
},
{
"format": {
"false": "N",
"true": "Y"
},
"metadata": {},
"name": "marketing_consent",
"nullable": true,
"type": "boolean"
}
],
"type": "struct"
}
},
{
"name": "customer_statistics",
"version": "1",
"delimiter": "|",
"filename_re_pattern": ".*(customer_statistics_v1_[0-9]{8}\\.psv)$",
"frequency": {
"days": 1
},
"from": "2015-07-01",
"drop_threshold": {
"rows": null,
"percentage": 0.05
},
"dry_run": false,
"header": true,
"text_qualifier": null,
"landing_path": "landing",
"schema": {
"fields": [
{
"time_key": true,
"format": "yyyy-MM-dd",
"metadata": {},
"name": "record_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"format": "yyyy-MM-dd",
"metadata": {},
"name": "extract_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"entity_type": "customers",
"metadata": {},
"name": "customer_id",
"nullable": false,
"primary_key": true,
"type": "integer"
},
{
"metadata": {},
"name": "risk_score",
"nullable": true,
"type": "double"
},
{
"metadata": {},
"name": "mkg_segments",
"nullable": true,
"type": {
"type":"array",
"elementType":"string",
"containsNull": false
}
},
{
"metadata": {},
"name": "avg_balance",
"nullable": true,
"type": "decimal"
},
{
"metadata": {},
"name": "num_accounts",
"nullable": true,
"type": "integer"
}
],
"type": "struct"
}
}
],
"tables": [
{
"name": "table_name",
"version": "version",
"augmentations": [
{
"left_table_name": "left_table_name",
"left_table_version": "v1",
"right_table_name": "right_table_name",
"right_table_version": "v1",
"columns": [
"column_a",
"column_b",
"column_c"
],
"join_cols": [
{
"left_table": "system_code",
"right_table": "key_a"
},
{
"left_table": "group_product_code",
"right_table": "key_b"
},
{
"left_table": "sub_product_code",
"right_table": "key_c"
}
]
}
],
"sources": [
{
"name": "table_name",
"version": "v1",
"mandatory": true,
"type": "datafeed | table"
}
],
"aggregations": [
{
"column_name": "customer_age_customer_age",
"column_type": "long",
"description": "date_diff",
"expression": "max_by",
"source_columns": [
{
"column_name": "customer_age_year_of_birth",
"source": {
"name": "customers",
"type": "table",
"version": "v1"
}
},
{
"column_name": "customer_age_month_of_birth",
"source": {
"name": "customers",
"type": "table",
"version": "v1"
}
}
]
}
],
"column_level_transformations": [
{
"column_name": "column_added",
"column_type": "long",
"description": "adding two columns to return something else",
"expression": "column_a+column_b",
"source_columns": [
{
"column_name": "column_a",
"source": {
"name": "source_a",
"type": "table",
"version": "v1"
}
},
{
"column_name": "column_b",
"source": {
"name": "source_b",
"type": "table",
"version": "v1"
}
}
]
}
],
"frequency": {
"months": 1
},
"joins": [
{
"name": "table_name",
"version": "v1"
},
{
"name": "table_name_b",
"version": "v2"
}
],
"from": "2015-07-01",
"format": "parquet",
"structure": "primitives",
"index_query": "sql statement",
"insert_query": "sql statement"
}
]
}
Any idea how to parse this JSON?

Edit: updated to answer the updated question
I'm not sure how you're parsing now, but you can try this:
import play.api.libs.json.Reads._
import play.api.libs.json._
case class Frequency(days: Int)
case class DropThreshold(
  rows: Option[Int], // guessing type here
  percentage: Double
)
case class Format(`false`: String, `true`: String)
case class Type(`type`: String, elementType: String, containsNull: Boolean)
case class Field(
  entity_type: Option[String],
  time_key: Option[Boolean],
  format: Option[Either[String, Format]],
  metadata: Option[JsObject],
  name: Option[String],
  nullable: Option[Boolean],
  primary_key: Option[Boolean],
  `type`: Option[Either[String, Type]],
  timezone: Option[String]
)
case class Schema(fields: Seq[Field])
case class Datafeed(
  name: String,
  version: String,
  delimiter: String,
  filename_re_pattern: String,
  frequency: Frequency,
  from: String,
  drop_threshold: DropThreshold,
  dry_run: Boolean,
  header: Boolean,
  text_qualifier: Option[String], // guessing type here
  landing_path: String,
  schema: Schema
)
case class Entity(name: String, number_of_buckets: Int, entity_column_name: String, entity_column_type: String)
case class MyJson(entities: Seq[Entity], datafeeds: Seq[Datafeed])
implicit def eitherReads[A, B](implicit A: Reads[A], B: Reads[B]): Reads[Either[A, B]] = Reads[Either[A, B]] { json =>
  A.reads(json) match {
    case JsSuccess(value, path) => JsSuccess(Left(value), path)
    case JsError(e1) => B.reads(json) match {
      case JsSuccess(value, path) => JsSuccess(Right(value), path)
      case JsError(e2) => JsError(JsError.merge(e1, e2))
    }
  }
}
implicit val frequencyReads: Reads[Frequency] = Json.reads[Frequency]
implicit val dropThresholdReads: Reads[DropThreshold] = Json.reads[DropThreshold]
implicit val formatReads: Reads[Format] = Json.reads[Format]
implicit val typeReads: Reads[Type] = Json.reads[Type]
implicit val fieldReads: Reads[Field] = Json.reads[Field]
implicit val schemaReads: Reads[Schema] = Json.reads[Schema]
implicit val datafeedReads: Reads[Datafeed] = Json.reads[Datafeed]
implicit val entityReads: Reads[Entity] = Json.reads[Entity]
implicit val myJsonReads: Reads[MyJson] = Json.reads[MyJson]
With the Either Reads copied from here. To test:
scala> val json = Json.parse("""{"entities": [{"name": "customers","number_of_buckets": 5,"entity_column_name": "customer_id","entity_column_type": "integer"},{"name": "accounts","number_of_buckets": 7,"entity_column_name": "account_id","entity_column_type": "string"},{"name": "products","number_of_buckets": 1,"entity_column_name": "product_id","entity_column_type": "integer"}],"datafeeds": [{"name": "customer_demographics","version": "1","delimiter": "|","filename_re_pattern": ".*(customer_demographics_v1_[0-9]{8}\\.psv)$","frequency": {"days": 1},"from": "2015-07-01","drop_threshold": {"rows": null,"percentage": 0.05},"dry_run": false,"header": true,"text_qualifier": null,"landing_path": "landing","schema": {"fields": [{"time_key": true,"format": "yyyy-MM-dd","metadata": {},"name": "record_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"format": "yyyy-MM-dd","metadata": {},"name": "extract_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"entity_type": "customers","metadata": {},"name": "customer_id","nullable": false,"primary_key": true,"type": "integer"},{"metadata": {},"name": "year_of_birth","nullable": true,"type": "integer"},{"metadata": {},"name": "month_of_birth","nullable": true,"type": "integer"},{"metadata": {},"name": "postcode","nullable": true,"type": "string"},{"metadata": {},"name": "state","nullable": true,"type": "string"},{"format": {"false": "N","true": "Y"},"metadata": {},"name": "marketing_consent","nullable": true,"type": "boolean"}],"type": "struct"}},{"name": "customer_statistics","version": "1","delimiter": "|","filename_re_pattern": ".*(customer_statistics_v1_[0-9]{8}\\.psv)$","frequency": {"days": 1},"from": "2015-07-01","drop_threshold": {"rows": null,"percentage": 0.05},"dry_run": false,"header": true,"text_qualifier": null,"landing_path": "landing","schema": {"fields": [{"time_key": true,"format": "yyyy-MM-dd","metadata": {},"name": "record_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"format": "yyyy-MM-dd","metadata": {},"name": "extract_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"entity_type": "customers","metadata": {},"name": "customer_id","nullable": false,"primary_key": true,"type": "integer"},{"metadata": {},"name": "risk_score","nullable": true,"type": "double"},{"metadata": {},"name": "mkg_segments","nullable": true,"type": {"type":"array","elementType":"string","containsNull": false}},{"metadata": {},"name": "avg_balance","nullable": true,"type": "decimal"},{"metadata": {},"name": "num_accounts","nullable": true,"type": "integer"}],"type": "struct"}}],"tables": [{"name": "table_name","version": "version","augmentations": [{"left_table_name": "left_table_name","left_table_version": "v1","right_table_name": "right_table_name","right_table_version": "v1","columns": ["column_a","column_b","column_c"],"join_cols": [{"left_table": "system_code","right_table": "key_a"},{"left_table": "group_product_code","right_table": "key_b"},{"left_table": "sub_product_code","right_table": "key_c"}]}],"sources": [{"name": "table_name","version": "v1","mandatory": true,"type": "datafeed | table"}],"aggregations": [{"column_name": "customer_age_customer_age","column_type": "long","description": "date_diff","expression": "max_by","source_columns": [{"column_name": "customer_age_year_of_birth","source": {"name": "customers","type": "table","version": 
"v1"}},{"column_name": "customer_age_month_of_birth","source": {"name": "customers","type": "table","version": "v1"}}]}],"column_level_transformations": [{"column_name": "column_added","column_type": "long","description": "adding two columns to return something else","expression": "column_a+column_b","source_columns": [{"column_name": "column_a","source": {"name": "source_a","type": "table","version": "v1"}},{"column_name": "column_b","source": {"name": "source_b","type": "table","version": "v1"}}]}],"frequency": {"months": 1},"joins": [{"name": "table_name","version": "v1"},{"name": "table_name_b","version": "v2"}],"from": "2015-07-01","format": "parquet","structure": "primitives","index_query": "sql statement","insert_query": "sql statement"}]}""")
json: play.api.libs.json.JsValue = {"entities":[{"name":"customers","number_of_buckets":5,"entity_column_name":"customer_id","entity_column_type":"integer"},{"name":"accounts","number_of_buckets":7,"entity_column_name":"account_id","entity_column_type":"string"},{"name":"products","number_of_buckets":1,"entity_column_name":"product_id","entity_column_type":"integer"}],"datafeeds":[{"name":"customer_demographics","version":"1","delimiter":"|","filename_re_pattern":".*(customer_demographics_v1_[0-9]{8}\\.psv)$","frequency":{"days":1},"from":"2015-07-01","drop_threshold":{"rows":null,"percentage":0.05},"dry_run":false,"header":true,"text_qualifier":null,"landing_path":"landing","schema":{"fields":[{"time_key":true,"format":"yyyy-MM-dd","metadata":{},"name":"record...
scala> json.validate[MyJson]
res0: play.api.libs.json.JsResult[MyJson] = JsSuccess(MyJson(List(Entity(customers,5,customer_id,integer), Entity(accounts,7,account_id,string), Entity(products,1,product_id,integer)),List(Datafeed(customer_demographics,1,|,.*(customer_demographics_v1_[0-9]{8}\.psv)$,Frequency(1),2015-07-01,DropThreshold(None,0.05),false,true,None,landing,Schema(List(Field(None,Some(true),Some(Left(yyyy-MM-dd)),Some({}),Some(record_date),Some(false),Some(true),Some(Left(timestamp)),Some(Australia/Sydney)), Field(None,None,Some(Left(yyyy-MM-dd)),Some({}),Some(extract_date),Some(false),Some(true),Some(Left(timestamp)),Some(Australia/Sydney)), Field(Some(customers),None,None,Some({}),Some(customer_id),Some(false),Some(true),Some(Left(integer)),None), Field(None,None,None,Some({}),...
Remember to set any optional or nullable fields to an Option type.
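Once it parses, the Either-typed fields can be folded back into plain values where needed. A minimal sketch (the renderFormat helper below is hypothetical, not part of the question or of the Reads above):
// Hypothetical helper: fold the parsed "format" field back into a display string.
// Left is the plain date-format string, Right is the boolean mapping object.
def renderFormat(field: Field): String =
  field.format match {
    case Some(Left(pattern))  => s"date pattern: $pattern"
    case Some(Right(mapping)) => s"boolean mapping: true -> ${mapping.`true`}, false -> ${mapping.`false`}"
    case None                 => "no format"
  }
// Usage, assuming the json value and Reads defined above:
// json.validate[MyJson].map(_.datafeeds.flatMap(_.schema.fields).map(renderFormat))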

Related

How to group by a single field and return more values together

I'm starting to use Apache Druid but I'm having some difficulty running native queries (and some SQL too).
1- Is it possible to group by a single column while also returning more channels?
2- How could I group by a single column while returning different grouped items on the same query/row?
The query I'm trying to use:
{
"queryType": "groupBy",
"dataSource": "my-data-source",
"granularity": "all",
"intervals": ["2022-06-27T03:00:00.000Z/2022-06-28T03:00:00.000Z"],
"context:": { "timeout: 30000 },
"dimensions": ["userId"],
"filter": {
"type": "and",
"fields": [
{
"type": "or",
"fields": [{...}]
}
]
},
"aggregations": [
{
"type": "count",
"name": "count"
}
]
}
I tried to add a filtered type inside aggregations: [] but nothing changed.
"aggregations": [
{
"type: "count",
"name": "count"
},
{
"type": "filtered",
"filter": {
"type": "selector",
"dimension": "block_id",
"value": "block1"
},
"aggregator": {
"type": "count",
"name": "block1",
"fieldName": "block_id"
}
}
]
Grouping Aggregator also didn't work.
"aggregations": [
{
"type": "count",
"name": "count"
},
{
"type": "grouping",
"name": "groupedData",
"groupings": ["block_id"]
}
],
Below is the image illustrating the results I'm trying to achieve.
Not sure yet how to get the results in the format you want, but as a start, something like this might be a step:
{
"queryType": "groupBy",
"dataSource": {
"type": "table",
"name": "dataTest"
},
"intervals": {
"type": "intervals",
"intervals": [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
]
},
"filter": null,
"granularity": {
"type": "all"
},
"dimensions": [
{
"type": "default",
"dimension": "d2_ts2",
"outputType": "STRING"
},
{
"type": "default",
"dimension": "d3_email",
"outputType": "STRING"
}
],
"aggregations": [
{
"type": "count",
"name": "myCount",
}
],
"descending": false
}
I'm curious, what is the use case?
Using a SQL query you can do it this way:
SELECT UserID,
sum(1) FILTER (WHERE BlockId = 'block1') as Block1,
sum(1) FILTER (WHERE BlockId = 'block2') as Block2,
sum(1) FILTER (WHERE BlockId = 'block3') as Block3
FROM inline_data
GROUP BY 1
The Native Query for this (from the explain) is:
{
"queryType": "topN",
"dataSource": {
"type": "table",
"name": "inline_data"
},
"virtualColumns": [
{
"type": "expression",
"name": "v0",
"expression": "1",
"outputType": "LONG"
}
],
"dimension": {
"type": "default",
"dimension": "UserID",
"outputName": "d0",
"outputType": "STRING"
},
"metric": {
"type": "dimension",
"previousStop": null,
"ordering": {
"type": "lexicographic"
}
},
"threshold": 101,
"intervals": {
"type": "intervals",
"intervals": [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
]
},
"filter": null,
"granularity": {
"type": "all"
},
"aggregations": [
{
"type": "filtered",
"aggregator": {
"type": "longSum",
"name": "a0",
"fieldName": "v0",
"expression": null
},
"filter": {
"type": "selector",
"dimension": "BlockId",
"value": "block1",
"extractionFn": null
},
"name": "a0"
},
{
"type": "filtered",
"aggregator": {
"type": "longSum",
"name": "a1",
"fieldName": "v0",
"expression": null
},
"filter": {
"type": "selector",
"dimension": "BlockId",
"value": "block2",
"extractionFn": null
},
"name": "a1"
},
{
"type": "filtered",
"aggregator": {
"type": "longSum",
"name": "a2",
"fieldName": "v0",
"expression": null
},
"filter": {
"type": "selector",
"dimension": "BlockId",
"value": "block3",
"extractionFn": null
},
"name": "a2"
}
],
"postAggregations": [],
"context": {
"populateCache": false,
"sqlOuterLimit": 101,
"sqlQueryId": "bb92e899-c127-49b0-be1b-d4b38909d166",
"useApproximateCountDistinct": false,
"useApproximateTopN": false,
"useCache": false,
"useNativeQueryExplain": true
},
"descending": false
}

AWS API Gateway -- JSON Schema child object should validate when parent object exists

I need to create a JSON Schema for the following JSON input. The properties under Vehicle (Manufacturer, Model, etc.) should be required only when the Vehicle object exists.
{
"Manufacturer": "",
"Characteristics": {
"Starts": "new",
"vehicle": {
"Manufacturer": "hello",
"Model": "hh",
"Opening": "",
"Quantity": "",
"Principle": "",
"Type": ""
}
}
}
I tried the following JSON Schema. It works when the Vehicle object is not there, but if we rename Vehicle to something else, e.g. Vehicle1, it doesn't give an error. Please guide me on how to fix this.
{
"$schema": "http://json-schema.org/draft-07/schema",
"type": "object",
"properties": {
"Manufacturer": {
"type": [
"string",
"null"
]
},
"Characteristics": {
"type": "object",
"properties": {
"Starts": {
"type": [
"string",
"null"
]
},
"Vehicle": {
"$ref": "#/definitions/Vehicle"
}
},
"required": [
"Starts", "Vehcle"
]
}
},
"required": [
"Manufacturer"
],
"definitions": {
"Vehicle": {
"type": "object",
"properties": {
"Manufacturer": {
"type": [
"string",
"null"
]
},
"Model": {
"type": [
"string",
"null"
]
},
"Opening": {
"type": [
"string",
"null"
]
},
"PanelQuantity": {
"type": [
"string",
"null"
]
},
"Principle": {
"type": [
"string",
"null"
]
},
"Type": {
"type": [
"string",
"null"
]
}
},
"required": ["Manufacturer", "Model", "Opening", "Quantity", "Principle", "Type"]
}
}
}
Thanks,
Bhaskar
Sounds like you want to add "additionalProperties": false -- which will generate an error if any other properties are present that aren't defined under properties.
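For instance, on the Characteristics object (a sketch showing only where the keyword goes; the rest of the schema stays as in the question):
"Characteristics": {
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "Starts": { "type": ["string", "null"] },
    "Vehicle": { "$ref": "#/definitions/Vehicle" }
  }
}
With "additionalProperties": false in place, an unexpected property such as "Vehicle1" fails validation instead of being silently ignored.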

While consuming from Kafka in Druid, roll-up merges two rows into 1 instead of adding them

I'm trying to use Druid to consume events from Kafka. However, when I'm using roll-up to consume the data, the number of events seems to come out wrong; without roll-up the numbers are accurate. I am using Druid 0.17.1.
I have observed that when roll-up happens, instead of aggregating n events into a count of n, it aggregates them into a count of 1.
Here is my ingestion spec:
{
"dataSchema": {
"dataSource": "notificationstatus",
"timestampSpec": {
"column": "date",
"format": "yyyy-MM-dd-HH:mm:ss Z",
"missingValue": null
},
"dimensionsSpec": {
"dimensions": [{
"type": "string",
"name": "Process",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "Channel",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "Status",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "Message",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "CampaignID",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "BannerID",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
}
],
"dimensionExclusions": [
"date",
"count"
]
},
"metricsSpec": [{
"type": "count",
"name": "count"
}],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "HOUR",
"queryGranularity": "MINUTE",
"rollup": true,
"intervals": null
},
"transformSpec": {
"filter": {
"type": "not",
"field": {
"type": "like",
"dimension": "Status",
"pattern": "INFO",
"escape": null,
"extractionFn": null
}
},
"transforms": []
}
},
"ioConfig": {
"topic": "notificationstatus",
"inputFormat": {
"type": "tsv",
"columns": [
"source",
"ymd",
"date",
"Process",
"deviceID",
"Channel",
"CampaignID",
"BannerID",
"Status",
"Message",
"11",
"12"
],
"listDelimiter": null,
"delimiter": "\t",
"findColumnsFromHeader": false,
"skipHeaderRows": 0
},
"replicas": 1,
"taskCount": 1,
"taskDuration": "PT3600S",
"consumerProperties": {},
"pollTimeout": 100,
"startDelay": "PT5S",
"period": "PT30S",
"useEarliestOffset": false,
"completionTimeout": "PT1800S",
"lateMessageRejectionPeriod": null,
"earlyMessageRejectionPeriod": null,
"lateMessageRejectionStartDateTime": null,
"stream": "notificationstatus",
"useEarliestSequenceNumber": false,
"type": "kafka"
},
"tuningConfig": {
"type": "kafka",
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"maxRowsPerSegment": 5000000,
"maxTotalRows": null,
"intermediatePersistPeriod": "PT10M",
"basePersistDirectory": "/home/akash/Downloads/druidVer/apache-druid-0.17.1/var/tmp/druid-realtime-persist622909873559398926",
"maxPendingPersists": 0,
"indexSpec": {
"bitmap": {
"type": "concise"
},
"dimensionCompression": "lz4",
"metricCompression": "lz4",
"longEncoding": "longs"
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "concise"
},
"dimensionCompression": "lz4",
"metricCompression": "lz4",
"longEncoding": "longs"
},
"buildV9Directly": true,
"reportParseExceptions": false,
"handoffConditionTimeout": 0,
"resetOffsetAutomatically": false,
"segmentWriteOutMediumFactory": null,
"workerThreads": null,
"chatThreads": null,
"chatRetries": 8,
"httpTimeout": "PT10S",
"shutdownTimeout": "PT80S",
"offsetFetchPeriod": "PT30S",
"intermediateHandoffPeriod": "P2147483647D",
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"skipSequenceNumberAvailabilityCheck": false,
"repartitionTransitionDuration": "PT120S"
},
"type": "kafka"
}

How to create a stream in KSQL from a topic with a decimal type column

I want to create a stream from a Kafka topic that monitors a MySQL table. The MySQL table has columns of type decimal(16,4), and when I create a stream with this command:
create stream test with (KAFKA_TOPIC='dbServer.Kafka.DailyUdr',VALUE_FORMAT='AVRO');
the stream is created and runs, but the columns with the decimal(16,4) type don't appear in the resulting stream.
Source topic value schema:
{
"type": "record",
"name": "Envelope",
"namespace": "dbServer.Kafka.DailyUdr",
"fields": [
{
"name": "before",
"type": [
"null",
{
"type": "record",
"name": "Value",
"fields": [
{
"name": "UserId",
"type": "int"
},
{
"name": "NationalCode",
"type": "string"
},
{
"name": "TotalInputOcted",
"type": "int"
},
{
"name": "TotalOutputOcted",
"type": "int"
},
{
"name": "Date",
"type": "string"
},
{
"name": "Service",
"type": "string"
},
{
"name": "decimalCol",
"type": [
"null",
{
"type": "bytes",
"scale": 4,
"precision": 16,
"connect.version": 1,
"connect.parameters": {
"scale": "4",
"connect.decimal.precision": "16"
},
"connect.name": "org.apache.kafka.connect.data.Decimal",
"logicalType": "decimal"
}
],
"default": null
}
],
"connect.name": "dbServer.Kafka.DailyUdr.Value"
}
],
"default": null
},
{
"name": "after",
"type": [
"null",
"Value"
],
"default": null
},
{
"name": "source",
"type": {
"type": "record",
"name": "Source",
"namespace": "io.debezium.connector.mysql",
"fields": [
{
"name": "version",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "connector",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "name",
"type": "string"
},
{
"name": "server_id",
"type": "long"
},
{
"name": "ts_sec",
"type": "long"
},
{
"name": "gtid",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "file",
"type": "string"
},
{
"name": "pos",
"type": "long"
},
{
"name": "row",
"type": "int"
},
{
"name": "snapshot",
"type": [
{
"type": "boolean",
"connect.default": false
},
"null"
],
"default": false
},
{
"name": "thread",
"type": [
"null",
"long"
],
"default": null
},
{
"name": "db",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "table",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "query",
"type": [
"null",
"string"
],
"default": null
}
],
"connect.name": "io.debezium.connector.mysql.Source"
}
},
{
"name": "op",
"type": "string"
},
{
"name": "ts_ms",
"type": [
"null",
"long"
],
"default": null
}
],
"connect.name": "dbServer.Kafka.DailyUdr.Envelope"
}
My problem is with the decimalCol column.
KSQL does not yet support the DECIMAL data type.
There is an issue here that you can track and upvote if you think it would be useful.
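If the source is the Debezium MySQL connector (as the io.debezium.connector.mysql namespace in the schema suggests), one possible interim workaround, not part of the original answer, is to have the connector emit decimals as doubles or strings via its decimal.handling.mode setting, so the column arrives as a type KSQL can already handle:
"decimal.handling.mode": "double"
With "double" (or "string") the column is no longer encoded as the Connect Decimal logical type shown above, at the cost of precision (or of casting the string downstream).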

avro.io.AvroTypeException: The datum [object] is not an example of the schema

I have been struggling with this issue for quite some time. I am working with AvroProducer (Confluent Kafka) and getting an error related to the defined schema.
Here is the complete stacktrace of the issue I am getting:
raise AvroTypeException(self.writer_schema, datum)
avro.io.AvroTypeException: The datum {'totalDifficulty': 2726165051, 'stateRoot': '0xf09bd6730b3ae7f5728836564837d7f776a8f7333628c8b84cb57d7c6d48ebba', 'sha3Uncles': '0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347', 'size': 538, 'logs': [], 'gasLimit': 8000000, 'mixHash': '0x410b2b19519be16496727c93515f399072ffecf06defe4913d00eb4d10bb7351', 'logsBloom': '0x00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', 'nonce': '0x18dc6c0d30839c91', 'proofOfAuthorityData': '0xd883010817846765746888676f312e31302e34856c696e7578', 'number': 5414, 'timestamp': 1552577641, 'difficulty': 589091, 'gasUsed': 0, 'miner': '0x48FA5EBc2f0D82B5D52faAe624Fa2426998ab492', 'hash': '0x71259991acb407a85befa8b3c5df26a94a11a6c08f92f3e3b7c9c0e8e1f5916d', 'transactionsRoot': '0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421', 'receiptsRoot': '0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421', 'transactions': [], 'parentHash': '0x9f0c25eeab86fc144296cb034c94857beed331936016d60c0986a35ac07d9c68', 'uncles': []} is not an example of the schema {
"type": "record",
"name": "value",
"namespace": "exporter.value.opsnetBlock",
"fields": [
{
"type": "int",
"name": "difficulty"
},
{
"type": "string",
"name": "proofOfAuthorityData"
},
{
"type": "int",
"name": "gasLimit"
},
{
"type": "int",
"name": "gasUsed"
},
{
"type": "string",
"name": "hash"
},
{
"type": "string",
"name": "logsBloom"
},
{
"type": "int",
"name": "size"
},
{
"type": "string",
"name": "miner"
},
{
"type": "string",
"name": "mixHash"
},
{
"type": "string",
"name": "nonce"
},
{
"type": "int",
"name": "number"
},
{
"type": "string",
"name": "parentHash"
},
{
"type": "string",
"name": "receiptsRoot"
},
{
"type": "string",
"name": "sha3Uncles"
},
{
"type": "string",
"name": "stateRoot"
},
{
"type": "int",
"name": "timestamp"
},
{
"type": "int",
"name": "totalDifficulty"
},
{
"type": "string",
"name": "transactionsRoot"
},
{
"type": {
"type": "array",
"items": "string"
},
"name": "transactions"
},
{
"type": {
"type": "array",
"items": "string"
},
"name": "uncles"
},
{
"type": {
"type": "array",
"items": {
"type": "record",
"name": "Child",
"namespace": "exporter.value.opsnetBlock",
"fields": [
{
"type": "string",
"name": "address"
},
{
"type": "string",
"name": "blockHash"
},
{
"type": "int",
"name": "blockNumber"
},
{
"type": "string",
"name": "data"
},
{
"type": "int",
"name": "logIndex"
},
{
"type": "boolean",
"name": "removed"
},
{
"type": {
"type": "array",
"items": "string"
},
"name": "topics"
},
{
"type": "string",
"name": "transactionHash"
},
{
"type": "int",
"name": "transactionIndex"
}
]
}
},
"name": "logs"
}
]
}
Can anybody please tell me where I am going wrong?
Thanks in advance