I have been struggling with this issue for quite some time. I am working with AvroProducer (confluent-kafka) and getting an error related to the schema I defined.
Here is the complete stacktrace of the issue I am getting:
<!--language: lang-none-->
raise AvroTypeException(self.writer_schema, datum)
avro.io.AvroTypeException: The datum {'totalDifficulty': 2726165051, 'stateRoot': '0xf09bd6730b3ae7f5728836564837d7f776a8f7333628c8b84cb57d7c6d48ebba', 'sha3Uncles': '0x1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347', 'size': 538, 'logs': [], 'gasLimit': 8000000, 'mixHash': '0x410b2b19519be16496727c93515f399072ffecf06defe4913d00eb4d10bb7351', 'logsBloom': '0x00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', 'nonce': '0x18dc6c0d30839c91', 'proofOfAuthorityData': '0xd883010817846765746888676f312e31302e34856c696e7578', 'number': 5414, 'timestamp': 1552577641, 'difficulty': 589091, 'gasUsed': 0, 'miner': '0x48FA5EBc2f0D82B5D52faAe624Fa2426998ab492', 'hash': '0x71259991acb407a85befa8b3c5df26a94a11a6c08f92f3e3b7c9c0e8e1f5916d', 'transactionsRoot': '0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421', 'receiptsRoot': '0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421', 'transactions': [], 'parentHash': '0x9f0c25eeab86fc144296cb034c94857beed331936016d60c0986a35ac07d9c68', 'uncles': []} is not an example of the schema {
"type": "record",
"name": "value",
"namespace": "exporter.value.opsnetBlock",
"fields": [
{
"type": "int",
"name": "difficulty"
},
{
"type": "string",
"name": "proofOfAuthorityData"
},
{
"type": "int",
"name": "gasLimit"
},
{
"type": "int",
"name": "gasUsed"
},
{
"type": "string",
"name": "hash"
},
{
"type": "string",
"name": "logsBloom"
},
{
"type": "int",
"name": "size"
},
{
"type": "string",
"name": "miner"
},
{
"type": "string",
"name": "mixHash"
},
{
"type": "string",
"name": "nonce"
},
{
"type": "int",
"name": "number"
},
{
"type": "string",
"name": "parentHash"
},
{
"type": "string",
"name": "receiptsRoot"
},
{
"type": "string",
"name": "sha3Uncles"
},
{
"type": "string",
"name": "stateRoot"
},
{
"type": "int",
"name": "timestamp"
},
{
"type": "int",
"name": "totalDifficulty"
},
{
"type": "string",
"name": "transactionsRoot"
},
{
"type": {
"type": "array",
"items": "string"
},
"name": "transactions"
},
{
"type": {
"type": "array",
"items": "string"
},
"name": "uncles"
},
{
"type": {
"type": "array",
"items": {
"type": "record",
"name": "Child",
"namespace": "exporter.value.opsnetBlock",
"fields": [
{
"type": "string",
"name": "address"
},
{
"type": "string",
"name": "blockHash"
},
{
"type": "int",
"name": "blockNumber"
},
{
"type": "string",
"name": "data"
},
{
"type": "int",
"name": "logIndex"
},
{
"type": "boolean",
"name": "removed"
},
{
"type": {
"type": "array",
"items": "string"
},
"name": "topics"
},
{
"type": "string",
"name": "transactionHash"
},
{
"type": "int",
"name": "transactionIndex"
}
]
}
},
"name": "logs"
}
]
}
Can anybody please tell me where I am going wrong?
Thanks in advance
I have the JSON below and I'm parsing it using play-json. Somehow the "datafeeds/schema/fields" node is not being parsed properly.
I have created standard Reads to parse this JSON, but the "datafeeds" node does not seem to parse correctly because the "format" node (under datafeeds/schema/fields) is sometimes a String and sometimes a JsObject; the same goes for the "type" node.
If I treat Schema as a JsObject, the whole JSON is parsed correctly, but it seems I then have to process Schema separately.
My JSON looks like this:
{
"entities": [
{
"name": "customers",
"number_of_buckets": 5,
"entity_column_name": "customer_id",
"entity_column_type": "integer"
},
{
"name": "accounts",
"number_of_buckets": 7,
"entity_column_name": "account_id",
"entity_column_type": "string"
},
{
"name": "products",
"number_of_buckets": 1,
"entity_column_name": "product_id",
"entity_column_type": "integer"
}
],
"datafeeds": [
{
"name": "customer_demographics",
"version": "1",
"delimiter": "|",
"filename_re_pattern": ".*(customer_demographics_v1_[0-9]{8}\\.psv)$",
"frequency": {
"days": 1
},
"from": "2015-07-01",
"drop_threshold": {
"rows": null,
"percentage": 0.05
},
"dry_run": false,
"header": true,
"text_qualifier": null,
"landing_path": "landing",
"schema": {
"fields": [
{
"time_key": true,
"format": "yyyy-MM-dd",
"metadata": {},
"name": "record_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"format": "yyyy-MM-dd",
"metadata": {},
"name": "extract_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"entity_type": "customers",
"metadata": {},
"name": "customer_id",
"nullable": false,
"primary_key": true,
"type": "integer"
},
{
"metadata": {},
"name": "year_of_birth",
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "month_of_birth",
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "postcode",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "state",
"nullable": true,
"type": "string"
},
{
"format": {
"false": "N",
"true": "Y"
},
"metadata": {},
"name": "marketing_consent",
"nullable": true,
"type": "boolean"
}
],
"type": "struct"
}
},
{
"name": "customer_statistics",
"version": "1",
"delimiter": "|",
"filename_re_pattern": ".*(customer_statistics_v1_[0-9]{8}\\.psv)$",
"frequency": {
"days": 1
},
"from": "2015-07-01",
"drop_threshold": {
"rows": null,
"percentage": 0.05
},
"dry_run": false,
"header": true,
"text_qualifier": null,
"landing_path": "landing",
"schema": {
"fields": [
{
"time_key": true,
"format": "yyyy-MM-dd",
"metadata": {},
"name": "record_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"format": "yyyy-MM-dd",
"metadata": {},
"name": "extract_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"entity_type": "customers",
"metadata": {},
"name": "customer_id",
"nullable": false,
"primary_key": true,
"type": "integer"
},
{
"metadata": {},
"name": "risk_score",
"nullable": true,
"type": "double"
},
{
"metadata": {},
"name": "mkg_segments",
"nullable": true,
"type": {
"type":"array",
"elementType":"string",
"containsNull": false
}
},
{
"metadata": {},
"name": "avg_balance",
"nullable": true,
"type": "decimal"
},
{
"metadata": {},
"name": "num_accounts",
"nullable": true,
"type": "integer"
}
],
"type": "struct"
}
}
],
"tables": [
{
"name": "table_name",
"version": "version",
"augmentations": [
{
"left_table_name": "left_table_name",
"left_table_version": "v1",
"right_table_name": "right_table_name",
"right_table_version": "v1",
"columns": [
"column_a",
"column_b",
"column_c"
],
"join_cols": [
{
"left_table": "system_code",
"right_table": "key_a"
},
{
"left_table": "group_product_code",
"right_table": "key_b"
},
{
"left_table": "sub_product_code",
"right_table": "key_c"
}
]
}
],
"sources": [
{
"name": "table_name",
"version": "v1",
"mandatory": true,
"type": "datafeed | table"
}
],
"aggregations": [
{
"column_name": "customer_age_customer_age",
"column_type": "long",
"description": "date_diff",
"expression": "max_by",
"source_columns": [
{
"column_name": "customer_age_year_of_birth",
"source": {
"name": "customers",
"type": "table",
"version": "v1"
}
},
{
"column_name": "customer_age_month_of_birth",
"source": {
"name": "customers",
"type": "table",
"version": "v1"
}
}
]
}
],
"column_level_transformations": [
{
"column_name": "column_added",
"column_type": "long",
"description": "adding two columns to return something else",
"expression": "column_a+column_b",
"source_columns": [
{
"column_name": "column_a",
"source": {
"name": "source_a",
"type": "table",
"version": "v1"
}
},
{
"column_name": "column_b",
"source": {
"name": "source_b",
"type": "table",
"version": "v1"
}
}
]
}
],
"frequency": {
"months": 1
},
"joins": [
{
"name": "table_name",
"version": "v1"
},
{
"name": "table_name_b",
"version": "v2"
}
],
"from": "2015-07-01",
"format": "parquet",
"structure": "primitives",
"index_query": "sql statement",
"insert_query": "sql statement"
}
]
}
Any idea how to parse this Json?
Edit: updated to answer the updated question
I'm not sure how you're parsing now, but you can try this:
import play.api.libs.json.Reads._
import play.api.libs.json._
/** Mirrors a datafeed's "frequency" object, e.g. {"days": 1}. */
case class Frequency(
  days: Int
)

/**
 * Mirrors a datafeed's "drop_threshold" object.
 *
 * @param rows       always null in the sample JSON, so Option[Int] is a guess at its type
 * @param percentage fractional threshold, e.g. 0.05
 */
case class DropThreshold(rows: Option[Int], percentage: Double)

/** Object form of a field's "format": one string for each of the keys "false" and "true". */
case class Format(
  `false`: String,
  `true`: String
)

/** Object form of a field's "type", e.g. {"type": "array", "elementType": "string", "containsNull": false}. */
case class Type(
  `type`: String,
  elementType: String,
  containsNull: Boolean
)
/**
 * One entry of a datafeed's "schema.fields" array.
 *
 * Every member is optional because the sample entries carry different subsets of keys.
 * `format` and `type` are Either-typed because each appears in the JSON both as a plain
 * string (Left) and as an object (Right).
 */
case class Field(
  entity_type: Option[String],
  time_key: Option[Boolean],
  format: Option[Either[String, Format]],   // "yyyy-MM-dd" or {"false": ..., "true": ...}
  metadata: Option[JsObject],               // kept as raw JSON; it is {} throughout the sample
  name: Option[String],
  nullable: Option[Boolean],
  primary_key: Option[Boolean],
  `type`: Option[Either[String, Type]],     // "integer" or {"type": "array", ...}
  timezone: Option[String]
)

/** A datafeed's "schema" object; only its "fields" array is modelled. */
case class Schema(
  fields: Seq[Field]
)
/**
 * One entry of the top-level "datafeeds" array.
 *
 * Member names match the JSON keys exactly so that the macro-derived Reads can map them.
 */
case class Datafeed(
  name: String,
  version: String,
  delimiter: String,
  filename_re_pattern: String,
  frequency: Frequency,
  from: String,
  drop_threshold: DropThreshold,
  dry_run: Boolean,
  header: Boolean,
  text_qualifier: Option[String], // always null in the sample JSON, so the element type is a guess
  landing_path: String,
  schema: Schema
)
/** One entry of the top-level "entities" array. */
case class Entity(
  name: String,
  number_of_buckets: Int,
  entity_column_name: String,
  entity_column_type: String
)

/** Root of the document; only the "entities" and "datafeeds" arrays are modelled. */
case class MyJson(
  entities: Seq[Entity],
  datafeeds: Seq[Datafeed]
)
/**
 * Derives a Reads for Either[A, B] from the Reads of both sides.
 *
 * The A reader is tried first and a success is wrapped in Left. Only if it fails is the
 * B reader tried, with a success wrapped in Right. When both fail, the errors of both
 * attempts are combined with JsError.merge.
 */
implicit def eitherReads[A, B](implicit A: Reads[A], B: Reads[B]): Reads[Either[A, B]] =
  Reads[Either[A, B]] { json =>
    A.reads(json) match {
      case JsSuccess(leftValue, leftPath) =>
        JsSuccess(Left(leftValue), leftPath)

      case JsError(leftErrors) =>
        // Left side did not match; fall back to the right-hand reader.
        B.reads(json) match {
          case JsSuccess(rightValue, rightPath) =>
            JsSuccess(Right(rightValue), rightPath)

          case JsError(rightErrors) =>
            JsError(JsError.merge(leftErrors, rightErrors))
        }
    }
  }
// Macro-derived Reads instances, one per case class.
// Readers for nested types are declared before the readers that contain them:
// formatReads and typeReads come before fieldReads, fieldReads before schemaReads,
// schemaReads (with frequencyReads and dropThresholdReads) before datafeedReads,
// and datafeedReads and entityReads before myJsonReads.
// NOTE(review): presumably this order must be preserved so each later Reads picks up
// already-initialised implicits — confirm before reordering.
implicit val frequencyReads: Reads[Frequency] = Json.reads[Frequency]
implicit val dropThresholdReads: Reads[DropThreshold] = Json.reads[DropThreshold]
implicit val formatReads: Reads[Format] = Json.reads[Format]
implicit val typeReads: Reads[Type] = Json.reads[Type]
// Field has Either-typed members (format, type).
// NOTE(review): those are presumably resolved through the implicit eitherReads — confirm.
implicit val fieldReads: Reads[Field] = Json.reads[Field]
implicit val schemaReads: Reads[Schema] = Json.reads[Schema]
implicit val datafeedReads: Reads[Datafeed] = Json.reads[Datafeed]
implicit val entityReads: Reads[Entity] = Json.reads[Entity]
// Root reader: this is the instance used by json.validate[MyJson].
implicit val myJsonReads: Reads[MyJson] = Json.reads[MyJson]
With the Either Reads copied from here. To test:
scala> val json = Json.parse("""{"entities": [{"name": "customers","number_of_buckets": 5,"entity_column_name": "customer_id","entity_column_type": "integer"},{"name": "accounts","number_of_buckets": 7,"entity_column_name": "account_id","entity_column_type": "string"},{"name": "products","number_of_buckets": 1,"entity_column_name": "product_id","entity_column_type": "integer"}],"datafeeds": [{"name": "customer_demographics","version": "1","delimiter": "|","filename_re_pattern": ".*(customer_demographics_v1_[0-9]{8}\\.psv)$","frequency": {"days": 1},"from": "2015-07-01","drop_threshold": {"rows": null,"percentage": 0.05},"dry_run": false,"header": true,"text_qualifier": null,"landing_path": "landing","schema": {"fields": [{"time_key": true,"format": "yyyy-MM-dd","metadata": {},"name": "record_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"format": "yyyy-MM-dd","metadata": {},"name": "extract_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"entity_type": "customers","metadata": {},"name": "customer_id","nullable": false,"primary_key": true,"type": "integer"},{"metadata": {},"name": "year_of_birth","nullable": true,"type": "integer"},{"metadata": {},"name": "month_of_birth","nullable": true,"type": "integer"},{"metadata": {},"name": "postcode","nullable": true,"type": "string"},{"metadata": {},"name": "state","nullable": true,"type": "string"},{"format": {"false": "N","true": "Y"},"metadata": {},"name": "marketing_consent","nullable": true,"type": "boolean"}],"type": "struct"}},{"name": "customer_statistics","version": "1","delimiter": "|","filename_re_pattern": ".*(customer_statistics_v1_[0-9]{8}\\.psv)$","frequency": {"days": 1},"from": "2015-07-01","drop_threshold": {"rows": null,"percentage": 0.05},"dry_run": false,"header": true,"text_qualifier": null,"landing_path": "landing","schema": {"fields": [{"time_key": true,"format": "yyyy-MM-dd","metadata": {},"name": 
"record_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"format": "yyyy-MM-dd","metadata": {},"name": "extract_date","nullable": false,"primary_key": true,"type": "timestamp","timezone": "Australia/Sydney"},{"entity_type": "customers","metadata": {},"name": "customer_id","nullable": false,"primary_key": true,"type": "integer"},{"metadata": {},"name": "risk_score","nullable": true,"type": "double"},{"metadata": {},"name": "mkg_segments","nullable": true,"type": {"type":"array","elementType":"string","containsNull": false}},{"metadata": {},"name": "avg_balance","nullable": true,"type": "decimal"},{"metadata": {},"name": "num_accounts","nullable": true,"type": "integer"}],"type": "struct"}}],"tables": [{"name": "table_name","version": "version","augmentations": [{"left_table_name": "left_table_name","left_table_version": "v1","right_table_name": "right_table_name","right_table_version": "v1","columns": ["column_a","column_b","column_c"],"join_cols": [{"left_table": "system_code","right_table": "key_a"},{"left_table": "group_product_code","right_table": "key_b"},{"left_table": "sub_product_code","right_table": "key_c"}]}],"sources": [{"name": "table_name","version": "v1","mandatory": true,"type": "datafeed | table"}],"aggregations": [{"column_name": "customer_age_customer_age","column_type": "long","description": "date_diff","expression": "max_by","source_columns": [{"column_name": "customer_age_year_of_birth","source": {"name": "customers","type": "table","version": "v1"}},{"column_name": "customer_age_month_of_birth","source": {"name": "customers","type": "table","version": "v1"}}]}],"column_level_transformations": [{"column_name": "column_added","column_type": "long","description": "adding two columns to return something else","expression": "column_a+column_b","source_columns": [{"column_name": "column_a","source": {"name": "source_a","type": "table","version": "v1"}},{"column_name": "column_b","source": {"name": 
"source_b","type": "table","version": "v1"}}]}],"frequency": {"months": 1},"joins": [{"name": "table_name","version": "v1"},{"name": "table_name_b","version": "v2"}],"from": "2015-07-01","format": "parquet","structure": "primitives","index_query": "sql statement","insert_query": "sql statement"}]}""")
json: play.api.libs.json.JsValue = {"entities":[{"name":"customers","number_of_buckets":5,"entity_column_name":"customer_id","entity_column_type":"integer"},{"name":"accounts","number_of_buckets":7,"entity_column_name":"account_id","entity_column_type":"string"},{"name":"products","number_of_buckets":1,"entity_column_name":"product_id","entity_column_type":"integer"}],"datafeeds":[{"name":"customer_demographics","version":"1","delimiter":"|","filename_re_pattern":".*(customer_demographics_v1_[0-9]{8}\\.psv)$","frequency":{"days":1},"from":"2015-07-01","drop_threshold":{"rows":null,"percentage":0.05},"dry_run":false,"header":true,"text_qualifier":null,"landing_path":"landing","schema":{"fields":[{"time_key":true,"format":"yyyy-MM-dd","metadata":{},"name":"record...
scala> json.validate[MyJson]
res0: play.api.libs.json.JsResult[MyJson] = JsSuccess(MyJson(List(Entity(customers,5,customer_id,integer), Entity(accounts,7,account_id,string), Entity(products,1,product_id,integer)),List(Datafeed(customer_demographics,1,|,.*(customer_demographics_v1_[0-9]{8}\.psv)$,Frequency(1),2015-07-01,DropThreshold(None,0.05),false,true,None,landing,Schema(List(Field(None,Some(true),Some(Left(yyyy-MM-dd)),Some({}),Some(record_date),Some(false),Some(true),Some(Left(timestamp)),Some(Australia/Sydney)), Field(None,None,Some(Left(yyyy-MM-dd)),Some({}),Some(extract_date),Some(false),Some(true),Some(Left(timestamp)),Some(Australia/Sydney)), Field(Some(customers),None,None,Some({}),Some(customer_id),Some(false),Some(true),Some(Left(integer)),None), Field(None,None,None,Some({}),...
Remember to set any optional or nullable fields to an Option type.