Related
I am trying to convert a json to avro using 'kafka-avro-console-producer' and publish it to kafka topic.
I am able to do that flat json/schema's but for below given schema and json I am getting
"org.apache.avro.AvroTypeException: Unknown union branch EventId" error.
Any help would be appreciated.
Schema :
{
"type": "record",
"name": "Envelope",
"namespace": "CoreOLTPEvents.dbo.Event",
"fields": [{
"name": "before",
"type": ["null", {
"type": "record",
"name": "Value",
"fields": [{
"name": "EventId",
"type": "long"
}, {
"name": "CameraId",
"type": ["null", "long"],
"default": null
}, {
"name": "SiteId",
"type": ["null", "long"],
"default": null
}],
"connect.name": "CoreOLTPEvents.dbo.Event.Value"
}],
"default": null
}, {
"name": "after",
"type": ["null", "Value"],
"default": null
}, {
"name": "op",
"type": "string"
}, {
"name": "ts_ms",
"type": ["null", "long"],
"default": null
}],
"connect.name": "CoreOLTPEvents.dbo.Event.Envelope"
}
And Json input is like below :
{
"before": null,
"after": {
"EventId": 12,
"CameraId": 10,
"SiteId": 11974
},
"op": "C",
"ts_ms": null
}
And in my case I cant alter schema, I can alter only json such a way that it works
If you are using the Avro JSON format, the input you have is slightly off. For unions, non-null values need to be specified such that the type information is listed: https://avro.apache.org/docs/current/spec.html#json_encoding
See below for an example which I think should work.
{
"before": null,
"after": {
"CoreOLTPEvents.dbo.Event.Value": {
"EventId": 12,
"CameraId": {
"long": 10
},
"SiteId": {
"long": 11974
}
}
},
"op": "C",
"ts_ms": null
}
Removing "connect.name": "CoreOLTPEvents.dbo.Event.Value" and "connect.name": "CoreOLTPEvents.dbo.Event.Envelope" as The RecordType can only contains {'namespace', 'aliases', 'fields', 'name', 'type', 'doc'} keys.
Could you try with below schema and see if you are able to produce the msg?
{
"type": "record",
"name": "Envelope",
"namespace": "CoreOLTPEvents.dbo.Event",
"fields": [
{
"name": "before",
"type": [
"null",
{
"type": "record",
"name": "Value",
"fields": [
{
"name": "EventId",
"type": "long"
},
{
"name": "CameraId",
"type": [
"null",
"long"
],
"default": "null"
},
{
"name": "SiteId",
"type": [
"null",
"long"
],
"default": "null"
}
]
}
],
"default": null
},
{
"name": "after",
"type": [
"null",
"Value"
],
"default": null
},
{
"name": "op",
"type": "string"
},
{
"name": "ts_ms",
"type": [
"null",
"long"
],
"default": null
}
]
}
I am creating a DataFusion pipeline to ingest a CSV file from s3 bucket, applying wrangler directives and storing it in GCS bucket. The input CSV file had 18 columns. However, the output CSV file has only 8 columns. I have a doubt that this could be due to the CSV encoding format, but I am not sure. What could be the reason here?
Pipeline JSON
{
"name": "aws_fusion_v1",
"description": "Data Pipeline Application",
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.1.2",
"scope": "SYSTEM"
},
"config": {
"resources": {
"memoryMB": 2048,
"virtualCores": 1
},
"driverResources": {
"memoryMB": 2048,
"virtualCores": 1
},
"connections": [
{
"from": "Amazon S3",
"to": "Wrangler"
},
{
"from": "Wrangler",
"to": "GCS2"
},
{
"from": "Argument Setter",
"to": "Amazon S3"
}
],
"comments": [],
"postActions": [],
"properties": {},
"processTimingEnabled": true,
"stageLoggingEnabled": true,
"stages": [
{
"name": "Amazon S3",
"plugin": {
"name": "S3",
"type": "batchsource",
"label": "Amazon S3",
"artifact": {
"name": "amazon-s3-plugins",
"version": "1.11.0",
"scope": "SYSTEM"
},
"properties": {
"format": "text",
"authenticationMethod": "Access Credentials",
"filenameOnly": "false",
"recursive": "false",
"ignoreNonExistingFolders": "false",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}",
"referenceName": "aws_source",
"path": "${input.bucket}",
"accessID": "${input.access_id}",
"accessKey": "${input.access_key}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}"
}
],
"type": "batchsource",
"label": "Amazon S3",
"icon": "icon-s3"
},
{
"name": "Wrangler",
"plugin": {
"name": "Wrangler",
"type": "transform",
"label": "Wrangler",
"artifact": {
"name": "wrangler-transform",
"version": "4.1.5",
"scope": "SYSTEM"
},
"properties": {
"field": "*",
"precondition": "false",
"threshold": "1",
"workspaceId": "804a2995-7c06-4ab2-b342-a9a01aa03a3d",
"schema": "${output.schema}",
"directives": "${directive}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "${output.schema}"
}
],
"inputSchema": [
{
"name": "Amazon S3",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}"
}
],
"type": "transform",
"label": "Wrangler",
"icon": "icon-DataPreparation"
},
{
"name": "GCS2",
"plugin": {
"name": "GCS",
"type": "batchsink",
"label": "GCS2",
"artifact": {
"name": "google-cloud",
"version": "0.14.2",
"scope": "SYSTEM"
},
"properties": {
"project": "auto-detect",
"suffix": "yyyy-MM-dd-HH-mm",
"format": "csv",
"serviceFilePath": "auto-detect",
"location": "us",
"referenceName": "gcs_sink",
"path": "${output.path}",
"schema": "${output.schema}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "${output.schema}"
}
],
"inputSchema": [
{
"name": "Wrangler",
"schema": ""
}
],
"type": "batchsink",
"label": "GCS2",
"icon": "fa-plug"
},
{
"name": "Argument Setter",
"plugin": {
"name": "ArgumentSetter",
"type": "action",
"label": "Argument Setter",
"artifact": {
"name": "argument-setter-plugins",
"version": "1.1.1",
"scope": "USER"
},
"properties": {
"method": "GET",
"connectTimeout": "60000",
"readTimeout": "60000",
"numRetries": "0",
"followRedirects": "true",
"url": "${argfile}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": ""
}
],
"type": "action",
"label": "Argument Setter",
"icon": "fa-plug"
}
],
"schedule": "0 * * * *",
"engine": "spark",
"numOfRecordsPreview": 100,
"description": "Data Pipeline Application",
"maxConcurrentRuns": 1
}
}
Edit:
The missing columns in the output file were due to spaces in the column names. But I am facing another issue. In wrangler, when I pass a directive as
"parse-as-csv :body ',' false", the output file is empty. But when I pass something like "parse-as-csv :body ',' true", the output file has all the data without header as expected.
I'm trying to perform Header validation for incoming GET request. I referred the Camel JSON schema validator component and followed below steps to implement in my project i.e.
Adding camel-json-validator-starter dependency in build.gradle
Adding Employee.json (YAML converted to JSON) in Resource folder of my Spring boot project. Here initially I had Open API 3.0 yaml specification file and I converted the same to json
Invoking validation with below code
rest(/employee).id("get-employee")
.produces(JSON_MEDIA_TYPE)
.get()
.description("The employee API")
.outType(EmployeeResponse.class)
.responseMessage()
.code(HttpStatus.OK.toString())
.message("Get Employee")
.endResponseMessage()
.route()
.to("json-validator:openapi.json")
.to("bean:employeeService?method=getEmployee()");
Running the project throws a org.apache.camel.component.jsonvalidator.NoJsonBodyValidationException, I'm using GET request but why is it expecting Request body, I just wanted to validate the Headers and request param from the incoming request. I'm not sure if my approach is right and what I'm missing.
I ran into this problem last year when adopting OpenAPI and came to the conclusion that it was too much work. I could not get FULL validation from the JSON validator using OpenAPI because there was some differences between the way OpenAPI declares schema definitions and the full JSON schema definitions.
Looking a the documentation of the JSON validation component you find this:
The JSON Schema Validator component performs bean validation of the message body against JSON Schemas v4 draft using the NetworkNT JSON Schema library (https://github.com/networknt/json-schema-validator). This is a full stand alone JSON Schema and if you read the github pages you find this.
OpenAPI Support
The OpenAPI 3.0 specification is using JSON schema to validate the request/response, but there are some differences. With a configuration file, you can enable the library to work with OpenAPI 3.0 validation.
OpenAPI schema appears to be a subset of the real JSON Schema.
Before I show you a more detailed example. Look at the example given in the camel documentation here: https://camel.apache.org/components/latest/json-validator-component.html. Compare that json schema file with the openAPI schema definitions and you will see they are not the same.
A useful tool here is https://jsonschema.net you can paste your json example here and infer a schema. I use this tool and the OpenAPI Pet Store example in the example below,
OpenAPI Petstore Pet Object Example:
{
"id": 0,
"category": {
"id": 0,
"name": "string"
},
"name": "doggie",
"photoUrls": [
"string"
],
"tags": [
{
"id": 0,
"name": "string"
}
],
"status": "available"
}
The openAPI specification saved in JSON produces this definition:
"Pet": {
"type": "object",
"required": [
"name",
"photoUrls"
],
"properties": {
"id": {
"type": "integer",
"format": "int64"
},
"category": {
"$ref": "#/definitions/Category"
},
"name": {
"type": "string",
"example": "doggie"
},
"photoUrls": {
"type": "array",
"xml": {
"name": "photoUrl",
"wrapped": true
},
"items": {
"type": "string"
}
},
"tags": {
"type": "array",
"xml": {
"name": "tag",
"wrapped": true
},
"items": {
"$ref": "#/definitions/Tag"
}
},
"status": {
"type": "string",
"description": "pet status in the store",
"enum": [
"available",
"pending",
"sold"
]
}
},
"xml": {
"name": "Pet"
}
}
When I convert this to proper JSON schema syntax the JSON Schema looks like this:
{
"definitions": {},
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://example.com/root.json",
"type": "object",
"title": "The Root Schema",
"required": [
"id",
"category",
"name",
"photoUrls",
"tags",
"status"
],
"properties": {
"id": {
"$id": "#/properties/id",
"type": "integer",
"title": "The Id Schema",
"default": 0,
"examples": [
0
]
},
"category": {
"$id": "#/properties/category",
"type": "object",
"title": "The Category Schema",
"required": [
"id",
"name"
],
"properties": {
"id": {
"$id": "#/properties/category/properties/id",
"type": "integer",
"title": "The Id Schema",
"default": 0,
"examples": [
0
]
},
"name": {
"$id": "#/properties/category/properties/name",
"type": "string",
"title": "The Name Schema",
"default": "",
"examples": [
"string"
],
"pattern": "^(.*)$"
}
}
},
"name": {
"$id": "#/properties/name",
"type": "string",
"title": "The Name Schema",
"default": "",
"examples": [
"doggie"
],
"pattern": "^(.*)$"
},
"photoUrls": {
"$id": "#/properties/photoUrls",
"type": "array",
"title": "The Photourls Schema",
"items": {
"$id": "#/properties/photoUrls/items",
"type": "string",
"title": "The Items Schema",
"default": "",
"examples": [
"string"
],
"pattern": "^(.*)$"
}
},
"tags": {
"$id": "#/properties/tags",
"type": "array",
"title": "The Tags Schema",
"items": {
"$id": "#/properties/tags/items",
"type": "object",
"title": "The Items Schema",
"required": [
"id",
"name"
],
"properties": {
"id": {
"$id": "#/properties/tags/items/properties/id",
"type": "integer",
"title": "The Id Schema",
"default": 0,
"examples": [
0
]
},
"name": {
"$id": "#/properties/tags/items/properties/name",
"type": "string",
"title": "The Name Schema",
"default": "",
"examples": [
"string"
],
"pattern": "^(.*)$"
}
}
}
},
"status": {
"$id": "#/properties/status",
"type": "string",
"title": "The Status Schema",
"default": "",
"examples": [
"available"
],
"pattern": "^(.*)$"
}
}
}
There is some differences between OpenAPI's schema definition and JSON Schema definition.
failOnNullBody (producer) - Whether to fail if no body exists.
Default is true
Try setting the option in your call:
.to("json-validator:openapi.json?failOnNullBody=false")
I have followed the official guide about entity creation/update and subscribtion in Orion, they are working and I get success responses. But Orion doesnt send notifications to Cygnus.
Am i missing some step here?
These are the basic scripts I am using:
create entity
{
"contextElements": [{
"type": "Room",
"isPattern": "false",
"id": "2FebRoom",
"attributes": [{
"name": "temperature",
"type": "float",
"value": "888"
}]
}],
"updateAction": "APPEND"
}
subscribe entity http://orion.lab.fiware.org:1026/v1/subscribeContext
{
"entities": [
{
"type": "Room",
"isPattern": "false",
"id": "2FebRoom"
}
],
"attributes": [
"temperature"
],
"reference": "http://A.B.C.D:5050/notify",
"duration": "P1M",
"notifyConditions": [
{
"type": "ONCHANGE",
"condValues": [
"temperature"
]
}
],
"throttling": "PT5S"
}
update entity
{
"contextElements": [
{
"type": "Room",
"isPattern": "false",
"id": "2FebRoom",
"attributes": [
{
"name": "temperature",
"type": "float",
"value": "111"
}
]
}
],
"updateAction": "UPDATE"
}
I can query the new value in Orion after the update operation but Cygnus doesnt receive any notification, what would be the problem?
Many thanks
Problem was caused by a temporal outage in outgoing notifications from orion.lab.fiware.org. The problem has been solved.
I am using jira rest api's in my application.
I have found the api for getting the meta-data for creating jira issue but that API doesn't return default values of the fields for example :-
This is the request :-
http://kelpie9:8081/rest/api/latest/issue/createmeta?projectKeys=QA&issuetypeNames=Bug&expand=project.issuetypes.fields
the default value of priority field is set to "major" and the description of priority is also customized but the return from api is:-
{
"expand": "projects",
"projects": [
{
"expand": "issuetypes",
"self": "http://kelpie9:8081/rest/api/2/project/QA",
"id": "10010",
"key": "QA",
"name": "QA",
"avatarUrls": {
"16x16": "http://kelpie9:8081/secure/projectavatar?size=small&pid=10010&avatarId=10011",
"48x48": "http://kelpie9:8081/secure/projectavatar?pid=10010&avatarId=10011"
},
"issuetypes": [
{
"expand": "fields",
"self": "http://kelpie9:8081/rest/api/2/issuetype/1",
"id": 1,
"name": "Bug",
"iconUrl": "http://kelpie9:8081/images/icons/bug.gif",
"fields": {
"summary": {
"required": true,
"schema": {
"type": "string",
"system": "summary"
},
"operations": [
"set"
]
},
"timetracking": {
"required": false,
"operations": [ ]
},
"issuetype": {
"required": true,
"schema": {
"type": "issuetype",
"system": "issuetype"
},
"operations": [ ],
"allowedValues": [
{
"id": "1",
"name": "Bug",
"description": "A problem which impairs or prevents the functions of the product.",
"iconUrl": "http://kelpie9:8081/images/icons/bug.gif"
}
]
},
"priority": {
"required": false,
"schema": {
"type": "priority",
"system": "priority"
},
"name": "Priority",
"operations": [
"set"
],
"allowedValues": [
{
"self": "http://172.19.30.101:18080/rest/api/2/priority/1",
"iconUrl": "http://172.19.30.101:18080/images/icons/priority_blocker.gif",
"name": "Blocker",
"id": "1"
},
{
"self": "http://172.19.30.101:18080/rest/api/2/priority/2",
"iconUrl": "http://172.19.30.101:18080/images/icons/priority_critical.gif",
"name": "Critical",
"id": "2"
},
{
"self": "http://172.19.30.101:18080/rest/api/2/priority/3",
"iconUrl": "http://172.19.30.101:18080/images/icons/priority_major.gif",
"name": "Major",
"id": "3"
},
{
"self": "http://172.19.30.101:18080/rest/api/2/priority/4",
"iconUrl": "http://172.19.30.101:18080/images/icons/priority_minor.gif",
"name": "Minor",
"id": "4"
},
{
"self": "http://172.19.30.101:18080/rest/api/2/priority/5",
"iconUrl": "http://172.19.30.101:18080/images/icons/priority_trivial.gif",
"name": "Trivial",
"id": "5"
}
]
},
"customfield_10080": {
"required": false,
"schema": {
"type": "array",
"items": "string",
"custom": "com.atlassian.jira.plugin.system.customfieldtypes:labels",
"customId": 10080
},
"operations": [ ]
},
"customfield_10010": {
"required": false,
"schema": {
"type": "array",
"items": "string",
"custom": "com.atlassian.jira.plugin.system.customfieldtypes:labels",
"customId": 10010
},
"operations": [ ]
},
"customfield_10071": {
"required": false,
"schema": {
"type": "array",
"items": "string",
"custom": "com.atlassian.jira.plugin.system.customfieldtypes:textfield",
"customId": 10071
},
"operations": [ ]
}
}
}
]
}
]
}
There is nothing like default value or description in priority field, how will I get those values?