JOLT spec for ternary operation - jolt

I am trying to write a jolt spec that converts the following input into the expected output shown below
INPUT :
{
"city": "Seattle",
"state": "WA",
"country": "US",
"date": "10/20/2018",
"userList": [
{
"name": "David",
"age": "22",
"sex": "M",
"company": "good"
},
{
"name": "Tom",
"age": "30",
"sex": "M",
"company": "good"
},
{
"name": "Annie",
"age": "25",
"sex": "F",
"company": "bad"
},
{
"name": "Aaron",
"age": "27",
"sex": "M",
"company": "bad"
}
]
}
EXPECTED OUTPUT:
{
"users" : [ {
"date" : "10/20/2018",
"username" : "David",
"age" : "22",
"sex" : "M",
"organization" : "good"
}, {
"date" : "10/20/2018",
"username" : "Tom",
"age" : "30",
"sex" : "M",
"organization" : "good"
} ],
"Date" : "10/20/2018",
"State" : "WA",
"Country" : "US"
}
I want to filter out all the elements in the list where company = bad or sex = F. Or, alternatively, keep only the elements where company = good and sex = M.
I need help in removing the elements from the list based on the specific conditions.
Is jolt recommended for such data driven conversions?
The Spec that I have written so far is
[{
"operation": "shift",
"spec": {
"userList": {
"*": {
"name": "users.[&1].username",
"age": "users.[&1].age",
"sex": "users.[&1].sex",
"company": "users.[&1].organization",
"#(2,date)": "users.[&1].date"
}
},
"date": "Date",
"state": "State",
"country": "Country"
}
}
]

OK, here is the process I followed to arrive at a solution for the "keep only M + good" case:
1. Copy the top-level elements as-is, since they are outside the scope of the filtering.
2. For the users content, go to the first field used for filtering ("users -> * -> sex -> M") and copy all fields when it matches.
3. Do the same for the other field used for filtering.
4. In steps 2 and 3 I used a map to avoid "null" entries in the array; the last shift switches back to an array.
[{
"operation": "shift",
"spec": {
"userList": {
"*": {
"name": "users.[&1].username",
"age": "users.[&1].age",
"sex": "users.[&1].sex",
"company": "users.[&1].organization",
"#(2,date)": "users.[&1].date"
}
},
"date": "Date",
"state": "State",
"country": "Country"
}
}
,
{
"operation": "shift",
"spec": {
"*": "&",
"users": {
"*": {
"sex": {
"M": {
"#(2,username)": "users.&3.username",
"#(2,age)": "users.&3.age",
"#(2,sex)": "users.&3.sex",
"#(2,organization)": "users.&3.organization",
"#(2,date)": "users.&3.date"
}
}
}
}
}
}
,
{
"operation": "shift",
"spec": {
"*": "&",
"users": {
"*": {
"organization": {
"good": {
"#(2,username)": "users.&3.username",
"#(2,age)": "users.&3.age",
"#(2,sex)": "users.&3.sex",
"#(2,organization)": "users.&3.organization",
"#(2,date)": "users.&3.date"
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"users": {
"*": "users[]"
}
}
}
]
For the other case, you just have to change the filter rules to do nothing when the rule matches and to copy the fields in all other cases (*). This gives the following spec (I did it only for the first field; updating it for the second field should not be an issue):
[{
"operation": "shift",
"spec": {
"userList": {
"*": {
"name": "users.[&1].username",
"age": "users.[&1].age",
"sex": "users.[&1].sex",
"company": "users.[&1].organization",
"#(2,date)": "users.[&1].date"
}
},
"date": "Date",
"state": "State",
"country": "Country"
}
}
,
{
"operation": "shift",
"spec": {
"*": "&",
"users": {
"*": {
"sex": {
"F": null,
"*": {
"#(2,username)": "users.&3.username",
"#(2,age)": "users.&3.age",
"#(2,sex)": "users.&3.sex",
"#(2,organization)": "users.&3.organization",
"#(2,date)": "users.&3.date"
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"users": {
"*": "users[]"
}
}
}
]

Related

jolt transformation : remove duplicates from json (list elements)

I am looking for some inputs to remove duplicates from a json string similar to below sample :
Sample Input :
{
"profile": true,
"address": [
"12",
"23"
],
"zipCodes": [
"12345",
"56789",
"12345",
"56789"
],
"phoneNumber": [
"87857",
"927465",
"274894",
"87857"
],
"userName": [
"ABC",
"PQR",
"ABC"
],
"enableEmailNot": "No"
}
Expected Output :
{
"profile": true,
"zipCodes": [
"12345",
"56789"
],
"phoneNumber": [
"87857",
"927465",
"274894"
],
"userName": [
"ABC",
"PQR"
],
"address": [
"12",
"23"
],
"enableEmailNot": "No"
}
Thanks for the help
You can use following specs
[
{
// exchange key-value pairs while separating lists from non-lists by prefixing the non-lists with "xx"
"operation": "shift",
"spec": {
"profile": "xx.&",
"*": {
"*": {
"$": "&2.#(0)"
}
},
"enableEmailNot": "xx.&"
}
},
{
// pick the first components of lists
"operation": "cardinality",
"spec": {
"*": {
"*": "ONE"
}
}
},
{
// reform the lists through use of "$" operator
"operation": "shift",
"spec": {
"xx": {
"*": "&"
},
"*": {
"*": {
"$": "&2[]"
}
}
}
}
]

JOLT Nested if condition

I have this input JSON:
{
"user": "123456",
"product": "television",
"category": "electronics",
"tag": "summer"
}
And this transformation:
[
{
"operation": "shift",
"spec": {
"product": {
"#(1,product)": "item",
"#(1,user)": {
"#2": "userBias"
}
},
"user": {
"#(1,user)": "user"
},
"category": {
"#category": "rules.[0].name",
"#(1,category)": "rules.[0].values[0]"
},
"tag": {
"rules": "rules",
"#tag": "rules.[1].name",
"#(2,tag)": "rules.[1].values[0]"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"userBias?": "=toInteger"
}
}
]
Which works fine and produces the following JSON:
{
"item": "television",
"userBias": 2,
"user": "123456",
"rules": [
{
"name": "category",
"values": [
"electronics"
]
},
{
"name": "tag",
"values": [
"summer"
]
}
]
}
If from the input though I delete "category": "electronics" so it becomes:
{
"user": "123456",
"product": "television",
"tag": "summer"
}
Then i get back the following result:
{
"item": "television",
"userBias": 2,
"user": "123456",
"rules": [
null,
{
"name": "tag",
"values": [
"summer"
]
}
]
}
The problem with the above is that it contains a null element inside the array, and I do not know how to get rid of it. I have tried recursivelySquashNulls, but it does not work.
Basically, what I am looking for is this: if both category and tag exist, then tag should go to rules[1]; if only tag exists, then tag should go to rules[0].
Thanks in advance,
giannis
This happens because you map the tag and category elements individually to fixed array indexes. Instead, treat them together as "the rest" of the attributes by matching them with the asterisk (*) wildcard key, such as
[
{
"operation": "shift",
"spec": {
"product": {
"#(1,product)": "item",
"#(1,user)": {
"#2": "userBias"
}
},
"user": {
"#(1,user)": "user"
},
"*": {
"$": "r[0].&.name",
"#(1,&)": "r[0].&.values[]"
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"r": {
"*": { "*": "rules" }
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"userBias?": "=toInteger"
}
}
]
Result1 :
Result2(without "category": "electronics" pair for the Input) :

Add array inside object with same key in jolt spec

I would like to move each element of the recommendations array inside the investor that has the same row_id
Original Json
{
"investors": [
{
"row_id": 1,
"name": "AAAA"
},
{
"row_id": 2,
"name": "BBBB"
}
],
"recommendations": [
{
"row_id": "1",
"title": "ABC"
},
{
"row_id": "2",
"title": "CDE"
}
]
}
I've tried a lot of specs at https://jolt-demo.appspot.com with no success
Specs tried...
[{
"operation": "shift",
"spec": {
"investors": {
"*": "investors[]"
},
"recommendations": {
"#": "recommendations[]"
}
}
}]
Desired Json
{
"investors": [
{
"row_id": 1,
"name": "AAAA",
"recommendations":[{
"row_id": "1",
"title": "ABC"
}]
},
{
"row_id": 2,
"name": "BBBB",
"recommendations":[{
"row_id": "2",
"title": "CDE"
}]
}
]
}
This can be done with a two-stage shift.
The first shift groups everything based on row_id.
(I'd suggest running the first shift on its own to see what the output is.)
The second shift uses that grouped output and formats the results.
[
{
"operation": "shift",
"spec": {
"*": {
"*": {
"row_id": {
"*": {
"#2": "&.&4"
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"investors": "investors.[#2]",
"recommendations": "investors.[#2].recommendations[]"
}
}
}
]

how to validate datatype in jolt transformation

I'm new to jolt transformation. I was wondering if there is a way to do a validation on data type then proceed.
I'm processing a json to insert record into hbase. From source I'm getting timestamp repeated for the same resource id which I want to use for row key.
So I just retrieve the first timestamp and concatenate it with the resource id to create the row key. But I have an issue when there is only one timestamp in the record, i.e. when it's not a list. I'd appreciate it if someone could help me handle this situation.
input data
{ "resource": {
"id": "200629068",
"name": "resource_name_1)",
"parent": {
"id": 200053744,
"name": "parent_name"
},
"properties": {
"AP_ifSpeed": "0",
"DisplaySpeed": "0 (NotApplicable)",
"description": "description"
}
},
"data": [
{
"metric": {
"id": "2215",
"name": "metric_name 1"
},
"timestamp": 1535064595000,
"value": 0
},
{
"metric": {
"id": "2216",
"name": "metric_name_2"
},
"timestamp": 1535064595000,
"value": 1
}
]
}
Jolt transformation
[{
"operation": "shift",
"spec": {
"resource": {
// "id": "resource_&",
"name": "resource_&",
"id": "resource_&",
"parent": {
"id": "parent_&",
"name": "parent_&"
},
"properties": {
"*": "&"
}
},
"data": {
"*": {
"metric": {
"id": {
"*": {
"#(3,value)": "&1"
}
},
"name": {
"*": {
"#(3,value)": "&1"
}
}
},
"timestamp": "timestamp"
}
}
}
}, {
"operation": "shift",
"spec": {
"timestamp": {
// get first element from list
"0": "&1"
},
"*": "&"
}
},
{
"operation": "modify-default-beta",
"spec": {
"rowkey": "=concat(#(1,resource_id),'_',#(1,timestamp))"
}
}
]
Output I'm getting
{ "resource_name" : "resource_name_1)",
"resource_id" : "200629068",
"parent_id" : 200053744,
"parent_name" : "parent_name",
"AP_ifSpeed" : "0",
"DisplaySpeed" : "0 (NotApplicable)",
"description" : "description",
"2215" : 0,
"metric_name 1" : 0,
"timestamp" : 1535064595000,
"2216" : 1,
"metric_name_2" : 1,
"rowkey" : "200629068_1535064595000"
}
when there is only one timestamp then I get
"rowkey" : "200629068_"
In your shift make the output "timestamp" always be an array, even if the incoming data array only has one element in it.
"timestamp": "timestamp[]"

I have a simple json file that I'm trying to transform using jolt and having trouble with it since I'm very new to jolt

Here's what I want to do:
1. Concatenate the first and last name into name
2. Rename id to employeeID and prefix its value with emp_ (e.g. emp_100)
3. If department is equal to sales, then department should be "SL"
4. If department is equal to returns, then department should be "RET"
Here's my input:
{
"employees": [{
"f_name": "tom",
"l_name": "smith",
"id": "100",
"department": "sales",
"company": "ABC Intelligence"
},
{
"f_name": "john",
"l_name": "doe",
"id": "102",
"department": "returns",
"company": "ABC Intelligence"
}, {
"f_name": "jane",
"l_name": "doe",
"id": "103",
"department": "sales",
"company": "ABC Intelligence"
}
]
}
specs:
[{
"operation": "shift",
"spec": {
"employees": {
"*": {
"name": "=concat(#(1,f_name),' ',#(1,l_name))"
}
}
}
},
{
"operation": "remove",
"spec": {
"employees": {
"*": {
"f_name": "",
"l_name": ""
}
}
}
}
]
desired output:
{
"employees": [
{
"name": "tom smith",
"employeeID": "emp_100",
"department": "SL",
"company": "ABC Intelligence"
},
{
"name": "john doe",
"employeeID": "emp_102",
"department": "RET",
"company": "ABC Intelligence"
},
{
"name": "jane doe",
"employeeID": "emp_103",
"department": "SL",
"company": "ABC Intelligence"
}
]
}
I was able to get the first rule but still struggling with the others. Any help would be appreciated
Spec
[
{
"operation": "modify-default-beta",
"spec": {
// add the mapping of department name to code, so we can use it later
"deptMap": {
"sales": "SL",
"returns": "RET"
},
"employees": {
"*": {
// build the fullName from the first and last names
"name": "=concat(#(1,f_name),' ',#(1,l_name))",
// build the employeeID
"employeeID": "=concat(emp_,#(1,id))"
}
}
}
},
{
"operation": "shift",
"spec": {
"employees": {
"*": { // employees arrays
// pass name, company, and employeeID thru
"name": "employees[&1].name",
"company": "employees[&1].company",
"employeeID": "employees[&1].employeeID",
// look up the department code
"department": {
"*": { // value of dept
// go up 5 levels, then come back down into the deptMap
"#(4,deptMap.&)": "employees[&3].department"
}
}
}
}
}
}
]