Group results in Jolt - jolt

I am trying to group some data in arrays based on a json field.
[
{
"Type": "Cash",
"Value1": "668131AA3",
"ParentId": "87984E01",
"CashId": "6688E582",
"StockId": null
},
{
"Type": "Stock",
"Value1": "668131AA3",
"ParentId": "87984E01",
"CashId": "1FD714A9",
"StockId": "1FD714A9"
},
{
"Type": "Stock",
"Value1": "668131AA3",
"ParentId": "87984E01",
"CashId": "0635B045",
"StockId": "0635B045"
},
{
"Type": "Cash",
"Value1": "668131AA3",
"ParentId": "87984E01",
"CashId": "47E65472",
"StockId": null
}
]
And the desired output is:
"Value1": "668131AA3",
"ParentId": "87984E01",
"CashPayouts" : [
{"CashId": "6688E582"},
{"CashId": "87984E01"}
],
"StockPayouts" : [
{"StockId": "1FD714A9","CashId": "1FD714A9"},
{"StockId": "0635B045","CashId": "0635B045"}
]
}
Basically, if the Type=Cash, CashId field should be grouped in CashPayouts array. For Type=Stock, group two fields in StockPayouts array.
Is it possible to do this transformation in Jolt? If it is true, how?
Thanks,
Juan

This should be completed example:
[
{
"operation": "shift",
"spec": {
"*": {
"Value1": "Value1",
"ParentId": "ParentId",
"Type": {
"Stock": {
"#(2)": {
"CashId": "t.StockPayouts[&4].CashId",
"StockId": "t.StockPayouts[&4].StockId"
}
},
"Cash": {
"#(2)": {
"CashId": "t.CashPayouts[&4].CashId"
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls",
"Value1": "=firstElement(#(1,Value1))",
"ParentId": "=firstElement(#(1,ParentId))"
}
},
{
"operation": "shift",
"spec": {
"Value1": "Value1",
"ParentId": "ParentId",
"t": ""
}
}
]

Related

Improve JOLT spec to get expected result

Source JSON:
[
{
"other": false,
"granularity": [
{
"impressions": 15,
"date": "2021-11-02"
},
{
"impressions": 67,
"date": "2021-11-03"
}
],
"metadata": {
"adGroupId": 5792423,
"startTime": "2021-06-29T21:00:00.000"
}
},
{
"other": false,
"granularity": [
{
"impressions": 226,
"date": "2021-11-02"
},
{
"impressions": 2339,
"date": "2021-11-03"
}
],
"metadata": {
"adGroupId": 578101600,
"startTime": "2021-06-29T08:05:52.176"
}
}
]
I tried with this spec:
[
{
"operation": "shift",
"spec": {
"*": {
"granularity": {
"*": "[&2].&"
},
"metadata": {
"adGroupId": "[&2].adGroupId"
}
}
}
}
]
Result:
[
{
"0": {
"impressions": 15,
"date": "2021-11-02"
},
"1": {
"impressions": 67,
"date": "2021-11-03"
},
"adGroupId": 5792423
},
{
"0": {
"impressions": 226,
"date": "2021-11-02"
},
"1": {
"impressions": 2339,
"date": "2021-11-03"
},
"adGroupId": 578101600
}
]
I want to remove these indexes 0,1,...etc from top level. And adGroupId should be in each row.
So I expect this result:
[
{
"impressions": 15,
"date": "2021-11-02",
"adGroupId": 5792423
},
{
"impressions": 67,
"date": "2021-11-03",
"adGroupId": 5792423
},
{
"impressions": 226,
"date": "2021-11-02",
"adGroupId": 578101600
},
{
"impressions": 2339,
"date": "2021-11-03",
"adGroupId": 578101600
}
]
How to fix it? Thanks!
You can collect the key-value pairs under the granularity key while prefixing the subkeys by [&3].&1. in order to pick the common indexes by going three levels up, and then remove the keys of the indexes at the last step such as
[
{
"operation": "shift",
"spec": {
"*": {
"granularity": {
"*": {
"*": "[&3].&1.&",
"#(2,metadata.adGroupId)": "[&3].&1.adGroupId"
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": ""
}
}
}
]

how to find selected fields from sample data in nested array in mongodb

I have sample collection of data
[
{
data: [
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters": {
"batter": [
{
"id": "1001",
"type": "Regular"
},
{
"id": "1002",
"type": "Chocolate"
},
{
"id": "1003",
"type": "Blueberry"
},
{
"id": "1004",
"type": "Devil's Food"
}
]
},
"topping": [
{
"id": "5001",
"type": "None"
},
{
"id": "5002",
"type": "Glazed"
},
{
"id": "5005",
"type": "Sugar"
},
{
"id": "5007",
"type": "Powdered Sugar"
},
{
"id": "5006",
"type": "Chocolate with Sprinkles"
},
{
"id": "5003",
"type": "Chocolate"
},
{
"id": "5004",
"type": "Maple"
}
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters": {
"batter": [
{
"id": "1001",
"type": "Regular"
}
]
},
"topping": [
{
"id": "5001",
"type": "None"
},
{
"id": "5002",
"type": "Glazed"
},
{
"id": "5005",
"type": "Sugar"
},
{
"id": "5003",
"type": "Chocolate"
},
{
"id": "5004",
"type": "Maple"
}
]
},
{
"id": "0003",
"type": "donut",
"name": "Old Fashioned",
"ppu": 0.55,
"batters": {
"batter": [
{
"id": "1001",
"type": "Regular"
},
{
"id": "1002",
"type": "Chocolate"
}
]
},
"topping": [
{
"id": "5001",
"type": "None"
},
{
"id": "5002",
"type": "Glazed"
},
{
"id": "5003",
"type": "Chocolate"
},
{
"id": "5004",
"type": "Maple"
}
]
}
]
}
]
and I need data only in this formate of a specific id.
[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55
}
]
aggregate
db.collection.aggregate({
"$unwind": "$data"
},
{
"$match": {
"data.id": "0001"
}
},
{
"$project": {
"_id": "$data.id",
"type": "$data.type",
"name": "$data.name",
"ppu": "$data.ppu"
}
})
mongoplayground

mongodb agregate and filter data

I try to filter some results data from mongodb with mongoose in javascript.
This is my json structure:
{
"name": "john",
"firstname": "doe",
"yearold": 22,
"recipes": [
{
"title": "cheesecake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": 6,
"unit": "piece"
},
{
"name": "oil",
"label": "Specific oil",
"unit": "oz",
"value": 0.2
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 450,
"unit": "gr"
}
]
},
{
"title": "cake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": 6,
"unit": "piece"
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 500,
"unit": "gr"
},
]
}
]
}
In some case i need to return json data with hiding some values. For example I have a list that specifies all the values ​​to hide
hidekeys=["egg"];
and i would like to get this:
{
"name": "john",
"firstname": "doe",
"yearold": 22,
"recipes": [
{
"title": "cheesecake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": #######,
"unit": "piece"
},
{
"name": "oil",
"label": "Specific oil",
"unit": "oz",
"value": 0.2
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 450,
"unit": "gr"
}
]
},
{
"title": "cake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": #######,
"unit": "piece"
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 500,
"unit": "gr"
},
]
}
]
}
For each recipe i need to hide ingredient value if it is specified in hidekeys.
I tried something with $project and $cond but it doesnt works
Here's a quick way of how to achieve this using $map
const hidekeys = ["egg"];
db.collection.aggregate([
{
$addFields: {
recipes: {
$map: {
input: "$recipes",
as: "recipe",
in: {
$mergeObjects: [
"$$recipe",
{
data: {
$map: {
input: "$$recipe.data",
as: "datum",
in: {
"$mergeObjects": [
"$$datum",
{
$cond: [
{
"$setIsSubset": [
[
"$$datum.name"
],
hidekeys
]
},
{
value: "#####"
},
{
value: "$$datum.value"
}
]
}
]
}
}
}
}
]
}
}
}
}
}
])
Mongo Playground

JOLT Nested if condition

I have this input JSON:
{
"user": "123456",
"product": "television",
"category": "electronics",
"tag": "summer"
}
And this transformation:
[
{
"operation": "shift",
"spec": {
"product": {
"#(1,product)": "item",
"#(1,user)": {
"#2": "userBias"
}
},
"user": {
"#(1,user)": "user"
},
"category": {
"#category": "rules.[0].name",
"#(1,category)": "rules.[0].values[0]"
},
"tag": {
"rules": "rules",
"#tag": "rules.[1].name",
"#(2,tag)": "rules.[1].values[0]"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"userBias?": "=toInteger"
}
}
]
Which works fine and produces the following JSON:
{
"item": "television",
"userBias": 2,
"user": "123456",
"rules": [
{
"name": "category",
"values": [
"electronics"
]
},
{
"name": "tag",
"values": [
"summer"
]
}
]
}
If from the input though I delete "category": "electronics" so it becomes:
{
"user": "123456",
"product": "television",
"tag": "summer"
}
Then i get back the following result:
{
"item": "television",
"userBias": 2,
"user": "123456",
"rules": [
null,
{
"name": "tag",
"values": [
"summer"
]
}
]
}
The problem with the above is that it contains a null element inside the array and I do not know how to get rid of it. I have tried with recursivelySquashNulls but it does not work.
Also basically what am looking for is if both category and tag exist then tag should go to rules[1] if only tag exists then tag should go to rules[0].
Thanks in advance,
giannis
Because you specify tag and category elements individually. Rather, prefer putting them into the category rest by combining them under asterisked key notation such as
[
{
"operation": "shift",
"spec": {
"product": {
"#(1,product)": "item",
"#(1,user)": {
"#2": "userBias"
}
},
"user": {
"#(1,user)": "user"
},
"*": {
"$": "r[0].&.name",
"#(1,&)": "r[0].&.values[]"
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"r": {
"*": { "*": "rules" }
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"userBias?": "=toInteger"
}
}
]
Result1 :
Result2(without "category": "electronics" pair for the Input) :

JOLT Spec for supporting the input json

My input JSON looks like below, but i am not sure of how to do internal array related parameter transformation using JOLT. Any help is appreciated as i am new to JOLT
{
"pktId": 7603,
"seq": 1,
"vehicleNumber": "66079",
"rmdLocation": "1",
"rmdTime": "2019-01-07T11:27:05.745Z",
"position": {
"lat": 55.4911232,
"lng": -3.686831
},
"dataSource": 11,
"frames": [
{
"seq": 0,
"card": 8,
"channel": 6,
"value": 117
},
{
"seq": 1,
"card": 8,
"channel": 6,
"value": 120
}
]
}
Below is the spec file i have created but it is not complete
[
{
"operation": "shift",
"spec": {
"frames": {
"*": {
"seq": "parameters[&1].seq",
"card": "parameters[&1].card",
"channel": "parameters[&1].channel",
"value": "parameters[&1].value"
}
},
"rmdTime": "messageTime",
"vehicleNumber": "roadNumber"
}
},
{
"operation": "default",
"spec": {
"appId": "configMsgXX",
"customerId": "ABC",
"messageRev": 1,
"messageType": "customStatistics"
}
}
]
Expected output is as below
{
"appId": "configMsgXX",
"customerId": "ABC",
"deviceId": string1+roadNumber+string2",
"messageRev": 1,
"messageTime": 1543395341000,
"messageType": "customStatistics",
"parameters": [
{
"address": string1+string2,
"name": "EM2000VoltageMainGenerator",
"timestamp": 1543395341000,
"quality": "3",
"datatype": "INTEGER",
"value": 100,
"qualityReason": "Stale Data",
"category": "REAL"
}
],
"roadNumber": 66079
}
I am using this library https://github.com/bazaarvoice/jolt
[
{
"operation": "shift",
"spec": {
"frames": {
"*": {
"seq": "parameters[&1].seq",
"card": "parameters[&1].card",
"channel": "parameters[&1].channel",
"value": "parameters[&1].value",
"#(2,rmdTime)": "parameters[&1].timestamp"
}
},
"rmdTime": "messageTime",
"vehicleNumber": "roadNumber"
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"parameters": {
"*": {
"quality": "3",
"name": "",
"address": "",
"datatype": "INTEGER",
"qualityReason": "Stale Data",
"category": "REAL"
}
}
}
},
{
"operation": "default",
"spec": {
"appId": "configMsgXX",
"customerId": "ABC",
"messageRev": 1,
"messageType": "customStatistics"
}
}
]