Nested transformation using Jolt transformer - jolt

How can I do a nested / recursive transformation using Jolt?
I'm trying to get the desired output before passing it to the subsequent service.
My input is:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 1.000438,
"hits": [
{
"_id": "AV-SJgvFPkCspwtrqHA1",
"_source": {
"tenant_id": "tenant1",
"session_id": "e780ff74-d33e-4024-9bb7-971f067484ea"
},
"inner_hits": {
"network_events": {
"hits": {
"total": 1,
"max_score": 6.0892797,
"hits": [
{
"_source": {
"event_id": 16,
"response_time": 0,
"url": "http://www.google.com/"
}
},
{
"_source": {
"event_id": 18,
"response_time": 1,
"url": "http://www.google1.com/"
}
}
]
}
}
}
},
{
"_id": "BS-SJgvFPkCspwtrqHA1",
"_source": {
"tenant_id": "tenant2",
"session_id": "f4939272-d33e-4024-9bb7-971f067484ea"
},
"inner_hits": {
"network_events": {
"hits": {
"total": 1,
"max_score": 6.0892797,
"hits": [
{
"_source": {
"event_id": 18,
"response_time": 4,
"url": "http://www.google4.com/"
}
},
{
"_source": {
"event_id": 5,
"response_time": 5,
"url": "http://www.google5.com/"
}
}
]
}
}
}
}
]
}
}
And the desired output is:
{
  "sessions": [
    {
      "session_id": "S1",
      "tenant_id": "T1",
      "network_events": [
        {
          "url": "A",
          "response_time": 22
        },
        {
          "url": "B",
          "response_time": 1
        }
      ]
    },
    {
      "session_id": "S2",
      "tenant_id": "T1",
      "network_events": [
        {
          "url": "C",
          "response_time": 22
        }
      ]
    }
  ]
}
Is this possible using Jolt? I tried multiple combinations based on the sample examples but didn't get far.
I am new to Jolt, so any help will be appreciated.

A chained spec works: a shift to build the sessions array, then a remove to drop the unwanted event fields. Note the "@" on the LHS, which copies the whole _source object into network_events:
[
  {
    "operation": "shift",
    "spec": {
      "hits": {
        "hits": {
          "*": {
            "_source": {
              "tenant_id": "sessions[#3].tenant_id",
              "session_id": "sessions[#3].session_id"
            },
            "inner_hits": {
              "network_events": {
                "hits": {
                  "hits": {
                    "*": {
                      "_source": {
                        "@": "sessions[#8].network_events[]"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "remove",
    "spec": {
      "sessions": {
        "*": {
          "network_events": {
            "*": {
              "data_in": "",
              "data_out": "",
              "attributes": "",
              "event_id": "",
              "parent_url": "",
              "resource_type": ""
            }
          }
        }
      }
    }
  }
]
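For completeness, one way to run a chained spec like this from Java is Jolt's Chainr API. A minimal sketch, assuming the spec above is saved as /spec.json and the input as /input.json on the classpath (those file names are placeholders, not part of the question):
import com.bazaarvoice.jolt.Chainr;
import com.bazaarvoice.jolt.JsonUtils;

import java.util.List;
import java.util.Map;

public class JoltDemo {
    public static void main(String[] args) {
        // Load the chained spec (the shift + remove operations above).
        List<Object> spec = JsonUtils.classpathToList("/spec.json");
        Chainr chainr = Chainr.fromSpec(spec);

        // Load the Elasticsearch response shown in the question.
        Map<String, Object> input = JsonUtils.classpathToMap("/input.json");

        Object output = chainr.transform(input);
        System.out.println(JsonUtils.toPrettyJsonString(output));
    }
}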

You may consider another library:
https://github.com/octomix/josson
Deserialization
Josson josson = Josson.fromJsonString(
"{" +
" \"took\": 7," +
" \"timed_out\": false," +
" \"_shards\": {" +
" \"total\": 1," +
" \"successful\": 1," +
" \"failed\": 0" +
" }," +
" \"hits\": {" +
" \"total\": 5," +
" \"max_score\": 1.000438," +
" \"hits\": [" +
" {" +
" \"_id\": \"AV-SJgvFPkCspwtrqHA1\"," +
" \"_source\": {" +
" \"tenant_id\": \"tenant1\"," +
" \"session_id\": \"e780ff74-d33e-4024-9bb7-971f067484ea\"" +
" }," +
" \"inner_hits\": {" +
" \"network_events\": {" +
" \"hits\": {" +
" \"total\": 1," +
" \"max_score\": 6.0892797," +
" \"hits\": [" +
" {" +
" \"_source\": {" +
" \"event_id\": 16," +
" \"response_time\": 0," +
" \"url\": \"http://www.google.com/\"" +
" }" +
" }," +
" {" +
" \"_source\": {" +
" \"event_id\": 18," +
" \"response_time\": 1," +
" \"url\": \"http://www.google1.com/\"" +
" }" +
" }" +
" ]" +
" }" +
" }" +
" }" +
" }," +
" {" +
" \"_id\": \"BS-SJgvFPkCspwtrqHA1\"," +
" \"_source\": {" +
" \"tenant_id\": \"tenant2\"," +
" \"session_id\": \"f4939272-d33e-4024-9bb7-971f067484ea\"" +
" }," +
" \"inner_hits\": {" +
" \"network_events\": {" +
" \"hits\": {" +
" \"total\": 1," +
" \"max_score\": 6.0892797," +
" \"hits\": [" +
" {" +
" \"_source\": {" +
" \"event_id\": 18," +
" \"response_time\": 4," +
" \"url\": \"http://www.google4.com/\"" +
" }" +
" }," +
" {" +
" \"_source\": {" +
" \"event_id\": 5," +
" \"response_time\": 5," +
" \"url\": \"http://www.google5.com/\"" +
" }" +
" }" +
" ]" +
" }" +
" }" +
" }" +
" }" +
" ]" +
" }" +
"}");
Transformation
JsonNode node = josson.getNode(
"hits.hits" +
".map(_source.session_id," +
" _source.tenant_id," +
" network_events: inner_hits.network_events.hits.hits._source.map(url, response_time))" +
".toObject('sessions')");
System.out.println(node.toPrettyString());
Output
{
"sessions" : [ {
"session_id" : "e780ff74-d33e-4024-9bb7-971f067484ea",
"tenant_id" : "tenant1",
"network_events" : [ {
"url" : "http://www.google.com/",
"response_time" : 0
}, {
"url" : "http://www.google1.com/",
"response_time" : 1
} ]
}, {
"session_id" : "f4939272-d33e-4024-9bb7-971f067484ea",
"tenant_id" : "tenant2",
"network_events" : [ {
"url" : "http://www.google4.com/",
"response_time" : 4
}, {
"url" : "http://www.google5.com/",
"response_time" : 5
} ]
} ]
}

Related

Group nested objects array by property

I saw a lot of questions of this type suggesting '$unwind' and '$group' stages, but I haven't managed to get what I want yet.
After quite a long aggregation pipeline I almost have what I need. If required I'll post my entire initial document structure and pipeline, but I'm not sure that's necessary; it's pretty big.
So I got up to this step:
{
  $group:
  {
    _id:
    {
      Frequency: "$Frequency",
      LowLimit: "$LowLimit",
      HighLimit: "$HighLimit",
      Units: "$Units",
      Params: "$Params"
    },
    count: { $sum: 1 },
    Steps:
    {
      $push:
      {
        FPGAVersion: "$FPGAVersion",
        Result: "$Result",
        PassFail: "$PassFail",
        KeyIndex: "$KeyIndex",
        CatalogNumber: "$CatalogNumber",
        AppVer: "$AtsVersion",
        Id: "$Id"
      }
    }
  }
}
Which produces documents like this:
_id: {
  Frequency: "XXX"
  LowLimit: "220.11"
  HighLimit: "285.89"
  Units: ""
  Params: ""
}
count: 16
Steps:
[
{
FPGAVersion:"XXX"
Result:"232.00"
PassFail:" "
KeyIndex:"2305202109411720D27255"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.15.583"
Id:60fd61a2d8034825203ac424
},
{
FPGAVersion:"XXX"
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
FPGAVersion:"XXX"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
{
FPGAVersion:"YYY"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
{
FPGAVersion:"YYY"
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
What I need is for the main grouping to stay the same, but I want the inner array to also be grouped by FPGAVersion, like this:
_id: {
  Frequency: "XXX"
  LowLimit: "220.11"
  HighLimit: "285.89"
  Units: ""
  Params: ""
}
count: 16
Steps:
[
{
FPGAVersion:"XXX"
Steps:
[
{
Result:"232.00"
PassFail:" "
KeyIndex:"2305202109411720D27255"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.15.583"
Id:60fd61a2d8034825203ac424
},
{
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
},
{
FPGAVersion:"YYY"
Steps:
[
{
Result:"235.00"
PassFail:" "
KeyIndex:"2405202117040220A07687"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.17.607"
Id:60fd61a2d8034825203ac42a
},
{
Result:"231.00"
PassFail:" "
KeyIndex:"0306202110431821A04704"
CatalogNumber:"333_GENERAL"
AppVer:"1.0.18.618"
},
]
},
]
EDIT
A complete document example (some unnecessary data removed):
{
"_id":{
"$oid":"60fd4cdbbbbc873d1c831df4"
},
"Index":0,
"KeyIndex":"25042021173703bb12345678",
"SerialNumber":"XXX",
"UnitName":"Unit A",
"CatalogNumber":"XXX",
"StartWorkDate":{
"$date":"2021-04-25T17:37:03.000Z"
},
"FinishWorkDate":{
"$date":"2021-04-25T17:44:14.000Z"
},
"WorkTime":"00:07:10",
"TotalTests":4,
"FailedTestsCount":0,
"PassedTestsCount":4,
"Status":"PASS",
"AppVersion":"XXX",
"FPGAVersion":"XXX",
"Tests":[
{
"TestName":"Test A",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Duration":"00:00:01",
"TotalSteps":9,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.00",
"HighLimit":"0.45",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.40",
"HighLimit":"1.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.01",
"HighLimit":"0.20",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.01",
"HighLimit":"1.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.10",
"HighLimit":"0.40",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:00.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.50",
"HighLimit":"1.70",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.30",
"HighLimit":"2.00",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0.00",
"HighLimit":"1.10",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
},
{
"TestDate":{
"$date":"2021-04-25T14:38:01.000Z"
},
"Frequency":"XXX",
"LowLimit":"0",
"HighLimit":"0.04",
"Units":"A",
"Result":"XXX",
"PassFail":" ",
"Params":" "
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message.."
},
{
"Type":"Info",
"Message":"Log Message.."
}
]
},
{
"TestName":"Test B",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:38:20.000Z"
},
"Duration":"00:00:00",
"TotalSteps":1,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:38:20.000Z"
},
"Frequency":" ",
"LowLimit":"AC",
"HighLimit":"AC",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"FPGA Version"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message..."
},
{
"Type":"Info",
"Message":"Log Message..."
}
]
},
{
"TestName":"Test C",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:39:27.000Z"
},
"Duration":"00:01:44",
"TotalSteps":4,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:39:27.000Z"
},
"Frequency":"XXX",
"LowLimit":"69.00",
"HighLimit":"89.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 6500"
},
{
"TestDate":{
"$date":"2021-04-25T14:39:57.000Z"
},
"Frequency":"XXX",
"LowLimit":"89.00",
"HighLimit":"109.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2B00"
},
{
"TestDate":{
"$date":"2021-04-25T14:40:36.000Z"
},
"Frequency":"XXX",
"LowLimit":"394.00",
"HighLimit":"414.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value B500"
},
{
"TestDate":{
"$date":"2021-04-25T14:41:11.000Z"
},
"Frequency":"XXX",
"LowLimit":"699.00",
"HighLimit":"719.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value B370"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message"
},
{
"Type":"Info",
"Message":"Log Message"
}
]
},
{
"TestName":"Test D",
"Status":"Passed",
"TestTime":{
"$date":"2021-04-25T14:41:55.000Z"
},
"Duration":"00:01:42",
"TotalSteps":6,
"Steps":[
{
"TestDate":{
"$date":"2021-04-25T14:41:55.000Z"
},
"Frequency":"XXX",
"LowLimit":"107.00",
"HighLimit":"127.00",
"Units":" ",
"Result":"0",
"PassFail":" ",
"Params":"Value 5100"
},
{
"TestDate":{
"$date":"2021-04-25T14:42:36.000Z"
},
"Frequency":"XXX",
"LowLimit":"227.00",
"HighLimit":"247.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 4800"
},
{
"TestDate":{
"$date":"2021-04-25T14:42:47.000Z"
},
"Frequency":"XXX",
"LowLimit":"282.00",
"HighLimit":"302.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2000"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:05.000Z"
},
"Frequency":"XXX",
"LowLimit":"462.00",
"HighLimit":"482.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2D00"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:19.000Z"
},
"Frequency":"XXX",
"LowLimit":"517.00",
"HighLimit":"537.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 1570"
},
{
"TestDate":{
"$date":"2021-04-25T14:43:37.000Z"
},
"Frequency":"XXX",
"LowLimit":"697.00",
"HighLimit":"717.00",
"Units":" ",
"Result":"XXX",
"PassFail":" ",
"Params":"Value 2500"
}
],
"Logs":[
{
"Type":"Info",
"Message":"Log Message..."
},
{
"Type":"Info",
"Message":"Log Message..."
}
]
}
]
}
The complete pipeline so far (an example output document is shown before the edit):
[{$match: {
$and:[
{StartWorkDate:{ $gte:ISODate("2019-02-04T11:15:15.000+00:00")}},
{FinishWorkDate:{ $lte:ISODate("2022-04-05T14:15:15.000+00:00")}},
{UnitName:{$eq:"XXX"}},
{Component:{$eq:"TYYY"}},
{FPGAVersion:{$in:["XXX",
"YYY",
"ZZZ"]}},
{"Tests.TestName":{$eq:"Test A"} }
]
}}, {$unset: "Tests.Logs"}, {$project: {
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AtsVersion:"$AppVersion",
FPGAVersion:"$FPGAVersion",
Tests:
{
$filter:
{
input: '$Tests',
as: 'test',
cond: {$eq: ['$$test.TestName', 'Test A']}
}
}
}}, {$project: {
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AtsVersion:"$AtsVersion",
Id:"$_id",
FPGAVersion:"$FPGAVersion",
Steps:{
$reduce:{
input:"$Tests.Steps",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}}, {$unwind: {
path: "$Steps"
}}, {$project: {
FPGAVersion:1,
Id:1,
KeyIndex:1,
SerialNumber:1,
CatalogNumber:1,
AtsVersion:1,
Frequency:"$Steps.Frequency",
LowLimit:"$Steps.LowLimit",
HighLimit:"$Steps.HighLimit",
TestDate:"$Steps.TestDate",
Params:"$Steps.Params",
Units:"$Steps.Units",
Result:"$Steps.Result",
PassFail:"$Steps.PassFail",
}}, {$group: {
_id:
{
FPGAVersion:"$FPGAVersion",
Frequency:"$Frequency",
LowLimit:"$LowLimit",
HighLimit:"$HighLimit",
Units:"$Units",
Params:"$Params"
},
count:{$sum:1},
Steps:
{
$push:
{
Result:"$Result",
PassFail:"$PassFail",
KeyIndex:"$KeyIndex",
SerialNumber:"$SerialNumber",
CatalogNumber:"$CatalogNumber",
AppVer:"$AtsVersion",
FPGAVersion:"$FPGAVersion",
Id:"$Id"
}
}
}}, {$project: {
_id:0,
FPGAVersion:"$_id.FPGAVersion",
Frequency:"$_id.Frequency",
LowLimit:"$_id.LowLimit",
HighLimit:"$_id.HighLimit",
Units:"$_id.Units",
Params:"$_id.Params",
Steps:"$Steps",
"Total":
{
$size:"$Steps"
},
Passed:
{
$size:
{
$filter:
{
input:"$Steps.PassFail",
as:"res",
cond:
{
$eq:["$$res"," "]
}
}
}
},
Failed:
{
$size:
{
$filter:
{
input:"$Steps.PassFail",
as:"res",
cond:
{
$eq:["$$res","*"]
}
}
}
}
}}]
Add the FPGAVersion field to the group key of your $group stage; a second $group stage then groups by your required fields and constructs the array of FPGAVersion and Steps:
{
$group: {
_id: {
Frequency:"$Frequency",
LowLimit:"$LowLimit",
HighLimit:"$HighLimit",
Units:"$Units",
Params:"$Params",
FPGAVersion:"$FPGAVersion"
},
count: { $sum: 1 },
Steps: {
$push: {
Result:"$Result",
PassFail:"$PassFail",
KeyIndex:"$KeyIndex",
CatalogNumber:"$CatalogNumber",
AppVer:"$AtsVersion",
Id:"$Id"
}
}
}
},
{
$group: {
_id: {
Frequency:"$_id.Frequency",
LowLimit:"$_id.LowLimit",
HighLimit:"$_id.HighLimit",
Units:"$_id.Units",
Params:"$_id.Params"
},
count: { $sum: "$count" },
Steps: {
$push: {
FPGAVersion: "$_id.FPGAVersion",
Steps: "$Steps"
}
}
}
}
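If you also want the grouping keys back at the top level, a final $project like the one already at the end of your pipeline can flatten _id. A sketch:
{
  $project: {
    _id: 0,
    Frequency: "$_id.Frequency",
    LowLimit: "$_id.LowLimit",
    HighLimit: "$_id.HighLimit",
    Units: "$_id.Units",
    Params: "$_id.Params",
    count: 1,
    Steps: 1
  }
}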

mongodb update the last array element

I have the following document:
{
"_id": ObjectId("5d648b0d5aeada5177bb54e4"),
"time": [{
"start": "2019/8/25 9:59:30",
"end": "2019/8/25 10:59:30"
},
{
"start": "2019/8/26 9:59:30",
"end": "2019/8/26 10:59:30"
},
{
"start": "2019/8/27 9:59:30",
"end": "2019/8/26 9:59:30"
}
]
}
How do I update the last element in the array?
I already tried:
db.document.update(
{
"_id": ObjectId("5d648b0d5aeada5177bb54e4")
},
{
$set: {
"time.-1.end": "2019/8/26 10:59:30"
}
}
)
but it does not work...
It is very simple: get the last element's index/position.
var data = {
"_id" : "5d648b0d5aeada5177bb54e4",
"time" : [
{
"start" : "2019/8/25 9:59:30",
"end" : "2019/8/25 10:59:30"
},
{
"start" : "2019/8/26 9:59:30",
"end" : "2019/8/26 10:59:30"
},
{
"start" : "2019/8/27 9:59:30",
"end" : "2019/8/26 9:59:30"
}
]
}
var len = data.time.length - 1;
var objUpdate = {};
var updateQuery = "time." + len + ".end";
objUpdate[updateQuery] = "2019/8/26 11:59:30";
db.getCollection('test').update({"_id" : ObjectId("5d648b0d5aeada5177bb54e4")}, {$set:objUpdate});
Alternatively, filter the document on the time.start field and use the positional operator. Try the query below:
db.arraytest.updateOne({"_id" : ObjectId("5d648b0d5aeada5177bb54e4"), "time.start" : "2019/8/27 9:59:30"},
{$set: { "time.$.end": "2019/8/26 10:59:30" }})
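On MongoDB 4.2 or newer you can also rewrite the last element without knowing its index or its start value, using a pipeline update. A sketch built around the question's document (the collection name document is taken from the question):
db.document.updateOne(
  { "_id": ObjectId("5d648b0d5aeada5177bb54e4") },
  [
    { $set: {
        time: {
          $concatArrays: [
            // every element except the last one
            { $slice: ["$time", { $subtract: [{ $size: "$time" }, 1] }] },
            // the last element, with its "end" field overwritten
            [ { $mergeObjects: [
                { $arrayElemAt: ["$time", -1] },
                { end: "2019/8/26 10:59:30" }
            ] } ]
          ]
        }
    } }
  ]
)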

Group by Date mongoDB

I have a set of data in MongoDB that I have to sum up, grouped by $timestamp. This field contains a date, but it's formatted as a String (example data below).
How should I proceed to convert $timestamp into a date so I can group them all together?
Next, I have to sum each scores_today for each date and iden, and the same with each scores_total.
Example data:
[
{
_id: "1442",
timestamp: "2016-03-15T22:24:02.000Z",
iden: "15",
scores_today: "0.000000",
scores_total: "52337.000000"
}
]
My code
var project = {
"$project":{
"_id": 0,
"y": {
"$year": "$timestamp" // tried this way, not working
},
"m": {
"$month": new Date("$timestamp") // tried either this, not working
},
"d": {
"$dayOfMonth": new Date("$timestamp")
},
"iden" : "$iden"
}
},
group = {
"$group": {
"_id": {
"iden" : "$iden",
"year": "$y",
"month": "$m",
"day": "$d"
},
"count" : { "$sum" : "$scores_today" }
}
};
mongoDB.collection('raw').aggregate([ project, group ]).toArray()....
This is the error logged by the Node.js service:
Err: { [MongoError: exception: can't convert from BSON type String to
Date] name: 'MongoError', message: 'exception: can\'t convert from
BSON type String to Date', errmsg: 'exception: can\'t convert from
BSON type String to Date', code: 16006, ok: 0 }
You can construct a Date object from a string using ISODate($timestamp).
var project = {
"$project":{
"_id": 0,
"y": {
"$year": ISODate("$timestamp").getFullYear()
},
"m": {
"$month": ISODate("$timestamp").getMonth()+1 // months start from 0
},
"d": {
"$dayOfMonth": ISODate("$timestamp").getDate()
},
"iden" : "$iden"
}
},
group = {
"$group": {
"_id": {
"iden" : "$iden",
"year": "$y",
"month": "$m",
"day": "$d"
},
"count" : { "$sum" : "$scores_today" }
}
};
UPDATE
If you're not running the MongoDB shell then you can't use ISODate directly. In this case, try invoking the eval command.
var aggregationResult = mongoDB.eval(
    'function() ' +
    '{ ' +
    '  var project = { ' +
    '    "$project": { ' +
    '      "_id": 0, ' +
    '      "y": { ' +
    '        "$year": ISODate("$timestamp").getFullYear() ' +
    '      }, ' +
    '      "m": { ' +
    '        "$month": ISODate("$timestamp").getMonth() + 1 ' + // months start from 0
    '      }, ' +
    '      "d": { ' +
    '        "$dayOfMonth": ISODate("$timestamp").getDate() ' +
    '      }, ' +
    '      "iden": "$iden" ' +
    '    } ' +
    '  }, ' +
    '  group = { ' +
    '    "$group": { ' +
    '      "_id": { ' +
    '        "iden": "$iden", ' +
    '        "year": "$y", ' +
    '        "month": "$m", ' +
    '        "day": "$d" ' +
    '      }, ' +
    '      "count": { "$sum": "$scores_today" } ' +
    '    } ' +
    '  }; ' +
    '  var result = db.raw.aggregate([ project, group ]); ' +
    '  return result; ' +
    '}'
);
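On MongoDB 3.6 or newer there is a simpler route that avoids eval entirely: $dateFromString parses the string server-side. Note that scores_today is also a string, so it must be converted before $sum ($toDouble requires 4.0+). A sketch:
var project = {
  "$project": {
    "_id": 0,
    "iden": 1,
    "ts": { "$dateFromString": { "dateString": "$timestamp" } },
    "score": { "$toDouble": "$scores_today" }
  }
},
group = {
  "$group": {
    "_id": {
      "iden": "$iden",
      "year": { "$year": "$ts" },
      "month": { "$month": "$ts" },
      "day": { "$dayOfMonth": "$ts" }
    },
    "count": { "$sum": "$score" }
  }
};
mongoDB.collection('raw').aggregate([ project, group ]).toArray();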

How could I remove the duplicated items (complex objects) from an array

In each document,
the records field is an array containing many duplicated objects,
and buy_items also contains many duplicated items.
How can I clean up the duplicated items?
Original documents:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"5210 ",
"5210 "
]
},
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"5210 ",
"5210 "
]
},
{
"DATE": new Date("2012-12-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
Expected Output:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 "
]
},
{
"DATE": new Date("2012-12-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
With Michael's solution, the output might look like this:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"date": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
You can remove duplicated objects using the aggregation framework:
db.collection.aggregate(
[
{ $unwind: "$records" },
{ $unwind: "$records.buy_items" },
{ $group: { "_id": {id: "$_id", date: "$records.DATE" }, buy_items: { $addToSet: "$records.buy_items" }}},
{ $group: {"_id": "$_id.id", records: { $push: {"date": "$_id.date", "buy_items": "$buy_items" }}}}, { $sort: { "records.0.date": 1 }} ,
{ $out: "collection" }
]
)
The $out operator lets you write your aggregation result to a specified collection, or replace an existing collection.
Even better, use "Bulk" operations:
var bulk = db.collection.initializeOrderedBulkOp(),
count = 0;
db.collection.aggregate([
{ "$unwind": "$records" },
{ "$project": {
"date": "$records.DATE",
"buy_items": { "$setIntersection": "$records.buy_items" }
}},
{ "$unwind": "$buy_items" },
{ "$group": {
"_id": { "id": "$_id", "date": "$date" },
"buy_items": { "$addToSet": "$buy_items" }
}},
{ "$group": {
"_id": "$_id.id",
"records": { "$push": {
"date": "$_id.date",
"buy_items": "$buy_items"
}}
}}
]).forEach(function(doc) {
bulk.find({"_id": doc._id}).updateOne({
"$set": { "records": doc.records }
});
count++;
if (count % 500 == 0) {
bulk.execute();
bulk = db.collection.initializeOrderedBulkOp();
}
})
if (count % 500 != 0)
bulk.execute();
Result:
{
"_id" : "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records" : [
{
"date" : ISODate("2012-12-08T00:00:00Z"),
"buy_items" : [
" ",
"1234 ",
"5210 "
]
},
{
"date" : ISODate("1996-02-08T00:00:00Z"),
"buy_items" : [
"5210 "
]
}
]
}
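On MongoDB 4.2 or newer, a pipeline update can do the same deduplication in place, without $out or bulk rewrites. A sketch; $setUnion drops duplicate values (including whole documents) but does not guarantee element order:
db.collection.updateMany({}, [
  { $set: {
      records: {
        $setUnion: [
          { $map: {
              input: "$records",
              as: "r",
              in: {
                DATE: "$$r.DATE",
                buy_items: { $setUnion: ["$$r.buy_items", []] }
              }
          } },
          []
        ]
      }
  } }
])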
If you want to update your current collection without creating a new collection and dropping the previous one, you can try this, but you will have to run two separate update commands.
First, update records with distinct like this:
db.collectionName.update({},{"$set":{"records":db.collectionName.distinct('records')}})
and second, update buy_items with distinct like this:
db.collectionName.update({},{"$set":{"records.0.buy_items":db.collectionName.distinct('records.buy_items')}})
If you want to avoid two update queries, follow Michael's answer.
You could try using the forEach() method of the find() cursor to iterate over each document's properties, check for uniqueness, and filter distinct values as follows:
db.collection.find().forEach(function(doc){
var records = [], seen = {};
doc.records.forEach(function (item){
var uniqueBuyItems = item["buy_items"].filter(function(i, pos) {
return item["buy_items"].indexOf(i) == pos;
});
item["buy_items"] = uniqueBuyItems;
if (JSON.stringify(item["buy_items"]) !== JSON.stringify(seen["buy_items"])) {
records.push(item);
seen["buy_items"] = item["buy_items"];
}
});
doc.records = records;
db.collection.save(doc);
})

How to query the nested JSON structure in MongoDB/Mongoid

The following is one of the documents in the MongoDB database.
I want to select the years between 2007 and 2008,
where the key includes "Actual" or "Upper End of Range",
and table_name equals "Unemployment rate".
How can I do this with a Mongoid or MongoDB query?
Or can I only do it in the application layer, in Ruby or Python?
id 2012-04-25_unemployment_rate
{
"_id": "2012-04-25_unemployment_rate",
"table_name": "Unemployment rate",
"unit": "Percent",
"data": [
{
"2007": [
{
"Actual": "3.5"
},
{
"Upper End of Range": "-"
},
{
"Upper End of Central Tendency": "-"
},
{
"Lower End of Central Tendency": "-"
},
{
"Lower End of Range": "-"
}
]
},
{
"2008": [
{
"Actual": "1.7"
},
{
"Upper End of Range": "-"
},
{
"Upper End of Central Tendency": "-"
},
{
"Lower End of Central Tendency": "-"
},
{
"Lower End of Range": "-"
}
]
}
]
}
id 2014-04-25_unemployment_rate
{
"_id": "2014-04-25_unemployment_rate",
"table_name": "Unemployment rate",
"unit": "Percent",
"data": [
{
"2008": [
{
"Actual": "3.5"
},
{
"Upper End of Range": "-"
},
{
"Upper End of Central Tendency": "-"
},
{
"Lower End of Central Tendency": "-"
},
{
"Lower End of Range": "-"
}
]
},
{
"2009": [
{
"Actual": "1.7"
},
{
"Upper End of Range": "-"
},
{
"Upper End of Central Tendency": "-"
},
{
"Lower End of Central Tendency": "-"
},
{
"Lower End of Range": "-"
}
]
}
]
}
You don't select documents by keys; you select documents by values. You should restructure your documents to have fields like "year" : 2007. For example,
{
"_id": "2012-04-25_unemployment_rate",
"table_name": "Unemployment rate",
"unit": "Percent",
"data": [
{
"year" : 2007,
"Actual": "3.5",
"Upper End of Range": "-",
"Upper End of Central Tendency": "-",
"Lower End of Central Tendency": "-",
"Lower End of Range": "-"
}
]
}
I'm not sure what you mean by the condition that "key includes 'Actual' or 'Upper End of Range'", but if you want documents with a data element with year 2007 or 2008 and table_name equal to "Unemployment rate", use the query spec:
{ "table_name" : "Unemployment rate", "data.year" : { "$in" : [2007, 2008] } }
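With that restructuring in place, the query can be run directly, e.g. in the shell (the collection name rates is a placeholder):
db.rates.find({
  "table_name": "Unemployment rate",
  "data.year": { "$in": [2007, 2008] }
})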