mongodb aggregation with array - mongodb

I have data like this:
{
"_id" : ObjectId("..."),
"name" : "Entry 1",
"time" : ISODate("2013-12-28T06:00:00.000Z"),
"value" : 100
},
{
"_id" : ObjectId("..."),
"name" : "Entry 2",
"time" : ISODate("2013-12-28T06:00:00.000Z"),
"value" : 200
},
{
"_id" : ObjectId("..."),
"name" : "Entry 1",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 110
},
{
"_id" : ObjectId("..."),
"name" : "Entry 2",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 230
},
{
"_id" : ObjectId("..."),
"name" : "Entry 3",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 25
},
{
"_id" : ObjectId("..."),
"name" : "Entry 4",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 15
}
I need the result grouped by time, with the percentage of each entry within its time period, like this (optionally, when a time period has more than two entries, the smaller ones are combined into a single "Others" entry, but this is not essential):
{
"_id": ISODate("2013-12-28T06:00:00.000Z"),
"entries": [
{
"name": "Entry 1",
"percentage": 33.3
},
{
"name": "Entry 2",
"percentage": 66.6
}
]
},
{
"_id": ISODate("2013-12-28T11:00:00.000Z"),
"entries": [
{
"name": "Entry 1",
"percentage": 28.9
},
{
"name": "Entry 2",
"percentage": 60.5
},
{
"name": "Others",
"percentage": 10.5
}
]
}
Here is the query I tried:
// Attempted pipeline: truncate the timestamp to a string, group by it, and try to
// compute each entry's percentage of the group total inside the same $group stage.
db.collection.aggregate([
{
"$addFields": {
// First 19 characters of the time value, e.g. "2013-12-28T11:00:00".
"full_datetime": {"$substr": ["$time", 0, 19]}
}
},
{
"$group": {
"_id": "$full_datetime",
"value_sum": {"$sum": "$value"},
"entries": {
"$push": {
"name": "$name",
"percentage": {
"$multiply": [{
// NOTE: this is the failing part. "value_sum" is an accumulator of this same
// $group stage, so it is not a field that "$push" can read here, and "$literal"
// only yields the text "$value_sum" rather than the computed sum.
"$divide": ["$value", {"$literal": "$value_sum" }]
}, 100 ]
}
}
}
}
}
])
This query does not work because $value_sum does not exist inside $push.
Please help: how can I make $value_sum available to the $push expression?

You can use one more stage to calculate percentage using $map as,
// Two-step approach: $group collects the raw values together with the per-time
// total, then a separate $project stage converts every value into a percentage
// of that total (the total is only available once $group has finished).
db.collection.aggregate([
    {
        // Truncate the timestamp to its first 19 characters so it can serve as the group key.
        "$addFields": {
            "full_datetime": {
                "$substr": ["$time", 0, 19]
            }
        }
    },
    {
        "$group": {
            "_id": "$full_datetime",
            "value_sum": {
                "$sum": "$value"
            },
            // Keep name and raw value; the percentage is computed in the next stage.
            "entries": {
                "$push": {
                    "name": "$name",
                    "value": "$value"
                }
            }
        }
    },
    {
        "$project": {
            // Emitted as "entries" so the result matches the output shown below.
            "entries": {
                "$map": {
                    "input": "$entries",
                    "as": "entry",
                    "in": {
                        "name": "$$entry.name",
                        "percentage": {
                            "$multiply": [{
                                "$divide": ["$$entry.value", "$value_sum"]
                            }, 100]
                        }
                    }
                }
            }
        }
    }
])
Output:
/* 1 */
{
"_id" : "2013-12-28T11:00:00",
"entries" : [
{
"name" : "Entry 1",
"percentage" : 28.9473684210526
},
{
"name" : "Entry 2",
"percentage" : 60.5263157894737
},
{
"name" : "Entry 3",
"percentage" : 6.57894736842105
},
{
"name" : "Entry 4",
"percentage" : 3.94736842105263
}
]
}
/* 2 */
{
"_id" : "2013-12-28T06:00:00",
"entries" : [
{
"name" : "Entry 1",
"percentage" : 33.3333333333333
},
{
"name" : "Entry 2",
"percentage" : 66.6666666666667
}
]
}

Related

MongoDB: Get most recent objects from embedded arrays and append parent field

I have an array of companies where each company has an array of subscription objects. A subscription object has an array of items and a date when those items were added. I want to retrieve only the most recently added subscription object from each company. I also need to be able to identify where each subscription object came from, so I need to append the company name somehow. Sorry for the confusing semantics!
The array of companies:
[
{
"_id": "company1",
"name": "Company 1",
"subscriptions": [
{
"items": [
{
"_id": "5e13b0207b05e37f12f05beb",
"data": 40,
"price": 39
},
{
"_id": "5e13b0207b05e37f12f05bea",
"data": 100,
"price": 59
}
],
"_id": "5e13b0207b05e37f12f05be9",
"date_added": "2019-12-24T22:09:36.442Z"
}
]
},
{ "_id": "company2", "name": "Company 2", "subscriptions": [] },
{
"_id": "company3",
"name": "Company 3",
"subscriptions": [
{
"items": [
{
"_id": "5e13a47d7c370667c7e67d7a",
"data": 40,
"price": 69
},
{
"_id": "5e13a47d7c370667c7e67d79",
"data": 20,
"price": 39
}
],
"_id": "5e13a47d7c370667c7e67d78",
"date_added": "2019-12-24T21:19:57.804Z"
},
{
"items": [
{
"_id": "5e13a4f87c370667c7e67d7d",
"data": 35,
"price": 39
},
{
"_id": "5e13a4f87c370667c7e67d7c",
"data": 60,
"price": 59
}
],
"_id": "5e13a4f87c370667c7e67d7b",
"date_added": "2020-01-04T21:22:00.832Z"
}
]
}
]
Desired output:
[
{
"name": "Company 1",
"items": [
{
"_id": "5e13b0207b05e37f12f05beb",
"data": 40,
"price": 39
},
{
"_id": "5e13b0207b05e37f12f05bea",
"data": 100,
"price": 59
}
],
"_id": "5e13b0207b05e37f12f05be9",
"date_added": "2019-12-24T22:09:36.442Z"
},
{
"name": "Company 3",
"items": [
{
"_id": "5e13a4f87c370667c7e67d7d",
"data": 35,
"price": 39
},
{
"_id": "5e13a4f87c370667c7e67d7c",
"data": 60,
"price": 59
}
],
"_id": "5e13a4f87c370667c7e67d7b",
"date_added": "2020-01-04T21:22:00.832Z"
}
]
What I am trying:
// Attempt: split out every subscription, order newest first, then group per company.
await Companies.aggregate([
{ $unwind: '$subscriptions' },
{ $sort: { 'subscriptions.date_added': -1 } },
// Only _id is specified in this $group, so each result contains nothing but the company _id.
{ $group: { _id: '$_id' } }
])
Output:
[{"_id":"company1"},{"_id":"company3"}]
I think I filtered out the correct objects (the most recent ones), but I am only retrieving the _id of the root element, not the nested subscription objects. These ids might be used to identify the subscriptions, but I also need the actual subscription items.
Since all you want is the latest subscription object, and assuming that is always the last object pushed to the subscriptions array, try this:
// Keep only companies that have a non-empty subscriptions array, replace the
// array with its last element, and return just the name and that subscription.
Companies.aggregate([
    {
        // Both conditions apply to the same field, so they are combined in one predicate.
        $match: { subscriptions: { $exists: true, $ne: [] } }
    },
    {
        // Index -1 selects the last element of the array.
        $addFields: { subscriptions: { $arrayElemAt: ['$subscriptions', -1] } }
    },
    {
        $project: { _id: 0, subscriptions: 1, name: 1 }
    }
])
Collection data :
/* 1 */
{
"_id" : "company1",
"name" : "Company 1",
"subscriptions" : [
{
"items" : [
{
"_id" : "5e13b0207b05e37f12f05beb",
"data" : 40.0,
"price" : 39.0
},
{
"_id" : "5e13b0207b05e37f12f05bea",
"data" : 100.0,
"price" : 59.0
}
],
"_id" : "5e13b0207b05e37f12f05be9",
"date_added" : "2019-12-24T22:09:36.442Z"
}
]
}
/* 2 */
{
"_id" : "company2",
"name" : "Company 2",
"subscriptions" : []
}
/* 3 */
{
"_id" : "company3",
"name" : "Company 3",
"subscriptions" : [
{
"items" : [
{
"_id" : "5e13a47d7c370667c7e67d7a",
"data" : 40.0,
"price" : 69.0
},
{
"_id" : "5e13a47d7c370667c7e67d79",
"data" : 20.0,
"price" : 39.0
}
],
"_id" : "5e13a47d7c370667c7e67d78",
"date_added" : "2019-12-24T21:19:57.804Z"
},
{
"items" : [
{
"_id" : "5e13a4f87c370667c7e67d7d",
"data" : 35.0,
"price" : 39.0
},
{
"_id" : "5e13a4f87c370667c7e67d7c",
"data" : 60.0,
"price" : 59.0
}
],
"_id" : "5e13a4f87c370667c7e67d7b",
"date_added" : "2020-01-04T21:22:00.832Z"
}
]
}
Result :
/* 1 */
{
"name" : "Company 1",
"subscriptions" : {
"items" : [
{
"_id" : "5e13b0207b05e37f12f05beb",
"data" : 40.0,
"price" : 39.0
},
{
"_id" : "5e13b0207b05e37f12f05bea",
"data" : 100.0,
"price" : 59.0
}
],
"_id" : "5e13b0207b05e37f12f05be9",
"date_added" : "2019-12-24T22:09:36.442Z"
}
}
/* 2 */
{
"name" : "Company 3",
"subscriptions" : {
"items" : [
{
"_id" : "5e13a4f87c370667c7e67d7d",
"data" : 35.0,
"price" : 39.0
},
{
"_id" : "5e13a4f87c370667c7e67d7c",
"data" : 60.0,
"price" : 59.0
}
],
"_id" : "5e13a4f87c370667c7e67d7b",
"date_added" : "2020-01-04T21:22:00.832Z"
}
}
In your solution, you need to accumulate subscriptions with $first operator and with $project you can get desired result.
// Pick each company's most recent subscription and flatten it into the result.
db.companies.aggregate([
    // One document per subscription.
    { $unwind: "$subscriptions" },
    // Newest subscription first, so that $first below picks the latest one.
    { $sort: { "subscriptions.date_added": -1 } },
    {
        $group: {
            _id: "$_id",
            name: { $first: "$name" },
            subscriptions: { $first: "$subscriptions" }
        }
    },
    {
        // Lift the subscription's fields to the top level next to the company name.
        $project: {
            _id: "$subscriptions._id",
            name: "$name",
            date_added: "$subscriptions.date_added",
            items: "$subscriptions.items"
        }
    }
])
MongoPlayground

Mongodb aggregate with cond and query value

I'm new to MongoDB. I need to know how, within an aggregation, I can run a query and use its result as the value of a field (item).
Data
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA"
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB"
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC"
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD"
}
]
mongoshell
Assume $check is false
db.getCollection('test').aggregate(
[
{
"$group": {
"_id": "$id",
//...,
"item": {
"$last": {
"$cond": [
{"$eq": ["$check", true]},
"YES",
* * ANSWER **,
}
]
}
},
}
]
)
So I need item to be an array of strings containing the name of every document that has the same parent_id.
Expect result
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC",
"item" : ["CCCC"]
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD",
"item" : ["AAAA","BBBB","DDDD"]
}
]
Try this..
Sample live demo
// Collect all names per parent_id, then expand back to one document per
// original record, each carrying the full list of sibling names in "item".
db.collection.aggregate([
    {
        "$group": {
            "_id": "$parent_id",
            // Every name that shares this parent_id.
            "item": { "$push": "$name" },
            // The original records, kept so they can be restored after grouping.
            "data": { "$push": { "_id": "$_id", "name": "$name" } }
        }
    },
    // Back to one document per original record.
    { "$unwind": "$data" },
    {
        "$project": {
            "_id": "$data._id",
            "parent_id": "$_id",
            "name": "$data.name",
            "item": 1
        }
    }
])

how to sort an array in a nested array which is located under an object in mongodb

I have a collection data like below.
{
"name": "Devices",
"exten": {
"parameters": [{
"name": "Date",
"value": ["5","2"]
}, {
"name": "Time",
"value": ["2"]
}, {
"name": "Season",
"value": ["6"]
}
]
}
}
I want to fetch all documents whose name is "Devices" and sort them by the first element of "value" for the parameter whose name is "Date".
ex: mongo will get
name = "devices"
exten.parameters.name = "Date"
will sort it by
exten.parameters.value[0]
in this example it will be sorted by "5".
below query returns 0 record.
db.brand.aggregate(
{ $match: {
"name" : "Devices"
}},
{ $unwind: "$exten.parameters" },
{ $match: {
'exten.parameters.name': 'Date'
}},
{ $sort: {
'exten.parameters.value': -1
}}
)
The following query can get us the expected output:
// Sort the "Date" parameters of "Devices" documents by the first element of
// their value array, using a temporary field that is removed again at the end.
db.collection.aggregate([
    { $match: { "name": "Devices" } },
    // One document per entry of exten.parameters.
    { $unwind: "$exten.parameters" },
    { $match: { "exten.parameters.name": "Date" } },
    {
        $project: {
            "name": 1,
            "exten": 1,
            // Temporary sort key: element 0 of the parameter's value array.
            "firstParam": { $arrayElemAt: ["$exten.parameters.value", 0] }
        }
    },
    { $sort: { "firstParam": 1 } },
    // Drop the temporary sort key from the result.
    { $project: { "firstParam": 0 } }
]).pretty()
Data set:
{
"_id" : ObjectId("5da02fb86472ba670fd8c159"),
"name" : "Devices",
"exten" : {
"parameters" : [
{
"name" : "Date",
"value" : [
"5",
"2"
]
},
{
"name" : "Date",
"value" : [
"2",
"7"
]
},
{
"name" : "Time",
"value" : [
"2"
]
},
{
"name" : "Season",
"value" : [
"6"
]
}
]
}
}
Output:
{
"_id" : ObjectId("5da02fb86472ba670fd8c159"),
"name" : "Devices",
"exten" : {
"parameters" : {
"name" : "Date",
"value" : [
"2",
"7"
]
}
}
}
{
"_id" : ObjectId("5da02fb86472ba670fd8c159"),
"name" : "Devices",
"exten" : {
"parameters" : {
"name" : "Date",
"value" : [
"5",
"2"
]
}
}
}

MongoDb aggregation query with $group and $push into subdocument

I have a question regarding the $group argument of MongoDb aggregations. My data structure looks as follows:
My "Event" collection contains this single document:
{
"_id": ObjectId("mongodbobjectid..."),
"name": "Some Event",
"attendeeContainer": {
"min": 0,
"max": 10,
"attendees": [
{
"type": 1,
"status": 2,
"contact": ObjectId("mongodbobjectidHEX1")
},
{
"type": 7,
"status": 4,
"contact": ObjectId("mongodbobjectidHEX2")
}
]
}
}
My "Contact" collection contains these documents:
{
"_id": ObjectId("mongodbobjectidHEX1"),
"name": "John Doe",
"age": 35
},
{
"_id": ObjectId("mongodbobjectidHEX2"),
"name": "Peter Pan",
"age": 60
}
What I want to do is perform an aggregate query on the "Event" collection and get the following result with full "contact" data:
{
"_id": ObjectId("mongodbobjectid..."),
"name": "Some Event",
"attendeeContainer": {
"min": 0,
"max": 10,
"attendees": [
{
"type": 1,
"status": 2,
"contact": {
"_id": ObjectId("mongodbobjectidHEX1"),
"name": "John Doe",
"age": 35
}
},
{
"type": 7,
"status": 4,
"contact": {
"_id": ObjectId("mongodbobjectidHEX2"),
"name": "Peter Pan",
"age": 60
}
}
]
}
}
The arguments I am using right now look as follows (shortened version):
"$unwind" : "$attendeeContainer.attendees",
"$lookup" : { "from" : "contactinfo", "localField" : "attendeeContainer.attendees.contact","foreignField" : "_id", "as" : "contactInfo" },
"$unwind" : "$contactInfo",
"$group" : { "_id": "$_id",
"name": { "$first" : "$name" },
...
"contact": { "$push": { "contact": "$contactInfo"} }
}
However, this leads to the "contact" array being on "Event" level (because of the grouping) instead of one document of the array being at each "attendeeContainer.attendees". How can I push the "contact" array to be at "attendeeContainer.attendees"? (as shown in the desired output above)
I tried things like:
"attendeeContainer.attendees.contact": { "$push": { "contact": "$contactInfo"} }
But mongodb apparently does not allow "." at $group stage.
Try running the following aggregation pipeline, the key is using a final $project pipeline to create the attendeeContainer subdocument:
// Join every attendee with its contact document, then rebuild the
// attendeeContainer subdocument in a final $project stage.
db.event.aggregate([
    // One document per attendee.
    { "$unwind": "$attendeeContainer.attendees" },
    {
        // Store the joined contact next to the attendee it belongs to.
        "$lookup" : {
            "from" : "contactinfo",
            "localField" : "attendeeContainer.attendees.contact",
            "foreignField" : "_id",
            "as" : "attendeeContainer.attendees.contactInfo"
        }
    },
    // $lookup produces an array; unwind it into a single embedded document.
    { "$unwind": "$attendeeContainer.attendees.contactInfo" },
    {
        // Dotted names are not allowed as $group output fields, so use flat
        // names here and nest them again in the $project stage below.
        "$group": {
            "_id" : "$_id",
            "name": { "$first": "$name" },
            "min" : { "$first": "$attendeeContainer.min" },
            "max" : { "$first": "$attendeeContainer.max" },
            "attendees": { "$push": "$attendeeContainer.attendees" }
        }
    },
    {
        "$project": {
            "name": 1,
            "attendeeContainer.min": "$min",
            // Must read "$max" (not "$min") so that max keeps its own value.
            "attendeeContainer.max": "$max",
            "attendeeContainer.attendees": "$attendees"
        }
    }
])
Debugging Tips
Debugging the pipeline at the 4th stage, you would get the result
// Debugging variant: the pipeline is run only up to the $group stage; the final
// $project stage is commented out so the intermediate result can be inspected.
db.event.aggregate([
{ "$unwind": "$attendeeContainer.attendees" },
{
"$lookup" : {
"from" : "contactinfo",
"localField" : "attendeeContainer.attendees.contact",
"foreignField" : "_id",
"as" : "attendeeContainer.attendees.contactInfo"
}
},
{ "$unwind": "$attendeeContainer.attendees.contactInfo" },
{
// Flat field names are used here; they are nested again by the (disabled) $project stage.
"$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"min" : { "$first": "$attendeeContainer.min" },
"max" : { "$first": "$attendeeContainer.max" },
"attendees": { "$push": "$attendeeContainer.attendees" }
}
}/*,
{
"$project": {
"name": 1,
"attendeeContainer.min": "$min",
"attendeeContainer.max": "$max",
"attendeeContainer.attendees": "$attendees"
}
}*/
])
Pipeline result
{
"_id" : ObjectId("582c789282a9183adc0b53f5"),
"name" : "Some Event",
"min" : 0,
"max" : 10,
"attendees" : [
{
"type" : 1,
"status" : 2,
"contact" : ObjectId("582c787682a9183adc0b53f3"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f3"),
"name" : "John Doe",
"age" : 35
}
},
{
"type" : 7,
"status" : 4,
"contact" : ObjectId("582c787682a9183adc0b53f4"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f4"),
"name" : "Peter Pan",
"age" : 60
}
}
]
}
and the final $project pipeline will give you the desired result:
// Complete pipeline: join every attendee with its contact document, regroup
// per event, and rebuild the attendeeContainer subdocument.
db.event.aggregate([
    // One document per attendee.
    { "$unwind": "$attendeeContainer.attendees" },
    {
        // Store the joined contact next to the attendee it belongs to.
        "$lookup" : {
            "from" : "contactinfo",
            "localField" : "attendeeContainer.attendees.contact",
            "foreignField" : "_id",
            "as" : "attendeeContainer.attendees.contactInfo"
        }
    },
    // $lookup produces an array; unwind it into a single embedded document.
    { "$unwind": "$attendeeContainer.attendees.contactInfo" },
    {
        "$group": {
            "_id": "$_id",
            "name": { "$first": "$name" },
            "min" : { "$first": "$attendeeContainer.min" },
            "max" : { "$first": "$attendeeContainer.max" },
            "attendees": { "$push": "$attendeeContainer.attendees" }
        }
    },
    {
        // Nest the flat $group fields back under attendeeContainer.
        "$project": {
            "name": 1,
            "attendeeContainer.min": "$min",
            // Must read "$max" (not "$min"); the expected output has max = 10.
            "attendeeContainer.max": "$max",
            "attendeeContainer.attendees": "$attendees"
        }
    }
])
Desired/Actual Output
{
"_id" : ObjectId("582c789282a9183adc0b53f5"),
"name" : "Some Event",
"attendeeContainer" : {
"min" : 0,
"max" : 10,
"attendees" : [
{
"type" : 1,
"status" : 2,
"contact" : ObjectId("582c787682a9183adc0b53f3"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f3"),
"name" : "John Doe",
"age" : 35
}
},
{
"type" : 7,
"status" : 4,
"contact" : ObjectId("582c787682a9183adc0b53f4"),
"contactInfo" : {
"_id" : ObjectId("582c787682a9183adc0b53f4"),
"name" : "Peter Pan",
"age" : 60
}
}
]
}
}

MongoDB Aggregation Pipeline Optimization

I have an aggregation pipeline as mentioned below. Considering that there can be hundreds of thousands of documents on which I want to apply this pipeline, I have questions on how it can be optimized. Below are my questions:
1.) What is the optimal index for the match query? Should I have a single compound index on myArray.tags.tagKey, myArray.tags.tagValue, myArray.someRules.count, myArray.entryTime, or should I create separate indexes for each of these fields? What is the best way to do this?
2.) If you see my pipeline, I am applying match operator twice to filter the documents: Once in the beginning of pipeline to get the benefit of my index. Second, I am using it after unwinding operation to filter the specific array elements of the array "myArray". Is there any way I can combine them. In other words, can array elements be filtered using $match aggregation operator?
Here is an example of my document:
{
"_id" : "8001409457639",
"myArray" : [
{
"entryID" : ObjectId("5101ab1116b12614fb083022"),
"requestID" : "adb0d2asdfbe10",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 0,
"somethingElse" : 11
},
"entryTime" : ISODate("2015-09-22T19:25:19.014Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
},
{
"entryID" : ObjectId("5101a111c6b12614fb083022"),
"requestID" : "fc057asdf16480",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 10,
"somethingElse" : 0
},
"entryTime" : ISODate("2015-09-22T19:44:26.558Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
},
{
"entryID" : ObjectId("5101b111c6b12614fb083011"),
"requestID" : "40c7a0ads2dd8c2",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 10,
"somethingElse" : 0
},
"entryTime" : ISODate("2015-09-22T20:24:15.347Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
}
],
"lockAcquiredBy" : "james",
"lockStartTime" : ISODate("2015-11-18T22:36:05.266Z")
}
This is my aggregation pipeline:
[{
"$match": {
"$and": [{
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "ABC"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "DEF"
}
}
}]
}, {
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "4"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "Retry"
}
}
}]
}],
"myArray.someRules.count": 0,
"myArray.entryTime": {
"$gte": {
"$date": "2016-01-05T01:59:07.763Z"
}
}
}
}, {
"$unwind": "$myArray"
}, {
"$match": {
"$and": [{
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "ABC"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "DEF"
}
}
}]
}, {
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "4"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "Retry"
}
}
}]
}],
"myArray.someRules.count": 0,
"myArray.entryTime": {
"$gte": {
"$date": "2016-01-05T01:59:07.763Z"
}
}
}
}
// More steps in the pipeline.
]