Aggregation: Grouping based on array element in MongoDB - mongodb

I am new to MongoDB, trying to write an aggregation function such that my output for the input should be same as below
[
{
"_id": {
"month": 1,
"year": 2022
},
"childServices": [
{"service":"MCT Latency", "sli":99.9},
{"service":"MCT Packet Loss", "sli":99.9}
],
"service": "Network"
},
{
"_id": {
"month": 2,
"year": 2022
},
"childServices": [
{"service":"MCT Latency", "sli":98.9},
{"service":"MCT Packet Loss", "sli":99.9}
]
"service": "Network",
}
]
Tried with below, but it's not grouping each childService by date.
[{
$unwind: {
path: '$childServices'
}
}, {
$group: {
_id: {
month: {
$month: '$date'
},
year: {
$year: '$date'
}
},
service: {
$first: '$service'
},
childServices: {
$first: '$childServices.service'
},
sli: {
$avg: '$childServices.availability'
}
}
}, {
$sort: {
'_id.month': 1,
'_id.year': 1
}
}]
SAMPLE DATA
[{
"_id": {
"$oid": "62fc99c00f5b1cb61d5f1072"
},
"service": "Network",
"date": "01/02/2022 00:32:51",
"childServices": [
{
"service": "MCT Latency",
"availability": 99.9,
},
{
"service": "MCT Packet Loss",
"availability": 99.9,
}
},
{
"_id": {
"$oid": "62fc99df0f5b1cb61d5f1073"
},
"service": "Network",
"date": "02/02/2022 00:32:51",
"childServices": [
{
"service": "MCT Latency",
"availability": 98.3,
},
"service": "MCT Packet Loss",
"availability": 99.9,
}
}
]
Basically, I want to get into the childService > pick each service > group them by month+year and get their monthly avg.

Convert the date from a string to a date type, before grouping, like this:
db.collection.aggregate([
{
$unwind: {
path: "$childServices"
}
},
{
$addFields: {
date: {
"$toDate": "$date"
}
}
},
{
$group: { <---- Here we are grouping the data for each distinct combination of month, year and child service. This needs to be done because we are using $first accumulator
_id: {
month: {
$month: "$date"
},
year: {
$year: "$date"
},
service: "$childServices.service"
},
service: {
$first: "$service"
},
childServices: {
$first: "$childServices.service"
},
sli: {
$avg: "$childServices.availability"
}
}
},
{
"$group": { <-- In this group, we groupBy month and year, and we push the child services record into an array, using $push. This gives us, for every month and year, the average of all distinct childservices
"_id": {
month: "$_id.month",
year: "$_id.year"
},
"childServices": {
"$push": {
service: "$childServices",
sli: "$sli"
}
}
}
},
{
$sort: {
"_id.month": 1,
"_id.year": 1
}
}
])
Playground link.

Related

Group on field while getting the last document for each field with MongoDB

Problem
I'm trying to group a stock inventory by products. At first, my stock entries was fully filled each time so I made this aggregate:
[
{ $sort: { date: 1 } },
{
$group: {
_id: '$userId',
stocks: { $last: '$stocks' },
},
},
{ $unwind: '$stocks' },
{
$group: {
_id: '$stocks.productId',
totalQuantity: { $sum: '$stocks.quantity' },
stocks: { $push: { userId: '$_id', quantity: '$stocks.quantity' } },
},
},
]
Now, it can be possible that a stock entry doesn't contain all the products filled. So I'm stuck while writing the new aggregate.
Basically I need to group every products by productId and have an array of the last entry for each user.
Output
This is my expected output:
[
{
"_id": ObjectId("5e75eae1359fc8159d5b6073"),
"totalQuantity": 33,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 33
}
]
},
{
"_id": ObjectId("5e75eaea359fc8159d5b6074"),
"totalQuantity": 2,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 2
}
]
}
]
Documents
Documents (when fully filled):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
},
{
"productId": ObjectId("5e75eaea359fc8159d5b6074"),
"quantity": 2
}
]
}
Sometimes it won't be filled for the whole inventory (that's why I need the lastDate):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
}
]
}
Try this one:
db.collection.aggregate([
{
$group: {
_id: "$userId",
root: {
$push: "$$ROOT"
}
}
},
{
$addFields: {
root: {
$map: {
input: "$root",
as: "data",
in: {
"stocks": {
$map: {
input: "$$data.stocks",
as: "stock",
in: {
"productId": "$$stock.productId",
"userId": "$$data.userId",
"quantity": "$$stock.quantity",
"lastDate": "$$data.date"
}
}
}
}
}
}
}
},
{
$unwind: "$root"
},
{
$replaceRoot: {
newRoot: "$root"
}
},
{
$unwind: "$stocks"
},
{
$sort: {
"stocks.lastDate": 1
}
},
{
$group: {
_id: "$stocks.productId",
totalQuantity: {
$last: "$stocks.quantity"
},
stocks: {
$last: "$stocks"
}
}
},
{
$addFields: {
stocks: [
{
"lastDate": "$stocks.lastDate",
"quantity": "$stocks.quantity",
"userId": "$stocks.userId"
}
]
}
}
])
MongoPlayground

Mongodb - Add extra fields based on other field value

I have documents in db in the following format:
{
"_id" : ObjectId("5d6fb50852020c4a182fc773"),
"startTimestamp" : "1567601927157"
}
What I want to achieve is, using the "startTimestamp" value, create the following new fields:
date (in the format "04-09-2019")
hour (like "18")
month (like "9")
time (like "18:28:47")
weekDay (like "Wednesday")
Can I get a query to do the above operation in all the documents and finally create respective documents in the following format:
{
"startTimestamp" : "1567601927157",
"date" : "04-09-2019",
"hour" : "18",
"month" : "9",
"time" : "18:28:47",
"weekDay" : "Wednesday",
}
Edit:
"startTimestamp" is not the only field present in the documents, it has other fields as well, like below:
{
"useCaseStatus" : "In Progress",
"feedbackRequested" : false,
"userFeedback" : null,
"startTimestamp" : "1567669352778"
}
By adding new fields to the above document, I dont want to delete the fields that are already present(because all the solutions I have got so far removes the other fields present in the documents). Also, adding one more expected document below (Please note that hour and month fields are in string format, not int):
{
"useCaseStatus" : "In Progress",
"feedbackRequested" : false,
"userFeedback" : null,
"startTimestamp" : "1567669352778",
"endTimestamp" : null,
"date" : "05-09-2019",
"hour" : "13",
"month" : "9",
"time" : "13:12:32",
"weekDay" : "Thursday"
}
You can use below aggregation
db.collection.aggregate([
{ "$replaceRoot": {
"newRoot": {
"$let": {
"vars": { "date": { "$toDate": { "$toLong": "$startTimestamp" } } },
"in": {
"$mergeObjects": [
{
"date": { "$dateToString": { "date": "$$date", "format": "%d-%m-%Y" } },
"month": { "$toString": { "$month": "$$date" } },
"hour": { "$toString": { "$hour": "$$date" } },
"time": { "$dateToString": { "date": "$$date", "format": "%H-%M-%S" } },
"weekDay": { "$dayOfWeek": "$$date" }
},
"$$ROOT"
]
}
}
}
}},
{ "$out": "collectionName" }
])
Output
{
"date": "04-09-2019",
"hour": 12,
"month": 9,
"startTimestamp": "1567601927157",
"time": "12-58-47",
"weekDay": 4
}
You need to start with $toLong and $toDate to parse your string. Then you can use $dateToParts and $dayOfWeek. To translate number into string you can use $switch
db.collection.aggregate([
{
$addFields: {
date: {
$toDate: {
$toLong: "$startTimestamp"
}
}
}
},
{
$addFields: {
dateParts: { $dateToParts: { date: "$date" } },
dayOfWeek: { $dayOfWeek: "$date" }
}
},
{
$project: {
startTimestamp: 1,
date: { $dateToString: { date: "$date", format: "%d-%m-%Y" } },
hour: "$dateParts.hour",
month: "$dateParts.month",
time: { $dateToString: { date: "$date", format: "%H:%M:%S" } },
weekDay: {
$switch: {
branches: [
{ case: { $eq: [ "$dayOfWeek", 1 ] }, then: "Sunday" },
{ case: { $eq: [ "$dayOfWeek", 2 ] }, then: "Monday" },
{ case: { $eq: [ "$dayOfWeek", 3 ] }, then: "Tuesday" },
{ case: { $eq: [ "$dayOfWeek", 4 ] }, then: "Wednesday" },
{ case: { $eq: [ "$dayOfWeek", 5 ] }, then: "Thursday" },
{ case: { $eq: [ "$dayOfWeek", 6 ] }, then: "Friday" }
],
default: "Saturday"
}
}
}
}
])
Mongo Playground
You need to implement aggregate pipeline and use date operators available but as you have millisecond saved in string first we have to convert it to int then date then perfrom date operators notice some of them will need timezone to give accurate result instead will just give utc results
db.collection.aggregate([
{
$addFields: {
longMillis: {
$toLong: "$startTimestamp"
}
}
},
{
$project: {
startTimestamp: 1,
"date": {
"$add": [
new Date(0),
"$longMillis"
]
}
}
},
{
$project: {
startTimestamp: 1,
month: {
$month: "$date"
},
day: {
$switch: {
branches: [
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
1
]
},
then: "Sunday"
},
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
2
]
},
then: "Monday"
},
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
3
]
},
then: "Tuesday"
},
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
4
]
},
then: "Wednesday"
},
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
5
]
},
then: "Thursday"
},
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
6
]
},
then: "Friday"
},
{
case: {
$eq: [
{
$dayOfMonth: "$date"
},
7
]
},
then: "Saturday"
},
],
default: 6
}
},
hour: {
$hour: {
"date": "$date",
"timezone": "+05:30"
}
},
date: {
$dateToString: {
format: "%d-%m-%Y",
date: "$date"
}
},
time: {
$dateToString: {
format: "%H:%M:%S",
date: "$date",
timezone: "+05:30"
}
},
}
}
])
Giving result:
[
{
"date": "04-09-2019",
"day": "Wednesday",
"hour": 18,
"month": 9,
"startTimestamp": "1567601927157",
"time": "18:28:47"
}
]

Mongo groupby date inside array

I have collection in my db as,
[
{
"groupName" : "testName",
"participants" : [
{
"participantEmail" : "test#test.com",
"lastClearedDate" : 12223213123
},
{
"participantEmail" : "test2#test.com",
"lastClearedDate" : 1234343243423
}
],
"messages" : [
{
"message":"sdasdasdasdasdasd",
"time":22312312312,
"sender":"test#test.com"
},
{
"message":"gfdfvd dssdfdsfs",
"time":2231231237789,
"sender":"test#test.com"
}
]
}
]
This is a collection of group which contains all the participants and messages in that group.
The time field inside the message is Timestamp.
I want get all the messages inside a group which are posted after the given date and grouped by date.
I wrote the following code,
ChatGroup.aggregate([
{ $match: { group_name: groupName } },
{ $unwind: "$messages" },
{ $match: { "messages.time": { $gte: messagesFrom } } },
{
$project: {
_id: 0,
y: {
$year: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
},
m: {
$month: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
},
d: {
$dayOfMonth: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
}
}
},
{
$group: {
_id: {
year: "$y",
month: "$m",
day: "$d"
},
messages: { $push: "$messages" },
count: { $sum: 1 }
}
}
]).then(
group => {
console.log("length of messages", group);
resolve(group);
},
err => {
console.log(err);
}
);
});
and I getting the following output,
[
{
"_id": {
"year": 50694,
"month": 9,
"day": 5
},
"messages": [],
"count": 3
},
{
"_id": {
"year": 50694,
"month": 8,
"day": 27
},
"messages": [],
"count": 1
},
{
"_id": {
"year": 50694,
"month": 8,
"day": 26
},
"messages": [],
"count": 10
}
]
I am not getting the messages but the count is correct.
Also the time which is displayed in the result is incorrect e.g. year, date and month.
Mongo version is 3.2.
I referred the groupby and push documentation from mongodb along with other stackoverflow questions on mongo group by.
What am I doing wrong?
Your timestamp is already in seconds. So, you don't need to convert them to millisecond by multiplying with 1000.
So your final query should be something like this
ChatGroup.aggregate([
{ "$match": {
"group_name": groupName,
"messages.time": { "$gte": messagesFrom }
}},
{ "$unwind": "$messages" },
{ "$match": { "messages.time": { "$gte": messagesFrom }}},
{ "$group": {
"_id": {
"year": { "$year": { "$add": [new Date(0), "$messages.time"] }},
"month": { "$month": { "$add": [new Date(0), "$messages.time"] }},
"day": { "$dayOfMonth": { "$add": [new Date(0), "$messages.time"] }}
},
"messages": { "$push": "$messages" },
"count": { "$sum": 1 }
}}
])
Add messages in $project
{
$project: {
_id: 0,
messages : 1,
.........
},
}

mongodb aggregation with multiple sub groups

I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
{
$group: {
_id: { type: '$type', color: '$color', year: { $year: '$date' } },
docs: {
$push: '$$ROOT'
}
}
},
{
$group: {
_id: { year: '$_id.year' },
docs: {
$push: '$$ROOT'
}
}
}
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"type": "$type",
"color": "$color"
},
"count": { "$sum": "$soldFor" }
} },
{ "$group": {
"_id": {
"year": "$_id.year",
"type": "$_id.type"
},
"counts": {
"$push": {
"k": "$_id.color",
"v": { "sum": "$count" }
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": "$_id.year",
"counts": {
"$push": {
"k": "$_id.type",
"v": "$counts"
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": null,
"counts": {
"$push": {
"k": { "$substr": ["$_id", 0, -1 ]},
"v": "$counts"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
]

Group by day/month/week basis on the date range

This is in reference to this question.
This is my data set:
[
{
"rating": 4,
"ceatedAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"createdAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"ceatedAt": ISODate("2016-07-01T15:32:41.262+0000")
},
{
"rating": 5,
"createdAt": ISODate("2016-07-01T15:32:41.262+0000")
}
]
I want to be able to filter basis on week or month basis on the date range.
How would I do that in mongo?
This was the answer given for grouping by days.
db.collection.aggregate([
{
"$project": {
"formattedDate": {
"$dateToString": { "format": "%Y-%m-%d", "date": "$ceatedAt" }
},
"createdAtMonth": { "$month": "$ceatedAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$formattedDate",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" },
}
}
])
For grouping on weekly basis, run the following pipeline which mainly uses the Date Aggregation Operators to extract the date parts:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtWeek",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" }
}
}
])
and for monthly aggregates, interchange the $group key to use the created month field:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtMonth",
"average": { "$avg": "$rating" },
"week": { "$first": "$createdAtWeek" }
}
}
])