MongoDB query based on last query by max date - mongodb

I have the following collection
{
_id: ObjectId(...),
consumerId: "...",
dealId: "...",
status: "...",
stage: 0,
createdDate: 2020-09-06T00:17:16.719+00:00
}
I would like to figure out how to get distinct documents given a specific consumerId and status not equal to say "x" or "y" based on the latest createdDate. Also what would be the best indexes of fields for the collection?

You can use $match to apply the condition. $sort helps to do descending order of createdDate. Then groups helps to give the distinct data with the help of $first
[
{
$match: {
$expr: {
$and: [
{$eq: ["$consumerId","1"]},
{
$not: {
$in: ["$status",["x","y"]]
}
}
]
}
}
},
{
$sort: {
createdDate: -1
}
},
{
$group: {
_id: "$consumerId",
consumerId: { $first: "$consumerId" },
dealId: { $first: "$dealId" },
status: { $first: "$status" },
stage: { $first: "$stage" },
createdDate: { $first: "$createdDate" }
}
}
]
Working Mongo playground

Related

Mongodb aggregate grouping elements of an array type field

I have below data in my collection:
[
{
"_id":{
"month":"Jan",
"year":"2022"
},
"products":[
{
"product":"ProdA",
"status":"failed",
"count":15
},
{
"product":"ProdA",
"status":"success",
"count":5
},
{
"product":"ProdB",
"status":"failed",
"count":20
},
{
"product":"ProdB",
"status":"success",
"count":10
}
]
},
...//more such data
]
I want to group the elements of products array on the name of the product, so that we have record of how what was the count of failure of success of each product in each month. Every record is guaranteed to have both success and failure count each month. The output should look like below:
[
{
"_id":{
"month":"Jan",
"year":"2022"
},
"products":[
{
"product":"ProdA","status":[{"name":"success","count":5},{"name":"failed","count":15}]
},
{
"product":"ProdB","status":[{"name":"success","count":10},{"name":"failed","count":20}]
}
]
},
...//data for succeeding months
]
I have tried to do something like this:
db.collection.aggregate([{ $unwind: "$products" },
{
$group: {
"_id": {
month: "$_id.month",
year: "$_id.year"
},
products: { $push: { "product": "$product", status: { $push: { name: "$status", count: "$count" } } } }
}
}]);
But above query doesn't work.
On which level I need to group fields so as to obtain above output.
Please help me to find out what I am doing wrong.
Thank You!
Your first group stage needs to group by both the _id and the product name, aggregate a list of status counts and then another group stage which then forms the products list:
db.collection.aggregate([
{$unwind: "$products"},
{$group: {
_id: {
id: "$_id",
product: "$products.product",
},
status: {
$push: {
name: "$products.status",
count: "$products.count"
}
}
}
},
{$group: {
_id: "$_id.id",
products: {
$push: {
product: "$_id.product",
status: "$status"
}
}
}
}
])
Mongo Playground

How to group mongodb aggregate array of objects data To sum numbers on the same date

I'm trying to create charts but I can't combine the data to be like "Expected result below"
What can I use to return "Expected result".
I tried using $group in $group and $reduce and it didn't work well.
I hope someone can help me solve this task
Current result is
[
{
"_id":"5fd4c3586e83b334d97c5218",
"consumption":5,
"charts":[
{
"date":"2020-10",
"consumption":1
},
{
"date":"2020-10",
"consumption":1
},
{
"date":"2020-11",
"consumption":1
},
{
"date":"2020-11",
"consumption":1
}
]
}
]
Expected result is
[
{
"_id":"5fd4c3586e83b334d97c5218",
"consumption":5,
"charts":[
{
"date":"2020-10",
"consumption":2
},
{
"date":"2020-11",
"consumption":2
},
}
]
You need to go with aggregations
db.collection.aggregate([
{ $unwind: "$charts" },
{
$group: {
_id: { _id: "$_id", month: "$charts.date.month", year: "$charts.date.year" },
consumption: { $first: "$consumption" },
total: { $sum: "$charts.consumption" },
date: { $first: "$charts.date" }
}
},
{
$group: {
_id: "$_id._id",
consumption: { $first: "$consumption" },
charts: {
$push: {
date: "$date",
consumption: "$total"
}
}
}
}
])
Working Mongo playground

Referencing root _id in aggregate lookup match expression not working

This is my first experience using aggregate pipeline. I'm not able to get a "$match" expression to work inside the pipeline. If I remove the "_id" match, I get every document in the collection past the start date, but once I add the $eq expression, it returns empty.
I read a lot of other examples and tried many different ways, and this seems like it is correct. But the result is empty.
Any suggestions?
let now = new Date()
let doc = await Team.aggregate([
{ $match: { created_by: mongoose.Types.ObjectId(req.params.user_oid)} },
{ $sort: { create_date: 1 } },
{ $lookup: {
from: 'events',
let: { "team_oid": "$team_oid" },
pipeline: [
{ $addFields: { "team_oid" : { "$toObjectId": "$team_oid" }}},
{ $match: {
$expr: {
$and: [
{ $gt: [ "$start", now ] },
{ $eq: [ "$_id", "$$team_oid" ] }
]
},
}
},
{
$sort: { start: 1 }
},
{
$limit: 1
}
],
as: 'events',
}},
{
$group: {
_id: "$_id",
team_name: { $first: "$team_name" },
status: { $first: "$status" },
invited: { $first: "$invited" },
uninvited: { $first: "$uninvited" },
events: { $first: "$events.action" },
dates: { $first: "$events.start" } ,
team_oid: { $first: "$events.team_oid" }
}
}])
Example Docs (added by request)
Events:
_id:ObjectId("60350837c57b3a15a414d265")
invitees:null
accepted:null
sequence:7
team_oid:ObjectId("60350837c57b3a15a414d263")
type:"Calendar Invite"
action:"Huddle"
status:"Questions Issued"
title:"Huddle"
body:"This is a Huddle; you should receive new questions 5 days befor..."
creator_oid:ObjectId("5ff9e50a206b1924dccd691e")
start:2021-02-26T07:00:59.999+00:00
end:2021-02-26T07:30:59.999+00:00
__v:0
Team:
_id:ObjectId("60350837c57b3a15a414d263")
weekly_schedule:1
status:"Live"
huddle_number:2
reminders:2
active:true
created_by:ObjectId("5ff9e50a206b1924dccd691e")
team_name:"tESTI"
create_date:2021-02-23T13:50:47.172+00:00
__v:0
This is just a guess since you don't have schema in your question. But it looks like your have some of your _ids mixed up. Where you are currently trying to $match events whose _id is equal to a team_oid. Rather than the event's team_oid field being equal to the current 'team' _id.
I'm pretty confident this will produce the correct output. If you post any schema or sample docs I will edit it.
https://mongoplayground.net/p/5i1w2Ii7KCR
let now = new Date()
let doc = await Team.aggregate([
{ $match: { created_by: mongoose.Types.ObjectId(req.params.user_oid)} },
{ $sort: { create_date: 1 } },
{ $lookup: {
from: 'events',
// Set tea_oid as the current team _id
let: { "team_oid": "$_id" },
pipeline: [
{ $match: {
$expr: {
$and: [
{ $gt: [ "$start", now ] },
// Match events whose 'team_oid' field matches the 'team' _id set above
{ $eq: [ "$team_oid", "$$team_oid" ] }
]
},
}
},
{
$sort: { start: 1 }
},
{
$limit: 1
}
],
as: 'events',
}},
{
$group: {
_id: "$_id",
team_name: { $first: "$team_name" },
status: { $first: "$status" },
invited: { $first: "$invited" },
uninvited: { $first: "$uninvited" },
events: { $first: "$events.action" },
dates: { $first: "$events.start" } ,
team_oid: { $first: "$events.team_oid" }
}
}])

MongoDB - aggregating with nested objects, and changeable keys

I have a document which describes counts of different things observed by a camera within a 15 minute period. It looks like this:
{
"_id" : ObjectId("5b1a709a83552d002516ac19"),
"start" : ISODate("2018-06-08T11:45:00.000Z"),
"end" : ISODate("2018-06-08T12:00:00.000Z"),
"recording" : ObjectId("5b1a654683552d002516ac16"),
"data" : {
"counts" : {
"5b434d05da1f0e00252566be" : 12,
"5b434d05da1f0e00252566cc" : 4,
"5b434d05da1f0e00252566ca" : 1
}
}
}
The keys inside the data.counts object change with each document and refer to additional data that is fetched at a later date. There are unlimited number of keys inside data.counts (but usually about 20)
I am trying to aggregate all these 15 minute documents up to daily aggregated documents.
I have this query at the moment to do that:
db.getCollection("segments").aggregate([
{$match:{
"recording": ObjectId("5bf7f68ad8293a00261dd83f")
}},
{$project:{
"start": 1,
"recording": 1,
"data": 1
}},
{$group:{
_id: { $dateToString: { format: "%Y-%m-%d", date: "$start" } },
"segments": { $push: "$$ROOT" }
}},
{$sort: {_id: -1}},
]);
This does the grouping and returns all the segments in an array.
I want to also aggregate the information inside data.counts, so that I get the sum of values for all keys that are the same within the daily group.
This would save me from having another service loop through each 15 minute segment summing values with the same keys. E.g. the query would return something like this:
{
"_id" : "2019-02-27",
"counts" : {
"5b434d05da1f0e00252566be" : 351,
"5b434d05da1f0e00252566cc" : 194,
"5b434d05da1f0e00252566ca" : 111
... any other keys that were found within a day
}
}
How might I amend the query I already have, or use a different query?
Thanks!
You could use the $facet pipeline stage to create two sub-pipelines; one for segments and another for counts. These sub-pipelines can be joined by using $zip to stitch them together and $map to merge each 2-element array produced from zip. Note this will only work correctly if the sub-pipelines output sorted arrays of the same size, which is why we group and sort by start_date in each sub-pipeline.
Here's the query:
db.getCollection("segments").aggregate([{
$match: {
recording: ObjectId("5b1a654683552d002516ac16")
}
}, {
$project: {
start: 1,
recording: 1,
data: 1,
start_date: { $dateToString: { format: "%Y-%m-%d", date: "$start" }}
}
}, {
$facet: {
segments_pipeline: [{
$group: {
_id: "$start_date",
segments: {
$push: {
start: "$start",
recording: "$recording",
data: "$data"
}
}
}
}, {
$sort: {
_id: -1
}
}],
counts_pipeline: [{
$project: {
start_date: "$start_date",
count: { $objectToArray: "$data.counts" }
}
}, {
$unwind: "$count"
}, {
$group: {
_id: {
start_date: "$start_date",
count_id: "$count.k"
},
count_sum: { $sum: "$count.v" }
}
}, {
$group: {
_id: "$_id.start_date",
counts: {
$push: {
$arrayToObject: [[{
k: "$_id.count_id",
v: "$count_sum"
}]]
}
}
}
}, {
$project: {
counts: { $mergeObjects: "$counts" }
}
}, {
$sort: {
_id: -1
}
}]
}
}, {
$project: {
result: {
$map: {
input: { $zip: { inputs: ["$segments_pipeline", "$counts_pipeline"] }},
in: { $mergeObjects: "$$this" }
}
}
}
}, {
$unwind: "$result"
}, {
$replaceRoot: {
newRoot: "$result"
}
}])
Try it out here: Mongoplayground.

Retrieving a count that matches specified criteria in a $group aggregation

So I am looking to group documents in my collection on a specific field, and for the output results of each group, I am looking to include the following:
A count of all documents in the group that match a specific query (i.e. a count of documents that satisfy some expression { "$Property": "Value" })
The total number of documents in the group
(Bonus, as I suspect that this is not easily accomplished) Properties of a document that correspond to a $min/$max accumulator
I am very new to the syntax used to query in mongo and don't quite understand how it all works, but after some research, I've managed to get it down to the following query (please note, I am currently using version 3.0.12 for my mongo db, but I believe we will upgrade in a couple of months time):
db.getCollection('myCollection').aggregate(
[
{
$group: {
_id: {
GroupID: "$GroupID",
Status: "$Status"
},
total: { $sum: 1 },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$DateCreated" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
},
{
$group: {
_id: "$_id.GroupID",
Statuses: {
$push: {
Status: "$_id.Status",
Count: "$total"
}
},
TotalCount: { $sum: "$total" },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$EarliestCreatedDate" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
}
]
)
Essentially what I am looking to retrieve is the Count for specific Status values, and project them into one final result document that looks like the following:
{
GroupName,
EarliestCreatedDate,
EarliestCreatedBy,
LastModifiedDate,
LastModifiedBy,
TotalCount,
PendingCount,
ClosedCount
}
Where PendingCount and ClosedCount are the total number of documents in each group that have a status Pending/Closed. I suspect I need to use $project with some other expression to extract this value, but I don't really understand the aggregation pipeline well enough to figure this out.
Also the EarliestCreatedBy and LastModifiedBy are the users who created/modified the document(s) corresponding to the EarliestCreatedDate and LastModifiedDate respectively. As I mentioned, I think retrieving these values will add another layer of complexity, so if there is no practical solution, I am willing to forgo this requirement.
Any suggestions/tips would be very much appreciated.
You can try below aggregation stages.
$group
Calculate all the necessary counts TotalCount, PendingCount and ClosedCount for each GroupID
Calculate $min and $max for EarliestCreatedDate and LastModifiedDate respectively and push all the fields to CreatedByLastModifiedBy to be compared later for fetching EarliestCreatedBy and LastModifiedBy for each GroupID
$project
Project all the fields for response
$filter the EarliestCreatedDate value against the data in the CreatedByLastModifiedBy and $map the matching CreatedBy to the EarliestCreatedBy and $arrayElemAt to convert the array to object.
Similar steps for calculating LastModifiedBy
db.getCollection('myCollection').aggregate(
[{
$group: {
_id: "$GroupID",
TotalCount: {
$sum: 1
},
PendingCount: {
$sum: {
$cond: {
if: {
$eq: ["Status", "Pending"]
},
then: 1,
else: 0
}
}
},
ClosedCount: {
$sum: {
$cond: {
if: {
$eq: ["Status", "Closed "]
},
then: 1,
else: 0
}
}
},
GroupName: {
$first: "$GroupName"
},
EarliestCreatedDate: {
$min: "$DateCreated"
},
LastModifiedDate: {
$max: "$LastModifiedDate"
},
CreatedByLastModifiedBy: {
$push: {
CreatedBy: "$CreatedBy",
LastModifiedBy: "$LastModifiedBy",
DateCreated: "$DateCreated",
LastModifiedDate: "$LastModifiedDate"
}
}
}
}, {
$project: {
_id: 0,
GroupName: 1,
EarliestCreatedDate: 1,
EarliestCreatedBy: {
$arrayElemAt: [{
$map: {
input: {
$filter: {
input: "$CreatedByLastModifiedBy",
as: "CrBy",
cond: {
"$eq": ["$EarliestCreatedDate", "$$CrBy.DateCreated"]
}
}
},
as: "EaCrBy",
in: {
"$$EaCrBy.CreatedBy"
}
}
}, 0]
},
LastModifiedDate: 1,
LastModifiedBy: {
$arrayElemAt: [{
$map: {
input: {
$filter: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
cond: {
"$eq": ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
}
}
},
as: "LaMoBy",
in: {
"$$LaMoBy.LastModifiedBy"
}
}
}, 0]
},
TotalCount: 1,
PendingCount: 1,
ClosedCount: 1
}
}]
)
Update for Version < 3.2
$filter is also not available in your version. Below is the equivalent.
The comparison logic is the same and creates an array with for every non matching entry the value of false or LastModifiedBy otherwise.
Next step is to use $setDifference to compare the previous array values with array [false] which returns the elements that only exist in the first set.
LastModifiedBy: {
$setDifference: [{
$map: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
in: {
$cond: [{
$eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
},
"$$MoBy.LastModifiedBy",
false
]
}
}
},
[false]
]
}
Add $unwind stage after $project stage to change to object
{$unwind:"$LastModifiedBy"}
Similar steps for calculating EarliestCreatedBy