mongo group by multiple times - mongodb

Let's assume I have this set of data:
{ ValidFrom: "2019-03-25T16:01:55.714+0000", ValidTo: "2019-03-25T16:01:55.714+0000" },
{ ValidFrom: "2019-03-26T16:01:55.714+0000", ValidTo: "2019-03-25T16:01:55.714+0000" },
{ ValidFrom: "2019-03-25T16:01:55.714+0000", ValidTo: "2019-03-27T16:01:55.714+0000" }
I would like to see this result with one query:
{ "Day": "2019-03-25", ValidFromCount: 2, ValidToCount: 2 },
{ "Day": "2019-03-26", ValidFromCount: 1, ValidToCount: 0 },
{ "Day": "2019-03-27", ValidFromCount: 0, ValidToCount: 1 }
Currently I wrote this aggregation but I am stuck now:
{
$addFields: {
ValidFromDay: { $dateToString: { format: "%Y-%m-%d", date: "$ValidFrom" } },
ValidUntilDay: { $dateToString: { format: "%Y-%m-%d", date: "$ValidUntil" } }
}
},
{
$group : {
_id: { FromDate: '$ValidFromDay', ToDate: '$ValidUntilDay' },
Count: { "$sum": 1 },
}
},
{
$group : {
_id: null,
FromDates: { "$addToSet": { "Date": "$_id.FromDate", "FromCount": { "$sum": "$Count" } } },
ToDate: { "$addToSet": { "Date": "$_id.ToDate", "UntilCount": "$Count" } }
}
}
Is it possible to produce the results I am looking for in some way?

You need to add an array of 2 fields, not just 2 fields. This will let you unwind it and count by date:
{
$addFields: {
boundary: [
{ day: {$dateToString: { format: "%Y-%m-%d", date: "$ValidFrom" } }, from: 1 },
{ day: { $dateToString: { format: "%Y-%m-%d", date: "$ValidTo" } } , to: 1 }
]
}
},
{
$unwind: "$boundary"
},
{
$group: {
_id: "$boundary.day",
ValidFromCount: {$sum: "$boundary.from"},
ValidToCount: {$sum: "$boundary.to"},
}
}

I think this will do what you want. There are three stages to the pipeline. The first is a $project that constructs separate day, month and year fields.
> projector
{
"$project" : {
"day" : {
"$dayOfMonth" : "$ValidFrom"
},
"month" : {
"$month" : "$ValidFrom"
},
"year" : {
"$year" : "$ValidFrom"
},
"ValidFrom" : 1
}
}
Then a $group to create the totals and count them by individual day by using an _id of {year, month, day}.
> grouper
{
"$group" : {
"_id" : {
"year" : "$year",
"month" : "$month",
"day" : "$day"
},
"ValidFromCount" : {
"$sum" : 1
},
"ValidToCount" : {
"$sum" : 1
}
}
}
Finally, a projection to eliminate the spurious fields and also get the Day field into the format you want.
> converter
{
"$project" : {
"_id" : 0,
"Day" : {
"$concat" : [
{
"$toString" : "$_id.year"
},
"-",
{
"$toString" : "$_id.month"
},
"-",
{
"$toString" : "$_id.day"
}
]
},
"ValidFromCount" : 1,
"ValidToCount" : 1
}
}
To run it, just execute the following (I created your data in collection so2):
> db.so2.find()
{ "_id" : ObjectId("5ca75adfd1a64a2919883a8d"), "ValidFrom" : "2019-03-25T16:01:55.714+0000", "ValidTo" : "2019-03-25T16:01:55.714+0000" }
{ "_id" : ObjectId("5ca75adfd1a64a2919883a8e"), "ValidFrom" : "2019-03-26T16:01:55.714+0000", "ValidTo" : "2019-03-25T16:01:55.714+0000" }
{ "_id" : ObjectId("5ca75adfd1a64a2919883a8f"), "ValidFrom" : "2019-03-25T16:01:55.714+0000", "ValidTo" : "2019-03-27T16:01:55.714+0000" }
>
> db.so3.aggregate([projector,grouper,converter])
{ "ValidFromCount" : 1, "ValidToCount" : 1, "Day" : "2019-3-26" }
{ "ValidFromCount" : 2, "ValidToCount" : 2, "Day" : "2019-3-25" }
>
I'm not sure if the test data you supplied is correct because the second document appears to go back in time so the ValidTo is before the ValidFrom.

Related

Grouping and summing after using $addToSet in MongoDB

Assume I have the following data:
[{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 1,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 2,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 3,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 4,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
}]
I want to group the data by year and get a total sum for that year. I also want an array of the items used during the group, grouped by a field and summed, if that makes sense. This is ultimately what I want to end up with:
{
"year": [
{
"year": "2019",
"totalYear": 14.86,
"dividends": [
{
"symbol": "T",
"amount": 10.00
},
{
"symbol": "SPHD",
"amount": 4.86
}
]
}
]
}
Below is the code I have written so far using Mongoose. The problem is that I can't figure out how to group and sum the items that I added to the set. I could always do that in the application layer but I was hoping to accomplish this entirely inside of a query:
const [transactions] = await Transaction.aggregate([
{ $match: { type: TransactionType.DIVIDEND_OR_INTEREST, netAmount: { $gte: 0 } } },
{
$facet: {
year: [
{
$group: {
_id: { $dateToString: { format: '%Y', date: '$transactionDate' } },
totalYear: { $sum: '$netAmount' },
dividends: {
$addToSet: {
symbol: '$transactionItem.instrument.symbol',
amount: '$netAmount',
},
},
},
},
{ $sort: { _id: 1 } },
{
$project: {
year: '$_id',
totalYear: { $round: ['$totalYear', 2] },
dividends: '$dividends',
_id: false,
},
},
],
},
},
]).exec();
This requires two $group stages:
First, group by year and symbol
Second, group by year only
If the transactionDate field holds a date-type value, then just use the $year operator to get the year
I would suggest adding a $sort immediately after the $match stage so that it can use an index, if you have created one or plan to create one in the future
const [transactions] = await Transaction.aggregate([
{
$match: {
type: TransactionType.DIVIDEND_OR_INTEREST,
netAmount: { $gte: 0 }
}
},
{ $sort: { transactionDate: 1 } },
{
$facet: {
year: [
{
$group: {
_id: {
year: { $year: "$transactionDate" },
symbol: "$transactionItem.instrument.symbol"
},
netAmount: { $sum: "$netAmount" }
}
},
{
$group: {
_id: "$_id.year",
totalYear: { $sum: "$netAmount" },
dividends: {
$push: {
symbol: "$_id.symbol",
amount: "$netAmount"
}
}
}
},
{
$project: {
_id: 0,
year: "$_id",
totalYear: 1,
dividends: 1
}
}
]
}
}
]).exec();
Playground

How to regroup (or merge) objects in aggregation pipeline in MongoDB?

I currently have an aggregation pipeline:
db.getCollection('forms').aggregate([
{ $unwind: //unwind },
{
$match: {
//some matches
}
},
{
$project: {
//some projections
}
},
{
//Finally, im grouping the results
$group: {
_id: {
year: { $year: '$createdAt' },
month: { $month: '$createdAt' },
raceEthnicity: '$demographic.raceEthnicity'
},
count: { $sum: 1 },
}
]
My current results are similar to:
[{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Asian"
},
"count" : 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Multiracial"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "White"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
"raceEthnicity" : "White"
},
"count" : 33.0
}]
Is there a way to add a new stage on the pipeline to "merge" results of the same year/month into a single object?
I want to achieve something like:
{
"_id" : {
"year" : 2020,
"month" : 11,
},
"Asian" : 1.0,
"Multiracial": 3.0,
"White": 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
},
"White": 33
}
Is it possible? How can I do that?
Add this one to your aggregation pipeline.
db.collection.aggregate([
{ $set: { "data": { k: "$_id.raceEthnicity", v: "$count" } } },
{ $group: { _id: { year: "$_id.year", month: "$_id.month" }, data: { $push: "$data" } } },
{ $set: { "data": { $arrayToObject: "$data" } } },
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$data"] } } },
{ $unset: "data" }
])
Unlike the solution from @wak786, you don't need to know all ethnicities at design time. It works for arbitrary ethnicities.
Add these stages to your pipeline.
db.collection.aggregate([
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$$ROOT",
{
$arrayToObject: [
[
{
k: "$_id.raceEthnicity",
v: "$count"
}
]
]
}
]
}
}
},
{
"$group": {
"_id": {
year: "$_id.year",
month: "$_id.month",
},
"Asian": {
"$sum": "$Asian"
},
"Multiracial": {
"$sum": "$Multiracial"
},
"White": {
"$sum": "$White"
}
}
}
])
Below is the mongo playground link. I have taken the current result of your pipeline as input to my query.
Try it here

Get data for month wise in mongodb

I want to get data for each month. In my table, data is stored like this:
"patient" : [
{
"status" : 'arrived',
start_time: '2017-08-17T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-08-16T07:17:00.000Z
},
{
"status" : 'arrived',
start_time: '2017-07-12T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-07-05T08:10:00.000Z
},
{
"status" : 'arrived',
start_time: '2017-06-02T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-05-05T08:16:00.000Z
}
]
etc,
and I want the count of patients for each month (Jan to Dec), like this:
{
"month" : 8,
"count" : 2
}and like this month 1 to 12
I assume the patient array is associated with a customer and the dates are stored in MongoDB ISODate format.
So, the actual document would look like :
{
name: "stackOverflow",
"patient" : [
{
"status" : 'arrived',
"start_time": ISODate("2017-08-17T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-08-16T07:17:00.000Z")
},
{
"status" : 'arrived',
"start_time": ISODate("2017-07-12T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-07-05T08:10:00.000Z")
},
{
"status" : 'arrived',
"start_time": ISODate("2017-06-02T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-05-05T08:16:00.000Z")
}
]
}
here is a sample query which you can try -
db.test.aggregate([
{$unwind: "$patient"},
{ $group: {
_id: {name: "$name", month: {$month: "$patient.start_time"}},
count: { $sum: 1}
}},
{$group: {
_id: "$_id.name",
patient: {$push: {month: "$_id.month", count: "$count"}}
}}
])
Sample output:
{
"_id" : "stackOverflow",
"patient" : [
{
"month" : 5,
"count" : 1
},
{
"month" : 6,
"count" : 1
},
{
"month" : 7,
"count" : 2
},
{
"month" : 8,
"count" : 2
}
]
}
You can change the query according to your use case. Hope this helps!
This is my code:-
db.appointments.aggregate( [
{
$project:
{
"patient_id": 1,
"start_time": 1,
"status": 1
}
},
{
$match: {
'start_time' : { $gte: startdate.toISOString() },
'status': { $eq: 'arrived' }
} ,
},
{ $group: {
_id: {id: "$_id", start_time: {$month: "$appointments.start_time"}},
count: { $sum: 1}
}}
])
When I used this :-
{ $group: {
_id: {id: "$_id", start_time: {$month: "$start_time"}},
count: { $sum: 1}
}
}
it shows this error message:
{"name":"MongoError","message":"can't convert from BSON type missing to Date","ok":0,"errmsg":"can't convert from BSON type missing to Date","code":16006,"codeName":"Location16006"}
And when I comment it out, it shows this:
Output here:
:[{"count":{"_id":"595b6f95ab43ec1f6c92b898","patient_id":"595649904dbe9525c0e036ef","start_time":"2017-07-04T10:35:00.000Z","status":"arrived"}},
{"count":{"_id":"595dff870960d425d4f14633","patient_id":"5956478b4dbe9525c0e036ea","start_time":"2017-03-08T09:14:00.000Z","status":"arrived"}},{"count":{"_id":"595dffaa0960d425d4f14634","patient_id":"595649904dbe9525c0e036ef","start_time":"2017-03-17T09:15:00.000Z","status":"arrived"}},{"count":{"_id":"595dffcf0960d425d4f14635","patient_id":"595648394dbe9525c0e036ec","start_time":"2017-06-08T09:15:00.000Z","status":"arrived"}},{"count":{"_id":"595dfffb0960d425d4f14636","patient_id":"5956478b4dbe9525c0e036ea","start_time":"2017-06-20T09:16:00.000Z","status":"arrived"}},{"count":{"_id":"595e00160960d425d4f14637","patient_id":"5959ea7f80388b19e0b57817","start_time":"2017-08-17T09:17:00.000Z","status":"arrived"}}]}
const group = {
$group: {
_id: { month: { $month: "$createdAt" } },
count: { $sum: 1 },
},
};
const groups = {
$group: {
_id: null,
patient: { $push: { month: '$_id.month', count: '$count' } },
},
};
return db.Patient.aggregate([group, groups]);

Group and count by month

I have a booking table and I want to get the number of bookings per month, i.e. group by month.
I am confused about how to get the month from a date.
Here is my schema:
{
"_id" : ObjectId("5485dd6af4708669af35ffe6"),
"bookingid" : 1,
"operatorid" : 1,
...,
"bookingdatetime" : "2012-10-11T07:00:00Z"
}
{
"_id" : ObjectId("5485dd6af4708669af35ffe7"),
"bookingid" : 2,
"operatorid" : 1,
...,
"bookingdatetime" : "2014-07-26T05:00:00Z"
}
{
"_id" : ObjectId("5485dd6af4708669af35ffe8"),
"bookingid" : 3,
"operatorid" : 2,
...,
"bookingdatetime" : "2014-03-17T11:00:00Z"
}
And this is what I have tried:
db.booking.aggregate([
{ $group: {
_id: new Date("$bookingdatetime").getMonth(),
numberofbookings: { $sum: 1 }
}}
])
but it returns:
{ "_id" : NaN, "numberofbookings" : 3 }
Where am I going wrong?
You need to use the $month keyword in your group. Your new Date().getMonth() call will only happen once, and will try and create a month out of the string "$bookingdatetime".
db.booking.aggregate([
{$group: {
_id: {$month: "$bookingdatetime"},
numberofbookings: {$sum: 1}
}}
]);
You can't include arbitrary JavaScript in your aggregation pipeline, so because you're storing bookingdatetime as a string instead of a Date you can't use the $month operator.
However, because your date strings follow a strict format, you can use the $substr operator to extract the month value from the string:
db.test.aggregate([
{$group: {
_id: {$substr: ['$bookingdatetime', 5, 2]},
numberofbookings: {$sum: 1}
}}
])
Outputs:
{
"result" : [
{
"_id" : "03",
"numberofbookings" : 1
},
{
"_id" : "07",
"numberofbookings" : 1
},
{
"_id" : "10",
"numberofbookings" : 1
}
],
"ok" : 1
}
Starting in Mongo 4, you can use the $toDate operator to convert your string to date (building on the answer given by Will Shaver):
// { date: "2012-10-11T07:00:00Z" }
// { date: "2012-10-23T18:30:00Z" }
// { date: "2012-11-02T21:30:00Z" }
db.bookings.aggregate([
{ $group: {
_id: { month: { $month: { $toDate: "$date" } } },
bookings: { $sum: 1 }
}}
])
// { "_id" : { "month" : 10 }, "bookings" : 2 }
// { "_id" : { "month" : 11 }, "bookings" : 1 }
If you intend to get groups by months even if your data spreads over multiple years, you can use a combination of $dateFromString and $dateToString (in order to format dates as "%Y-%m" (e.g. 2012-10)):
// { date: "2012-10-11T07:00:00Z" }
// { date: "2012-10-23T18:30:00Z" }
// { date: "2012-11-02T21:30:00Z" }
// { date: "2013-01-11T18:30:00Z" }
// { date: "2013-10-07T14:15:00Z" }
db.bookings.aggregate([
{ $group: {
_id: {
$dateToString: {
date: { $dateFromString: { dateString: "$date" } },
format: "%Y-%m"
}
},
bookings: { $count: {} } // or { $sum: 1 } prior to Mongo 5
}}
])
// { _id: "2012-10", bookings: 2 }
// { _id: "2012-11", bookings: 1 }
// { _id: "2013-01", bookings: 1 }
// { _id: "2013-10", bookings: 1 }
This:
first transforms the string date into a date: $dateFromString: { dateString: "$date" }
in order to format the date as %Y-%m: $dateToString: { date: { }, format: "%Y-%m" }
the combination of which ($dateFromString/$dateToString) is used as our group key
and finally we count our grouped bookings with $count (or { $sum: 1 } prior to Mongo 5)

MongoDB aggregate using distinct

I have an aggregation that groups on a date and creates a sum.
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate"
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
The output is
"result" : [
{
"_id" : {
"notificationDate" : ISODate("2013-07-18T04:00:00Z")
},
"nd" : ISODate("2013-07-18T04:00:00Z"),
"count" : 484
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-19T04:00:00Z")
},
"nd" : ISODate("2013-07-19T04:00:00Z"),
"count" : 490
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-20T04:00:00Z")
},
"nd" : ISODate("2013-07-20T04:00:00Z"),
"count" : 174
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-21T04:00:00Z")
},
"nd" : ISODate("2013-07-21T04:00:00Z"),
"count" : 6
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-22T04:00:00Z")
},
"nd" : ISODate("2013-07-22T04:00:00Z"),
"count" : 339
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-23T04:00:00Z")
},
"nd" : ISODate("2013-07-23T04:00:00Z"),
"count" : 394
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-24T04:00:00Z")
},
"nd" : ISODate("2013-07-24T04:00:00Z"),
"count" : 17
}
],
"ok" : 1
So far so good. What I need to do now is to keep this, but also add a distinct in the criteria (for argument's sake I want to use AccountId). That would yield me the count of the grouped dates only using distinct AccountId. Is distinct even possible within the aggregation framework?
You can use two group commands in the pipeline: the first to group by accountId, followed by a second group that does the usual operation. Something like this:
db.InboundWorkItems.aggregate(
{$match: {notificationDate: {$gte: ISODate("2013-07-18T04:00:00Z")}, dropType:'drop' }},
{$group: {_id:"accountId",notificationDate:"$notificationDate"}},
{$group: {_id:1, nd: {$first:"$notificationDate"}, count:{$sum:1} }},
{$sort:{nd:1}} )
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: "$AccountId",
notificationDate: {
$max: "$notificationDate"
},
dropType: {
$max: "$dropType"
}
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate"
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
I think you might actually be looking for a single group (English is a bit confusing) like so:
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate", accountId: '$accountId'
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
I add the compound _id in the $group because of:
The would yield me the count of the grouped dates only using distinct AccountId.
Which makes me think you want the grouped date count by account ID.