Group and count distinct occurrences - mongodb

I am trying to derive a query to get a count of distinct values and display the relevant fields. The grouping is done by the tempId and the date where the tempId can occur one-to-many times within a single day and within a time frame.
following is my approach,
db.getCollection('targetCollection').aggregate(
{
$match:{
"user.vendor": 'vendor1',
tool: "tool1",
date: {
"$gte": ISODate("2016-04-01"),
"$lt": ISODate("2016-04-04")
}
}
},
{
$group:{
_id: {
tempId: '$tempId',
month: { $month: "$date" },
day: { $dayOfMonth: "$date" },
year: { $year: "$date" }
},
count: {$sum : 1}
}
},
{
$group:{
_id: 1,
count: {$sum : 1}
}
})
This query generates the following output,
{
"_id" : 1,
"count" : 107
}
Which is correct but, I would like to show them separated by the date and with the particular count for that date. For example something like this,
{
"date" : 2016-04-01
"count" : 50
},
{
"date" : 2016-04-02
"count" : 30
},
{
"date" : 2016-04-03
"count" : 27
}
P.S. I am not sure how to put this question together as I am quite new to this technology. Please let me know if refinements are required in the question.
Following is the sample data of the mongodb collection that I am trying to query,
{
"_id" : 1,
"tempId" : "temp1",
"user" : {
"_id" : "user1",
"email" : "user1#email.com",
"vendor" : "vendor1"
},
"tool" : "tool1",
"date" : ISODate("2016-03-09T08:30:42.403Z")
},...

I have come up with the solution myself. What i did was,
I first grouped by the tempId and the date
Then I grouped by the date
This printed out the daily distinct count of tempId, the result I want. The query is as follows,
db.getCollection('targetCollection').aggregate(
{
$match:{
"user.vendor": 'vendor1',
tool: "tool1",
date: {
"$gte": ISODate("2016-04-01"),
"$lt": ISODate("2016-04-13")
}
}
},
{
$group:{
_id: {
tempId: "$tempId",
month: { $month: "$date" },
day: { $dayOfMonth: "$date" },
year: { $year: "$date" }
},
count: {$sum : 1}
}
},
{
$group:{
_id: {
month:"$_id.month" ,
day: "$_id.day" ,
year: "$_id.year"
},
count: {$sum : 1}
}
})

group them via date
db.getCollection('targetCollection').aggregate([
{
$match:{
"user.vendor": 'vendor1',
tool: "tool1",
date: {
"$gte": ISODate("2016-04-01"),
"$lt": ISODate("2016-04-04")
}
}
},
{
$group: {
_id: {
date: "$date",
tempId: "$tempId"
},
count: { $sum: 1 }
}
}
]);

Related

Using Mongo to calculate sum in Aggregator pipeline

I have a timeseries data in mongodb and I want to calculate the sum per day between two given dates of every sensor after I have calculated the difference between the max and min reading of the day by the sensor, using the below query
db.ts_events.aggregate([
{ $match: {
"metadata.assetCode": { $in: [
"h"
]
},
"timestamp": { $gte: ISODate("2022-07-01T02:39:02.000+0000"), $lte: ISODate("2022-07-01T06:30:00.000+0000")
}
}
},
{
$project: {
date: {
$dateToParts: { date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: { $max: "$activeEnergy"
},
minValue: { $min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: { $subtract: [
"$maxValue",
"$minValue"
]
},
}
},
])
I get the following output
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "B"
},
"maxValue" : 1979.78,
"minValue" : 1979.77,
"differnce" : 0.009999999999990905
}
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "A"
},
"maxValue" : 7108.01,
"minValue" : 7098.18,
"differnce" : 9.829999999999927
}
I want to calculate the sum of both meter difference how can I do that?
Apart from this one more problem I am facing which I am putting forward in this edited version, as you can see date is in ISODate format but I will be getting a unix epoch format,
I tried to tweak the query but it is not working
db.ts_events.aggregate([
{
$project: {
date: {
$dateToParts: {
date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
"metadata.assetCode": 1,
"timestamp": 1,
startDate: {
$toDate: 1656686342000
},
endDate: {
$toDate: 1656700200000
}
}
},
{
$match: {
"metadata.assetCode": {
$in: [
"h"
]
},
"timestamp": {
$gte: "$startDate", $lte: "$endDate"
}
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: {
$max: "$activeEnergy"
},
minValue: {
$min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: {
$subtract: [
"$maxValue",
"$minValue"
]
},
}
},
{
$group: {
_id: "$_id.date", res: {
$push: '$$ROOT'
}, differnceSum: {
$sum: '$differnce'
}
}
}
])
Can you help me solve the problem?
One option is to add one more step like this (depending on your expected output format):
This step will group together your separate documents, into one document, which will allow you to sum their values together. Be careful when grouping, since now it is a one big document and a document has a size limit.
We use $$ROOT to keep the original document structure (here inside a new array)
{$group: {_id: 0, res: {$push: '$$ROOT'}, differnceSum: {$sum: $differnce'}}}

Get distinct ISO dates by days, months, year

I want to get a distinct set of years and months for all document objects in my MongoDB.
For example, if documents have dates:
2015/08/11
2015/08/11
2015/08/12
2015/09/14
2014/10/30
2014/10/30
2014/08/11
Return unique months and years for all documents, ex:
2015/08
2015/09
2014/10
2014/08
Schema snippet:
var myObjSchema = mongoose.Schema({
date: Date,
request: {
...
I tried using distinct against schema field date:
db.mycollection.distinct('date', {}, {})
But this gave duplicate dates. Output snippet:
ISODate("2015-08-11T20:03:42.122Z"),
ISODate("2015-08-11T20:53:31.135Z"),
ISODate("2015-08-11T21:31:32.972Z"),
ISODate("2015-08-11T22:16:27.497Z"),
ISODate("2015-08-11T22:41:58.587Z"),
ISODate("2015-08-11T23:28:17.526Z"),
ISODate("2015-08-11T23:38:45.778Z"),
ISODate("2015-08-12T06:21:53.898Z"),
ISODate("2015-08-12T13:25:33.627Z"),
ISODate("2015-08-12T14:46:59.763Z")
So the question is:
a: How can I accomplish the above?
b: Is it possible to specify which part of the date you want distinct? Like distinct('date.month'...)?
EDIT: I've found you can get these dates and such with the following query, however the results are not distinct:
db.mycollection.aggregate(
[
{
$project : {
month : {
$month: "$date"
},
year : {
$year: "$date"
},
day: {
$dayOfMonth: "$date"
}
}
}
]
);
Output: duplicates
{ "_id" : "", "month" : 7, "year" : 2015, "day" : 14 }
{ "_id" : "", "month" : 7, "year" : 2015, "day" : 15 }
{ "_id" : "", "month" : 7, "year" : 2015, "day" : 15 }
You need to group your document after the projection and use $addToSet accumulator operator
db.mycollection.aggregate([
{ "$project": {
"year": { "$year": "$date" },
"month": { "$month": "$date" }
}},
{ "$group": {
"_id": null,
"distinctDate": { "$addToSet": { "year": "$year", "month": "$month" }}
}}
])
Indeed, you can distinct values via a $group/_id: null/$addToSet stage.
I'm also including here the use of dateToString that formats your dates into "%Y-%m" (e.g. 2021-12).
// { date: ISODate("2021-12-05") }
// { date: ISODate("2021-12-08") }
// { date: ISODate("2022-04-05") }
// { date: ISODate("2022-12-14") }
db.collection.aggregate([
{ $group: {
_id: null,
months: { $addToSet: { $dateToString: { date: "$date", format: "%Y-%m" } } }
}}
])
// { _id: null, months: ["2021-12", "2022-04", "2022-12"] }
db.mycollection.aggregate(
[
{
"$project": {
"year": { "$year": "$date" },
"month": { "$month": "$date" }
}
},{ $group : {
"_id" :{"year" : "$year" }
}
},
{
$sort: {'_id': -1
}
}
])

Group and count by month

I have a booking table and I want to get number of bookings in a month i.e. group by month.
And I am confused that how to get month from a date.
Here is my schema:
{
"_id" : ObjectId("5485dd6af4708669af35ffe6"),
"bookingid" : 1,
"operatorid" : 1,
...,
"bookingdatetime" : "2012-10-11T07:00:00Z"
}
{
"_id" : ObjectId("5485dd6af4708669af35ffe7"),
"bookingid" : 2,
"operatorid" : 1,
...,
"bookingdatetime" : "2014-07-26T05:00:00Z"
}
{
"_id" : ObjectId("5485dd6af4708669af35ffe8"),
"bookingid" : 3,
"operatorid" : 2,
...,
"bookingdatetime" : "2014-03-17T11:00:00Z"
}
And this is I have tried:
db.booking.aggregate([
{ $group: {
_id: new Date("$bookingdatetime").getMonth(),
numberofbookings: { $sum: 1 }
}}
])
but it returns:
{ "_id" : NaN, "numberofbookings" : 3 }
Where am I going wrong?
You need to use the $month keyword in your group. Your new Date().getMonth() call will only happen once, and will try and create a month out of the string "$bookingdatetime".
db.booking.aggregate([
{$group: {
_id: {$month: "$bookingdatetime"},
numberofbookings: {$sum: 1}
}}
]);
You can't include arbitrary JavaScript in your aggregation pipeline, so because you're storing bookingdatetime as a string instead of a Date you can't use the $month operator.
However, because your date strings follow a strict format, you can use the $substr operator to extract the month value from the string:
db.test.aggregate([
{$group: {
_id: {$substr: ['$bookingdatetime', 5, 2]},
numberofbookings: {$sum: 1}
}}
])
Outputs:
{
"result" : [
{
"_id" : "03",
"numberofbookings" : 1
},
{
"_id" : "07",
"numberofbookings" : 1
},
{
"_id" : "10",
"numberofbookings" : 1
}
],
"ok" : 1
}
Starting in Mongo 4, you can use the $toDate operator to convert your string to date (building on the answer given by Will Shaver):
// { date: "2012-10-11T07:00:00Z" }
// { date: "2012-10-23T18:30:00Z" }
// { date: "2012-11-02T21:30:00Z" }
db.bookings.aggregate([
{ $group: {
_id: { month: { $month: { $toDate: "$date" } } },
bookings: { $sum: 1 }
}}
])
// { "_id" : { "month" : 10 }, "bookings" : 2 }
// { "_id" : { "month" : 11 }, "bookings" : 1 }
If you intend to get groups by months even if your data spreads over multiple years, you can use a combination of $dateFromString and $dateToString (in order to format dates as "%Y-%m" (e.g. 2012-10)):
// { date: "2012-10-11T07:00:00Z" }
// { date: "2012-10-23T18:30:00Z" }
// { date: "2012-11-02T21:30:00Z" }
// { date: "2013-01-11T18:30:00Z" }
// { date: "2013-10-07T14:15:00Z" }
db.bookings.aggregate([
{ $group: {
_id: {
$dateToString: {
date: { $dateFromString: { dateString: "$date" } },
format: "%Y-%m"
}
},
bookings: { $count: {} } // or { $sum: 1 } prior to Mongo 5
}}
])
// { _id: "2012-10", bookings: 2 }
// { _id: "2012-11", bookings: 1 }
// { _id: "2013-01", bookings: 1 }
// { _id: "2013-10", bookings: 1 }
This:
first transforms the string date into a string: $dateFromString: { dateString: "$date" }
in order to format the date as %Y-%m: $dateToString: { date: { }, format: "%Y-%m" }
the combination of which ($dateFromString/$dateToString) is used as our group key
and finally we count our grouped bookings with $count (or { $sum: 1 } prior to Mongo 5)

MongoDB aggregate return count of 0 if no results

I have the following MongoDB query that groups by date and result and gives a count. I'd like to have the query also return a count of 0 for a particular date and result if data doesn't exist for that day.
For example I have the following result statuses: SUCCESS and FAILED. If on the 21st there were no results of FAILED I would want a count returned of 0:
{
"_id" : {
"month" : 1,
"day" : 21,
"year" : 2014,
"buildResult" : "FAILURE"
},
"count" : 0
}
I've done something similar with a relational database and a calendar table, but I'm not sure how to approach this with MongoDB. Is this possible or should I do something programatically after running the query?
Here is an example of a document (simplified) in the database:
{
"_id" : ObjectId("52deab2fe4b0a491abb54108"),
"type" : "build",
"time" : ISODate("2014-01-21T17:15:27.471Z"),
"data" : {
"buildNumber" : 43,
"buildDuration" : 997308,
"buildResult" : "SUCCESS"
}
}
Here is my current query:
db.builds.aggregate([
{ $match: { "data.buildResult" : { $ne : null} }},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$data.buildResult",
},
count: { $sum: 1}
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1} }
])
If I correctly understand what do you want, you could try this:
db.builds.aggregate([
{ $project:
{
time: 1,
projectedData: { $ifNull: ['$data.buildResult', 'none'] }
}
},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$projectedData"
},
count: { $sum: { $cond: [ { $eq: [ "$projectedData", "none" ] }, 0, 1 ] } }
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])
Update:
You want to get from output more documents that been in input, it is possible only with unwind operator that works with arrays, but you haven't any arrays, so as I know it is impossible to get more documents in your case. So you should add some logic after query result to create new data for existing dates with 0 count for another type of buildResult...

MongoDB beginner - Group by date

Just starting with mongodb and used this post to do a similar query but I need the actual date like mm/dd/yyyy instead of just the day of year - help!
How to group by multiple fields in MongoDB when one is a date field
Here is the query I have (almost the same as the post above):
db.col.aggregate(
{ $group: {
_id: {
status: "$status",
dayOfYear: { $dayOfYear: "$datetime" }
},
hits: { $sum: "$hits" }
} }
)
Here is sample data:
{
"_id" : ObjectId("45f5ed29f4e1a522bfe53f13"),
"hits" : 2,
"status" : 400,
"datetime" : ISODate("2014-01-10T10:17:57.216Z")
}
You can add more date columns or remove them to get different groupings:
db.col.aggregate(
{ $group: {
_id: {
status: "$status",
month: { $month: "$datetime" },
day: { $dayOfYear: "$datetime" },
year: { $year: "$datetime" }
},
hits: { $sum: "$hits" }
} }
)
What this means is: for each unique status that appeared on a particular day (ignore time) sum up all hits.