Get distinct ISO dates by days, months, year - mongodb

I want to get a distinct set of years and months for all document objects in my MongoDB.
For example, if documents have dates:
2015/08/11
2015/08/11
2015/08/12
2015/09/14
2014/10/30
2014/10/30
2014/08/11
Return unique months and years for all documents, ex:
2015/08
2015/09
2014/10
2014/08
Schema snippet:
var myObjSchema = mongoose.Schema({
date: Date,
request: {
...
I tried using distinct against schema field date:
db.mycollection.distinct('date', {}, {})
But this gave duplicate dates. Output snippet:
ISODate("2015-08-11T20:03:42.122Z"),
ISODate("2015-08-11T20:53:31.135Z"),
ISODate("2015-08-11T21:31:32.972Z"),
ISODate("2015-08-11T22:16:27.497Z"),
ISODate("2015-08-11T22:41:58.587Z"),
ISODate("2015-08-11T23:28:17.526Z"),
ISODate("2015-08-11T23:38:45.778Z"),
ISODate("2015-08-12T06:21:53.898Z"),
ISODate("2015-08-12T13:25:33.627Z"),
ISODate("2015-08-12T14:46:59.763Z")
So the question is:
a: How can I accomplish the above?
b: Is it possible to specify which part of the date you want distinct? Like distinct('date.month'...)?
EDIT: I've found you can get these dates and such with the following query, however the results are not distinct:
db.mycollection.aggregate(
[
{
$project : {
month : {
$month: "$date"
},
year : {
$year: "$date"
},
day: {
$dayOfMonth: "$date"
}
}
}
]
);
Output: duplicates
{ "_id" : "", "month" : 7, "year" : 2015, "day" : 14 }
{ "_id" : "", "month" : 7, "year" : 2015, "day" : 15 }
{ "_id" : "", "month" : 7, "year" : 2015, "day" : 15 }

You need to group your document after the projection and use $addToSet accumulator operator
db.mycollection.aggregate([
{ "$project": {
"year": { "$year": "$date" },
"month": { "$month": "$date" }
}},
{ "$group": {
"_id": null,
"distinctDate": { "$addToSet": { "year": "$year", "month": "$month" }}
}}
])

Indeed, you can distinct values via a $group/_id: null/$addToSet stage.
I'm also including here the use of dateToString that formats your dates into "%Y-%m" (e.g. 2021-12).
// { date: ISODate("2021-12-05") }
// { date: ISODate("2021-12-08") }
// { date: ISODate("2022-04-05") }
// { date: ISODate("2022-12-14") }
db.collection.aggregate([
{ $group: {
_id: null,
months: { $addToSet: { $dateToString: { date: "$date", format: "%Y-%m" } } }
}}
])
// { _id: null, months: ["2021-12", "2022-04", "2022-12"] }

db.mycollection.aggregate(
[
{
"$project": {
"year": { "$year": "$date" },
"month": { "$month": "$date" }
}
},{ $group : {
"_id" :{"year" : "$year" }
}
},
{
$sort: {'_id': -1
}
}
])

Related

MongoDB - convert string to timestamp, group by hour

I have the following documents stored in a collection:
{
"REQUESTTIMESTAMP" : "26-JUN-19 01.34.10.095000000 AM",
"UNHANDLED_INTENT" : 0,
"USERID" : "John",
"START_OF_INTENT_SKILL_CONVERSATION" : 0,
"PROPERTYCODE" : ""
}
I want to group this by the hour(which we will get from 'REQUESTTIMESTAMP')
Earlier, I had this document stored in the collection in a different way, where I had a separate field for hours, and used that hours field to group:
Previous aggregation query :
collection.aggregate([
{'$match': query}, {
'$group': {
"_id": {
"hour": "$hour",
"sessionId": "$sessionId"
}
}
}, {
"$group": {
"_id": "$_id.hour",
"count": {
"$sum": 1
}
}
}
])
Previous collection structure:
{
"timestamp" : "1581533210921",
"date" : "12-02-2020",
"hour" : "13",
"month" : "02",
"time" : "13:46:50",
"weekDay" : "Wednesday",
"__v" : 0
}
How can I do the above same Previous aggregation query with the new document structure (After extracting hours from 'REQUESTTIMESTAMP' field?)
You should convert your timestamp to Date object then take hour from your date object.
db.collection.aggregate([{
'$match': query
}, {
$project: {
date: {
$dateFromString: {
dateString: '$REQUESTTIMESTAMP',
format: "%m-%d-%Y" //This should be your date format
}
}
}
}, {
$group: {
_id: {
hour: {
$hour: "$date"
}
}
}
}])
Problem is months names are not supported by MongoDB. Either you write a lot of code or you use libraries like moments.js. First update your REQUESTTIMESTAMP to proper Date object, then you can group it.
db.collection.find().forEach(function (doc) {
var d = moment(doc.REQUESTTIMESTAMP, "DD-MMM-YY hh.mm.ss.SSS a");
db.collection.updateOne(
{ _id: doc._id },
{ $set: { date: d.toDate() } }
);
})
db.collection.aggregate([
{
$group: {
_id: { $hour: "$date" },
count: { $sum: 1 }
}
}
])
In case if you're not able to update DB with actual date field & still wanted to proceed with existing format, try this query it will add hour field extracted from given string field REQUESTTIMESTAMP :
Query :
db.collection.aggregate([
{
$addFields: {
hour: {
$let: {
/** split string into three parts date + hours + AM/PM */
vars: { hour: { $slice: [{ $split: ["$REQUESTTIMESTAMP", " "] }, 1, 2] } },
in: {
$cond: [{ $in: ["AM", "$$hour"] }, // Check AM exists in array
{ $toInt: { $substr: [{ $arrayElemAt: ["$$hour", 0] }, 0, 2] } }, // If yes then return int of first 2 letters of first element in hour array
{ $add: [{ $toInt: { $substr: [{ $arrayElemAt: ["$$hour", 0] }, 0, 2] } }, 12] } ] // If PM add 12 to int of first 2 letters of first element in hour array
}
}
}
}
}
])
Test : MongoDB-Playground

How to use $dayOfYear aggregation with epoch timestamps [duplicate]

I am trying to aggregate records in a MongoDB collection by hour and need to convert date stored as timestamp (milliseconds) to ISODate so that I can use aggregate framework's built-in date operators ($hour, $month, etc.)
Records are stored as
{
"data" : { "UserId" : "abc", "ProjId" : "xyz"},
"time" : NumberLong("1395140780706"),
"_id" : ObjectId("532828ac338ed9c33aa8eca7")
}
I am trying to use an aggregate query of following type:
db.events.aggregate(
{
$match : {
"time" : { $gte : 1395186209804, $lte : 1395192902825 }
}
},
{
$project : {
_id : "$_id",
dt : {$concat : (Date("$time")).toString()} // need to project as ISODate
}
},
// process records further in $project or $group clause
)
which produces results of the form:
{
"result" : [
{
"_id" : ObjectId("5328da21fd207d9c3567d3ec"),
"dt" : "Fri Mar 21 2014 17:35:46 GMT-0400 (EDT)"
},
{
"_id" : ObjectId("5328da21fd207d9c3567d3ed"),
"dt" : "Fri Mar 21 2014 17:35:46 GMT-0400 (EDT)"
},
...
}
I want to extract hour, day, month, and year from the date but since time is projected forward as string I am unable to use aggregate framework's built-in date operators ($hour, etc.).
How can I convert time from milliseconds to ISO date to do sometime like the following:
db.events.aggregate(
{
$match : {
"time" : { $gte : 1395186209804, $lte : 1395192902825 }
}
},
{
$project : {
_id : "$_id",
dt : <ISO date from "$time">
}
},
{
$project : {
_id : "$_id",
date : {
hour : {$hour : "$dt"}
}
}
}
)
Actually, it is possible, the trick is to add your milliseconds time to a zero-milliseconds Date() object using syntax similar to:
dt : {$add: [new Date(0), "$time"]}
I modified your aggregation from above to produce the result:
db.events.aggregate(
{
$project : {
_id : "$_id",
dt : {$add: [new Date(0), "$time"]}
}
},
{
$project : {
_id : "$_id",
date : {
hour : {$hour : "$dt"}
}
}
}
);
The result is (with one entry of your sample data):
{
"result": [
{
"_id": ObjectId("532828ac338ed9c33aa8eca7"),
"date": {
"hour": 11
}
}
],
"ok": 1
}
I assume there's no way to do it. Because aggregation framework is written in native code. not making use of the V8 engine. Thus everything of JavaScript is not gonna work within the framework (And that's also why aggregation framework runs much faster).
Map/Reduce is a way to work this out, but aggregation framework definitely got much better performance.
About Map/Reduce performance, read this thread.
Another way to work it out would be get a "raw" result from aggregation framework, put it into an JSON array. Then do the conversion by running JavaScript. Sort of like:
var results = db.events.aggregate(...);
reasult.forEach(function(data) {
data.date = new Date(data.dateInMillionSeconds);
// date is now stored in the "date" property
}
To return a valid BSON date all you need is a little date "maths" using the $add operator. You need to add new Date(0) to the timestamp. The new Date(0) represents the number of milliseconds since the Unix epoch (Jan 1, 1970) and is a shorthand for new Date("1970-01-01").
db.events.aggregate([
{ "$match": { "time": { "$gte" : 1395136209804, "$lte" : 1395192902825 } } },
{ "$project": {
"hour": { "$hour": { "$add": [ new Date(0), "$time" ] } },
"day": { "$dayOfMonth": { "$add": [ new Date(0), "$time" ] } },
"month": { "$month": { "$add": [ new Date(0), "$time" ] } },
"year": { "$year": { "$add": [ new Date(0), "$time" ] } }
}}
])
Which yields:
{
"_id" : ObjectId("532828ac338ed9c33aa8eca7"),
"hour" : 11,
"day" : 18,
"month" : 3,
"year" : 2014
}
Starting Mongo 4.0, there is a new $toDate aggregation operator which can convert from various types to a date (in this case from a long):
// { time: NumberLong("1395140780706") }
db.collection.aggregate({ $set: { time: { $toDate: "$time" } } })
// { time: ISODate("2014-03-18T11:06:20.706Z") }
And to get the hour out of it:
// { time: NumberLong("1395140780706") }
db.collection.aggregate({ $project: { hour: { $hour: { $toDate: "$time" } } } })
// { hour: 11 }
use this if {$add: [new Date(0), "$time"]} function returning string type not an ISO date type
I use all of that option but still fail, because my new date from $project return a string type like '2000-11-2:xxxxxxx' not date type like ISO('2000-11-2:xxxxxxx') for anyone who have same problem with me use this.
db.events.aggregate(
{
$project : {
_id : "$_id",
dt : {$add: [new Date(0), "$time"]}
}
},
{
$project : {
_id : "$_id",
"year": { $substr: [ "$dt", 0, 4 ] },
"month": { $substr: [ "$dt", 5, 2] },
"day": { $substr: [ "$dt", 8, 2 ] }
}
}
);
the result will be
{ _id: '59f940eaea87453b30f42cf5',
year: '2017',
month: '07',
day: '04'
},
you can get hours or minute if you want depending on which string you want to subset, then you can group that again according to same date,month or year

Group and count distinct occurrences

I am trying to derive a query to get a count of distinct values and display the relevant fields. The grouping is done by the tempId and the date where the tempId can occur one-to-many times within a single day and within a time frame.
following is my approach,
db.getCollection('targetCollection').aggregate(
{
$match:{
"user.vendor": 'vendor1',
tool: "tool1",
date: {
"$gte": ISODate("2016-04-01"),
"$lt": ISODate("2016-04-04")
}
}
},
{
$group:{
_id: {
tempId: '$tempId',
month: { $month: "$date" },
day: { $dayOfMonth: "$date" },
year: { $year: "$date" }
},
count: {$sum : 1}
}
},
{
$group:{
_id: 1,
count: {$sum : 1}
}
})
This query generates the following output,
{
"_id" : 1,
"count" : 107
}
Which is correct but, I would like to show them separated by the date and with the particular count for that date. For example something like this,
{
"date" : 2016-04-01
"count" : 50
},
{
"date" : 2016-04-02
"count" : 30
},
{
"date" : 2016-04-03
"count" : 27
}
P.S. I am not sure how to put this question together as I am quite new to this technology. Please let me know if refinements are required in the question.
Following is the sample data of the mongodb collection that I am trying to query,
{
"_id" : 1,
"tempId" : "temp1",
"user" : {
"_id" : "user1",
"email" : "user1#email.com",
"vendor" : "vendor1"
},
"tool" : "tool1",
"date" : ISODate("2016-03-09T08:30:42.403Z")
},...
I have come up with the solution myself. What i did was,
I first grouped by the tempId and the date
Then I grouped by the date
This printed out the daily distinct count of tempId, the result I want. The query is as follows,
db.getCollection('targetCollection').aggregate(
{
$match:{
"user.vendor": 'vendor1',
tool: "tool1",
date: {
"$gte": ISODate("2016-04-01"),
"$lt": ISODate("2016-04-13")
}
}
},
{
$group:{
_id: {
tempId: "$tempId",
month: { $month: "$date" },
day: { $dayOfMonth: "$date" },
year: { $year: "$date" }
},
count: {$sum : 1}
}
},
{
$group:{
_id: {
month:"$_id.month" ,
day: "$_id.day" ,
year: "$_id.year"
},
count: {$sum : 1}
}
})
group them via date
db.getCollection('targetCollection').aggregate([
{
$match:{
"user.vendor": 'vendor1',
tool: "tool1",
date: {
"$gte": ISODate("2016-04-01"),
"$lt": ISODate("2016-04-04")
}
}
},
{
$group: {
_id: {
date: "$date",
tempId: "$tempId"
},
count: { $sum: 1 }
}
}
]);

Group and count by month

I have a booking table and I want to get number of bookings in a month i.e. group by month.
And I am confused that how to get month from a date.
Here is my schema:
{
"_id" : ObjectId("5485dd6af4708669af35ffe6"),
"bookingid" : 1,
"operatorid" : 1,
...,
"bookingdatetime" : "2012-10-11T07:00:00Z"
}
{
"_id" : ObjectId("5485dd6af4708669af35ffe7"),
"bookingid" : 2,
"operatorid" : 1,
...,
"bookingdatetime" : "2014-07-26T05:00:00Z"
}
{
"_id" : ObjectId("5485dd6af4708669af35ffe8"),
"bookingid" : 3,
"operatorid" : 2,
...,
"bookingdatetime" : "2014-03-17T11:00:00Z"
}
And this is I have tried:
db.booking.aggregate([
{ $group: {
_id: new Date("$bookingdatetime").getMonth(),
numberofbookings: { $sum: 1 }
}}
])
but it returns:
{ "_id" : NaN, "numberofbookings" : 3 }
Where am I going wrong?
You need to use the $month keyword in your group. Your new Date().getMonth() call will only happen once, and will try and create a month out of the string "$bookingdatetime".
db.booking.aggregate([
{$group: {
_id: {$month: "$bookingdatetime"},
numberofbookings: {$sum: 1}
}}
]);
You can't include arbitrary JavaScript in your aggregation pipeline, so because you're storing bookingdatetime as a string instead of a Date you can't use the $month operator.
However, because your date strings follow a strict format, you can use the $substr operator to extract the month value from the string:
db.test.aggregate([
{$group: {
_id: {$substr: ['$bookingdatetime', 5, 2]},
numberofbookings: {$sum: 1}
}}
])
Outputs:
{
"result" : [
{
"_id" : "03",
"numberofbookings" : 1
},
{
"_id" : "07",
"numberofbookings" : 1
},
{
"_id" : "10",
"numberofbookings" : 1
}
],
"ok" : 1
}
Starting in Mongo 4, you can use the $toDate operator to convert your string to date (building on the answer given by Will Shaver):
// { date: "2012-10-11T07:00:00Z" }
// { date: "2012-10-23T18:30:00Z" }
// { date: "2012-11-02T21:30:00Z" }
db.bookings.aggregate([
{ $group: {
_id: { month: { $month: { $toDate: "$date" } } },
bookings: { $sum: 1 }
}}
])
// { "_id" : { "month" : 10 }, "bookings" : 2 }
// { "_id" : { "month" : 11 }, "bookings" : 1 }
If you intend to get groups by months even if your data spreads over multiple years, you can use a combination of $dateFromString and $dateToString (in order to format dates as "%Y-%m" (e.g. 2012-10)):
// { date: "2012-10-11T07:00:00Z" }
// { date: "2012-10-23T18:30:00Z" }
// { date: "2012-11-02T21:30:00Z" }
// { date: "2013-01-11T18:30:00Z" }
// { date: "2013-10-07T14:15:00Z" }
db.bookings.aggregate([
{ $group: {
_id: {
$dateToString: {
date: { $dateFromString: { dateString: "$date" } },
format: "%Y-%m"
}
},
bookings: { $count: {} } // or { $sum: 1 } prior to Mongo 5
}}
])
// { _id: "2012-10", bookings: 2 }
// { _id: "2012-11", bookings: 1 }
// { _id: "2013-01", bookings: 1 }
// { _id: "2013-10", bookings: 1 }
This:
first transforms the string date into a string: $dateFromString: { dateString: "$date" }
in order to format the date as %Y-%m: $dateToString: { date: { }, format: "%Y-%m" }
the combination of which ($dateFromString/$dateToString) is used as our group key
and finally we count our grouped bookings with $count (or { $sum: 1 } prior to Mongo 5)

MongoDB aggregate return count of 0 if no results

I have the following MongoDB query that groups by date and result and gives a count. I'd like to have the query also return a count of 0 for a particular date and result if data doesn't exist for that day.
For example I have the following result statuses: SUCCESS and FAILED. If on the 21st there were no results of FAILED I would want a count returned of 0:
{
"_id" : {
"month" : 1,
"day" : 21,
"year" : 2014,
"buildResult" : "FAILURE"
},
"count" : 0
}
I've done something similar with a relational database and a calendar table, but I'm not sure how to approach this with MongoDB. Is this possible or should I do something programatically after running the query?
Here is an example of a document (simplified) in the database:
{
"_id" : ObjectId("52deab2fe4b0a491abb54108"),
"type" : "build",
"time" : ISODate("2014-01-21T17:15:27.471Z"),
"data" : {
"buildNumber" : 43,
"buildDuration" : 997308,
"buildResult" : "SUCCESS"
}
}
Here is my current query:
db.builds.aggregate([
{ $match: { "data.buildResult" : { $ne : null} }},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$data.buildResult",
},
count: { $sum: 1}
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1} }
])
If I correctly understand what do you want, you could try this:
db.builds.aggregate([
{ $project:
{
time: 1,
projectedData: { $ifNull: ['$data.buildResult', 'none'] }
}
},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$projectedData"
},
count: { $sum: { $cond: [ { $eq: [ "$projectedData", "none" ] }, 0, 1 ] } }
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])
Update:
You want to get from output more documents that been in input, it is possible only with unwind operator that works with arrays, but you haven't any arrays, so as I know it is impossible to get more documents in your case. So you should add some logic after query result to create new data for existing dates with 0 count for another type of buildResult...