How to group by seconds without the ISODate decimal part in MongoDB - mongodb

I want to query the database to find the number of posts per second, to feed into a graph showing the activity trend. I use spring-data-mongo, but for now the first step is to do this in the mongo shell before worrying about how to do it from Java.
I used the aggregation framework on it as shown below:
// Count posts created inside a five-second window, one group per distinct dateCreated value.
db.post.group({
// Grouping key is the full dateCreated value, milliseconds included.
key:{dateCreated: 1},
// Only consider posts created strictly between the two timestamps.
cond: { dateCreated:
{
"$gt": new ISODate("2013-08-09T05:51:15Z"),
"$lt": new ISODate("2013-08-09T05:51:20Z")
}
},
// Called once per matching document; increments the counter of that document's group.
reduce: function(cur, result){
result.count += 1
},
// Every group starts with a count of zero.
initial: {count:0}
})
The result is encouraging, but it seems that because of the decimal (milliseconds) part of the ISODate the counts are wrong: the grouping is done per exact timestamp, decimal part included, rather than per second, which makes each count 1.
[
{
"dateCreated" : ISODate("2013-08-09T05:51:15.332Z"),
"count" : 1
},
{
"dateCreated" : ISODate("2013-08-09T05:51:15.378Z"),
"count" : 1
},
{
"dateCreated" : ISODate("2013-08-09T05:51:15.377Z"),
"count" : 1
},
// many more here
]
Is there a way to just consider only the seconds part as in result like below:
[
{
"dateCreated" : ISODate("2013-08-09T05:51:15Z"),
"count" : 5
},
{
"dateCreated" : ISODate("2013-08-09T05:51:16Z"),
"count" : 8
},
{
"dateCreated" : ISODate("2013-08-09T05:51:17Z"),
"count" : 3
},
{
"dateCreated" : ISODate("2013-08-09T05:51:18Z"),
"count" : 10
},
{
"dateCreated" : ISODate("2013-08-09T05:51:19Z"),
"count" : 2
},
{
"dateCreated" : ISODate("2013-08-09T05:51:20Z"),
"count" : 13
}
]
Thanks for reading this.

For those in the same situation, here is how I modified my query. Thanks to #Sammaye.
// Count posts per second inside the window by grouping on the
// hour/minute/second components of dateCreated, which discards the milliseconds.
db.post.aggregate(
    // Stage 1: keep posts created strictly inside the five-second window.
    {
        "$match": {
            "dateCreated": {
                "$gt": new ISODate("2013-08-09T05:51:15.000Z"),
                "$lt": new ISODate("2013-08-09T05:51:20.000Z")
            }
        }
    },
    // Stage 2: one bucket per (hour, minute, second); cnt is the bucket size.
    {
        "$group": {
            "_id": {
                "hour": { "$hour": "$dateCreated" },
                "minute": { "$minute": "$dateCreated" },
                "second": { "$second": "$dateCreated" }
            },
            "cnt": { "$sum": 1 }
        }
    }
)
{
"result" : [
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 19
},
"cnt" : 26
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 18
},
"cnt" : 29
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 17
},
"cnt" : 27
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 16
},
"cnt" : 25
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 15
},
"cnt" : 16
}
],
"ok" : 1
}

Related

Aggregate value of each hour by MongoDB

As in the image, the table above represents my original data; the time field is irregular. Now I want to get the average value for every hour. I thought of using $match, $group and $project, or even a for loop, but I don't have a clear idea of how to do it.
id: ObjectId,
value: Number,
time: Date()
I have sample collection, hours.:
{ "_id" : 1, "value" : 10, "dt" : ISODate("2019-10-17T00:01:32Z") }
{ "_id" : 2, "value" : 16, "dt" : ISODate("2019-10-17T00:02:12Z") }
{ "_id" : 3, "value" : 8, "dt" : ISODate("2019-10-17T01:04:09Z") }
{ "_id" : 4, "value" : 12, "dt" : ISODate("2019-10-17T02:14:21Z") }
{ "_id" : 5, "value" : 6, "dt" : ISODate("2019-10-17T02:54:02Z") }
{ "_id" : 6, "value" : 11, "dt" : ISODate("2019-10-17T04:06:31Z") }
The following aggregation query returns the average value by the hour (the hour is of the date field):
// Average "value" per hour bucket, where the bucket label is the hour of "dt" plus one.
db.hours.aggregate([
    // Extract the hour of each reading's "dt".
    {
        "$project": {
            "value": 1,
            "hr": { "$hour": "$dt" }
        }
    },
    // Shift the label by one, so a reading at 00:xx lands in bucket 1.
    {
        "$addFields": {
            "hour": { "$add": ["$hr", 1] }
        }
    },
    // Count, sum and average the values of each bucket.
    {
        "$group": {
            "_id": "$hour",
            "count": { "$sum": 1 },
            "totalValue": { "$sum": "$value" },
            "avgValue": { "$avg": "$value" }
        }
    },
    // Expose the bucket label as "hour" instead of "_id".
    {
        "$project": {
            "hour": "$_id",
            "_id": 0,
            "count": 1,
            "totalValue": 1,
            "avgValue": 1
        }
    }
])
=>
{ "count" : 2, "totalValue" : 18, "avgValue" : 9, "hour" : 3 }
{ "count" : 1, "totalValue" : 8, "avgValue" : 8, "hour" : 2 }
{ "count" : 1, "totalValue" : 11, "avgValue" : 11, "hour" : 5 }
{ "count" : 2, "totalValue" : 26, "avgValue" : 13, "hour" : 1 }
Finally, I solved this issue. Below is my code.

how to show 0 for week when no record is matching that week in $week mongodb query

My collection looks like below with details
/* 1 createdAt:6/13/2018, 5:17:07 PM*/
{ "_id" : ObjectId("5b21043b18f3bc7c0be3414c"),
"Number" : 242,
"State" : "2",
"City" : "3",
"Website" : "",
"Contact_Person_Name" : "Ajithmullassery",
"CreatedById" : "Admin",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-13T17:17:07.313+05:30"),
"CreatedOn" : ISODate("2018-06-13T17:17:07.313+05:30")
},
/* 2 createdAt:6/13/2018, 6:45:42 PM*/
{
"_id" : ObjectId("5b2118fe18f3bc7c0be3415b"),
"Number" : 243,
"State" : "1",
"City" : "143",
"Website" : "",
"Contact_Person_Name" : "sachitkumar",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-13T18:45:42.590+05:30"),
"CreatedOn" : ISODate("2018-06-13T18:45:42.590+05:30")
},
/* 3 createdAt:6/18/2018, 5:34:33 PM*/
{
"_id" : ObjectId("5b279fd118f3bc7c0be34166"),
"Number" : 244,
"State" : "0",
"City" : "8",
"Website" : "",
"Contact_Person_Name" : "Akshay",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-18T17:34:33.270+05:30"),
"CreatedOn" : ISODate("2018-06-18T17:34:33.270+05:30")
},
/* 4 createdAt:6/20/2018, 1:02:21 PM*/
{
"_id" : ObjectId("5b2a030518f3bc7c0be3416d"),
"Number" : 245,
"State" : "5",
"City" : "6",
"Website" : "",
"Contact_Person_Name" : "Dr DS Mithra",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"FacilityID" : "594387f5e2de7be83be5d5f1",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-20T13:02:21.887+05:30"),
"CreatedOn" : ISODate("2018-06-20T13:02:21.887+05:30")
},
/* 5 createdAt:6/20/2018, 1:08:58 PM*/
{
"_id" : ObjectId("5b2a049218f3bc7c0be3416e"),
"Number" : 245,
"State" : "5",
"City" : "6",
"Website" : "",
"Contact_Person_Name" : "Ramaswamy Manickam",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-20T13:08:58.040+05:30"),
"CreatedOn" : ISODate("2018-06-20T13:08:58.040+05:30")
}
I have the query like below
// Count documents per week number of CreatedOn within a date window.
db.collectionName.aggregate([
// Filter: keep documents whose CreatedOn lies between the two dates (both bounds inclusive).
{ "$match": { $and:[{CreatedOn:{$lte:ISODate("2018-07-14T13:59:08.266+05:30")}},{CreatedOn:{$gte:ISODate("2018-06-10T13:59:08.266+05:30")}}] } },
// Group by the $week value of CreatedOn and count the documents in each group.
// A week without any matching document produces no group, so it is absent from the result.
{
"$group": {
_id: {$week: '$CreatedOn'},
documentCount: {$sum: 1}
}
}
])
The query will return the weeknumber and number of documents created as below
/* 1 */
{
"_id" : 26,
"documentCount" : 1
},
/* 2 */
{
"_id" : 25,
"documentCount" : 1
},
/* 3 */
{
"_id" : 24,
"documentCount" : 9
},
/* 4 */
{
"_id" : 23,
"documentCount" : 2
}
In the results above, _id is the week number. If no records were created in, say, week 23, the query returns only 3 documents, leaving out "_id": 23.
How can I get a document with documentCount zero for a week in which no records were created?
For example, when there are no records for _id: 23, I should get the following:
/* 4 */
{
"_id" : 23,
"documentCount" : 0
}
As $week can return a value between 0 and 53 I assume you expect 54 documents as a result with 0 or non-zero values for documentCount. To achieve that you should collect all your documents into one ($group-ing by null) and then generate the output.
To generate a range of numbers you can use $range operator and then you can generate the output using $map. To transform an array of documents into multiple docs you can use $unwind.
// Count documents per week of CreatedOn between two dates, emitting a zero
// count for weeks in which nothing was created.
//
// The window bounds are declared once so the $match filter and the generated
// week range cannot drift apart.
var startDate = ISODate("2018-06-10T13:59:08.266+05:30");
var endDate = ISODate("2018-07-14T13:59:08.266+05:30");

db.collectionName.aggregate([
    // Keep documents created inside the window (both bounds inclusive).
    { $match: { CreatedOn: { $gte: startDate, $lte: endDate } } },
    // One document per week number that actually has data.
    {
        $group: {
            _id: { $week: "$CreatedOn" },
            documentCount: { $sum: 1 }
        }
    },
    // Collapse the per-week counts into a single array so they can be looked up below.
    // NOTE: if $match selects nothing, this stage emits no document and the
    // pipeline returns an empty result rather than a list of zeros.
    {
        $group: {
            _id: null,
            docs: { $push: "$$ROOT" }
        }
    },
    {
        $project: {
            docs: {
                $map: {
                    // $range excludes its end value, so add 1 to include the week
                    // containing endDate; otherwise that week's count is dropped.
                    // NOTE: assumes both dates fall in the same calendar year,
                    // because $week restarts at 0 every year.
                    input: {
                        $range: [
                            { $week: startDate },
                            { $add: [ { $week: endDate }, 1 ] }
                        ]
                    },
                    as: "weekNumber",
                    in: {
                        $let: {
                            // Position of this week among the counted weeks, or -1 if absent.
                            vars: { index: { $indexOfArray: [ "$docs._id", "$$weekNumber" ] } },
                            in: {
                                $cond: {
                                    if: { $eq: [ "$$index", -1 ] },
                                    then: { _id: "$$weekNumber", documentCount: 0 },
                                    else: { $arrayElemAt: [ "$docs", "$$index" ] }
                                }
                            }
                        }
                    }
                }
            }
        }
    },
    // Turn the array back into one document per week ...
    { $unwind: "$docs" },
    // ... and lift each entry to the top level.
    { $replaceRoot: { newRoot: "$docs" } }
])
Using $indexOfArray to check if array of current docs contains the document (-1 otherwise) and $arrayElemAt to get existing document from docs. Last step ($replaceRoot) is just to get rid of one level of nesting (docs). Outputs:
{ "_id" : 0, "documentCount" : 0 }
{ "_id" : 1, "documentCount" : 0 }
{ "_id" : 2, "documentCount" : 0 }
...
{ "_id" : 22, "documentCount" : 0 }
{ "_id" : 23, "documentCount" : 2 }
{ "_id" : 24, "documentCount" : 9 }
{ "_id" : 25, "documentCount" : 1 }
{ "_id" : 26, "documentCount" : 1 }
{ "_id" : 27, "documentCount" : 0 }
...
{ "_id" : 52, "documentCount" : 0 }
{ "_id" : 53, "documentCount" : 0 }
You can easily customize returned results modifying the input of $map stage. For instance you can pass an array of consts like input: [21, 22, 23, 24] as well.
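EDIT: To get only the weeks between the specified dates, use $week on the start and end dates to obtain the bounds passed to $range. Note that $range excludes its end value, so add 1 to the end week if that week should be included.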

mongodb aggregation - unwind/group/project query combination

I have records in a collection of the following format.
//One parent record
{
"_id" : "someDocID",
"title" : "some title",
"analytics" : [
{
"_id" : "analyticsID1",
"timeSpent" : [
{
"time" : 14,
"pageNo" : 1
},
{
"time" : 4,
"pageNo" : 2
},
{
"time" : 3,
"pageNo" : 1
},
{
"time" : 1,
"pageNo" : 2
}
]
},
{
"_id" : "analyticsID2",
"timeSpent" : [
{
"time" : 12,
"pageNo" : 10
},
{
"time" : 15,
"pageNo" : 11
},
{
"time" : 26,
"pageNo" : 12
},
{
"time" : 13,
"pageNo" : 11
},
{
"time" : 17,
"pageNo" : 10
},
{
"time" : 30,
"pageNo" : 11
}
]
}
]
}
The "pageNo" field contains repeated values. I need to group the pageNo field with adding their respective "time".
This is my required output. ( after "$unwind" operation on analytics )
//Two records after "$unwind" on analytics
{
"_id" : "someDocID",
"title" : "some title",
"analytics" : {
"_id" : "analyticsID1",
"timeSpent" : [
{
"time" : 17, //14+3
"pageNo" : 1
},
{
"time" : 5, //4+1
"pageNo" : 2
}
]
}
}
{
"_id" : "someDocID",
"title" : "some title",
"analytics" : {
"_id" : "analyticsID2",
"timeSpent" : [
{
"time" : 29, //12+17
"pageNo" : 10
},
{
"time" : 58, //15+13+30
"pageNo" : 11
},
{
"time" : 26,
"pageNo" : 12
}
]
}
}
I've tried various combinations of aggregate, group, unwind and project but still can't quite get there and would really appreciate any suggestions.
Here is an aggregate I came up with to provide the output that you mentioned in your comment above. As an FYI, the more elements you have in an array that needs to be unwound, the more memory you'll use, and it will take an exponentially increasing amount of time based on array sizes. I would highly recommend you structure your data differently if your arrays are not limited in length.
// Pipeline: sum "time" per (analytics entry, pageNo), then rebuild one
// timeSpent array per analytics entry.
var aggregrate = [
    // Flatten to one document per analytics entry ...
    { "$unwind": "$analytics" },
    // ... and then to one document per timeSpent record.
    { "$unwind": "$analytics.timeSpent" },
    // Add up the time of records sharing the same analytics entry and page.
    {
        "$group": {
            "_id": {
                "analytics_id": "$analytics._id",
                "pageNo": "$analytics.timeSpent.pageNo"
            },
            "title": { "$first": "$title" },
            "time": { "$sum": "$analytics.timeSpent.time" }
        }
    },
    // Re-assemble the per-page totals into a timeSpent array per analytics entry.
    {
        "$group": {
            "_id": "$_id.analytics_id",
            "title": { "$first": "$title" },
            "timeSpent": {
                "$push": {
                    "time": "$time",
                    "pageNo": "$_id.pageNo"
                }
            }
        }
    }
];
This Outputs:
[{
"_id": "analyticsID1",
"title" : "some title",
"timeSpent": [{
"time": NumberInt(17),
"pageNo": NumberInt(1)
}, {
"time": NumberInt(5),
"pageNo": NumberInt(2)
}]
}, {
"_id": "analyticsID2",
"title" : "some title",
"timeSpent": [{
"time": NumberInt(26),
"pageNo": NumberInt(12)
}, {
"time": NumberInt(29),
"pageNo": NumberInt(10)
}, {
"time": NumberInt(58),
"pageNo": NumberInt(11)
}]
}]

Why would I get a "-Infinity" result for $avg in an aggregate query?

My aggregate query for retrieving "average percents per month" returns a -Infinity average for some months. What would cause this?
The relative properties in mycollection are mydate and mynumericfield, which stores a percentage value as a double.
// Average mynumericfield per calendar month of mydate, with a document count per month.
db.mycollection.aggregate(
[
{
$match: {
mydate: {
// Date.UTC months are zero-based: (2014, 8, 1) is 1 Sep 2014 and
// (2014, 12, 1) rolls over to 1 Jan 2015. Both bounds are inclusive.
$gte: new Date(Date.UTC(2014, 8, 1)),
$lte: new Date(Date.UTC(2014, 12, 1)),
}
}
},
{
$group : {
// One group per (month, year) pair of mydate.
_id : { month: { $month: "$mydate" }, year: { $year: "$mydate" } },
// Mean of mynumericfield over the group's documents.
average: { $avg: "$mynumericfield" },
// Number of documents in the group.
count: { $sum: 1 }
}
}
]
)
Here's a sample of the result:
/* 1 */
{
"result" : [
{
"_id" : {
"month" : 9,
"year" : 2014
},
"average" : 84.2586640583996598,
"count" : 20959.0000000000000000
},
{
"_id" : {
"month" : 11,
"year" : 2014
},
"average" : 96.9326915103888638,
"count" : 20743.0000000000000000
},
{
"_id" : {
"month" : 10,
"year" : 2014
},
"average" : -Infinity,
"count" : 20939.0000000000000000
},
{
"_id" : {
"month" : 12,
"year" : 2014
},
"average" : -Infinity,
"count" : 20913.0000000000000000
}
],
"ok" : 1.0000000000000000
}
I've managed to somehow reproduce the -Infinity problem but on a smaller scale.
Let's take sample collection nums having only 4 documents in it:
{ "_id" : 0, "grp" : 1, "mynum" : -Infinity }
{ "_id" : 1, "grp" : 1, "mynum" : 5 }
{ "_id" : 3, "grp" : 2, "mynum" : 8 }
{ "_id" : 4, "grp" : 2, "mynum" : 89 }
Performing simple aggregation on this collection like:
> db.nums.aggregate([{$group:{"_id":"$grp", "average" : {$avg : "$mynum"}}}])
gives the following result:
{ "_id" : 2, "average" : 48.5 }
{ "_id" : 1, "average" : -Infinity }
which is identical in effects to what you have experienced.
Please try to find out whether in your collection there is a document which has mynumericfield with value -Infinity - maybe your situation is similar to the reproduced one:
> db.mycollection.find({mynumericfield : -Infinity})
I hope it might help you some way.

Mongo aggregation $subtract between dynamic document value

Need to find the difference between two values of attendance,group by ward_id, based on patient id for two dates. The result has dynamic values based on the array. The difference is between two dates. Key would be ward_id, the difference will be between counts of patient's visit to the ward.
Example sample data
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-03T21:31:29.902Z"),
"ward_id" : 2561
},
"count" : 4112,
"values" : [
{
"count" : 9,
"patient" : ObjectId("54766f973f35473ffc644618")
},
{
"count" : 19,
"patient" : ObjectId("546680e2d660e2dc5ebfea39")
},
{
"count" : 47,
"patient" : ObjectId("546680e3d660e2dc5ebfea72")
},
{
"count" : 1,
"patient" : ObjectId("546a137bdab5f21e612ea7ef")
},
{
"count" : 93,
"patient" : ObjectId("546680e3d660e2dc5ebfea89")
}
]
}
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-03T21:31:29.902Z"),
"ward_id" : 3720
},
"count" : 1,
"values" : [
{
"count" : 1,
"patient" : ObjectId("546a136ddab5f21e612ea6a6")
}
]
}
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-04T21:31:29.902Z"),
"ward_id" : 2561
},
"count" : 4112,
"values" : [
{
"count" : 10,
"patient" : ObjectId("54766f973f35473ffc644618")
},
{
"count" : 10,
"patient" : ObjectId("546680e2d660e2dc5ebfea39")
},
{
"count" : 6,
"patient" : ObjectId("5474e9e46606f32570fa48ff")
},
{
"count" : 1,
"patient" : ObjectId("5474e9e36606f32570fa48f2")
},
{
"count" : 1,
"patient" : ObjectId("546680e3d660e2dc5ebfea77")
},
{
"count" : 543,
"patient" : ObjectId("546680e2d660e2dc5ebfea43")
},
{
"count" : 1,
"patient" : ObjectId("5485fdc8d27a9122956b1c66")
}
]
}
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-04T21:31:29.902Z"),
"ward_id" : 3720
},
"count" : 1,
"values" : [
{
"count" : 7,
"patient" : ObjectId("546a136ddab5f21e612ea6a6")
}
]
}
Output
{
"ward_id":2561,
"result" : [{"person": ObjectId("54766f973f35473ffc644618"),
"count_1": 9,
"count_1": 10,
"difference":1 },{"person": ObjectId("546680e2d660e2dc5ebfea39"),
"count_1": 19,
"count_1": 10,
"difference":-9 } ....]
},
{
"ward_id":3720,
"result" : [{"person": ObjectId("546a136ddab5f21e612ea6a6"),
"count_1": 9,
"count_1": 10,
"difference":1 },{"person": ObjectId("546680e2d660e2dc5ebfea39"),
"count_1": 1,
"count_1": 7,
"difference":-6 }]
}
You can use the aggregation framework's $subtract operator, outlined here: http://docs.mongodb.org/manual/reference/operator/aggregation-arithmetic/
// Difference in one patient's attendance count between two snapshots of a ward.
//
// Placeholders to fill in before running:
//   my_ward_id    - the ward to inspect (matched against _id.ward_id)
//   my_desired_ts - a condition selecting the two snapshot timestamps,
//                   e.g. { $in: [firstTs, secondTs] }
//   my_patient_id - ObjectId of the patient
//
// NOTE: if the patient appears in only one of the two snapshots, count1 and
// count2 are the same value and the difference is 0.
db.wards.aggregate([
    // _id is an embedded document, not an array, so its fields are addressed
    // with dot notation rather than $elemMatch.
    { $match: { '_id.ward_id': my_ward_id, '_id.ts': my_desired_ts } },
    // Order by snapshot time so $first/$last below are the earlier/later snapshot;
    // without a sort their order is undefined.
    { $sort: { '_id.ts': 1 } },
    { $limit: 2 },
    { $project: { values: 1 } },
    // One document per (snapshot, patient) entry.
    { $unwind: '$values' },
    // After $unwind the patient id lives at values.patient.
    { $match: { 'values.patient': my_patient_id } },
    {
        $group: {
            _id: null,
            count1: { $first: '$values.count' },
            count2: { $last: '$values.count' }
        }
    },
    // $subtract is an expression, not a pipeline stage, so it must be wrapped in $project.
    // Later minus earlier, so an increase in visits is positive (9 -> 10 gives 1).
    {
        $project: {
            _id: 0,
            count1: 1,
            count2: 1,
            difference: { $subtract: ['$count2', '$count1'] }
        }
    }
])
I haven't tested this, but it would probably look something like the above.