mongodb aggregation - unwind/group/project query combination - mongodb

I have records in a collection of the following format.
//One parent record
{
"_id" : "someDocID",
"title" : "some title",
"analytics" : [
{
"_id" : "analyticsID1",
"timeSpent" : [
{
"time" : 14,
"pageNo" : 1
},
{
"time" : 4,
"pageNo" : 2
},
{
"time" : 3,
"pageNo" : 1
},
{
"time" : 1,
"pageNo" : 2
}
]
},
{
"_id" : "analyticsID2",
"timeSpent" : [
{
"time" : 12,
"pageNo" : 10
},
{
"time" : 15,
"pageNo" : 11
},
{
"time" : 26,
"pageNo" : 12
},
{
"time" : 13,
"pageNo" : 11
},
{
"time" : 17,
"pageNo" : 10
},
{
"time" : 30,
"pageNo" : 11
}
]
}
]
}
The "pageNo" field contains repeated values. I need to group the pageNo field with adding their respective "time".
This is my required output. ( after "$unwind" operation on analytics )
//Two records after "$unwind" on analytics
{
"_id" : "someDocID",
"title" : "some title",
"analytics" : {
"_id" : "analyticsID1",
"timeSpent" : [
{
"time" : 17, //14+3
"pageNo" : 1
},
{
"time" : 5, //4+1
"pageNo" : 2
}
]
}
}
{
"_id" : "someDocID",
"title" : "some title",
"analytics" : {
"_id" : "analyticsID2",
"timeSpent" : [
{
"time" : 29, //12+17
"pageNo" : 10
},
{
"time" : 58, //15+13+30
"pageNo" : 11
},
{
"time" : 26,
"pageNo" : 12
}
]
}
}
I've tried various combinations of aggregate, group, unwind and project but still can't quite get there and would really appreciate any suggestions.

Here is an aggregate I came up with to provide the output that you mentioned in your comment above. As an FYI, the more elements you have in an array that needs to be unwound, the more memory usage you'll have, and it will take an exponentially amount of time based on array sizes. I would highly recommend you structure your data differently if your arrays are not limited in length.
var aggregrate = [{
$unwind: '$analytics'
}, {
$unwind: '$analytics.timeSpent'
}, {
$group: {
_id: {
analytics_id: '$analytics._id',
pageNo: '$analytics.timeSpent.pageNo'
},
title:{$first:'$title'},
time: {
$sum: '$analytics.timeSpent.time'
},
}
}, {
$group: {
_id: '$_id.analytics_id',
title:{$first:'$title'},
timeSpent: {
$push: {
time: '$time',
pageNo: '$_id.pageNo'
}
}
}
}, ];
This Outputs:
[{
"_id": "analyticsID1",
"title" : "some title",
"timeSpent": [{
"time": NumberInt(17),
"pageNo": NumberInt(1)
}, {
"time": NumberInt(5),
"pageNo": NumberInt(2)
}]
}, {
"_id": "analyticsID2",
"title" : "some title",
"timeSpent": [{
"time": NumberInt(26),
"pageNo": NumberInt(12)
}, {
"time": NumberInt(29),
"pageNo": NumberInt(10)
}, {
"time": NumberInt(58),
"pageNo": NumberInt(11)
}]
}]

Related

How to multiple push to nested array

I have the following object:
{
"_id" : ObjectId("5d7052a3807ab14e286ba5bd"),
"companyBases" : [
{
"vehicles" : [],
"_id" : ObjectId("5d7052a3807ab14e286ba5b0"),
"name" : "Tech Parking 3",
"location" : {
"lng" : 50.01744,
"lat" : 20.033522
},
"country" : ObjectId("5d7052a2807ab14e286ba578"),
"__v" : 0
},
{
"vehicles" : [],
"_id" : ObjectId("5d7052a3807ab14e286ba5af"),
"name" : "Tech Parking 2",
"location" : {
"lng" : 50.036017,
"lat" : 20.086752
},
"country" : ObjectId("5d7052a2807ab14e286ba578"),
"__v" : 0
}
],
"nameOfCompany" : "Transport Tech Service 2 ",
"plan" : {
"name" : "Enterprise",
"vehicles" : 56,
"companyBases" : 10,
"users" : 10,
"price" : 1200
},
"__v" : 0
}
I've tried to do something like this:
db.companies.update(
{
_id: ObjectId("5d7052a3807ab14e286ba5bd")
},
{
$push: {
"companyBases.$[filter1].vehicles": {
"name": "Truck 1",
"combustion": 28
},
"companyBases.$[filter2].vehicles": {
"name": "Truck 2",
"combustion": 28
}
}
},
{
"arrayFilters": [
{
"filter1._id": "5d7052a3807ab14e286ba5b0"
},
{
"filter2._id": "5d7052a3807ab14e286ba5af"
}
]
}
)
But, it doesn't update my nested arrays "vehicles"
It returns me:
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 0 })
I checked IDs and it's ok. I've created similar question a few days ago but with $set pipeline not $push - How to update in one query, multiple times without sharing to simple queries? , but i was thinking it's possible to rewrite that example to $push.
Issue: In array filters, the _id is matched with string instead of ObjectId
The following query would precisely update the collection:
db.companies.update(
{
_id: ObjectId("5d7052a3807ab14e286ba5bd")
},
{
$push: {
"companyBases.$[filter1].vehicles": {
"name": "Truck 1",
"combustion": 28
},
"companyBases.$[filter2].vehicles": {
"name": "Truck 2",
"combustion": 28
}
}
},
{
"arrayFilters": [{
"filter1._id": ObjectId("5d7052a3807ab14e286ba5b0")
},
{
"filter2._id": ObjectId("5d7052a3807ab14e286ba5af")
}
]
}
)

how to show 0 for week when no record is matching that week in $week mongodb query

My collection looks like below with details
/* 1 createdAt:6/13/2018, 5:17:07 PM*/
{ "_id" : ObjectId("5b21043b18f3bc7c0be3414c"),
"Number" : 242,
"State" : "2",
"City" : "3",
"Website" : "",
"Contact_Person_Name" : "Ajithmullassery",
"CreatedById" : "Admin",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-13T17:17:07.313+05:30"),
"CreatedOn" : ISODate("2018-06-13T17:17:07.313+05:30")
},
/* 2 createdAt:6/13/2018, 6:45:42 PM*/
{
"_id" : ObjectId("5b2118fe18f3bc7c0be3415b"),
"Number" : 243,
"State" : "1",
"City" : "143",
"Website" : "",
"Contact_Person_Name" : "sachitkumar",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-13T18:45:42.590+05:30"),
"CreatedOn" : ISODate("2018-06-13T18:45:42.590+05:30")
},
/* 3 createdAt:6/18/2018, 5:34:33 PM*/
{
"_id" : ObjectId("5b279fd118f3bc7c0be34166"),
"Number" : 244,
"State" : "0",
"City" : "8",
"Website" : "",
"Contact_Person_Name" : "Akshay",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-18T17:34:33.270+05:30"),
"CreatedOn" : ISODate("2018-06-18T17:34:33.270+05:30")
},
/* 4 createdAt:6/20/2018, 1:02:21 PM*/
{
"_id" : ObjectId("5b2a030518f3bc7c0be3416d"),
"Number" : 245,
"State" : "5",
"City" : "6",
"Website" : "",
"Contact_Person_Name" : "Dr DS Mithra",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"FacilityID" : "594387f5e2de7be83be5d5f1",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-20T13:02:21.887+05:30"),
"CreatedOn" : ISODate("2018-06-20T13:02:21.887+05:30")
},
/* 5 createdAt:6/20/2018, 1:08:58 PM*/
{
"_id" : ObjectId("5b2a049218f3bc7c0be3416e"),
"Number" : 245,
"State" : "5",
"City" : "6",
"Website" : "",
"Contact_Person_Name" : "Ramaswamy Manickam",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-20T13:08:58.040+05:30"),
"CreatedOn" : ISODate("2018-06-20T13:08:58.040+05:30")
}
I have the query like below
db.collectionName.aggregate([
//where query
{ "$match": { $and:[{CreatedOn:{$lte:ISODate("2018-07-14T13:59:08.266+05:30")}},{CreatedOn:{$gte:ISODate("2018-06-10T13:59:08.266+05:30")}}] } },
//distinct column
{
"$group": {
_id: {$week: '$CreatedOn'},
documentCount: {$sum: 1}
}
}
])
The query will return the weeknumber and number of documents created as below
/* 1 */
{
"_id" : 26,
"documentCount" : 1
},
/* 2 */
{
"_id" : 25,
"documentCount" : 1
},
/* 3 */
{
"_id" : 24,
"documentCount" : 9
},
/* 4 */
{
"_id" : 23,
"documentCount" : 2
}
In above _id is the weeknumber. If in case in above results weekNumber : 23 no records are created then the query gives only 3 records removing the "_id":23.
How to get the records with documentcount as zero when there is no records created.
Like in above example when no records for _id: 23 should get like below
/* 4 */
{
"_id" : 23,
"documentCount" : 0
}
As $week can return a value between 0 and 53 I assume you expect 54 documents as a result with 0 or non-zero values for documentCount. To achieve that you should collect all your documents into one ($group-ing by null) and then generate the output.
To generate a range of numbers you can use $range operator and then you can generate the output using $map. To transform an array of documents into multiple docs you can use $unwind.
db.collectionName.aggregate([
//where query
{ "$match": { $and:[{CreatedOn:{$lte:ISODate("2018-07-14T13:59:08.266+05:30")}},{CreatedOn:{$gte:ISODate("2018-06-10T13:59:08.266+05:30")}}] } },
//distinct column
{
"$group": {
_id: {$week: '$CreatedOn'},
documentCount: {$sum: 1}
}
},
{
$group: {
_id: null,
docs: { $push: "$$ROOT" }
}
},
{
$project: {
docs: {
$map: {
input: { $range: [ {$week:ISODate("2018-06-10T13:59:08.266+05:30")}, {$week:ISODate("2018-07-14T13:59:08.266+05:30")}]},
as: "weekNumber",
in: {
$let: {
vars: { index: { $indexOfArray: [ "$docs._id", "$$weekNumber" ] } },
in: {
$cond: {
if: { $eq: [ "$$index", -1 ] },
then: { _id: "$$weekNumber", documentCount: 0 },
else: { $arrayElemAt: [ "$docs", "$$index" ] }
}
}
}
}
}
}
}
},
{
$unwind: "$docs"
},
{
$replaceRoot: {
newRoot: "$docs"
}
}
])
Using $indexOfArray to check if array of current docs contains the document (-1 otherwise) and $arrayElemAt to get existing document from docs. Last step ($replaceRoot) is just to get rid of one level of nesting (docs). Outputs:
{ "_id" : 0, "documentCount" : 0 }
{ "_id" : 1, "documentCount" : 0 }
{ "_id" : 2, "documentCount" : 0 }
...
{ "_id" : 22, "documentCount" : 0 }
{ "_id" : 23, "documentCount" : 2 }
{ "_id" : 24, "documentCount" : 9 }
{ "_id" : 25, "documentCount" : 1 }
{ "_id" : 26, "documentCount" : 1 }
{ "_id" : 27, "documentCount" : 0 }
...
{ "_id" : 52, "documentCount" : 0 }
{ "_id" : 53, "documentCount" : 0 }
You can easily customize returned results modifying the input of $map stage. For instance you can pass an array of consts like input: [21, 22, 23, 24] as well.
EDIT: To get the weeks between specified dates you can use $week for start and end date to get the numbers.

mongodb find the document by id and then group the result based on name field

I have a collection with multiple documents like
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 12.41
},
{
"date" : "2015-05-19",
"value" : 12.45
},
],
"Name" : "ABC Banking",
"scheme":"ABC1",
"createdDate" : "21-01-2018"
}
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
},
],
"Name" : "ABC Banking",
"scheme":"ABC2",
"createdDate" : "21-01-2018"
}
I am Querying collection based on Number field like
db.getCollection('mfhistories').find({'Number':53})
to get all the documents with this Number.
Now I want to group all the collection with Name 'ABC Banking' into an array. so that I will get result based on Name.
so the result should be like
{
"Name":"ABC Banking",
[
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
},
],
"scheme":"ABC1",
"createdDate" : "21-01-2018"
},
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
}
],
"scheme":"ABC2",
"createdDate" : "21-01-2018"
}
]
}
Please help..
Thanks,
J
You can use Aggregation Framework for that:
db.col.aggregate([
{
$match: { Number: 53, Name: "ABC Banking" }
},
{
$group: {
_id: "$Name",
docs: { $push: "$$ROOT" }
}
},
{
$project: {
Name: "$_id",
_id: 0,
docs: 1
}
}
])
$$ROOT is a special variable which captures entire document. More here.
db.mfhistories.aggregate(
// Pipeline
[
// Stage 1
{
$match: {
Number: 53
}
},
// Stage 2
{
$group: {
_id: {
Name: '$Name'
},
docObj: {
$addToSet: '$$CURRENT'
}
}
},
// Stage 3
{
$project: {
Name: '$_id.Name',
docObj: 1,
_id: 0
}
}
]
);

Mongo aggregation $subtract between dynamic document value

Need to find the difference between two values of attendance,group by ward_id, based on patient id for two dates. The result has dynamic values based on the array. The difference is between two dates. Key would be ward_id, the difference will be between counts of patient's visit to the ward.
Example sample data
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-03T21:31:29.902Z"),
"ward_id" : 2561
},
"count" : 4112,
"values" : [
{
"count" : 9,
"patient" : ObjectId("54766f973f35473ffc644618")
},
{
"count" : 19,
"patient" : ObjectId("546680e2d660e2dc5ebfea39")
},
{
"count" : 47,
"patient" : ObjectId("546680e3d660e2dc5ebfea72")
},
{
"count" : 1,
"patient" : ObjectId("546a137bdab5f21e612ea7ef")
},
{
"count" : 93,
"patient" : ObjectId("546680e3d660e2dc5ebfea89")
}
]
}
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-03T21:31:29.902Z"),
"ward_id" : 3720
},
"count" : 1,
"values" : [
{
"count" : 1,
"patient" : ObjectId("546a136ddab5f21e612ea6a6")
}
]
}
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-04T21:31:29.902Z"),
"ward_id" : 2561
},
"count" : 4112,
"values" : [
{
"count" : 10,
"patient" : ObjectId("54766f973f35473ffc644618")
},
{
"count" : 10,
"patient" : ObjectId("546680e2d660e2dc5ebfea39")
},
{
"count" : 6,
"patient" : ObjectId("5474e9e46606f32570fa48ff")
},
{
"count" : 1,
"patient" : ObjectId("5474e9e36606f32570fa48f2")
},
{
"count" : 1,
"patient" : ObjectId("546680e3d660e2dc5ebfea77")
},
{
"count" : 543,
"patient" : ObjectId("546680e2d660e2dc5ebfea43")
},
{
"count" : 1,
"patient" : ObjectId("5485fdc8d27a9122956b1c66")
}
]
}
{
"_id" : {
"type" : "patient_attendence",
"ts" : ISODate("2015-02-04T21:31:29.902Z"),
"ward_id" : 3720
},
"count" : 1,
"values" : [
{
"count" : 7,
"patient" : ObjectId("546a136ddab5f21e612ea6a6")
}
]
}
Output
{
"ward_id":2561,
"result" : [{"person": ObjectId("54766f973f35473ffc644618"),
"count_1": 9,
"count_1": 10,
"difference":1 },{"person": ObjectId("546680e2d660e2dc5ebfea39"),
"count_1": 19,
"count_1": 10,
"difference":-9 } ....]
},
{
"ward_id":3720,
"result" : [{"person": ObjectId("546a136ddab5f21e612ea6a6"),
"count_1": 9,
"count_1": 10,
"difference":1 },{"person": ObjectId("546680e2d660e2dc5ebfea39"),
"count_1": 1,
"count_1": 7,
"difference":-6 }]
}
you can use the aggregation framework's $subtract operator outlined here: http://docs.mongodb.org/manual/reference/operator/aggregation-arithmetic/
db.wards.aggregate([
{
$match: {id: {$elemMatch: {ward_id: my_ward_id, ts: my_desired_ts}}},
},
{
$limit: 2
},
{
$project: {values: 1}
},
{
$unwind: '$values'
},
{
$match: {patient: my_patient_id}
},
{
$group: {
_id: null,
'count1': {$first: '$values.count'},
'count2': {$last: '$values.count'}
}
},
{
$subtract: ['$count1', '$count2']
}
])
i haven't tested this but it would probably look like something above

How to group by seconds without the ISODate decimal part in MongoDB

I wanted to query the database in order to find the number of post per second to feed into a graph to show activity trend. I use spring-data-mongo but for now, the first step is to do this in the mongo shell before worrying about how to do from java.
I used the aggregation framework on it as shown below:
db.post.group({
key:{dateCreated: 1},
cond: { dateCreated:
{
"$gt": new ISODate("2013-08-09T05:51:15Z"),
"$lt": new ISODate("2013-08-09T05:51:20Z")
}
},
reduce: function(cur, result){
result.count += 1
},
initial: {count:0}
})
The result is encouraging but is seems because of the decimal part of the ISODate, the count seems wrong as it does group per seconds with the decimal making each count 1.
[
{
"dateCreated" : ISODate("2013-08-09T05:51:15.332Z"),
"count" : 1
},
{
"dateCreated" : ISODate("2013-08-09T05:51:15.378Z"),
"count" : 1
},
{
"dateCreated" : ISODate("2013-08-09T05:51:15.377Z"),
"count" : 1
},
// many more here
]
Is there a way to just consider only the seconds part as in result like below:
[
{
"dateCreated" : ISODate("2013-08-09T05:51:15Z"),
"count" : 5
},
{
"dateCreated" : ISODate("2013-08-09T05:51:16Z"),
"count" : 8
},
{
"dateCreated" : ISODate("2013-08-09T05:51:17Z"),
"count" : 3
},
{
"dateCreated" : ISODate("2013-08-09T05:51:18Z"),
"count" : 10
},
{
"dateCreated" : ISODate("2013-08-09T05:51:19Z"),
"count" : 2
},
{
"dateCreated" : ISODate("2013-08-09T05:51:20Z"),
"count" : 13
}
]
Thank for reading this.
For those in the same situation. here is how I modified my query. Thanks to #Sammaye.
db.post.aggregate(
{
$match: { dateCreated:
{
"$gt": new ISODate("2013-08-09T05:51:15.000Z"),
"$lt": new ISODate("2013-08-09T05:51:20.000Z")
}
}
},
{
$group: {
_id: {
hour: {$hour: "$dateCreated"},
minute: {$minute: "$dateCreated"},
second: {$second: "$dateCreated"}
},
cnt: {$sum : 1}
}
}
)
{
"result" : [
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 19
},
"cnt" : 26
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 18
},
"cnt" : 29
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 17
},
"cnt" : 27
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 16
},
"cnt" : 25
},
{
"_id" : {
"hour" : 5,
"minute" : 51,
"second" : 15
},
"cnt" : 16
}
],
"ok" : 1
}