Mongodb multiple key aggregation by date (from timestamp) - mongodb

I have a mongodb collection with millions of records regarding transactions. I would like to create a query aggregated by date and resolution.
My documents look like:
{
"_id": "Dan",
"finish_date": "2017-01-02 15:23:45.234Z",
"resolution": "canceled"
}
{
"_id": "John",
"finish_date": "2017-01-02 18:54:19.090Z",
"resolution": "completed"
}
{
"_id": "Pete",
"finish_date": "2017-01-02 19:11:27.418Z",
"resolution": "completed"
}
I would like the query output to look something like:
{
"2017-01-02" : {
"canceled": 1,
"completed": 2,
}
}
{
"2017-01-03" : {
"completed": 5,
}
}
Is this even possible? Currently, my output looks like:
{
"_id" : {
"curDate" : "2017-01-02",
"reason" : "canceled"
},
"count" : 1.0
}
{
"_id" : {
"curDate" : "2017-01-02",
"reason" : "completed"
},
"count" : 2.0
}
{
"_id" : {
"curDate" : "2017-01-03",
"reason" : "completed"
},
"count" : 5.0
}
The query looks like:
db.collection.aggregate(
[
{
"$match": {
"finish_date": {
"$gt": new Date("2017-01-02"),
"$lt": new Date("2017-01-08")
}
}
},
{
"$group" : {
_id : {
curDate: {
$substr: ["$finish_date", 0, 10]
},
reason: "$resolution"
},
count: { "$sum": 1 }
}
},
{
"$sort": { "_id.curDate": 1, "_id.reason": 1 }
}
]
)

You can use the $arrayToObject operator, available since MongoDB 3.4.4, with the aggregation query below.
$group by curDate and push the reason and count values into reasoncount array.
$zip the reason and count array values together followed by $arrayToObject to create reason and count structure.
Same logic to create a curDate structure while keeping the previous reason and count structure.
db.collection.aggregate([
  // Stage 1: keep only documents whose finish_date lies strictly inside the window.
  {
    $match: {
      finish_date: {
        $gt: new Date("2017-01-02"),
        $lt: new Date("2017-01-08")
      }
    }
  },
  // Stage 2: count documents per (day, resolution). The first 10 characters
  // of finish_date are its YYYY-MM-DD part.
  {
    $group: {
      _id: {
        curDate: { $substr: ["$finish_date", 0, 10] },
        reason: "$resolution"
      },
      count: { $sum: 1 }
    }
  },
  // Stage 3: deterministic ordering by day, then by resolution.
  {
    $sort: { "_id.curDate": 1, "_id.reason": 1 }
  },
  // Stage 4: collect every {reason, count} pair of a day into one array.
  {
    $group: {
      _id: "$_id.curDate",
      reasoncount: {
        $push: { reason: "$_id.reason", count: "$count" }
      }
    }
  },
  // Stage 5: $zip pairs the reasons with the counts ([[reason, count], ...]);
  // $arrayToObject turns those pairs into { reason: count }.
  {
    $addFields: {
      reasoncount: {
        $arrayToObject: {
          $zip: { inputs: ["$reasoncount.reason", "$reasoncount.count"] }
        }
      }
    }
  },
  // Stage 6: gather all days into a single document.
  {
    $group: {
      _id: null,
      result: {
        $push: { curDate: "$_id", reasoncount: "$reasoncount" }
      }
    }
  },
  // Stage 7: same zip/arrayToObject trick, this time keyed by the day.
  {
    $addFields: {
      result: {
        $arrayToObject: {
          $zip: { inputs: ["$result.curDate", "$result.reasoncount"] }
        }
      }
    }
  }
])

Related

Count the objects inside of an array on each document MongoDB

My documents are organized this way:
{
"_id" : ObjectId("5ea899d7e7da54cabbc022e7"),
"date" : ISODate("2018-01-27T00:00:00Z"),
"vehicleid" : 32028,
"points" : [
{
"direction" : 225,
"location" : {
"type" : "Point",
"coordinates" : [
-3.801898,
-38.501078
]
},
"odometer" : 134746396,
"routecode" : 0,
"speed" : 0,
"deviceid" : 148590,
"metrictimestamp" : ISODate("2018-01-27T23:32:03Z")
}
Where points is an array of objects. I need to group these documents and return the number of elements inside each array. I guess that it is something like:
pipe = [
{
'$project':{
"_id":0
}
},
{
'$group':{
"_id":{
"vehicleid":"$vehicleid",
"date":"$date"
},'count':{'$size':'points'}
}
}
]
Detail: I need to run this on pymongo.
You have to use $sum to sum the size of each array like this
{
"$group": {
"_id": {
"vehicleid": "$vehicleid",
"date": "$date"
},
"count": { "$sum": { "$size": "$points" } }
}
}
You can use any of the following aggregation pipelines. You will get the size of the points array field. Each pipeline uses a different approach, and the output details differ, but the size info will be the same.
The code runs with PyMongo:
# Approach 1: emit one document per element of points, then count the
# emitted documents for every (vehicleid, date) pair.
pipeline = [
{
"$unwind": "$points"
},
{
"$group": {
"_id": { "vehicleid": "$vehicleid", "date": "$date" },
"count": { "$sum": 1 }
}
}
]
# Approach 2: no grouping; add a count field holding the length of the
# points array to each document.
pipeline = [
{
"$addFields": { "count": { "$size": "$points" } }
}
]
You can follow this code
$group : {
_id : {
"vehicleid":"$vehicleid",
"date":"$date"
count: { $sum: 1 }
}
}

How to check keys of key value pair inside mongodb structure

I have a mongodb collection where objects are structured as such:
{
"id": "1234",
"history": [
{
"userid": 100,
"myobjects": [{id, id1, id4}]
},
{
"userid": 200,
"myobjects": [{id2, id3, id5}]
},
}
Goal: if my userid is 100, return an object that doesn't contain my userid in its history. I'm guessing it'd be some kind of "my userid not in keys of history field" but i'm not sure how to write that out. Here's my basic idea:
Collection.findOne(
{
in_progress : null,
history : {"$nin": myuserid } ???
}
);
Any help would be appreciated!
Use MongoDB aggregation.
// Return whole documents in which no history entry has userid 100.
// $ne applied to a field inside an array matches only when none of the
// array's elements equals the value, so documents that contain the user are
// excluded and the remaining ones keep their history untouched. (Unwinding
// and filtering instead would merely strip the user's entries out of every
// document.)
db.test.aggregate([
  {
    "$match" : {
      "history.userid" : {
        "$ne" : 100
      }
    }
  }
]);

Count Distinct Within Date Range

I have a MongoDB database with a collection of site-events. The documents look like:
{
"_id" : ObjectId("5785bb02eac0636f1dc07023"),
"referrer" : "https://example.com",
"_t" : ISODate("2016-07-12T18:10:17Z"),
"_p" : "ucd7+hvjpacuhtgbq1caps4rqepvwzuoxm=",
"_n" : "visited site",
"km screen resolution" : "1680x1050"
},
{
"_id" : ObjectId("5785bb02eac0636f1dc07047"),
"url" : "https://www.example.com/",
"referrer" : "Direct",
"_t" : ISODate("2016-07-12T18:10:49Z"),
"_p" : "txt6t1siuingcgo483aabmses2et5uqk0=",
"_n" : "visited site",
"km screen resolution" : "1366x768"
},
{
"_id" : ObjectId("5785bb02eac0636f1dc07053"),
"url" : "https://www.example.com/",
"referrer" : "Direct",
"_t" : ISODate("2016-07-12T18:10:56Z"),
"_p" : "gcama1az5jxa74wa6o9r4v/3k+zulciqiu=",
"_n" : "visited site",
"km screen resolution" : "1366x768"
}
I want to get a count of the unique persons within a date range. In SQL it would be
SELECT COUNT(DISTINCT(`_p`)) FROM collection WHERE `_t` > '<SOME DATE>' AND `_t` <= '<SOME OTHER DATE>'
So far, I've grouped the dates along using the aggregation pipeline:
db.siteEvents.aggregate(
[
{
$match : {"_n": "visited site"}
},
{
$group : {
_id: {
year : { $year : "$_t" },
month : { $month : "$_t" },
day : { $dayOfMonth : "$_t" },
_p : "$_p"
},
count: { $sum: 1 }
}
},
{
$group : {
_id : {
year : { $year : "$_id.year" },
month : { $month : "$_id.month" },
day : { $dayOfMonth : "$_id.day" }
},
count: { $sum: 1 }
}
}
]
);
But this gives errors - I believe because of the second grouping _id trying to grab an intermediate field. I'm currently just using the Mongo shell, but if I had to choose an alternative driver it would be PyMongo. I'd like to get this to work in the shell (so I can understand the process).
With an aggregation pipeline it could look like so
db.getCollection('siteEvents').aggregate([
  // Stage 1: restrict to events with start < _t <= end. $lte keeps the upper
  // bound inclusive, matching the SQL condition `_t` <= '<SOME OTHER DATE>'.
  {
    $match: {
      _t: {
        $gt: ISODate("2016-07-11T08:10:17.000Z"),
        $lte: ISODate("2016-07-12T14:10:17.000Z")
      }
    }
  },
  // Stage 2: collapse to one document per distinct person (_p).
  {
    $group: {
      _id: "$_p"
    }
  },
  // Stage 3: count those per-person documents, i.e. COUNT(DISTINCT _p).
  {
    $group: {
      _id: null,
      distinctCount: { $sum: 1 }
    }
  }
])
If you know the resulting set of distinct values won't be large, then you could use a simple query like so
// Count the distinct _p values among events with start < _t <= end. $lte
// keeps the upper bound inclusive, matching the SQL condition
// `_t` <= '<SOME OTHER DATE>'. distinct() returns the values as an array, so
// .length is the distinct count.
db.getCollection('siteEvents').distinct(
  '_p',
  {
    _t: {
      $gt: ISODate("2016-07-11T08:10:17.000Z"),
      $lte: ISODate("2016-07-12T14:10:17.000Z")
    }
  }
).length
You can use the $addToSet operator in the $group stage to return an array of distinct "_p" value then $project the resulted document to return the size of the array which is nothing other than the distinct count.
db.siteEvents.aggregate(
[
{"$match": {"_n": "visited site", "_t": {"$gt": <SOME DATE>, "$lt": <SOME OTHER DATE>}}},
{"$group": {
"_id": None,
"_p_values": {"$addToSet": "$_p"}
}},
{"$project": {"_id": 0, "count": {"$size": "$_p_values"}}}
]
)
For small size collection you can simply use distinct but you need to pass in the query argument.
len(db.siteEvents.distinct("_p", {"_n": "visited site", "_t": {"$gt": <SOME DATE>, "$lt": <SOME OTHER DATE>}}))

"Structured" grouping query in MongoDB

I have the following items collection :
[{
"_id": 1,
"manufactureId": 1,
"itemTypeId": "Type1"
},
{
"_id": 2,
"manufactureId": 1,
"itemTypeId": "Type2"
},
{
"_id": 3,
"manufactureId": 2,
"itemTypeId": "Type1"
}]
I would like to create a query that will return the number of items of each item type that each manufacturer has, in the following structure (or something similar):
[
{
_id:1, //this would be the manufactureId
itemsCount:{
"Type1":1, //Type1 items count
"Type2":1 //...
}
},
{
_id:2,
itemsCount:{
"Type1":1
}
}
]
I have tried to use the aggregation framework but i couldn't figure out if there is a way to create a "structured" groupby queries with it.
I can easily achieve the desired result by post-processing this simple aggregation query result :
db.items.aggregate([{$group:{_id:{itemTypeId:"$itemTypeId",manufactureId:"$manufactureId"},count:{$sum:1}}}])
but if possible I prefer not to post-process the result.
Data stays data
I would rather use this query which, I believe, will give you the closest data structure to what you want, without post-processing.
Query
db.items.aggregate(
  // Stage 1: count items per (itemTypeId, manufactureId) pair.
  {
    $group: {
      _id: {
        itemTypeId: "$itemTypeId",
        manufactureId: "$manufactureId"
      },
      count: { $sum: 1 }
    }
  },
  // Stage 2: regroup by manufacturer and collect its per-type counts
  // into the itemCounts array.
  {
    $group: {
      _id: "$_id.manufactureId",
      itemCounts: {
        "$push": {
          itemTypeId: "$_id.itemTypeId",
          count: "$count"
        }
      }
    }
  }
)
Output
{
"_id" : 1,
"itemCounts" : [
{
"itemTypeId" : "Type1",
"count" : 1
},
{
"itemTypeId" : "Type2",
"count" : 1
}
]
},
{
"_id" : 2,
"itemCounts" : [
{
"itemTypeId" : "Type1",
"count" : 1
}
]
}
Data transformed to object fields
This is actually an approach that I wouldn't advise in general. It is harder to manage in your application, because the field names between different objects will be inconsistent and you won't know what object fields to expect in advance. This would be a crucial point if you use a strongly typed language—automatic data binding to your domain objects will become impossible.
Anyway, the only way to get the exact data structure you want is to apply post-processing.
Query
db.items.aggregate(
  // Stage 1: count items per (itemTypeId, manufactureId) pair.
  {
    $group: {
      _id: {
        itemTypeId: "$itemTypeId",
        manufactureId: "$manufactureId"
      },
      count: { $sum: 1 }
    }
  },
  // Stage 2: regroup by manufacturer and collect its per-type counts
  // into the itemCounts array.
  {
    $group: {
      _id: "$_id.manufactureId",
      itemCounts: {
        "$push": {
          itemTypeId: "$_id.itemTypeId",
          count: "$count"
        }
      }
    }
  }
).forEach(function(group) {
  // Client-side post-processing: turn the [{itemTypeId, count}, ...] array
  // into an object keyed by item type, then print it.
  var flattened = { _id: group._id, itemCounts: {} };
  group.itemCounts.forEach(function(entry) {
    flattened.itemCounts[entry.itemTypeId] = entry.count;
  });
  printjson(flattened);
})
Output
{ "_id" : 1, "itemCounts" : { "Type1" : 1, "Type2" : 1 } }
{ "_id" : 2, "itemCounts" : { "Type1" : 1 } }

MongoDB order by a sum on a subset

I have the following collection:
error_reports
[
{
"_id":{
"$oid":"5184de1261"
},
"date":"29/04/2013",
"errors":[
{
"_id":"10",
"failures":2,
"alerts":1,
},
{
"_id":"11",
"failures":7,
"alerts":4,
}
]
},
{
"_id":{
"$oid":"5184de1262"
},
"date":"30/04/2013",
"errors":[
{
"_id":"15",
"failures":3,
"alerts":2,
},
{
"_id":"16",
"failures":9,
"alerts":1,
}
]
}
]
Is it possible to retrieve the list of documents with failures and alerts sum sorted by failures in descending order? I am new to mongodb, I have been searching for 2 days but I can't figure out what is the proper query...
I tried something like this :
db.error_reports.aggregate(
{ $sort : { failures: -1} },
{ $group:
{ _id: "$_id",
failures: { "$sum": "$errors.failures" }
}
}
);
But it didn't work. I think it is because of the $sum: $errors.failures part; I would like to sum this attribute over every item of the day_hours subcollection, but I don't know how to do this in a query...
You were very close with your attempt. The only thing missing is the $unwind aggregation operator. $unwind basically splits each document out based on a sub-document. So before you group the failures and alerts, you unwind the errors, like so:
db.error_reports.aggregate(
  // Stage 1: emit one document per element of the errors array.
  { $unwind : '$errors' },
  // Stage 2: regroup by the original document _id, summing the per-error
  // failures and alerts.
  {
    $group : {
      _id : '$_id',
      'failures' : { $sum : '$errors.failures' },
      'alerts' : { $sum : '$errors.alerts' }
    }
  },
  // Stage 3: highest failure total first.
  { $sort : { 'failures': -1 } }
);
Which gives you the following result:
{
"result" : [
{
"_id" : ObjectId("5184de1262"),
"failures" : 12,
"alerts" : 3
},
{
"_id" : ObjectId("5184de1261"),
"failures" : 9,
"alerts" : 5
}
],
"ok" : 1
}