MongoDB - Aggregate by distinct field then count per day - mongodb

I have a mongodb database that collects device data.
Example document is
{
"_id" : ObjectId("5c125a185dea1b0252c5352"),
"time" : ISODate("2018-12-13T15:09:42.536Z"),
"mac" : "10:06:21:3e:0a:ff",
}
The goal would be to count the unique mac values per day, from the first document in the db to the last document in the db.
I've been playing around and came to the conclusion that I would need to have multiple groups as well as projects during my aggregations.
This is what I tried - not sure if it's in the right direction or not or just completely messed up.
pipeline = [
{"$project": {
"_id": 1,
"mac": 1,
"day": {
"$dayOfMonth":"$time"
},
"month": {
"$month":"$time"
},
"year": {
"$year":"$time"
}
}
},
{
"$project": {
"_id": 1,
"mac": 1,
"time": {
"$concat": [{
"$substr":["$year", 0, 4]
},
"-", {
"$substr": ["$month", 0, 2]
},
"-",
{
"$substr":["$day", 0, 2]
}]
}
}
},
{
"$group": {
"_id": {
"time": "$time",
"mac": "$mac"
}
},
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1},
}
}
]
data = list(collection.aggregate(pipeline, allowDiskUse=True))
The output now doesn't look like it did any aggregation,
[{"_id": null, "count": 751050}]
I'm using Pymongo as my driver and using Mongodb 4.
Ideally it should just show the date and count (eg { "_id" : "2018-12-13", "count" : 2 }.
I would love some feedback and advice.
Thanks in advance.

I prefer to minimize the number of stages, and especially to avoid unnecessary $group stages. So I would do it with the following pipeline:
pipeline = [
{ '$group' : {
'_id': { '$dateToString': { 'format': "%Y-%m-%d", 'date': "$time" } },
'macs':{ '$addToSet': '$mac' }
} },
{$addFields:{ 'macs':{'$size':'$macs'}}}
]

There's an operator called "$dateToString", which would solve most of your problems.
Edit: Didn't read the question carefully, #Asya Kamsky, thank you for pointing out. Here' the new answer.
pipeline = [
{
"$group": {
"_id": {
"date": {
$dateToString: {
format: "%Y-%m-%d",
date: "$time"
}
},
"mac": "$mac"
}
}
},
{
"$group": {
"_id": "$_id.date",
"count": {
"$sum": 1
}
}
}
]

[
{
"$project": {
"_id": 1,
"mac": 1,
"time": { "$dateToString": { "format": "%Y-%m-%d", "date": "$time", "timezone": "Africa/Johannesburg"}}
},
},
{
"$group": {
"_id":{
"time": "$time",
"mac": "$mac",
}}},{
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1}
}},
{"$sort": SON([("_id", -1)])}
]
Does exactly what it should do.
Thanks. :)

Related

need to convert the data in another format

We have Data:
[
{
"_id": ObjectId("5f87e152219aaf1f9404ef3f"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": ISODate("2020-09-08T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
},
{
"_id": ObjectId("5f87e1e2219aaf1f9404eff5"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 494217.606225681,
"count": 1285.0,
"date": ISODate("2020-09-09T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
}
]
I have query which I am executing on above data and then getting the result as below the query
db.collection.aggregate([
{
"$project": {
"year": {
"$year": "$date"
},
"month": {
"$month": "$date"
},
"dayOfMonth": {
"$dayOfMonth": "$date"
},
"average": "$average",
"count": "$count",
"Symbol": 1
}
},
{
"$group": {
"_id": {
year: "$year",
month: "$month",
dayOfMonth: "$dayOfMonth"
},
"data": {
"$push": "$$ROOT"
}
}
},
{
"$project": {
"average": {
"$divide": [
{
"$reduce": {
"input": "$data",
"initialValue": 0,
"in": {
"$add": [
"$$value",
{
"$multiply": [
"$$this.count",
"$$this.average"
]
}
]
}
}
},
{
$reduce: {
input: "$data",
initialValue: 0,
in: {
"$add": [
"$$value",
"$$this.count"
]
}
}
}
]
}
}
}
])
I am getting output :
[{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 8
},
"average" : 574998.153846154
},
{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 9
},
"average" : 494217.606225681
}]
But I need to format the result data like this. by adding the date like this:
{
2020-09-08T18:30:00.000Z : 574998.153846154,
2020-09-09T18:30:00.000Z : 494217.606225681
}
Thanks in advance.
You can use $dateFromString to create the date you want.
Also, you need $concat and $toString to parse the numbers to string and concat into a single string.
After that, using $group you can get the all values you need in the same array. And how you want set the date as KEY, is neccesary create fields k and v and parse again to string.
With the values together, using $arrayToObject you can cerate the schema you want date: average and use $replaceRoot to get only the values at top level.
To do this you need to add this query at the end of your aggregation.
{
"$set": {
"date": { "$dateFromString": { "dateString": {
"$concat": [
{ "$toString": "$_id.dayOfMonth" }, "-",
{ "$toString": "$_id.month" }, "-",
{ "$toString": "$_id.year" }
] },
"format": "%d-%m-%Y", "timezone": "Europe/Madrid"
} } }
},
{
"$group": {
"_id": null,
"date": { "$push": { "k": { "$toString": "$date" }, "v": "$average" } }
}
},
{
"$replaceRoot": { "newRoot": { "$arrayToObject": "$date" } }
}
This query add a new field called date like this:
"date": ISODate("2020-09-08T04:00:00Z")
I've used Europe/Madrid as timezone but you can choose you want to get your desired date.
Example here.
The output is:
{
"2020-09-07T22:00:00.000Z": 574998.153846154,
"2020-09-08T22:00:00.000Z": 494217.606225681
}
Using America/New_York as timezone:
{
"2020-09-08T04:00:00.000Z": 574998.153846154,
"2020-09-09T04:00:00.000Z": 494217.606225681
}

Group by date in mongoDB while counting other fields

I've been using MongoDB for just a week and I have problems achieving this result: I want to group my documents by date while also keeping track of the number of entries that have a certain field set to a certain value.
So, my documents look like this:
{
"_id" : ObjectId("5f3f79fc266a891167ca8f65"),
"recipe" : "A",
"timestamp" : ISODate("2020-08-22T09:38:36.306Z")
}
where recipe is either "A", "B" or "C". Right now I'm grouping the documents by date using this pymongo query:
mongo.db.aggregate(
# Pipeline
[
# Stage 1
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
"progressivo": 1,
"temperatura_fusione": 1
}
},
# Stage 2
{
"$group": {
"_id": {
"createdAt": "$createdAt"
},
"products": {
"$sum": 1
}
}
},
# Stage 3
{
"$project": {
"label": "$_id.createdAt",
"value": "$products",
"_id": 0
}
}])
Which gives me results like this:
[{"label": "2020-08-22", "value": 1}, {"label": "2020-08-15", "value": 2}, {"label": "2020-08-11", "value": 1}, {"label": "2020-08-21", "value": 5}]
What I'd like to have is also the counting of how many times each recipe appears on every date. So, if for example on August 21 I have 2 entries with the "A" recipe, 3 with the "B" recipe and 0 with the "C" recipe, the desired output would be
{"label": "2020-08-21", "value": 5, "A": 2, "B":3, "C":0}
Do you have any tips?
Thank you!
You can do like following, what have you done is excellent. After that,
In second grouping, We just get total value and value of each recipe.
$map is used to go through/modify each objects
$arrayToObject is used to covert the array what we have done via map (key : value pair) to object
$ifNull is used for, sometimes your data might not have "A" or "B" or "C". But you need the value should be 0 if there is no name as expected output.
Here is the code
[
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
recipe: 1,
"progressivo": 1,
"temperatura_fusione": 1
}
},
{
"$group": {
"_id": {
"createdAt": "$createdAt",
"recipeName": "$recipe",
},
"products": {
$sum: 1
}
}
},
{
"$group": {
"_id": "$_id.createdAt",
value: {
$sum: "$products"
},
recipes: {
$push: {
name: "$_id.recipeName",
val: "$products"
}
}
}
},
{
$project: {
"content": {
"$arrayToObject": {
"$map": {
"input": "$recipes",
"as": "el",
"in": {
"k": "$$el.name",
"v": "$$el.val"
}
}
}
},
value: 1
}
},
{
$project: {
_id: 1,
value: 1,
A: {
$ifNull: [
"$content.A",
0
]
},
B: {
$ifNull: [
"$content.B",
0
]
},
C: {
$ifNull: [
"$content.C",
0
]
}
}
}
]
Working Mongo playground

Group and count in Mongo DB

I have many tweets object like this:
{
"_id" : ObjectId("5a2f4a381cb29b482553e2c9"),
"user_id" : 21898942,
"created_at" : ISODate("2009-03-09T19:48:50Z"),
"id" : 1301923516,
"place" : "",
"retweet_count" : 0,
"tweet" : "Save the Date! March 28th Vietnamese Cooking Class! Call to Reserve 312.255.0088",
"favorite_count" : 0
"type": A
}
I'm using this code to qroup the tweets by date and by type:
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
}
]
results = mongo.db.tweets.aggregate(pipeline)
Here is the result I get:
{
"_id": {
"date": "2009-03-17",
"type": A
},
"count": 4
,
{
"_id": {
"date": "2009-03-17",
"type": B
},
"count": 6
}
But now I want to have the result in this format:
{date: "2009-03-17", A: 4, B: 6, C: 9}
Is there anyway I can achieve this through aggregate directly?
Note: I'm using MongoDB and PyMongo
You can try the below aggregation query in 3.6 version.
Added the second group to create array of type and count value pairs followed by $mergeObjects to merge date key value with $arrayToObject, which produces create a type value key and count value pairs, to generate the expected response.
$replaceRoot to promote the document to the top level.
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.date",
"typeandcount": {
"$push": {
"k": "$_id.type",
"v": "$count"
}
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{
"date": "$_id"
},
{
"$arrayToObject": "$typeandcount"
}
]
}
}
}
]
Mongo 3.4 version:
Replace the last stage with below
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{
"k": "date",
"v": "$_id"
}
],
"$typeandcount"
]
}
}
}
}

Grouping different amounts together in MongoDB

If I have a set of objects each with the same description, but with different amounts.
{
{
"_id": "101",
"description": "DD from my employer1",
"amount": 1000.33
},
{
"_id": "102",
"description": "DD from my employer1",
"amount": 1000.34
},
{
"_id": "103",
"description": "DD from my employer1",
"amount": 1000.35
},
{
"_id": "104",
"description": "DD from employer1",
"amount": 5000.00
},
{
"_id": "105",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "106",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "107",
"description": "DD from my employer2",
"amount": 2000.33
}
}
Below, I am able to group them using the description:
{
{
"$group": {
"_id": {
"description": "$description"
},
"count": {
"$sum": 1
},
"_id": {
"$addToSet": "$_id"
}
}
},
{
"$match": {
"count": {
"$gte": 3
}
}
}
}
Is there a way to include all the amounts in the group (_ids: 101, 102, and 103 plus 105,106,107) even if they have a small difference, but exclude the bonus amount, which in the sample above is _id 104?
I don't believe it could be done in a group stage, but is there something that could be done at a later stage that could group _ids 101, 102 and 103 together and exclude _id 104. Basically, I want MongoDB to ignore the small differences in 101, 102, 103 and group them together since the are paychecks coming from the same employer.
I have been working with $stdDevPop, but can't get a solid formula down.
I am looking for a simple array output of just the _ids.
{
"result": [
"101",
"102",
"103",
"105",
"106",
"107"
]
}
You can do this by doing some math on the "amount" to round it down to the nearest 1000 and use that as the grouping _id:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$trunc": "$amount" },
{ "$mod": [
{ "$trunc": "$amount" },
1000
]}
]
},
"results": { "$push": "$_id" }
}},
{ "$redact": {
"$cond": {
"if": { "$gt": [ { "$size": "$results" }, 1 ] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
If your MongoDB is older than 3.2 then you would just need to use a long form with $mod of what $trunc is doing. And if your MongoDB is older than 2.6 then rather than $redact you would $match. So in the longer form this is:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
{ "$mod": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
1000
]}
]
},
"results": { "$push": "$_id" },
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
Either way the output is just the _id values whose amounts grouped to the boundaries with a count more than once.
{ "_id" : null, "results" : [ "105", "106", "107", "101", "102", "103" ] }
You could either add a $sort in there or live with sorting the result array in client code.
db.yourDBNameHere.aggregate( [
{ $match: { "amount" : { $lt : 5000 } } },
{ $project: { _id: 1 } },
])
that will grab the ID only of every transaction less than 5000$.

how to aggregate in mongoDB

I have a document called user.monthly, in that I have we used store 'day' : no. of clicks .
Here I have given 2 samples for different date
For month January
{
name : "devid",
date : ISODate("2014-01-21T11:32:42.392Z"),
daily: {'1':12,'9':13,'30':13}
}
For month February
{
name : "devid",
date : ISODate("2014-02-21T11:32:42.392Z"),
daily: {'3':12,'12':13,'25':13}
}
How can I aggregate this and get total clicks for January and February ?
Please help me to resolve my problem.
Your current schema is not helping you here as the "daily" field ( which we presume is your clicks per type or something like that ) is represented as a sub-document, which means that you need to explicitly name the path to each field in order to do something with it.
A better approach would be to put this information in an array:
{
"name" : "devid",
"date" : ISODate("2014-02-21T11:32:42.392Z"),
"daily": [
{ "type": "3", "clicks": 12 },
{ "type": "12", "clicks": 13 },
{ "type": "25", "clicks": 13 }
]
}
Then you have an aggregation statement that goes like this:
db.collection.aggregate([
// Just match the dates in January and February
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
// Unwind the "daily" array
{ "$unwind": "$daily" },
// Group the values together by "type" on "January" and "February"
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"type": "$daily.type"
},
"clicks": { "$sum": "$daily.clicks" }
}},
// Sort the result nicely
{ "$sort": {
"_id.year": 1,
"_id.month": 1,
"_id.type": 1
}}
])
That form is pretty simple. Or even if you do not care about the type as a grouping and just want the month totals:
db.collection.aggregate([
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
{ "$unwind": "$daily" },
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
},
"clicks": { "$sum": "$daily.clicks" }
}},
{ "$sort": { "_id.year": 1, "_id.month": 1 }}
])
But with the current sub-document form you currently have this becomes ugly:
db.collection.aggregate([
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
},
"clicks": {
"$sum": {
"$add": [
{ "$ifNull": ["$daily.1", 0] },
{ "$ifNull": ["$daily.3", 0] },
{ "$ifNull": ["$daily.9", 0] },
{ "$ifNull": ["$daily.12", 0] },
{ "$ifNull": ["$daily.25", 0] },
{ "$ifNull": ["$daily.30", 0] },
]
}
}
}}
])
That shows that you have no other option here other than to specify what is essentially every possible field under daily ( so probably much larger ). Then we have to evaluate as that key may possibly not exist for a given document to return a default value.
For example, your first document has no key "daily.3" so without the $ifNull check the returned value would be null and invalidate the whole $sum process so that the total would be "0".
Grouping on those keys as in the first aggregate example gets even worse:
db.collection.aggregate([
// Just match the dates in January and February
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
// Project with an array to match all possible values
{ "$project": {
"date": 1,
"daily": 1,
"type": { "$literal": ["1", "3", "9", "12", "25", "30" ] }
}},
// Unwind the "type" array
{ "$unwind": "$type" },
// Project values onto the "type" while grouping
{ "$group" : {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"type": "$type"
},
"clicks": { "$sum": { "$cond": [
{ "$eq": [ "$type", "1" ] },
"$daily.1",
{ "$cond": [
{ "$eq": [ "$type", "3" ] },
"$daily.3",
{ "$cond": [
{ "$eq": [ "$type", "9" ] },
"$daily.9",
{ "$cond": [
{ "$eq": [ "$type", "12" ] },
"$daily.12",
{ "$cond": [
{ "$eq": [ "$type", "25" ] },
"$daily.25",
"$daily.30"
]}
]}
]}
]}
]}}
}},
{ "$sort": {
"_id.year": 1,
"_id.month": 1,
"_id.type": 1
}}
])
Which is creating one big conditional evaluation using $cond to match out the values to the "type" which we projected all possible values in an array using the $literal operator.
If you do not have MongoDB 2.6 or greater you can always do this in place of the $literal operator statement:
"type": { "$cond": [1, ["1", "3", "9", "12", "25", "30" ], 0] }
Where essentially the true evaluation from $cond returns a "literal" declared value, which is how you specify an array. There is also the hidden $const operator that is not documented, but now exposed as $literal.
As you can see the structure here is doing you no favors, so the best option is to change it. But if you cannot and otherwise find the aggregation concept for this too hard to handle, then mapReduce offers an approach, but the processing will be much slower:
db.collection.mapReduce(
function () {
for ( var k in this.daily ) {
emit(
{
year: this.date.getFullYear(),
month: this.date.getMonth() + 1,
type: k
},
this.daily[k]
);
}
},
function(key,values) {
return Array.sum( values );
},
{
"query": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
},
"out": { "inline": 1 }
}
)
The general lesson here is that you will get the cleanest and fastest results by altering the document format and using the aggregation framework. But all the ways to do this are listed here.