MongoDB 2.4.10 GroupBy - mongodb

I created a MongoDB database on my computer with MongoDB v3.4.4, and now I want to migrate it to a Raspberry Pi running MongoDB v2.4.10, but when I execute this query there it doesn't work because of the older version.
db.product.aggregate(
{
"$project": {
"price": 1,
"y": {
"$year": "$date"
},
"m": {
"$month": "$date"
}
}
},
{
"$group": {
"_id": {
"year": "$y",
"month": "$m"
},
"total": {
"$sum": "$price"
},
}
},
{
$sort: {
"_id.year": 1,
"_id.month": 1
}
})
Is there a way to translate this query to MongoDB 2.4.10?
The error is this:
Thanks in advance!

Related

Large data (over 1million rows) aggregate

I am currently using the following query but hitting a small issue - It's really slow.
Is there any way in Mongoose to either:
a) split this into smaller queries and run it in segments, or
b) query this data in a better way?
My query
[
{
$project: {
"location": 1,
"year": { "$year": "$timestamp" }
}
},
{
$match: { "year": 2020 }
},
{
$group: {
"_id": "$location.coordinates",
"count": { "$sum": 1 }
}
},
{
$project: {
"_id": 0,
"count": 1,
"location.coordinates": {
"$map": {
"input": "$_id",
"in": { "$toString": "$$this" }
}
}
}
},
{
$sort: { count: -1 }
}
])
In Node.js I am querying it as shown below. I have an M10 Atlas cluster, so there is no issue running the query; it works in MongoDB Compass.
router.get('/pastlocation', (req, res) =>{
const pastlocation = require('mongoose').model('pastlistenerslocation');
pastlocation.aggregate([
{
$project: {
"location": 1,
"year": { "$year": "$timestamp" }
}
},
{
$match: { "year": 2020 }
},
{
$group: {
"_id": "$location.coordinates",
"count": { "$sum": 1 }
}
},
{
$project: {
"_id": 0,
"count": 1,
"location.coordinates": {
"$map": {
"input": "$_id",
"in": { "$toString": "$$this" }
}
}
}
},
{ $sort: { count: -1 } }
])).allowDiskUse(true).exec(function(err,result) {
if(err){console.log(err)}
res.json(result);
});
});
Data looks like this:
{
"UUID": "9B3640AC-3AA9-4313-94DB-9DFFEC7362E1",
"location": {
"coordinates": [115.8802288492358, -31.92553401927643],
"type": "Point"
},
"timestamp": {
"$date": "2020-11-04T13:20:58.224Z"
},
"__v": 0
}

MongoDB - Aggregate by distinct field then count per day

I have a mongodb database that collects device data.
Example document is
{
"_id" : ObjectId("5c125a185dea1b0252c5352"),
"time" : ISODate("2018-12-13T15:09:42.536Z"),
"mac" : "10:06:21:3e:0a:ff",
}
The goal would be to count the unique mac values per day, from the first document in the db to the last document in the db.
I've been playing around and came to the conclusion that I would need to have multiple groups as well as projects during my aggregations.
This is what I tried - not sure if it's in the right direction or not or just completely messed up.
pipeline = [
{"$project": {
"_id": 1,
"mac": 1,
"day": {
"$dayOfMonth":"$time"
},
"month": {
"$month":"$time"
},
"year": {
"$year":"$time"
}
}
},
{
"$project": {
"_id": 1,
"mac": 1,
"time": {
"$concat": [{
"$substr":["$year", 0, 4]
},
"-", {
"$substr": ["$month", 0, 2]
},
"-",
{
"$substr":["$day", 0, 2]
}]
}
}
},
{
"$group": {
"_id": {
"time": "$time",
"mac": "$mac"
}
},
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1},
}
}
]
data = list(collection.aggregate(pipeline, allowDiskUse=True))
The output now doesn't look like it did any aggregation,
[{"_id": null, "count": 751050}]
I'm using Pymongo as my driver and using Mongodb 4.
Ideally it should just show the date and count (e.g. { "_id" : "2018-12-13", "count" : 2 }).
I would love some feedback and advice.
Thanks in advance.
I prefer to minimize the number of stages, and especially to avoid unnecessary $group stages. So I would do it with the following pipeline:
pipeline = [
{ '$group' : {
'_id': { '$dateToString': { 'format': "%Y-%m-%d", 'date': "$time" } },
'macs':{ '$addToSet': '$mac' }
} },
{$addFields:{ 'macs':{'$size':'$macs'}}}
]
There's an operator called "$dateToString", which would solve most of your problems.
Edit: I didn't read the question carefully. @Asya Kamsky, thank you for pointing that out. Here's the new answer.
pipeline = [
{
"$group": {
"_id": {
"date": {
$dateToString: {
format: "%Y-%m-%d",
date: "$time"
}
},
"mac": "$mac"
}
}
},
{
"$group": {
"_id": "$_id.date",
"count": {
"$sum": 1
}
}
}
]
[
{
"$project": {
"_id": 1,
"mac": 1,
"time": { "$dateToString": { "format": "%Y-%m-%d", "date": "$time", "timezone": "Africa/Johannesburg"}}
},
},
{
"$group": {
"_id":{
"time": "$time",
"mac": "$mac",
}}},{
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1}
}},
{"$sort": SON([("_id", -1)])}
]
Does exactly what it should do.
Thanks. :)

Match the field in mongodb

I have mongodb sample data result like this:
{
"_id" : {
"month" : 3,
"day" : 24,
"year" : 2017
},
"commodity" : [
{
"commodityId" : ObjectId("58d434c30da1364f1e2d682d"),
"commodityStock" : "88889s"
}
],
"totalStock" : 0,
"count" : 1.0 }
My question is: how can I get the result where month = 3 with $match?
Below is my query:
db.orders.aggregate(
[
{ $match : {_id.month : 3}},
{
$group : {
_id : { month: { $month: "$createdAt" }, day: { $dayOfMonth: "$createdAt" }, year: { $year: "$createdAt" } },
commodity : {$push : {
'commodityId' : "$commodity",
'commodityStock' : "$stock",
}
},
totalStock: { $sum: "$stock" },
count: { $sum: 1 }
}
}
]
)
You could use a $redact pipeline stage, which combines the functionality of $project and $match: it filters the documents in the collection using a logical condition with the $cond operator, returning the system variable $$KEEP to "keep" the document where the condition is true or $$PRUNE to "remove" the document where the condition is false.
db.orders.aggregate([
{
"$redact": {
"$cond": [
{ "$eq": [{ "$month": "$createdAt" }, 3]},
"$$KEEP",
"$$PRUNE"
]
}
},
{
"$group": {
"_id": {
"month": { "$month": "$createdAt" },
"day": { "$dayOfMonth": "$createdAt" },
"year": { "$year": "$createdAt" }
},
"commodity": {
"$push": {
"commodityId": "$commodity",
"commodityStock": "$stock",
}
},
"totalStock": { "$sum": "$stock" },
"count": { "$sum": 1 }
}
}
])
Keep in mind that $redact does not use indexes; it performs a collection scan. If you need to take advantage of indexes, use the $project and $match pipeline stages instead:
db.orders.aggregate([
{
"$project": {
"createdAt": 1,
"month": { "$month": "$createdAt" },
"day": { "$dayOfMonth": "$createdAt" },
"year": { "$year": "$createdAt" },
"commodity": 1,
"stock": 1
}
},
{ "$match": { "month": 3 } },
{
"$group": {
"_id": { "month": "$month", "day": "$day", "year": "$year" },
"commodity": {
"$push": {
"commodityId": "$commodity",
"commodityStock": "$stock",
}
},
"totalStock": { "$sum": "$stock" },
"count": { "$sum": 1 }
}
}
])

Mongo date aggregate output Date object

I need to group datetime field by minute. That's rather easy:
db.my_collection.aggregate([
{ "$group": {
"_id": {
"year": { "$year": "$meta_data.created_at" },
"dayOfYear": { "$dayOfYear": "$meta_data.created_at" },
"hour": { "$hour": "$meta_data.created_at"},
"minute": { "$minute": "$meta_data.created_at"}
},
"count": { "$sum": 1 }
}}])
The problem is that the output is:
{
"_id": {
"year": 2016,
"dayOfYear": 349,
"hour": 16,
"minute": 43
},
"count": 4
}
Which is not really convenient if I want to query by Date later on.
How can I make the output of the aggregation back in DateTime object?
OK - so what I meant in the question is doable by doing:
db.my_collection.aggregate([
{"$group": {
"_id": {
"date_by_minute": {"$subtract": [{"$subtract":
["$meta_data.created_at",
{"$multiply":[{"$second": "$meta_data.created_at"} , 1000]}]},
{"$millisecond": "$meta_data.created_at"}]}},
"count": { "$sum": 1 }
}}
])

Group by day/month/week basis on the date range

This is in reference to this question.
This is my data set:
[
{
"rating": 4,
"ceatedAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"createdAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"ceatedAt": ISODate("2016-07-01T15:32:41.262+0000")
},
{
"rating": 5,
"createdAt": ISODate("2016-07-01T15:32:41.262+0000")
}
]
I want to be able to filter on a weekly or monthly basis, based on the date range.
How would I do that in mongo?
This was the answer given for grouping by days.
db.collection.aggregate([
{
"$project": {
"formattedDate": {
"$dateToString": { "format": "%Y-%m-%d", "date": "$ceatedAt" }
},
"createdAtMonth": { "$month": "$ceatedAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$formattedDate",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" },
}
}
])
For grouping on weekly basis, run the following pipeline which mainly uses the Date Aggregation Operators to extract the date parts:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtWeek",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" }
}
}
])
and for monthly aggregates, interchange the $group key to use the created month field:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtMonth",
"average": { "$avg": "$rating" },
"week": { "$first": "$createdAtWeek" }
}
}
])