Large data (over 1million rows) aggregate - mongodb

I am currently using the following query but hitting a small issue - It's really slow.
Is there any way that mongooseJS can either:
a) split this into smaller queries and run it in segments
b) a better way to query this data.
My query
[
{
$project: {
"location": 1,
"year": { "$year": "$timestamp" }
}
},
{
$match: { "year": 2020 }
},
{
$group: {
"_id": "$location.coordinates",
"count": { "$sum": 1 }
}
},
{
$project: {
"_id": 0,
"count": 1,
"location.coordinates": {
"$map": {
"input": "$_id",
"in": { "$toString": "$$this" }
}
}
}
},
{
$sort: { count: -1 }
}
])
in NodeJS I am querying it like I have M10 atlas so no issue running the query it works in Mongodb compass
router.get('/pastlocation', (req, res) =>{
const pastlocation = require('mongoose').model('pastlistenerslocation');
pastlocation.aggregate([
{
$project: {
"location": 1,
"year": { "$year": "$timestamp" }
}
},
{
$match: { "year": 2020 }
},
{
$group: {
"_id": "$location.coordinates",
"count": { "$sum": 1 }
}
},
{
$project: {
"_id": 0,
"count": 1,
"location.coordinates": {
"$map": {
"input": "$_id",
"in": { "$toString": "$$this" }
}
}
}
},
{ $sort: { count: -1 } }
])).allowDiskUse(true).exec(function(err,result) {
if(err){console.log(err)}
res.json(result);
});
});
Data looks like this:
{
"UUID": "9B3640AC-3AA9-4313-94DB-9DFFEC7362E1",
"location": {
"coordinates": [115.8802288492358, -31.92553401927643],
"type": "Point"
},
"timestamp": {
"$date": "2020-11-04T13:20:58.224Z"
},
"__v": 0
}

Related

NODE JS + Mongodb aggregation pipelines from MongoCharts

Currently can't figure out why one pipeline works and the other doesn't. I got both pipelines from MongoDB charts and they both returned something and displaying charts on MongoDBCharts. However, when I use them in my code, only the first pipeline returns something. I used the same data for all cases. Any suggestions would be greatly appreciated!
The first one doesn't filter the last 30 days (hard coded by Mongo), both pipelines are copied from Mongodb charts and are not altered.
[
{
"$addFields": {
"trigger_time": {
"$convert": {
"input": "$trigger_time",
"to": "date",
"onError": null
}
}
}
},
{
"$match": {
"event_type": {
"$nin": [
null,
"",
"AC Lost",
"Device Lost",
"logged into Database",
"logged into Nexus Database",
"logged out of Nexus Database",
"Low Battery"
]
}
}
},
{
"$addFields": {
"trigger_time": {
"$cond": {
"if": {
"$eq": [
{
"$type": "$trigger_time"
},
"date"
]
},
"then": "$trigger_time",
"else": null
}
}
}
},
{
"$addFields": {
"__alias_0": {
"hours": {
"$hour": "$trigger_time"
}
}
}
},
{
"$group": {
"_id": {
"__alias_0": "$__alias_0"
},
"__alias_1": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"__alias_0": "$_id.__alias_0",
"__alias_1": 1
}
},
{
"$project": {
"y": "$__alias_1",
"x": "$__alias_0",
"_id": 0
}
},
{
"$sort": {
"x.hours": 1
}
},
{
"$limit": 5000
}
]
The second one
[
{
"$addFields": {
"trigger_time": {
"$convert": {
"input": "$trigger_time",
"to": "date",
"onError": null
}
}
}
},
{
"$match": {
"event_type": {
"$nin": [
null,
"",
"AC Lost",
"Device Lost",
"logged into Database",
"logged into Nexus Database",
"logged out of Nexus Database",
"Low Battery"
]
},
"trigger_time": {
"$gte": {
"$date": "2021-03-29T08:35:47.804Z"
}
}
}
},
{
"$addFields": {
"trigger_time": {
"$cond": {
"if": {
"$eq": [
{
"$type": "$trigger_time"
},
"date"
]
},
"then": "$trigger_time",
"else": null
}
}
}
},
{
"$addFields": {
"__alias_0": {
"hours": {
"$hour": "$trigger_time"
}
}
}
},
{
"$group": {
"_id": {
"__alias_0": "$__alias_0"
},
"__alias_1": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"__alias_0": "$_id.__alias_0",
"__alias_1": 1
}
},
{
"$project": {
"y": "$__alias_1",
"x": "$__alias_0",
"_id": 0
}
},
{
"$sort": {
"x.hours": 1
}
},
{
"$limit": 5000
}
]
I end up solving my own problem. After a bit of digging and asking.
Node.js does some funny things with Mongodb when it comes to using '$date', that's why the pipeline didn't work.
The resolve was to remove '$date' and pass in a date object. For my case,
"trigger_time": {
"$gte": new Date("2021-03-29T08:35:47.804Z")
}

Group based on multiple fields in a single query

I have the following document structure
[
{
"tag": "abc",
"created_at": 2020-02-05T14:20:52.907+00:00
"likes": 12,
"comments": 3
},
{
"tag": "abc",
"created_at": 2020-02-04T14:20:52.907+00:00
"likes": 10,
"comments": 1
}
{
"tag": "abc",
"created_at": 2020-01-04T14:21:52.907+00:00
"likes": 12,
"comments": 3
},
{
"tag": "abc",
"created_at": 2020-01-04T14:22:52.907+00:00
"likes": 2,
"comments": 1
}
]
First, I want to group my documents based on tag value and then on created_at field to get something like this.
[
{
"tag_name": "abc",
"day_wise": [
{
"dayMonthYear": "2020-01-04",
"comments": 4,
"likes": 14
},
{
"dayMonthYear": "2020-02-04",
"comments": 1,
"likes": 10
},
{
"dayMonthYear": "2020-02-05",
"comments": 3,
"likes": 12
},
],
"month_wise": [
{
"monthYear": "2020-04",
"comments": 5,
"likes": 24
},
{
"monthYear": "2020-05",
"comments": 3,
"likes": 12
},
],
}
]
There can be multiple tags so need to group them accordingly. Can anyone please suggest what aggregation query can be applied to achieve this result.
You can use below aggregation
db.collection.aggregate([
{ "$group": {
"_id": "$tag",
"data": {
"$push": {
"comments": "$comments",
"likes": "$likes",
"monthWise": { "$dateToString": { "date": "$created_at", "format": "%m-%Y" } },
"dateWise": { "$dateToString": { "date": "$created_at", "format": "%d-%m-%Y" } }
}
}
}},
{ "$project": {
"dateWise": {
"$map": {
"input": { "$setUnion": ["$data.dateWise"] },
"as": "m",
"in": {
"fieldA": "$$m",
"count": {
"$filter": {
"input": "$data",
"as": "d",
"cond": { "$eq": ["$$d.dateWise", "$$m"] }
}
}
}
}
},
"monthWise": {
"$map": {
"input": { "$setUnion": ["$data.monthWise"] },
"as": "m",
"in": {
"fieldA": "$$m",
"count": {
"$filter": {
"input": "$data",
"as": "d",
"cond": { "$eq": ["$$d.monthWise", "$$m"] }
}
}
}
}
}
}},
{ "$project": {
"dateWise": {
"$map": {
"input": "$dateWise",
"in": {
"dateWise": "$$this.fieldA",
"likes": { "$sum": "$$this.count.likes" },
"comments": { "$sum": "$$this.count.comments" }
}
}
},
"monthWise": {
"$map": {
"input": "$monthWise",
"in": { "monthWise": "$$this.fieldA",
"likes": { "$sum": "$$this.count.likes" },
"comments": { "$sum": "$$this.count.comments" }
}
}
}
}}
])
MongoPlayground
Try this one:
db.col.aggregate([
{
$facet:
{
day_wise: [
{
$group: {
_id: {
tag: "$tag",
day: { $dateFromParts: { year: { $year: "$created_at" }, month: { $month: "$created_at" }, day: { $dayOfMonth: "$created_at" } } }
},
likes: { $sum: "$likes" },
comments: { $sum: "$comments" }
}
},
{
$project: {
_id: "$_id.tag",
likes: 1,
comments: 1,
dayMonthYear: { $dateToString: { date: "$_id.day", format: "%Y-%m-%d" } },
day: "$_id.day"
}
}
],
month_wise: [
{
$group: {
_id: {
tag: "$tag",
month: { $dateFromParts: { year: { $year: "$created_at" }, month: { $month: "$created_at" } } }
},
likes: { $sum: "$likes" },
comments: { $sum: "$comments" }
}
},
{
$project: {
_id: "$_id.tag",
likes: 1,
comments: 1,
monthYear: { $dateToString: { date: "$_id.month", format: "%Y-%m" } },
month: "$_id.month"
}
}
],
tags: [{ $group: { _id: "$tag" } }]
}
},
{ $unwind: "$tags" },
{
$project: {
_id: "$tags._id",
day_wise: {
$filter: {
input: "$day_wise",
as: "item",
cond: { $eq: ["$tags._id", "$$item._id"] }
// here you can add additional conditions. e.g.
// { $gt: ["$day", { $subtract: ["$$NOW", 1000 * 60 * 60 * 5] }] } }
}
},
month_wise: {
$filter: {
input: "$month_wise",
as: "item",
cond: { $eq: ["$tags._id", "$$item._id"] }
}
}
}
},
{
$project: {
_id: 1,
day_wise: { likes: 1, comments: 1, dayMonthYear: 1 },
month_wise: { likes: 1, comments: 1, monthYear: 1 },
}
}
])
Mongo playground

How do I group by day/month in mongoDB?

Here's how one document looks like:
{
"login_Id": "c",
"name": "Abhishek Soni",
"location": "BLAHBLAH",
"work": [
{
"date":ISODate("2014-01-01"),
"total_time": 100,
},
{
"date":ISODate("2014-09-02"),
"total_time": 100,
},
{
"date":ISODate("2014-01-01"),
"total_time": 10,
},
]
}
What I want to do is to run a query that'll give an output like this:
{login_Id: 'c', work:{'01' : 110, '02': 100, ... and so on}}
Basically, I just want to group the work part month wise.
This is what I have tried:
db.employees.aggregate([
{
"$project": {
"_id": 0,
"login_Id": 1,
"time": {
"$sum": "$work.total_time"
}
}
},
{
"$group": {
"_id": {
"$dayOfYear": "$work.date"
},
"time": {
"$sum": "$work.total_time"
}
}
}
]);
But it outputs null. If I remove the group clause, I get the total sum (i.e., 210) What's wrong?
You can try below aggregation
db.collection.aggregate([
{ "$unwind": "$work" },
{ "$match": { "work.date": { "$type": "date" }}},
{ "$group": {
"_id": { "date": { "$dayOfMonth": "$work.date" }},
"time": { "$sum": "$work.total_time" },
"login_Id": { "$first": "$login_Id" }
}},
{ "$group": {
"_id": "$login_Id",
"data": {
"$push": {
"k": { "$toString": "$_id.date" },
"v": "$time"
}
}
}},
{ "$project": {
"work": { "$arrayToObject": "$data" },
"_id": 0,
"login_id": "$_id"
}}
])
Output
[
{
"login_id": "c",
"work": {
"1": 110,
"2": 100
}
}
]

mongodb aggregation with multiple sub groups

I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
{
$group: {
_id: { type: '$type', color: '$color', year: { $year: '$date' } },
docs: {
$push: '$$ROOT'
}
}
},
{
$group: {
_id: { year: '$_id.year' },
docs: {
$push: '$$ROOT'
}
}
}
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"type": "$type",
"color": "$color"
},
"count": { "$sum": "$soldFor" }
} },
{ "$group": {
"_id": {
"year": "$_id.year",
"type": "$_id.type"
},
"counts": {
"$push": {
"k": "$_id.color",
"v": { "sum": "$count" }
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": "$_id.year",
"counts": {
"$push": {
"k": "$_id.type",
"v": "$counts"
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": null,
"counts": {
"$push": {
"k": { "$substr": ["$_id", 0, -1 ]},
"v": "$counts"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
]

Active and Total User Count Mongodb

I want to write a group by query to written active user and total count(both active and inactive) grouped by a date column in mongodb. I am able to run them as two separate scripts but how to retrieve the same information in one script
db.user.aggregate(
{
"$match": { 'phoneInfo.verifiedFlag': true}
},
{
"$project": {
yearMonthDayUTC: { $dateToString: { format: "%Y-%m-%d", date: "$createdOn" } }
}
},
{
"$group": {
"_id": {day: "$yearMonthDayUTC"},
count: {
"$sum": 1
}
}
},
{
$sort: {
"_id.day": 1,
}
})
You can use the $cond operator in your group to create a conditional count as follows (assuming the inactive/active values are in a field called status):
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": { "$dateToString": { "format": "%Y-%m-%d", "date": "$createdOn" } },
"total": { "$sum": 1 },
"active_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "active" ] }, 1, 0 ]
}
},
"inactive_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "inactive" ] }, 1, 0 ]
}
}
}
},
{ "$sort": { "_id": 1 } }
])
For different values you can adapt the following pipeline:
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": {
"day": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$createdOn"
}
},
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.day",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
}
},
{ "$sort": { "_id": 1 } }
])